notebook.community

Edit and run

Homework#4 Heroes of Pymoli http://gw.bootcampcontent.com/GW-Coding-Boot-Camp/09-12-2017-GW-Arlington-Class-Repository-DATA/tree/master/Homework/Week04/Instructions



In [11]:

    
#Import libraries 
import pandas as pd
import numpy as np 
import os
import json 
os.getcwd()









    Out[11]:





'C:\\dev\\Classes\\Gitclone\\09-12-2017-GW-Arlington-Class-Repository-DATA\\Class_Activities\\04-Pandas'



In [4]:

    
#Import data 
# json.load
# json.loads (it use of only strings)

json_path = "purchase_data.json"
p_data_df = pd.read_json(json_path)
p_data_df.head()
p_data_df.columns
#p_data_df.dtypes

#data = pd.read_json("http://localhost:8840/edit/Homework/Week04/Instructions/HeroesOfPymoli/purchase_data.json")
#ata = json.load(open('http://localhost:8840/edit/Homework/Week04/Instructions/HeroesOfPymoli/purchase_data.json', 'r'))









    Out[4]:





Index(['Age', 'Gender', 'Item ID', 'Item Name', 'Price', 'SN'], dtype='object')



In [ ]:

    
#total number of players
#TotalPlayers = p_data_df["SN"].count()
TotalPlayers = p_data_df["SN"].unique()
TotalUniquePlayers = len(TotalPlayers)



In [ ]:

    
#Purchasing Analysis (Total)
#Number of Unique Items
UniqueItems = p_data_df["Item ID"].unique()
NumberUniqueItems = len(UniqueItems)

#Average Purchase Price
averagePurchase = p_data_df["Price"].mean()

#Total Number of Purchases
TotalNumberPurchases = len(p_data_df["Price"])

#Total Revenue
TotalRevenue = sum(p_data_df["Price"])

#p_data_df.describe()
#p_data_df.counts()
#type(p_data_df)

Gender Demographics

Percentage and Count of Male Players

Percentage and Count of Female Players

Percentage and Count of Other / Non-Disclosed



In [16]:

    
#demographics = p_data_df["Gender"].value_counts()
#demographics

total_gender = p_data_df["Gender"].count()
male = p_data_df["Gender"].value_counts()['Male']
female = p_data_df["Gender"].value_counts()['Female']
non_gender_specific = total_gender - male - female
male_percent = (male/total_gender) * 100
female_percent = (female/total_gender) * 100
non_gender_specific_percent = (non_gender_specific/total_gender) * 100

#Everything to a table 
GenderDemographics = pd.DataFrame({"Total Gender": [total_gender],
                                   "Total Male": [male],
                                   "% attended Male": [male_percent],
                                   "Total Female": [female],
                                   "% attended Female": [female_percent],
                                   "% Non Gender Specific":[non_gender_specific],
                                   "% Non Gender Specific Percent":[non_gender_specific_percent],
                                   
                                 })
GenderDemographics
#percentage = pd.DataFrame.((demographics/NumberUniqueItems)*100)
#percentage


#genderdemographics = p_data_df.groupby("Gender").count()
#genderdemographics
#How to change female / male / other to rows 
#gendercount = p_data_df(gender["Age"].value_counts())
#gendercount.heads()
#genderpercentagefemale = TotalUniquePlayers /









    Out[16]:







  
    
      
      % Non Gender Specific
      % Non Gender Specific Percent
      % attended Female
      % attended Male
      Total Female
      Total Gender
      Total Male
    
  
  
    
      0
      1
      1.282051
      16.666667
      82.051282
      13
      78
      64

Purchasing Analysis (Gender) #How to combine them in one single table? The below each broken by gender

Purchase Count (.count)
Average Purchase Price (.mean)
Total Purchase Value (.sum)
Normalized Totals (?????)



In [ ]:

    
genderdf = p_data_df.groupby("Gender")
genderPurchasing = pd.DataFrame(genderdf["Price"].value_counts())
genderPurchasing = pd.DataFrame(genderdf["Price"].count())
genderPurchasingAverage = pd.DataFrame(genderdf["Price"].mean())
genderPurchasingTotal = pd.DataFrame(genderdf["Price"].sum())
genderPurchasing

Age Demographics

The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.) Purchase Count Average Purchase Price Total Purchase Value Normalized Totals



In [9]:

    
agedf = p_data_df.groupby("Age")
agePurchasing = pd.DataFrame(agedf["Price"].value_counts())

Top Spenders

Identify the the top 5 spenders in the game by total purchase value, then list (in a table): SN Purchase Count Average Purchase Price Total Purchase Value



In [5]:

    
spenders = p_data_df.groupby("SN")
#Why meand and sum are the same? 
#spenders.head()
topSpenders = pd.DataFrame(spenders["Price"].count())
topSpenders.head()
topSpendersmeand = pd.DataFrame(spenders["Price"].mean())
topSpendersmeand.head()
#genderPurchasing = pd.DataFrame(genderdf["Price"].value_counts())









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-5-b141b11344df> in <module>()
      2 #Why meand and sum are the same?
      3 #spenders.head()
----> 4 topSpenders = pd.DataFrame(spenders["Price"] > 4)
      5 topSpenders.head()
      6 topSpendersmeand = pd.DataFrame(spenders["Price"].mean())

TypeError: '>' not supported between instances of 'SeriesGroupBy' and 'int'

# by trying to filter by the max I did this topSpenders = pd.DataFrame(spenders["Price"] > 4) = '>' not supported between instances of 'SeriesGroupBy' and 'int'



In [ ]:

    
#Final Report 
#print ("The total number of players for Pymoli game is " + str(TotalUniquePlayers))
#print ("The total number of unique items for Pymoli game is " + str(NumberUniqueItems))
#print ("The average Purchase price for Pymoli game is (USD) " + str(averagePurchase))
#print ("The total revenue  is (USD) " + str(TotalRevenue))

	Price
count	78.0
mean	1.0
std	0.0
min	1.0
25%	1.0
50%	1.0
75%	1.0
max	1.0