In [11]:
#Import libraries
import pandas as pd
import numpy as np
import os
import json
os.getcwd()
Out[11]:
In [4]:
#Import data
# json.load
# json.loads (it use of only strings)
json_path = "purchase_data.json"
p_data_df = pd.read_json(json_path)
p_data_df.head()
p_data_df.columns
#p_data_df.dtypes
#data = pd.read_json("http://localhost:8840/edit/Homework/Week04/Instructions/HeroesOfPymoli/purchase_data.json")
#ata = json.load(open('http://localhost:8840/edit/Homework/Week04/Instructions/HeroesOfPymoli/purchase_data.json', 'r'))
Out[4]:
In [ ]:
#total number of players
#TotalPlayers = p_data_df["SN"].count()
TotalPlayers = p_data_df["SN"].unique()
TotalUniquePlayers = len(TotalPlayers)
In [ ]:
#Purchasing Analysis (Total)
#Number of Unique Items
UniqueItems = p_data_df["Item ID"].unique()
NumberUniqueItems = len(UniqueItems)
#Average Purchase Price
averagePurchase = p_data_df["Price"].mean()
#Total Number of Purchases
TotalNumberPurchases = len(p_data_df["Price"])
#Total Revenue
TotalRevenue = sum(p_data_df["Price"])
#p_data_df.describe()
#p_data_df.counts()
#type(p_data_df)
In [16]:
#demographics = p_data_df["Gender"].value_counts()
#demographics
total_gender = p_data_df["Gender"].count()
male = p_data_df["Gender"].value_counts()['Male']
female = p_data_df["Gender"].value_counts()['Female']
non_gender_specific = total_gender - male - female
male_percent = (male/total_gender) * 100
female_percent = (female/total_gender) * 100
non_gender_specific_percent = (non_gender_specific/total_gender) * 100
#Everything to a table
GenderDemographics = pd.DataFrame({"Total Gender": [total_gender],
"Total Male": [male],
"% attended Male": [male_percent],
"Total Female": [female],
"% attended Female": [female_percent],
"% Non Gender Specific":[non_gender_specific],
"% Non Gender Specific Percent":[non_gender_specific_percent],
})
GenderDemographics
#percentage = pd.DataFrame.((demographics/NumberUniqueItems)*100)
#percentage
#genderdemographics = p_data_df.groupby("Gender").count()
#genderdemographics
#How to change female / male / other to rows
#gendercount = p_data_df(gender["Age"].value_counts())
#gendercount.heads()
#genderpercentagefemale = TotalUniquePlayers /
Out[16]:
In [ ]:
genderdf = p_data_df.groupby("Gender")
genderPurchasing = pd.DataFrame(genderdf["Price"].value_counts())
genderPurchasing = pd.DataFrame(genderdf["Price"].count())
genderPurchasingAverage = pd.DataFrame(genderdf["Price"].mean())
genderPurchasingTotal = pd.DataFrame(genderdf["Price"].sum())
genderPurchasing
In [9]:
agedf = p_data_df.groupby("Age")
agePurchasing = pd.DataFrame(agedf["Price"].value_counts())
Out[9]:
In [5]:
spenders = p_data_df.groupby("SN")
#Why meand and sum are the same?
#spenders.head()
topSpenders = pd.DataFrame(spenders["Price"].count())
topSpenders.head()
topSpendersmeand = pd.DataFrame(spenders["Price"].mean())
topSpendersmeand.head()
#genderPurchasing = pd.DataFrame(genderdf["Price"].value_counts())
In [ ]:
#Final Report
#print ("The total number of players for Pymoli game is " + str(TotalUniquePlayers))
#print ("The total number of unique items for Pymoli game is " + str(NumberUniqueItems))
#print ("The average Purchase price for Pymoli game is (USD) " + str(averagePurchase))
#print ("The total revenue is (USD) " + str(TotalRevenue))