In [1]:
#Import libraries
import pandas as pd
import numpy as np
import os
import json
#os.getcwd()
In [2]:
#Import data
# json.load
# json.loads (it use of only strings)
#data = pd.read_json("http://localhost:8840/edit/Homework/Week04/Instructions/HeroesOfPymoli/purchase_data.json")
#ata = json.load(open('http://localhost:8840/edit/Homework/Week04/Instructions/HeroesOfPymoli/purchase_data.json', 'r'))
json_path = "purchase_data.json"
p_data_df = pd.read_json(json_path)
# to see the data
p_data_df.head()
#p_data_df.columns
#p_data_df.dtypes
#p_data_df.describe()
#p_data_df.counts()
Out[2]:
In [3]:
#total number of players
#TotalPlayers = p_data_df["SN"].count()
TotalPlayers = p_data_df["SN"].unique()
TotalUniquePlayers = len(TotalPlayers)
In [4]:
#Purchasing Analysis (Total)
#Number of Unique Items
UniqueItems = p_data_df["Item ID"].unique()
NumberUniqueItems = len(UniqueItems)
#Average Purchase Price
averagePurchase = p_data_df["Price"].mean()
#Total Number of Purchases
TotalNumberPurchases = len(p_data_df["Price"])
#Total Revenue
TotalRevenue = sum(p_data_df["Price"])
In [5]:
#Everything to a table
PurchasingAnalysis = pd.DataFrame({"Total Players": [TotalUniquePlayers],
"Number of Unique Items": [NumberUniqueItems],
"Average Purchase Price": [averagePurchase],
"Total Number of Purchases": [TotalNumberPurchases],
"Total Revenue": [TotalRevenue]})
PurchasingAnalysis
Out[5]:
In [ ]:
# Gender analysis
In [6]:
#demographics = p_data_df["Gender"].value_counts()
#demographics
totalGender = p_data_df["Gender"].count()
male = p_data_df["Gender"].value_counts()['Male']
female = p_data_df["Gender"].value_counts()['Female']
nonGenderSpecific = totalGender - male - female
malePercent = (male/totalGender) * 100
femalePercent = (female/totalGender) * 100
nonGenderSpecificPercent = (nonGenderSpecific/totalGender) * 100
In [7]:
#Everything to a table
GenderDemographics = pd.DataFrame({"Total Gender": [totalGender],
"Total Male": [male],
"% attended Male": [malePercent],
"Total Female": [female],
"% attended Female": [femalePercent],
"% Non Gender Specific":[nonGenderSpecific],
"% Non Gender Specific Percent":[nonGenderSpecificPercent],
})
GenderDemographics
Out[7]:
In [8]:
#The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)
#source: https://chrisalbon.com/python/pandas_binning_data.html
ageBinValue = [0, 9, 14, 19, 24, 29, 34, 39, 44, 100]
ageBinNames = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45<']
p_data_df["Age Demographics"]= pd.cut(p_data_df["Age"], ageBinValue, labels=ageBinNames)
demographic_group = p_data_df.groupby("Age Demographics")
print(demographic_group["Item Name"].count())
print((demographic_group["Price"].mean()))
print((demographic_group["Price"].sum()))
In [ ]:
spenders = p_data_df.groupby("SN")
#spenders.head()
topSpenders = pd.DataFrame(spenders["Price"].value_counts())
topSpenders.sort_values("Price", ascending = False, inplace = True)
#opSpenders
topSpenders.rename(columns = {"Price": "Total Items"}, inplace =True)
#topSpenders
In [10]:
by_itemid = p_data_df.groupby('Item ID')
#counts occurance of item ID by grouping by item ID
pur_by_item = pd.DataFrame(by_itemid['Item ID'].count())
pur_by_item.rename(columns = {"Item ID": "Number of Items Sold"}, inplace = True)
#pur_by_item
#sums Price grouped by item ID
TotalPvalue = pd.DataFrame(by_itemid['Price'].sum())
#Renaming the column
TotalPvalue.rename(columns = {"Price": "Revenue"}, inplace =True)
#Removing duplicates
no_dup_items = p_data_df.drop_duplicates('Item ID')
#TotalPvalue
In [11]:
fivePopular = no_dup_items.merge(pur_by_item, left_on = "Item ID", right_index = True)
fivePopular = fivePopular.merge(TotalPvalue, left_on = "Item ID", right_index = True)
fivePopular = fivePopular[['Item ID', "Item Name", "Price", "Number of Items Sold", "Revenue"]]
fivePopular.sort_values("Number of Items Sold", ascending = False, inplace = True)
fivePopular = fivePopular.iloc[0:5][:]
fivePopular
Out[11]:
In [ ]:
fiveProfitable = no_dup_items.merge(pur_by_item, left_on = "Item ID", right_index = True)
fiveProfitable = top5_prof.merge(fivePopular, left_on = "Item ID", right_index = True)
#fiveProfitable = top5_prof[['Item ID', "Item Name", "Number of Items Sold", "Price", "Revenue"]]
#fiveProfitable
fiveProfitable.sort_values("Price", ascending = False, inplace = True)
fiveProfitable
In [15]:
print ("The total number of players for Pymoli game is " + str(TotalUniquePlayers))
print ("The total number of unique items for Pymoli game is " + str(NumberUniqueItems))
print ("The average Purchase price for Pymoli game is (USD) " + str(averagePurchase))
print ("The total revenue is (USD) " + str(TotalRevenue))
print ("The majority of the buyers for Pymoligame are in the ages of 20-24 and spend an average of US$3")
print ("")
print ("To consider: The spenders between 40 - 44 spend in average US$4, to increase the revenue it could be an option to target this audience")