In [1]:
#Import libraries 
import pandas as pd
import numpy as np 
import os
import json 
#os.getcwd()

In [2]:
#Import data 
# json.load
# json.loads (it use of only strings)
#data = pd.read_json("http://localhost:8840/edit/Homework/Week04/Instructions/HeroesOfPymoli/purchase_data.json")
#ata = json.load(open('http://localhost:8840/edit/Homework/Week04/Instructions/HeroesOfPymoli/purchase_data.json', 'r'))

json_path = "purchase_data.json"
p_data_df = pd.read_json(json_path)

# to see the data 
p_data_df.head()
#p_data_df.columns
#p_data_df.dtypes
#p_data_df.describe()
#p_data_df.counts()


Out[2]:
Age Gender Item ID Item Name Price SN
0 20 Male 93 Apocalyptic Battlescythe 4.49 Iloni35
1 21 Male 12 Dawne 3.36 Aidaira26
2 17 Male 5 Putrid Fan 2.63 Irim47
3 17 Male 123 Twilight's Carver 2.55 Irith83
4 22 Male 154 Feral Katana 4.11 Philodil43

Total number of players and purchasing analysis


In [3]:
#total number of players
#TotalPlayers = p_data_df["SN"].count()
TotalPlayers = p_data_df["SN"].unique()
TotalUniquePlayers = len(TotalPlayers)

In [4]:
#Purchasing Analysis (Total)
#Number of Unique Items
UniqueItems = p_data_df["Item ID"].unique()
NumberUniqueItems = len(UniqueItems)

#Average Purchase Price
averagePurchase = p_data_df["Price"].mean()

#Total Number of Purchases
TotalNumberPurchases = len(p_data_df["Price"])

#Total Revenue
TotalRevenue = sum(p_data_df["Price"])

In [5]:
#Everything to a table 
PurchasingAnalysis = pd.DataFrame({"Total Players": [TotalUniquePlayers],
                                   "Number of Unique Items": [NumberUniqueItems],
                                   "Average Purchase Price": [averagePurchase],
                                   "Total Number of Purchases": [TotalNumberPurchases],
                                   "Total Revenue": [TotalRevenue]})
PurchasingAnalysis


Out[5]:
Average Purchase Price Number of Unique Items Total Number of Purchases Total Players Total Revenue
0 2.924359 64 78 74 228.1

In [ ]:
# Gender analysis

In [6]:
#demographics = p_data_df["Gender"].value_counts()
#demographics

totalGender = p_data_df["Gender"].count()
male = p_data_df["Gender"].value_counts()['Male']
female = p_data_df["Gender"].value_counts()['Female']
nonGenderSpecific = totalGender - male - female
malePercent = (male/totalGender) * 100
femalePercent = (female/totalGender) * 100
nonGenderSpecificPercent = (nonGenderSpecific/totalGender) * 100

In [7]:
#Everything to a table 

GenderDemographics = pd.DataFrame({"Total Gender": [totalGender],
                                   "Total Male": [male],
                                   "% attended Male": [malePercent],
                                   "Total Female": [female],
                                   "% attended Female": [femalePercent],
                                   "% Non Gender Specific":[nonGenderSpecific],
                                   "% Non Gender Specific Percent":[nonGenderSpecificPercent],
                                   
                                 })
GenderDemographics


Out[7]:
% Non Gender Specific % Non Gender Specific Percent % attended Female % attended Male Total Female Total Gender Total Male
0 1 1.282051 16.666667 82.051282 13 78 64

Age Demographics


In [8]:
#The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)
#source: https://chrisalbon.com/python/pandas_binning_data.html

ageBinValue = [0, 9, 14, 19, 24, 29, 34, 39, 44, 100]
ageBinNames = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45<']
p_data_df["Age Demographics"]= pd.cut(p_data_df["Age"], ageBinValue, labels=ageBinNames)
demographic_group = p_data_df.groupby("Age Demographics")

print(demographic_group["Item Name"].count())
print((demographic_group["Price"].mean()))
print((demographic_group["Price"].sum()))


Age Demographics
10-14     3
15-19    11
20-24    36
25-29     9
30-34     7
35-39     6
40-44     1
45<       0
<10       5
Name: Item Name, dtype: int64
Age Demographics
10-14    2.986667
15-19    2.764545
20-24    3.024722
25-29    2.901111
30-34    1.984286
35-39    3.561667
40-44    4.650000
45<           NaN
<10      2.764000
Name: Price, dtype: float64
Age Demographics
10-14      8.96
15-19     30.41
20-24    108.89
25-29     26.11
30-34     13.89
35-39     21.37
40-44      4.65
45<         NaN
<10       13.82
Name: Price, dtype: float64

Top Spenders


In [ ]:
spenders = p_data_df.groupby("SN")

#spenders.head()
topSpenders = pd.DataFrame(spenders["Price"].value_counts())
topSpenders.sort_values("Price", ascending = False, inplace = True)
#opSpenders

topSpenders.rename(columns = {"Price": "Total Items"},  inplace =True)
#topSpenders

Most Popular Items


In [10]:
by_itemid = p_data_df.groupby('Item ID')
#counts occurance of item ID by grouping by item ID
pur_by_item = pd.DataFrame(by_itemid['Item ID'].count()) 

pur_by_item.rename(columns = {"Item ID": "Number of Items Sold"}, inplace = True)
#pur_by_item

#sums Price grouped by item ID
TotalPvalue = pd.DataFrame(by_itemid['Price'].sum()) 

#Renaming the column
TotalPvalue.rename(columns = {"Price": "Revenue"}, inplace =True)
#Removing duplicates
no_dup_items = p_data_df.drop_duplicates('Item ID')
#TotalPvalue

In [11]:
fivePopular = no_dup_items.merge(pur_by_item, left_on = "Item ID", right_index = True)
fivePopular = fivePopular.merge(TotalPvalue, left_on = "Item ID", right_index = True)
fivePopular = fivePopular[['Item ID', "Item Name", "Price", "Number of Items Sold", "Revenue"]]
fivePopular.sort_values("Number of Items Sold", ascending = False, inplace = True)
fivePopular = fivePopular.iloc[0:5][:] 
fivePopular


Out[11]:
Item ID Item Name Price Number of Items Sold Revenue
15 94 Mourning Blade 3.64 3 10.92
0 93 Apocalyptic Battlescythe 4.49 2 8.98
10 126 Exiled Mithril Longsword 1.08 2 2.16
42 64 Fusion Pummel 2.42 2 4.84
36 108 Extraction, Quickblade Of Trembling Hands 2.26 2 4.52

In [ ]:
fiveProfitable = no_dup_items.merge(pur_by_item, left_on = "Item ID", right_index = True)

fiveProfitable = top5_prof.merge(fivePopular, left_on = "Item ID", right_index = True)
#fiveProfitable = top5_prof[['Item ID', "Item Name", "Number of Items Sold", "Price", "Revenue"]]
#fiveProfitable
fiveProfitable.sort_values("Price", ascending = False, inplace = True)
fiveProfitable

Final Report


In [15]:
print ("The total number of players for Pymoli game is " + str(TotalUniquePlayers))
print ("The total number of unique items for Pymoli game is " + str(NumberUniqueItems))
print ("The average Purchase price for Pymoli game is (USD) " + str(averagePurchase))
print ("The total revenue  is (USD) " + str(TotalRevenue))
print ("The majority of the buyers for Pymoligame are in the ages of 20-24 and spend an average of US$3")
print ("")
print ("To consider: The spenders between 40 - 44 spend in average US$4, to increase the revenue it could be an option to target this audience")


The total number of players for Pymoli game is 74
The total number of unique items for Pymoli game is 64
The average Purchase price for Pymoli game is (USD) 2.9243589743589733
The total revenue  is (USD) 228.1
The majority of the buyers for Pymoligame are in the ages of 20-24 and spend an average of US$3

To consider: The spenders between 40 - 44 spend in average US$4, to increase the revenue it could be an option to target this audience