Homework#4 Heroes of Pymoli http://gw.bootcampcontent.com/GW-Coding-Boot-Camp/09-12-2017-GW-Arlington-Class-Repository-DATA/tree/master/Homework/Week04/Instructions



In [1]:

    
#Import libraries 
import pandas as pd
import numpy as np 
import os
import json 
#os.getcwd()



In [2]:

    
#Import data 
# json.load
# json.loads (it use of only strings)
#data = pd.read_json("http://localhost:8840/edit/Homework/Week04/Instructions/HeroesOfPymoli/purchase_data.json")
#ata = json.load(open('http://localhost:8840/edit/Homework/Week04/Instructions/HeroesOfPymoli/purchase_data.json', 'r'))

json_path = "purchase_data.json"
p_data_df = pd.read_json(json_path)

# to see the data 
p_data_df.head()
#p_data_df.columns
#p_data_df.dtypes
#p_data_df.describe()
#p_data_df.counts()









    Out[2]:







  
    
      
      Age
      Gender
      Item ID
      Item Name
      Price
      SN
    
  
  
    
      0
      20
      Male
      93
      Apocalyptic Battlescythe
      4.49
      Iloni35
    
    
      1
      21
      Male
      12
      Dawne
      3.36
      Aidaira26
    
    
      2
      17
      Male
      5
      Putrid Fan
      2.63
      Irim47
    
    
      3
      17
      Male
      123
      Twilight's Carver
      2.55
      Irith83
    
    
      4
      22
      Male
      154
      Feral Katana
      4.11
      Philodil43

Total number of players and purchasing analysis



In [3]:

    
#total number of players
#TotalPlayers = p_data_df["SN"].count()
TotalPlayers = p_data_df["SN"].unique()
TotalUniquePlayers = len(TotalPlayers)



In [4]:

    
#Purchasing Analysis (Total)
#Number of Unique Items
UniqueItems = p_data_df["Item ID"].unique()
NumberUniqueItems = len(UniqueItems)

#Average Purchase Price
averagePurchase = p_data_df["Price"].mean()

#Total Number of Purchases
TotalNumberPurchases = len(p_data_df["Price"])

#Total Revenue
TotalRevenue = sum(p_data_df["Price"])



In [5]:

    
#Everything to a table 
PurchasingAnalysis = pd.DataFrame({"Total Players": [TotalUniquePlayers],
                                   "Number of Unique Items": [NumberUniqueItems],
                                   "Average Purchase Price": [averagePurchase],
                                   "Total Number of Purchases": [TotalNumberPurchases],
                                   "Total Revenue": [TotalRevenue]})
PurchasingAnalysis









    Out[5]:







  
    
      
      Average Purchase Price
      Number of Unique Items
      Total Number of Purchases
      Total Players
      Total Revenue
    
  
  
    
      0
      2.924359
      64
      78
      74
      228.1



In [ ]:

    
# Gender analysis



In [6]:

    
#demographics = p_data_df["Gender"].value_counts()
#demographics

totalGender = p_data_df["Gender"].count()
male = p_data_df["Gender"].value_counts()['Male']
female = p_data_df["Gender"].value_counts()['Female']
nonGenderSpecific = totalGender - male - female
malePercent = (male/totalGender) * 100
femalePercent = (female/totalGender) * 100
nonGenderSpecificPercent = (nonGenderSpecific/totalGender) * 100



In [7]:

    
#Everything to a table 

GenderDemographics = pd.DataFrame({"Total Gender": [totalGender],
                                   "Total Male": [male],
                                   "% attended Male": [malePercent],
                                   "Total Female": [female],
                                   "% attended Female": [femalePercent],
                                   "% Non Gender Specific":[nonGenderSpecific],
                                   "% Non Gender Specific Percent":[nonGenderSpecificPercent],
                                   
                                 })
GenderDemographics









    Out[7]:







  
    
      
      % Non Gender Specific
      % Non Gender Specific Percent
      % attended Female
      % attended Male
      Total Female
      Total Gender
      Total Male
    
  
  
    
      0
      1
      1.282051
      16.666667
      82.051282
      13
      78
      64

Age Demographics



In [8]:

    
#The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)
#source: https://chrisalbon.com/python/pandas_binning_data.html

ageBinValue = [0, 9, 14, 19, 24, 29, 34, 39, 44, 100]
ageBinNames = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45<']
p_data_df["Age Demographics"]= pd.cut(p_data_df["Age"], ageBinValue, labels=ageBinNames)
demographic_group = p_data_df.groupby("Age Demographics")

print(demographic_group["Item Name"].count())
print((demographic_group["Price"].mean()))
print((demographic_group["Price"].sum()))









    



Age Demographics
10-14     3
15-19    11
20-24    36
25-29     9
30-34     7
35-39     6
40-44     1
45<       0
<10       5
Name: Item Name, dtype: int64
Age Demographics
10-14    2.986667
15-19    2.764545
20-24    3.024722
25-29    2.901111
30-34    1.984286
35-39    3.561667
40-44    4.650000
45<           NaN
<10      2.764000
Name: Price, dtype: float64
Age Demographics
10-14      8.96
15-19     30.41
20-24    108.89
25-29     26.11
30-34     13.89
35-39     21.37
40-44      4.65
45<         NaN
<10       13.82
Name: Price, dtype: float64

Top Spenders



In [ ]:

    
spenders = p_data_df.groupby("SN")

#spenders.head()
topSpenders = pd.DataFrame(spenders["Price"].value_counts())
topSpenders.sort_values("Price", ascending = False, inplace = True)
#opSpenders

topSpenders.rename(columns = {"Price": "Total Items"},  inplace =True)
#topSpenders

Most Popular Items



In [10]:

    
by_itemid = p_data_df.groupby('Item ID')
#counts occurance of item ID by grouping by item ID
pur_by_item = pd.DataFrame(by_itemid['Item ID'].count()) 

pur_by_item.rename(columns = {"Item ID": "Number of Items Sold"}, inplace = True)
#pur_by_item

#sums Price grouped by item ID
TotalPvalue = pd.DataFrame(by_itemid['Price'].sum()) 

#Renaming the column
TotalPvalue.rename(columns = {"Price": "Revenue"}, inplace =True)
#Removing duplicates
no_dup_items = p_data_df.drop_duplicates('Item ID')
#TotalPvalue



In [11]:

    
fivePopular = no_dup_items.merge(pur_by_item, left_on = "Item ID", right_index = True)
fivePopular = fivePopular.merge(TotalPvalue, left_on = "Item ID", right_index = True)
fivePopular = fivePopular[['Item ID', "Item Name", "Price", "Number of Items Sold", "Revenue"]]
fivePopular.sort_values("Number of Items Sold", ascending = False, inplace = True)
fivePopular = fivePopular.iloc[0:5][:] 
fivePopular









    Out[11]:







  
    
      
      Item ID
      Item Name
      Price
      Number of Items Sold
      Revenue
    
  
  
    
      15
      94
      Mourning Blade
      3.64
      3
      10.92
    
    
      0
      93
      Apocalyptic Battlescythe
      4.49
      2
      8.98
    
    
      10
      126
      Exiled Mithril Longsword
      1.08
      2
      2.16
    
    
      42
      64
      Fusion Pummel
      2.42
      2
      4.84
    
    
      36
      108
      Extraction, Quickblade Of Trembling Hands
      2.26
      2
      4.52



In [ ]:

    
fiveProfitable = no_dup_items.merge(pur_by_item, left_on = "Item ID", right_index = True)

fiveProfitable = top5_prof.merge(fivePopular, left_on = "Item ID", right_index = True)
#fiveProfitable = top5_prof[['Item ID', "Item Name", "Number of Items Sold", "Price", "Revenue"]]
#fiveProfitable
fiveProfitable.sort_values("Price", ascending = False, inplace = True)
fiveProfitable

Final Report



In [15]:

    
print ("The total number of players for Pymoli game is " + str(TotalUniquePlayers))
print ("The total number of unique items for Pymoli game is " + str(NumberUniqueItems))
print ("The average Purchase price for Pymoli game is (USD) " + str(averagePurchase))
print ("The total revenue  is (USD) " + str(TotalRevenue))
print ("The majority of the buyers for Pymoligame are in the ages of 20-24 and spend an average of US$3")
print ("")
print ("To consider: The spenders between 40 - 44 spend in average US$4, to increase the revenue it could be an option to target this audience")









    



The total number of players for Pymoli game is 74
The total number of unique items for Pymoli game is 64
The average Purchase price for Pymoli game is (USD) 2.9243589743589733
The total revenue  is (USD) 228.1
The majority of the buyers for Pymoligame are in the ages of 20-24 and spend an average of US$3

To consider: The spenders between 40 - 44 spend in average US$4, to increase the revenue it could be an option to target this audience

	Age	Gender	Item ID	Item Name	Price	SN
0	20	Male	93	Apocalyptic Battlescythe	4.49	Iloni35
1	21	Male	12	Dawne	3.36	Aidaira26
2	17	Male	5	Putrid Fan	2.63	Irim47
3	17	Male	123	Twilight's Carver	2.55	Irith83
4	22	Male	154	Feral Katana	4.11	Philodil43

	Item ID	Item Name	Price	Number of Items Sold	Revenue
15	94	Mourning Blade	3.64	3	10.92
0	93	Apocalyptic Battlescythe	4.49	2	8.98
10	126	Exiled Mithril Longsword	1.08	2	2.16
42	64	Fusion Pummel	2.42	2	4.84
36	108	Extraction, Quickblade Of Trembling Hands	2.26	2	4.52