In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sea

%matplotlib inline

sea.set_palette("muted")

In [13]:
#Free vs Paid
DF = pd.read_csv("games-features-edit.csv")
f = DF[DF['IsFree'] == True].count()
fr = DF[(DF['IsFree'] == True) & DF['RecommendationCount']].mean()
fs = DF[(DF['IsFree'] == True) & DF['Metacritic']].mean()
print ("There are", f[1], "free games, with an average of", fr[1], "ratings per game.")
print ("There are", f[1], "free games, with an average rating of", fs[0], "per game.")
nf = DF[DF['IsFree'] == False].count()
nfr = DF[(DF['IsFree'] == False) & DF['RecommendationCount']].mean()
nfs = DF[(DF['IsFree'] == False) & DF['Metacritic']].mean()
print ("There are", nf[1], "paid games, with an average of", nfr[1], "ratings per game.")
print ("There are", nf[1], "paid games, with an average rating of", nfs[0], "per game.")
d = {'one' : pd.Series([fr[1], nfr[1]],
               index=['Free R', 'Paid R'])}#Rating
df = pd.DataFrame(d)
df.plot(kind='bar', title='Ratings Per Game', legend=False)
plt.savefig("../report/freevnon-ratings-bar.png", bbox_inches='tight');


There are 959 free games, with an average of 5457.68345324 ratings per game.
There are 959 free games, with an average rating of 69.5714285714 per game.
There are 11579 paid games, with an average of 3227.21833257 ratings per game.
There are 11579 paid games, with an average rating of 71.5413533835 per game.

In [14]:
d = {'one' : pd.Series([fs[0], nfs[0]],
               index=['Free S', 'Paid S'])}#Scoring
df = pd.DataFrame(d)
df.plot(kind='bar', title='Metacritic Mean Rating', legend=False)
plt.savefig("../report/freevnon-metacritic-bar.png", bbox_inches='tight');



In [15]:
#Recommendations by Metacritic (Y,X)
c = plt.scatter(DF['Metacritic'],DF['RecommendationCount'], color=sea.color_palette()[0])
plt.xlabel('Metacritic Score')
plt.ylabel('Recommendation Count')
plt.xlim((20,100))
plt.ylim(0, 100000)
plt.savefig("../report/metacritic-recommendations-scatter.png", bbox_inches='tight');



In [16]:
#Genre Comparsions
gng = DF[DF['GenreIsNonGame'] == True].count()
gngr = DF[(DF['GenreIsNonGame'] == True) & DF['RecommendationCount']].mean()
gngs = DF[(DF['GenreIsNonGame'] == True) & DF['Metacritic']].mean()
print ("There are", gng[1], "NonGame games, with an average of", gngr[1], "ratings per game.")
print ("There are", gng[1], "NonGame games, with an average rating of", gngs[0], "per game.")
gi = DF[DF['GenreIsIndie'] == True].count()
gir = DF[(DF['GenreIsIndie'] == True) & DF['RecommendationCount']].mean()
gis = DF[(DF['GenreIsIndie'] == True) & DF['Metacritic']].mean()
print ("There are", gi[1], "Indie games, with an average of", gir[1], "ratings per game.")
print ("There are", gi[1], "Indie games, with an average rating of", gis[0], "per game.")
gac = DF[DF['GenreIsAction'] == True].count()
gacr = DF[(DF['GenreIsAction'] == True) & DF['RecommendationCount']].mean()
gacs = DF[(DF['GenreIsAction'] == True) & DF['Metacritic']].mean()
print ("There are", gac[1], "Action games, with an average of", gacr[1], "ratings per game.")
print ("There are", gac[1], "Action games, with an average rating of", gacs[0], "per game.")
gad = DF[DF['GenreIsAdventure'] == True].count()
gadr = DF[(DF['GenreIsAdventure'] == True) & DF['RecommendationCount']].mean()
gads = DF[(DF['GenreIsAdventure'] == True) & DF['Metacritic']].mean()
print ("There are", gad[1], "Adventure games, with an average of", gadr[1], "ratings per game.")
print ("There are", gad[1], "Adventure games, with an average rating of", gads[0], "per game.")
gc = DF[DF['GenreIsCasual'] == True].count()
gcr = DF[(DF['GenreIsCasual'] == True) & DF['RecommendationCount']].mean()
gcs = DF[(DF['GenreIsCasual'] == True) & DF['Metacritic']].mean()
print ("There are", gc[1], "Casual games, with an average of", gcr[1], "ratings per game.")
print ("There are", gc[1], "Casual games, with an average rating of", gcs[0], "per game.")
gst = DF[DF['GenreIsStrategy'] == True].count()
gstr = DF[(DF['GenreIsStrategy'] == True) & DF['RecommendationCount']].mean()
gsts = DF[(DF['GenreIsStrategy'] == True) & DF['Metacritic']].mean()
print ("There are", gst[1], "Strategy games, with an average of", gstr[1], "ratings per game.")
print ("There are", gst[1], "Strategy games, with an average rating of", gsts[0], "per game.")
grpg = DF[DF['GenreIsRPG'] == True].count()
grpgr = DF[(DF['GenreIsRPG'] == True) & DF['RecommendationCount']].mean()
grpgs = DF[(DF['GenreIsRPG'] == True) & DF['Metacritic']].mean()
print ("There are", grpg[1], "RPG games, with an average of", grpgr[1], "ratings per game.")
print ("There are", grpg[1], "RPG games, with an average rating of", grpgs[0], "per game.")
gsi = DF[DF['GenreIsSimulation'] == True].count()
gsir = DF[(DF['GenreIsSimulation'] == True) & DF['RecommendationCount']].mean()
gsis = DF[(DF['GenreIsSimulation'] == True) & DF['Metacritic']].mean()
print ("There are", gsi[1], "Simulation games, with an average of", gsir[1], "ratings per game.")
print ("There are", gsi[1], "Simulation games, with an average rating of", gsis[0], "per game.")
gea = DF[DF['GenreIsEarlyAccess'] == True].count()
gear = DF[(DF['GenreIsEarlyAccess'] == True) & DF['RecommendationCount']].mean()
geas = DF[(DF['GenreIsEarlyAccess'] == True) & DF['Metacritic']].mean()
print ("There are", gea[1], "EarlyAccess games, with an average of", gear[1], "ratings per game.")
print ("There are", gea[1], "EarlyAccess games, with an average rating of", geas[0], "per game.")
gftp = DF[DF['GenreIsFreeToPlay'] == True].count()
gftpr = DF[(DF['GenreIsFreeToPlay'] == True) & DF['RecommendationCount']].mean()
gftps = DF[(DF['GenreIsFreeToPlay'] == True) & DF['Metacritic']].mean()
print ("There are", gftp[1], "FreeToPlay games, with an average of", gftpr[1], "ratings per game.")
print ("There are", gftp[1], "FreeToPlay games, with an average rating of", gftps[0], "per game.")
gsp = DF[DF['GenreIsSports'] == True].count()
gspr = DF[(DF['GenreIsSports'] == True) & DF['RecommendationCount']].mean()
gsps = DF[(DF['GenreIsSports'] == True) & DF['Metacritic']].mean()
print ("There are", gsp[1], "Sports games, with an average of", gspr[1], "ratings per game.")
print ("There are", gsp[1], "Sports games, with an average rating of", gsps[0], "per game.")
gr = DF[DF['GenreIsRacing'] == True].count()
grr = DF[(DF['GenreIsRacing'] == True) & DF['RecommendationCount']].mean()
grs = DF[(DF['GenreIsRacing'] == True) & DF['Metacritic']].mean()
print ("There are", gr[1], "Racing games, with an average of", grr[1], "ratings per game.")
print ("There are", gr[1], "Racing games, with an average rating of", grs[0], "per game.")
gmmo = DF[DF['GenreIsMassivelyMultiplayer'] == True].count()
gmmor = DF[(DF['GenreIsMassivelyMultiplayer'] == True) & DF['RecommendationCount']].mean()
gmmos = DF[(DF['GenreIsMassivelyMultiplayer'] == True) & DF['Metacritic']].mean()
print ("There are", gmmo[1], "MassivelyMultiplayer games, with an average of", gmmor[1], "ratings per game.")
print ("There are", gmmo[1], "MassivelyMultiplayer games, with an average rating of", gmmos[0], "per game.")
d = {'one' : pd.Series([gngr[1], gir[1], gacr[1], gadr[1], gcr[1], gstr[1], grpgr[1],
                       gsir[1], gear[1], gftpr[1], gspr[1], grr[1], gmmor[1]],
               index=['NonGame', 'Indie', 'Action', 'Adventure', 'Casual', 'Strategy', 'RPG',
                     'Simulation', 'Early Access', 'Free To Play', 'Sports', 'Racing', 'MMO'])}#Rating
df = pd.DataFrame(d)
df.plot(kind='bar', title='Ratings Count by Genre', legend=False)
plt.savefig("../report/genre-ratings-bar.png", bbox_inches='tight');


There are 293 NonGame games, with an average of 1193.77777778 ratings per game.
There are 293 NonGame games, with an average rating of nan per game.
There are 7256 Indie games, with an average of 1986.77110157 ratings per game.
There are 7256 Indie games, with an average rating of 69.9184890656 per game.
There are 5276 Action games, with an average of 5310.43261231 ratings per game.
There are 5276 Action games, with an average rating of 71.1583793738 per game.
There are 4012 Adventure games, with an average of 2844.31339713 ratings per game.
There are 4012 Adventure games, with an average rating of 70.4026666667 per game.
There are 3270 Casual games, with an average of 1795.94736842 ratings per game.
There are 3270 Casual games, with an average rating of 70.9619047619 per game.
There are 2492 Strategy games, with an average of 1353.18717504 ratings per game.
There are 2492 Strategy games, with an average rating of 72.1371237458 per game.
There are 1971 RPG games, with an average of 4167.58914729 ratings per game.
There are 1971 RPG games, with an average rating of 72.9390862944 per game.
There are 1954 Simulation games, with an average of 2586.89671362 ratings per game.
There are 1954 Simulation games, with an average rating of 69.896 per game.
There are 1307 EarlyAccess games, with an average of 4476.14917127 ratings per game.
There are 1307 EarlyAccess games, with an average rating of 73.0 per game.
There are 619 FreeToPlay games, with an average of 6328.10313901 ratings per game.
There are 619 FreeToPlay games, with an average rating of 68.9310344828 per game.
There are 490 Sports games, with an average of 2403.68888889 ratings per game.
There are 490 Sports games, with an average rating of 73.16 per game.
There are 450 Racing games, with an average of 2021.02173913 ratings per game.
There are 450 Racing games, with an average rating of 70.65 per game.
There are 356 MassivelyMultiplayer games, with an average of 5463.95867769 ratings per game.
There are 356 MassivelyMultiplayer games, with an average rating of 72.0 per game.

In [17]:
d = {'one' : pd.Series([gngs[0], gis[0], gacs[0], gads[0], gcs[0], gsts[0], grpgs[0],
                       gsis[0], geas[0], gftps[0], gsps[0], grs[0], gmmos[0]],
               index=['NonGame', 'Indie', 'Action', 'Adventure', 'Casual', 'Strategy', 'RPG',
                     'Simulation', 'Early Access', 'Free To Play', 'Sports', 'Racing', 'MMO'])}#Scoring
df = pd.DataFrame(d)
df.plot(kind='bar', legend=False, title="Mean Metacritic Score by Genre")
plt.savefig("../report/genre-metacritic-bar.png", bbox_inches='tight');



In [18]:
#Recommendations by Price (Y,X)
c = plt.scatter(DF['PriceInitial'],DF['RecommendationCount'], color=sea.color_palette()[0])
plt.xlabel("Initial Price in USD")
plt.ylabel("Number of Recommendations")
plt.xlim((0,80))
plt.ylim((0, 100000))
plt.savefig("../report/price-recommendations-scatter.png", bbox_inches='tight');



In [19]:
#Metacritic score by Price (Y,X)
c = plt.scatter(DF['PriceInitial'],DF['Metacritic'], color=sea.color_palette()[0])
plt.xlabel("Initial Price in USD")
plt.ylabel("Mean Metacritic Score")
plt.xlim((0,80))
plt.ylim(20, 100)
plt.savefig("../report/price-metacritic-scatter.png", bbox_inches='tight');


Include scatter plots without selective axes ranges


In [20]:
#Recommendations by Metacritic (Y,X)
c = plt.scatter(DF['Metacritic'],DF['RecommendationCount'], color=sea.color_palette()[0])
plt.xlabel('Metacritic Score')
plt.ylabel('Recommendation Count')
plt.savefig("../report/metacritic-recommendations-scatter-all.png", bbox_inches='tight');



In [21]:
#Recommendations by Price (Y,X)
c = plt.scatter(DF['PriceInitial'],DF['RecommendationCount'], color=sea.color_palette()[0])
plt.xlabel("Initial Price in USD")
plt.ylabel("Number of Recommendations")
plt.savefig("../report/price-recommendations-scatter-all.png", bbox_inches='tight');



In [22]:
#Metacritic score by Price (Y,X)
c = plt.scatter(DF['PriceInitial'],DF['Metacritic'], color=sea.color_palette()[0])
plt.xlabel("Initial Price in USD")
plt.ylabel("Mean Metacritic Score")
plt.savefig("../report/price-metacritic-scatter-all.png", bbox_inches='tight');