In [52]:
import dataset
import pandas as pd
import numpy as np
import os
import seaborn as sns
import scipy

%matplotlib inline
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [3]:
db = dataset.connect(os.environ['POSTGRES_URI'])
games_df = pd.read_sql('SELECT * FROM game_crawl_view;', os.environ['POSTGRES_URI'], index_col='steam_app_id',
                      parse_dates=['release_date', 'crawl_time'])
games_df.head()


Out[3]:
crawl_time game_name short_description reviews_last_30_days pct_positive_reviews_last_30_days reviews_all_time pct_positive_reviews_all_time release_date title developer publisher num_achievements full_price long_description metacritic_score is_dlc genres details tags
steam_app_id
5 2017-04-25 01:45:36.531985+00:00 None None NaN NaN NaN NaN NaT None None None NaN NaN None NaN None None None None
7 2017-04-25 01:45:49.480343+00:00 None None NaN NaN NaN NaN NaT None None None NaN NaN None NaN None None None None
8 2017-04-25 01:46:01.917411+00:00 None None NaN NaN NaN NaN NaT None None None NaN NaN None NaN None None None None
10 2017-04-25 01:46:15.009013+00:00 Counter-Strike Play the world's number 1 online action game. ... 745.0 96.0 39394.0 97.0 2000-11-01 Counter-Strike Valve Valve NaN 9.99 ABOUT THIS GAME\nPlay the world's number 1 onl... 88.0 False [Action] [Multi-player, Valve Anti-Cheat enabled] [Action, FPS, Multiplayer, Shooter, Classic, T...
20 2017-04-25 01:46:28.021976+00:00 Team Fortress Classic One of the most popular online action games of... 35.0 71.0 1842.0 81.0 1999-04-01 Team Fortress Classic Valve Valve NaN 4.99 ABOUT THIS GAME\nOne of the most popular onlin... NaN False [Action] [Multi-player, Valve Anti-Cheat enabled] [Classic, Shooter, Class-Based, Team-Based, Fi...

How many apps did we crawl in total?


In [4]:
len(games_df)


Out[4]:
40216

And how many of these did we actually get info for?


In [5]:
games_df = games_df[~pd.isnull(games_df['game_name'])]
len(games_df)


Out[5]:
26525

How many games vs DLC?


In [6]:
sns.countplot(x='is_dlc', data=games_df)


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd1dc96f7f0>

Looking at only games from now on...


In [7]:
games_df = games_df[games_df['is_dlc'] == False]

Counts by various columns


In [8]:
games_df['publisher'].value_counts()


Out[8]:
Ubisoft                                      142
Big Fish Games                               137
SEGA                                         134
Strategy First                               133
THQ Nordic                                   106
KISS ltd                                     103
Sekai Project                                 97
KOEI TECMO GAMES CO., LTD.                    97
Square Enix                                   93
Degica                                        79
Choice of Games                               77
Devolver Digital                              70
Activision                                    68
Paradox Interactive                           67
1C Company                                    65
Buka Entertainment                            63
United Independent Entertainment GmbH         58
Daedalic Entertainment                        57
Focus Home Interactive                        52
MumboJumbo                                    52
Slitherine Ltd.                               51
Back To Basics Gaming                         50
Kalypso Media Digital                         49
AGM PLAYISM                                   47
Libredia                                      47
Black Shell Media                             46
Artifex Mundi                                 45
Plug In Digital                               45
Headup Games                                  44
Aldorlea Games                                44
                                            ... 
cbritez                                        1
Cuddles and Snowflake                          1
Borderline Inappropriate Game Studios Ltd      1
Eerie Bear Games                               1
Aperico Software                               1
Interplay Corp                                 1
Games Foundry                                  1
Xtase Studios                                  1
Poorwill Games                                 1
Eden Agency                                    1
K Monkey                                       1
kunst-stoff GmbH                               1
نقش الضوء                                      1
Colonthree Enterprises                         1
SmashRiot LLC                                  1
Poking Water Games                             1
bocoboc                                        1
Delmunsoft                                     1
Thrillion                                      1
1C Company, Playrix                            1
rocketship                                     1
Endemic Interactive                            1
Maverick Game Studio                           1
Queen Bee Games                                1
XPEC Entertainment Inc.                        1
Ivan Zanotti's MyMadness Works                 1
Team Junkfish                                  1
Fiassink Games                                 1
Meepower                                       1
Red Nexus Games Inc.                           1
Name: publisher, dtype: int64

In [9]:
games_df['developer'].value_counts()


Out[9]:
KOEI TECMO GAMES CO., LTD.                                98
Choice of Games                                           77
SEGA                                                      57
MumboJumbo                                                45
Humongous Entertainment                                   36
Hosted Games                                              36
Telltale Games                                            36
HeR Interactive                                           33
Valve                                                     31
EnsenaSoft                                                30
Warfare Studios                                           30
Milestone S.r.l.                                          26
Daedalic Entertainment                                    26
Winged Cloud                                              25
Bohemia Interactive                                       23
Ubisoft Montreal                                          23
Paradox Development Studio                                22
United Independent Entertainment GmbH                     22
Winter Wolves                                             21
Frontwing                                                 20
id Software                                               20
Team17 Digital Ltd                                        20
Sandlot Games                                             18
DotEmu                                                    18
Eipix Entertainment                                       18
MicroProse Software, Inc                                  18
Forever Entertainment S. A.                               17
FIVE-BN GAMES                                             17
PopCap Games, Inc.                                        17
SQUARE ENIX                                               16
                                                          ..
Stegalosaurus Game Development                             1
Interplay                                                  1
Joshua Hughes                                              1
Unlikely Rogue                                             1
Onur Vural and Leigh Christian                             1
EXE-CREATE                                                 1
Code Horizon                                               1
Space Paw Studios                                          1
Jagex , Artplant                                           1
Σεμιόν Ούγλεβ                                              1
Lupus Solus Studio                                         1
KintoGames , Dolores Entertainment                         1
Kirill Gurman                                              1
Cyberlore                                                  1
Geomertex                                                  1
cbritez                                                    1
Benjamin Famiglietti                                       1
Cuddles and Snowflake                                      1
Forbes Consult Ltd                                         1
Eerie Bear Games                                           1
Aperico Software                                           1
Class of 2015-2016                                         1
Stephane Graziano , Franck Graziano , Mathieu Lamarche     1
Light Vision Interactive                                   1
Snowrunner Games                                           1
Applava                                                    1
Mini IT                                                    1
SkywardRiver , Zetasis , BeNjO , Nikakosa , Rex            1
Chaos Concept                                              1
Red Nexus Games Inc.                                       1
Name: developer, dtype: int64

Plotting various review metrics


In [45]:
sns.distplot(games_df['metacritic_score'].dropna(), bins=25)


Out[45]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd1d98fc550>

In [46]:
sns.distplot(games_df['pct_positive_reviews_all_time'].dropna(), bins=25)


Out[46]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd1d9636240>

In [14]:
sns.distplot(games_df['pct_positive_reviews_last_30_days'].dropna())


Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fd1dee54e80>

In [29]:
nonnull_reviews_df = games_df[
    ~pd.isnull(games_df['metacritic_score'])
    & ~pd.isnull(games_df['pct_positive_reviews_all_time'])
]
sns.jointplot(x='metacritic_score', y='pct_positive_reviews_all_time', data=nonnull_reviews_df,
              stat_func=scipy.stats.spearmanr)


Out[29]:
<seaborn.axisgrid.JointGrid at 0x7fd1dcd21320>

Some of those scores are really low... curious what the games are


In [24]:
def find_worst_games(metric):
    return (nonnull_reviews_df.sort_values(
        metric
    )[:5][['game_name', 'pct_positive_reviews_all_time', 'metacritic_score']])

In [25]:
find_worst_games('pct_positive_reviews_all_time')


Out[25]:
game_name pct_positive_reviews_all_time metacritic_score
steam_app_id
210500 Fray: Reloaded Edition 0.0 27.0
210490 Fray: Reloaded Edition 0.0 27.0
256050 Adventure Park 9.0 49.0
238870 Citadels 11.0 20.0
216130 Gemini Wars 13.0 53.0

In [26]:
find_worst_games('metacritic_score')


Out[26]:
game_name pct_positive_reviews_all_time metacritic_score
steam_app_id
238870 Citadels 11.0 20.0
10220 Postal III 41.0 24.0
210500 Fray: Reloaded Edition 0.0 27.0
210490 Fray: Reloaded Edition 0.0 27.0
22310 Rogue Warrior 59.0 29.0

In [49]:
def find_best_games(metric):
    return (nonnull_reviews_df.sort_values(
        metric, ascending=False
    )[:5][['game_name', 'pct_positive_reviews_all_time', 'metacritic_score']])

In [50]:
find_best_games('pct_positive_reviews_all_time')


Out[50]:
game_name pct_positive_reviews_all_time metacritic_score
steam_app_id
267980 Hostile Waters: Antaeus Rising 100.0 80.0
470800 She Remembered Caterpillars 100.0 83.0
375170 Gunpowder 100.0 76.0
373990 Metrico+ 100.0 66.0
366780 Magical Eyes - Red is for Anguish 100.0 77.0

In [51]:
find_best_games('metacritic_score')


Out[51]:
game_name pct_positive_reviews_all_time metacritic_score
steam_app_id
70 Half-Life 96.0 96.0
219 Half-Life 2 96.0 96.0
220 Half-Life 2 96.0 96.0
7710 BioShock™ 94.0 96.0
7670 BioShock™ 94.0 96.0