In [1]:
from __future__ import print_function
import unicodecsv as csv
import json
import re
import requests
import time

In [2]:
# Steam's public app list is updated every day, so len(apps) -- and any list
# index derived from it -- can differ from one run to the next.
APP_LIST_URL = "http://api.steampowered.com/ISteamApps/GetAppList/v0001/"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns an HTTP error into a clear exception here instead of
# an opaque JSON-decoding or KeyError failure on the lines below.
response = requests.get(APP_LIST_URL, timeout=30)
response.raise_for_status()

data = response.json()
apps = data['applist']['apps']['app']  # list of {'appid': ..., 'name': ...} dicts
len(apps)


Out[2]:
24466

In [3]:
# appid at which the previous scraping run stopped (presumably the last app it
# handled -- confirm against the CSV); scraping resumes with the app after it.
LAST_SCRAPED_APPID = 460010

# 1-based position of that app in `apps`, i.e. the index of the first app that
# still has to be scraped (used as the start of the slice in the scraping cell).
last_appid_read = next(
    (position for position, app in enumerate(apps, 1)
     if int(app['appid']) == LAST_SCRAPED_APPID),
    None,
)

# The app list changes daily: if the resume appid has disappeared, the old
# counting loop ended with last_appid_read == len(apps), which made the scrape
# silently skip every app. Fail loudly instead so a new resume point is chosen.
if last_appid_read is None:
    raise ValueError(
        "appid {0} is not in the current app list; pick a new resume appid".format(
            LAST_SCRAPED_APPID))

last_appid_read


Out[3]:
24464

In [4]:
# Peek at the first app that will be scraped next. Index with the computed
# resume offset rather than a hard-coded number, which goes stale (or raises
# IndexError) whenever the daily app list changes. None means nothing is left.
apps[last_appid_read] if last_appid_read < len(apps) else None


Out[4]:
{u'appid': 460040, u'name': u'Empires Dedicated Server'}

In [5]:
# Show the first entry of the app list to see the shape of one record.
apps[0]


Out[5]:
{u'appid': 5, u'name': u'Dedicated Server'}

In [6]:
def clean_text(input_str):
    """Turn a raw HTML description into lower-case, single-spaced plain text.

    Parameters
    ----------
    input_str : str
        Text that may contain HTML tags, line breaks and runs of whitespace.

    Returns
    -------
    str
        The text with tags removed, every run of whitespace (including
        ``\\r`` / ``\\n``) collapsed to one space, surrounding whitespace
        stripped, and all characters lower-cased.
    """
    # Replace tags with a space (not the empty string) so that words separated
    # only by markup, e.g. "one<br>two", do not get glued into "onetwo".
    without_tags = re.sub(r'<[^>]+>', ' ', input_str)

    # \s already matches \r and \n, so line breaks are collapsed here too and
    # likewise become a single space instead of joining adjacent words.
    single_spaced = re.sub(r'\s+', ' ', without_tags).strip()

    # No further preprocessing here: per the original author's note, sklearn's
    # TfidfVectorizer is expected to handle the rest before featurizing.
    return single_spaced.lower()

In [7]:
base_url = "http://store.steampowered.com/api/appdetails?appids="

REQUEST_TIMEOUT_SECONDS = 30  # give up on a stalled connection instead of hanging
RETRY_WAIT_SECONDS = 60       # pause before the single retry of a failed request
FAILURE_PAUSE_SECONDS = 2     # pause after a lookup that reported no success
REQUEST_PAUSE_SECONDS = 3     # pause after each row written

# Everything that means "no usable JSON came back": connection problems, a
# timed-out read, or a body that is not valid JSON (.json() raises ValueError).
FETCH_ERRORS = (requests.ConnectionError, requests.Timeout, ValueError)

# Sentinel returned when both attempts failed; distinct from None because a
# JSON body of `null` also decodes to None.
FETCH_FAILED = object()


def fetch_app_details(appid):
    """Download the appdetails JSON for one appid, retrying once on failure.

    Returns the decoded JSON payload, or FETCH_FAILED when both the first
    attempt and the retry (made after RETRY_WAIT_SECONDS) raised one of
    FETCH_ERRORS.
    """
    address = base_url + str(appid)

    try:
        return requests.get(address, timeout=REQUEST_TIMEOUT_SECONDS).json()
    except FETCH_ERRORS:
        print("failed to retrieve appid {0}, let's wait a minute then try again".format(appid))
        # wait a bit then try again
        time.sleep(RETRY_WAIT_SECONDS)

    try:
        return requests.get(address, timeout=REQUEST_TIMEOUT_SECONDS).json()
    except FETCH_ERRORS:
        # but if it fails again give up
        print("failed to retrieve appid {0}, skipping".format(appid))
        return FETCH_FAILED


def join_descriptions(app_data, key):
    """Comma-join the 'description' of every item under app_data[key].

    Returns 'Uncategorized' when the key (or an item's 'description') is missing.
    """
    try:
        return ",".join(item['description'] for item in app_data[key])
    except KeyError:
        return 'Uncategorized'


def build_row(appid, app_data):
    """Build the CSV row dict for one app from its appdetails 'data' dict.

    Missing (or empty/None) description fields become ''; 'name' and 'type'
    are required and raise KeyError when absent.
    """
    return {
        'appid': appid,
        'name': app_data['name'],
        'type': app_data['type'],
        'detailed_description': clean_text(app_data.get('detailed_description') or ''),
        'about_the_game': clean_text(app_data.get('about_the_game') or ''),
        'categories': join_descriptions(app_data, 'categories'),
        'genres': join_descriptions(app_data, 'genres'),
    }


# Only the apps after the resume point still need to be scraped.
apps_remaining = apps[last_appid_read:]

# unicodecsv encodes rows to bytes itself, so the file must be opened in binary
# mode ('ab'); append mode keeps the rows written by earlier runs.
with open('../../data/steam/data.csv', 'ab') as csvfile:
    fieldnames = ['appid','type','name','detailed_description','about_the_game','categories','genres']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    # Write the header only when starting a brand-new (empty) file, so resumed
    # runs do not insert a second header in the middle of the data.
    csvfile.seek(0, 2)  # 2 == os.SEEK_END
    if csvfile.tell() == 0:
        writer.writeheader()

    for app in apps_remaining:
        appid = app['appid']

        payload = fetch_app_details(appid)
        if payload is FETCH_FAILED:
            continue

        # Tolerate a `null` body or a payload without this appid's entry by
        # treating it like an unsuccessful lookup instead of crashing the run.
        entry = payload.get(str(appid)) if isinstance(payload, dict) else None
        success = entry.get('success') if isinstance(entry, dict) else None

        if success not in (True, "True"):
            print("no success for appid {0}, got success={1}".format(appid,success))
            time.sleep(FAILURE_PAUSE_SECONDS)
            continue

        writer.writerow(build_row(appid, entry['data']))

        time.sleep(REQUEST_PAUSE_SECONDS)


no success for appid 460040, got success=False

In [ ]:


In [ ]: