In [1]:
from collections import namedtuple
import sqlite3
DROP_ALL_TABLES = False
Dora R. is a great investigator, and she has access to a large database that she is always checking and she can add and take from it at any time. Her database is called "A gazeta de Geringontzan" - os simply "agazeta.db"
In [2]:
conn = sqlite3.connect('agazeta.db')
In [3]:
c = conn.cursor()
Dora R. has a setup, in case she is creating a new database she enters those small keys to have some meat in her sandwich.
Also, she is going to run her investigation skills to understand what is happening on the historics of people, but first things have to work.
Dora starts by seeing if she can get back the first 2 api's from the data base.
In [44]:
try:
c.execute('SELECT * FROM apikeys LIMIT 1')
except Exception, e:
c.execute( 'CREATE TABLE apikeys (id integer primary key, username text, token text, working integer, email text, subscribed integer)')
print 'Look! It is working! I have the top two api usernames and tokens from a gazeta!'
#for row in c.execute('SELECT username FROM apikeys LIMIT 2'):
# print row
She is very excited, it is working! First thing she grabs her pen and paper and starts to look for special things on the api's, maybe she would find something interesting.
In [5]:
APIkey = namedtuple('APIkey', ['username', 'token'])
for row in c.execute('SELECT username, token FROM apikeys WHERE working == 1'):
APIkey.username = row[0]
APIkey.token = row[1]
She gets everything read with the last of the username and tokens
In [6]:
get = 'https://trackobot.com/profile/history.json?page={page}&username={username}&token={token}'.format(page=1,
username=APIkey.username,
token=APIkey.token)
In [7]:
import urllib
import json
Dora R. hates when her data comes with a "u" in front, it gets messy and hard to read, so it is pretty much manicure.
In [8]:
def json_load_byteified(file_handle):
return _byteify(
json.load(file_handle, object_hook = _byteify),
ignore_dicts = True
)
def json_loads_byteified(json_text):
return _byteify(
json.loads(json_text, object_hook = _byteify),
ignore_dicts = True
)
def _byteify( data, ignore_dicts = False):
if isinstance(data, unicode):
return data.encode('utf-8')
if isinstance(data, list):
return [ _byteify( item, ignore_dicts=True) for item in data ]
if isinstance(data, dict) and not ignore_dicts:
return {
_byteify(key, ignore_dicts=True): _byteify(value, ignore_dicts=True)
for key, value in data.iteritems()
}
return data
In [9]:
response = urllib.urlopen(get)
In [10]:
resp = response.read()
In [11]:
data = json_loads_byteified(resp)
Dora R. now wants to see what is inside the 'data' json, its keys and values
In [12]:
data.keys()
Out[12]:
In [13]:
data['meta'].keys()
Out[13]:
In [14]:
data['meta']
Out[14]:
This is the best thing she could find! Here Dora can see how many pages there are, so she can use it to 'spider' around the data. Her eyes shine when she sees all that many possibilities, all those secrets!
In [15]:
data['history'][0].keys()
Out[15]:
With 'history' she sees that here lies what is really important, the games!
Now she will run her small trick that returns the number of turns a game had
In [22]:
def total_turns(data, j):
a = 0
for i in range(len(data['history'][j]['card_history'])):
a = data['history'][j]['card_history'][i]['turn'] if data['history'][j]['card_history'][i]['turn'] > a else a
return a
If she wanted to see who won and with which deck, she would need to do this...
In [23]:
print data['history'][0]['id'],'-', data['history'][0]['added'], data['history'][0]['mode']
print data['history'][0]['hero'], data['history'][0]['hero_deck'], 'vs', data['history'][0]['opponent'], data['history'][0]['opponent_deck']
print 'First player' if not data['history'][0]['coin'] else 'Second player'
print 'Turns', total_turns(data, 0)
print data['history'][0]['result']
Dora R. is also curious about the cards that were used during the game
In [24]:
data['history'][0]['card_history']
Out[24]:
Now Dora is much more confident, she wants to get all the ranked games from this data and this page!
In [25]:
# Only ranked games
for i in range(len(data['history'])):
if data['history'][i]['mode'] != 'ranked':
continue
print data['history'][i]['id'],'-', data['history'][i]['added']
print data['history'][i]['hero'], data['history'][i]['hero_deck'], 'vs', data['history'][i]['opponent'], data['history'][0]['opponent_deck']
print 'First player' if not data['history'][i]['coin'] else 'Second player'
print 'Turns', total_turns(data, i)
print data['history'][i]['result']
It all seens perfect, with just one problem, the datetime is weird... she will have to convert it befor being able to add it to the database.
In [26]:
data['history'][0]['added']
Out[26]:
In [27]:
from datetime import datetime
datetime.strptime('2016-12-02T02:29:09.000Z', '%Y-%m-%dT%H:%M:%S.000Z')
Out[27]:
Dora R. prefers to work with POSIX dates, she says that they are better to make searchs in the tables... maybe she does not know of the julian date calendar.
In [28]:
import time
d = datetime.strptime(data['history'][0]['added'], '%Y-%m-%dT%H:%M:%S.000Z')
print str(int(time.mktime(d.timetuple())))+'!!!',
print "POSIX seconds! This will help alot when I try to find my data in time windows!"
Hell yes! Her tricks do work perfectly, now she can enter the date and time in a more sane order on the database!
Dora R. has enough to be sure that it is possible to get the data that she wants out of the api, now she needs to add into the database.
The API and Keys table is simple, just username and token, along with a working statement, so in case a player regenerates the value we may set this flag as 0 to 'unusable', she finds out it would be great to have the email of the person in the database, so she can talk with the person ans kindly asks for her to donate the key again, and to send her findings directly into the email if they are subscribed.
Now she needs a table for her findings... she will add the game results from a date window from an apikey and load it up to the database, first it will need an ID, for each new entry, also, she will parse the data instead of loading JSON's, she won't be running it relational, but she need the speed and reliability of a proper SQL machine. Also, she is not going to store games that are NOT ranked, casual and arena games can skew too much the data.
In [29]:
try:
c.execute('SELECT * FROM archive LIMIT 1')
print 'Yeap, working'
except Exception, e:
c.execute('''CREATE TABLE archive (id integer primary key, matchid integer,
date_posix integer, rank integer, hero text, hero_deck text, opponent_hero text, opponent_deck text,
coin integer, turns integer, result integer, cards text, opponent_cards text)''')
print 'Table archive is done!'
The setup is good, now Dora R. must take care so she does not add duplicated games to the archive.
First she will make a process that will run through the apikeys and then it will capture the games from each layer and pipeline it to another process that will write it into the archive.
In [30]:
querie = 'SELECT username, token FROM apikeys WHERE working == 1'
In [31]:
def api_getter (verbose=False, limit = 0):
APIkey = namedtuple('APIkey', ['username', 'token'])
querie = 'SELECT username, token FROM apikeys WHERE working == 1'
if limit:
querie = 'SELECT username, token FROM apikeys WHERE working == 1 LIMIT {limit}'.format(limit=limit)
sqlret = [sqlret for sqlret in c.execute(querie)]
for row in sqlret:
APIkey.username = row[0]
APIkey.token = row[1]
if verbose:
print '{username} {token}'.format(username=APIkey.username, token=APIkey.token)
yield (row[0], row[1])
def posix_conversion(date = '2016-12-02T02:29:09.000Z'):
d = datetime.strptime( date, '%Y-%m-%dT%H:%M:%S.000Z')
return int(time.mktime(d.timetuple()))
In [32]:
def Maexxna(from_date, limit=0, verbose=False):
# from_date = datetime
from_date = time.mktime(from_date.timetuple())
get = 'https://trackobot.com/profile/history.json?page={page}&username={username}&token={token}'
for api in api_getter(limit = limit):
page = 1
end = False
try_and_error = 3
data_history =[]
while(True):
try:
response = urllib.urlopen(get.format(page=page, username=api[0], token=api[1]))
try_and_error = 3
except Exception as e:
print 'error', e
if try_and_error:
try_and_error -= 1
continue
else:
break
resp = response.read()
data = json_loads_byteified(resp)
if verbose:
print 'game historic', len(data['history']),
for n in range(len(data['history'])):
if posix_conversion(data['history'][n]['added']) < from_date:
del data['history'][n:len(data['history'])]
end = True
break
if verbose: print 'valid games',len(data['history'])
data_history += data['history']
# end of the loop
if data['meta']['total_pages'] == data['meta']['current_page'] or end:
break
page += 1
yield data_history
Maexxna is ready to run, Dora R. makes a last check to see if it can return stuff properly.
In [33]:
from datetime import timedelta
d = datetime.today() - timedelta(days=7)
for i in Maexxna(from_date=d):
print len(i)
print 'Done'
'It is working perfectly!!!' Dora yells, her function works and returns what she needs, altought it may return "empty" history, but thats something she may deal with without much problem. Now she must add those informations to the archive
In [34]:
import pandas as pd
def Starseeker(verbose = False, iterator = False):
def _turns(data):
a = 0
for i in range(len(data)):
a = data[i]['turn'] if data[i]['turn'] > a else a
return a
for files in Maexxna(from_date = datetime.today() - timedelta(days=7)):
df = pd.DataFrame(columns=['id','matchid', 'date_posix', 'rank', 'hero', 'hero_deck',
'opponent_hero', 'opponent_deck', 'coin', 'turns', 'result',
'cards', 'opponent_cards'
])
index = 0
match = []
for i in range(len(files)):
# Only ranked games are going to be stored
if files[i]['mode'] != 'ranked':
continue
game = files[i]
my_cards = []
opponent_cards = []
for card in game['card_history']:
if card['player'] == 'me':
my_cards.append(card['card']['id'])
else:
opponent_cards.append(card['card']['id'])
my_cards = ', '.join(my_cards)
opponent_cards = ', '.join(opponent_cards)
df.loc[index] = [ int(game['id']),int(game['id']), posix_conversion(game['added']),
None if game['rank'] is None else int(game['rank']) , game['hero'], game['hero_deck'],
game['opponent'], game['opponent_deck'], 1 if game['coin'] else 0,
_turns(game['card_history']), 1 if game['result'] == 'win' else 0,
my_cards, opponent_cards ]
df = df.fillna(method='ffill')
index += 1
match = map(tuple, df.values)
sql = 'INSERT OR REPLACE INTO archive VALUES ((SELECT id FROM archive WHERE matchid == ?)'+',?'*12+')'
c.executemany( sql , match)
In [35]:
Starseeker()
In [36]:
c.execute('SELECT * from archive')
[description[0] for description in c.description]
Out[36]:
In [37]:
val = [val[1:] for val in c.execute('SELECT * from archive')]
cols = [description[0] for description in c.description]
dt = pd.DataFrame(val, columns = cols[1:])
In [38]:
dt
Out[38]:
In [39]:
heros = [b for b in dt['opponent_hero'].unique()]
In [40]:
for h in heros:
_h = dt[dt['hero'] == h]['result']
_o = dt[dt['opponent_hero'] == h]['result']
_o = pd.Series([0 if o else 1 for o in _o.values])
print h, _h.sum()+_o.sum(), len(_h)+len(_o), float(_h.sum()+_o.sum())/ float(len(_h)+len(_o))
In [41]:
pd.Series([0 if o else 1 for o in _o.values])
Out[41]:
In [42]:
decks = [b for b in dt['opponent_hero'].unique()]
for h in heros:
_h = list(dt[dt['hero'] == h]['hero_deck'].unique())
_o = list(dt[dt['opponent_hero'] == h]['opponent_deck'].unique())
_h = list(set(_h+_o))
print h, _h
In [43]:
list(_o) + list(_h)
Out[43]:
In [ ]: