In [3]:
import lib.dbUtil as dbUtil
import pandas as pd
import numpy as np
from datetime import datetime, date, time, timedelta
import pprint
import pickle
from tqdm import tqdm, tqdm_notebook
import sys
import os

In [4]:
'''
get pids
for all pids
    get all games played
    collect all games
    for each game
        mark boot
        mark blind with lowest val
        mark chaals with value, ordered by chips
        mark win/loss
'''
getPidsGplayQ = "select distinct pid from m_table_installs_by_pid where time between '{0}' and '{0}' + 1  and installos = 'gplay' and game_id = 3"
getLast10GamesQ = "select pid, genus, min(time) as game_time from m_table_economy_cash where pid = {0} and  time between '{1}' and '{1}' + 1 and phylum = 'boot' and game_id = 3 group by pid, genus order by game_time desc limit 10"
getMovesQ = "select count, phylum from m_table_economy_cash where time between '{2}' and '{2}' + 1 and pid = {0} and genus = '{1}' and game_id = 3 "
getPidsReturn= '''select i_pid
from
        (select distinct pid as i_pid from m_table_installs_by_pid where time between '{0}' and '{0}' + 1  and installos = 'gplay' and game_id = 3)
join
        (select distinct pid as l_pid from m_table_count_user_load where time between '{0}' + 1 and '{0}' + 7  and installos = 'gplay' and game_id = 3)
        
on i_pid = l_pid '''

In [5]:
# column structure = ["boot", "blind", "+blind", "chaal", "+chaal", "win", "loose"]
    
def parseMoves(pid, genus, startDate):
    result = dbUtil.runQuery(getMovesQ.format(pid, genus, str(startDate)))
    moves = {'boot':[], 'blind':[],'chaal':[],'win':[]}
    row = result.fetchone()
    while row != None:
        #print(row)
        if 'boot' in row[1]:
            moves['boot'].append(-1* int(row[0]))
        elif 'blind' in row[1]:
            moves['blind'].append(-1* int(row[0]))
        elif 'chaal' in row[1] or 'show' in row[1]:
            moves['chaal'].append(-1* int(row[0]))
        elif 'win' in row[1]:
            moves['win'].append(-1* int(row[0]))
        row = result.fetchone()
    print(moves)
    data = []
    cols = ["boot", "blind", "blind_", "chaal", "chaal_", "win", "loose"]
    # ordering boot
    data.append((1,0,0,0,0,0,0))
    
    # ordering blind
    blindMoves = moves['blind']
    blindMoves.sort()
    blindM = (0,1,0,0,0,0,0)
    blindPlusM = (0,0,1,0,0,0,0)
    for i, _ in enumerate(blindMoves):
        if i == 0:
            data.append(blindM)
        else:
            if blindMoves[i-1] < blindMoves[i]:
                data.append(blindPlusM)
            else:
                data.append(blindM)
    # ordering chaal
    chaalMoves = moves['chaal']
    chaalMoves.sort()
    chaalM = (0,0,0,1,0,0,0)
    chaalPlusM = (0,0,0,0,1,0,0)
    for i, _ in enumerate(chaalMoves):
        if i == 0:
            data.append(chaalM)
        else:
            if chaalMoves[i-1] < chaalMoves[i]:
                data.append(chaalPlusM)
            else:
                data.append(chaalM)
    # ordering win/loose
    if len(moves['win']) >0:
        data.append((0,0,0,0,0,1,0))
    else:
        data.append((0,0,0,0,0,0,1))
    #df = pd.DataFrame(data, columns = cols)
    return data

def collectPidData(pid, startDate):
    pidData = []
    result = dbUtil.runQuery(getLast10GamesQ.format(pid, str(startDate)))
    games = []
    row = result.fetchone()
    while row != None:
        games.append(row[1])
        row = result.fetchone()
    index = len(games) -1 
    while(index >=0):
        gData = parseMoves(pid, games[index], startDate)
        pidData.extend(gData)
        index = index -1
    cols = ["boot", "blind", "blind_", "chaal", "chaal_", "win", "loose"]
    df = pd.DataFrame(pidData, columns = cols)
    return df
def getAllInstallPids(startDate):
    iPids = []
    result = dbUtil.runQuery(getPidsGplayQ.format(str(startDate)))
    row = result.fetchone()
    while row != None:
        iPids.append(row[0])
        row = result.fetchone()
    print(len(iPids), " Install pids found")
    for i, pid in enumerate(iPids):
        df = collectPidData(pid, startDate)
        df.to_csv(os.path.join("../data", str(pid) + ".csv"))

In [6]:
def getIsReturnData(startDate):
    iPids = []
    result = dbUtil.runQuery(getPidsGplayQ.format(str(startDate)))
    row = result.fetchone()
    while row != None:
        iPids.append(row[0])
        row = result.fetchone()
    
    rPids = []
    result = dbUtil.runQuery(getPidsReturn.format(str(startDate)))
    row = result.fetchone()
    while row != None:
        rPids.append(row[0])
        row = result.fetchone()
    returnData = []
    for pid in iPids:
        if pid in rPids:
            returnData.append((pid, 1))
        else:
            returnData.append((pid, 0))
    cols = ["pid", "isReturn"]
    df = pd.DataFrame(returnData, columns = cols)
    return df

In [17]:
def getAllInstallPidsV2(startDate):
    iPidFile = os.path.join("../data",str(startDate),str(startDate)+".csv")
    idf = pd.read_csv(iPidFile)
    pids = idf['pid']
    for i, pid in enumerate(pids):
        if os.path.isfile(os.path.join("../data",str(startDate),str(pid)+".csv")):
            print (pid, " Already exist..")
        else:
            df = collectPidData(pid, startDate)
            df.to_csv(os.path.join("../data",str(startDate), str(pid) + ".csv"))

In [16]:
getAllInstallPidsV2(date(2017,8,7))


100286021812
10068784163517
100861316455
100260605909
10047461739434
1002118592407
1006332032555
10000571018529
1007270615910
100274702845
1002194101959
100646647738
100848533877
100356170777
100814772849
1005863096436

In [7]:
df = getIsReturnData(date(2017,8,7))
df.to_csv(os.path.join("../data", str(date(2017,8,7)) + ".csv"))

In [4]:
getAllInstallPids(date(2017,8,7))


75301  Install pids found
{'boot': [100], 'chaal': [], 'win': [], 'blind': []}
{'boot': [100], 'chaal': [12800], 'win': [], 'blind': [100, 400, 1600, 3200]}
{'boot': [100], 'chaal': [1600, 1600], 'win': [-10700], 'blind': [100, 200, 400, 800]}
{'boot': [100], 'chaal': [], 'win': [], 'blind': []}
{'boot': [100], 'chaal': [400], 'win': [], 'blind': []}
{'boot': [100], 'chaal': [1600, 6400, 12800, 12800, 12800], 'win': [], 'blind': [200]}
{'boot': [50], 'chaal': [], 'win': [], 'blind': []}
{'boot': [50], 'chaal': [1600], 'win': [], 'blind': [100]}
{'boot': [50], 'chaal': [], 'win': [], 'blind': []}
{'boot': [500], 'chaal': [], 'win': [], 'blind': [1000, 4000, 16000, 32000]}
{'boot': [500], 'chaal': [64000], 'win': [], 'blind': [2000, 8000, 32000, 32000]}
{'boot': [500], 'chaal': [64000], 'win': [], 'blind': [1000, 4000, 16000, 32000]}
{'boot': [500], 'chaal': [], 'win': [], 'blind': [2000, 8000, 16000]}
{'boot': [500], 'chaal': [], 'win': [], 'blind': [1000, 4000, 16000, 16000]}
{'boot': [100], 'chaal': [100, 400], 'win': [-1200], 'blind': []}
{'boot': [100], 'chaal': [400], 'win': [], 'blind': []}
{'boot': [100], 'chaal': [], 'win': [], 'blind': []}
{'boot': [100], 'chaal': [100], 'win': [-500], 'blind': []}
{'boot': [100], 'chaal': [], 'win': [], 'blind': []}
{'boot': [100], 'chaal': [400], 'win': [], 'blind': []}
{'boot': [100], 'chaal': [100], 'win': [-500], 'blind': []}
{'boot': [100], 'chaal': [], 'win': [], 'blind': []}
{'boot': [100], 'chaal': [], 'win': [], 'blind': []}
{'boot': [100], 'chaal': [], 'win': [], 'blind': []}
{'boot': [500], 'chaal': [], 'win': [], 'blind': [1000]}
{'boot': [500], 'chaal': [2000, 4000, 16000, 32000, 64000], 'win': [-245500], 'blind': []}
{'boot': [500], 'chaal': [32000], 'win': [], 'blind': [1000, 4000, 8000, 16000]}
{'boot': [500], 'chaal': [], 'win': [-47000], 'blind': [2000, 4000, 8000]}
{'boot': [500], 'chaal': [8000, 16000, 32000, 32000], 'win': [], 'blind': [1000]}
{'boot': [500], 'chaal': [4000, 8000, 8000], 'win': [-49500], 'blind': [1000]}
{'boot': [500], 'chaal': [], 'win': [], 'blind': [2000]}
{'boot': [500], 'chaal': [32000], 'win': [-100500], 'blind': [1000, 2000, 4000]}
{'boot': [500], 'chaal': [4000, 8000, 16000, 32000, 64000], 'win': [-239000], 'blind': [1000]}
{'boot': [500], 'chaal': [], 'win': [], 'blind': []}
{'boot': [500], 'chaal': [64000], 'win': [], 'blind': [2000, 16000, 32000]}
{'boot': [500], 'chaal': [64000], 'win': [-389500], 'blind': [2000, 16000, 32000, 32000]}
{'boot': [500], 'chaal': [64000, 64000], 'win': [-453000], 'blind': [4000, 32000, 32000, 32000]}
{'boot': [500], 'chaal': [64000, 64000], 'win': [-529000], 'blind': [1000, 16000, 32000]}
{'boot': [500], 'chaal': [64000], 'win': [-177000], 'blind': [4000, 16000]}
{'boot': [500], 'chaal': [], 'win': [], 'blind': [4000, 16000]}
{'boot': [500], 'chaal': [], 'win': [-193500], 'blind': [2000, 8000, 32000, 32000]}
{'boot': [500], 'chaal': [8000], 'win': [], 'blind': [1000, 4000, 4000, 4000]}
{'boot': [500], 'chaal': [64000], 'win': [-193000], 'blind': [1000, 16000]}
{'boot': [500], 'chaal': [], 'win': [], 'blind': [1000]}
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-4-d1502770b8f3> in <module>()
----> 1 getAllInstallPids(date(2017,8,7))

<ipython-input-3-8c8c9b987e6a> in getAllInstallPids(startDate)
     81     print(len(iPids), " Install pids found")
     82     for i, pid in enumerate(iPids):
---> 83         df = collectPidData(pid, startDate)
     84         df.to_csv(os.path.join("../data", str(pid) + ".csv"))
     85         if i == 10:

<ipython-input-3-8c8c9b987e6a> in collectPidData(pid, startDate)
     66     index = len(games) -1
     67     while(index >=0):
---> 68         gData = parseMoves(pid, games[index], startDate)
     69         pidData.extend(gData)
     70         index = index -1

<ipython-input-3-8c8c9b987e6a> in parseMoves(pid, genus, startDate)
      2 
      3 def parseMoves(pid, genus, startDate):
----> 4     result = dbUtil.runQuery(getMovesQ.format(pid, genus, str(startDate)))
      5     moves = {'boot':[], 'blind':[],'chaal':[],'win':[]}
      6     row = result.fetchone()

/Users/ajeetjha/sandbox/honest-blackops/src/lib/dbUtil.py in runQuery(query)
     23         conn = connect()
     24     cur = conn.cursor()
---> 25     cur.execute(query)
     26 
     27     return cur

<string> in __new__(_cls, name, type_code, display_size, internal_size, precision, scale, null_ok)

KeyboardInterrupt: