In [1]:
import sys
import os
sys.path.append("/Users/ajeetjha/sandbox/sherlock/blackops/scripts")
import lib.dbUtil as dbUtil
import lib.genericUtil as gUtil
import lib.mongoUtil as mUtil
import pandas as pd
import numpy as np
from datetime import datetime, date, time, timedelta
import pprint

In [2]:
startDate = date(2017, 3, 2)
# for W1 keep the day as 14 days older to current date
#

In [3]:
'''
Possible features
max_chips/1Lakh
win/boot :avg log(win/boot),  a skilled player will win big whenever he wins
total number of games
try total friends playing game : mUtil.getAllFbPlaying(<pid>)
total friends : gUtil.getFbFriendsCount(<pid>)
log(max_chips)

'''


Out[3]:
'\nPossible features\nmax_chips/1Lakh\nwin/boot :avg log(win/boot),  a skilled player will win big whenever he wins\ntotal number of games\ntry total friends playing game : mUtil.getAllFbPlaying(<pid>)\ntotal friends : gUtil.getFbFriendsCount(<pid>)\nlog(max_chips)\n\n'

In [4]:
getInstallDayGameDataQ = '''select t3.pid, t3.chaals, t3.wins, t3.blinds, t3.max_chips , t4.times_loaded, avg_win_by_boot,avg_loss_by_boot, num_games
from
        (
        select t2.*
        from
                (select distinct pid from m_table_installs_by_pid where time between '${date}$' - 1 and '${date}$' and game_id = 3 ) as t1
                join
                (select pid, sum(num_chaals) as chaals, sum(num_wins) as wins, sum(num_blinds) as blinds , max(final::bigint) as max_chips , avg(win_by_boot) as avg_win_by_boot, avg(abs(loss_by_boot)) as avg_loss_by_boot, count(distinct genus) as num_games
                from  
                (       select t11.* ,case when t11.count < 0 then abs(t11.count/boot) else  0 end   as loss_by_boot, case when t11.count > 0 then (t11.count/boot) else  0 end   as win_by_boot
                        from
                        (select * from m_table_tpg_cash where time between '${date}$' -1 and '${date}$'  and phylum = 'game' and class = 'public') as t11
                        join
                        (select pid, genus, sum(abs(count)) as boot, min(time) as game_time from m_table_economy_cash where phylum = 'boot' and time between '${date}$' -1 and '${date}$' and class = 'public' group by pid, genus) as t12
                        on t11.pid = t12.pid and t11.genus = t12.genus ) group by pid)as t2
        on t1.pid = t2.pid
        ) as t3
left join
        (select pid, count(*) as times_loaded from m_table_count_user_load where time between '${date}$'  and '${date}$' + 1 and game_id = 3 group by pid  ) as t4
on t3.pid = t4.pid
'''

getFirst5GameDataDayQ = '''
select t3.*, t4.times_loaded
from
        (
        select t2.*
        from
                (select distinct pid from m_table_installs_by_pid where time between '${date}$' - 1 and '${date}$') as t1
                join
                (select pid, sum(num_chaals) as chaals, sum(num_wins) as wins, sum(num_blinds) as blinds 
                from  
                (select pid, num_chaals, num_wins, num_blinds , ROW_NUMBER () OVER ( PARTITION BY pid ORDER BY m_table_tpg_cash.time, genus) as game_number from  m_table_tpg_cash where  phylum = 'game' and time between '${date}$' - 1 and '${date}$')
                where game_number < 6
                group by pid )as t2
        on t1.pid = t2.pid
        ) as t3
left join
        (select pid, count(*) as times_loaded from m_table_count_user_load where time between '${date}$'  and '${date}$' + 1 and game_id = 3 group by pid  ) as t4
on t3.pid = t4.pid
'''

getFirst5GameDataWeekQ= '''
select t3.*, t4.times_loaded
from
        (
        select t2.*
        from
                (select distinct pid from m_table_installs_by_pid where time between '${date}$' - 1 and '${date}$') as t1
                join
                (select pid, round(sum(num_chaals) *1.0 /(sum(num_blinds) +1), 5)  as chaal_by_blind, avg(win_by_boot) as avg_win_by_boot, avg(abs(loss_by_boot)) as avg_loss_by_boot
                from  
                (       select * ,case when count < 0 then abs(count/boot) else  0 end   as loss_by_boot, case when count > 0 then (count/boot) else  0 end   as win_by_boot
                        from
                                ( select t11.* , boot, ROW_NUMBER () OVER ( PARTITION BY t11.pid ORDER BY game_time, t12.genus) as game_number
                                
                                from
                                     (   (select * from m_table_tpg_cash where time between '${date}$' -1 and '${date}$'  and phylum = 'game' and class = 'public') as t11
                                        join
                                        (select pid, genus, sum(abs(count)) as boot, min(time) as game_time from m_table_economy_cash where phylum = 'boot' and time between '${date}$' -1 and '${date}$' and class = 'public' group by pid, genus) as t12
                                        on t11.pid = t12.pid and t11.genus = t12.genus 
                                      ) 
                                 )
                                 where game_number < 6
                  )
                  group by pid )as t2
        on t1.pid = t2.pid
        ) as t3
left join
        (select pid, count(*) as times_loaded from m_table_count_user_load where time between '${date}$' + 6 and '${date}$' + 13 and game_id = 3 group by pid  ) as t4
on t3.pid = t4.pid
'''

getInstallDayGameDataW1ReturnQ = '''
select t3.pid, t3.chaals, t3.wins, t3.blinds, t3.max_chips , t4.times_loaded
from
        (
        select t2.*
        from
                (select distinct pid from m_table_installs_by_pid where time between '${date}$' - 1 and '${date}$') as t1
                join
                (select pid, sum(num_chaals) as chaals, sum(num_wins) as wins, sum(num_blinds) as blinds , max(final::bigint) as max_chips from  m_table_tpg_cash where  phylum = 'game' and time between '${date}$' - 1 and '${date}$' group by pid )as t2
        on t1.pid = t2.pid
        ) as t3
left join
        (select pid, count(*) as times_loaded from m_table_count_user_load where time between '${date}$' + 6 and '${date}$' + 13 and game_id = 3 group by pid  ) as t4
on t3.pid = t4.pid
'''

In [6]:
def getChipsWonRatioByGames(start = startDate):
    data = []
    cols = ['pid', 'chaals','wins', 'blinds', 'max_chips','times_loaded', 'avg_win_by_boot','avg_loss_by_boot', 'num_games', 'gameFriends','fbFriends']
    query = getInstallDayGameDataQ.replace('${date}$', str(start))
    #print(query)
    count = 0
    result = dbUtil.runQuery(query)
    count = count+1
    total = result.rowcount
    row = result.fetchone()
    while row != None:
        if row[5] == None:
            tl = 0
        else:
            tl = 1
        rowData = (row[0], row[1], row[2],row[3], row[4], tl, row[6], row[7], int(row[8]) if row[8] != None else 0, mUtil.getAllFbPlaying(str(row[0])), gUtil.getFbFriendsCount(str(row[0])))
        data.append(rowData)
        #print(rowData)
        print(("Done: ", count, total))
        count = count+1
        row = result.fetchone()
        
    df = pd.DataFrame(data, columns = cols)
    
    return df

def getFirst5GameData(start = startDate):
    data = []
    cols = ['pid', 'chaal_by_blind','avg_win_by_boot','avg_loss_by_boot','times_loaded']
    query = getFirst5GameDataDayQ.replace('${date}$', str(start))
    #print(query)
    result = dbUtil.runQuery(query)
    row = result.fetchone()
    while row != None:
        if row[4] == None:
            tl = 0
        else:
            tl = 1
        data.append((row[0], float(row[1]), int(row[2]),int(row[3]), int(tl)))
        #print((ratio, nGames))
        row = result.fetchone()
    df = pd.DataFrame(data, columns = cols)
    
    return df

def getChipsWonRatioByGames_OLD(start = startDate):
    data = []
    cols = ['pid', 'chaals','wins', 'blinds', 'max_chips','times_loaded']
    query = getInstallDayGameDataW1ReturnQ.replace('${date}$', str(start))
    #print(query)
    count = 0
    result = dbUtil.runQuery(query)
    count = count+1
    total = result.rowcount
    row = result.fetchone()
    while row != None:
        if row[5] == None:
            tl = 0
        else:
            tl = 1
        rowData = (row[0], row[1], row[2],row[3], row[4], tl)
        data.append(rowData)
        #print(rowData)
        print(("Done: ", count, total))
        count = count+1
        row = result.fetchone()
        
    df = pd.DataFrame(data, columns = cols)
    
    return df

In [7]:
df = getFirst5GameData(startDate)

In [8]:
gUtil.dumpPDtoCsv(df,'d1_w1_data/first5_day_2017_3_2_D1.csv')

In [ ]: