In [1]:
import sys
import os
sys.path.append("/Users/ajeetjha/sandbox/sherlock/blackops/scripts")
import lib.dbUtil as dbUtil
import lib.genericUtil as gUtil
import lib.mongoUtil as mUtil
import pandas as pd
import numpy as np
from datetime import datetime, date, time, timedelta
import pprint
In [2]:
startDate = date(2017, 3, 2)
# for W1 keep the day as 14 days older to current date
#
In [3]:
'''
Possible features
max_chips/1Lakh
win/boot :avg log(win/boot), a skilled player will win big whenever he wins
total number of games
try total friends playing game : mUtil.getAllFbPlaying(<pid>)
total friends : gUtil.getFbFriendsCount(<pid>)
log(max_chips)
'''
Out[3]:
In [4]:
getInstallDayGameDataQ = '''select t3.pid, t3.chaals, t3.wins, t3.blinds, t3.max_chips , t4.times_loaded, avg_win_by_boot,avg_loss_by_boot, num_games
from
(
select t2.*
from
(select distinct pid from m_table_installs_by_pid where time between '${date}$' - 1 and '${date}$' and game_id = 3 ) as t1
join
(select pid, sum(num_chaals) as chaals, sum(num_wins) as wins, sum(num_blinds) as blinds , max(final::bigint) as max_chips , avg(win_by_boot) as avg_win_by_boot, avg(abs(loss_by_boot)) as avg_loss_by_boot, count(distinct genus) as num_games
from
( select t11.* ,case when t11.count < 0 then abs(t11.count/boot) else 0 end as loss_by_boot, case when t11.count > 0 then (t11.count/boot) else 0 end as win_by_boot
from
(select * from m_table_tpg_cash where time between '${date}$' -1 and '${date}$' and phylum = 'game' and class = 'public') as t11
join
(select pid, genus, sum(abs(count)) as boot, min(time) as game_time from m_table_economy_cash where phylum = 'boot' and time between '${date}$' -1 and '${date}$' and class = 'public' group by pid, genus) as t12
on t11.pid = t12.pid and t11.genus = t12.genus ) group by pid)as t2
on t1.pid = t2.pid
) as t3
left join
(select pid, count(*) as times_loaded from m_table_count_user_load where time between '${date}$' and '${date}$' + 1 and game_id = 3 group by pid ) as t4
on t3.pid = t4.pid
'''
getFirst5GameDataDayQ = '''
select t3.*, t4.times_loaded
from
(
select t2.*
from
(select distinct pid from m_table_installs_by_pid where time between '${date}$' - 1 and '${date}$') as t1
join
(select pid, sum(num_chaals) as chaals, sum(num_wins) as wins, sum(num_blinds) as blinds
from
(select pid, num_chaals, num_wins, num_blinds , ROW_NUMBER () OVER ( PARTITION BY pid ORDER BY m_table_tpg_cash.time, genus) as game_number from m_table_tpg_cash where phylum = 'game' and time between '${date}$' - 1 and '${date}$')
where game_number < 6
group by pid )as t2
on t1.pid = t2.pid
) as t3
left join
(select pid, count(*) as times_loaded from m_table_count_user_load where time between '${date}$' and '${date}$' + 1 and game_id = 3 group by pid ) as t4
on t3.pid = t4.pid
'''
getFirst5GameDataWeekQ= '''
select t3.*, t4.times_loaded
from
(
select t2.*
from
(select distinct pid from m_table_installs_by_pid where time between '${date}$' - 1 and '${date}$') as t1
join
(select pid, round(sum(num_chaals) *1.0 /(sum(num_blinds) +1), 5) as chaal_by_blind, avg(win_by_boot) as avg_win_by_boot, avg(abs(loss_by_boot)) as avg_loss_by_boot
from
( select * ,case when count < 0 then abs(count/boot) else 0 end as loss_by_boot, case when count > 0 then (count/boot) else 0 end as win_by_boot
from
( select t11.* , boot, ROW_NUMBER () OVER ( PARTITION BY t11.pid ORDER BY game_time, t12.genus) as game_number
from
( (select * from m_table_tpg_cash where time between '${date}$' -1 and '${date}$' and phylum = 'game' and class = 'public') as t11
join
(select pid, genus, sum(abs(count)) as boot, min(time) as game_time from m_table_economy_cash where phylum = 'boot' and time between '${date}$' -1 and '${date}$' and class = 'public' group by pid, genus) as t12
on t11.pid = t12.pid and t11.genus = t12.genus
)
)
where game_number < 6
)
group by pid )as t2
on t1.pid = t2.pid
) as t3
left join
(select pid, count(*) as times_loaded from m_table_count_user_load where time between '${date}$' + 6 and '${date}$' + 13 and game_id = 3 group by pid ) as t4
on t3.pid = t4.pid
'''
getInstallDayGameDataW1ReturnQ = '''
select t3.pid, t3.chaals, t3.wins, t3.blinds, t3.max_chips , t4.times_loaded
from
(
select t2.*
from
(select distinct pid from m_table_installs_by_pid where time between '${date}$' - 1 and '${date}$') as t1
join
(select pid, sum(num_chaals) as chaals, sum(num_wins) as wins, sum(num_blinds) as blinds , max(final::bigint) as max_chips from m_table_tpg_cash where phylum = 'game' and time between '${date}$' - 1 and '${date}$' group by pid )as t2
on t1.pid = t2.pid
) as t3
left join
(select pid, count(*) as times_loaded from m_table_count_user_load where time between '${date}$' + 6 and '${date}$' + 13 and game_id = 3 group by pid ) as t4
on t3.pid = t4.pid
'''
In [6]:
def getChipsWonRatioByGames(start = startDate):
data = []
cols = ['pid', 'chaals','wins', 'blinds', 'max_chips','times_loaded', 'avg_win_by_boot','avg_loss_by_boot', 'num_games', 'gameFriends','fbFriends']
query = getInstallDayGameDataQ.replace('${date}$', str(start))
#print(query)
count = 0
result = dbUtil.runQuery(query)
count = count+1
total = result.rowcount
row = result.fetchone()
while row != None:
if row[5] == None:
tl = 0
else:
tl = 1
rowData = (row[0], row[1], row[2],row[3], row[4], tl, row[6], row[7], int(row[8]) if row[8] != None else 0, mUtil.getAllFbPlaying(str(row[0])), gUtil.getFbFriendsCount(str(row[0])))
data.append(rowData)
#print(rowData)
print(("Done: ", count, total))
count = count+1
row = result.fetchone()
df = pd.DataFrame(data, columns = cols)
return df
def getFirst5GameData(start = startDate):
data = []
cols = ['pid', 'chaal_by_blind','avg_win_by_boot','avg_loss_by_boot','times_loaded']
query = getFirst5GameDataDayQ.replace('${date}$', str(start))
#print(query)
result = dbUtil.runQuery(query)
row = result.fetchone()
while row != None:
if row[4] == None:
tl = 0
else:
tl = 1
data.append((row[0], float(row[1]), int(row[2]),int(row[3]), int(tl)))
#print((ratio, nGames))
row = result.fetchone()
df = pd.DataFrame(data, columns = cols)
return df
def getChipsWonRatioByGames_OLD(start = startDate):
data = []
cols = ['pid', 'chaals','wins', 'blinds', 'max_chips','times_loaded']
query = getInstallDayGameDataW1ReturnQ.replace('${date}$', str(start))
#print(query)
count = 0
result = dbUtil.runQuery(query)
count = count+1
total = result.rowcount
row = result.fetchone()
while row != None:
if row[5] == None:
tl = 0
else:
tl = 1
rowData = (row[0], row[1], row[2],row[3], row[4], tl)
data.append(rowData)
#print(rowData)
print(("Done: ", count, total))
count = count+1
row = result.fetchone()
df = pd.DataFrame(data, columns = cols)
return df
In [7]:
df = getFirst5GameData(startDate)
In [8]:
gUtil.dumpPDtoCsv(df,'d1_w1_data/first5_day_2017_3_2_D1.csv')
In [ ]: