In [1]:
%matplotlib notebook
from matplotlib import pyplot as plt
import matplotlib as mpl
In [2]:
from tqdm import tqdm
In [3]:
from datetime import datetime
In [4]:
import json
In [5]:
import pandas as pd
In [7]:
from pymongo import MongoClient
import os

# The connection string (including any credentials) is read from the MONGO_URI
# environment variable so that no password is stored in the notebook. A missing
# variable raises KeyError here instead of failing later on the first query.
# NOTE(review): a password was previously hardcoded on this line; treat it as
# leaked and rotate it.
client = MongoClient(os.environ["MONGO_URI"])
db = client['hkns3']
In [8]:
def write_csv(filename, l):
    """Flatten ranking snapshots into a CSV file with header ``i,item_id,time,rank``.

    Args:
        filename: Path of the CSV file to create (opened in "w" mode, so an
            existing file is overwritten).
        l: Iterable of mappings; each must provide a ``'time'`` value and a
            ``'bests'`` sequence of item ids.

    Every id in ``'bests'`` becomes one row whose ``rank`` is its 1-based
    position within that snapshot and whose ``time`` is the snapshot's time.
    """
    # NOTE(review): the exported source lost its indentation; the running row
    # number is assumed to advance once per written row (it is later read back
    # as the index column) - confirm against the original notebook.
    row_number = 0
    with open(filename, "w") as out:
        out.write("i,item_id,time,rank\n")
        for snapshot in tqdm(l):
            stamp = snapshot['time']
            for position, item_id in enumerate(snapshot['bests'], start=1):
                out.write(f"{row_number},{item_id},{stamp},{position}\n")
                row_number += 1
In [10]:
# All documents of type "top": first those from the 'raw' collection, then
# those from 'raw_time', materialised into a single list.
ts = [
    *db['raw'].find({"type": "top"}),
    *db['raw_time'].find({"type": "top"}),
]
In [11]:
write_csv("tops.csv", ts)
In [12]:
df_ts = pd.read_csv("tops.csv", index_col=0)
In [14]:
del ts
In [16]:
df_ts_unique = df_ts['item_id'].unique()
In [68]:
def get_time_series(_id):
    """Build the rank history of one item from the module-level ``df_ts`` frame.

    Args:
        _id: Item id to look up in ``df_ts['item_id']``.

    Returns:
        dict with
        * ``"id"``: the ``_id`` argument, unchanged;
        * ``"ranking"``: list of ``int`` ranks, in ``df_ts`` row order;
        * ``"times"``: list of ISO-8601 strings, one per rank, obtained by
          truncating ``time`` to an integer, reading it as a POSIX timestamp
          and localising it to ``BOSTON``.

    Notes:
        ``BOSTON`` is a notebook global that is assigned in a cell further down
        the file, so that cell must have run before this function is called.
        Another ``get_time_series`` is defined later in this notebook;
        whichever definition was executed last is the one in effect.
    """
    # Select with a single .loc call and never write to the selection: the
    # previous version added a column to a filtered slice, which triggers
    # pandas' SettingWithCopyWarning.
    rows = df_ts.loc[df_ts['item_id'] == _id, ['time', 'rank']]
    seconds = rows['time'].astype(int)

    times = [datetime.fromtimestamp(s, tz=BOSTON).isoformat() for s in seconds]
    rankings = [int(r) for r in rows['rank']]
    return {
        "id": _id,
        "ranking": rankings,
        "times": times,
    }
def get_time_series_alt(_id):
    """Return the rank history of one item as pandas timestamps.

    Args:
        _id: Item id to look up in the module-level ``df_ts['item_id']``.

    Returns:
        dict with two equally long lists, ``'dtime'`` and ``'rank'``, as
        produced by ``DataFrame.to_dict(orient='list')``. ``'dtime'`` is
        ``pd.to_datetime`` applied to ``time * 10**9``.
    """
    rows = df_ts.loc[df_ts['item_id'] == _id, ['time', 'rank']]
    # .assign returns a new frame, so the filtered selection is never mutated
    # (the earlier column assignment on a slice raised SettingWithCopyWarning).
    with_dtime = rows.assign(dtime=pd.to_datetime(rows['time'] * 10**9))
    return with_dtime[['dtime', 'rank']].to_dict(orient='list')
def insert_timeseries():
    """Insert one time-series document per distinct item id into ``db['time_series2']``.

    Iterates over ``set(df_ts_unique)`` with a tqdm progress bar, converts each
    id to ``int``, builds its document with ``get_time_series`` and stores it
    with ``insert_one``.
    """
    target = db['time_series2']
    for raw_id in tqdm(set(df_ts_unique)):
        document = get_time_series(int(raw_id))
        target.insert_one(document)
In [69]:
insert_timeseries()
In [61]:
get_time_series_alt(14352386)['dtime'][0].to_pydatetime().isoformat()
Out[61]:
In [62]:
datetime.now(tz=BOSTON).isoformat()
Out[62]:
In [67]:
get_time_series(14352386)
Out[67]:
In [ ]:
In [48]:
import pytz
In [51]:
BOSTON = pytz.timezone('America/New_York')
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
df_times = pd.DataFrame(list(db['raw'].find({"type":"max_id"})))
In [ ]:
df_times['datetime'] = pd.to_datetime(df_times['time']*10**9)
In [ ]:
df_times
In [ ]:
df_times.plot(x='datetime', y='test')
In [ ]:
# DataFrame.set_value was removed in pandas 1.0; a label-based .loc assignment
# sets the same cell (row label 0, column 'test') and creates the column if it
# does not exist yet.
df_times.loc[0, 'test'] = 0
In [ ]:
def derivative_time():
    """Write per-row deltas into the module-level ``df_times`` frame, in place.

    For every row whose index label is greater than 0 this sets
    * ``'time_diff'``: positional field 3 of the row minus that of the
      previously processed row;
    * ``'test'``: the difference in positional field 2 divided by ``time_diff``.

    Rows with index label 0 (or less) are skipped. Fields are read by position
    from ``itertuples()`` (field 0 is the index label), so the result depends
    on the column order of ``df_times``.
    """
    # NOTE(review): these seeds are presumably the field-2 / field-3 values of
    # row 0 - confirm; they are what the first processed row is compared with.
    prev_id = 14349758
    prev_time = 1494944438.3075223
    for row in df_times.itertuples():
        index = row[0]
        if index > 0:
            time_diff = row[3] - prev_time
            id_diff = row[2] - prev_id
            # .loc replaces DataFrame.set_value, which was removed in pandas 1.0;
            # like set_value it creates the column on first use.
            df_times.loc[index, 'time_diff'] = time_diff * 1.0
            df_times.loc[index, 'test'] = id_diff / time_diff * 1.0
            # NOTE(review): indentation was lost in this export; the updates
            # below are assumed to belong to the `if` body - confirm.
            prev_id = row[2]
            prev_time = row[3]
In [ ]:
derivative_time()
In [ ]:
df_times
In [ ]:
df_times['time'][0]
In [ ]:
# Reset 'time_diff' to an all-NaN column. The dtype is given explicitly because
# a bare pd.Series() is deprecated and, in current pandas, defaults to object
# dtype instead of float64.
df_times['time_diff'] = pd.Series(dtype='float64')
In [ ]:
bs = list(db['raw'].find({"type":"best"}))
In [ ]:
len(bs[0]['bests'])
In [ ]:
df_bs = pd.DataFrame(columns=['_id', 'time', 'rank'])
In [ ]:
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# As before, the result is only displayed - df_bs itself is not modified.
pd.concat(
    [df_bs, pd.DataFrame([{'_id': 1, 'time': 0, 'rank': 0}])],
    ignore_index=True,
)
In [ ]:
df_bs
In [ ]:
# bests.csv has the same layout as tops.csv (header "i,item_id,time,rank", one
# row per ranked id), so reuse the write_csv helper defined earlier in this
# notebook instead of keeping a second copy of its loop.
write_csv("bests.csv", bs)
In [ ]:
df_bs = pd.read_csv("bests.csv", index_col=0)
In [ ]:
df_bs[df_bs['item_id'] == 14326439]
In [ ]:
df_bs.groupby('item_id').size().sort_values()
In [ ]:
df_bs['datetime'] = pd.to_datetime(df_bs['time']*10**9)
In [ ]:
df_bs[df_bs['item_id'] == 14330547].plot(x='datetime', y='rank')
In [ ]:
df_bs
Now let's do the same for top
In [ ]:
ts = list(db['raw'].find({"type":"top"}))
# Appends to an existing tops.csv, so no header line is written here.
# NOTE(review): 2355047 is a hardcoded starting row number, presumably the row
# count already in the file - confirm. Re-running this cell appends the same
# rows again with the same numbers.
with open("tops.csv", "a") as tf:
    c = 2355047
    for entry in tqdm(ts):
        time = entry['time']
        for rank, _id in enumerate(entry['bests'], start=1):
            tf.write(f"{c},{_id},{time},{rank}\n")
            c += 1
In [7]:
df_ts = pd.read_csv("tops.csv", index_col=0)
In [ ]:
df_ts['datetime'] = pd.to_datetime(df_ts['time']*10**9)
In [ ]:
plt.gca().invert_yaxis()
In [ ]:
df_ts.groupby('item_id').size().sort_values()
In [ ]:
df_ts_ts2 = df_ts[df_ts['item_id'] == 14218408][['time', 'rank']].set_index('time')
In [8]:
df_ts_unique = df_ts['item_id'].unique()
In [ ]:
In [ ]:
json.dumps(df_ts_ts2.to_dict())
In [ ]:
# Take an explicit copy: a column is added to this frame afterwards, and
# assigning onto a filtered slice of df_ts raises SettingWithCopyWarning.
df_ts_ts2 = df_ts.loc[df_ts['item_id'] == 14218408, ['time', 'rank']].copy()
In [ ]:
df_ts_ts2['ntime'] = ((df_ts_ts2['time']*10**5).astype(int))
In [ ]:
bb = [(str(x[0]), int(x[1])) for x in df_ts_ts2[['ntime', 'rank']].values]
In [ ]:
json.dumps({"rankings": bb})
In [ ]:
str(df_ts_ts2[['ntime', 'rank']].values[1][0])
In [ ]:
bb
In [13]:
def get_time_series(_id):
    """Build the rank history of one item from the module-level ``df_ts`` frame.

    Args:
        _id: Item id to look up in ``df_ts['item_id']``.

    Returns:
        dict with
        * ``"id"``: the ``_id`` argument, unchanged;
        * ``"rankings"``: list of ``(ntime, rank)`` tuples in ``df_ts`` row
          order, where ``ntime`` is ``str(int(time * 10**5))`` and ``rank`` is
          an ``int``.

    Notes:
        A function with the same name is also defined earlier in this
        notebook; whichever definition was executed last is the one in effect.
    """
    # Select with a single .loc call and keep the derived values in their own
    # Series: adding a column to a filtered slice, as before, triggers
    # pandas' SettingWithCopyWarning.
    rows = df_ts.loc[df_ts['item_id'] == _id, ['time', 'rank']]
    ntime = (rows['time'] * 10**5).astype(int)
    rankings = [(str(t), int(r)) for t, r in zip(ntime, rows['rank'])]
    return {
        "id": _id,
        "rankings": rankings
    }
def insert_timeseries():
    """Insert one time-series document per entry of ``df_ts_unique`` into ``db['time_series']``.

    Each id is converted to ``int``, turned into a document by
    ``get_time_series`` and stored with ``insert_one``; tqdm reports progress.
    """
    collection = db['time_series']
    for item_id in tqdm(df_ts_unique):
        document = get_time_series(int(item_id))
        collection.insert_one(document)
In [15]:
insert_timeseries()
In [ ]: