In [2]:
import pandas as pd
import os
In [3]:
# Load every per-date top-words file into one list of frames.
# Each file is read as a header-less, two-column CSV (word, score), and the
# file name is stored in a `date` column so rows can be grouped per file later.
# NOTE(review): assumes file names are dates (a later cell filters on
# '2017-07-19') -- confirm.
dfs = []
# os.scandir yields entries in arbitrary, filesystem-dependent order; sorting by
# name makes the concatenated row order reproducible across runs and machines.
for d in sorted(os.scandir('results/top-words'), key=lambda entry: entry.name):
    # Skip sub-directories (e.g. notebook checkpoint folders), which
    # pd.read_csv cannot open.
    if not d.is_file():
        continue
    df_read = pd.read_csv(d.path, names=['word', 'score'])
    df_read['date'] = d.name
    dfs.append(df_read)
In [4]:
# Stack the per-file frames into a single long table.
# ignore_index=True gives each row a unique label; without it every file
# contributes its own 0..n-1 labels, and the duplicated index breaks
# index-aligned operations such as assigning a computed Series as a new column.
df = pd.concat(dfs, ignore_index=True)
In [5]:
# Row and column counts of the combined table.
len(df.index), len(df.columns)
Out[5]:
In [6]:
# Peek at the first rows whose `date` value begins with '2017-07-19'.
is_target_day = df['date'].str.startswith('2017-07-19')
df.loc[is_target_day].head()
Out[6]:
In [12]:
# Share of each date's total score carried by each row (score / per-date total).
# NOTE: despite the name, `date_sums` holds per-row proportions, not sums; the
# name is kept because a later cell assigns it to df['prop'].
# transform('sum') broadcasts each group's total back onto the original rows, so
# the result always carries exactly df's index. groupby(...).apply with an
# element-wise lambda has a version-dependent index (newer pandas prepends the
# group key), which prevents assigning the result back as a column.
# Unlike the builtin sum(), the pandas sum skips NaN scores instead of turning
# the whole group into NaN.
date_sums = df['score'] / df.groupby('date')['score'].transform('sum')
In [21]:
# 0-based position of each row within its `date` group, in current row order.
# NOTE(review): this is only a ranking if each file is already sorted by
# score -- confirm.
df['place'] = df.groupby('date').cumcount()
In [15]:
# Store the per-row share of its date's total score (date_sums is computed in
# an earlier cell as score divided by the sum of scores for the same date) as
# the `prop` column. The assignment aligns on the index.
df['prop'] = date_sums
In [39]:
# Mean within-date position (`place`) of every word, averaged over all rows
# in which that word appears.
scores = df.groupby('word')['place'].mean()
In [41]:
# The 20 words with the lowest mean position.
scores.sort_values(ascending=True).iloc[:20]
Out[41]:
In [12]:
# First three rows of every date group, kept in the table's current row order.
by_date = df.groupby('date')
by_date.head(3)
Out[12]: