Subreddit disabled button on 2015-03-19, thread here
Google BigQuery
-- Posts submitted to r/Swingers during March 2015 (UTC).
-- Bounds are epoch seconds: 1425168000 = 2015-03-01 00:00:00 UTC,
-- 1427846400 = 2015-04-01 00:00:00 UTC.
-- The filter uses created_utc (the column that is selected and analysed
-- downstream) rather than the separate `created` column, and a half-open
-- range so a post stamped exactly at midnight on April 1st is excluded.
SELECT
  author,
  num_comments,
  score,
  ups,
  downs,
  gilded,
  created_utc
FROM [fh-bigquery:reddit_posts.full_corpus_201509]
WHERE created_utc >= 1425168000
  AND created_utc < 1427846400
  AND subreddit = 'Swingers'
In [6]:
!pip install bokeh
import pandas as pd
import seaborn as sns
from bokeh.charts import TimeSeries, output_file, show
%matplotlib inline
In [7]:
# Load the exported query results. DataFrame.from_csv is deprecated and was
# removed in pandas 1.0; read_csv with index_col=0 and parse_dates=True
# reproduces its defaults (first column as index, index parsed as dates
# when possible).
posts_df = pd.read_csv(
    "reddit_posts_swingers_201503.csv",
    index_col=0,
    parse_dates=True,
)
In [8]:
# Preview the first five rows of the loaded posts.
posts_df.iloc[:5]
Out[8]:
In [9]:
# Interpret created_utc as epoch seconds to get a datetime column, then
# keep a date-only (no time of day) copy alongside it.
posts_df['created'] = pd.to_datetime(posts_df['created_utc'], unit='s')
posts_df['created_date'] = posts_df['created'].dt.date
In [10]:
# Recompute the downvote count from score and ups, overwriting the existing
# 'downs' column. Assuming score = ups - downs (TODO confirm against the
# dataset), downs = ups - score; the previous `score - ups` produced the
# negated value.
posts_df['downs'] = posts_df['ups'] - posts_df['score']
In [11]:
# Upvotes per post, keyed by calendar date and ordered by that date.
posts_time_ups = (
    posts_df
    .set_index('created_date')['ups']
    .sort_index()
)
# Show the first five entries.
posts_time_ups.iloc[:5]
Out[11]:
In [12]:
# Re-index the posts by their full creation timestamp, in chronological
# order, so they can be grouped by time below.
posts_date_df = (
    posts_df
    .set_index('created')
    .sort_index()
)
# Show the first five rows.
posts_date_df.iloc[:5]
Out[12]:
In [13]:
# Bucket posts into one-day bins on the datetime index (left-closed
# intervals). pd.TimeGrouper is deprecated and removed from current pandas;
# pd.Grouper(freq=...) is its replacement and takes the same `closed` option.
posts_groupby = posts_date_df.groupby([pd.Grouper(freq='1D', closed='left')])
In [14]:
# Horizontal bar chart of the mean number of comments per post for each day.
# The column is selected before aggregating so only num_comments is
# averaged; calling .mean() on the whole group also tries to average the
# object-dtype created_date column, which newer pandas rejects.
posts_groupby['num_comments'].mean().plot(kind='barh', figsize=[8, 8])
Out[14]:
In [15]:
# Horizontal bar chart of the mean upvotes per post for each day.
# The column is selected before aggregating so only ups is averaged;
# calling .mean() on the whole group also tries to average the
# object-dtype created_date column, which newer pandas rejects.
posts_groupby['ups'].mean().plot(kind='barh', figsize=[8, 8])
Out[15]:
In [ ]:
In [ ]: