The subreddit disabled comments on 2016-04-15 (thread here).
BigQuery query used to extract the posts:
SELECT author, num_comments, score, ups, downs, gilded, created_utc FROM [fh-bigquery:reddit_posts.2016_04]
WHERE subreddit = 'The_Donald'
https://storage.cloud.google.com/staeiou_reddit/reddit_posts_the_donald_201604
In [1]:
import pandas as pd
import seaborn as sns
%matplotlib inline
In [2]:
# Load the exported posts CSV into a DataFrame.
# `DataFrame.from_csv` was deprecated and later removed from pandas; `read_csv`
# with `index_col=0, parse_dates=True` reproduces its defaults (first column
# becomes the index, and pandas attempts to parse that index as dates).
posts_df = pd.read_csv(
    "reddit_posts_the_donald_201604.csv",
    index_col=0,
    parse_dates=True,
)
In [3]:
# Preview the first five rows of the loaded posts.
posts_df.head(5)
Out[3]:
In [4]:
# Interpret `created_utc` as seconds since the epoch and store it as timestamps.
posts_df['created'] = pd.to_datetime(posts_df['created_utc'], unit='s')
# Keep just the calendar date of each timestamp in its own column.
posts_df['created_date'] = posts_df['created'].dt.date
In [5]:
# Derive downvotes from the other two vote columns. A post's score is
# upvotes minus downvotes, so downs = ups - score. (The previous
# `score - ups` produced the negated value.)
# NOTE: this overwrites any `downs` column already present in the frame.
posts_df['downs'] = posts_df['ups'] - posts_df['score']
In [6]:
# Upvote counts indexed by calendar date, sorted by that date.
posts_time_ups = (
    posts_df
    .set_index('created_date')['ups']
    .sort_index()
)
# Show the first five entries.
posts_time_ups.head(5)
Out[6]:
In [7]:
# Re-index the posts by their `created` timestamp and sort by it, so that
# time-based grouping can operate on the index.
posts_date_df = (
    posts_df
    .set_index('created')
    .sort_index()
)
# Show the first five rows.
posts_date_df.head(5)
Out[7]:
In [8]:
# Bucket posts into one-day bins on the timestamp index (left-closed intervals).
# `pd.TimeGrouper` was removed from pandas; `pd.Grouper(freq=...)` is the
# direct replacement.
posts_groupby = posts_date_df.groupby(pd.Grouper(freq='1D', closed='left'))
In [9]:
# Mean number of comments per post for each day, as a horizontal bar chart.
# Select the column *before* aggregating: calling `.mean()` on the whole
# grouped frame also tries to average non-numeric columns such as
# `created_date`, which fails on pandas versions where `numeric_only`
# defaults to False, and does needless work everywhere else.
ax = posts_groupby['num_comments'].mean().plot(kind='barh', figsize=[8, 8])
# Label the figure so it stands on its own; the trailing `;` hides the repr.
ax.set(title='Mean comments per post by day', xlabel='Mean num_comments', ylabel='Day');
Out[9]:
In [10]:
# Mean upvotes per post for each day, as a horizontal bar chart.
# Select the column *before* aggregating: calling `.mean()` on the whole
# grouped frame also tries to average non-numeric columns such as
# `created_date`, which fails on pandas versions where `numeric_only`
# defaults to False, and does needless work everywhere else.
ax = posts_groupby['ups'].mean().plot(kind='barh', figsize=[8, 8])
# Label the figure so it stands on its own; the trailing `;` hides the repr.
ax.set(title='Mean upvotes per post by day', xlabel='Mean ups', ylabel='Day');
Out[10]:
In [ ]: