In [47]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta
import datetime
In [2]:
# Load the tweet dump (JSON) straight from its S3 URL into a DataFrame.
df = pd.read_json(
    path_or_buf="https://s3.amazonaws.com/far-right/twitter/mb_protests.json"
)
In [3]:
# Show the column labels of the loaded DataFrame.
df.columns
Out[3]:
In [4]:
# Report how many rows (one per tweet) were loaded.
tweet_total = len(df)
print("Total number of tweets = {}".format(tweet_total))
In [5]:
# Normalise case in the hashtag and body columns so that the substring
# searches on these columns are effectively case-insensitive.
for text_col in ('hashtags', 'text'):
    df[text_col] = df[text_col].str.lower()
In [6]:
# Count tweets whose hashtag string contains 'wall'.
# na=False makes rows with a missing hashtag value count as "no match", so the
# mask is strictly boolean (a NaN in the mask would make df[mask] raise).
# Summing the boolean mask counts the matches without building a filtered frame.
hashtag_has_wall = df['hashtags'].str.contains('wall', na=False)
print("Total number of tweets containing hashtag 'wall' = {}".format(hashtag_has_wall.sum()))
In [7]:
# Count tweets whose body text contains 'wall'.
# na=False makes rows with a missing body count as "no match", so the mask is
# strictly boolean (a NaN in the mask would make df[mask] raise).
# Summing the boolean mask counts the matches without building a filtered frame.
text_has_wall = df['text'].str.contains('wall', na=False)
print("Total number of tweets whose body contains 'wall' = {}".format(text_has_wall.sum()))
In [8]:
# Select the tweets that mention 'wall' in either the hashtags or the body.
# na=False treats a missing value as "no match"; without it a NaN in either
# mask would propagate into the combined mask and break boolean indexing.
wall_in_hashtags = df['hashtags'].str.contains('wall', na=False)
wall_in_text = df['text'].str.contains('wall', na=False)
# .copy() so that columns added to wall_tweets later do not write through to df.
wall_tweets = df[wall_in_hashtags | wall_in_text].copy()
In [9]:
# Report the size of the 'wall' subset.
wall_tweet_total = len(wall_tweets)
print("Total number of tweets about the 'wall' = {}".format(wall_tweet_total))
In [11]:
def months_between(end, start):
    """Return the number of calendar months from ``start`` to ``end``.

    Only the ``year`` and ``month`` attributes of the arguments are used; the
    day of month and any time component are ignored, so 31 Jan -> 1 Feb counts
    as one month. The result is negative when ``end`` falls in an earlier
    month than ``start``.

    Parameters
    ----------
    end, start : objects exposing ``year`` and ``month``
        For example ``datetime.date``, ``datetime.datetime`` or
        ``pandas.Timestamp``.

    Returns
    -------
    The month difference ``(end.year - start.year) * 12 + end.month - start.month``.
    """
    return (end.year - start.year) * 12 + end.month - start.month
In [12]:
# Parse the tweet timestamp and the account-creation timestamp into datetimes.
for date_col in ('created', 'user_created'):
    wall_tweets[date_col] = pd.to_datetime(wall_tweets[date_col])
In [13]:
# Account tenure at tweet time: difference in calendar months between the
# tweet timestamp and the account-creation timestamp (day of month ignored).
# Computed column-wise through the .dt accessor instead of a row-wise apply:
# this avoids positional row[0]/row[1] lookups on a label-indexed Series
# (deprecated in recent pandas) and is much faster on large frames.
# Requires both columns to already be datetime-typed (see the to_datetime cell).
tweet_date = wall_tweets['created'].dt
signup_date = wall_tweets['user_created'].dt
wall_tweets['user_tenure'] = (
    (tweet_date.year - signup_date.year) * 12
    + (tweet_date.month - signup_date.month)
)
In [17]:
# Share of 'wall' tweets (in percent) for each account-tenure value.
tweets_by_tenure = wall_tweets.groupby('user_tenure').size()
tenure_grouping = tweets_by_tenure / len(wall_tweets) * 100

# Line plot: tenure in months on x, percentage of tweets on y.
fig, ax = plt.subplots()
ax.plot(tenure_grouping.index, tenure_grouping.values)
ax.set(xlabel="Acct tenure in months", ylabel="% of tweets")
plt.show()
In [22]:
# Number of 'wall' tweets per user name, most active users first.
tweets_per_user = wall_tweets.groupby('user_name').size().sort_values(ascending=False)

# Plot the counts against their rank position so the shape of the activity
# distribution is visible; label the figure so it can be read on its own.
fig, ax = plt.subplots()
ax.plot(tweets_per_user.values)
ax.set_title("Tweets about the 'wall' per user")
ax.set_xlabel("User rank (most active first)")
ax.set_ylabel("Number of tweets")
plt.show()
In [35]:
# Tweet counts per (user_name, user_description) pair; show the 20 largest
# as a one-column DataFrame.
user_desc_counts = wall_tweets.groupby(['user_name', 'user_description']).size()
user_desc_counts.sort_values(ascending=False).head(20).to_frame()
Out[35]:
In [57]:
# Horizontal box plot of the friends_count column for the 'wall' tweets.
# Uses the explicit fig/ax interface like the other plots in this notebook,
# drops missing values (NaNs would otherwise blank out the box statistics),
# and labels the figure so it can be read on its own.
fig, ax = plt.subplots()
ax.boxplot(wall_tweets['friends_count'].dropna().values, vert=False)
ax.set_title("Distribution of friends_count for 'wall' tweets")
ax.set_xlabel("friends_count")
plt.show()
In [58]:
# Summary statistics for the friends_count column of the 'wall' tweets.
friends_count = wall_tweets['friends_count']
friends_count.describe()
Out[58]:
In [39]:
# Number of 'wall' tweets per user_location value, largest first.
location_counts = wall_tweets.groupby('user_location').size()
location_counts.sort_values(ascending=False)
Out[39]:
In [ ]: