In [47]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta
import datetime

In [2]:
df = pd.read_json("https://s3.amazonaws.com/far-right/twitter/mb_protests.json")

In [3]:
df.columns


Out[3]:
Index(['created', 'friends_count', 'hashtags', 'id_str', 'text',
       'user_created', 'user_description', 'user_followers', 'user_location',
       'user_name'],
      dtype='object')

In [4]:
print("Total number of tweets = {}".format(len(df)))


Total number of tweets = 116526
How many tweets are about the 'wall'?

In [5]:
# Lowercase the hashtags and tweet body
df['hashtags'] = df['hashtags'].str.lower()
df['text'] = df['text'].str.lower()

In [6]:
print("Total number of tweets containing hashtag 'wall' = {}".format(len(df[df['hashtags'].str.contains('wall')])))


Total number of tweets containing hashtag 'wall' = 6372

In [7]:
print("Total number of tweets whose body contains 'wall' = {}".format(len(df[df['text'].str.contains('wall')])))


Total number of tweets whose body contains 'wall' = 7241

In [8]:
wall_tweets = df[(df['hashtags'].str.contains('wall')) | (df['text'].str.contains('wall'))].copy()

In [9]:
print("Total number of tweets about the 'wall' = {}".format(len(wall_tweets)))


Total number of tweets about the 'wall' = 7241
What is the average twitter tenure of people who tweeted about the wall?

In [11]:
def months_between(end, start):
    return (end.year - start.year)*12 + end.month - start.month

In [12]:
wall_tweets['created'] = pd.to_datetime(wall_tweets['created'])
wall_tweets['user_created'] = pd.to_datetime(wall_tweets['user_created'])

In [13]:
wall_tweets['user_tenure'] = wall_tweets[['created', \
                            'user_created']].apply(lambda row: months_between(row[0], row[1]), axis=1)

In [17]:
tenure_grouping = wall_tweets.groupby('user_tenure').size() / len(wall_tweets) * 100

fig, ax = plt.subplots()

ax.plot(tenure_grouping.index, tenure_grouping.values)

ax.set_ylabel("% of tweets")
ax.set_xlabel("Acct tenure in months")

plt.show()


There are a couple of users tweeting multiple times, but most tweets come from distinct twitter handles

In [22]:
tweets_per_user = wall_tweets.groupby('user_name').size().sort_values(ascending=False)

fig, ax = plt.subplots()

ax.plot(tweets_per_user.values)

plt.show()


Who are the 'top tweeters' + descriptions?

In [35]:
wall_tweets.groupby(['user_name', 'user_description']).size().sort_values(ascending=False).head(20).to_frame()


Out[35]:
0
user_name user_description
realtrumpNOT parody account 117
latinagirlpwr I love Obama but I'm still a #NasyWomen and I'm with her! #NotMyFreakingPresisdent 63
OccupyWeather #OccupyWallStreet: see these smart & highly effective alternatives to camping in a frozen park: http://t.co/BifM2W4X http://t.co/fZFoQsWz http://t.co/MfyMRYqt 61
timothypwoodwa1 Let ALL races UNITE to Fix our Country for ALL legal Americans to live Safe & Prosper. ~ Independent Thinker ~ 57
tiwhitter Heart belongs to Jesus, avid reader, writer, blogger, theologian, cat lady, music lover, believer in miracles, imperfect above all else. 54
timothypwoodwa1 Let ALL races UNITE to Fix our Country for ALL legal Americans to live Safe & Prosper. ~ Independent Thinker ~ Rodents, Porn & Ghost AccountsWILL BE BLOCKED! 39
tamilartist Artist 33
Crusaydah WAR. 27
DNdamagi U dont know me well enough to judge me,but when you mess up, Im calling u out 24
GadflyMorse Morse's Political Gadfly...It's About Truth 21
aarenjobs Uncover Your Strengths Discover Your Future 21
saramarietweets Voluntary benefits & sales entrepreneur. Obsessed & focused! Dr in #Education student & MBA @LibertyU. Musician. ❤️ @AndrewBrenner. #TrumpTrain 21
drumpfshit360 Beyond the headlines to tell the Fake News from many points of views 15
USofA_af Proud Deplorable.Woman for Trump.No Apologies. 15
hisey_mike Homeless Advocate AIDS Advocate 14
UckfayRumptay Historian. Queer. Feminist. NY. It's not a conversation, it's direction. #TheResistance #GrabYourWallet #TrumpLeaks #AnonFamily #OpTrump Trumpanzee blocker. 14
shepherdwolves PHOTO/VIDEO content in the #MEDIA folder- Run by a pack of fearless, progressive leaders. TeePublic link below: B&W photographic, political t-shirts 13
NorthCaliGrl Mom⏐wife⏐nurse. This is my weekend Twitter acct. Lover of meditation, historical fiction, dogs, tea, soccer & lipstick. Anglophile. Not a fan of illiberal left. 13
TarragonWarren Against globalism, imperialism, Zionism & liberalism. For the people. Nationalists of the world unite! 12
dreamedofdust ⚡️Breaking News in the fight to MAGA 🇺🇸 Yuge Exposé of Swamp Corruption💥Now make like a tree and stick about! 12
What is the reach of these tweets in terms of followers?

In [57]:
plt.boxplot(wall_tweets['friends_count'].values, vert=False)
plt.show()



In [58]:
wall_tweets['friends_count'].describe()


Out[58]:
count      7241.000000
mean       1749.011324
std        9197.940081
min           0.000000
25%         150.000000
50%         430.000000
75%        1121.000000
max      228852.000000
Name: friends_count, dtype: float64
Location of the tweets?

In [39]:
wall_tweets.groupby('user_location').size().sort_values(ascending=False)


Out[39]:
user_location
United States                     373
California, USA                   127
Brooklyn, NY                      121
One Nation Under God, USA          96
Los Angeles, CA                    82
USA                                81
New York, NY                       76
Chicago, IL                        74
Washington, DC                     62
Florida, USA                       49
Seattle, WA                        48
New York, USA                      46
New Jersey, USA                    45
San Francisco, CA                  40
Los Angeles                        39
San Diego, CA                      39
Philadelphia, PA                   36
New York City                      34
New York City, USA                 33
London, England                    33
Chicago                            32
Dallas, TX                         32
Columbus, OH                       31
Boston, MA                         30
NYC                                26
Oakland, CA                        26
Texas, USA                         26
Houston, TX                        25
New York                           24
Portland, OR                       24
                                 ... 
Niterói - RJ                        1
No Where                            1
Oaxaca de Juárez, Oaxaca            1
Ottawa, Ontario, Canada             1
Ottawa, Ontario                     1
Ottawa                              1
Oslo, Norway                        1
Orchard Park, NY                    1
Orange County, California           1
On various fibers, tubes            1
On the beaches, breach AGAIN        1
On the Left                         1
Omaha, NE                           1
Olympia, WA                         1
Oh-Ree-Gon, USA                     1
Oakland, SF Bay Area                1
Noneofyourbiz                       1
ON EARTH                            1
OHIO                                1
Novadic, Playing & Sharing          1
Not actually in Tehran, Iran        1
Not Los Angeles, Not New York.      1
Not Bellaire                        1
North Texas                         1
North Jersey                        1
North Dorchester, Boston            1
North Dakota                        1
North Atlanta, GA                   1
North                               1
Mumbai                              1
dtype: int64

In [ ]: