In [1]:
import tweepy as tw
import json
import pandas as pd

from collections import defaultdict

In [2]:
# Load the Twitter API credentials from a JSON file in the parent directory,
# so that no secret values are written into this notebook.
with open("../twitter_config.json") as fp:
    creds = json.load(fp)
# Show only the key names (never the values) as a sanity check of the file.
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(list(creds.keys()))

# Authenticate with the consumer key pair, then attach the access token pair.
auth = tw.OAuthHandler(creds["consumer_key"], creds["consumer_secret"])
auth.set_access_token(creds["access_token"], creds["access_token_secret"])
api = tw.API(auth)

print("Tweepy ready for search")


[u'consumer_secret', u'access_token', u'consumer_key', u'access_token_secret']
Tweepy ready for search

In [3]:
# Free-text search phrase; passed as ``q`` to the Twitter search call.
query = "Presidential Election"

In [4]:
# A single search call returns at most 100 tweets (asking for a larger count
# still yields 100), so request that page size explicitly. Fetching more than
# one page requires paging through results, e.g. with tweepy's Cursor.
statuses = api.search(q=query, count=100)

In [11]:
# Peek at the raw text of the first returned tweet.
statuses[0].text


Out[11]:
u'RT @JasonWhitely: #BREAKING: Texas breaks the 15-million mark in registered voters. About 1.3M more since the last presidential election. h\u2026'

In [16]:
def dict2df(data):
    """Turn an ``{item: count}`` mapping into a frame sorted by descending count.

    Parameters
    ----------
    data : dict
        Mapping from an item (e.g. a hashtag) to the number of times it was
        seen. May be empty.

    Returns
    -------
    pandas.DataFrame
        Columns ``item`` and ``counts``, highest count first. Each row keeps
        the index it had before sorting, and rows with equal counts keep the
        order in which the mapping yielded them.
    """
    # list(...) because on Python 3 ``items()`` is a view, which older pandas
    # versions reject in the DataFrame constructor.
    frame = pd.DataFrame(list(data.items()), columns=["item", "counts"])
    # mergesort is stable, so ties come out in a deterministic order.
    return frame.sort_values("counts", ascending=False, kind="mergesort")

def get_entities(statuses):
    """Count hashtag and user-mention occurrences across a set of statuses.

    Parameters
    ----------
    statuses : iterable
        Objects exposing an ``entities`` mapping. Its optional ``"hashtags"``
        entries are read by their ``"text"`` key and its optional
        ``"user_mentions"`` entries by their ``"screen_name"`` key.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(hashtags, mentions)`` frequency tables as built by ``dict2df``.
    """
    hashtags = defaultdict(int)
    mentions = defaultdict(int)
    for status in statuses:
        entities = status.entities
        # An entity type that is absent from a status contributes nothing.
        for tag in entities.get("hashtags", ()):
            hashtags[tag["text"]] += 1
        for mention in entities.get("user_mentions", ()):
            mentions[mention["screen_name"]] += 1
    return dict2df(hashtags), dict2df(mentions)


def get_text(status):
    """Return the lower-cased, whitespace-separated tokens of a status.

    Parameters
    ----------
    status : object
        Any object with a string ``text`` attribute.

    Returns
    -------
    list of str
        The tokens of ``status.text`` in order, lower-cased. Never contains
        empty strings.
    """
    # split() with no argument splits on any run of whitespace, so repeated
    # spaces, tabs and newlines do not produce empty or multi-line tokens.
    return status.text.lower().split()

In [17]:
# Tokenise the first tweet to check get_text on real data.
get_text(statuses[0])


Out[17]:
[u'rt',
 u'@jasonwhitely:',
 u'#breaking:',
 u'texas',
 u'breaks',
 u'the',
 u'15-million',
 u'mark',
 u'in',
 u'registered',
 u'voters.',
 u'about',
 u'1.3m',
 u'more',
 u'since',
 u'the',
 u'last',
 u'presidential',
 u'election.',
 u'h\u2026']

In [6]:
# Build the hashtag and mention frequency tables for the fetched tweets.
hashtags, mentions = get_entities(statuses)

In [7]:
# Number of tweets the search actually returned.
len(statuses)


Out[7]:
100

In [8]:
# Hashtag counts, most frequent first.
hashtags


Out[8]:
item counts
4 BREAKING 4
13 Trump 3
5 ARGOP 2
0 HillaryClinton 1
1 mentalhealth 1
2 News 1
3 Candidates 1
6 bbcqt 1
7 VOTEFORBOLBI 1
8 TakeAStand 1
9 election 1
10 World 1
11 NigerianDailyNews 1
12 election2016 1

In [9]:
# Mentioned screen names with their counts, most frequent first.
mentions


Out[9]:
item counts
5 BBCWorld 23
0 WesleyRickard 5
32 JasonWhitely 4
4 realDonaldTrump 3
20 latimes 2
7 JohnJHarwood 2
8 Yaboy_Skeete 2
31 Claire_Phipps 2
12 ARGOP 2
16 SopanDeb 2
27 DrHerukhuti 1
24 CNN 1
25 htTweets 1
26 tomhanks 1
30 koreykuhl 1
28 lauramannino 1
29 BarbaraAResEsq 1
22 MarkHalperin 1
33 CharmaineYoest 1
34 BarbaraMcDWhitt 1
35 yanakoste 1
23 masatheman 1
18 AARPNY 1
21 BBCNews 1
19 PBSKIDS 1
1 OldSchoolCincy 1
17 Ekklesia_co_uk 1
15 MrDane1982 1
14 DannyBowman10 1
13 intlspectator 1
11 yashwantraj 1
10 BrookeBCNN 1
9 _treychapman 1
6 SportsDayHS 1
3 kjbohan 1
2 HillaryClinton 1
36 IBJIYONGI 1

In [ ]: