In [1]:
import tweepy as tw
import json
import pandas as pd

from collections import defaultdict
import os
from IPython.display import clear_output

In [2]:
TWITTER_CONFIG_FILE="twitter_config.json"

In [3]:
if not os.path.isfile(TWITTER_CONFIG_FILE):
    with open("twitter_config.sample.json") as fp:
        creds = json.load(fp)
        for k in sorted(creds.keys()):
            v = raw_input("Enter %s:\t" % k)
            creds[k] = v
    print creds
    with open(TWITTER_CONFIG_FILE, "wb+") as fp:
        json.dump(creds, fp, indent=4, sort_keys=True)
    clear_output()
    print "Printed credentials to file %s" % TWITTER_CONFIG_FILE

In [4]:
with open("twitter_config.json") as fp:
    creds = json.load(fp)
print creds.keys()


[u'access_token', u'consumer_key', u'access_token_secret', u'consumer_secret']

In [5]:
auth = tw.OAuthHandler(creds["consumer_key"], creds["consumer_secret"])
auth.set_access_token(creds["access_token"], creds["access_token_secret"])
api = tw.API(auth)

print "Tweepy ready for search"


Tweepy ready for search

In [6]:
statuses = api.search(q="Donald Trump", count=1000)

In [7]:
def dict2df(data):
    return pd.DataFrame(data.items(), columns=["item", "counts"]).sort_values("counts", ascending=False)

def get_entities(statuses):
    hashtags = defaultdict(int)
    mentions = defaultdict(int)
    keys = ("hashtags", "user_mentions")
    for s in statuses:
        entities = s.entities
        if "hashtags" in entities:
            e = map(lambda x: x["text"], entities["hashtags"])
            for t in e:
                hashtags[t] += 1
        if "user_mentions" in entities:
            e = map(lambda x: x["screen_name"], entities["user_mentions"])
            for t in e:
                mentions[t] += 1
    return dict2df(hashtags), dict2df(mentions)

In [8]:
hashtags, mentions = get_entities(statuses)

In [9]:
len(statuses)


Out[9]:
100

In [10]:
hashtags


Out[10]:
item counts
14 NBA 2
15 MAGA 2
26 Pistons 2
25 Trump 2
19 Lakers 2
18 Forbes400 2
16 76ers 2
28 Kobe 2
5 Cavs 2
7 Mavericks 2
6 NeverTrump 2
20 PodestaEmails 1
27 WATCH 1
2 Trump2016 1
3 ctot 1
24 ccot 1
23 TrumpTrain 1
22 maga 1
21 CNN 1
8 SmartNews 1
4 MSNBC 1
9 NoMercy 1
17 tcot 1
1 tgdn 1
13 News 1
12 RiseUpAsOne 1
11 ImWithHer 1
10 JPNET 1
0 dlknowles 1

In [11]:
mentions


Out[11]:
item counts
7 realDonaldTrump 6
31 BernieSanders 5
0 Darren32895836 4
1 WesleyRickard 4
64 LindaSuhler 3
39 HistoryToLearn 3
32 ImBluetrek 3
21 LDVisuals 2
72 CNNPolitics 2
54 PageSix 2
46 jftrent 2
25 YouTube 2
74 WayneDupreeShow 2
12 makecomicsgreat 2
11 Forbes 2
53 digg 1
58 WaladShami 1
57 CTmagazine 1
56 CoryTownes 1
55 Humans_vs_Trump 1
4 SpecialKMB1969 1
13 RogerJStoneJr 1
60 liladowns 1
52 CNBC 1
51 wikileaks 1
50 FileSEE28 1
49 hectormorenco 1
48 TheFix 1
47 RubinReport 1
59 ericbradner 1
... ... ...
44 richardhine 1
14 SaraRamirez 1
29 jebnyc 1
15 AMDWaters 1
16 maryamare101 1
17 Politics_PR 1
18 djonathandavids 1
19 PrisonPlanet 1
20 ANI_news 1
22 ArmorCavSpin 1
23 Slate 1
24 JudgeJeanine 1
26 dalas_azahar 1
27 SheeeRatchet 1
28 Cernovich 1
30 USAforTrump2016 1
43 plantblogger 1
10 WSJPolitics 1
9 jphoornstra 1
33 timesofindia 1
34 ABCPolitics 1
35 HillaryPix 1
36 USAneedsTRUMP 1
8 rkylesmith 1
38 MagicRoyalty 1
6 FoxNews 1
40 ShahzadaJamal 1
41 AndreaMann 1
42 realkingrobbo 1
37 thinkprogress 1

75 rows × 2 columns


In [ ]: