In [1]:
import tweepy as tw
import json
import pandas as pd
from collections import defaultdict
In [2]:
# Load the Twitter API credentials from a JSON file that lives outside the
# notebook, so no secret is hard-coded in a cell.
with open("../twitter_config.json") as fp:
    creds = json.load(fp)

# Sanity check: show only the key names, never the secret values.
# The file must provide: consumer_key, consumer_secret, access_token,
# access_token_secret (all four are read below).
print(creds.keys())

# Build an authenticated tweepy API client from the loaded credentials.
auth = tw.OAuthHandler(creds["consumer_key"], creds["consumer_secret"])
auth.set_access_token(creds["access_token"], creds["access_token_secret"])
api = tw.API(auth)
print("Tweepy ready for search")
In [3]:
# Search phrase passed to the Twitter search call below.
query = "Presidential Election"
In [4]:
# The search endpoint returns at most 100 tweets per request, so a single call
# with count=1000 cannot deliver 1000 results. Page through the results with a
# Cursor instead and materialise them, so that indexing and len() keep working.
TWEETS_PER_PAGE = 100
MAX_TWEETS = 1000
statuses = list(tw.Cursor(api.search, q=query, count=TWEETS_PER_PAGE).items(MAX_TWEETS))
In [11]:
# Peek at the text of the first returned status.
statuses[0].text
Out[11]:
In [16]:
def dict2df(data):
    """Convert an ``item -> count`` mapping into a DataFrame sorted by count.

    Parameters
    ----------
    data : dict
        Mapping from an item (for example a hashtag) to its count.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``"item"`` and ``"counts"``, ordered by ``"counts"``
        descending. An empty mapping yields an empty frame with the same
        two columns.
    """
    # Materialise the pairs first: on Python 3 ``dict.items()`` is a view, not
    # a list, and older pandas releases may not accept it as a list of rows.
    rows = list(data.items())
    return pd.DataFrame(rows, columns=["item", "counts"]).sort_values("counts", ascending=False)
def get_entities(statuses):
    """Count hashtag and user-mention occurrences across a set of statuses.

    Parameters
    ----------
    statuses : iterable
        Objects exposing an ``entities`` mapping. Its optional ``"hashtags"``
        entries are read via their ``"text"`` key and its optional
        ``"user_mentions"`` entries via their ``"screen_name"`` key.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(hashtags, mentions)`` frequency tables as produced by ``dict2df``.
    """
    hashtags = defaultdict(int)
    mentions = defaultdict(int)
    for status in statuses:
        entities = status.entities
        # Either entity list may be absent from a status; skip it if so.
        if "hashtags" in entities:
            for tag in entities["hashtags"]:
                hashtags[tag["text"]] += 1
        if "user_mentions" in entities:
            for user in entities["user_mentions"]:
                mentions[user["screen_name"]] += 1
    return dict2df(hashtags), dict2df(mentions)
def get_text(status):
    """Return the lower-cased, whitespace-separated tokens of a status.

    Parameters
    ----------
    status : object
        Any object with a string ``text`` attribute.

    Returns
    -------
    list of str
        Tokens of ``status.text`` after lower-casing. Empty text yields ``[]``.
    """
    # split() with no argument splits on any run of whitespace, so repeated
    # spaces yield no empty tokens and newlines/tabs also separate words
    # (split(" ") handled neither).
    return status.text.lower().split()
In [17]:
# Tokenise the first status as a quick check of get_text.
get_text(statuses[0])
Out[17]:
In [6]:
# Build the hashtag and user-mention frequency tables for the fetched statuses.
hashtags, mentions = get_entities(statuses)
In [7]:
# Number of statuses the search actually returned.
len(statuses)
Out[7]:
In [8]:
# Display the hashtag frequency table (sorted by count, highest first).
hashtags
Out[8]:
In [9]:
# Display the user-mention frequency table (sorted by count, highest first).
mentions
Out[9]:
In [ ]: