notebook.community

Edit and run



In [1]:

    
import tweepy as tw
import json
import pandas as pd

from collections import defaultdict



In [2]:

    
with open("../twitter_config.json") as fp:
    creds = json.load(fp)
print creds.keys()

auth = tw.OAuthHandler(creds["consumer_key"], creds["consumer_secret"])
auth.set_access_token(creds["access_token"], creds["access_token_secret"])
api = tw.API(auth)

print "Tweepy ready for search"









    



[u'consumer_secret', u'access_token', u'consumer_key', u'access_token_secret']
Tweepy ready for search



In [3]:

    
query = "Presidential Election"



In [4]:

    
statuses = api.search(q=query, count=1000)



In [11]:

    
statuses[0].text









    Out[11]:





u'RT @JasonWhitely: #BREAKING: Texas breaks the 15-million mark in registered voters. About 1.3M more since the last presidential election. h\u2026'



In [16]:

    
def dict2df(data):
    return pd.DataFrame(data.items(), columns=["item", "counts"]).sort_values("counts", ascending=False)

def get_entities(statuses):
    hashtags = defaultdict(int)
    mentions = defaultdict(int)
    keys = ("hashtags", "user_mentions")
    for s in statuses:
        entities = s.entities
        if "hashtags" in entities:
            e = map(lambda x: x["text"], entities["hashtags"])
            for t in e:
                hashtags[t] += 1
        if "user_mentions" in entities:
            e = map(lambda x: x["screen_name"], entities["user_mentions"])
            for t in e:
                mentions[t] += 1
    return dict2df(hashtags), dict2df(mentions)


def get_text(status):
    return status.text.lower().split(" ")



In [17]:

    
get_text(statuses[0])









    Out[17]:





[u'rt',
 u'@jasonwhitely:',
 u'#breaking:',
 u'texas',
 u'breaks',
 u'the',
 u'15-million',
 u'mark',
 u'in',
 u'registered',
 u'voters.',
 u'about',
 u'1.3m',
 u'more',
 u'since',
 u'the',
 u'last',
 u'presidential',
 u'election.',
 u'h\u2026']



In [6]:

    
hashtags, mentions = get_entities(statuses)



In [7]:

    
len(statuses)









    Out[7]:





100



In [8]:

    
hashtags









    Out[8]:






  
    
      
      item
      counts
    
  
  
    
      4
      BREAKING
      4
    
    
      13
      Trump
      3
    
    
      5
      ARGOP
      2
    
    
      0
      HillaryClinton
      1
    
    
      1
      mentalhealth
      1
    
    
      2
      News
      1
    
    
      3
      Candidates
      1
    
    
      6
      bbcqt
      1
    
    
      7
      VOTEFORBOLBI
      1
    
    
      8
      TakeAStand
      1
    
    
      9
      election
      1
    
    
      10
      World
      1
    
    
      11
      NigerianDailyNews
      1
    
    
      12
      election2016
      1



In [9]:

    
mentions









    Out[9]:






  
    
      
      item
      counts
    
  
  
    
      5
      BBCWorld
      23
    
    
      0
      WesleyRickard
      5
    
    
      32
      JasonWhitely
      4
    
    
      4
      realDonaldTrump
      3
    
    
      20
      latimes
      2
    
    
      7
      JohnJHarwood
      2
    
    
      8
      Yaboy_Skeete
      2
    
    
      31
      Claire_Phipps
      2
    
    
      12
      ARGOP
      2
    
    
      16
      SopanDeb
      2
    
    
      27
      DrHerukhuti
      1
    
    
      24
      CNN
      1
    
    
      25
      htTweets
      1
    
    
      26
      tomhanks
      1
    
    
      30
      koreykuhl
      1
    
    
      28
      lauramannino
      1
    
    
      29
      BarbaraAResEsq
      1
    
    
      22
      MarkHalperin
      1
    
    
      33
      CharmaineYoest
      1
    
    
      34
      BarbaraMcDWhitt
      1
    
    
      35
      yanakoste
      1
    
    
      23
      masatheman
      1
    
    
      18
      AARPNY
      1
    
    
      21
      BBCNews
      1
    
    
      19
      PBSKIDS
      1
    
    
      1
      OldSchoolCincy
      1
    
    
      17
      Ekklesia_co_uk
      1
    
    
      15
      MrDane1982
      1
    
    
      14
      DannyBowman10
      1
    
    
      13
      intlspectator
      1
    
    
      11
      yashwantraj
      1
    
    
      10
      BrookeBCNN
      1
    
    
      9
      _treychapman
      1
    
    
      6
      SportsDayHS
      1
    
    
      3
      kjbohan
      1
    
    
      2
      HillaryClinton
      1
    
    
      36
      IBJIYONGI
      1



In [ ]:

	item	counts
4	BREAKING	4
13	Trump	3
5	ARGOP	2
0	HillaryClinton	1
1	mentalhealth	1
2	News	1
3	Candidates	1
6	bbcqt	1
7	VOTEFORBOLBI	1
8	TakeAStand	1
9	election	1
10	World	1
11	NigerianDailyNews	1
12	election2016	1

	item	counts
5	BBCWorld	23
0	WesleyRickard	5
32	JasonWhitely	4
4	realDonaldTrump	3
20	latimes	2
7	JohnJHarwood	2
8	Yaboy_Skeete	2
31	Claire_Phipps	2
12	ARGOP	2
16	SopanDeb	2
27	DrHerukhuti	1
24	CNN	1
25	htTweets	1
26	tomhanks	1
30	koreykuhl	1
28	lauramannino	1
29	BarbaraAResEsq	1
22	MarkHalperin	1
33	CharmaineYoest	1
34	BarbaraMcDWhitt	1
35	yanakoste	1
23	masatheman	1
18	AARPNY	1
21	BBCNews	1
19	PBSKIDS	1
1	OldSchoolCincy	1
17	Ekklesia_co_uk	1
15	MrDane1982	1
14	DannyBowman10	1
13	intlspectator	1
11	yashwantraj	1
10	BrookeBCNN	1
9	_treychapman	1
6	SportsDayHS	1
3	kjbohan	1
2	HillaryClinton	1
36	IBJIYONGI	1