notebook.community

Edit and run



In [2]:

    
%matplotlib inline

#import envoy 
import json
import pymongo 
from bson import json_util # From  pymongo
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as md
from datetime import datetime
import twitter
import networkx as nx
print "imported"









    



imported



In [4]:

    
htgs = pd.read_csv("../data/ferguson_hashtags.csv")
htgs.head()









    



/home/ubuntu/anaconda/lib/python2.7/site-packages/pandas/io/parsers.py:1130: DtypeWarning: Columns (10,11,12) have mixed types. Specify dtype option on import or set low_memory=False.
  data = self._reader.read(nrows)






    Out[4]:






  
    
      
      id
      _iso_created_at
      user.screen_name
      entities.hashtags.0.text
      entities.hashtags.1.text
      entities.hashtags.2.text
      entities.hashtags.3.text
      entities.hashtags.4.text
      entities.hashtags.5.text
      entities.hashtags.6.text
      entities.hashtags.7.text
      entities.hashtags.8.text
      entities.hashtags.9.text
    
  
  
    
      0
       5.343279e+17
                                2014-11-17T12:51:02.000Z
          AmirahOna
            NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      1
       5.343279e+17
                                2014-11-17T12:51:04.000Z
       NewsyBarbara
            NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      2
                NaN
       { "$date" : { "$numberLong" : "-92233720368547...
                NaN
            NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      3
       5.343279e+17
                                2014-11-17T12:51:12.000Z
        muhdfikhri_
            NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      4
       5.343279e+17
                                2014-11-17T12:51:14.000Z
          MykeBusch
       Ferguson
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN



In [5]:

    
htgs.columns = ["id", "_iso_created_at", "user.screen_name", "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9", "h10" ]
htgs[:10]









    Out[5]:






  
    
      
      id
      _iso_created_at
      user.screen_name
      h1
      h2
      h3
      h4
      h5
      h6
      h7
      h8
      h9
      h10
    
  
  
    
      0
       5.343279e+17
                                2014-11-17T12:51:02.000Z
            AmirahOna
                NaN
            NaN
         NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      1
       5.343279e+17
                                2014-11-17T12:51:04.000Z
         NewsyBarbara
                NaN
            NaN
         NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      2
                NaN
       { "$date" : { "$numberLong" : "-92233720368547...
                  NaN
                NaN
            NaN
         NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      3
       5.343279e+17
                                2014-11-17T12:51:12.000Z
          muhdfikhri_
                NaN
            NaN
         NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      4
       5.343279e+17
                                2014-11-17T12:51:14.000Z
            MykeBusch
           Ferguson
            NaN
         NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      5
       5.343279e+17
                                2014-11-17T12:51:15.000Z
             alexvdl0
           Ferguson
            NaN
         NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      6
       5.343279e+17
                                2014-11-17T12:51:18.000Z
       NubianQueenIAm
       MichaelBrown
            NaN
         NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      7
       5.343279e+17
                                2014-11-17T12:51:18.000Z
       YourAnonGlobal
          Anonymous
       Ferguson
       OpKKK
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      8
       5.343279e+17
                                2014-11-17T12:51:19.000Z
         syukrimanutd
                NaN
            NaN
         NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
    
    
      9
       5.343279e+17
                                2014-11-17T12:51:23.000Z
            maverckmr
                NaN
            NaN
         NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN
       NaN



In [7]:

    
# htgs[["h1", "h2"]].head()
# htgs.loc[:, "h1":"h10"].head()

all_htgs = np.array((htgs["h1"].dropna()))

for column in htgs.loc[:, "h2":"h10"].columns:
    np.append(all_htgs, htgs[column], axis=0)

# all_htgs.append(htgs[column])

# all_htgs.head()
# all_htgs[:10]



In [9]:

    
# default dict info from http://evanmuehlhausen.com/simple-counters-in-python-with-benchmarks/
from collections import defaultdict

# orginal recipe tags
tags = defaultdict(int)

# dict for retweet tags counts
# rtags = defaultdict(int)

# counter for the number of orginal tweets from thie user
original_count = 0

# go through tweets of the user
for htg in all_htgs:
    tags[htg] +=1
    # get the tags
#     tags = [hashtags["text"] for hashtags in tweet["entities"]["hashtags"]]
    
    # update the dict that is keeping count of the tag fq.
#     for tag in tags:
#         if "retweeted_status" in tweet:
#             rtags[tag] += 1
#         else:
#             otags[tag] += 1
#             original_count += 1

print "Go on..."









    



Go on...



In [12]:

    
# operator info: http://stackoverflow.com/questions/613183/sort-a-python-dictionary-by-value
import operator

sorted_tags = sorted(tags.items(), key = operator.itemgetter(1))

sorted_tags.reverse()

for tag in sorted_tags[45:100]:
    print tag[0], ": ", tag[1]









    



MUFC :  7197
FoxNews :  7149
EEUU :  6935
AA :  6448
soracist :  6403
HandsUpWalkOut :  6352
TamirRice :  6325
Rams :  6281
Boston :  6245
dontsellshots :  6093
BreakingNews :  6085
Chicago :  5973
JusticeForZemir :  5848
FergusonProud :  5724
stoptheparade :  5584
Yeremiito21 :  5440
RIPMikeBrown :  5424
JusticeforMichael :  5359
WhitePrivilege :  5295
NYPD :  5146
inners :  5118
FergusonShooting :  5057
LAPD :  5041
Ayotzinapa :  5028
PoliceState :  4911
android :  4904
mufc :  4845
opKKK :  4777
TIMEPOY :  4730
stl :  4704
StLouis :  4637
supportdarrenwilson :  4551
HandsUp :  4508
blacklivesmatter :  4440
LondonToFerguson :  4383
PALESTINE :  4364
Walmart :  4205
ViolenceWillNotBeTolerated :  4199
Breaking :  4180
WhereisJustice :  4125
ICantBreathe :  4026
oakland :  3942
Portland :  3939
Police :  3937
Iraq :  3920
Shaw :  3847
America :  3834
MLK :  3783
Justice :  3742
TCOT :  3730
police :  3685
obstinate :  3674
usa :  3552
LosAngeles :  3543
RunForJustice :  3514



In [27]:

    
hashtags = pd.DataFrame(sorted_tags, columns = ["hashtag", "num"])
hashtags.sort("num", inplace=True)
hashtags.head()









    Out[27]:






  
    
      
      hashtag
      num
    
  
  
    
      86012 
         VouPraBelieve
       1
    
    
      114678
       CurvesBeautiful
       1
    
    
      114679
               depeche
       1
    
    
      114680
         ThisIsUsParty
       1
    
    
      114681
                 ImMad
       1



In [28]:

    
cutoff = hashtags[(hashtags["hashtag"] != "Ferguson") & (hashtags["hashtag"] != "ferguson") & (hashtags["num"] > 2500)]
len(cutoff)
cutoff.reset_index(drop=True).head()









    Out[28]:






  
    
      
      hashtag
      num
    
  
  
    
      0
                   FBI
       2516
    
    
      1
              Sharpton
       2545
    
    
      2
               unarmed
       2558
    
    
      3
       PoliceBrutality
       2563
    
    
      4
                  Cuba
       2605



In [30]:

    
h = np.array(cutoff["hashtag"])



In [31]:

    
plt.figure(figsize=(5, 20))

y_pos = np.arange(len(cutoff))
plt.barh(y_pos, cutoff["num"])
plt.yticks(y_pos, h)
plt.xlabel('Ammount')
plt.ylabel('Hashtags')

plt.title('Overall Hashtag Usage After #Ferguson')
plt.show()

	id	_iso_created_at	user.screen_name	entities.hashtags.0.text	entities.hashtags.1.text	entities.hashtags.2.text	entities.hashtags.3.text	entities.hashtags.4.text	entities.hashtags.5.text	entities.hashtags.6.text	entities.hashtags.7.text	entities.hashtags.8.text	entities.hashtags.9.text
0	5.343279e+17	2014-11-17T12:51:02.000Z	AmirahOna	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	5.343279e+17	2014-11-17T12:51:04.000Z	NewsyBarbara	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	NaN	{ "$date" : { "$numberLong" : "-92233720368547...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	5.343279e+17	2014-11-17T12:51:12.000Z	muhdfikhri_	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	5.343279e+17	2014-11-17T12:51:14.000Z	MykeBusch	Ferguson	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	hashtag	num
86012	VouPraBelieve	1
114678	CurvesBeautiful	1
114679	depeche	1
114680	ThisIsUsParty	1
114681	ImMad	1

	hashtag	num
0	FBI	2516
1	Sharpton	2545
2	unarmed	2558
3	PoliceBrutality	2563
4	Cuba	2605