In [1]:
import twitter_parser

In [ ]:
twitter_parser.scrape_page('https://twitter.com/search?f=news&vertical=news&q=water%20since%3A2015-09-01%20until%3A2015-10-18&src=typd&lang=en')
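
twitter_parser.scrape_page is the project's own scraper and saves the raw search-result pages under Data/ for the parsing step below. Purely as an illustration, a minimal scraper for a search URL like the one above might look like the sketch that follows; the requests/BeautifulSoup calls and the 'tweet-text' selector are assumptions about the 2015-era search page markup, not the module's actual implementation.

In [ ]:
import io
import requests
from bs4 import BeautifulSoup

def scrape_search_page_sketch(url, out_path):
    """Hypothetical sketch: fetch one Twitter search-results page and append
    the visible tweet texts to a local text file (assumed storage format)."""
    html = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}).text
    soup = BeautifulSoup(html, 'html.parser')
    with io.open(out_path, 'a', encoding='utf-8') as f:
        # 'p.tweet-text' held the tweet body in the old web UI (assumption)
        for p in soup.select('p.tweet-text'):
            f.write(p.get_text(' ', strip=True) + u'\n')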

In [2]:
prsd_tweets = twitter_parser.parse_folder()


Found files
Data/dec13-dec6.txt
Data/dec17-dec14.txt
Data/dec30-dec18.txt
Data/dec5-nov30.txt
Total amount of tweets: 28069
Fifty most common words in tweets:
[('water', 30549), ('news', 3557), ('nov', 1703), ('flint', 1041), ('conservation', 958), ('drinking', 896), ('california', 815), ('crisis', 794), ('clean', 771), ('world', 754), ('watkins', 714), ('alyssa', 711), ('city', 710), ('hot', 692), ('climate', 668), ('video', 649), ('state', 624), ('drink', 611), ('drought', 606), ('supply', 602), ('yukohill', 574), ('health', 510), ('us', 508), ('flood', 508), ('energy', 485), ('levels', 481), ('global', 481), ('michigan', 479), ('food', 468), ('india', 452), ('main', 451), ('people', 447), ('wendy', 440), ('epa', 432), ('emergency', 427), ('change', 416), ('break', 416), ('floods', 403), ('air', 389), ('one', 360), ('finds', 359), ('tap', 359), ('power', 354), ('social', 353), ('uk', 350), ('first', 346), ('chennai', 346), ('home', 337), ('media', 335), ('today', 334)]
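
The topic counts in the next cell are built with twitter_parser.daily_count_words; judging by how its return value is handed straight to pd.DataFrame, it maps each day to the number of tweets whose text mentions any of the given word stems. A rough re-implementation of that idea (an assumption based on the parsed-tweet dicts shown later in Out[10], which carry 'date' and 'text' keys) could be:

In [ ]:
from collections import defaultdict

def daily_count_words_sketch(stems, tweets):
    """Hypothetical sketch of daily_count_words: per calendar day, count the
    tweets whose lowercased text contains at least one of the given stems."""
    counts = defaultdict(int)
    for tw in tweets:
        text = tw['text'].lower()
        if any(stem in text for stem in stems):
            counts[tw['date'].date()] += 1
    return dict(counts)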

In [5]:
%matplotlib inline 
import pandas as pd
import matplotlib.pylab as pylab
pylab.rcParams['figure.figsize'] = 16, 8  # set the default figure size for this session

region = {}
region['UK'] = twitter_parser.daily_count_words(['uk'], prsd_tweets)
region['US'] =  twitter_parser.daily_count_words(['us'], prsd_tweets)
region['California'] =  twitter_parser.daily_count_words(['california'], prsd_tweets)
region['India'] =  twitter_parser.daily_count_words(['india'], prsd_tweets)
region['World'] =  twitter_parser.daily_count_words(['world','global'], prsd_tweets)

cond = {}
cond['Droughts'] =  twitter_parser.daily_count_words(['drought'], prsd_tweets) 
cond['Floods'] =  twitter_parser.daily_count_words(['flood'], prsd_tweets)

science = {}
science['Mars'] =  twitter_parser.daily_count_words(['mars'], prsd_tweets)
science['Study'] =  twitter_parser.daily_count_words(['study'], prsd_tweets)
science['Climate'] =  twitter_parser.daily_count_words(['climate'], prsd_tweets)
science['Ice'] =  twitter_parser.daily_count_words(['ice'], prsd_tweets)
science['Plastic'] =  twitter_parser.daily_count_words(['plastic'], prsd_tweets)
science['Contamination'] =  twitter_parser.daily_count_words(['toxic','contaminat'], prsd_tweets) 

health = {}
health['Drink'] =  twitter_parser.daily_count_words(['drink'], prsd_tweets) 
health['Fish'] =  twitter_parser.daily_count_words(['fish'], prsd_tweets) 
health['Health'] =  twitter_parser.daily_count_words(['health'], prsd_tweets) 
health['Clean'] =  twitter_parser.daily_count_words(['clean'], prsd_tweets) 
health['Monitoring'] =  twitter_parser.daily_count_words(['monitor','assessment'], prsd_tweets)
health['Water Poverty'] =  twitter_parser.daily_count_words(['poverty'], prsd_tweets) 

work = {}

work['Farmers'] =  twitter_parser.daily_count_words(['farmer'], prsd_tweets) 
work['Industry'] =  twitter_parser.daily_count_words(['industr'], prsd_tweets) 


news = {}
news['CTV'] =  twitter_parser.daily_count_words(['ctv'], prsd_tweets) 
news['CBC'] =  twitter_parser.daily_count_words(['cbc'], prsd_tweets) 
news['BBC'] =  twitter_parser.daily_count_words(['bbc'], prsd_tweets) 
news['CNN'] =  twitter_parser.daily_count_words(['cnn'], prsd_tweets) 
news['NBC'] =  twitter_parser.daily_count_words(['nbc'], prsd_tweets) 
news['FOXTV'] =  twitter_parser.daily_count_words(['foxtv'], prsd_tweets) 

elec = {}
elec['NDP'] =  twitter_parser.daily_count_words(['ndp'], prsd_tweets)
elec['Conservative'] =  twitter_parser.daily_count_words(['pc'], prsd_tweets)
elec['Liberal'] =  twitter_parser.daily_count_words(['liberal'], prsd_tweets)

typ={}
typ['Sea'] =  twitter_parser.daily_count_words(['sea'], prsd_tweets) 
typ['Ocean'] =  twitter_parser.daily_count_words(['ocean'], prsd_tweets) 
typ['Lake'] =  twitter_parser.daily_count_words(['lake'], prsd_tweets) 
typ['River'] =  twitter_parser.daily_count_words(['river'], prsd_tweets) 
typ['Wetlands'] =  twitter_parser.daily_count_words(['wetlands','marsh','bog','swamp'], prsd_tweets) 

cities = {}
cities['Baltimore city, US'] =  twitter_parser.daily_count_words(['baltimore'], prsd_tweets)
cities['Bay City, US'] =  twitter_parser.daily_count_words(['bay city'], prsd_tweets)
# note: this stem may miss tweets that spell the city as "Des Moines"
cities['Des Moines city, US'] =  twitter_parser.daily_count_words(['desmoine'], prsd_tweets)
cities['Lancaster city, US'] =  twitter_parser.daily_count_words(['lancaster'], prsd_tweets)
cities['Portland city, US'] =  twitter_parser.daily_count_words(['portland'], prsd_tweets)
cities['Tulsa city, US'] =  twitter_parser.daily_count_words(['tulsa'], prsd_tweets)
cities['Wisconsin, US'] =  twitter_parser.daily_count_words(['wisconsin'], prsd_tweets)


cities_w = {}
cities_w['Cork city, Ireland'] =  twitter_parser.daily_count_words(['cork'], prsd_tweets)
cities_w['Limerick city, Ireland'] =  twitter_parser.daily_count_words(['limerick'], prsd_tweets)
cities_w['Toronto city, Canada'] =  twitter_parser.daily_count_words(['toronto'], prsd_tweets)
cities_w['Dhaka city, Bangladesh'] =  twitter_parser.daily_count_words(['dhaka'], prsd_tweets)
cities_w['Manchester city, England'] =  twitter_parser.daily_count_words(['manchester'], prsd_tweets)
cities_w['Mumbai city, India'] =  twitter_parser.daily_count_words(['mumbai'], prsd_tweets)
cities_w['Tulcea city, Romania'] =  twitter_parser.daily_count_words(['tulcea'], prsd_tweets)

us_pre = {}
us_pre['Obama'] =  twitter_parser.daily_count_words(['obama'], prsd_tweets)
us_pre['Trump'] =  twitter_parser.daily_count_words(['trump'], prsd_tweets)


df1 = pd.DataFrame(region)
df2 = pd.DataFrame(cond)
df3 = pd.DataFrame(science)
df4 = pd.DataFrame(health)
df5 = pd.DataFrame(work)
df6 = pd.DataFrame(news)
df7 = pd.DataFrame(elec)
df8 = pd.DataFrame(typ)
df9 = pd.DataFrame(cities)
df10 = pd.DataFrame(cities_w)
df11 = pd.DataFrame(us_pre)


df1[1:-2].plot(kind='area')
df2[1:-2].plot(kind='area')
df3[1:-2].plot(kind='area')
df4[1:-2].plot(kind='area')
df5[1:-2].plot(kind='area')
df6[1:-2].plot(kind='area')
df7[1:-2].plot(kind='area')
df8[1:-2].plot(kind='area')
df9[1:-2].plot(kind='area')
df10[1:-2].plot(kind='area')
df11[1:-2].plot(kind='area')


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x110c72810>
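
Each df.plot call above opens its own figure, so the cell produces eleven separate area charts (only the handle of the last one is echoed in Out[5]). If side-by-side comparison is easier, the same frames can be drawn into one grid of axes with plain pandas/matplotlib; this is just standard plotting, not part of the original analysis:

In [ ]:
import matplotlib.pyplot as plt

frames = [('Regions', df1), ('Conditions', df2), ('Science', df3),
          ('Health', df4), ('Work', df5), ('News outlets', df6),
          ('Election', df7), ('Water bodies', df8), ('US cities', df9),
          ('World cities', df10), ('US presidents', df11)]

fig, axes = plt.subplots(4, 3, figsize=(18, 16))
axs = axes.flatten()
for ax, (title, df) in zip(axs, frames):
    df[1:-2].plot(kind='area', ax=ax, title=title)
for ax in axs[len(frames):]:
    ax.set_visible(False)  # hide the unused twelfth panel
fig.tight_layout()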

In [4]:
df1[-10:]


Out[4]:
            California  India   UK   US  World
2015-12-23          22      7   46  188     20
2015-12-24           8      9   23  140     23
2015-12-25          34     11   18   95     20
2015-12-26          42     66  142  148     69
2015-12-27          21     74  214  234     43
2015-12-28          23     43  237  251     63
2015-12-29          52     22  118  286     59
2015-12-30          10      5   41   88     12
2016-01-04           0      2    0    1      0
2016-03-04           0      0    0    0      0
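
The last two rows (2016-01-04 and 2016-03-04) are stray dates outside the scraped window, which the df[1:-2] slices in the plotting cell trim away (together with the first row). Selecting the collection window explicitly by label states that intent more clearly; this assumes the index values parse as dates:

In [ ]:
df1.index = pd.to_datetime(df1.index)  # make sure the index is a DatetimeIndex
df1.loc['2015-11-30':'2015-12-30'].plot(kind='area')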

In [4]:
import twitter_semantics


Using Theano backend.

In [5]:
prsd_tweets, m = twitter_semantics.semantic_analysis(prsd_tweets)


Found file 'weights.h5' in root folder
Running semantics analysis with preloaded weights
If you want to re-train the model, please, delete 'weights.h5' file and run this method again
For different weight file::: Please, pass the filepath as second argument
Reading Stanford Semantics Database
Found 1600000 entries
Preparing tweets
Compiling Keras model...
Loading Weights from file 'weights.h5'
Updating parsed tweets
Predicting sentiments...

/usr/local/lib/python2.7/site-packages/theano/scan_module/scan_perform_ext.py:133: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility
  from scan_perform.scan_perform import *

In [10]:
prsd_tweets[0]


Out[10]:
{'date': datetime.datetime(2015, 12, 13, 0, 0),
 'emo': 0.5551493763923645,
 'id': 0,
 'text': 'Nutella, rice crackers, instant noodles, flavored mineral water, and  breakfast biscuits are all on on the list... http://fb.me/72ugXjppE'}

In [12]:
s = twitter_semantics.daily_count_semantics_for_words(['study','ice'], prsd_tweets)

In [16]:
df_s = pd.DataFrame(s)
df_s[-10:]


Out[16]:
            negative  positive  tolerant
2015-12-21       NaN        37        36
2015-12-22       NaN        26        17
2015-12-23       NaN        13        20
2015-12-24       NaN         7        20
2015-12-25       NaN        11         6
2015-12-26       NaN        23        20
2015-12-27       NaN        37        30
2015-12-28       NaN        44        39
2015-12-29       NaN        37        23
2015-12-30       NaN        19         6
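
In this tail the negative column is all NaN, presumably because no 'study'/'ice' tweet on those days was scored negative, so no count was recorded for that key. daily_count_semantics_for_words apparently bins each matching tweet's 'emo' score (seen in Out[10]) into negative/tolerant/positive buckets per day; a rough sketch of that behaviour follows, where the 0.4/0.6 thresholds are a guess rather than the module's actual cut-offs:

In [ ]:
from collections import defaultdict

def daily_count_semantics_sketch(stems, tweets, lo=0.4, hi=0.6):
    """Hypothetical sketch: per-day counts of negative/tolerant/positive tweets
    mentioning any of the stems, binned on the 'emo' sentiment score.
    The lo/hi thresholds are assumptions, not the library's real values."""
    out = {'negative': defaultdict(int),
           'tolerant': defaultdict(int),
           'positive': defaultdict(int)}
    for tw in tweets:
        if not any(stem in tw['text'].lower() for stem in stems):
            continue
        day = tw['date'].date()
        if tw['emo'] < lo:
            out['negative'][day] += 1
        elif tw['emo'] > hi:
            out['positive'][day] += 1
        else:
            out['tolerant'][day] += 1
    return {k: dict(v) for k, v in out.items()}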

In [17]:
df_s.fillna(0, inplace=True)
df_sa = pd.DataFrame()
df_sa['Negative'] = df_s['negative']/(df_s['negative']+df_s['positive']+df_s['tolerant'])*100
df_sa['Tolerant'] = df_s['tolerant']/(df_s['negative']+df_s['positive']+df_s['tolerant'])*100
df_sa['Positive'] = df_s['positive']/(df_s['negative']+df_s['positive']+df_s['tolerant'])*100
df_sa.plot(kind='area',colormap='winter')


Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x1603b8b50>
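
The normalisation above can also be written as a single row-wise division, which gives the same percentages without repeating the column sum (standard pandas, equivalent in values to the three lines in In [17]):

In [ ]:
df_sa = df_s.div(df_s.sum(axis=1), axis=0) * 100
df_sa.columns = [c.capitalize() for c in df_sa.columns]
df_sa.plot(kind='area', colormap='winter')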

In [67]:
import matplotlib.pylab as pylab
font = {'family' : 'sans-serif',  # 'normal' is not a valid font family and triggers a findfont warning
        'weight' : 'normal',
        'size'   : 18}
pylab.rcParams['figure.figsize'] = 12, 8 
pylab.rc('font', **font)
df_sa.plot(kind='area',colormap='winter')


Out[67]:
<matplotlib.axes._subplots.AxesSubplot at 0x1721be5d0>

In [22]:
prsd_tweets = twitter_parser.parse_folder(nltk_lib=True)


Found files
Data/dec13-dec6.txt
Data/dec17-dec14.txt
Data/dec30-dec18.txt
Data/dec5-nov30.txt
Total amount of tweets: 28069
Fifty most common words in tweets:
[('water', 30549), ('news', 3557), ('nov', 1703), ('flint', 1041), ('conservation', 958), ('drinking', 896), ('california', 815), ('crisis', 794), ('clean', 771), ('world', 754), ('watkins', 714), ('alyssa', 711), ('city', 710), ('hot', 692), ('climate', 668), ('video', 649), ('state', 624), ('drink', 611), ('drought', 606), ('supply', 602), ('yukohill', 574), ('health', 510), ('us', 508), ('flood', 508), ('energy', 485), ('levels', 481), ('global', 481), ('michigan', 479), ('food', 468), ('india', 452), ('main', 451), ('people', 447), ('wendy', 440), ('epa', 432), ('emergency', 427), ('change', 416), ('break', 416), ('floods', 403), ('air', 389), ('one', 360), ('finds', 359), ('tap', 359), ('power', 354), ('social', 353), ('uk', 350), ('first', 346), ('chennai', 346), ('home', 337), ('media', 335), ('today', 334)]
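
Re-running parse_folder with nltk_lib=True presumably swaps the built-in tokenisation and stop-word handling for NLTK; the totals and the top-50 list come out identical here, so only the preprocessing path differs. As a loose illustration (the actual steps inside parse_folder are not shown), NLTK-based cleanup of a tweet text usually looks like:

In [ ]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# one-time setup: import nltk; nltk.download('punkt'); nltk.download('stopwords')

stop_words = set(stopwords.words('english'))

def clean_tokens(text):
    """Hypothetical sketch: lowercase, tokenise with NLTK, and keep only
    alphabetic tokens that are not English stop words."""
    return [t for t in word_tokenize(text.lower())
            if t.isalpha() and t not in stop_words]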

In [ ]: