In [1]:
import twitter_parser
In [ ]:
# Scrape Twitter "news" search results for water-related tweets posted
# between 2015-09-01 and 2015-10-18; pages are saved locally for parsing below.
search_url = ('https://twitter.com/search?f=news&vertical=news'
              '&q=water%20since%3A2015-09-01%20until%3A2015-10-18'
              '&src=typd&lang=en')
twitter_parser.scrape_page(search_url)
In [2]:
# Parse the previously scraped pages on disk into a list of tweet records.
prsd_tweets = twitter_parser.parse_folder()
In [5]:
%matplotlib inline
import pandas as pd
import matplotlib.pylab as pylab
pylab.rcParams['figure.figsize'] = 16, 8 # that's default image size for this interactive session
region = {}
region['UK'] = twitter_parser.daily_count_words(['uk'], prsd_tweets)
region['US'] = twitter_parser.daily_count_words(['us'], prsd_tweets)
region['California'] = twitter_parser.daily_count_words(['california'], prsd_tweets)
region['India'] = twitter_parser.daily_count_words(['india'], prsd_tweets)
region['World'] = twitter_parser.daily_count_words(['world','global'], prsd_tweets)
cond = {}
cond['Droughts'] = twitter_parser.daily_count_words(['drought'], prsd_tweets)
cond['Floods'] = twitter_parser.daily_count_words(['flood'], prsd_tweets)
science = {}
science['Mars'] = twitter_parser.daily_count_words(['mars'], prsd_tweets)
science['Study'] = twitter_parser.daily_count_words(['study'], prsd_tweets)
science['Climate'] = twitter_parser.daily_count_words(['climate'], prsd_tweets)
science['Ice'] = twitter_parser.daily_count_words(['ice'], prsd_tweets)
science['Plastic'] = twitter_parser.daily_count_words(['plastic'], prsd_tweets)
science['Contamination'] = twitter_parser.daily_count_words(['toxic','contaminat'], prsd_tweets)
health = {}
health['Drink'] = twitter_parser.daily_count_words(['drink'], prsd_tweets)
health['Fish'] = twitter_parser.daily_count_words(['fish'], prsd_tweets)
health['Health'] = twitter_parser.daily_count_words(['health'], prsd_tweets)
health['Clean'] = twitter_parser.daily_count_words(['clean'], prsd_tweets)
health['Monitoring'] = twitter_parser.daily_count_words(['monitor','assesment'], prsd_tweets)
health['Water Poverty'] = twitter_parser.daily_count_words(['poverty'], prsd_tweets)
work = {}
work['Farmers'] = twitter_parser.daily_count_words(['farmer'], prsd_tweets)
work['Industry'] = twitter_parser.daily_count_words(['industr'], prsd_tweets)
news = {}
news['CTV'] = twitter_parser.daily_count_words(['ctv'], prsd_tweets)
news['CBC'] = twitter_parser.daily_count_words(['cbc'], prsd_tweets)
news['BBC'] = twitter_parser.daily_count_words(['bbc'], prsd_tweets)
news['CNN'] = twitter_parser.daily_count_words(['cnn'], prsd_tweets)
news['NBC'] = twitter_parser.daily_count_words(['nbc'], prsd_tweets)
news['FOXTV'] = twitter_parser.daily_count_words(['foxtv'], prsd_tweets)
elec = {}
elec['NDP'] = twitter_parser.daily_count_words(['ndp'], prsd_tweets)
elec['Conservative'] = twitter_parser.daily_count_words(['pc'], prsd_tweets)
elec['Liberal'] = twitter_parser.daily_count_words(['liberal'], prsd_tweets)
typ={}
typ['Sea'] = twitter_parser.daily_count_words(['sea'], prsd_tweets)
typ['Ocean'] = twitter_parser.daily_count_words(['ocean'], prsd_tweets)
typ['Lake'] = twitter_parser.daily_count_words(['lake'], prsd_tweets)
typ['River'] = twitter_parser.daily_count_words(['river'], prsd_tweets)
typ['Wetlands'] = twitter_parser.daily_count_words(['wetlands','marsh','bog','swamp'], prsd_tweets)
cities = {}
cities['Baltimore city, US'] = twitter_parser.daily_count_words(['baltimore'], prsd_tweets)
cities['Bay city , US'] = twitter_parser.daily_count_words(['bay city'], prsd_tweets)
cities['Desmoine city, US'] = twitter_parser.daily_count_words(['desmoine'], prsd_tweets)
cities['Lancaster city, US'] = twitter_parser.daily_count_words(['lancaster'], prsd_tweets)
cities['Portland city, US'] = twitter_parser.daily_count_words(['portland'], prsd_tweets)
cities['Tulsa city, US'] = twitter_parser.daily_count_words(['tulsa'], prsd_tweets)
cities['Wisconsin city, US'] = twitter_parser.daily_count_words(['wisconsin'], prsd_tweets)
cities_w = {}
cities_w['Cork city, Ireland'] = twitter_parser.daily_count_words(['cork'], prsd_tweets)
cities_w['Limerick city, Ireland'] = twitter_parser.daily_count_words(['limerick'], prsd_tweets)
cities_w['Toronto city, Canada'] = twitter_parser.daily_count_words(['toronto'], prsd_tweets)
cities_w['Dhaka city, Bangladesh'] = twitter_parser.daily_count_words(['dhaka'], prsd_tweets)
cities_w['Manchester city, England'] = twitter_parser.daily_count_words(['manchester'], prsd_tweets)
cities_w['Mumbai city, India'] = twitter_parser.daily_count_words(['mumbai'], prsd_tweets)
cities_w['Tulcea city, Romania'] = twitter_parser.daily_count_words(['tulcea'], prsd_tweets)
us_pre = {}
us_pre['Obama'] = twitter_parser.daily_count_words(['obama'], prsd_tweets)
us_pre['Trump'] = twitter_parser.daily_count_words(['trump'], prsd_tweets)
df1 = pd.DataFrame(region )
df2 = pd.DataFrame(cond )
df3 = pd.DataFrame(science )
df4 = pd.DataFrame(health )
df5 = pd.DataFrame(work )
df6 = pd.DataFrame(news )
df7 = pd.DataFrame(elec)
df8 = pd.DataFrame(typ)
df9 = pd.DataFrame(cities)
df10 = pd.DataFrame(cities_w)
df11 = pd.DataFrame(us_pre)
df1[1:-2].plot(kind='area')
df2[1:-2].plot(kind='area')
df3[1:-2].plot(kind='area')
df4[1:-2].plot(kind='area')
df5[1:-2].plot(kind='area')
df6[1:-2].plot(kind='area')
df7[1:-2].plot(kind='area')
df8[1:-2].plot(kind='area')
df9[1:-2].plot(kind='area')
df10[1:-2].plot(kind='area')
df11[1:-2].plot(kind='area')
Out[5]:
In [4]:
# Show the last 10 rows (days) of the per-region counts.
df1[-10:]
Out[4]:
In [4]:
import twitter_semantics
In [5]:
# Enrich the parsed tweets with sentiment information; m is the second value
# returned by semantic_analysis (presumably a model/metadata object — TODO
# confirm against twitter_semantics).
# NOTE(review): rebinds prsd_tweets over the raw parse, so this cell is
# non-idempotent on re-run without re-running the parse cell first.
prsd_tweets, m = twitter_semantics.semantic_analysis(prsd_tweets)
In [10]:
# Inspect the first enriched tweet record.
prsd_tweets[0]
Out[10]:
In [12]:
# Daily sentiment counts restricted to tweets matching 'study' or 'ice'.
s = twitter_semantics.daily_count_semantics_for_words(['study','ice'], prsd_tweets)
In [16]:
# Tabulate the daily sentiment counts and peek at the last 10 rows.
df_s = pd.DataFrame(s)
df_s[-10:]
Out[16]:
In [17]:
# Convert daily sentiment counts into percentages of all labelled tweets.
df_s.fillna(0, inplace=True)  # days missing a sentiment class count as 0
# Hoist the shared denominator (was recomputed three times).
# NOTE(review): days where all three counts are 0 divide by zero and yield
# NaN columns — same behavior as the original expressions.
sentiment_total = df_s['negative'] + df_s['positive'] + df_s['tolerant']
df_sa = pd.DataFrame()
df_sa['Negative'] = df_s['negative'] / sentiment_total * 100
df_sa['Tolerant'] = df_s['tolerant'] / sentiment_total * 100
df_sa['Positive'] = df_s['positive'] / sentiment_total * 100
df_sa.plot(kind='area', colormap='winter')
Out[17]:
In [67]:
import matplotlib.pylab as pylab

# 'normal' is not a valid matplotlib font *family* (it is a style/weight
# value), so the original triggered a findfont fallback warning; use the
# generic 'sans-serif' family, which is what the fallback resolved to anyway.
font = {'family': 'sans-serif',
        'weight': 'normal',
        'size': 18}
pylab.rcParams['figure.figsize'] = 12, 8
pylab.rc('font', **font)
# Re-plot the sentiment percentages with the larger font for presentation.
df_sa.plot(kind='area', colormap='winter')
Out[67]:
In [22]:
# Re-parse the scraped pages with the NLTK-based pipeline enabled.
# NOTE(review): this overwrites the semantically-enriched prsd_tweets used by
# the cells above — re-running those cells afterwards would use this version.
prsd_tweets = twitter_parser.parse_folder(nltk_lib=True)
In [ ]: