In [299]:
!pip install oauth2
!pip install django
!pip install textblob
!pip install plotly
In [300]:
import json
import os
import urllib2 as urllib

import bokeh
import matplotlib.pyplot as plt
import numpy as np
import oauth2 as oauth
import pandas as pd
import plotly
import plotly.graph_objs as go
import plotly.plotly as py
from bokeh.io import show, output_notebook
from bokeh.layouts import row, column, gridplot
from bokeh.models import HoverTool
from bokeh.models.widgets import Panel, Tabs
from bokeh.plotting import figure, ColumnDataSource
from django.utils.encoding import smart_str
from textblob import TextBlob
# Plotly credentials come from the environment so that no secret is stored in
# the notebook. Export PLOTLY_USERNAME and PLOTLY_API_KEY before starting the
# kernel; a missing variable raises KeyError naming it.
# NOTE: any key that was previously committed in this cell should be revoked.
plotly.tools.set_credentials_file(
    username=os.environ["PLOTLY_USERNAME"],
    api_key=os.environ["PLOTLY_API_KEY"],
)
In [301]:
# coding: utf-8
def is_ascii(s):
    """Return True when every character of ``s`` has a code point below 128.

    An empty ``s`` is considered ASCII.
    """
    return not any(ord(ch) >= 128 for ch in s)
# Twitter OAuth credentials are read from the environment so that secrets are
# never stored in the notebook. A missing variable raises KeyError naming it.
# NOTE: any credentials that were previously committed here should be revoked.
api_key = os.environ["TWITTER_API_KEY"]
api_secret = os.environ["TWITTER_API_SECRET"]
access_token_key = os.environ["TWITTER_ACCESS_TOKEN_KEY"]
access_token_secret = os.environ["TWITTER_ACCESS_TOKEN_SECRET"]

# debuglevel handed to both urllib handlers below (0 = no debug output).
_debug = 0

# Objects used to sign every request issued by twitterreq().
oauth_token = oauth.Token(key=access_token_key, secret=access_token_secret)
oauth_consumer = oauth.Consumer(key=api_key, secret=api_secret)
signature_method_hmac_sha1 = oauth.SignatureMethod_HMAC_SHA1()

http_method = "GET"

http_handler = urllib.HTTPHandler(debuglevel=_debug)
https_handler = urllib.HTTPSHandler(debuglevel=_debug)
def twitterreq(url, method, parameters):
    """Send an OAuth-signed request and return the opened response.

    The request is signed with the module-level consumer, token and
    HMAC-SHA1 signature method.

    Args:
        url: Endpoint URL to call.
        method: HTTP verb for the request, e.g. "GET" or "POST".
        parameters: Request parameters forwarded to
            ``oauth.Request.from_consumer_and_token``.

    Returns:
        Whatever ``OpenerDirector.open`` returns for the signed URL.
    """
    # Honour the caller's verb; the module-level ``http_method`` is not used
    # here so that a POST requested by the caller is really signed as a POST.
    req = oauth.Request.from_consumer_and_token(
        oauth_consumer,
        token=oauth_token,
        http_method=method,
        http_url=url,
        parameters=parameters
    )
    req.sign_request(signature_method_hmac_sha1, oauth_consumer, oauth_token)

    # Only POST requests carry an encoded body.
    if method == "POST":
        encoded_post_data = req.to_postdata()
    else:
        encoded_post_data = None

    signed_url = req.to_url()

    opener = urllib.OpenerDirector()
    opener.add_handler(http_handler)
    opener.add_handler(https_handler)
    return opener.open(signed_url, encoded_post_data)
#def fetchsamples():
# url = "https://stream.twitter.com/1.1/statuses/sample.json"
# parameters = []
# response = twitterreq(url, "GET", parameters)
# for line in response:
# print line.strip()
def create_dictionary(sentiment):
    """Parse a tab-separated sentiment lexicon into a ``{term: score}`` dict.

    Args:
        sentiment: Iterable of lines (for example an open file), each holding
            a term and an integer score separated by a single tab.

    Returns:
        dict mapping each term to its integer score.

    Raises:
        ValueError: If a non-blank line is not ``term<TAB>integer``.
    """
    scores = {}
    for line in sentiment:
        # A blank line (e.g. a trailing newline at end of file) carries no
        # entry and would otherwise break the two-value unpacking below.
        if not line.strip():
            continue
        term, score = line.split('\t')
        scores[term] = int(score)
    return scores
# Two-letter postal codes accepted by each regional filter. A tweet belongs to
# a region when its user's location ends with ", <code>".
WEST_COAST_STATES = ("CA", "OR", "WA", "HI", "AK")
EAST_COAST_STATES = ("VA", "DE", "DC", "PA", "NJ", "NY", "ME", "MA", "NH",
                     "VT", "CT")
SOUTH_STATES = ("TX", "OK", "LA", "MS", "AR", "FL", "AL", "GA", "SC", "NC",
                "MD", "TN", "WV")
# Iowa's postal code is "IA".
MIDWEST_STATES = ("ND", "SD", "NE", "KS", "MN", "IA", "MO", "WI", "MI", "OH",
                  "IL", "IN", "KY")
# Nevada ("NV") belongs here; Nebraska ("NE") is counted in the Midwest only,
# so that no state contributes to two regions.
ROCKY_MOUNTAINS_STATES = ("MT", "ID", "WY", "NV", "UT", "CO", "AZ", "NM")


def _tweets_from_states(tweets, state_codes):
    """Collect the text of tweets whose user location ends with a given state.

    Args:
        tweets: Iterable of lines, each a JSON-encoded tweet object.
        state_codes: Iterable of two-letter state codes to accept.

    Returns:
        List of tweet texts, in input order, for tweets whose
        ``user.location`` is pure ASCII and ends with ", <code>" for one of
        ``state_codes``.
    """
    suffixes = tuple(", " + code for code in state_codes)
    matching_texts = []
    for line in tweets:
        # Blank lines carry no tweet and are not valid JSON.
        if not line.strip():
            continue
        tweet = json.loads(line)

        # Records without a user (or with a null user) cannot be located.
        user = tweet.get("user") or {}
        location = user.get("location")
        if location is None:
            continue

        # Locations containing non-ASCII characters are skipped entirely.
        if any(ord(ch) > 127 for ch in location):
            continue
        if not location.endswith(suffixes):
            continue

        text = tweet.get("text")
        if text is None:
            continue
        matching_texts.append(text)
    return matching_texts


def create_west_coast_list(tweets):
    """Return the texts of tweets located in CA, OR, WA, HI or AK."""
    return _tweets_from_states(tweets, WEST_COAST_STATES)


def create_east_coast_list(tweets):
    """Return the texts of tweets located in an East Coast state."""
    return _tweets_from_states(tweets, EAST_COAST_STATES)


def create_south_list(tweets):
    """Return the texts of tweets located in a Southern state."""
    return _tweets_from_states(tweets, SOUTH_STATES)


def create_midwest_list(tweets):
    """Return the texts of tweets located in a Midwestern state."""
    return _tweets_from_states(tweets, MIDWEST_STATES)


def create_rocky_mountains_list(tweets):
    """Return the texts of tweets located in a Rocky Mountains state.

    Texts are collected exactly as for the other regions, so that the
    regional samples are comparable.
    """
    return _tweets_from_states(tweets, ROCKY_MOUNTAINS_STATES)
def calculate_scores(tweets, dictionary):
    """Score tweets with TextBlob and keep only those with non-zero polarity.

    Args:
        tweets: Iterable of tweet texts.
        dictionary: Accepted for the callers' sake; not consulted here.

    Returns:
        Tuple ``(kept_tweets, kept_scores)`` of two parallel lists: the
        tweets whose polarity is non-zero and the matching polarity values.
    """
    kept_tweets = []
    kept_scores = []
    for tweet_text in tweets:
        polarity = TextBlob(tweet_text).sentiment.polarity
        # Tweets with a polarity of exactly zero are left out of both lists.
        if polarity == 0:
            continue
        kept_tweets.append(tweet_text)
        kept_scores.append(polarity)
    return kept_tweets, kept_scores
if __name__ == '__main__':
    list_tweets = []

    # The sentiment lexicon is the same for every region, so it is parsed
    # once and the file is closed as soon as it has been read.
    with open("AFINN-111.txt") as sent_file:
        dictionary = create_dictionary(sent_file)

    # Every regional filter consumes the tweet stream from the beginning, so
    # the file is rewound between passes rather than reopened and leaked.
    # ("three_minutes_tweets.json" is the alternative capture file.)
    with open("tweets.json") as tweet_file:
        west_coast_tweets = create_west_coast_list(tweet_file)
        tweet_file.seek(0)
        east_coast_tweets = create_east_coast_list(tweet_file)
        tweet_file.seek(0)
        south_tweets = create_south_list(tweet_file)
        tweet_file.seek(0)
        midwest_tweets = create_midwest_list(tweet_file)
        tweet_file.seek(0)
        rocky_mountains_tweets = create_rocky_mountains_list(tweet_file)

    # calculate_scores returns the tweets it kept alongside their scores, so
    # each *_tweets list is replaced by its filtered counterpart.
    west_coast_tweets, scores_west_coast = calculate_scores(west_coast_tweets, dictionary)
    east_coast_tweets, scores_east_coast = calculate_scores(east_coast_tweets, dictionary)
    south_tweets, scores_south = calculate_scores(south_tweets, dictionary)
    midwest_tweets, scores_midwest = calculate_scores(midwest_tweets, dictionary)
    rocky_mountains_tweets, scores_rocky_mountains = calculate_scores(rocky_mountains_tweets, dictionary)
In [302]:
# Pair every retained West Coast tweet with its polarity score.
west_coast_df = pd.DataFrame(data={
    "west coast tweets": west_coast_tweets,
    "scores tweets west coast": scores_west_coast,
})
west_coast_df
Out[302]:
In [303]:
# Pair every retained East Coast tweet with its polarity score.
east_coast_df = pd.DataFrame(data={
    "east coast tweets": east_coast_tweets,
    "scores tweets east coast": scores_east_coast,
})
east_coast_df
Out[303]:
In [304]:
# Pair every retained Southern tweet with its polarity score.
south_df = pd.DataFrame(data={
    "south tweets": south_tweets,
    "scores tweets south": scores_south,
})
south_df
Out[304]:
In [305]:
# Pair every retained Midwestern tweet with its polarity score.
midwest_df = pd.DataFrame(data={
    "midwest tweets": midwest_tweets,
    "scores tweets midwest": scores_midwest,
})
midwest_df
Out[305]:
In [306]:
# Pair every retained Rocky Mountains tweet with its polarity score.
rocky_mountains_df = pd.DataFrame(data={
    "rocky mountains tweets": rocky_mountains_tweets,
    "scores tweets rocky mountains": scores_rocky_mountains,
})
rocky_mountains_df
Out[306]:
In [307]:
# Polarity scores per region, in the order the boxes are drawn.
y0, y1, y2, y3, y4 = (
    scores_west_coast,
    scores_east_coast,
    scores_south,
    scores_midwest,
    scores_rocky_mountains,
)

# Shared appearance: draw every individual point, and mark the mean and
# standard deviation on each box.
box_style = dict(boxpoints='all', boxmean='sd')

trace0 = go.Box(y=y0, name="west_coast", **box_style)
trace1 = go.Box(y=y1, name="east_coast", **box_style)
trace2 = go.Box(y=y2, name="south", **box_style)
trace3 = go.Box(y=y3, name="midwest", **box_style)
trace4 = go.Box(y=y4, name="rocky_mountains", **box_style)

data = [trace0, trace1, trace2, trace3, trace4]
py.iplot(data)
Out[307]:
In [308]:
def _polarity_counts(scores):
    """Return ``[positive, negative]`` counts for a sequence of polarity scores."""
    positive = sum(1 for score in scores if score > 0)
    negative = sum(1 for score in scores if score < 0)
    return [positive, negative]


def _sentiment_pie_figure(regions):
    """Build a plotly figure dict with one positive/negative pie per region.

    Args:
        regions: Sequence of ``(label, scores, x_domain, y_domain)`` tuples.
            ``scores`` is a list of polarity values; the two domains are
            ``[start, end]`` pairs in paper coordinates (0..1) that place the
            pie on the canvas.

    Returns:
        dict with ``'data'`` (one pie trace per region) and ``'layout'``
        (title plus one caption per pie).
    """
    pies = []
    captions = []
    for label, scores, x_domain, y_domain in regions:
        pies.append({
            'labels': ['positive', 'negative'],
            'values': _polarity_counts(scores),
            'type': 'pie',
            'name': label,
            'marker': {'colors': ['#00FF00', '#FF0000']},
            'domain': {'x': x_domain, 'y': y_domain},
            'hoverinfo': 'label+percent+name',
            'textinfo': 'percent'
        })
        # Caption centred just above the pie, pinned to paper coordinates so
        # it lines up with the pie's domain.
        captions.append({
            "font": {"size": 20},
            "showarrow": False,
            "text": label,
            "xref": "paper",
            "yref": "paper",
            "x": (x_domain[0] + x_domain[1]) / 2.0,
            "y": y_domain[1] + 0.01,
            "xanchor": "center",
            "yanchor": "bottom"
        })
    return {
        'data': pies,
        'layout': {
            'title': 'Positive vs. negative tweets by region',
            'showlegend': False,
            'annotations': captions
        }
    }


# Three pies on the top row and two on the bottom row; no two domains overlap.
fig = _sentiment_pie_figure([
    ("West", scores_west_coast, [0.0, 0.30], [0.56, 0.94]),
    ("East", scores_east_coast, [0.35, 0.65], [0.56, 0.94]),
    ("South", scores_south, [0.70, 1.0], [0.56, 0.94]),
    ("Midwest", scores_midwest, [0.15, 0.45], [0.0, 0.38]),
    ("Rocky mountains", scores_rocky_mountains, [0.55, 0.85], [0.0, 0.38]),
])
py.iplot(fig, filename='pie_chart_subplots')
Out[308]:
In [ ]: