In [1]:
from nltk.twitter import Query, credsfromfile
import numpy as np
import os
import sys
import pickle
sys.path.append("../bhtsa")
from twitter_senti_analyzer import senti_score_daily
# settings
# Load Twitter OAuth credentials through nltk's credsfromfile() and build the
# search client from them.
oauth = credsfromfile()
client = Query(**oauth)
# Sampling window handed to senti_score_daily in the next cell: `days`
# consecutive days beginning at `startTime` ([year, month, day]).
# NOTE(review): twtNum is presumably the number of tweets fetched per day --
# confirm against senti_score_daily.
twtNum = 100
startTime = [2016, 12, 1]
days = 10
# The pickled classifier lives in <parent of the working directory>/data/model.
# Taking the parent explicitly avoids depending on the length of the notebook
# folder's name (the previous code sliced a fixed 4 characters off the cwd).
path = os.path.join(os.path.dirname(os.getcwd()), 'data', 'model')
# Pickles are binary data, so open in 'rb'; the context manager closes the
# handle even if unpickling fails.
# SECURITY: pickle.load can execute arbitrary code -- only load a model file
# that comes from a trusted source.
with open(os.path.join(path, 'NBClassifier.pickle'), 'rb') as f:
    NBC = pickle.load(f)
# Parenthesised single-argument form prints the same on Python 2 and 3.
print(NBC.informative_features())
In [2]:
# Score both search terms with the same client, classifier and date window so
# the two result sets are directly comparable.
keyword1, keyword2 = 'duke', 'unc'
# Arguments common to both calls.
# NOTE(review): the meaning of the trailing 1 is defined by
# senti_score_daily -- confirm there.
shared_args = (client, NBC, twtNum, startTime, days, 1)
score_all1 = senti_score_daily(keyword1, *shared_args)
score_all2 = senti_score_daily(keyword2, *shared_args)
In [3]:
import datetime as dt
# Build the x-axis values: one datetime.date per sampled day, starting at
# startTime ([year, month, day]) and advancing one day at a time for `days`
# entries.
origin = dt.date(startTime[0], startTime[1], startTime[2])
dates = [origin + dt.timedelta(days=i) for i in range(days)]
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
# Draw both score series on a single shared date axis.
fig, ax = plt.subplots()
# Rotate the x tick labels so the date strings do not overlap.
plt.xticks(rotation=70)
# (scores, line colour, legend label) for each search term.
series = (
    (score_all1, 'blue', 'Duke'),
    (score_all2, 'red', 'UNC'),
)
for scores, line_color, school in series:
    # Each score array is averaged over axis 0 before plotting against dates.
    ax.plot(dates, np.mean(scores, axis=0), color=line_color, linewidth=2, label=school)
# Show tick labels as month-day.
ax.xaxis.set_major_formatter(DateFormatter('%m-%d'))
ax.set_xlabel('days')
ax.set_ylabel('score')
ax.set_title('Sentiment Score of Duke/UNC')
ax.legend(loc="upper left")
ax.grid(True)
plt.show()
In [ ]: