In [1]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#from ggplot import *
from pandas import Series, DataFrame, Panel
import seaborn as sns
import statsmodels.tsa.api
import datetime
import pandas.io.data
from pandas.tools.plotting import *
%pylab qt
In [2]:
# date converter function for pandas time series: copies the index into a
# regular (non-index) 'Date' column so plotting libraries such as ggplot
# can use it, then resets to a default integer index.
def dateConvert(df):
    """Add the index of `df` as a 'Date' column and reset the index.

    Mutates `df` in place by adding the 'Date' column, then returns a new
    frame whose index has been reset to the default RangeIndex.
    """
    dt = df.index
    df['Date'] = dt
    # BUG FIX: reset_index is not in-place by default; the original called
    # it and threw the result away, so the index was never actually reset.
    df = df.reset_index(drop=True)
    return df
In [3]:
pylab.rcParams['figure.figsize'] = (14, 14.0)
In [3]:
In [3]:
In [3]:
In [3]:
Here we read the data from our CSV file into a data frame, parsing the date column as dates and using it as the index.
In [4]:
tweet_data = pd.read_csv('tweet_sentiment_scores.csv', parse_dates=[0], index_col=['time'])
In [5]:
# sanity check -- show the column names that were loaded
tweet_data.columns
Out[5]:
In [6]:
# Peek at the LSI topic words for one day.
# print() call form works under both Python 2 and 3 (the bare print
# statement fails on any modern kernel).
words = tweet_data.lsi_words
print(words['2014-10-20'])
In [7]:
# Split one day's topic-word string on ']' to inspect its pieces.
# print() call form for Python 2/3 compatibility.
word2 = words['2014-10-31'][0]
word2 = word2.split(']')
print(word2)
In [7]:
In [7]:
In [7]:
In [7]:
In [7]:
In [8]:
#tweet_data.total_tweets.rolplot()
In [8]:
In [8]:
In [8]:
# Add the non-indexed Date column and keep only the columns needed for
# ggplot-style melting. The original had a bare tweet_dates.head() in the
# middle of the cell -- a no-op, since only a cell's last expression is
# displayed -- so it is removed.
tweet_dates = dateConvert(tweet_data)
tweet_dates = tweet_dates[['Date','lsi_score','lda_score']]
In [9]:
# Reshape to long format for ggplot-style plotting: one row per
# (Date, score-type) pair, then give the melted columns clearer names.
data = (
    pd.melt(tweet_dates, id_vars='Date')
    .dropna()
    .rename(columns={'variable': 'index', 'value': 'score'})
)
In [10]:
# .info() prints its own summary and returns None -- wrapping it in print
# added a stray "None" line, so call it bare.
data.info()
data.head()
Out[10]:
In [10]:
In [10]:
In [10]:
In [10]:
In [10]:
In [10]:
In [11]:
In [12]:
rets = df.pct_change()
In [12]:
In [13]:
# Pairwise correlation matrix of the hourly returns; displayed as the
# cell's last expression.
corr = rets.corr()
corr
In [14]:
# Heatmap of the correlation matrix computed in the previous cell.
plt.imshow(corr, cmap = 'hot', interpolation='none')
plt.colorbar()
# One tick per variable, labelled with the column names.
plt.xticks(range(len(corr)), corr.columns)
plt.yticks(range(len(corr)), corr.columns)
plt.show()
In [14]:
In [14]:
In [14]:
In [14]:
In [15]:
plt.figure()
Out[15]:
In [16]:
# Lag plot of the LSI score to check hour-to-hour serial dependence.
# lag_plot comes from the pandas.tools.plotting star import above
# (moved to pandas.plotting in modern pandas); show() comes from %pylab.
lag_plot(tweet_data.lsi_score)
show()
In [17]:
# Same lag plot for the LDA score.
lag_plot(tweet_data.lda_score)
show()
In [18]:
# Autocorrelation of the LSI score across all lags.
autocorrelation_plot(tweet_data.lsi_score)
show()
In [14]:
autocorrelation_plot(tweet_data.lda_score)
Out[14]:
In [19]:
# bootstrap_plot moved from pandas.tools.plotting to pandas.plotting
# (pandas.tools was removed in pandas 0.25); try the modern location first
# and fall back so the cell still runs on the old environment.
try:
    from pandas.plotting import bootstrap_plot
except ImportError:
    from pandas.tools.plotting import bootstrap_plot
# Bootstrap resampling plot of the LSI score distribution.
bootstrap_plot(tweet_data.lsi_score, size=50, samples=300, color='grey')
Out[19]:
In [11]:
# 20-hour rolling mean +/- one rolling std dev band for both scores.
# pd.rolling_mean / pd.rolling_std were removed in pandas 1.0; the
# .rolling() accessor is the equivalent (same window/min_periods defaults).
mas = tweet_data.lsi_score.rolling(20).mean()
mstd = tweet_data.lsi_score.rolling(20).std()
mad = tweet_data.lda_score.rolling(20).mean()
madst = tweet_data.lda_score.rolling(20).std()
#plt.plot(tweet_data.index, tweet_data.lsi_score, 'k')
# Label the lines so plt.legend() has artists to show (the original
# called legend() with no labels, which renders an empty legend).
plt.plot(mas.index, mas, label='LSI, 20-hour rolling mean')
plt.fill_between(mstd.index, mas-mstd, mas+mstd, color = 'b', alpha = 0.1)
plt.plot(mad.index, mad, label='LDA, 20-hour rolling mean')
plt.fill_between(madst.index, mad-madst, mad+madst, color = 'g', alpha = 0.1)
plt.xticks(rotation=50)
plt.legend()
# plt.show() instead of the bare show() injected by %pylab.
plt.show()
In [ ]:
In [12]:
# Load stock quotes; merge the Date and Time columns into one 'Timestamp'
# column while parsing, and use it as the index.
stock_data = pd.read_csv(
    '../data/stocks.csv',
    parse_dates={'Timestamp': ['Date', 'Time']},
    index_col='Timestamp',
)
For our purposes, we only want to use the hourly closing price in our analysis. We may return in the future and use other information from our stock data.
In [13]:
stock_data = stock_data[['Stockid','Close']]
In [13]:
In [14]:
dateConvert(stock_data);
In [14]:
Here we reshape the data into a 'wide' format using pandas' `pivot` function.
In [15]:
# Wide format: one column per Stockid, one row per Date, values = Close.
stock_data_pivot = stock_data.pivot(index = 'Date', columns='Stockid', values='Close')
stock_data_pivot.head()
Out[15]:
In [16]:
stock_data_pivot.plot()
Out[16]:
In [17]:
# Join the pivoted stock prices with the tweet-sentiment frame on the
# shared date index.
# NOTE(review): `test` is a vague name for this joined frame, but several
# later cells reference it, so it is kept.
test = stock_data_pivot
test = test.join(tweet_data)
This shows a comparison of the LDA and LSI scores with two rolling means (10-hour and 24-hour windows).
In [18]:
# 10h and 24h rolling mean / std dev for the LSI and LDA scores.
# pd.rolling_mean / pd.rolling_std were removed in pandas 1.0; use the
# .rolling() accessor (same window semantics).
mas10 = tweet_data.lsi_score.rolling(10).mean()
mstd10 = tweet_data.lsi_score.rolling(10).std()
mad10 = tweet_data.lda_score.rolling(10).mean()
madst10 = tweet_data.lda_score.rolling(10).std()
mas24 = tweet_data.lsi_score.rolling(24).mean()
mstd24 = tweet_data.lsi_score.rolling(24).std()
mad24 = tweet_data.lda_score.rolling(24).mean()
madst24 = tweet_data.lda_score.rolling(24).std()
# plot the mean with a half-std-dev band for the 10h rolling mean
plt.plot(mas10.index, mas10, label='LSI, 10-hour Rolling Mean')
plt.fill_between(mstd10.index, mas10-(mstd10*.5), mas10+(mstd10*.5), color = 'b', alpha = 0.1)
plt.plot(mad10.index, mad10, label='LDA, 10-hour Rolling Mean')
plt.fill_between(madst10.index, mad10-(0.5*madst10), mad10+(madst10* 0.5), color = 'g', alpha = 0.1)
# plot mean with half-std-dev band for the 24h rolling mean
plt.plot(mas24.index, mas24, label='LSI, 24-hour Rolling Mean')
plt.fill_between(mstd24.index, mas24-(mstd24*.5), mas24+(mstd24*.5), color = 'r', alpha = 0.1)
plt.plot(mad24.index, mad24, label='LDA, 24-hour Rolling Mean')
plt.fill_between(madst24.index, mad24-(0.5*madst24), mad24+(madst24* 0.5), color = 'k', alpha = 0.1)
# plot options
plt.xticks(rotation=50)
plt.legend(loc = 2, prop={'size':'20'})
plt.title('Comparison of LSI and LDA scores for tweet data', size=24)
plt.tick_params(labelsize=14)
For presentation purposes, we'll just use the LSI scores.
In [19]:
# LSI-only version of the previous figure; reuses mas10/mstd10/mas24/mstd24
# computed in the cell above (hidden-state dependency: that cell must run
# first).
# plot the mean / std dev for 10h rolling mean
plt.plot(mas10.index, mas10, label='LSI, 10-hour Rolling Mean', color = 'b')
plt.fill_between(mstd10.index, mas10-(mstd10*.5), mas10+(mstd10*.5), color = 'b', alpha = 0.1)
# plot mean/std dev for 24h rolling mean
plt.plot(mas24.index, mas24, label='LSI, 24-hour Rolling Mean', color = 'r')
plt.fill_between(mstd24.index, mas24-(mstd24*.5), mas24+(mstd24*.5), color = 'r', alpha = 0.1)
# plot options
plt.xticks(rotation=50)
plt.legend(loc = 2, prop={'size':'20'})
plt.title('LSI score per hour, rolling means', size=24)
plt.tick_params(labelsize=14)
In [28]:
# List the joined frame's columns, then keep the tickers plus the tweet
# features. print() call form for Python 2/3 compatibility.
print(test.keys())
# NOTE(review): testa is not used by any later cell shown here.
testa = test[['aapl', 'amzn', 'fb', 'goog', 'googl', 'msft', 'twtr', 'total_tweets', 'lsi_score', 'lda_score']]
In [29]:
# Scaling helpers. (These imports belong in the import cell at the top of
# the notebook; kept here so this cell stays self-contained.)
from sklearn.preprocessing import scale
from sklearn import preprocessing
from pandas import DataFrame

# PEP 8: prefer a def over assigning a lambda to a name -- same callable
# interface, better tracebacks.
def zscore(x):
    """Standardize x to zero mean and unit (sample) standard deviation."""
    return (x - x.mean()) / x.std()
In [30]:
tweet_data.lsi_score.count()
Out[30]:
In [31]:
# z-score the LSI series and the log of tweets-per-hour so the two are
# comparable on one axis.
z_tweets = scale(tweet_data.lsi_score)
z_tweets = DataFrame(z_tweets, index=tweet_data.index)
# `log` previously came from the %pylab star import; use np.log explicitly.
z_tweets['tph'] = scale(np.log(tweet_data.total_tweets))
z_tweets.columns = ['lsi_score','tweets_per_hour']
z_tweets.plot()
Out[31]:
In [32]:
# Fill gaps in the amzn series by linear interpolation; the counts before
# and after show how many missing values were filled.
# print() call form for Python 2/3 compatibility.
print(stock_data_pivot.amzn.count())
stock_data_pivot.amzn = stock_data_pivot.amzn.interpolate()
print(stock_data_pivot.amzn.count())
In [33]:
# z-score every stock column, keeping the original timestamp index.
z_stock = DataFrame(scale(stock_data_pivot), index=stock_data_pivot.index)
In [46]:
# Name the z-scored columns by ticker, then keep a subset (googl is
# dropped -- presumably redundant with goog; confirm intent).
z_stock.columns = ['aapl', 'amzn', 'fb', 'goog', 'googl', 'msft', 'twtr']
z_stock = z_stock[['aapl', 'amzn', 'fb', 'goog', 'msft', 'twtr']]
# pd.rolling_mean was removed in pandas 1.0 -> .rolling().mean().
z_stock.rolling(24).mean().plot()
Out[46]:
In [47]:
# Join the z-scored stocks with the z-scored tweet features, report the
# LSI coverage, and interpolate remaining gaps.
test = z_stock
test = test.join(z_tweets)
print(test.lsi_score.count())
test = test.interpolate()
# `test` was interpolated on the line above; the original's second
# .interpolate() before .plot() was a redundant no-op.
test.plot()
Out[47]:
In [ ]:
#test.pct_change().plot()
In [52]:
# A window of 1 makes the rolling mean the identity -- this just plots the
# raw tweets-per-hour series. (pd.rolling_mean removed in pandas 1.0.)
tweet_data.total_tweets.rolling(1).mean().plot()
plt.tick_params(labelsize=14)
In [55]:
# Column means and standard deviations of the tweet frame.
# print() call form for Python 2/3 compatibility.
print(tweet_data.mean())
print(tweet_data.std())
In [ ]:
In [56]:
test.lsi_score.interpolate(method='polynomial', order=2).pct_change().plot()
Out[56]:
In [57]:
# NOTE(review): datetime is already imported in the top import cell; the
# `dt` alias is re-imported here so this cell is self-contained.
import datetime as dt
# searchsorted returns the positions where these dates would be inserted
# into the (sorted) index -- i.e. integer offsets, not labels.
start = test.index.searchsorted(dt.datetime(2014,10,10))
end = test.index.searchsorted(dt.datetime(2014,11,18))
In [130]:
small_range = test.ix[start:end]
In [131]:
# Overlay the (interpolated) LSI score with Google's z-scored close over
# the restricted date range.
small_range.lsi_score.interpolate().plot(label = 'lsi')
small_range.goog.plot(label='GOOG')
plt.legend()
plt.tick_params(labelsize=14)
In [60]:
# 60-hour rolling mean of every column in the restricted range.
# pd.rolling_mean removed in pandas 1.0 -> .rolling().mean().
mvavg = small_range.rolling(60).mean()
#pd.rolling_mean(small_range.goog, 60).plot(label = 'rmean')
mvavg.plot()
plt.legend()
plt.tick_params(labelsize=14)
In [62]:
model = statsmodels.tsa.api.VAR(small_range.interpolate(), missing = 'drop')
In [63]:
model.select_order(5)
Out[63]:
In [64]:
results = model.fit(5,ic='aic')
In [65]:
results.summary()
Out[65]:
In [240]:
results.plot()
In [239]:
results.plot_acorr()
Impulse response analysis allows us to examine the impact of a 'shock' to one variable on the other variables in our model.
In [249]:
# Impulse responses over 2 periods; orth=False uses non-orthogonalized
# shocks, and we focus on shocks to lsi_score.
irf = results.irf(2)
irf.plot(impulse='lsi_score', orth=False)
plt.suptitle('Impulse Responses', fontsize = 20)
Out[249]:
In [67]:
In [68]:
# Forecast error variance decomposition over a 10-step horizon.
fevd = results.fevd(10)
fevd.plot()
In [250]:
# Granger-causality test of lsi_score against each variable in the model
# at the 10% level (F-test). The notebook export lost the loop body's
# indentation; restored here. NOTE(review): older statsmodels printed the
# result table by default; newer versions return a result object that
# would need to be printed/collected explicitly -- confirm the version.
for var in results.names:
    results.test_causality(var, ['lsi_score'], signif=0.1, kind='f')
In [69]:
results.plotsim()
In [ ]:
In [ ]:
In [ ]:
In [70]:
# NOTE(review): duplicate of the earlier correlation heatmap cell; `corr`
# was computed much earlier from `rets` and may be stale at this point.
plt.imshow(corr, cmap = 'hot', interpolation='none')
plt.colorbar()
plt.xticks(range(len(corr)), corr.columns)
plt.yticks(range(len(corr)), corr.columns)
plt.show()
In [179]:
# Smooth every series with a 24h rolling mean after interpolating gaps.
# The original repeated pd.rolling_mean (removed in pandas 1.0) once per
# column; a loop over the columns fixes the API and the copy-paste.
smooth_lsi = small_range.interpolate()
for _col in ['lsi_score', 'aapl', 'amzn', 'fb', 'goog', 'msft', 'twtr']:
    smooth_lsi[_col] = smooth_lsi[_col].rolling(24).mean()
#smooth_lsi.lsi_score.plot()
Out[179]:
In [164]:
In [186]:
# Two-panel comparison of the smoothed LSI score against the smoothed
# stock series. The original passed the return value of Series.plot()
# into ax.plot()/fig.add_subplot() -- a bug, since .plot() returns an
# Axes object, not plottable data. Plot onto explicit axes instead.
fig = plt.figure()
ax1 = fig.add_subplot(211)
smooth_lsi.lsi_score.plot(ax=ax1, label='LSI, 24h rolling mean', color='#5e402e')
smooth_lsi.aapl.plot(ax=ax1, label='Apple, 24h rolling mean', color='#519e8b')
ax1.legend()
ax1.tick_params(labelsize=14)
ax2 = fig.add_subplot(212)
smooth_lsi.fb.plot(ax=ax2, label='Facebook, 24h rolling mean', color='#6574b3')
smooth_lsi.goog.plot(ax=ax2, label='Google, 24h rolling mean', color='#bb5868')
smooth_lsi.twtr.plot(ax=ax2, label='Twitter, 24h rolling mean', color='#5da148')
ax2.legend()
ax2.tick_params(labelsize=14)
plt.show()
In [181]:
# pd.rolling_mean removed in pandas 1.0 -> .rolling().mean().
# NOTE: the first call smooths the whole smooth_lsi frame (all columns),
# so the single label/color applies to every line it draws; also these
# columns were already 24h-smoothed above, so this compounds the smoothing.
smooth_lsi.rolling(24).mean().plot(label = 'LSI, 24h rolling mean', color = '#5e402e')
small_range.fb.rolling(24).mean().plot(label='Facebook, 24h rolling mean', color = '#6574b3')
small_range.goog.rolling(24).mean().plot(label='Google, 24h rolling mean', color = '#bb5868')
small_range.twtr.rolling(24).mean().plot(label='Twitter, 24h rolling mean', color = '#5da148')
plt.legend()
plt.tick_params(labelsize = 14)
In [238]:
#smooth_lsi = small_range.lsi_score.interpolate()
#smooth_lsi = pd.rolling_mean(smooth_lsi, 24)
figure(1)
subplot(211)
pd.rolling_mean(small_range.lsi_score, 12).plot(label = 'LSI, 12h rolling mean', color = '#5e402e', linewidth = 2.5)
pd.rolling_mean(small_range.aapl, 24).plot(label='Apple, 24h rolling mean', color = '#519e8b', linewidth = 2)
pd.rolling_mean(small_range.amzn, 24).plot(label='Amazon, 24h rolling mean', color = '#be5fc1', linewidth = 2)
pd.rolling_mean(small_range.fb, 24).plot(label='Facebook, 24h rolling mean', color = '#c8803a', linewidth = 2)
plt.xlabel('')
plt.ylabel('z-scored value')
#fig = plt.figure()
#ax = fig.add_subplot(111)
#ax.grid(True)
#ax.set_xticklabels([])
plt.tick_params(labelsize = 14)
plt.legend()
subplot(212)
pd.rolling_mean(small_range.lsi_score, 12).plot(label = 'LSI, 12h rolling mean', color = '#5e402e', linewidth = 2.5)
#pd.rolling_mean(small_range.lsi_score, 2).plot(label = 'LSI, 24h rolling mean', color = '#5e402e')
pd.rolling_mean(small_range.goog, 24).plot(label='Google, 24h rolling mean', color = '#bb5868', linewidth = 2)
pd.rolling_mean(small_range.msft, 24).plot(label='Microsoft, 24h rolling mean', color = '#6574b3', linewidth = 2)
pd.rolling_mean(small_range.twtr, 24).plot(label='Twitter, 24h rolling mean', color = '#5da148', linewidth = 2)
plt.legend()
plt.ylabel('z-scored value')
plt.tick_params(labelsize = 14)
plt.show()
In [237]:
#smooth_lsi = small_range.lsi_score.interpolate()
#smooth_lsi = pd.rolling_mean(smooth_lsi, 24)
figure(1)
subplot(211)
pd.rolling_mean(small_range.lsi_score, 1).plot(label = 'LSI, 12h rolling mean', color = '#5e402e', linewidth = 2.5)
pd.rolling_mean(small_range.aapl, 4).plot(label='Apple, 24h rolling mean', color = '#519e8b', linewidth = 2)
pd.rolling_mean(small_range.amzn, 4).plot(label='Amazon, 24h rolling mean', color = '#be5fc1', linewidth = 2)
pd.rolling_mean(small_range.fb, 4).plot(label='Facebook, 24h rolling mean', color = '#c8803a', linewidth = 2)
plt.xlabel('')
plt.ylabel('z-scored value')
plt.title('Detail of October 28th - October 30th ', fontsize = 22)
#fig = plt.figure()
#ax = fig.add_subplot(111)
#ax.grid(True)
#ax.set_xticklabels([])
plt.tick_params(labelsize = 14)
plt.legend()
subplot(212)
pd.rolling_mean(small_range.lsi_score, 1).plot(label = 'LSI, 12h rolling mean', color = '#5e402e', linewidth = 2.5)
#pd.rolling_mean(small_range.lsi_score, 2).plot(label = 'LSI, 24h rolling mean', color = '#5e402e')
pd.rolling_mean(small_range.goog, 4).plot(label='Google, 24h rolling mean', color = '#bb5868', linewidth = 2)
pd.rolling_mean(small_range.msft, 4).plot(label='Microsoft, 24h rolling mean', color = '#6574b3', linewidth = 2)
pd.rolling_mean(small_range.twtr, 4).plot(label='Twitter, 24h rolling mean', color = '#5da148', linewidth = 2)
plt.legend()
plt.ylabel('z-scored value')
plt.tick_params(labelsize = 14)
plt.show()
In [88]:
# pd.rolling_mean removed in pandas 1.0 -> .rolling().mean().
# Label fixed: the second series is msft, but the original said 'GOOG'.
smooth_lsi.rolling(24).mean().plot(label = 'LSI, 24h rolling mean')
small_range.msft.rolling(24).mean().plot(label='Microsoft, 24h rolling mean')
plt.legend()
plt.tick_params(labelsize=14)
In [89]:
# Smoothed LSI vs Twitter close.
# pd.rolling_mean removed in pandas 1.0 -> .rolling().mean().
smooth_lsi.rolling(24).mean().plot(label = 'LSI, 24h rolling mean')
small_range.twtr.rolling(24).mean().plot(label='Twitter, 24h rolling mean')
plt.legend()
plt.tick_params(labelsize=14)
In [ ]: