In [1]:
    
import pandas as pd
import datetime
import numpy as np
import scipy as sp
import os
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
font = {'size'   : 18}
matplotlib.rc('font', **font)
matplotlib.rcParams['figure.figsize'] = (12.0, 6.0)
#os.chdir("/root/Envs/btc-analysis/btc-price-analysis")
time_format = "%Y-%m-%dT%H:%M:%SZ"
    
In [2]:
    
# load the indico sentiment scores for NYT Bitcoin coverage, indexed by article time
score_data = pd.read_csv("../data/indico_nyt_bitcoin.csv", index_col='time',
                   parse_dates=[0], date_parser=lambda x: datetime.datetime.strptime(x, time_format))
score_data.head()
    
    Out[2]:
In [3]:
    
# weekly mean sentiment from 2013 onwards; weeks with no articles get the neutral score 0.5
weekly_score = score_data.resample('w', how='mean').loc['2013':].fillna(0.5)
weekly_score.head()
    
    Out[3]:
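A side note on the resample call above: it uses the pre-0.18 pandas `how=` keyword, which was removed in later releases. A minimal sketch of the same weekly aggregation on a current install, assuming the same score_data frame (0.5 fills empty weeks, which the original treats as the neutral score):

# modern equivalent of resample('w', how='mean')
weekly_score = score_data.resample('W').mean().loc['2013':].fillna(0.5)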
In [4]:
    
weekly_score.plot()
    
    Out[4]:
    
Read Bitcoin price data
In [5]:
    
time_format = "%Y-%m-%dT%H:%M:%S"
# x[:-6] drops the trailing 6-character timezone offset (e.g. "+00:00") before parsing
data = pd.read_csv("../data/price.csv", names=['time', 'price'], index_col='time',
                   parse_dates=[0], date_parser=lambda x: datetime.datetime.strptime(x[:-6], time_format))
# weekly OHLC bars from the raw price series
bpi = data.resample('w', how='ohlc')
bpi.index.name = 'time'
bpi = pd.DataFrame(bpi['price']['close']).loc['2013':]
bpi.head()
    
    Out[5]:
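Resampling the raw prices with how='ohlc' produces weekly open/high/low/close columns nested under a 'price' level, and only the weekly close is kept. A hedged sketch of the same step on newer pandas, operating on the single price column directly:

# newer pandas: resample('W').ohlc() returns plain open/high/low/close columns
weekly = data['price'].resample('W').ohlc()
bpi = weekly[['close']].loc['2013':]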
In [6]:
    
trend_bpi = pd.merge(weekly_score, bpi, how='right', left_index=True, right_index=True)
trend_bpi.columns = ['sentiment', 'close_price']
trend_bpi.head()
    
    Out[6]:
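The merge above joins on the two DatetimeIndexes with how='right', so every week that has a price is kept and the sentiment is attached where it exists. The same alignment can be written as an index join; a sketch, assuming each frame still holds a single column as above:

# keep bpi's weekly index (the right side of the merge) and attach the sentiment
trend_bpi = bpi.join(weekly_score, how='left')
trend_bpi.columns = ['close_price', 'sentiment']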
Compare weekly sentiment with the closing price (news volume data still to be added)
In [7]:
    
trend_bpi.plot(secondary_y='close_price')
    
    Out[7]:
    
In [8]:
    
trend_bpi.corr()
    
    Out[8]:
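The table above is only the contemporaneous correlation; since the later cells shift the sentiment before predicting, it may also be worth checking the correlation when the sentiment is lagged by a few weeks. A hedged sketch over the trend_bpi frame built above (the lag range is illustrative):

# correlation between sentiment k weeks ago and this week's closing price
for k in range(0, 5):
    corr = trend_bpi['sentiment'].shift(k).corr(trend_bpi['close_price'])
    print("lag %d weeks: corr = %.3f" % (k, corr))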
In [9]:
    
time_format = "%Y-%m-%dT%H:%M:%SZ"
# load the AlchemyAPI sentiment scores for the NYT Bitcoin coverage
alchemy_data = pd.read_csv("../data/alchemy_nyt_bitcoin.csv", index_col='time',
                   parse_dates=[0], date_parser=lambda x: datetime.datetime.strptime(x, time_format))
alchemy_data.head()
    
    Out[9]:
In [10]:
    
alchemy_data.alchemy_score.plot(kind='hist')
    
    Out[10]:
    
In [11]:
    
alchemy_data.describe()
    
    Out[11]:
In [12]:
    
# weekly mean AlchemyAPI sentiment from 2013 onwards; weeks with no articles get the neutral score 0.0
weekly_alchemy = alchemy_data.resample('w', how='mean').loc['2013':].fillna(0.0)
weekly_alchemy.head()
    
    Out[12]:
In [13]:
    
weekly_alchemy.plot(kind='hist')
    
    Out[13]:
    
In [14]:
    
weekly_alchemy.describe()
    
    Out[14]:
In [15]:
    
alchemy_bpi = pd.merge(weekly_alchemy, bpi, how='right', left_index=True, right_index=True)
alchemy_bpi.columns = ['sentiment', 'close_price']
alchemy_bpi.head()
    
    Out[15]:
In [16]:
    
alchemy_bpi.plot(secondary_y='close_price')
    
    Out[16]:
    
In [17]:
    
merged_data = pd.merge(alchemy_bpi, weekly_score, how='right', left_index=True, right_index=True)
merged_data.head()
    
    Out[17]:
In [18]:
    
merged_data.plot(secondary_y='close_price')
    
    Out[18]:
    
In [19]:
    
merged_data.corr()
    
    Out[19]:
In [20]:
    
# daily mean AlchemyAPI sentiment, again filling empty days with the neutral score 0.0
daily_alchemy = alchemy_data.resample('d', how='mean').loc['2013':].fillna(0.0)
daily_alchemy.head()
    
    Out[20]:
In [21]:
    
# daily OHLC bars; only the daily close is kept below
daily_price = data.resample('d', how='ohlc')
daily_price.index.name = 'time'
daily_price = pd.DataFrame(daily_price['price']['close']).loc['2013':]
daily_price.head()
    
    Out[21]:
In [22]:
    
daily_data = pd.merge(daily_price, daily_alchemy, how='right', left_index=True, right_index=True)
daily_data.head()
    
    Out[22]:
In [23]:
    
daily_data.plot(secondary_y='close')
    
    Out[23]:
    
In [24]:
    
# rolling mean of the weekly sentiment; with a window of 1 this is just the raw series
alchemy_bpi['avg_sentiment'] = pd.rolling_mean(alchemy_bpi.sentiment, 1)
alchemy_bpi.head()
    
    Out[24]:
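pd.rolling_mean was removed in pandas 0.23, and the window here is 1, so avg_sentiment is simply the raw weekly sentiment. A sketch of the modern rolling call with the window left as a parameter (any value other than 1 is an assumption, not something the original uses):

window = 1  # 1 reproduces the cell above; a larger window would actually smooth the series
alchemy_bpi['avg_sentiment'] = alchemy_bpi['sentiment'].rolling(window).mean()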
In [25]:
    
# last week's (rolling-average) sentiment, to compare against the current week
alchemy_bpi['avg_shifted'] = alchemy_bpi['avg_sentiment'].shift(1)
alchemy_bpi.head()
    
    Out[25]:
In [26]:
    
# placeholder for the predicted direction; 'NA' marks weeks with no signal
alchemy_bpi['order'] = 'NA'
# change in sentiment relative to the shifted rolling average
alchemy_bpi['diff'] = alchemy_bpi.sentiment - alchemy_bpi.avg_shifted
alchemy_bpi.head()
    
    Out[26]:
In [27]:
    
## sentiment rose this week relative to the shifted average => predict the price falls next week
alchemy_bpi.loc[alchemy_bpi['diff'] > 0, 'order'] = False
## sentiment fell this week relative to the shifted average => predict the price rises next week
alchemy_bpi.loc[alchemy_bpi['diff'] < 0, 'order'] = True
alchemy_bpi.head()
    
    Out[27]:
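The cells above compare this week's sentiment with the shifted rolling average and turn the sign of the difference into a predicted direction for next week; note that this weekly version maps a sentiment rise to a predicted fall, whereas the daily version further down maps it to a predicted rise. A small helper consolidating the rule is sketched below; the function name and the contrarian flag are illustrative, not part of the original notebook:

def make_order(df, signal_col, contrarian=False):
    """Predicted direction from the sign of signal minus its shifted average."""
    diff = df[signal_col] - df['avg_shifted']
    order = pd.Series('NA', index=df.index, dtype=object)
    order[diff > 0] = not contrarian  # signal up -> predict price up (down if contrarian)
    order[diff < 0] = contrarian      # signal down -> predict price down (up if contrarian)
    return order

# reproduces the weekly (contrarian) mapping used above
alchemy_bpi['order'] = make_order(alchemy_bpi, 'sentiment', contrarian=True)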
In [28]:
    
# realised direction: True if the closing price rose week-over-week
alchemy_bpi['trend'] = alchemy_bpi.close_price > alchemy_bpi.close_price.shift(1)
alchemy_bpi.head()
    
    Out[28]:
In [29]:
    
# fraction of weeks where the predicted direction matches the realised one
total_predict = alchemy_bpi[alchemy_bpi.order != 'NA'].order.count()
total_correct = alchemy_bpi[alchemy_bpi.order == alchemy_bpi.trend].order.count()
print("TP+TN: %f (%d/%d)" % (total_correct / float(total_predict), total_correct, total_predict))
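The same hit-rate calculation can be wrapped in a small helper so the weekly and daily strategies are scored identically; a sketch, with hit_rate as an illustrative name:

def hit_rate(order, trend):
    """Fraction of non-'NA' predictions that match the realised direction."""
    predicted = order != 'NA'
    correct = ((order == trend) & predicted).sum()
    return correct / float(predicted.sum()), correct, predicted.sum()

rate, n_correct, n_pred = hit_rate(alchemy_bpi['order'], alchemy_bpi['trend'])
print("TP+TN: %f (%d/%d)" % (rate, n_correct, n_pred))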
    
    
In [30]:
    
alchemy_bpi.corr()
    
    Out[30]:
In [109]:
    
daily_data = pd.merge(daily_price, daily_alchemy, how='right', left_index=True, right_index=True)
# rolling mean of the daily sentiment; window 1 again keeps the raw series
daily_data['avg_sentiment'] = pd.rolling_mean(daily_data.alchemy_score, 1)
daily_data.head()
    
    Out[109]:
In [110]:
    
# compare against the sentiment from three days earlier (the weekly rule used a one-period lag)
daily_data['avg_shifted'] = daily_data['avg_sentiment'].shift(3)
daily_data.head()
    
    Out[110]:
In [111]:
    
# placeholder for the predicted direction; 'NA' marks days with no signal
daily_data['order'] = 'NA'
# change in sentiment relative to the shifted rolling average
daily_data['diff'] = daily_data.alchemy_score - daily_data.avg_shifted
daily_data.head()
    
    Out[111]:
In [112]:
    
## sentiment rose relative to the shifted average => predict the price rises the next day
daily_data.loc[daily_data['diff'] > 0, 'order'] = True
## sentiment fell relative to the shifted average => predict the price falls the next day
daily_data.loc[daily_data['diff'] < 0, 'order'] = False
daily_data.head()
    
    Out[112]:
In [113]:
    
# realised direction: True if the closing price rose day-over-day
daily_data['trend'] = daily_data.close > daily_data.close.shift(1)
daily_data.head()
    
    Out[113]:
In [114]:
    
# fraction of days where the predicted direction matches the realised one
total_predict = daily_data[daily_data.order != 'NA'].order.count()
total_correct = daily_data[daily_data.order == daily_data.trend].order.count()
print("TP+TN: %f (%d/%d)" % (total_correct / float(total_predict), total_correct, total_predict))
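The daily rule shifts the sentiment by three days where the weekly rule shifted by one period, and that lag is not otherwise motivated, so it may be worth checking how sensitive the hit rate is to it. A hedged sketch of such a sweep over the daily_data frame built above (the lag range is illustrative):

# hit rate for a range of lags of the daily sentiment
for lag in range(1, 8):
    diff = daily_data['alchemy_score'] - daily_data['avg_sentiment'].shift(lag)
    order = pd.Series('NA', index=daily_data.index, dtype=object)
    order[diff > 0] = True
    order[diff < 0] = False
    trend = daily_data['close'] > daily_data['close'].shift(1)
    predicted = order != 'NA'
    correct = ((order == trend) & predicted).sum()
    print("lag=%d days: %.3f (%d/%d)" % (lag, correct / float(predicted.sum()), correct, predicted.sum()))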