In [1]:
import pandas as pd
import datetime
import numpy as np
import scipy as sp
import os
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
font = {'size' : 18}
matplotlib.rc('font', **font)
matplotlib.rcParams['figure.figsize'] = (12.0, 6.0)
#os.chdir("/root/Envs/btc-analysis/btc-price-analysis")
time_format = "%Y-%m-%dT%H:%M:%SZ"
In [2]:
# Indico sentiment scores for NYT Bitcoin articles, indexed by UTC timestamp
score_data = pd.read_csv("../data/indico_nyt_bitcoin.csv", index_col='time',
                         parse_dates=[0], date_parser=lambda x: datetime.datetime.strptime(x, time_format))
score_data.head()
Out[2]:
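Note: date_parser is deprecated in newer pandas (2.0+); a minimal sketch of an equivalent load for recent versions, assuming the same file layout:
score_data = pd.read_csv("../data/indico_nyt_bitcoin.csv", index_col='time')
# parse the ISO-8601 timestamps after loading and drop the UTC marker to keep a naive index
score_data.index = pd.to_datetime(score_data.index, utc=True).tz_localize(None)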
In [3]:
# weekly mean sentiment from 2013 on; weeks with no articles are filled with 0.5 (neutral on Indico's 0-1 scale)
weekly_score = score_data.resample('w', how='mean').loc['2013':].fillna(0.5)
weekly_score.head()
Out[3]:
In [4]:
weekly_score.plot()
Out[4]:
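If the raw weekly series looks noisy, a short rolling mean makes the plot easier to read; a sketch with an arbitrary 4-week window (pd.rolling_mean matches the pandas version used here; newer pandas spells it weekly_score.rolling(4).mean()):
ax = weekly_score.plot(alpha=0.4)
pd.rolling_mean(weekly_score, 4).plot(ax=ax)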
read bitcoin price data
In [5]:
time_format = "%Y-%m-%dT%H:%M:%S"
# bitcoin price series; the trailing six-character timezone offset is stripped before parsing
data = pd.read_csv("../data/price.csv", names=['time', 'price'], index_col='time',
                   parse_dates=[0], date_parser=lambda x: datetime.datetime.strptime(x[:-6], time_format))
# resample to weekly OHLC bars and keep only the weekly close from 2013 on
bpi = data.resample('w', how='ohlc')
bpi.index.name = 'time'
bpi = pd.DataFrame(bpi['price']['close']).loc['2013':]
bpi.head()
Out[5]:
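With newer pandas the trailing timezone offset does not need to be sliced off by hand; a sketch of the same load, assuming offsets like +00:00:
data = pd.read_csv("../data/price.csv", names=['time', 'price'], index_col='time')
# parse the offset-aware timestamps as UTC, then drop the timezone to match the naive index used above
data.index = pd.to_datetime(data.index, utc=True).tz_localize(None)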
In [6]:
# align weekly sentiment with the weekly close (right join keeps every price week)
trend_bpi = pd.merge(weekly_score, bpi, how='right', left_index=True, right_index=True)
trend_bpi.columns = ['sentiment', 'close_price']
trend_bpi.head()
Out[6]:
compare news sentiment with price
In [7]:
trend_bpi.plot(secondary_y='close_price')
Out[7]:
In [8]:
trend_bpi.corr()
Out[8]:
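Since the trading rule below reacts to the previous period's sentiment, a lagged correlation is also worth a quick look; a sketch correlating last week's sentiment with this week's return (the one-week lag is an arbitrary choice):
weekly_return = trend_bpi.close_price.pct_change()
lagged_sentiment = trend_bpi.sentiment.shift(1)
print(weekly_return.corr(lagged_sentiment))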
In [9]:
time_format = "%Y-%m-%dT%H:%M:%SZ"
# AlchemyAPI sentiment scores for the same NYT Bitcoin articles
alchemy_data = pd.read_csv("../data/alchemy_nyt_bitcoin.csv", index_col='time',
                           parse_dates=[0], date_parser=lambda x: datetime.datetime.strptime(x, time_format))
alchemy_data.head()
Out[9]:
In [10]:
alchemy_data.alchemy_score.plot(kind='hist')
Out[10]:
In [11]:
alchemy_data.describe()
Out[11]:
In [12]:
# weekly mean AlchemyAPI sentiment; empty weeks are filled with 0.0 (neutral on Alchemy's -1 to 1 scale)
weekly_alchemy = alchemy_data.resample('w', how='mean').loc['2013':].fillna(0.0)
weekly_alchemy.head()
Out[12]:
In [13]:
weekly_alchemy.plot(kind='hist')
Out[13]:
In [14]:
weekly_alchemy.describe()
Out[14]:
In [15]:
# align weekly AlchemyAPI sentiment with the weekly close
alchemy_bpi = pd.merge(weekly_alchemy, bpi, how='right', left_index=True, right_index=True)
alchemy_bpi.columns = ['sentiment', 'close_price']
alchemy_bpi.head()
Out[15]:
In [16]:
alchemy_bpi.plot(secondary_y='close_price')
Out[16]:
In [17]:
# combine AlchemyAPI sentiment, the weekly close, and the Indico weekly score in one frame
merged_data = pd.merge(alchemy_bpi, weekly_score, how='right', left_index=True, right_index=True)
merged_data.head()
Out[17]:
In [18]:
merged_data.plot(secondary_y='close_price')
Out[18]:
In [19]:
merged_data.corr()
Out[19]:
In [20]:
# daily mean AlchemyAPI sentiment; days with no articles are filled with 0.0
daily_alchemy = alchemy_data.resample('d', how='mean').loc['2013':].fillna(0.0)
daily_alchemy.head()
Out[20]:
In [21]:
# daily OHLC bars; keep only the daily close from 2013 on
daily_price = data.resample('d', how='ohlc')
daily_price.index.name = 'time'
daily_price = pd.DataFrame(daily_price['price']['close']).loc['2013':]
daily_price.head()
Out[21]:
In [22]:
daily_data = pd.merge(daily_price, daily_alchemy, how='right', left_index=True, right_index=True)
daily_data.head()
Out[22]:
In [23]:
daily_data.plot(secondary_y='close')
Out[23]:
In [24]:
# a rolling mean with a window of 1 is just the raw weekly sentiment (widen the window to actually smooth)
alchemy_bpi['avg_sentiment'] = pd.rolling_mean(alchemy_bpi.sentiment, 1)
alchemy_bpi.head()
Out[24]:
In [25]:
# previous week's (smoothed) sentiment
alchemy_bpi['avg_shifted'] = alchemy_bpi['avg_sentiment'].shift(1)
alchemy_bpi.head()
Out[25]:
In [26]:
# signal column, and the change in sentiment relative to the previous week's average
alchemy_bpi['order'] = 'NA'
alchemy_bpi['diff'] = alchemy_bpi.sentiment - alchemy_bpi.avg_shifted
alchemy_bpi.head()
Out[26]:
In [27]:
## sentiment rose vs. the previous week's average => signal False (bet the price falls)
alchemy_bpi.loc[alchemy_bpi['diff'] > 0, 'order'] = False
## sentiment fell vs. the previous week's average => signal True (bet the price rises)
alchemy_bpi.loc[alchemy_bpi['diff'] < 0, 'order'] = True
## note: this is the inverse of the daily rule used further below
alchemy_bpi.head()
Out[27]:
In [28]:
# realized direction: True if this week's close is above last week's
alchemy_bpi['trend'] = alchemy_bpi.close_price > alchemy_bpi.close_price.shift(1)
alchemy_bpi.head()
Out[28]:
In [29]:
# hit rate: fraction of scored weeks where the signal matched the realized direction
total_predict = alchemy_bpi[alchemy_bpi.order != 'NA'].order.count()
total_correct = alchemy_bpi[alchemy_bpi.order == alchemy_bpi.trend].order.count()
print("TP+TN: %f (%d/%d)" % (total_correct / float(total_predict), total_correct, total_predict))
In [30]:
alchemy_bpi.corr()
Out[30]:
In [109]:
# rebuild the daily frame and repeat the same rule at daily frequency
daily_data = pd.merge(daily_price, daily_alchemy, how='right', left_index=True, right_index=True)
# a rolling mean with a window of 1 is again just the raw daily score
daily_data['avg_sentiment'] = pd.rolling_mean(daily_data.alchemy_score, 1)
Out[109]:
In [110]:
# sentiment from three days earlier
daily_data['avg_shifted'] = daily_data['avg_sentiment'].shift(3)
daily_data.head()
Out[110]:
In [111]:
# signal column, and the change in sentiment relative to three days ago
daily_data['order'] = 'NA'
daily_data['diff'] = daily_data.alchemy_score - daily_data.avg_shifted
daily_data.head()
Out[111]:
In [112]:
## sentiment rose vs. three days ago => signal True (bet the price rises)
daily_data.loc[daily_data['diff'] > 0, 'order'] = True
## sentiment fell vs. three days ago => signal False (bet the price falls)
daily_data.loc[daily_data['diff'] < 0, 'order'] = False
daily_data.head()
Out[112]:
In [113]:
# realized direction: True if today's close is above yesterday's
daily_data['trend'] = daily_data.close > daily_data.close.shift(1)
daily_data.head()
Out[113]:
In [114]:
# daily hit rate, computed the same way as for the weekly signal
total_predict = daily_data[daily_data.order != 'NA'].order.count()
total_correct = daily_data[daily_data.order == daily_data.trend].order.count()
print("TP+TN: %f (%d/%d)" % (total_correct / float(total_predict), total_correct, total_predict))