In [3]:
import pandas.io.data as web
import pandas
from datetime import datetime
import random
import math
import urllib
from lxml import html
import csv
import json
In [4]:
# Fetch the BTC_ETH chart history from the Poloniex public API and cache it on disk,
# first as the raw JSON response, then as a header-less CSV with the columns
# date, open, volume.
# The query string used to read "returnChartData¤cyPair=BTC_ETH": the "&curren" of
# "&currencyPair" had most likely been decoded as an HTML entity, so the request carried
# no currencyPair parameter at all. The separator and parameter name are restored here.
url = "https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETH&start=1405699200&end=9999999999&period=86400"
response = urllib.urlopen(url)
try:
    eth_prices = response.read()
finally:
    # Release the connection even if the read fails.
    response.close()

outfilename = 'eth_prices.json'
with open(outfilename, 'w') as outfile:
    outfile.write(eth_prices)

df = pandas.read_json(eth_prices)

# The next cell reopens `outfilename`, so it has to end up naming the CSV file.
outfilename = 'eth_prices.csv'
with open(outfilename, 'w') as outfile:
    outfile.write(df.to_csv(index=False, header=False, columns=['date', 'open', 'volume']))
In [8]:
# Load the cached price CSV back into three parallel lists.
# The file has no header row; its columns are date, open price, volume.
# The file name is spelled out instead of reusing the global `outfilename`, because the
# later train/prediction cells reassign that variable and re-running this cell would then
# parse the wrong file.
pricesCsvName = 'eth_prices.csv'
dates = []
prices = []
volumes = []
with open(pricesCsvName, 'rb') as f:
    for row in csv.reader(f):
        openPrice = float(row[1])
        # Rows whose open price is not strictly positive are dropped.
        if openPrice > 0:
            dates.append(row[0])  # kept exactly as written in the CSV, no reformatting
            prices.append(openPrice)
            volumes.append(float(row[2]))
In [9]:
import random
def selectSampleIndex(listSize, sampleSize, maxSeqLen):
    """Pick `sampleSize` distinct start indices at random and return them sorted.

    Candidate indices are the integers 0 .. listSize - maxSeqLen - 1, so that
    index + maxSeqLen stays below listSize.

    Returns an empty list when the request cannot be satisfied: a non-positive
    listSize or sampleSize, sampleSize larger than listSize, maxSeqLen not
    smaller than listSize, or more samples requested than there are candidate
    indices. The last case used to spin forever, because the set of picks could
    never reach the requested size.
    """
    if listSize <= 0 or sampleSize <= 0 or listSize < sampleSize or maxSeqLen >= listSize:
        return []
    numCandidates = listSize - maxSeqLen
    if sampleSize > numCandidates:
        return []
    # random.sample draws without replacement, so no retry loop is needed.
    return sorted(random.sample(range(numCandidates), sampleSize))
def selectPredictionIndex(listSize, marginDays):
    """Return the indices of the last `marginDays` positions of a list of `listSize` items.

    Yields an empty list when listSize is not positive or when marginDays is
    not smaller than listSize.
    """
    hasRoom = listSize > 0 and marginDays < listSize
    return range(listSize - marginDays, listSize) if hasRoom else []
def getDiffValue(lastDayValue, targetDayValue):
    """Return the change from lastDayValue to targetDayValue as a float."""
    change = targetDayValue - lastDayValue
    return float(change)
def getDiff(lastDayValue, targetDayValue):
    """Return the change from lastDayValue to targetDayValue as a fraction of targetDayValue.

    NOTE(review): the change is divided by the target-day value, not the
    last-day value - confirm this is the intended base for the ratio.
    """
    absoluteChange = getDiffValue(lastDayValue, targetDayValue)
    base = float(targetDayValue)
    return absoluteChange / base
In [10]:
# Tunable parameters read by the sampling and file-building cells.
numSamples = 200  # number of sample start indices requested for the training file
minGain = 0.05    # diff above +minGain is labelled 1, below -minGain is labelled -1, otherwise 0
marginDays = 5    # how many trailing days get a prediction row; each feature window spans maxSeqLen - marginDays days
maxSeqLen = 20    # offset from a sample's start index to its target day
In [11]:
# Draw the sorted start indices of the training samples.
# The list size is shortened by marginDays (previously a hard-coded 5, the same value), so
# the largest index a training row can touch is len(prices) - marginDays - 1.
resIdx = selectSampleIndex(len(prices) - marginDays, numSamples, maxSeqLen)
resIdx[0:10], resIdx[-10:]  # see first and last 10 index
Out[11]:
In [12]:
# Number of rows that passed the price > 0 filter when the CSV was loaded.
len(dates)
Out[12]:
In [13]:
# build train file
# Every sampled start index yields one CSV row: the date of the last feature day, the
# feature-window prices, the feature-window volumes, the target-day price, the absolute
# and relative change from the last feature day to the target day, and a -1/0/1 label.
outfilename = 'eth_train_data.csv'
windowLen = maxSeqLen - marginDays  # number of feature days per row
dayNumbers = range(1, windowLen + 1)
header = ','.join(['date']
                  + ['day%d' % num for num in dayNumbers]
                  + ['vol%d' % num for num in dayNumbers]
                  + ['dayTarget', 'diffVal', 'diff', 'isBullish']) + '\n'

# The with-block closes the file even if a row fails part-way through.
with open(outfilename, 'w') as outfile:
    outfile.write(header)
    for idx in resIdx:
        lastDay = idx + windowLen - 1  # last day of the feature window
        targetDay = idx + maxSeqLen    # day whose price is the target
        diff = getDiff(prices[lastDay], prices[targetDay])
        if diff > minGain:
            isBullish = 1
        elif diff < -minGain:
            isBullish = -1
        else:
            isBullish = 0

        result = [dates[lastDay]]                  # date of last day
        result += prices[idx:idx + windowLen]      # prices of sequence of days
        result += volumes[idx:idx + windowLen]     # volumes of sequence of days
        result.append(prices[targetDay])           # price of the day target
        result.append(getDiffValue(prices[lastDay], prices[targetDay]))  # diff value
        result.append(diff)                        # diff
        result.append(isBullish)                   # isbullish
        # repr() replaces the backtick shorthand, which is a syntax error on Python 3.
        # The written text is unchanged, including the quotes repr puts around the date string.
        outfile.write(",".join([repr(x) for x in result]))
        outfile.write('\n')
In [11]:
# build prediction file
# Every prediction index yields one CSV row: the date of that day, then the prices and
# volumes of the feature window that ends on that day.
outfilename = 'eth_pred_data.csv'
windowLen = maxSeqLen - marginDays  # number of feature days per row
dayNumbers = range(1, windowLen + 1)
header = ','.join(['date']
                  + ['day%d' % num for num in dayNumbers]
                  + ['vol%d' % num for num in dayNumbers]) + '\n'

idxPred = selectPredictionIndex(len(prices), marginDays)

# The with-block closes the file even if a row fails part-way through.
with open(outfilename, 'w') as outfile:
    outfile.write(header)
    for idx in idxPred:
        # A negative windowStart would make the slices wrap around, so this relies on
        # idx >= windowLen - 1.
        windowStart = idx - windowLen + 1
        result = [dates[idx]]                           # date of last day
        result += prices[windowStart:idx + 1]           # prices of sequence of days
        result += volumes[windowStart:idx + 1]          # volumes of sequence of days
        # repr() replaces the backtick shorthand, which is a syntax error on Python 3.
        # The written text is unchanged, including the quotes repr puts around the date string.
        outfile.write(",".join([repr(x) for x in result]))
        outfile.write('\n')
In [15]:
# Show the indices that received a prediction row.
idxPred
Out[15]:
In [16]:
# Price at the fifth prediction index; with marginDays = 5 this is the last entry of prices.
prices[idxPred[4]]
Out[16]:
In [17]:
# Date at the fifth prediction index; with marginDays = 5 this is the last entry of dates.
dates[idxPred[4]]
Out[17]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: