notebook.community

Edit and run



In [3]:

    
import pandas.io.data as web
import pandas
from datetime import datetime
import random
import math
import urllib
from lxml import html
import csv
import json



In [4]:

    
# Chart data for the BTC_ETH pair, requested from Poloniex's public endpoint.
url = "https://poloniex.com/public?command=returnChartData&currencyPair=BTC_ETH&start=1405699200&end=9999999999&period=86400"
response = urllib.urlopen(url)
try:
    eth_prices = response.read()
finally:
    # Release the connection even if reading the body fails.
    response.close()

# Keep a copy of the raw JSON response on disk.
outfilename = 'eth_prices.json'
with open(outfilename, 'w') as outfile:
    outfile.write(eth_prices)

df = pandas.read_json(eth_prices)

# Header-less CSV holding only the date, open and volume columns.
# `outfilename` is left pointing at this file because the next cell reads it.
outfilename = 'eth_prices.csv'
with open(outfilename, 'w') as outfile:
    outfile.write(df.to_csv(index=False, header=False, columns=['date','open','volume']))



In [8]:

    
#reformatting org file
# Read the price CSV by its own name: `outfilename` is reassigned by the cells
# that build the train/prediction files, so relying on it would make this cell
# parse the wrong file when it is re-run after them.
pricesCsvName = 'eth_prices.csv'

dates = []
prices = []
volumes = []
with open(pricesCsvName, 'rb') as f:
    for row in csv.reader(f):
        # Columns are date, open, volume (no header row).
        openPrice = float(row[1])
        # Rows with a non-positive price are dropped.
        if openPrice > 0:
            dates.append(row[0])
            prices.append(openPrice)
            volumes.append(float(row[2]))



In [9]:

    
import random

def selectSampleIndex(listSize, sampleSize, maxSeqLen):
    """Pick `sampleSize` distinct random start indices, sorted ascending.

    Candidate indices are 0 .. listSize-maxSeqLen-1, so for every returned
    index `i`, `i + maxSeqLen` is still a valid position in a list of
    `listSize` items.

    Args:
        listSize: number of items in the list being sampled.
        sampleSize: how many distinct indices to return.
        maxSeqLen: offset that must remain available after each index.

    Returns:
        A sorted list of `sampleSize` distinct ints, or [] when the arguments
        are invalid or there are fewer candidate indices than `sampleSize`.
    """
    if listSize <= 0 or sampleSize <= 0 or listSize < sampleSize or maxSeqLen >= listSize:
        return []

    numCandidates = listSize - maxSeqLen
    # Asking for more distinct values than exist could never be satisfied
    # (the previous rejection-sampling loop would spin forever here).
    if sampleSize > numCandidates:
        return []

    return sorted(random.sample(range(numCandidates), sampleSize))

def selectPredictionIndex(listSize, marginDays):
    """Return the indices of the last `marginDays` positions of a list.

    Yields an empty list when `listSize` is not positive or when `marginDays`
    is not smaller than `listSize`; otherwise the consecutive indices
    listSize-marginDays .. listSize-1.
    """
    usable = listSize > 0 and marginDays < listSize
    if not usable:
        return []

    firstIdx = listSize - marginDays
    return range(firstIdx, listSize)
    
def getDiffValue(lastDayValue, targetDayValue):
    """Absolute change from `lastDayValue` to `targetDayValue`, as a float."""
    delta = targetDayValue - lastDayValue
    return float(delta)

def getDiff(lastDayValue, targetDayValue):
    """Change from `lastDayValue` to `targetDayValue`, relative to the target value.

    NOTE(review): the denominator is the target-day value, not the last-day
    value; confirm this is the intended definition of the relative change.
    Raises ZeroDivisionError when `targetDayValue` is 0.
    """
    change = getDiffValue(lastDayValue, targetDayValue)
    base = float(targetDayValue)
    return change / base



In [10]:

    
# Offset from a sample's first day to its target day: the train cell reads
# the target price at prices[idx+maxSeqLen].
maxSeqLen = 20
# The feature window spans maxSeqLen-marginDays consecutive days; marginDays is
# also the number of trailing indices used to build the prediction file.
marginDays = 5
# Labelling threshold on the relative diff: above minGain -> isBullish = 1,
# below -minGain -> isBullish = -1, otherwise 0.
minGain = 0.05
# Number of training start indices requested from selectSampleIndex.
numSamples = 200



In [11]:

    
# Shrink the sampling range by marginDays so that no training target
# (prices[idx+maxSeqLen]) falls within the final marginDays entries.
resIdx = selectSampleIndex(len(prices)-marginDays, numSamples, maxSeqLen)
resIdx[0:10], resIdx[-10:] #see first and last 10 index









    Out[11]:





([0, 1, 4, 5, 6, 7, 8, 9, 10, 11],
 [255, 256, 257, 258, 259, 260, 261, 262, 264, 265])



In [12]:

    
# Number of rows kept by the price > 0 filter when the CSV was parsed.
len(dates)









    Out[12]:





292



In [13]:

    
#build train file

outfilename = 'eth_train_data.csv'

# Number of consecutive days whose price and volume are written as features.
windowLen = maxSeqLen - marginDays

#print header
# Columns: date, day1..dayN, vol1..volN, dayTarget, diffVal, diff, isBullish
dayCols = ['day' + repr(num+1) for num in range(windowLen)]
volCols = ['vol' + repr(num+1) for num in range(windowLen)]
header = ','.join(['date'] + dayCols + volCols + ['dayTarget', 'diffVal', 'diff', 'isBullish']) + '\n'

# `with` closes the file even if a row fails to build.
with open(outfilename, 'w') as outfile:
    outfile.write(header)

    for idx in resIdx:
        lastDayPrice = prices[idx+windowLen-1]   # last day of the feature window
        targetPrice = prices[idx+maxSeqLen]      # day being predicted

        # Label the sample from the relative change between the two prices.
        diff = getDiff(lastDayPrice, targetPrice)
        if diff > minGain:
            isBullish = 1
        elif diff < -minGain:
            isBullish = -1
        else:
            isBullish = 0

        result = [dates[idx+windowLen-1]]                #date of last day
        result = result + prices[idx:idx+windowLen]      #prices of sequence of days
        result = result + volumes[idx:idx+windowLen]     #volumes of sequence of days
        result.append(targetPrice)                       #price of the day target
        result.append(getDiffValue(lastDayPrice, targetPrice)) #diff value
        result.append(diff)                              #diff
        result.append(isBullish)                         #isbullish

        # repr() is what the former backtick syntax did, so the output is
        # unchanged. NOTE(review): repr() of the date string writes it wrapped
        # in single quotes; kept as-is so the file format does not change.
        trainset = ",".join([repr(x) for x in result])
        outfile.write(trainset)
        outfile.write('\n')



In [11]:

    
#build prediction file

outfilename = 'eth_pred_data.csv'

# Number of consecutive days whose price and volume are written as features.
windowLen = maxSeqLen - marginDays

#print header
# Columns: date, day1..dayN, vol1..volN
dayCols = ['day' + repr(num+1) for num in range(windowLen)]
volCols = ['vol' + repr(num+1) for num in range(windowLen)]
header = ','.join(['date'] + dayCols + volCols) + '\n'

idxPred = selectPredictionIndex(len(prices), marginDays)

# `with` closes the file even if a row fails to build.
with open(outfilename, 'w') as outfile:
    outfile.write(header)

    for idx in idxPred:
        start = idx - windowLen + 1
        # A negative start would make the slices wrap around and produce a row
        # with the wrong number of columns; skip indices without enough history.
        if start < 0:
            continue

        result = [dates[idx]]                          #date of last day
        result = result + prices[start:idx+1]          #prices of sequence of days
        result = result + volumes[start:idx+1]         #volumes of sequence of days

        # repr() is what the former backtick syntax did, so the output is
        # unchanged. NOTE(review): repr() of the date string writes it wrapped
        # in single quotes; kept as-is so the file format does not change.
        predset = ",".join([repr(x) for x in result])
        outfile.write(predset)
        outfile.write('\n')



In [15]:

    
# Indices returned by selectPredictionIndex(len(prices), marginDays).
idxPred









    Out[15]:





[284, 285, 286, 287, 288]



In [16]:

    
# Price at the fifth prediction index (the last one while marginDays is 5).
prices[idxPred[4]]









    Out[16]:





0.03168962



In [17]:

    
# Date at the fifth prediction index (the last one while marginDays is 5).
dates[idxPred[4]]









    Out[17]:





'2016-05-22'



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]: