In [26]:
import pandas.io.data as web
from datetime import datetime
import random
import math
import urllib
from lxml import html
import csv

In [27]:
url = "https://blockchain.info/charts/market-price/?showDataPoints=false&timespan=all&show_header=true&daysAverageString=1&scale=0&format=csv&address="
bitcoin_prices = urllib.urlopen(url).read()

outfilename = 'btc_prices.csv'
outfile = open(outfilename,'w')
outfile.write(bitcoin_prices)
outfile.close()

In [28]:
#testing 
with open(outfilename, 'rb') as f:
    reader = csv.reader(f)
    for row in reader:
        print row[0]
        print date_object.date()
        break


03/01/2009 18:15:05
2016-05-21

In [30]:
#reformatting org file
dates = []
prices = []
with open(outfilename, 'rb') as f:
    reader = csv.reader(f)
    for row in reader:
        if float(row[1]) > 0:
            date_object = datetime.strptime(row[0], '%d/%m/%Y %H:%M:%S')
            dates.append(date_object.strftime("%Y-%m-%d"))
            prices.append(float(row[1]))
dates
prices


Out[30]:
[0.0769,
 0.074,
 0.0688,
 0.0667,
 0.066899,
 0.0664,
 0.066,
 0.066889,
 0.0665,
 0.066499,
 0.065,
 0.065,
 0.0648,
 0.069,
 0.06497,
 0.0649,
 0.0629,
 0.0634,
 0.0613,
 0.0629,
 0.064,
 0.06185,
 0.06201,
 0.0624,
 0.06201,
 0.062,
 0.064999,
 0.06201,
 0.0641,
 0.175,
 0.0619,
 0.0609,
 0.0609,
 0.062599,
 0.0634,
 0.0633,
 0.0628,
 0.063,
 0.06281,
 0.0624,
 0.062279,
 0.062206,
 0.06271,
 0.06219,
 0.06192,
 0.061999,
 0.061999,
 0.0614,
 0.062,
 0.06301,
 0.0633,
 0.0638,
 0.088,
 0.12001,
 0.12,
 0.1301,
 0.099,
 0.095,
 0.105,
 0.119,
 0.109,
 0.1045,
 0.103,
 0.1024,
 0.103,
 0.109,
 0.107,
 0.10901,
 0.19,
 0.15,
 0.19,
 0.19,
 0.19001,
 0.191,
 0.1919,
 0.199,
 0.1975,
 0.1955,
 0.275,
 0.23601,
 0.2399,
 0.29,
 0.499999,
 0.37,
 0.3368,
 0.2667,
 0.24,
 0.251,
 0.3,
 0.299,
 0.2828,
 0.2827,
 0.28,
 0.289999,
 0.283,
 0.289,
 0.29,
 0.282,
 0.2879,
 0.28299,
 0.28299,
 0.289,
 0.284399,
 0.28461,
 0.279,
 0.275,
 0.225,
 0.25,
 0.2553,
 0.2589,
 0.235,
 0.225,
 0.226,
 0.2477,
 0.2388,
 0.204,
 0.2275,
 0.228,
 0.22748,
 0.23,
 0.2468,
 0.2459,
 0.255,
 0.249,
 0.2499,
 0.275,
 0.26909,
 0.267,
 0.25,
 0.25,
 0.25,
 0.269999,
 0.265,
 0.28,
 0.301,
 0.299999,
 0.299999,
 0.299998,
 0.299996,
 0.299998,
 0.299899,
 0.298998,
 0.299,
 0.322,
 0.322898,
 0.322998,
 0.329,
 0.329,
 0.35,
 0.405,
 0.45,
 0.4,
 0.4,
 0.4,
 0.38679,
 0.3401,
 0.3675,
 0.44,
 0.4299,
 0.4443,
 0.443,
 0.425,
 0.425,
 0.4174,
 0.45,
 0.446,
 0.48,
 0.5,
 0.95,
 0.840099,
 0.75,
 0.88,
 0.9167,
 0.92,
 0.90585,
 0.9,
 0.935,
 1.1,
 1.0065,
 1.0899,
 1.08,
 1.08,
 1.085,
 1.05019,
 1.05019,
 1.05019,
 1.02,
 0.954896,
 0.8796,
 0.869499,
 0.95,
 0.9499,
 1.0,
 0.988567,
 0.96,
 0.949231,
 0.97,
 0.9498,
 0.94,
 0.9392,
 0.918901,
 0.910445,
 0.909,
 0.9072,
 0.883751,
 0.87,
 0.9329,
 0.9197,
 0.9196,
 0.9,
 0.899885,
 0.89,
 0.88,
 0.850617,
 0.824,
 0.79,
 0.7883,
 0.799646,
 0.85,
 0.9,
 0.89,
 0.905,
 0.8958,
 0.8575,
 0.8,
 0.795,
 0.80098,
 0.79997,
 0.79697,
 0.7998,
 0.79,
 0.71,
 0.747844,
 0.7677,
 0.799,
 0.7676,
 0.758899,
 0.781,
 0.897,
 1.0,
 0.94,
 1.08999,
 1.095,
 1.085,
 1.1999,
 1.19,
 1.1979,
 1.1979,
 1.332,
 1.549,
 1.95,
 1.701,
 1.73,
 1.95,
 2.65,
 2.7,
 4.14996,
 4.09,
 3.495,
 3.49,
 3.58,
 3.5,
 3.5998,
 3.7,
 3.9,
 3.937,
 5.2,
 6.065,
 5.94998,
 8.45,
 8.9,
 8.55,
 8.5,
 8.38901,
 7.874,
 7.34,
 7.1,
 6.6036,
 6.61,
 7.45,
 7.2,
 7.51,
 9.33,
 8.92,
 8.735,
 8.4992,
 9.0,
 9.4998,
 9.391,
 10.57,
 14.3,
 17.41,
 18.998,
 19.23,
 19.06,
 31.9099,
 31.5,
 35.0,
 30.0,
 24.99,
 24.5,
 20.99,
 20.0,
 19.96,
 19.3898,
 17.2,
 18.8766,
 17.35,
 15.5,
 15.05,
 16.0,
 16.7501,
 17.6,
 17.51001,
 18.0,
 17.52,
 17.3,
 17.5,
 17.0,
 16.49,
 15.7,
 15.85,
 15.0,
 16.5,
 16.25,
 15.64276,
 15.0,
 15.68,
 15.1999,
 14.63988,
 14.15,
 14.1,
 14.3,
 14.1,
 13.96,
 13.6901,
 14.7,
 14.04,
 13.9389,
 13.9389,
 13.92,
 13.979999,
 14.7399,
 14.4,
 14.13,
 14.0,
 13.8,
 13.8,
 14.8999,
 13.5501,
 13.45,
 13.1,
 11.4876543,
 11.55,
 11.0,
 9.8,
 8.89,
 12.1,
 10.7,
 10.4959,
 9.89520343,
 10.05,
 11.24,
 11.89,
 11.38,
 11.3,
 11.67,
 11.81,
 11.798,
 11.59,
 11.501,
 11.49,
 11.4,
 10.96554,
 10.3,
 9.11,
 9.48,
 9.4811,
 9.15031,
 9.0,
 8.449339999700001,
 8.4989,
 8.7138,
 8.5939999988,
 8.467,
 7.65713,
 7.59916,
 7.31509,
 6.94,
 6.208920000199999,
 7.4,
 7.081,
 6.25,
 5.990000000899999,
 5.69,
 5.23,
 5.022,
 5.0,
 5.6,
 6.795,
 6.28664,
 5.83001,
 5.70653,
 5.66,
 5.5,
 5.46014,
 5.13018,
 4.989,
 4.809,
 5.35,
 5.3,
 5.16,
 5.07546,
 5.03,
 5.025,
 4.93,
 4.85,
 4.594,
 4.389,
 4.25937,
 4.14001,
 4.20612,
 4.45,
 4.11455,
 4.05,
 3.9009000002,
 3.75996,
 2.9,
 2.64999,
 2.42,
 2.72002,
 3.3,
 3.36,
 3.2,
 3.04,
 2.876,
 3.0499,
 3.255,
 3.82717,
 3.65026,
 3.388,
 3.35,
 3.44,
 3.299,
 3.21874,
 3.21,
 3.05,
 3.0289,
 3.21,
 3.114,
 3.0,
 3.0,
 3.11,
 3.099,
 3.03,
 2.69,
 2.6,
 2.60031,
 2.38998,
 2.3,
 2.499,
 2.29,
 2.35,
 2.38,
 2.46,
 2.56,
 2.53125,
 2.4965,
 2.53899,
 2.981,
 2.959,
 3.14,
 3.138,
 3.12999,
 2.99,
 2.93,
 3.05,
 3.082,
 3.1,
 3.039,
 3.04,
 3.38,
 3.34237,
 3.3,
 3.277,
 3.1933,
 3.23,
 3.23,
 3.25,
 3.70036,
 4.5,
 4.11,
 3.99199,
 3.95,
 3.9499,
 4.3897,
 4.31259,
 4.06,
 4.18888,
 4.33,
 4.3,
 4.995,
 5.2,
 5.4999,
 5.29,
 5.6063,
 6.399,
 7.22,
 7.01556,
 7.2,
 7.2,
 6.89,
 7.138,
 6.997,
 6.8583899897,
 6.75,
 6.98999,
 7.18888,
 6.98,
 6.95,
 6.3,
 6.58,
 6.55,
 6.39,
 6.45485,
 6.515,
 6.45,
 6.2,
 5.689,
 5.74815,
 5.76,
 5.6,
 5.65,
 5.638,
 6.2,
 6.148,
 6.0,
 5.9625,
 5.79,
 5.70999,
 5.8494,
 5.8,
 5.92525,
 6.0,
 5.85,
 5.72,
 5.6,
 4.88,
 4.73368,
 4.76998,
 4.49675,
 4.33333,
 4.52,
 4.4354,
 4.54,
 4.92481,
 5.19778,
 5.07,
 5.00073,
 5.1,
 5.0,
 4.9,
 4.98421,
 4.98888,
 4.77995,
 4.9,
 5.04,
 5.05,
 5.07,
 5.0,
 4.9499,
 4.94,
 4.99,
 4.95915,
 5.41,
 5.4444,
 5.45,
 5.4,
 5.3998,
 5.37998,
 5.3099,
 4.98,
 4.87,
 4.88,
 4.83,
 4.74896,
 4.68294,
 4.73793,
 4.74,
 4.84592,
 4.86092,
 4.83,
 4.95,
 4.929,
 5.08,
 5.01,
 4.97,
 4.94486,
 4.96,
 4.98,
 4.8,
 4.8,
 4.9,
 4.8941,
 4.98,
 4.94464,
 5.03,
 4.98389,
 4.98,
 5.02207,
 5.1782,
 5.19,
 5.17,
 5.48,
 5.3287,
 5.21799,
 5.2,
 5.16,
 5.18,
 5.1372,
 5.1176,
 5.0184,
 5.0,
 5.0,
 5.1789,
 5.184,
 5.15,
 5.1495,
 5.099,
 5.0947,
 5.1,
 5.09609,
 5.125,
 5.03188,
 5.0,
 4.99888,
 5.036,
 5.09,
 5.08998,
 5.1345,
 5.13,
 5.14291,
 5.15,
 5.1475,
 5.12,
 5.16,
 5.2324,
 5.15,
 5.149,
 5.148,
 5.16,
 5.15889,
 5.1725,
 5.19,
 5.27,
 5.279,
 5.26466,
 5.2785,
 5.5,
 5.47,
 5.529,
 5.66,
 5.69999,
 5.62,
 5.54545,
 5.7,
 5.96,
 5.95,
 6.165,
 6.599,
 6.52999,
 6.4668,
 6.53294,
 6.64999,
 6.8,
 6.79962,
 6.65062,
 6.5915,
 6.44999,
 6.4514,
 6.55555,
 6.66897,
 6.662,
 6.693,
 6.694,
 6.75,
 6.765,
 6.55,
 6.77,
 6.73449,
 6.75,
 6.87,
 6.896,
 7.239,
 7.257,
 7.32,
 7.9,
 7.68891,
 7.64877,
 8.2899,
 9.49,
 9.39899,
 9.27491,
 9.23355,
 9.7,
 9.0,
 9.2,
 8.96,
 8.76998,
 8.9,
 8.95,
 8.93,
 8.889,
 9.15,
 9.44,
 9.54,
 9.8,
 11.1188,
 11.3,
 11.1869,
 11.29112,
 11.04,
 11.14999,
 12.0,
 11.6,
 11.59788,
 11.765,
 11.86999,
 12.17887,
 12.67,
 13.84119,
 15.4,
 15.26014,
 12.0,
 10.5,
 10.29999,
 10.26,
 10.25,
 10.25,
 10.25,
 10.61913,
 12.14999,
 11.3807,
 11.20999,
 10.937,
 10.8359,
 10.42,
 10.18999,
 10.5,
 10.5934,
 10.57888,
 11.17,
 11.29,
 11.21,
 11.14288,
 11.127,
 11.19401,
 11.3789,
 11.399,
 11.75,
 11.799,
 11.99,
 11.9625,
 12.09,
 12.61,
 12.68666,
 12.57001,
 12.4433,
 12.2749,
 12.29888,
 12.22909,
 12.28,
 12.46,
 12.444,
 12.49,
 12.47499,
 12.481,
 12.88,
 12.8999,
 13.0899,
 12.99,
 12.89999,
 12.7369,
 12.08,
 12.35,
 12.15,
 12.19,
 12.15,
 12.137,
 12.0,
 12.03,
 11.99,
 11.99,
 11.95998,
 11.967,
 11.85,
 11.7965,
 11.81,
 12.0,
 11.82,
 11.71,
 11.09988,
 10.83883,
 10.61,
 10.95,
 10.85,
 11.14,
 11.279,
 11.20999,
 10.60097,
 10.68,
 10.9,
 10.88997,
 11.21603,
 11.0979,
 11.07,
 10.95899,
 10.939,
 11.18,
 11.129,
 11.05,
 11.114,
 11.8,
 11.8,
 11.832,
 11.79998,
 11.84,
 11.784,
 12.28,
 12.43,
 12.4108,
 12.6,
 12.6515,
 12.52999,
 12.40712,
 12.599,
 12.65,
 12.68778,
 12.68,
 12.67901,
 12.96675,
 13.5,
 13.6888,
 13.68,
 13.55,
 13.53,
 13.55,
 13.63999,
 13.63999,
 13.79989,
 13.90119,
 13.7722,
 13.66548,
 13.498,
 13.399,
 13.399,
 13.72,
 13.6475,
 13.58998,
 13.48547,
 13.45,
 13.45,
 13.3989,
 13.47,
 13.6499,
 13.67,
 13.56998,
 13.59,
 13.561,
 13.4,
 13.464,
 13.48986,
 13.548,
 13.52999,
 13.535,
 13.83,
 13.87998,
 14.32,
 14.29999,
 14.34999,
 14.3148,
 14.3,
 14.47899,
 14.689,
 15.39,
 15.985,
 15.84304,
 15.89,
 16.98,
 17.59,
 17.5889,
 19.18999,
 18.8348,
 17.61926,
 17.99999,
 18.45,
 19.8,
 19.7,
 21.43,
 21.3,
 21.1,
 20.68,
 21.05,
 20.79,
 21.3339,
 22.15,
 22.15,
 23.69997,
 23.61458,
 24.1955,
 25.76997,
 26.09999,
 26.53092,
 26.99898,
 27.07009,
 25.6083,
 26.81565,
 28.78999,
 29.81231,
 29.80012,
 30.901,
 29.27998,
 30.29777,
 30.25001,
 31.179,
 31.40181,
 33.50001,
 34.87799,
 34.1808,
 34.10007,
 36.35,
 40.04,
 47.5,
 42.49271,
 42.98,
 46.01112,
 46.5,
 47.7397,
 45.01771,
 46.79999,
 47.27589,
 47.25005,
 47.11,
 47.4419,
 47.65,
 57.76,
 63.40967,
 73.8,
 71.5,
 63.0,
 70.1,
 73.88798,
 78.0,
 88.9,
 93.56701,
 89.05,
 90.99999,
 92.50001,
 102.51001,
 108.73,
 138.79,
 131.99899,
 143.8,
 142.49765,
 160.0997,
 184.0,
 237.99,
 198.0,
 123.40098,
 76.488,
 109.49999,
 91.0,
 91.0,
 91.0,
 85.59999,
 96.5,
 119.62,
 123.86,
 125.0,
 123.515,
 136.08099,
 153.20019,
 151.12001,
 135.601,
 128.00001,
 135.98999,
 141.50002,
 139.109,
 124.2998,
 105.00003,
 93.15,
 111.98979,
 117.89001,
 120.94999,
 106.81,
 113.95001,
 109.56154,
 117.68,
 115.4,
 114.32,
 ...]

In [46]:
import random

def selectSampleIndex(listSize, sampleSize, maxSeqLen):
    if listSize <= 0 or sampleSize <= 0 or listSize < sampleSize:
        return []
    
    resSample = set()
    while len(resSample) < sampleSize:
        pick = random.randint(0, listSize-maxSeqLen)
        resSample.add(pick)
    res = list(resSample)
    res.sort()
    return res

def getDiffValue(lastDayValue, targetDayValue):
    return float(targetDayValue - lastDayValue)

def getDiff(lastDayValue, targetDayValue):
    return getDiffValue(lastDayValue, targetDayValue)/float(targetDayValue)

In [47]:
maxSeqLen = 20
marginDays = 5
minGain = 0.05
numSamples = 200

In [48]:
resIdx = selectSampleIndex(len(prices), numSamples, maxSeqLen)
resIdx[0:10], resIdx[-10:] #see first and last 10 index


Out[48]:
([1, 2, 6, 7, 28, 29, 40, 44, 53, 59],
 [2014, 2016, 2032, 2034, 2036, 2041, 2057, 2064, 2075, 2076])

In [50]:
outfilename = 'btc_train_data.csv'
outfile = open(outfilename,'w')

#print header
header = 'date,' + 'day'+',day'.join([`num+1` for num in xrange(maxSeqLen-marginDays)])+',dayTarget'+',diffVal'+',diff'+',isBullish'+'\n'
outfile.write(header)

for idx in resIdx:
    isBullish = 0    
    diff = getDiff(prices[idx+(maxSeqLen-marginDays)-1], prices[idx+maxSeqLen])
    if diff > minGain:
        isBullish = 1
    elif diff < (-1*minGain):
        isBullish = -1
    else:
        isBullish = 0
    #print idx, tsla_prices[idx:idx+10], tsla_prices[idx+15], good, diff
    #print tsla_prices[idx:idx+maxSeqLen-5], tsla_prices[idx+maxSeqLen], isBullish
    result = prices[idx:idx+maxSeqLen-marginDays]  #prices of sequence of days
    result.insert(0, dates[idx+(maxSeqLen-marginDays)-1]) #date
    result.append(prices[idx+maxSeqLen]) #price of the day target
    result.append(getDiffValue(prices[idx+(maxSeqLen-marginDays)-1], prices[idx+maxSeqLen])) #diff value
    result.append(diff) #diff 
    result.append(isBullish) #isbullish
    trainset = ",".join([`x` for x in result])
    outfile.write(trainset)
    outfile.write('\n')

outfile.close()

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [25]:
'day'+',day'.join([`num+1` for num in xrange(maxSeqLen)])


Out[25]:
'day1,day2,day3,day4,day5,day6,day7,day8,day9,day10,day11,day12,day13,day14,day15,day16,day17,day18,day19,day20'

In [30]:
%alias_magic --line whereami pwd


Created `%whereami` as an alias for `%pwd`.

In [32]:
%pwd


Out[32]:
u'D:\\Work\\data\\stockprices'

In [ ]: