notebook.community

Edit and run



In [ ]:

    
%matplotlib inline
import pandas as pd



In [ ]:

    
import json

# The file holds one JSON document per line; each non-blank line becomes one row.
# The context manager closes the handle once the frame has been built, and
# blank lines (e.g. a trailing newline) are skipped instead of crashing json.loads.
with open('apple_stock_snowday.json') as f:
    data = pd.DataFrame(json.loads(tick) for tick in f if tick.strip())
data



In [ ]:

    
# Only one column matters for this analysis: the most recent sale price.
prices = data["lastSalePrice"]
prices.plot()



In [ ]:

    
# The overall price level is irrelevant here; the goal is to predict small
# fluctuations, so work with the change between consecutive rows.
# diff() leaves NaN in the first slot (nothing precedes it), and NaN != 0 is True,
# so it has to be dropped explicitly or it would later count as a "down" move.
# Rows with a zero change are updates where something other than the price changed.
diffs = prices.diff().dropna().loc[lambda change: change != 0]
diffs.plot()
# Stationary time series!



In [ ]:

    
# Keep the signed price changes; their magnitudes are needed for the profit estimate.
original_diffs = diffs.copy()
# Reduce each change to its direction: True = price went up, False = it did not.
diffs = diffs > 0
diffs.hist()



In [ ]:

    
def training_generator(series=None, window_size=5):
    """Yield sliding windows over a series as dicts keyed by position in the window.

    Parameters
    ----------
    series : pandas.Series, optional
        Values to window over, read positionally (the index labels are ignored).
        When omitted, the notebook-level ``diffs`` series is used.
    window_size : int, default 5
        Number of consecutive values in each window.

    Yields
    ------
    dict
        ``{0: v0, 1: v1, ..., window_size - 1: v_last}`` for each window start,
        so ``pd.DataFrame(training_generator())`` gets one row per window.

    Notes
    -----
    ``len(series) - window_size`` windows are produced, so the window ending on
    the very last value is not emitted. A series shorter than ``window_size``
    yields nothing.
    """
    if series is None:
        series = diffs  # fall back to the notebook's global series
    for start in range(len(series) - window_size):
        yield {offset: series.iloc[start + offset] for offset in range(window_size)}



In [ ]:

    
# Collect every sliding window into a frame: one row per window,
# one column per position inside the window.
generator = training_generator()
df = pd.DataFrame(data=generator)
df



In [ ]:

    
# The first four moves of each window are the features; the fifth is the label.
examples = df[list(range(4))]
answers = df[4]



In [ ]:

    
from sklearn.tree import DecisionTreeClassifier

# Pin the seed: the tree permutes features at every split, so ties between
# equally good splits are otherwise broken differently from run to run.
clf = DecisionTreeClassifier(random_state=0)



In [ ]:

    
# Fit the tree, then predict the very same rows it was fitted on
# (these are in-sample predictions, not a held-out evaluation).
predictions = clf.fit(examples, answers).predict(examples)
predictions



In [ ]:

    
# Element-wise check: True where the predicted direction equals the actual one.
successes = answers == predictions
successes



In [ ]:

    
# Fraction of windows whose direction was predicted correctly.
# Did we do better than random guessing?
# .mean() on the boolean Series is always a float ratio (no integer-division
# truncation) and gives NaN rather than raising when there are no rows.
successes.mean()



In [ ]:

    
# How much can we make?
# Each move's absolute size is treated as the gain when its direction was called
# correctly and as the loss when it was not, regardless of whether it was up or down.
# Row k of `successes` scores the last slot of window k, i.e. the price change at
# position k + (number of feature columns). The magnitudes therefore start at that
# position (4 here, not 5) and span exactly len(successes) changes.
first_predicted = examples.shape[1]
potential_profits = (
    original_diffs.abs()
    .iloc[first_predicted:first_predicted + len(successes)]
    .reset_index(drop=True)  # renumber from 0 so the boolean masks below line up
)

profits = potential_profits[successes].sum()
losses = potential_profits[~successes].sum()
num_shares = 1000
net = (profits - losses) * num_shares
round(net)