In [1]:
import datetime
import numpy as np
import pandas as pd
import zipline
%matplotlib inline
STOCKS = ['AMD', 'CERN', 'COST', 'DELL', 'GPS', 'INTC', 'MMM']
In [2]:
class BUY_APPLE(zipline.TradingAlgorithm):
    """
    Copy the sample trading algorithm from Quantopian and see if we can
    run it in zipline (what needs to change to convert between their
    platform and a local zipline run?)
    """
    def initialize(self):
        pass

    def handle_data(self, data):
        # Buy 10 shares of Apple every trading day and record its price.
        self.order(zipline.api.symbol('AAPL'), 10)
        self.record(AAPL=data[zipline.api.symbol('AAPL')].price)
In [3]:
start = datetime.datetime(2001, 8, 1)
end = datetime.datetime(2013, 2, 1)
data = zipline.utils.factory.load_from_yahoo(stocks=['AAPL', 'AMD'], indexes={}, start=start, end=end)
In [4]:
def run_buy_apple():
    buy_apple = BUY_APPLE()
    results = buy_apple.run(data)
    return results.portfolio_value
In [5]:
results_buy_apple = run_buy_apple()
In [6]:
results_buy_apple.tail()
Out[6]:
In [7]:
results_buy_apple.head()
Out[7]:
In [8]:
results_buy_apple.plot()
Out[8]:
In [9]:
from collections import deque as moving_window

class DualMovingAverage(zipline.TradingAlgorithm):
    """Implements the dual moving average crossover strategy."""
    def initialize(self, short_window=100, long_window=300):
        # Bounded deques double as fixed-length moving windows.
        self.short_window = moving_window(maxlen=short_window)
        self.long_window = moving_window(maxlen=long_window)

    def handle_data(self, data):
        self.short_window.append(data[zipline.api.symbol('AAPL')].price)
        self.long_window.append(data[zipline.api.symbol('AAPL')].price)
        short_mavg = np.mean(self.short_window)
        long_mavg = np.mean(self.long_window)
        # Trading logic: go long 100 shares when the short average is
        # above the long one, exit when it drops below.
        if short_mavg > long_mavg:
            self.order_target(zipline.api.symbol('AAPL'), 100)
        elif short_mavg < long_mavg:
            self.order_target(zipline.api.symbol('AAPL'), 0)
        self.record(AAPL=data[zipline.api.symbol('AAPL')].price,
                    short_mavg=short_mavg,
                    long_mavg=long_mavg)
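As an aside, the deque-with-maxlen import above is what makes the fixed-length windows work. A standalone illustration (not part of the backtest):

# Illustration only: a maxlen-bounded deque behaves like a moving window.
from collections import deque

window = deque(maxlen=3)
for price in [1, 2, 3, 4, 5]:
    window.append(price)
print list(window)  # [3, 4, 5]: the oldest prices fall off automatically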
In [10]:
def run_dual_moving_ave():
    moving_ave = DualMovingAverage()
    results = moving_ave.run(data)
    return results.portfolio_value
In [11]:
results_DMA = run_dual_moving_ave()
In [14]:
results_DMA.plot()
Out[14]:
scikit-learn's home page divides up the space of machine learning well, but the Mahout algorithms list is more comprehensive. From both:
The S&P 500 dataset is great for quickly exploring regression, clustering, and principal component analysis.
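Since PCA never actually appears in the cells below, here is a minimal sketch of what it could look like on a table of daily returns. The data and ticker names are synthetic placeholders, not results from this notebook:

# Sketch only: PCA on a synthetic returns table (dates x tickers).
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
fake_returns = pd.DataFrame(rng.randn(250, 4) * 0.01,
                            columns=['AAA', 'BBB', 'CCC', 'DDD'])
pca = PCA(n_components=2)
pca.fit(fake_returns)
# How much of the day-to-day variation the first two components explain;
# the first is often read as a market-wide factor.
print pca.explained_variance_ratio_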
Our goal is to cluster Chicago-area Fortune 500 stocks by similar day-to-day returns in 2012. Steps:
In [13]:
# This is a module we wrote using pg8000 to access our Postgres database on Heroku
from database import Database
db = Database()
In [14]:
# Ticker symbols for Chicago-area Fortune 500 companies
chicago_companies_lookup = dict(
    ABT="Abbott",
    ADM="Archer Daniels Midland",
    ALL="Allstate",
    BA="Boeing",
    CF="CF Industries (fertilizer)",
    DFS="Discover",
    DOV="Dover Corporation (industrial products)",
    EXC="Exelon",
    GWW="Grainger",
    ITW="Illinois Tool Works",
    MCD="McDonald's",
    MDLZ="Mondelez",
    MSI="Motorola Solutions",
    NI="Nicor",
    TEG="Integrys (energy)")
chicago_companies = chicago_companies_lookup.keys()
returns = db.select(('SELECT dt, "{}" FROM return '
                     'WHERE dt BETWEEN \'2012-01-01\' AND \'2012-12-31\' '
                     'ORDER BY dt;').format(
                         '", "'.join(c.lower() for c in chicago_companies)),
                    columns=["Date"] + chicago_companies)
sp_dates = [row.pop("Date") for row in returns]
returns = pd.DataFrame(returns, index=sp_dates)
In [15]:
# Cluster to determine whether sectors move similarly in the marketplace
from scipy.cluster.vq import whiten
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
%matplotlib inline
In [16]:
# whiten() rescales each feature to unit variance but returns a plain
# ndarray; wrap it back in a DataFrame so we keep tickers as the index.
daily = returns.transpose().dropna()
normalize = pd.DataFrame(whiten(daily), index=daily.index)
steps = range(2, 10)
inertias = [KMeans(n_clusters=i).fit(normalize).inertia_ for i in steps]
plt.plot(steps, inertias, 'go-')
plt.title("Pick 5 clusters (but the dropoff looks linear)")
Out[16]:
In [17]:
nclust = 5
km = KMeans(n_clusters=nclust)
km.fit(normalize)
# Group the tickers by the cluster label KMeans assigned to each one.
clustered_companies = [set() for _ in range(nclust)]
for company, cluster_id in zip(normalize.index, km.labels_):
    clustered_companies[cluster_id].add(company)
print "Here are the clusters...."
for c in clustered_companies:
    print len(c), "companies:\n ", ", ".join(chicago_companies_lookup[co] for co in c)
In [18]:
import scipy.spatial.distance as dist
import scipy.cluster.hierarchy as hclust

# Hierarchical clustering on pairwise Euclidean distances between stocks.
chicago_dist = dist.pdist(normalize, 'euclidean')
links = hclust.linkage(chicago_dist)
plt.figure(figsize=(3, 4))
den = hclust.dendrogram(
    links,
    labels=[chicago_companies_lookup[co] for co in normalize.index],
    orientation="left")
plt.ylabel('Samples', fontsize=9)
plt.xlabel('Distance')
plt.suptitle('Stocks clustered by similarity', fontweight='bold', fontsize=14);
We copied the Flask tutorial instructions but replaced the SQLite database with MongoDB, using the Flask-PyMongo extension.
# It's really this easy!
import os
import urlparse

from flask import Flask, render_template
from flask.ext.pymongo import PyMongo

app = Flask(__name__)
app.config['MONGO_URI'] = os.environ['MONGO_URI']
app.config['PASSWORD'] = urlparse.urlparse(app.config['MONGO_URI']).password
app.config['USERNAME'] = urlparse.urlparse(app.config['MONGO_URI']).username
mongo = PyMongo(app)

@app.route("/")
def show_entries():
    """Show all of the blog entries, newest first."""
    entries = mongo.db.entries.find(sort=[('$natural', -1)])
    return render_template('show_entries.html', entries=entries)
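For completeness, here is a sketch of the matching insert view, the Flask tutorial's add_entry swapped onto Mongo. The route and form field names mirror the tutorial, not necessarily our app:

# Sketch only: the tutorial's add_entry view rewritten for Mongo.
# Assumes app.secret_key is set (needed for flash), as in the tutorial.
from flask import request, redirect, url_for, flash

@app.route("/add", methods=["POST"])
def add_entry():
    """Insert a new blog entry into the entries collection."""
    mongo.db.entries.insert({"title": request.form["title"],
                             "text": request.form["text"]})
    flash("New entry was successfully posted")
    return redirect(url_for("show_entries"))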
<img src="data/flask_tutorial.png" style="width:500px">
- Get the code from my GitHub page.