In [120]:
import numpy as np
import pandas as pd
import Quandl
from matplotlib import pyplot as plt
from sklearn.linear_model import BayesianRidge
%matplotlib inline

In [121]:
# http://www.quantshare.com/sa-426-6-ways-to-download-free-intraday-and-tick-data-for-the-us-stock-market

In [122]:
# http://www.google.com/finance/getprices?i=300&p=10d&f=d,o,h,l,c,v&df=cpct&q=TNXP

In [123]:
!head tnxp_goog_5min_10day_clean.csv


78,10.4,10.4,10.33,10.4,25042

In [124]:
tnxp_df = pd.read_csv("tnxp_goog_5min_10day_clean.csv")

In [125]:
print tnxp_df.shape
tnxp_df.head()


(775, 6)
Out[125]:
INDEX CLOSE HIGH LOW OPEN VOLUME
0 0 7.49 7.4900 7.380 7.3800 2412
1 1 7.32 7.4780 7.320 7.4780 500
2 2 7.35 7.3899 7.289 7.3700 3975
3 3 7.33 7.3404 7.270 7.3404 7150
4 4 7.40 7.4600 7.330 7.3690 6110

In [126]:
tnxp_df.describe()


Out[126]:
INDEX CLOSE HIGH LOW OPEN VOLUME
count 775.000000 775.000000 775.000000 775.000000 775.000000 775.000000
mean 38.486452 8.630166 8.658258 8.597809 8.628757 6760.030968
std 22.540449 0.832298 0.836817 0.825881 0.830760 9526.718482
min 0.000000 7.320000 7.340400 7.110000 7.340400 100.000000
25% 19.000000 7.885000 7.905000 7.847500 7.875000 1500.000000
50% 38.000000 8.483000 8.500000 8.460000 8.490000 3500.000000
75% 58.000000 9.154800 9.189950 9.130000 9.160000 8223.000000
max 78.000000 10.430000 10.450000 10.415000 10.430000 107980.000000

In [127]:
tnxp_df['CLOSE'].plot()
plt.show()




In [128]:
close_values = tnxp_df['LOW'].values[:-1]
low_values = tnxp_df['LOW'].values[:-1]
high_values = tnxp_df['HIGH'].values[:-1]
vol_values = tnxp_df['VOLUME'].values[:-1]

In [129]:
tnxp_df['LASTCLOSE'] = np.insert(close_values, 0, 0.0)
tnxp_df['LASTLOW'] = np.insert(low_values, 0, 0.0)
tnxp_df['LASTHIGH'] = np.insert(high_values, 0, 0.0)
tnxp_df['LASTVOLUME'] = np.insert(vol_values, 0, 0.0)

In [130]:
tnxp_df = tnxp_df.ix[1:]

In [131]:
tnxp_df.head()


Out[131]:
INDEX CLOSE HIGH LOW OPEN VOLUME LASTCLOSE LASTLOW LASTHIGH LASTVOLUME
1 1 7.320 7.4780 7.320 7.4780 500 7.380 7.380 7.4900 2412
2 2 7.350 7.3899 7.289 7.3700 3975 7.320 7.320 7.4780 500
3 3 7.330 7.3404 7.270 7.3404 7150 7.289 7.289 7.3899 3975
4 4 7.400 7.4600 7.330 7.3690 6110 7.270 7.270 7.3404 7150
5 5 7.482 7.5000 7.416 7.4200 3754 7.330 7.330 7.4600 6110

In [132]:
tnxp_df.tail()


Out[132]:
INDEX CLOSE HIGH LOW OPEN VOLUME LASTCLOSE LASTLOW LASTHIGH LASTVOLUME
770 74 10.3999 10.44 10.3900 10.4200 6634 10.4150 10.4150 10.44 2800
771 75 10.4200 10.43 10.3700 10.3916 4900 10.3900 10.3900 10.44 6634
772 76 10.3800 10.42 10.3600 10.3918 8546 10.3700 10.3700 10.43 4900
773 77 10.4000 10.40 10.3564 10.3916 7957 10.3600 10.3600 10.42 8546
774 78 10.4000 10.40 10.3300 10.4000 25042 10.3564 10.3564 10.40 7957


In [133]:
tnxp_df['LOWPRICEIN30MINS'] = np.append(tnxp_df['LOW'].values[6:], [0,0,0,0,0,0])

In [134]:
tnxp_df.head(10)


Out[134]:
INDEX CLOSE HIGH LOW OPEN VOLUME LASTCLOSE LASTLOW LASTHIGH LASTVOLUME LOWPRICEIN30MINS
1 1 7.3200 7.4780 7.3200 7.4780 500 7.3800 7.3800 7.4900 2412 7.437
2 2 7.3500 7.3899 7.2890 7.3700 3975 7.3200 7.3200 7.4780 500 7.510
3 3 7.3300 7.3404 7.2700 7.3404 7150 7.2890 7.2890 7.3899 3975 7.520
4 4 7.4000 7.4600 7.3300 7.3690 6110 7.2700 7.2700 7.3404 7150 7.557
5 5 7.4820 7.5000 7.4160 7.4200 3754 7.3300 7.3300 7.4600 6110 7.550
6 6 7.4925 7.5000 7.4657 7.5000 6700 7.4160 7.4160 7.5000 3754 7.580
7 7 7.5100 7.5200 7.4370 7.4460 5900 7.4657 7.4657 7.5000 6700 7.580
8 8 7.5950 7.7099 7.5100 7.5100 15190 7.4370 7.4370 7.5200 5900 7.450
9 9 7.5200 7.6310 7.5200 7.6040 7165 7.5100 7.5100 7.7099 15190 7.450
10 10 7.5850 7.6000 7.5570 7.5800 685 7.5200 7.5200 7.6310 7165 7.497

In [135]:
tnxp_df.tail(10)


Out[135]:
INDEX CLOSE HIGH LOW OPEN VOLUME LASTCLOSE LASTLOW LASTHIGH LASTVOLUME LOWPRICEIN30MINS
765 69 10.3800 10.38 10.3700 10.3700 3500 10.2800 10.2800 10.3559 5350 10.3700
766 70 10.4200 10.43 10.3700 10.3700 3628 10.3700 10.3700 10.3800 3500 10.3600
767 71 10.3900 10.43 10.3900 10.4200 8000 10.3700 10.3700 10.4300 3628 10.3564
768 72 10.4300 10.43 10.3900 10.3900 5725 10.3900 10.3900 10.4300 8000 10.3300
769 73 10.4200 10.44 10.4150 10.4300 2800 10.3900 10.3900 10.4300 5725 0.0000
770 74 10.3999 10.44 10.3900 10.4200 6634 10.4150 10.4150 10.4400 2800 0.0000
771 75 10.4200 10.43 10.3700 10.3916 4900 10.3900 10.3900 10.4400 6634 0.0000
772 76 10.3800 10.42 10.3600 10.3918 8546 10.3700 10.3700 10.4300 4900 0.0000
773 77 10.4000 10.40 10.3564 10.3916 7957 10.3600 10.3600 10.4200 8546 0.0000
774 78 10.4000 10.40 10.3300 10.4000 25042 10.3564 10.3564 10.4000 7957 0.0000


In [150]:
print [str(col) for col in train_df.columns]


['INDEX', 'CLOSE', 'HIGH', 'LOW', 'OPEN', 'VOLUME', 'LASTCLOSE', 'LASTLOW', 'LASTHIGH', 'LASTVOLUME', 'LOWPRICEIN30MINS']

In [151]:
train_df = tnxp_df.ix[0:768]

In [153]:
X = train_df[['INDEX', 'CLOSE', 'HIGH', 'LOW', 'OPEN', 'VOLUME', 'LASTCLOSE', 'LASTLOW', 'LASTHIGH', 'LASTVOLUME']].values
y = train_df[['LOWPRICEIN30MINS']].values.flatten()
print X.shape, y.shape


(768, 10) (768,)

In [154]:
clf = BayesianRidge(compute_score=True)
clf.fit(X, y)


Out[154]:
BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=True, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [183]:
value_index = 1
features = tnxp_df.ix[value_index].values
features = np.delete(features, features.shape[0]-1)
prediction = clf.predict(features) 
print "Actual: {}".format(tnxp_df.ix[value_index].values[-1])
print "Prediction: {}".format(prediction[0])
print "Difference: {}".format(tnxp_df.ix[value_index].values[-1] - prediction[0])


Actual: 7.437
Prediction: 7.31331963493
Difference: 0.123680365066

In [186]:
value_index = 768
features = tnxp_df.ix[value_index].values
features = np.delete(features, features.shape[0]-1)
prediction = clf.predict(features) 
print "Actual: {}".format(tnxp_df.ix[value_index].values[-1])
print "Prediction: {}".format(prediction[0])
print "Difference: {}".format(tnxp_df.ix[value_index].values[-1] - prediction[0])


Actual: 10.33
Prediction: 10.4078041694
Difference: -0.0778041694197

In [188]:
value_index = 769
features = tnxp_df.ix[value_index].values
features = np.delete(features, features.shape[0]-1)
prediction = clf.predict(features) 
print "Actual: {}".format(10.40)
print "Prediction: {}".format(prediction[0])
print "Difference: {}".format(10.40 - prediction[0])


Actual: 10.4
Prediction: 10.3977414207
Difference: 0.00225857934042

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [123]:
# june 17
prediction = clf.predict(np.array([9.97, 9.67, 797492, 9.90]))
print prediction, prediction - (prediction*0.05)


[ 11.03600107] [ 10.48420101]

In [125]:
# june 17
prediction = clf.predict(np.array([9.97, 9.67, 797400, 9.90]))
print prediction, prediction - (prediction*0.05)


[ 11.03590219] [ 10.48410708]

In [ ]:


In [ ]:


In [ ]:



In [72]:
X = tnxp_df[['Open', 'High', 'Low', 'Volume']].values
y = tnxp_df[['Close']].values.flatten()
print X.shape, y.shape


(779, 4) (779,)

In [73]:
indices = np.random.permutation(np.arange(0,X.shape[0]-1))

In [74]:
train_count = np.floor(X.shape[0] * 0.80)
print train_count
print np.ceil(X.shape[0] * 0.20)


623.0
156.0

In [75]:
X_train = X[:train_count]
y_train = y[:train_count]
X_test = X[train_count:]
y_test = y[train_count:]

print len(X_train), len(X_test), len(y_train), len(y_test)


623 156 623 156

In [76]:
clf = BayesianRidge(compute_score=True)
clf.fit(X_train, y_train)


Out[76]:
BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=True, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)

In [77]:
clf.score(X_test, y_test)


Out[77]:
0.97043973543978568

In [78]:
clf.predict(np.array([9.97, 10.45, 9.67, 797492]))


Out[78]:
array([ 10.00664805])

In [79]:
clf.predict(np.array([7.18, 7.18, 6.79, 123700]))


Out[79]:
array([ 6.95554819])

In [ ]:


In [8]:
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

from sklearn.linear_model import BayesianRidge, LinearRegression

###############################################################################
# Generating simulated data with Gaussian weigthts
np.random.seed(0)
n_samples, n_features = 100, 100
X = np.random.randn(n_samples, n_features)  # Create Gaussian data
# Create weigts with a precision lambda_ of 4.
lambda_ = 4.
w = np.zeros(n_features)
# Only keep 10 weights of interest
relevant_features = np.random.randint(0, n_features, 10)
for i in relevant_features:
    w[i] = stats.norm.rvs(loc=0, scale=1. / np.sqrt(lambda_))
# Create noise with a precision alpha of 50.
alpha_ = 50.
noise = stats.norm.rvs(loc=0, scale=1. / np.sqrt(alpha_), size=n_samples)
# Create the target
y = np.dot(X, w) + noise

###############################################################################
# Fit the Bayesian Ridge Regression and an OLS for comparison
clf = BayesianRidge(compute_score=True)
clf.fit(X, y)

ols = LinearRegression()
ols.fit(X, y)

###############################################################################
# Plot true weights, estimated weights and histogram of the weights
plt.figure(figsize=(6, 5))
plt.title("Weights of the model")
plt.plot(clf.coef_, 'b-', label="Bayesian Ridge estimate")
plt.plot(w, 'g-', label="Ground truth")
plt.plot(ols.coef_, 'r--', label="OLS estimate")
plt.xlabel("Features")
plt.ylabel("Values of the weights")
plt.legend(loc="best", prop=dict(size=12))

plt.figure(figsize=(6, 5))
plt.title("Histogram of the weights")
plt.hist(clf.coef_, bins=n_features, log=True)
plt.plot(clf.coef_[relevant_features], 5 * np.ones(len(relevant_features)),
         'ro', label="Relevant features")
plt.ylabel("Features")
plt.xlabel("Values of the weights")
plt.legend(loc="lower left")

plt.figure(figsize=(6, 5))
plt.title("Marginal log-likelihood")
plt.plot(clf.scores_)
plt.ylabel("Score")
plt.xlabel("Iterations")
plt.show()


Automatically created module for IPython interactive environment


In [4]:
#https://www.google.com/finance/getprices?i=1&p=1000d&f=d,o,h,l,c,v&df=cpct&q=LPTH

In [2]:
tickers_df = pd.read_csv('WIKI_tickers.csv')
tickers_df.shape


Out[2]:
(2666, 2)

In [3]:
tickers = tickers_df['quandl code'].values.tolist()
print len(tickers)
print tickers[:5]


2666
['WIKI/ACT', 'WIKI/ADM', 'WIKI/AEE', 'WIKI/ADP', 'WIKI/ADS']

In [6]:
new_tickers = []
for ticker in tickers[5:10]:
    stock = Quandl.get(ticker, authtoken='DVhizWXNTePyzzy1eHWR')
    if stock['Close'].values[-1] < 20.00:
        new_tickers.append(ticker)
        
print len(new_tickers)


1

In [7]:
new_tickers


Out[7]:
['WIKI/AES']

In [8]:
stock = Quandl.get(new_tickers[0], authtoken='DVhizWXNTePyzzy1eHWR')

In [9]:
stock.tail()


Out[9]:
Open High Low Close Volume Ex-Dividend Split Ratio Adj. Open Adj. High Adj. Low Adj. Close Adj. Volume
Date
2015-06-08 13.12 13.160 13.020 13.08 4512754 0 1 13.12 13.160 13.020 13.08 4512754
2015-06-09 13.07 13.510 13.060 13.51 9146188 0 1 13.07 13.510 13.060 13.51 9146188
2015-06-10 13.64 13.650 13.300 13.32 5886451 0 1 13.64 13.650 13.300 13.32 5886451
2015-06-11 13.40 13.580 13.320 13.52 4962357 0 1 13.40 13.580 13.320 13.52 4962357
2015-06-12 13.48 13.515 13.305 13.35 3640202 0 1 13.48 13.515 13.305 13.35 3640202

In [54]:
plt.subplot(2,1,1)
plt.plot(stock.index[-10:],stock['Close'].values[-10:])
plt.subplot(2,1,2)
plt.bar(stock.index[-10:],stock['Volume'].values[-10:])
plt.xticks(rotation=45)
plt.show()



In [29]:
plt.plot(stock.index[-10:],stock['Close'].values[-10:])
plt.show()



In [30]:
plt.bar(stock.index[-10:], stock['Volume'].values[-10:])


Out[30]:
<Container object of 10 artists>


In [63]:
#!/usr/bin/env python
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, WeekdayLocator, DayLocator, MONDAY
from matplotlib.finance import quotes_historical_yahoo_ohlc, candlestick_ohlc


# (Year, month, day) tuples suffice as args for quotes_historical_yahoo
date1 = (2015, 5, 15)
date2 = (2015, 6, 15)


mondays = WeekdayLocator(MONDAY)        # major ticks on the mondays
alldays = DayLocator()              # minor ticks on the days
weekFormatter = DateFormatter('%b %d')  # e.g., Jan 12
dayFormatter = DateFormatter('%d')      # e.g., 12

quotes = quotes_historical_yahoo_ohlc('AES', date1, date2)
if len(quotes) == 0:
    raise SystemExit

fig, ax = plt.subplots()
fig.subplots_adjust(bottom=0.2)
ax.xaxis.set_major_locator(mondays)
ax.xaxis.set_minor_locator(alldays)
ax.xaxis.set_major_formatter(weekFormatter)
#ax.xaxis.set_minor_formatter(dayFormatter)

#plot_day_summary(ax, quotes, ticksize=3)
candlestick_ohlc(ax, quotes, width=0.6, colorup='g', colordown='r')

ax.xaxis_date()
ax.autoscale_view()
plt.setp(plt.gca().get_xticklabels(), rotation=45, horizontalalignment='right')

plt.show()



In [64]:
import datetime
import numpy as np
import matplotlib.colors as colors
import matplotlib.finance as finance
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager


startdate = datetime.date(2006,1,1)
today = enddate = datetime.date.today()
ticker = 'SPY'


fh = finance.fetch_historical_yahoo(ticker, startdate, enddate)
# a numpy record array with fields: date, open, high, low, close, volume, adj_close)

r = mlab.csv2rec(fh); fh.close()
r.sort()


def moving_average(x, n, type='simple'):
    """
    compute an n period moving average.

    type is 'simple' | 'exponential'

    """
    x = np.asarray(x)
    if type=='simple':
        weights = np.ones(n)
    else:
        weights = np.exp(np.linspace(-1., 0., n))

    weights /= weights.sum()


    a =  np.convolve(x, weights, mode='full')[:len(x)]
    a[:n] = a[n]
    return a

def relative_strength(prices, n=14):
    """
    compute the n period relative strength indicator
    http://stockcharts.com/school/doku.php?id=chart_school:glossary_r#relativestrengthindex
    http://www.investopedia.com/terms/r/rsi.asp
    """

    deltas = np.diff(prices)
    seed = deltas[:n+1]
    up = seed[seed>=0].sum()/n
    down = -seed[seed<0].sum()/n
    rs = up/down
    rsi = np.zeros_like(prices)
    rsi[:n] = 100. - 100./(1.+rs)

    for i in range(n, len(prices)):
        delta = deltas[i-1] # cause the diff is 1 shorter

        if delta>0:
            upval = delta
            downval = 0.
        else:
            upval = 0.
            downval = -delta

        up = (up*(n-1) + upval)/n
        down = (down*(n-1) + downval)/n

        rs = up/down
        rsi[i] = 100. - 100./(1.+rs)

    return rsi

def moving_average_convergence(x, nslow=26, nfast=12):
    """
    compute the MACD (Moving Average Convergence/Divergence) using a fast and slow exponential moving avg'
    return value is emaslow, emafast, macd which are len(x) arrays
    """
    emaslow = moving_average(x, nslow, type='exponential')
    emafast = moving_average(x, nfast, type='exponential')
    return emaslow, emafast, emafast - emaslow


plt.rc('axes', grid=True)
plt.rc('grid', color='0.75', linestyle='-', linewidth=0.5)

textsize = 9
left, width = 0.1, 0.8
rect1 = [left, 0.7, width, 0.2]
rect2 = [left, 0.3, width, 0.4]
rect3 = [left, 0.1, width, 0.2]


fig = plt.figure(facecolor='white')
axescolor  = '#f6f6f6'  # the axes background color

ax1 = fig.add_axes(rect1, axisbg=axescolor)  #left, bottom, width, height
ax2 = fig.add_axes(rect2, axisbg=axescolor, sharex=ax1)
ax2t = ax2.twinx()
ax3  = fig.add_axes(rect3, axisbg=axescolor, sharex=ax1)



### plot the relative strength indicator
prices = r.adj_close
rsi = relative_strength(prices)
fillcolor = 'darkgoldenrod'

ax1.plot(r.date, rsi, color=fillcolor)
ax1.axhline(70, color=fillcolor)
ax1.axhline(30, color=fillcolor)
ax1.fill_between(r.date, rsi, 70, where=(rsi>=70), facecolor=fillcolor, edgecolor=fillcolor)
ax1.fill_between(r.date, rsi, 30, where=(rsi<=30), facecolor=fillcolor, edgecolor=fillcolor)
ax1.text(0.6, 0.9, '>70 = overbought', va='top', transform=ax1.transAxes, fontsize=textsize)
ax1.text(0.6, 0.1, '<30 = oversold', transform=ax1.transAxes, fontsize=textsize)
ax1.set_ylim(0, 100)
ax1.set_yticks([30,70])
ax1.text(0.025, 0.95, 'RSI (14)', va='top', transform=ax1.transAxes, fontsize=textsize)
ax1.set_title('%s daily'%ticker)

### plot the price and volume data
dx = r.adj_close - r.close
low = r.low + dx
high = r.high + dx

deltas = np.zeros_like(prices)
deltas[1:] = np.diff(prices)
up = deltas>0
ax2.vlines(r.date[up], low[up], high[up], color='black', label='_nolegend_')
ax2.vlines(r.date[~up], low[~up], high[~up], color='black', label='_nolegend_')
ma20 = moving_average(prices, 20, type='simple')
ma200 = moving_average(prices, 200, type='simple')

linema20, = ax2.plot(r.date, ma20, color='blue', lw=2, label='MA (20)')
linema200, = ax2.plot(r.date, ma200, color='red', lw=2, label='MA (200)')


last = r[-1]
s = '%s O:%1.2f H:%1.2f L:%1.2f C:%1.2f, V:%1.1fM Chg:%+1.2f' % (
    today.strftime('%d-%b-%Y'),
    last.open, last.high,
    last.low, last.close,
    last.volume*1e-6,
    last.close-last.open )
t4 = ax2.text(0.3, 0.9, s, transform=ax2.transAxes, fontsize=textsize)

props = font_manager.FontProperties(size=10)
leg = ax2.legend(loc='center left', shadow=True, fancybox=True, prop=props)
leg.get_frame().set_alpha(0.5)


volume = (r.close*r.volume)/1e6  # dollar volume in millions
vmax = volume.max()
poly = ax2t.fill_between(r.date, volume, 0, label='Volume', facecolor=fillcolor, edgecolor=fillcolor)
ax2t.set_ylim(0, 5*vmax)
ax2t.set_yticks([])


### compute the MACD indicator
fillcolor = 'darkslategrey'
nslow = 26
nfast = 12
nema = 9
emaslow, emafast, macd = moving_average_convergence(prices, nslow=nslow, nfast=nfast)
ema9 = moving_average(macd, nema, type='exponential')
ax3.plot(r.date, macd, color='black', lw=2)
ax3.plot(r.date, ema9, color='blue', lw=1)
ax3.fill_between(r.date, macd-ema9, 0, alpha=0.5, facecolor=fillcolor, edgecolor=fillcolor)


ax3.text(0.025, 0.95, 'MACD (%d, %d, %d)'%(nfast, nslow, nema), va='top',
         transform=ax3.transAxes, fontsize=textsize)

#ax3.set_yticks([])
# turn off upper axis tick labels, rotate the lower ones, etc
for ax in ax1, ax2, ax2t, ax3:
    if ax!=ax3:
        for label in ax.get_xticklabels():
            label.set_visible(False)
    else:
        for label in ax.get_xticklabels():
            label.set_rotation(30)
            label.set_horizontalalignment('right')

    ax.fmt_xdata = mdates.DateFormatter('%Y-%m-%d')



class MyLocator(mticker.MaxNLocator):
    def __init__(self, *args, **kwargs):
        mticker.MaxNLocator.__init__(self, *args, **kwargs)

    def __call__(self, *args, **kwargs):
        return mticker.MaxNLocator.__call__(self, *args, **kwargs)

# at most 5 ticks, pruning the upper and lower so they don't overlap
# with other ticks
#ax2.yaxis.set_major_locator(mticker.MaxNLocator(5, prune='both'))
#ax3.yaxis.set_major_locator(mticker.MaxNLocator(5, prune='both'))

ax2.yaxis.set_major_locator(MyLocator(5, prune='both'))
ax3.yaxis.set_major_locator(MyLocator(5, prune='both'))

plt.show()



In [66]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.finance import candlestick
from matplotlib.finance import volume_overlay3
from matplotlib.dates import num2date
from matplotlib.dates import date2num
import matplotlib.mlab as mlab
import datetime

datafile = 'data.csv'
r = mlab.csv2rec(datafile, delimiter=';')

# the dates in my example file-set are very sparse (and annoying) change the dates to be sequential
for i in range(len(r)-1):
    r['date'][i+1] = r['date'][i] + datetime.timedelta(days=1)

candlesticks = zip(date2num(r['date']),r['open'],r['close'],r['max'],r['min'],r['volume'])

fig = plt.figure()
ax = fig.add_subplot(1,1,1)

ax.set_ylabel('Quote ($)', size=20)
candlestick(ax, candlesticks,width=1,colorup='g', colordown='r')

# shift y-limits of the candlestick plot so that there is space at the bottom for the volume bar chart
pad = 0.25
yl = ax.get_ylim()
ax.set_ylim(yl[0]-(yl[1]-yl[0])*pad,yl[1])

# create the second axis for the volume bar-plot
ax2 = ax.twinx()


# set the position of ax2 so that it is short (y2=0.32) but otherwise the same size as ax
ax2.set_position(matplotlib.transforms.Bbox([[0.125,0.1],[0.9,0.32]]))

# get data from candlesticks for a bar plot
dates = [x[0] for x in candlesticks]
dates = np.asarray(dates)
volume = [x[5] for x in candlesticks]
volume = np.asarray(volume)

# make bar plots and color differently depending on up/down for the day
pos = r['open']-r['close']<0
neg = r['open']-r['close']>0
ax2.bar(dates[pos],volume[pos],color='green',width=1,align='center')
ax2.bar(dates[neg],volume[neg],color='red',width=1,align='center')

#scale the x-axis tight
ax2.set_xlim(min(dates),max(dates))
# the y-ticks for the bar were too dense, keep only every third one
yticks = ax2.get_yticks()
ax2.set_yticks(yticks[::3])

ax2.yaxis.set_label_position("right")
ax2.set_ylabel('Volume', size=20)

# format the x-ticks with a human-readable date. 
xt = ax.get_xticks()
new_xticks = [datetime.date.isoformat(num2date(d)) for d in xt]
ax.set_xticklabels(new_xticks,rotation=45, horizontalalignment='right')

plt.ion()
plt.show()


/usr/local/lib/python2.7/site-packages/matplotlib/finance.py:865: MatplotlibDeprecationWarning: This function has been deprecated in 1.4 in favor of `candlestick_ochl`, which maintains the original argument order, or `candlestick_ohlc`, which uses the open-high-low-close order. This function will be removed in 1.5
  mplDeprecation)