Financial and Economic Data Applications


In [1]:
from pandas import Series, DataFrame
import pandas as pd
from numpy.random import randn
import numpy as np
pd.options.display.max_rows = 12
np.set_printoptions(precision=4, suppress=True)
import matplotlib.pyplot as plt
plt.rc('figure', figsize=(12, 6))

In [2]:
%matplotlib inline

Data munging topics

Time series and cross-section alignment


In [5]:
close_px = pd.read_csv('stock_px.csv', parse_dates=True, index_col=0)
volume = pd.read_csv('volume.csv', parse_dates=True, index_col=0)
prices = close_px.ix['2011-09-05':'2011-09-14', ['AAPL', 'JNJ', 'SPX', 'XOM']]
volume = volume.ix['2011-09-05':'2011-09-12', ['AAPL', 'JNJ', 'XOM']]

In [6]:
prices


Out[6]:
AAPL JNJ SPX XOM
2011-09-06 379.74 64.64 1165.24 71.15
2011-09-07 383.93 65.43 1198.62 73.65
2011-09-08 384.14 64.95 1185.90 72.82
2011-09-09 377.48 63.64 1154.23 71.01
2011-09-12 379.94 63.59 1162.27 71.84
2011-09-13 384.62 63.61 1172.87 71.65
2011-09-14 389.30 63.73 1188.68 72.64

In [7]:
volume


Out[7]:
AAPL JNJ XOM
2011-09-06 18173500.0 15848300.0 25416300.0
2011-09-07 12492000.0 10759700.0 23108400.0
2011-09-08 14839800.0 15551500.0 22434800.0
2011-09-09 20171900.0 17008200.0 27969100.0
2011-09-12 16697300.0 13448200.0 26205800.0

In [8]:
prices * volume


Out[8]:
AAPL JNJ SPX XOM
2011-09-06 6.901205e+09 1.024434e+09 NaN 1.808370e+09
2011-09-07 4.796054e+09 7.040072e+08 NaN 1.701934e+09
2011-09-08 5.700561e+09 1.010070e+09 NaN 1.633702e+09
2011-09-09 7.614489e+09 1.082402e+09 NaN 1.986086e+09
2011-09-12 6.343972e+09 8.551710e+08 NaN 1.882625e+09
2011-09-13 NaN NaN NaN NaN
2011-09-14 NaN NaN NaN NaN

In [9]:
vwap = (prices * volume).sum() / volume.sum()

In [10]:
vwap


Out[10]:
AAPL    380.655181
JNJ      64.394769
SPX            NaN
XOM      72.024288
dtype: float64

In [11]:
vwap.dropna()


Out[11]:
AAPL    380.655181
JNJ      64.394769
XOM      72.024288
dtype: float64

In [12]:
prices.align(volume, join='inner')


Out[12]:
(              AAPL    JNJ    XOM
 2011-09-06  379.74  64.64  71.15
 2011-09-07  383.93  65.43  73.65
 2011-09-08  384.14  64.95  72.82
 2011-09-09  377.48  63.64  71.01
 2011-09-12  379.94  63.59  71.84,
                   AAPL         JNJ         XOM
 2011-09-06  18173500.0  15848300.0  25416300.0
 2011-09-07  12492000.0  10759700.0  23108400.0
 2011-09-08  14839800.0  15551500.0  22434800.0
 2011-09-09  20171900.0  17008200.0  27969100.0
 2011-09-12  16697300.0  13448200.0  26205800.0)

In [13]:
s1 = Series(range(3), index=['a', 'b', 'c'])
s2 = Series(range(4), index=['d', 'b', 'c', 'e'])
s3 = Series(range(3), index=['f', 'a', 'c'])
DataFrame({'one': s1, 'two': s2, 'three': s3})


Out[13]:
one three two
a 0.0 1.0 NaN
b 1.0 NaN 1.0
c 2.0 2.0 2.0
d NaN NaN 0.0
e NaN NaN 3.0
f NaN 0.0 NaN

In [14]:
DataFrame({'one': s1, 'two': s2, 'three': s3}, index=list('face'))


Out[14]:
one three two
f NaN 0.0 NaN
a 0.0 1.0 NaN
c 2.0 2.0 2.0
e NaN NaN 3.0

Operations with time series of different frequencies


In [ ]: