In [38]:
import numpy as np
import pandas as pd
import matplotlib as plt

%matplotlib inline
# set default diplay row count
pd.options.display.max_rows=6
# pd.options.display.max_columns=6

Read csv file


In [37]:
# using the row-0 as header and the col-0 as rowindex
stock1 = pd.read_csv('./pydata-book/examples/stock_px.csv', header=0, index_col=0)
stock1.head()


Out[37]:
AA AAPL GE ... PEP SPX XOM
1990-02-01 00:00:00 4.98 7.86 2.87 ... 6.04 328.79 6.12
1990-02-02 00:00:00 5.04 8.00 2.87 ... 6.09 330.92 6.24
1990-02-05 00:00:00 5.07 8.18 2.87 ... 6.05 331.85 6.25
1990-02-06 00:00:00 5.01 8.12 2.88 ... 6.15 329.66 6.23
1990-02-07 00:00:00 5.04 7.77 2.91 ... 6.17 333.75 6.33

5 rows × 9 columns


In [11]:
volumn1 = pd.read_csv('./pydata-book/examples/volume.csv', header=0, index_col=0)
volumn1.head()


Out[11]:
AA AAPL GE IBM JNJ MSFT PEP SPX XOM
1990-02-01 00:00:00 2185600.0 4193200.0 14457600.0 6903600.0 5942400.0 89193600.0 2954400.0 154580000.0 2916400.0
1990-02-02 00:00:00 3103200.0 4248800.0 15302400.0 6064400.0 4732800.0 71395200.0 2424000.0 164400000.0 4250000.0
1990-02-05 00:00:00 1792800.0 3653200.0 9134400.0 5299200.0 3950400.0 59731200.0 2225400.0 130950000.0 5880800.0
1990-02-06 00:00:00 2205600.0 2640000.0 14389200.0 10808000.0 3761600.0 81964800.0 3270000.0 134070000.0 4750800.0
1990-02-07 00:00:00 3592800.0 11180800.0 18704400.0 12057600.0 5458400.0 134150400.0 4332600.0 186710000.0 4124800.0

In [12]:
stock2 = pd.read_csv('./pydata-book/examples/stock_px_2.csv', header=0, index_col=0)
stock2.head()


Out[12]:
AAPL MSFT XOM SPX
2003-01-02 00:00:00 7.40 21.11 29.22 909.03
2003-01-03 00:00:00 7.45 21.14 29.24 908.59
2003-01-06 00:00:00 7.45 21.52 29.96 929.01
2003-01-07 00:00:00 7.43 21.93 28.95 922.93
2003-01-08 00:00:00 7.28 21.31 28.83 909.93

Align data


In [25]:
# remove 'AA' column for align operator below
volumn1.drop(['AA'], axis=1, inplace=True)
tmp1 = stock1 * volumn1
print(2185600.0 * 4.98)
tmp1.head()


10884288.000000002
Out[25]:
AA AAPL GE IBM JNJ MSFT PEP SPX XOM
1990-02-01 00:00:00 NaN 32958552.0 41493312.0 115911444.0 25374048.0 45488736.0 17844576.0 5.082436e+10 17848368.0
1990-02-02 00:00:00 NaN 33990400.0 43917888.0 102427716.0 20682336.0 36411552.0 14762160.0 5.440325e+10 26520000.0
1990-02-05 00:00:00 NaN 29883176.0 26215728.0 91782144.0 17144736.0 30462912.0 13463670.0 4.345576e+10 36755000.0
1990-02-06 00:00:00 NaN 21436800.0 41440896.0 189788480.0 16250112.0 41802048.0 20110500.0 4.419752e+10 29597484.0
1990-02-07 00:00:00 NaN 86874816.0 54429804.0 216192768.0 23907792.0 68416704.0 26732142.0 6.231446e+10 26109984.0

In [30]:
# remove nan clolums
tmp1.dropna(axis=1)


Out[30]:
AAPL GE IBM JNJ MSFT SPX XOM
1990-02-01 00:00:00 3.295855e+07 4.149331e+07 1.159114e+08 25374048.0 4.548874e+07 5.082436e+10 1.784837e+07
1990-02-02 00:00:00 3.399040e+07 4.391789e+07 1.024277e+08 20682336.0 3.641155e+07 5.440325e+10 2.652000e+07
1990-02-05 00:00:00 2.988318e+07 2.621573e+07 9.178214e+07 17144736.0 3.046291e+07 4.345576e+10 3.675500e+07
... ... ... ... ... ... ... ...
2011-10-12 00:00:00 8.931272e+09 1.012784e+09 9.935458e+08 603100183.0 1.415125e+09 6.465258e+12 1.715961e+09
2011-10-13 00:00:00 6.199110e+09 7.481443e+08 8.218585e+08 499780053.0 1.191123e+09 5.339761e+12 1.492293e+09
2011-10-14 00:00:00 8.629900e+09 7.492277e+08 1.022803e+09 432251936.0 1.389344e+09 5.041216e+12 1.395873e+09

5472 rows × 7 columns


In [41]:
# manualy align data
tmp2 = stock1.align(other=volumn1, join='outer')
tmp2


Out[41]:
(                        AA    AAPL     GE  ...      PEP      SPX    XOM
 1990-02-01 00:00:00   4.98    7.86   2.87  ...     6.04   328.79   6.12
 1990-02-02 00:00:00   5.04    8.00   2.87  ...     6.09   330.92   6.24
 1990-02-05 00:00:00   5.07    8.18   2.87  ...     6.05   331.85   6.25
 ...                    ...     ...    ...  ...      ...      ...    ...
 2011-10-12 00:00:00  10.05  402.19  16.40  ...    62.70  1207.25  77.16
 2011-10-13 00:00:00  10.10  408.43  16.22  ...    62.36  1203.66  76.37
 2011-10-14 00:00:00  10.26  422.00  16.60  ...    62.24  1224.58  78.11
 
 [5472 rows x 9 columns],
                      AA        AAPL          GE     ...             PEP  \
 1990-02-01 00:00:00 NaN   4193200.0  14457600.0     ...       2954400.0   
 1990-02-02 00:00:00 NaN   4248800.0  15302400.0     ...       2424000.0   
 1990-02-05 00:00:00 NaN   3653200.0   9134400.0     ...       2225400.0   
 ...                  ..         ...         ...     ...             ...   
 2011-10-12 00:00:00 NaN  22206600.0  61755100.0     ...      13796200.0   
 2011-10-13 00:00:00 NaN  15177900.0  46124800.0     ...       6887300.0   
 2011-10-14 00:00:00 NaN  20450000.0  45134200.0     ...       8736600.0   
 
                               SPX         XOM  
 1990-02-01 00:00:00  1.545800e+08   2916400.0  
 1990-02-02 00:00:00  1.644000e+08   4250000.0  
 1990-02-05 00:00:00  1.309500e+08   5880800.0  
 ...                           ...         ...  
 2011-10-12 00:00:00  5.355360e+09  22239000.0  
 2011-10-13 00:00:00  4.436270e+09  19540300.0  
 2011-10-14 00:00:00  4.116690e+09  17870600.0  
 
 [5472 rows x 9 columns])