In [115]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
import seaborn as sns

%matplotlib inline

%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)

%load_ext autoreload
%autoreload 2

sys.path.append('../../')


Populating the interactive namespace from numpy and matplotlib
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['f']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"

In [50]:
data_df = pd.read_pickle('../../data/data_df.pkl')
print(data_df.shape)
data_df.head(25)


(30120, 503)
Out[50]:
SPY MMM ABT ABBV ACN ATVI AYI ADBE AMD AAP ... XEL XRX XLNX XL XYL YHOO YUM ZBH ZION ZTS
date feature
1993-01-29 Open 0.00 0.00 0.00 NaN NaN NaN NaN 0.00 0.00 NaN ... 0.00 0.00 0.00 NaN NaN NaN NaN NaN 0.00 NaN
High 43.97 24.62 6.88 NaN NaN NaN NaN 2.64 19.12 NaN ... 22.00 14.32 2.50 NaN NaN NaN NaN NaN 10.94 NaN
Low 43.75 24.47 6.75 NaN NaN NaN NaN 2.56 18.62 NaN ... 21.88 13.84 2.46 NaN NaN NaN NaN NaN 10.62 NaN
Close 43.94 24.50 6.88 NaN NaN NaN NaN 2.59 18.75 NaN ... 22.00 14.28 2.50 NaN NaN NaN NaN NaN 10.94 NaN
Volume 1003200.00 1242800.00 4638400.00 NaN NaN NaN NaN 4990400.00 730600.00 NaN ... 87800.00 7633602.00 1745196.00 NaN NaN NaN NaN NaN 33600.00 NaN
1993-02-01 Open 0.00 0.00 0.00 NaN NaN NaN NaN 0.00 0.00 NaN ... 0.00 0.00 0.00 NaN NaN NaN NaN NaN 0.00 NaN
High 44.25 24.69 6.97 NaN NaN NaN NaN 2.75 19.25 NaN ... 22.19 14.24 2.62 NaN NaN NaN NaN NaN 11.25 NaN
Low 43.97 24.47 6.78 NaN NaN NaN NaN 2.53 18.50 NaN ... 21.94 14.07 2.46 NaN NaN NaN NaN NaN 10.75 NaN
Close 44.25 24.69 6.88 NaN NaN NaN NaN 2.72 19.12 NaN ... 22.19 14.09 2.62 NaN NaN NaN NaN NaN 11.06 NaN
Volume 480500.00 749600.00 4450400.00 NaN NaN NaN NaN 8670400.00 750300.00 NaN ... 72400.00 3001200.00 3574800.00 NaN NaN NaN NaN NaN 32000.00 NaN
1993-02-02 Open 0.00 0.00 0.00 NaN NaN NaN NaN 0.00 0.00 NaN ... 0.00 0.00 0.00 NaN NaN NaN NaN NaN 0.00 NaN
High 44.38 24.88 6.88 NaN NaN NaN NaN 2.86 20.25 NaN ... 22.12 14.13 2.65 NaN NaN NaN NaN NaN 11.12 NaN
Low 44.12 24.69 6.53 NaN NaN NaN NaN 2.73 19.38 NaN ... 21.88 13.99 2.60 NaN NaN NaN NaN NaN 10.88 NaN
Close 44.34 24.72 6.53 NaN NaN NaN NaN 2.84 20.25 NaN ... 22.06 14.09 2.64 NaN NaN NaN NaN NaN 11.12 NaN
Volume 201300.00 1233600.00 10030000.00 NaN NaN NaN NaN 11491200.00 1418100.00 NaN ... 242200.00 1388598.00 2652396.00 NaN NaN NaN NaN NaN 251600.00 NaN
1993-02-03 Open 0.00 0.00 0.00 NaN NaN NaN NaN 0.00 0.00 NaN ... 0.00 0.00 0.00 NaN NaN NaN NaN NaN 0.00 NaN
High 44.84 25.41 6.94 NaN NaN NaN NaN 2.86 20.62 NaN ... 22.38 14.13 2.68 NaN NaN NaN NaN NaN 11.28 NaN
Low 44.38 24.88 6.50 NaN NaN NaN NaN 2.69 20.12 NaN ... 22.12 14.01 2.62 NaN NaN NaN NaN NaN 10.88 NaN
Close 44.81 25.19 6.91 NaN NaN NaN NaN 2.70 20.50 NaN ... 22.38 14.03 2.68 NaN NaN NaN NaN NaN 11.25 NaN
Volume 529400.00 2900400.00 12490000.00 NaN NaN NaN NaN 11788800.00 2163500.00 NaN ... 272200.00 1228200.00 5040396.00 NaN NaN NaN NaN NaN 254800.00 NaN
1993-02-04 Open 0.00 0.00 0.00 NaN NaN NaN NaN 0.00 0.00 NaN ... 0.00 0.00 0.00 NaN NaN NaN NaN NaN 0.00 NaN
High 45.09 26.47 6.97 NaN NaN NaN NaN 2.78 20.88 NaN ... 22.81 14.17 2.77 NaN NaN NaN NaN NaN 11.75 NaN
Low 44.88 25.88 6.78 NaN NaN NaN NaN 2.70 20.12 NaN ... 22.50 14.09 2.66 NaN NaN NaN NaN NaN 11.44 NaN
Close 45.00 26.06 6.84 NaN NaN NaN NaN 2.73 20.12 NaN ... 22.81 14.15 2.67 NaN NaN NaN NaN NaN 11.69 NaN
Volume 531500.00 4122400.00 5190800.00 NaN NaN NaN NaN 6441600.00 1330200.00 NaN ... 162800.00 1675602.00 7033200.00 NaN NaN NaN NaN NaN 317200.00 NaN

25 rows × 503 columns


In [51]:
data_df.unstack().describe()


Out[51]:
SPY MMM ... ZION ZTS
feature Close High Low Open Volume Close High Low Open Volume ... Close High Low Open Volume Close High Low Open Volume
count 6024.000000 6005.000000 6005.000000 6005.000000 6.024000e+03 6024.000000 6024.000000 6024.000000 6024.000000 6.024000e+03 ... 6023.000000 6022.000000 6022.000000 6022.000000 6.023000e+03 987.000000 987.000000 987.000000 987.000000 9.870000e+02
mean 120.379515 121.140626 119.524448 97.583151 5.931430e+07 74.138275 74.734484 73.493715 63.757797 3.009044e+06 ... 38.208409 38.710394 37.703298 29.917592 1.614751e+06 40.142432 40.530537 39.714985 40.149909 4.012389e+06
std 44.215695 44.437438 44.109630 69.694832 7.937370e+07 38.612950 38.757122 38.435405 50.283724 1.932584e+06 ... 21.335254 21.495934 21.175324 26.028264 2.159297e+06 7.749647 7.809476 7.676569 7.749595 3.810260e+06
min 43.410000 43.530000 42.810000 0.000000 0.000000e+00 23.620000 23.940000 23.190000 0.000000 0.000000e+00 ... 6.480000 7.410000 5.900000 0.000000 0.000000e+00 28.400000 28.570000 28.140000 28.480000 4.386320e+05
25% 92.847500 93.720000 91.600000 0.000000 5.380350e+06 44.802500 45.370000 44.250000 0.000000 1.789800e+06 ... 20.625000 20.947500 20.255000 0.000000 2.684500e+05 32.170000 32.500000 31.855000 32.190000 2.378748e+06
50% 119.540000 120.290000 118.770000 115.300000 3.417315e+07 71.500000 72.070000 70.975000 71.530000 2.613242e+06 ... 30.900000 31.295000 30.445000 26.565000 6.121840e+05 42.370000 42.960000 41.580000 42.270000 3.242213e+06
75% 141.412500 142.300000 140.570000 141.170000 7.948068e+07 86.860000 87.502500 86.212500 86.810000 3.720424e+06 ... 55.100000 55.895000 54.360000 51.920000 2.390842e+06 47.145000 47.540000 46.720000 47.155000 4.516790e+06
max 227.760000 228.340000 227.000000 227.410000 8.141804e+08 181.420000 182.270000 181.320000 181.730000 2.874960e+07 ... 88.280000 107.210000 87.810000 88.270000 2.633482e+07 55.380000 55.380000 53.650000 53.930000 6.678948e+07

8 rows × 2515 columns


In [52]:
u_data_df = data_df.unstack()
print(u_data_df.shape)
u_data_df.head()


(6024, 2515)
Out[52]:
SPY MMM ... ZION ZTS
feature Close High Low Open Volume Close High Low Open Volume ... Close High Low Open Volume Close High Low Open Volume
date
1993-01-29 43.94 43.97 43.75 0.0 1003200.0 24.50 24.62 24.47 0.0 1242800.0 ... 10.94 10.94 10.62 0.0 33600.0 NaN NaN NaN NaN NaN
1993-02-01 44.25 44.25 43.97 0.0 480500.0 24.69 24.69 24.47 0.0 749600.0 ... 11.06 11.25 10.75 0.0 32000.0 NaN NaN NaN NaN NaN
1993-02-02 44.34 44.38 44.12 0.0 201300.0 24.72 24.88 24.69 0.0 1233600.0 ... 11.12 11.12 10.88 0.0 251600.0 NaN NaN NaN NaN NaN
1993-02-03 44.81 44.84 44.38 0.0 529400.0 25.19 25.41 24.88 0.0 2900400.0 ... 11.25 11.28 10.88 0.0 254800.0 NaN NaN NaN NaN NaN
1993-02-04 45.00 45.09 44.88 0.0 531500.0 26.06 26.47 25.88 0.0 4122400.0 ... 11.69 11.75 11.44 0.0 317200.0 NaN NaN NaN NaN NaN

5 rows × 2515 columns


In [53]:
len(np.unique(u_data_df.columns.get_level_values(0)))


Out[53]:
503

So, there are 503 symbols, 6024 dates, and 5 features for each.

Let's get the data used for the predictor part:


In [54]:
pred_df = u_data_df.loc[:,(slice(None), 'Close')]
pred_df.columns = pred_df.columns.droplevel('feature')
print(pred_df.shape)
pred_df.head()


(6024, 503)
Out[54]:
SPY MMM ABT ABBV ACN ATVI AYI ADBE AMD AAP ... XEL XRX XLNX XL XYL YHOO YUM ZBH ZION ZTS
date
1993-01-29 43.94 24.50 6.88 NaN NaN NaN NaN 2.59 18.75 NaN ... 22.00 14.28 2.50 NaN NaN NaN NaN NaN 10.94 NaN
1993-02-01 44.25 24.69 6.88 NaN NaN NaN NaN 2.72 19.12 NaN ... 22.19 14.09 2.62 NaN NaN NaN NaN NaN 11.06 NaN
1993-02-02 44.34 24.72 6.53 NaN NaN NaN NaN 2.84 20.25 NaN ... 22.06 14.09 2.64 NaN NaN NaN NaN NaN 11.12 NaN
1993-02-03 44.81 25.19 6.91 NaN NaN NaN NaN 2.70 20.50 NaN ... 22.38 14.03 2.68 NaN NaN NaN NaN NaN 11.25 NaN
1993-02-04 45.00 26.06 6.84 NaN NaN NaN NaN 2.73 20.12 NaN ... 22.81 14.15 2.67 NaN NaN NaN NaN NaN 11.69 NaN

5 rows × 503 columns


In [55]:
missing_df = pred_df.isnull().sum() / pred_df.shape[0]
missing_df.hist(bins=200)
plt.xlabel('Missing data')
plt.ylabel('Number of symbols')
plt.axvline(x=0.01, color='r', label='1% missing data level')
plt.legend()


Out[55]:
<matplotlib.legend.Legend at 0x7fb99b11df28>

In [56]:
description_df = pred_df.describe()
description_df


Out[56]:
SPY MMM ABT ABBV ACN ATVI AYI ADBE AMD AAP ... XEL XRX XLNX XL XYL YHOO YUM ZBH ZION ZTS
count 6024.000000 6024.000000 6024.000000 1023.000000 3886.000000 5837.000000 3798.000000 6023.000000 6022.000000 3797.000000 ... 6024.000000 6024.000000 6023.000000 112.000000 1312.000000 5214.000000 4853.000000 3882.000000 6023.000000 987.000000
mean 120.379515 74.138275 23.928069 55.164673 49.421904 8.717500 69.421403 29.287536 15.965330 64.918667 ... 23.993572 16.756667 28.025208 35.003839 33.941387 24.188189 36.630474 70.848998 38.208409 40.142432
std 44.215695 38.612950 10.034862 8.991563 28.829258 9.591969 64.673705 24.356283 13.338299 47.194735 ... 6.198760 11.937684 16.451688 1.585963 7.540993 16.424537 25.370764 23.899683 21.335254 7.749647
min 43.410000 23.620000 5.750000 33.000000 11.850000 0.290000 9.160000 2.060000 1.620000 12.530000 ... 5.660000 4.170000 2.290000 32.810000 23.000000 0.660000 5.940000 25.020000 6.480000 28.400000
25% 92.847500 44.802500 18.840000 48.835000 26.330000 1.130000 27.815000 7.410000 5.870000 32.630000 ... 20.250000 9.487500 15.425000 33.707500 27.220000 13.230000 13.950000 53.665000 20.625000 32.170000
50% 119.540000 71.500000 23.420000 56.230000 38.925000 6.040000 44.950000 27.030000 14.030000 42.300000 ... 23.230000 13.330000 26.930000 34.320000 34.670000 21.770000 29.480000 68.465000 30.900000 42.370000
75% 141.412500 86.860000 27.720000 62.220000 72.365000 12.030000 71.472500 37.785000 22.347500 82.060000 ... 27.460000 17.660000 38.485000 36.425000 37.415000 34.230000 63.060000 84.515000 55.100000 47.145000
max 227.760000 181.420000 51.200000 71.230000 125.400000 45.470000 279.150000 110.810000 94.810000 200.380000 ... 45.330000 63.690000 97.940000 38.320000 54.750000 118.750000 94.880000 133.090000 88.280000 55.380000

8 rows × 503 columns


In [127]:
description_df.loc['mean'].plot()
plt.title('Mean Close price')
plt.xlabel('Symbol')
plt.ylabel('Price')


Out[127]:
<matplotlib.text.Text at 0x7fb990f9d6a0>

In [58]:
description_df.loc['std'].plot()


Out[58]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fb9912f0cf8>

In [59]:
description_df.loc['mean'].argmax()


Out[59]:
'RIG'

In [60]:
description_df['RIG']


Out[60]:
count      5941.000000
mean      43135.840766
std       50552.663339
min           8.200000
25%          58.840000
50%       34031.250000
75%       64500.000000
max      242100.000000
Name: RIG, dtype: float64

In [61]:
pred_df['RIG'].plot()
plt.ylabel('Close price')
plt.title('RIG')


Out[61]:
<matplotlib.text.Text at 0x7fb991566be0>

From Yahoo Finance:

Transocean Ltd., together with its subsidiaries, provides offshore contract drilling services for oil and gas wells worldwide. The company primarily offers deepwater and harsh environment drilling services. As of February 9, 2017, it owned or had partial ownership interests in, and operated 56 mobile offshore drilling units that consist of 30 ultra-deepwater floaters, 7 harsh environment floaters, 3 deepwater floaters, 6 midwater floaters, and 10 high-specification jackups. The company serves government-controlled oil companies and independent oil companies. Transocean Ltd. was founded in 1953 and is based in Vernier, Switzerland.

The data of that ticker symbol doesn't match with the external sources...

Let's see if there was a problem while downloading the data


In [71]:
from utils import data_sources as ds
%config Application.log_level="DEBUG"
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

In [74]:
rig_df = ds.download_ticker('RIG', ds.START_DATE, ds.END_DATE)


DEBUG:utils.data_sources:sd = 1993-01-22 00:00:00 , ed = 2003-01-20 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=RIG&startdate=Jan+22%2C+1993&enddate=Jan+20%2C+2003&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=RIG&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 0 size: 251
DEBUG:utils.data_sources:Batch 1 of 3
DEBUG:utils.data_sources:(251, 5)
DEBUG:utils.data_sources:             Open   High    Low  Close    Volume
Date                                            
2016-10-20  10.31  10.59  10.27  10.59   9036772
2016-10-21  10.48  10.56  10.38  10.50   8151599
2016-10-24  10.43  10.50   9.97  10.11  13201607
2016-10-25  10.13  10.37   9.99  10.03   8959886
2016-10-26  10.02  10.34   9.86  10.32  11533498
2016-10-27  10.36  10.49  10.24  10.38  11944157
2016-10-28  10.32  10.50  10.06  10.21  12103351
2016-10-31  10.16  10.19   9.60   9.61  13400042
2016-11-01   9.71   9.79   9.40   9.59  11332111
2016-11-02   9.50   9.56   9.10   9.29  20245898
2016-11-03   9.67  10.32   9.57   9.87  27368099
2016-11-04   9.85   9.94   9.65   9.67  16625668
2016-11-07   9.87   9.87   9.60   9.73  11755506
2016-11-08   9.70   9.99   9.57   9.85  12319611
2016-11-09   9.66  10.36   9.66  10.25  14373054
2016-11-10  10.18  10.68  10.18  10.50  15033317
2016-11-11  10.36  10.47   9.94  10.37  14414817
2016-11-14  10.27  10.70  10.14  10.63  21180015
2016-11-15  10.61  11.06  10.61  10.82  12955746
2016-11-16  10.81  10.89  10.53  10.83  12654890
2016-11-17  10.91  11.42  10.91  11.01  14073493
2016-11-18  11.07  11.40  11.00  11.21  14752642
2016-11-21  11.50  11.85  11.45  11.66  16131679
2016-11-22  11.69  11.74  11.31  11.53  14791018
2016-11-23  11.47  11.80  11.42  11.77   7937796
2016-11-25  11.64  11.72  11.50  11.66   5021862
2016-11-28  11.71  11.80  11.33  11.40  11105990
2016-11-29  11.10  11.10  10.62  11.02  22251355
2016-11-30  11.82  13.28  11.82  12.90  43157259
2016-12-01  13.22  14.00  12.96  13.37  32363847
...           ...    ...    ...    ...       ...
2017-09-07   8.72   8.90   8.54   8.70  12028244
2017-09-08   8.64   8.66   8.35   8.47  12492108
2017-09-11   8.40   8.57   8.37   8.49   9259895
2017-09-12   8.53   9.02   8.50   8.79  16177452
2017-09-13   8.82   9.38   8.80   9.21  23033144
2017-09-14   9.33   9.52   9.10   9.22  22587988
2017-09-15   9.20   9.34   9.07   9.24  19083133
2017-09-18   9.23   9.48   9.10   9.29  13913577
2017-09-19   9.35   9.38   9.19   9.35  12896168
2017-09-20   9.36   9.92   9.36   9.77  24758331
2017-09-21   9.56   9.59   8.92   9.08  28086991
2017-09-22   9.09   9.44   9.04   9.32  16036979
2017-09-25   9.67  10.15   9.67  10.03  34404313
2017-09-26   9.96  10.30   9.86  10.24  18783227
2017-09-27  10.29  10.45  10.11  10.43  14579685
2017-09-28  10.55  10.84  10.46  10.59  24523781
2017-09-29  10.53  10.81  10.46  10.76  18860794
2017-10-02  10.38  10.49  10.20  10.33  16884904
2017-10-03  10.30  10.34  10.06  10.28  11682347
2017-10-04  10.26  10.40  10.14  10.29  12037699
2017-10-05  10.24  10.62  10.24  10.54  13428721
2017-10-06  10.38  10.56  10.20  10.31  16729569
2017-10-09  10.35  10.63  10.35  10.51  13483086
2017-10-10  10.72  10.93  10.51  10.52  14186664
2017-10-11  10.50  10.65  10.31  10.36   1864673
2017-10-12  10.56  10.68  10.27  10.50  15385898
2017-10-13  10.63  11.22  10.63  10.86  20493251
2017-10-16  11.05  11.28  11.03  11.05  16595613
2017-10-17  11.03  11.10  10.71  10.79  13703714
2017-10-18  10.53  10.58  10.25  10.31  18185708

[251 rows x 5 columns]
DEBUG:utils.data_sources:----------------------------------------------------------------------------------------------------
DEBUG:utils.data_sources:sd = 2003-01-21 00:00:00 , ed = 2013-01-17 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=RIG&startdate=Jan+21%2C+2003&enddate=Jan+17%2C+2013&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=RIG&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 1 size: 502
DEBUG:utils.data_sources:Batch 2 of 3
DEBUG:utils.data_sources:(502, 5)
DEBUG:utils.data_sources:             Open   High    Low  Close    Volume
Date                                            
2016-10-20  10.31  10.59  10.27  10.59   9036772
2016-10-21  10.48  10.56  10.38  10.50   8151599
2016-10-24  10.43  10.50   9.97  10.11  13201607
2016-10-25  10.13  10.37   9.99  10.03   8959886
2016-10-26  10.02  10.34   9.86  10.32  11533498
2016-10-27  10.36  10.49  10.24  10.38  11944157
2016-10-28  10.32  10.50  10.06  10.21  12103351
2016-10-31  10.16  10.19   9.60   9.61  13400042
2016-11-01   9.71   9.79   9.40   9.59  11332111
2016-11-02   9.50   9.56   9.10   9.29  20245898
2016-11-03   9.67  10.32   9.57   9.87  27368099
2016-11-04   9.85   9.94   9.65   9.67  16625668
2016-11-07   9.87   9.87   9.60   9.73  11755506
2016-11-08   9.70   9.99   9.57   9.85  12319611
2016-11-09   9.66  10.36   9.66  10.25  14373054
2016-11-10  10.18  10.68  10.18  10.50  15033317
2016-11-11  10.36  10.47   9.94  10.37  14414817
2016-11-14  10.27  10.70  10.14  10.63  21180015
2016-11-15  10.61  11.06  10.61  10.82  12955746
2016-11-16  10.81  10.89  10.53  10.83  12654890
2016-11-17  10.91  11.42  10.91  11.01  14073493
2016-11-18  11.07  11.40  11.00  11.21  14752642
2016-11-21  11.50  11.85  11.45  11.66  16131679
2016-11-22  11.69  11.74  11.31  11.53  14791018
2016-11-23  11.47  11.80  11.42  11.77   7937796
2016-11-25  11.64  11.72  11.50  11.66   5021862
2016-11-28  11.71  11.80  11.33  11.40  11105990
2016-11-29  11.10  11.10  10.62  11.02  22251355
2016-11-30  11.82  13.28  11.82  12.90  43157259
2016-12-01  13.22  14.00  12.96  13.37  32363847
...           ...    ...    ...    ...       ...
2017-09-07   8.72   8.90   8.54   8.70  12028244
2017-09-08   8.64   8.66   8.35   8.47  12492108
2017-09-11   8.40   8.57   8.37   8.49   9259895
2017-09-12   8.53   9.02   8.50   8.79  16177452
2017-09-13   8.82   9.38   8.80   9.21  23033144
2017-09-14   9.33   9.52   9.10   9.22  22587988
2017-09-15   9.20   9.34   9.07   9.24  19083133
2017-09-18   9.23   9.48   9.10   9.29  13913577
2017-09-19   9.35   9.38   9.19   9.35  12896168
2017-09-20   9.36   9.92   9.36   9.77  24758331
2017-09-21   9.56   9.59   8.92   9.08  28086991
2017-09-22   9.09   9.44   9.04   9.32  16036979
2017-09-25   9.67  10.15   9.67  10.03  34404313
2017-09-26   9.96  10.30   9.86  10.24  18783227
2017-09-27  10.29  10.45  10.11  10.43  14579685
2017-09-28  10.55  10.84  10.46  10.59  24523781
2017-09-29  10.53  10.81  10.46  10.76  18860794
2017-10-02  10.38  10.49  10.20  10.33  16884904
2017-10-03  10.30  10.34  10.06  10.28  11682347
2017-10-04  10.26  10.40  10.14  10.29  12037699
2017-10-05  10.24  10.62  10.24  10.54  13428721
2017-10-06  10.38  10.56  10.20  10.31  16729569
2017-10-09  10.35  10.63  10.35  10.51  13483086
2017-10-10  10.72  10.93  10.51  10.52  14186664
2017-10-11  10.50  10.65  10.31  10.36   1864673
2017-10-12  10.56  10.68  10.27  10.50  15385898
2017-10-13  10.63  11.22  10.63  10.86  20493251
2017-10-16  11.05  11.28  11.03  11.05  16595613
2017-10-17  11.03  11.10  10.71  10.79  13703714
2017-10-18  10.53  10.58  10.25  10.31  18185708

[502 rows x 5 columns]
DEBUG:utils.data_sources:----------------------------------------------------------------------------------------------------
DEBUG:utils.data_sources:sd = 2013-01-18 00:00:00 , ed = 2017-01-01 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=RIG&startdate=Jan+18%2C+2013&enddate=Jan+01%2C+2017&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=RIG&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 2 size: 753
DEBUG:utils.data_sources:             Open   High    Low  Close    Volume
Date                                            
2016-10-20  10.31  10.59  10.27  10.59   9036772
2016-10-21  10.48  10.56  10.38  10.50   8151599
2016-10-24  10.43  10.50   9.97  10.11  13201607
2016-10-25  10.13  10.37   9.99  10.03   8959886
2016-10-26  10.02  10.34   9.86  10.32  11533498
2016-10-27  10.36  10.49  10.24  10.38  11944157
2016-10-28  10.32  10.50  10.06  10.21  12103351
2016-10-31  10.16  10.19   9.60   9.61  13400042
2016-11-01   9.71   9.79   9.40   9.59  11332111
2016-11-02   9.50   9.56   9.10   9.29  20245898
2016-11-03   9.67  10.32   9.57   9.87  27368099
2016-11-04   9.85   9.94   9.65   9.67  16625668
2016-11-07   9.87   9.87   9.60   9.73  11755506
2016-11-08   9.70   9.99   9.57   9.85  12319611
2016-11-09   9.66  10.36   9.66  10.25  14373054
2016-11-10  10.18  10.68  10.18  10.50  15033317
2016-11-11  10.36  10.47   9.94  10.37  14414817
2016-11-14  10.27  10.70  10.14  10.63  21180015
2016-11-15  10.61  11.06  10.61  10.82  12955746
2016-11-16  10.81  10.89  10.53  10.83  12654890
2016-11-17  10.91  11.42  10.91  11.01  14073493
2016-11-18  11.07  11.40  11.00  11.21  14752642
2016-11-21  11.50  11.85  11.45  11.66  16131679
2016-11-22  11.69  11.74  11.31  11.53  14791018
2016-11-23  11.47  11.80  11.42  11.77   7937796
2016-11-25  11.64  11.72  11.50  11.66   5021862
2016-11-28  11.71  11.80  11.33  11.40  11105990
2016-11-29  11.10  11.10  10.62  11.02  22251355
2016-11-30  11.82  13.28  11.82  12.90  43157259
2016-12-01  13.22  14.00  12.96  13.37  32363847
...           ...    ...    ...    ...       ...
2017-09-07   8.72   8.90   8.54   8.70  12028244
2017-09-08   8.64   8.66   8.35   8.47  12492108
2017-09-11   8.40   8.57   8.37   8.49   9259895
2017-09-12   8.53   9.02   8.50   8.79  16177452
2017-09-13   8.82   9.38   8.80   9.21  23033144
2017-09-14   9.33   9.52   9.10   9.22  22587988
2017-09-15   9.20   9.34   9.07   9.24  19083133
2017-09-18   9.23   9.48   9.10   9.29  13913577
2017-09-19   9.35   9.38   9.19   9.35  12896168
2017-09-20   9.36   9.92   9.36   9.77  24758331
2017-09-21   9.56   9.59   8.92   9.08  28086991
2017-09-22   9.09   9.44   9.04   9.32  16036979
2017-09-25   9.67  10.15   9.67  10.03  34404313
2017-09-26   9.96  10.30   9.86  10.24  18783227
2017-09-27  10.29  10.45  10.11  10.43  14579685
2017-09-28  10.55  10.84  10.46  10.59  24523781
2017-09-29  10.53  10.81  10.46  10.76  18860794
2017-10-02  10.38  10.49  10.20  10.33  16884904
2017-10-03  10.30  10.34  10.06  10.28  11682347
2017-10-04  10.26  10.40  10.14  10.29  12037699
2017-10-05  10.24  10.62  10.24  10.54  13428721
2017-10-06  10.38  10.56  10.20  10.31  16729569
2017-10-09  10.35  10.63  10.35  10.51  13483086
2017-10-10  10.72  10.93  10.51  10.52  14186664
2017-10-11  10.50  10.65  10.31  10.36   1864673
2017-10-12  10.56  10.68  10.27  10.50  15385898
2017-10-13  10.63  11.22  10.63  10.86  20493251
2017-10-16  11.05  11.28  11.03  11.05  16595613
2017-10-17  11.03  11.10  10.71  10.79  13703714
2017-10-18  10.53  10.58  10.25  10.31  18185708

[502 rows x 5 columns]
../../utils/data_sources.py:103: PerformanceWarning: indexing past lexsort depth may impact performance.
  return data_df
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-74-9468062e1390> in <module>()
----> 1 rig_df = ds.download_ticker('RIG', ds.START_DATE, ds.END_DATE)

/home/miguel/github_repos/Machine-Learning-Nanodegree-Capstone/utils/data_sources.py in download_ticker(symbol, start_date, end_date)
     87         log.debug('batch %i size: %i' % (batch_index, raw_df.shape[0]))
     88     log.debug(raw_df[raw_df.index.duplicated()])
---> 89     return raw_to_multiindex(raw_df, symbol)
     90 
     91 

/home/miguel/github_repos/Machine-Learning-Nanodegree-Capstone/utils/data_sources.py in raw_to_multiindex(raw_df, name)
    101     for date in raw_df.index:
    102         for col in raw_df.columns:
--> 103             data_df.loc[date, col][name] = raw_df.loc[date, col].copy()
    104     return data_df
    105 

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
   2427         else:
   2428             # set column
-> 2429             self._set_item(key, value)
   2430 
   2431     def _setitem_slice(self, key, value):

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
   2493 
   2494         self._ensure_valid_index(value)
-> 2495         value = self._sanitize_column(key, value)
   2496         NDFrame._set_item(self, key, value)
   2497 

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in _sanitize_column(self, key, value, broadcast)
   2643 
   2644         if isinstance(value, Series):
-> 2645             value = reindexer(value)
   2646 
   2647         elif isinstance(value, DataFrame):

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in reindexer(value)
   2635                     # duplicate axis
   2636                     if not value.index.is_unique:
-> 2637                         raise e
   2638 
   2639                     # other

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in reindexer(value)
   2630                 # GH 4107
   2631                 try:
-> 2632                     value = value.reindex(self.index)._values
   2633                 except Exception as e:
   2634 

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/series.py in reindex(self, index, **kwargs)
   2424     @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
   2425     def reindex(self, index=None, **kwargs):
-> 2426         return super(Series, self).reindex(index=index, **kwargs)
   2427 
   2428     @Appender(generic._shared_docs['fillna'] % _shared_doc_kwargs)

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
   2402         # perform the reindex on the axes
   2403         return self._reindex_axes(axes, level, limit, tolerance, method,
-> 2404                                   fill_value, copy).__finalize__(self)
   2405 
   2406     def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
   2415             ax = self._get_axis(a)
   2416             new_index, indexer = ax.reindex(labels, level=level, limit=limit,
-> 2417                                             tolerance=tolerance, method=method)
   2418 
   2419             axis = self._get_axis_number(a)

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/base.py in reindex(self, target, method, level, limit, tolerance)
   2836                         raise ValueError("cannot reindex a non-unique index "
   2837                                          "with a method or limit")
-> 2838                     indexer, missing = self.get_indexer_non_unique(target)
   2839 
   2840         if preserve_names and target.nlevels == 1 and target.name != self.name:

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_indexer_non_unique(self, target)
   2657             tgt_values = target._values
   2658 
-> 2659         indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
   2660         return Index(indexer), missing
   2661 

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_indexer_non_unique (pandas/_libs/index.c:7681)()

TypeError: 'NoneType' object is not iterable

In [75]:
rig_df = ds.download_ticker('SPY', ds.START_DATE, ds.END_DATE)


DEBUG:utils.data_sources:sd = 1993-01-22 00:00:00 , ed = 2003-01-20 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=SPY&startdate=Jan+22%2C+1993&enddate=Jan+20%2C+2003&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=SPY&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 0 size: 251
DEBUG:utils.data_sources:Batch 1 of 3
DEBUG:utils.data_sources:(251, 5)
DEBUG:utils.data_sources:              Open    High     Low   Close     Volume
Date                                                 
2016-10-20  213.87  214.53  213.11  213.88   73639830
2016-10-21  212.96  214.08  212.76  213.98   89089092
2016-10-24  215.00  215.32  214.48  214.89   60146592
2016-10-25  214.68  214.98  213.98  214.17   66542329
2016-10-26  213.21  214.42  212.93  213.74   75705478
2016-10-27  214.58  214.62  213.08  213.17   77220213
2016-10-28  213.14  213.93  211.71  212.54  140623183
2016-10-31  212.93  213.19  212.36  212.55   61272507
2016-11-01  212.93  212.99  209.60  211.01  122781818
2016-11-02  210.65  211.10  209.23  209.74  103330806
2016-11-03  209.99  210.24  208.46  208.78   88939346
2016-11-04  208.91  209.89  208.38  208.55  109122059
2016-11-07  211.45  213.19  211.30  213.15  109794861
2016-11-08  212.69  214.77  212.38  214.11  106772138
2016-11-09  212.37  217.10  212.34  216.38  258428972
2016-11-10  217.30  218.31  215.22  216.92  172113313
2016-11-11  216.08  216.70  215.32  216.42  100552732
2016-11-14  217.03  217.27  215.72  216.59   94579982
2016-11-15  217.04  218.28  216.80  218.28   91652580
2016-11-16  217.56  218.14  217.42  217.87   65617697
2016-11-17  218.05  219.06  217.92  218.99   69797191
2016-11-18  219.07  219.27  218.29  218.50   86265751
2016-11-21  219.17  220.18  219.00  220.15   72402638
2016-11-22  220.51  220.79  219.73  220.58   67428957
2016-11-23  219.98  220.76  219.75  220.70   56620237
2016-11-25  221.10  221.56  221.01  221.52   37872255
2016-11-28  221.16  221.48  220.36  220.48   76572511
2016-11-29  220.52  221.44  220.17  220.91   69886690
2016-11-30  221.63  221.82  220.31  220.38  113291793
2016-12-01  220.73  220.73  219.15  219.57   79040487
...            ...     ...     ...     ...        ...
2017-09-07  247.25  247.27  246.40  246.87   58034730
2017-09-08  246.54  247.11  246.30  246.58   63832825
2017-09-11  248.04  249.30  248.02  249.21   71364848
2017-09-12  249.63  250.09  249.42  250.05   56896027
2017-09-13  249.72  250.21  249.59  250.17   59228002
2017-09-14  249.80  250.32  249.60  250.09   95446349
2017-09-15  248.69  249.29  248.57  249.19   95432382
2017-09-18  249.61  250.12  249.28  249.72   46235238
2017-09-19  250.00  250.07  249.60  249.97   47108148
2017-09-20  250.07  250.19  248.92  250.06   59574083
2017-09-21  249.88  249.98  249.18  249.39   48211398
2017-09-22  249.05  249.63  249.02  249.44   51214032
2017-09-25  249.15  249.55  248.08  248.93   57064357
2017-09-26  249.42  249.70  248.80  249.08   54081959
2017-09-27  249.88  250.49  248.87  250.05   71852148
2017-09-28  249.73  250.44  249.63  250.35   44778841
2017-09-29  250.34  251.32  250.13  251.23   85578002
2017-10-02  251.49  252.32  251.29  252.32   59022985
2017-10-03  252.46  252.89  252.23  252.86   66810169
2017-10-04  252.69  253.44  252.56  253.16   55953619
2017-10-05  253.54  254.68  253.20  254.66   63522757
2017-10-06  254.15  254.70  253.85  254.37   80645998
2017-10-09  254.63  254.70  253.65  253.95   35803138
2017-10-10  254.60  255.05  253.98  254.62   43057363
2017-10-11  254.51  254.64  254.32  254.32    2963114
2017-10-12  254.66  255.06  254.36  254.64   47065144
2017-10-13  255.14  255.27  254.64  254.95   54800435
2017-10-16  255.21  255.51  254.82  255.29   38221675
2017-10-17  255.23  255.52  254.98  255.47   31560964
2017-10-18  255.90  255.95  255.50  255.72   40888330

[251 rows x 5 columns]
DEBUG:utils.data_sources:----------------------------------------------------------------------------------------------------
DEBUG:utils.data_sources:sd = 2003-01-21 00:00:00 , ed = 2013-01-17 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=SPY&startdate=Jan+21%2C+2003&enddate=Jan+17%2C+2013&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=SPY&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 1 size: 502
DEBUG:utils.data_sources:Batch 2 of 3
DEBUG:utils.data_sources:(502, 5)
DEBUG:utils.data_sources:              Open    High     Low   Close     Volume
Date                                                 
2016-10-20  213.87  214.53  213.11  213.88   73639830
2016-10-21  212.96  214.08  212.76  213.98   89089092
2016-10-24  215.00  215.32  214.48  214.89   60146592
2016-10-25  214.68  214.98  213.98  214.17   66542329
2016-10-26  213.21  214.42  212.93  213.74   75705478
2016-10-27  214.58  214.62  213.08  213.17   77220213
2016-10-28  213.14  213.93  211.71  212.54  140623183
2016-10-31  212.93  213.19  212.36  212.55   61272507
2016-11-01  212.93  212.99  209.60  211.01  122781818
2016-11-02  210.65  211.10  209.23  209.74  103330806
2016-11-03  209.99  210.24  208.46  208.78   88939346
2016-11-04  208.91  209.89  208.38  208.55  109122059
2016-11-07  211.45  213.19  211.30  213.15  109794861
2016-11-08  212.69  214.77  212.38  214.11  106772138
2016-11-09  212.37  217.10  212.34  216.38  258428972
2016-11-10  217.30  218.31  215.22  216.92  172113313
2016-11-11  216.08  216.70  215.32  216.42  100552732
2016-11-14  217.03  217.27  215.72  216.59   94579982
2016-11-15  217.04  218.28  216.80  218.28   91652580
2016-11-16  217.56  218.14  217.42  217.87   65617697
2016-11-17  218.05  219.06  217.92  218.99   69797191
2016-11-18  219.07  219.27  218.29  218.50   86265751
2016-11-21  219.17  220.18  219.00  220.15   72402638
2016-11-22  220.51  220.79  219.73  220.58   67428957
2016-11-23  219.98  220.76  219.75  220.70   56620237
2016-11-25  221.10  221.56  221.01  221.52   37872255
2016-11-28  221.16  221.48  220.36  220.48   76572511
2016-11-29  220.52  221.44  220.17  220.91   69886690
2016-11-30  221.63  221.82  220.31  220.38  113291793
2016-12-01  220.73  220.73  219.15  219.57   79040487
...            ...     ...     ...     ...        ...
2017-09-07  247.25  247.27  246.40  246.87   58034730
2017-09-08  246.54  247.11  246.30  246.58   63832825
2017-09-11  248.04  249.30  248.02  249.21   71364848
2017-09-12  249.63  250.09  249.42  250.05   56896027
2017-09-13  249.72  250.21  249.59  250.17   59228002
2017-09-14  249.80  250.32  249.60  250.09   95446349
2017-09-15  248.69  249.29  248.57  249.19   95432382
2017-09-18  249.61  250.12  249.28  249.72   46235238
2017-09-19  250.00  250.07  249.60  249.97   47108148
2017-09-20  250.07  250.19  248.92  250.06   59574083
2017-09-21  249.88  249.98  249.18  249.39   48211398
2017-09-22  249.05  249.63  249.02  249.44   51214032
2017-09-25  249.15  249.55  248.08  248.93   57064357
2017-09-26  249.42  249.70  248.80  249.08   54081959
2017-09-27  249.88  250.49  248.87  250.05   71852148
2017-09-28  249.73  250.44  249.63  250.35   44778841
2017-09-29  250.34  251.32  250.13  251.23   85578002
2017-10-02  251.49  252.32  251.29  252.32   59022985
2017-10-03  252.46  252.89  252.23  252.86   66810169
2017-10-04  252.69  253.44  252.56  253.16   55953619
2017-10-05  253.54  254.68  253.20  254.66   63522757
2017-10-06  254.15  254.70  253.85  254.37   80645998
2017-10-09  254.63  254.70  253.65  253.95   35803138
2017-10-10  254.60  255.05  253.98  254.62   43057363
2017-10-11  254.51  254.64  254.32  254.32    2963114
2017-10-12  254.66  255.06  254.36  254.64   47065144
2017-10-13  255.14  255.27  254.64  254.95   54800435
2017-10-16  255.21  255.51  254.82  255.29   38221675
2017-10-17  255.23  255.52  254.98  255.47   31560964
2017-10-18  255.90  255.95  255.50  255.72   40888330

[502 rows x 5 columns]
DEBUG:utils.data_sources:----------------------------------------------------------------------------------------------------
DEBUG:utils.data_sources:sd = 2013-01-18 00:00:00 , ed = 2017-01-01 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=SPY&startdate=Jan+18%2C+2013&enddate=Jan+01%2C+2017&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=SPY&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 2 size: 753
DEBUG:utils.data_sources:              Open    High     Low   Close     Volume
Date                                                 
2016-10-20  213.87  214.53  213.11  213.88   73639830
2016-10-21  212.96  214.08  212.76  213.98   89089092
2016-10-24  215.00  215.32  214.48  214.89   60146592
2016-10-25  214.68  214.98  213.98  214.17   66542329
2016-10-26  213.21  214.42  212.93  213.74   75705478
2016-10-27  214.58  214.62  213.08  213.17   77220213
2016-10-28  213.14  213.93  211.71  212.54  140623183
2016-10-31  212.93  213.19  212.36  212.55   61272507
2016-11-01  212.93  212.99  209.60  211.01  122781818
2016-11-02  210.65  211.10  209.23  209.74  103330806
2016-11-03  209.99  210.24  208.46  208.78   88939346
2016-11-04  208.91  209.89  208.38  208.55  109122059
2016-11-07  211.45  213.19  211.30  213.15  109794861
2016-11-08  212.69  214.77  212.38  214.11  106772138
2016-11-09  212.37  217.10  212.34  216.38  258428972
2016-11-10  217.30  218.31  215.22  216.92  172113313
2016-11-11  216.08  216.70  215.32  216.42  100552732
2016-11-14  217.03  217.27  215.72  216.59   94579982
2016-11-15  217.04  218.28  216.80  218.28   91652580
2016-11-16  217.56  218.14  217.42  217.87   65617697
2016-11-17  218.05  219.06  217.92  218.99   69797191
2016-11-18  219.07  219.27  218.29  218.50   86265751
2016-11-21  219.17  220.18  219.00  220.15   72402638
2016-11-22  220.51  220.79  219.73  220.58   67428957
2016-11-23  219.98  220.76  219.75  220.70   56620237
2016-11-25  221.10  221.56  221.01  221.52   37872255
2016-11-28  221.16  221.48  220.36  220.48   76572511
2016-11-29  220.52  221.44  220.17  220.91   69886690
2016-11-30  221.63  221.82  220.31  220.38  113291793
2016-12-01  220.73  220.73  219.15  219.57   79040487
...            ...     ...     ...     ...        ...
2017-09-07  247.25  247.27  246.40  246.87   58034730
2017-09-08  246.54  247.11  246.30  246.58   63832825
2017-09-11  248.04  249.30  248.02  249.21   71364848
2017-09-12  249.63  250.09  249.42  250.05   56896027
2017-09-13  249.72  250.21  249.59  250.17   59228002
2017-09-14  249.80  250.32  249.60  250.09   95446349
2017-09-15  248.69  249.29  248.57  249.19   95432382
2017-09-18  249.61  250.12  249.28  249.72   46235238
2017-09-19  250.00  250.07  249.60  249.97   47108148
2017-09-20  250.07  250.19  248.92  250.06   59574083
2017-09-21  249.88  249.98  249.18  249.39   48211398
2017-09-22  249.05  249.63  249.02  249.44   51214032
2017-09-25  249.15  249.55  248.08  248.93   57064357
2017-09-26  249.42  249.70  248.80  249.08   54081959
2017-09-27  249.88  250.49  248.87  250.05   71852148
2017-09-28  249.73  250.44  249.63  250.35   44778841
2017-09-29  250.34  251.32  250.13  251.23   85578002
2017-10-02  251.49  252.32  251.29  252.32   59022985
2017-10-03  252.46  252.89  252.23  252.86   66810169
2017-10-04  252.69  253.44  252.56  253.16   55953619
2017-10-05  253.54  254.68  253.20  254.66   63522757
2017-10-06  254.15  254.70  253.85  254.37   80645998
2017-10-09  254.63  254.70  253.65  253.95   35803138
2017-10-10  254.60  255.05  253.98  254.62   43057363
2017-10-11  254.51  254.64  254.32  254.32    2963114
2017-10-12  254.66  255.06  254.36  254.64   47065144
2017-10-13  255.14  255.27  254.64  254.95   54800435
2017-10-16  255.21  255.51  254.82  255.29   38221675
2017-10-17  255.23  255.52  254.98  255.47   31560964
2017-10-18  255.90  255.95  255.50  255.72   40888330

[502 rows x 5 columns]
../../utils/data_sources.py:103: PerformanceWarning: indexing past lexsort depth may impact performance.
  data_df.loc[date, col][name] = raw_df.loc[date, col].copy()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-75-1390cbd86ee7> in <module>()
----> 1 rig_df = ds.download_ticker('SPY', ds.START_DATE, ds.END_DATE)

/home/miguel/github_repos/Machine-Learning-Nanodegree-Capstone/utils/data_sources.py in download_ticker(symbol, start_date, end_date)
     87         log.debug('batch %i size: %i' % (batch_index, raw_df.shape[0]))
     88     log.debug(raw_df[raw_df.index.duplicated()])
---> 89     return raw_to_multiindex(raw_df, symbol)
     90 
     91 

/home/miguel/github_repos/Machine-Learning-Nanodegree-Capstone/utils/data_sources.py in raw_to_multiindex(raw_df, name)
    101     for date in raw_df.index:
    102         for col in raw_df.columns:
--> 103             data_df.loc[date, col][name] = raw_df.loc[date, col].copy()
    104     return data_df
    105 

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
   2427         else:
   2428             # set column
-> 2429             self._set_item(key, value)
   2430 
   2431     def _setitem_slice(self, key, value):

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
   2493 
   2494         self._ensure_valid_index(value)
-> 2495         value = self._sanitize_column(key, value)
   2496         NDFrame._set_item(self, key, value)
   2497 

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in _sanitize_column(self, key, value, broadcast)
   2643 
   2644         if isinstance(value, Series):
-> 2645             value = reindexer(value)
   2646 
   2647         elif isinstance(value, DataFrame):

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in reindexer(value)
   2635                     # duplicate axis
   2636                     if not value.index.is_unique:
-> 2637                         raise e
   2638 
   2639                     # other

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in reindexer(value)
   2630                 # GH 4107
   2631                 try:
-> 2632                     value = value.reindex(self.index)._values
   2633                 except Exception as e:
   2634 

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/series.py in reindex(self, index, **kwargs)
   2424     @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
   2425     def reindex(self, index=None, **kwargs):
-> 2426         return super(Series, self).reindex(index=index, **kwargs)
   2427 
   2428     @Appender(generic._shared_docs['fillna'] % _shared_doc_kwargs)

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
   2402         # perform the reindex on the axes
   2403         return self._reindex_axes(axes, level, limit, tolerance, method,
-> 2404                                   fill_value, copy).__finalize__(self)
   2405 
   2406     def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
   2415             ax = self._get_axis(a)
   2416             new_index, indexer = ax.reindex(labels, level=level, limit=limit,
-> 2417                                             tolerance=tolerance, method=method)
   2418 
   2419             axis = self._get_axis_number(a)

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/base.py in reindex(self, target, method, level, limit, tolerance)
   2836                         raise ValueError("cannot reindex a non-unique index "
   2837                                          "with a method or limit")
-> 2838                     indexer, missing = self.get_indexer_non_unique(target)
   2839 
   2840         if preserve_names and target.nlevels == 1 and target.name != self.name:

/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_indexer_non_unique(self, target)
   2657             tgt_values = target._values
   2658 
-> 2659         indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
   2660         return Index(indexer), missing
   2661 

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_indexer_non_unique (pandas/_libs/index.c:7681)()

TypeError: 'NoneType' object is not iterable

In [78]:
symbol='SPY'
import datetime as dt
from pandas_datareader import data
raw_df = data.DataReader(name=symbol,
                             data_source=ds.DATA_SOURCE,
                             start=ds.START_DATE,
                             end=ds.START_DATE + dt.timedelta(days=2000))


DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=SPY&startdate=Jan+22%2C+1993&enddate=Jul+15%2C+1998&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=SPY&output=csv HTTP/1.1" 200 None

In [79]:
raw_df


Out[79]:
Open High Low Close Volume
Date
2016-10-20 213.87 214.53 213.11 213.88 73639830
2016-10-21 212.96 214.08 212.76 213.98 89089092
2016-10-24 215.00 215.32 214.48 214.89 60146592
2016-10-25 214.68 214.98 213.98 214.17 66542329
2016-10-26 213.21 214.42 212.93 213.74 75705478
2016-10-27 214.58 214.62 213.08 213.17 77220213
2016-10-28 213.14 213.93 211.71 212.54 140623183
2016-10-31 212.93 213.19 212.36 212.55 61272507
2016-11-01 212.93 212.99 209.60 211.01 122781818
2016-11-02 210.65 211.10 209.23 209.74 103330806
2016-11-03 209.99 210.24 208.46 208.78 88939346
2016-11-04 208.91 209.89 208.38 208.55 109122059
2016-11-07 211.45 213.19 211.30 213.15 109794861
2016-11-08 212.69 214.77 212.38 214.11 106772138
2016-11-09 212.37 217.10 212.34 216.38 258428972
2016-11-10 217.30 218.31 215.22 216.92 172113313
2016-11-11 216.08 216.70 215.32 216.42 100552732
2016-11-14 217.03 217.27 215.72 216.59 94579982
2016-11-15 217.04 218.28 216.80 218.28 91652580
2016-11-16 217.56 218.14 217.42 217.87 65617697
2016-11-17 218.05 219.06 217.92 218.99 69797191
2016-11-18 219.07 219.27 218.29 218.50 86265751
2016-11-21 219.17 220.18 219.00 220.15 72402638
2016-11-22 220.51 220.79 219.73 220.58 67428957
2016-11-23 219.98 220.76 219.75 220.70 56620237
2016-11-25 221.10 221.56 221.01 221.52 37872255
2016-11-28 221.16 221.48 220.36 220.48 76572511
2016-11-29 220.52 221.44 220.17 220.91 69886690
2016-11-30 221.63 221.82 220.31 220.38 113291793
2016-12-01 220.73 220.73 219.15 219.57 79040487
... ... ... ... ... ...
2017-09-07 247.25 247.27 246.40 246.87 58034730
2017-09-08 246.54 247.11 246.30 246.58 63832825
2017-09-11 248.04 249.30 248.02 249.21 71364848
2017-09-12 249.63 250.09 249.42 250.05 56896027
2017-09-13 249.72 250.21 249.59 250.17 59228002
2017-09-14 249.80 250.32 249.60 250.09 95446349
2017-09-15 248.69 249.29 248.57 249.19 95432382
2017-09-18 249.61 250.12 249.28 249.72 46235238
2017-09-19 250.00 250.07 249.60 249.97 47108148
2017-09-20 250.07 250.19 248.92 250.06 59574083
2017-09-21 249.88 249.98 249.18 249.39 48211398
2017-09-22 249.05 249.63 249.02 249.44 51214032
2017-09-25 249.15 249.55 248.08 248.93 57064357
2017-09-26 249.42 249.70 248.80 249.08 54081959
2017-09-27 249.88 250.49 248.87 250.05 71852148
2017-09-28 249.73 250.44 249.63 250.35 44778841
2017-09-29 250.34 251.32 250.13 251.23 85578002
2017-10-02 251.49 252.32 251.29 252.32 59022985
2017-10-03 252.46 252.89 252.23 252.86 66810169
2017-10-04 252.69 253.44 252.56 253.16 55953619
2017-10-05 253.54 254.68 253.20 254.66 63522757
2017-10-06 254.15 254.70 253.85 254.37 80645998
2017-10-09 254.63 254.70 253.65 253.95 35803138
2017-10-10 254.60 255.05 253.98 254.62 43057363
2017-10-11 254.51 254.64 254.32 254.32 2963114
2017-10-12 254.66 255.06 254.36 254.64 47065144
2017-10-13 255.14 255.27 254.64 254.95 54800435
2017-10-16 255.21 255.51 254.82 255.29 38221675
2017-10-17 255.23 255.52 254.98 255.47 31560964
2017-10-18 255.90 255.95 255.50 255.72 40888330

251 rows × 5 columns


In [80]:
raw_2_df = data.DataReader(name=symbol,
                             data_source=ds.DATA_SOURCE,
                             start=ds.START_DATE + dt.timedelta(days=2000),
                             end=ds.START_DATE + dt.timedelta(days=4000))


DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=SPY&startdate=Jul+15%2C+1998&enddate=Jan+05%2C+2004&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=SPY&output=csv HTTP/1.1" 200 None

In [81]:
raw_2_df


Out[81]:
Open High Low Close Volume
Date
2016-10-21 212.96 214.08 212.76 213.98 89089092
2016-10-24 215.00 215.32 214.48 214.89 60146592
2016-10-25 214.68 214.98 213.98 214.17 66542329
2016-10-26 213.21 214.42 212.93 213.74 75705478
2016-10-27 214.58 214.62 213.08 213.17 77220213
2016-10-28 213.14 213.93 211.71 212.54 140623183
2016-10-31 212.93 213.19 212.36 212.55 61272507
2016-11-01 212.93 212.99 209.60 211.01 122781818
2016-11-02 210.65 211.10 209.23 209.74 103330806
2016-11-03 209.99 210.24 208.46 208.78 88939346
2016-11-04 208.91 209.89 208.38 208.55 109122059
2016-11-07 211.45 213.19 211.30 213.15 109794861
2016-11-08 212.69 214.77 212.38 214.11 106772138
2016-11-09 212.37 217.10 212.34 216.38 258428972
2016-11-10 217.30 218.31 215.22 216.92 172113313
2016-11-11 216.08 216.70 215.32 216.42 100552732
2016-11-14 217.03 217.27 215.72 216.59 94579982
2016-11-15 217.04 218.28 216.80 218.28 91652580
2016-11-16 217.56 218.14 217.42 217.87 65617697
2016-11-17 218.05 219.06 217.92 218.99 69797191
2016-11-18 219.07 219.27 218.29 218.50 86265751
2016-11-21 219.17 220.18 219.00 220.15 72402638
2016-11-22 220.51 220.79 219.73 220.58 67428957
2016-11-23 219.98 220.76 219.75 220.70 56620237
2016-11-25 221.10 221.56 221.01 221.52 37872255
2016-11-28 221.16 221.48 220.36 220.48 76572511
2016-11-29 220.52 221.44 220.17 220.91 69886690
2016-11-30 221.63 221.82 220.31 220.38 113291793
2016-12-01 220.73 220.73 219.15 219.57 79040487
2016-12-02 219.67 220.25 219.26 219.68 74840347
... ... ... ... ... ...
2017-09-07 247.25 247.27 246.40 246.87 58034730
2017-09-08 246.54 247.11 246.30 246.58 63832825
2017-09-11 248.04 249.30 248.02 249.21 71364848
2017-09-12 249.63 250.09 249.42 250.05 56896027
2017-09-13 249.72 250.21 249.59 250.17 59228002
2017-09-14 249.80 250.32 249.60 250.09 95446349
2017-09-15 248.69 249.29 248.57 249.19 95432382
2017-09-18 249.61 250.12 249.28 249.72 46235238
2017-09-19 250.00 250.07 249.60 249.97 47108148
2017-09-20 250.07 250.19 248.92 250.06 59574083
2017-09-21 249.88 249.98 249.18 249.39 48211398
2017-09-22 249.05 249.63 249.02 249.44 51214032
2017-09-25 249.15 249.55 248.08 248.93 57064357
2017-09-26 249.42 249.70 248.80 249.08 54081959
2017-09-27 249.88 250.49 248.87 250.05 71852148
2017-09-28 249.73 250.44 249.63 250.35 44778841
2017-09-29 250.34 251.32 250.13 251.23 85578002
2017-10-02 251.49 252.32 251.29 252.32 59022985
2017-10-03 252.46 252.89 252.23 252.86 66810169
2017-10-04 252.69 253.44 252.56 253.16 55953619
2017-10-05 253.54 254.68 253.20 254.66 63522757
2017-10-06 254.15 254.70 253.85 254.37 80645998
2017-10-09 254.63 254.70 253.65 253.95 35803138
2017-10-10 254.60 255.05 253.98 254.62 43057363
2017-10-11 254.51 254.64 254.32 254.32 2963114
2017-10-12 254.66 255.06 254.36 254.64 47065144
2017-10-13 255.14 255.27 254.64 254.95 54800435
2017-10-16 255.21 255.51 254.82 255.29 38221675
2017-10-17 255.23 255.52 254.98 255.47 31560964
2017-10-18 255.90 255.95 255.50 255.72 40888330

250 rows × 5 columns

! Clearly the API of the DataReader or the one of Google Finance has changed


In [95]:
filtered_pred_df = pred_df.loc[:,pred_df.columns != 'RIG']
print(pred_df.shape)
filtered_pred_df.shape


(6024, 503)
Out[95]:
(6024, 502)

In [97]:
description_f_df = filtered_pred_df.describe()
description_f_df


Out[97]:
SPY MMM ABT ABBV ACN ATVI AYI ADBE AMD AAP ... XEL XRX XLNX XL XYL YHOO YUM ZBH ZION ZTS
count 6024.000000 6024.000000 6024.000000 1023.000000 3886.000000 5837.000000 3798.000000 6023.000000 6022.000000 3797.000000 ... 6024.000000 6024.000000 6023.000000 112.000000 1312.000000 5214.000000 4853.000000 3882.000000 6023.000000 987.000000
mean 120.379515 74.138275 23.928069 55.164673 49.421904 8.717500 69.421403 29.287536 15.965330 64.918667 ... 23.993572 16.756667 28.025208 35.003839 33.941387 24.188189 36.630474 70.848998 38.208409 40.142432
std 44.215695 38.612950 10.034862 8.991563 28.829258 9.591969 64.673705 24.356283 13.338299 47.194735 ... 6.198760 11.937684 16.451688 1.585963 7.540993 16.424537 25.370764 23.899683 21.335254 7.749647
min 43.410000 23.620000 5.750000 33.000000 11.850000 0.290000 9.160000 2.060000 1.620000 12.530000 ... 5.660000 4.170000 2.290000 32.810000 23.000000 0.660000 5.940000 25.020000 6.480000 28.400000
25% 92.847500 44.802500 18.840000 48.835000 26.330000 1.130000 27.815000 7.410000 5.870000 32.630000 ... 20.250000 9.487500 15.425000 33.707500 27.220000 13.230000 13.950000 53.665000 20.625000 32.170000
50% 119.540000 71.500000 23.420000 56.230000 38.925000 6.040000 44.950000 27.030000 14.030000 42.300000 ... 23.230000 13.330000 26.930000 34.320000 34.670000 21.770000 29.480000 68.465000 30.900000 42.370000
75% 141.412500 86.860000 27.720000 62.220000 72.365000 12.030000 71.472500 37.785000 22.347500 82.060000 ... 27.460000 17.660000 38.485000 36.425000 37.415000 34.230000 63.060000 84.515000 55.100000 47.145000
max 227.760000 181.420000 51.200000 71.230000 125.400000 45.470000 279.150000 110.810000 94.810000 200.380000 ... 45.330000 63.690000 97.940000 38.320000 54.750000 118.750000 94.880000 133.090000 88.280000 55.380000

8 rows × 502 columns


In [99]:
description_f_df.loc['mean'].plot()
plt.figure()
description_f_df.loc['std'].plot()


Out[99]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fb991248160>

In [117]:
description_f_df.loc['mean'].hist(bins=100)
plt.title('Mean Close price')
plt.xlabel('Price value')
plt.ylabel('Number of symbols')
plt.figure()
description_f_df.loc['std'].hist(bins=100)
plt.title('Standard Deviation of the Close price')
plt.xlabel('Std value')
plt.ylabel('Number of symbols')
plt.figure()
description_f_df.loc['max'].hist(bins=100)
plt.title('Maximum Close price')
plt.xlabel('Price value')
plt.ylabel('Number of symbols')


Out[117]:
<matplotlib.text.Text at 0x7fb992c66be0>

In [104]:
description_f_df.loc['mean'].argmax()


Out[104]:
'AIG'

In [119]:
description_f_df.loc['max'].argmax()


Out[119]:
'AIG'

From Yahoo Finance

American International Group, Inc. provides insurance products for commercial, institutional, and individual customers primarily in the United States, Europe, and Japan. The company’s Commercial Insurance segment offers general liability, environmental, commercial automobile liability, workers’ compensation, excess casualty, and crisis management insurance products, as well as various risk-sharing and other customized structured programs; commercial, industrial, and energy-related property insurance; aerospace, political risk, trade credit, surety, and marine insurance; and various insurance products for small and medium sized enterprises. It also provides professional liability insurance products for a range of businesses and risks, including directors and officers liability, fidelity, employment practices, fiduciary liability, cybersecurity risk, kidnap and ransom, and errors and omissions insurance. This segment sells its products through independent retail and wholesale brokers. Its Consumer Insurance segment offers fixed annuities, fixed index annuities, variable annuities, and retail mutual funds; group mutual funds, fixed annuities, and variable annuities, as well as individual annuity and investment products, and financial planning and advisory services; and term life and universal life insurance. Its products include personal auto and property insurance, voluntary and sponsor-paid personal accident, and supplemental health products; travel insurance products; and extended warranty insurance. This segment sells its products through independent marketing organizations, independent insurance agents, financial advisors, direct marketing, banks, wirehouses, and broker-dealers. The company also provides stable value wrap products, and structured settlement and terminal funding annuities; and corporate- and bank-owned life insurance and guaranteed investment contracts. American International Group, Inc. was founded in 1919 and is based in New York, New York.

The data seems ok this time.

Let's filter the symbols with more than 1% missing data


In [108]:
from utils import preprocessing as pp

no_missing_df = pp.drop_irrelevant_symbols(filtered_pred_df, 0.99)
print(no_missing_df.shape)
no_missing_df.head()


(6024, 286)
Out[108]:
SPY MMM ABT ADBE AMD AES AET AFL APD ALK ... HCN WDC WY WHR WFM WMB XEL XRX XLNX ZION
date
1993-01-29 43.94 24.50 6.88 2.59 18.75 4.41 6.42 4.49 21.94 4.19 ... 22.50 4.50 41.50 46.12 1.81 6.88 22.00 14.28 2.50 10.94
1993-02-01 44.25 24.69 6.88 2.72 19.12 4.53 6.64 4.52 22.38 4.19 ... 23.00 4.50 42.00 46.50 1.78 7.00 22.19 14.09 2.62 11.06
1993-02-02 44.34 24.72 6.53 2.84 20.25 4.53 6.62 4.57 22.31 4.16 ... 22.75 4.62 42.12 46.50 1.78 6.98 22.06 14.09 2.64 11.12
1993-02-03 44.81 25.19 6.91 2.70 20.50 4.49 6.50 4.65 22.69 4.22 ... 23.25 4.69 42.00 47.12 1.81 6.84 22.38 14.03 2.68 11.25
1993-02-04 45.00 26.06 6.84 2.73 20.12 4.49 6.73 4.84 23.19 4.38 ... 23.00 4.56 42.62 48.38 1.84 6.90 22.81 14.15 2.67 11.69

5 rows × 286 columns

Let's check if the filtering was done ok.


In [120]:
missing_df = no_missing_df.isnull().sum() / no_missing_df.shape[0]
missing_df.hist(bins=200)
plt.xlabel('Missing data')
plt.ylabel('Number of symbols')
plt.axvline(x=0.01, color='r', label='1% missing data level')
plt.legend(loc='upper center')


Out[120]:
<matplotlib.legend.Legend at 0x7fb99359f710>

Ok, that's good

There are too many symbols for a scatter matrix

Let's look at the data used for the Automatic Trader


In [122]:
auto_data_df = data_df['SPY'].unstack()
print(auto_data_df.shape)
auto_data_df.head()


(6024, 5)
Out[122]:
feature Close High Low Open Volume
date
1993-01-29 43.94 43.97 43.75 0.0 1003200.0
1993-02-01 44.25 44.25 43.97 0.0 480500.0
1993-02-02 44.34 44.38 44.12 0.0 201300.0
1993-02-03 44.81 44.84 44.38 0.0 529400.0
1993-02-04 45.00 45.09 44.88 0.0 531500.0

In [123]:
desc_auto_df = auto_data_df.describe()
desc_auto_df


Out[123]:
feature Close High Low Open Volume
count 6024.000000 6005.000000 6005.000000 6005.000000 6.024000e+03
mean 120.379515 121.140626 119.524448 97.583151 5.931430e+07
std 44.215695 44.437438 44.109630 69.694832 7.937370e+07
min 43.410000 43.530000 42.810000 0.000000 0.000000e+00
25% 92.847500 93.720000 91.600000 0.000000 5.380350e+06
50% 119.540000 120.290000 118.770000 115.300000 3.417315e+07
75% 141.412500 142.300000 140.570000 141.170000 7.948068e+07
max 227.760000 228.340000 227.000000 227.410000 8.141804e+08

In [126]:
auto_data_df['Close'].hist(bins=100)
plt.title('Close price')
plt.xlabel('Price value')
plt.ylabel('Number of symbols')
plt.figure()
auto_data_df['Volume'].hist(bins=100)
plt.title('Volume')
plt.xlabel('Volume value')
plt.ylabel('Number of symbols')


Out[126]:
<matplotlib.text.Text at 0x7fb9924a5b00>

In [129]:
pd.scatter_matrix(auto_data_df[['Close', 'Volume']], alpha = 0.3, figsize = (14,8), diagonal = 'kde');


/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/ipykernel_launcher.py:1: FutureWarning: pandas.scatter_matrix is deprecated. Use pandas.plotting.scatter_matrix instead
  """Entry point for launching an IPython kernel.

In [ ]: