In [115]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import scipy.optimize as spo
import sys
import seaborn as sns
%matplotlib inline
%pylab inline
pylab.rcParams['figure.figsize'] = (20.0, 10.0)
%load_ext autoreload
%autoreload 2
sys.path.append('../../')
Populating the interactive namespace from numpy and matplotlib
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['f']
`%matplotlib` prevents importing * from pylab and numpy
"\n`%matplotlib` prevents importing * from pylab and numpy"
In [50]:
data_df = pd.read_pickle('../../data/data_df.pkl')
print(data_df.shape)
data_df.head(25)
(30120, 503)
Out[50]:
SPY
MMM
ABT
ABBV
ACN
ATVI
AYI
ADBE
AMD
AAP
...
XEL
XRX
XLNX
XL
XYL
YHOO
YUM
ZBH
ZION
ZTS
date
feature
1993-01-29
Open
0.00
0.00
0.00
NaN
NaN
NaN
NaN
0.00
0.00
NaN
...
0.00
0.00
0.00
NaN
NaN
NaN
NaN
NaN
0.00
NaN
High
43.97
24.62
6.88
NaN
NaN
NaN
NaN
2.64
19.12
NaN
...
22.00
14.32
2.50
NaN
NaN
NaN
NaN
NaN
10.94
NaN
Low
43.75
24.47
6.75
NaN
NaN
NaN
NaN
2.56
18.62
NaN
...
21.88
13.84
2.46
NaN
NaN
NaN
NaN
NaN
10.62
NaN
Close
43.94
24.50
6.88
NaN
NaN
NaN
NaN
2.59
18.75
NaN
...
22.00
14.28
2.50
NaN
NaN
NaN
NaN
NaN
10.94
NaN
Volume
1003200.00
1242800.00
4638400.00
NaN
NaN
NaN
NaN
4990400.00
730600.00
NaN
...
87800.00
7633602.00
1745196.00
NaN
NaN
NaN
NaN
NaN
33600.00
NaN
1993-02-01
Open
0.00
0.00
0.00
NaN
NaN
NaN
NaN
0.00
0.00
NaN
...
0.00
0.00
0.00
NaN
NaN
NaN
NaN
NaN
0.00
NaN
High
44.25
24.69
6.97
NaN
NaN
NaN
NaN
2.75
19.25
NaN
...
22.19
14.24
2.62
NaN
NaN
NaN
NaN
NaN
11.25
NaN
Low
43.97
24.47
6.78
NaN
NaN
NaN
NaN
2.53
18.50
NaN
...
21.94
14.07
2.46
NaN
NaN
NaN
NaN
NaN
10.75
NaN
Close
44.25
24.69
6.88
NaN
NaN
NaN
NaN
2.72
19.12
NaN
...
22.19
14.09
2.62
NaN
NaN
NaN
NaN
NaN
11.06
NaN
Volume
480500.00
749600.00
4450400.00
NaN
NaN
NaN
NaN
8670400.00
750300.00
NaN
...
72400.00
3001200.00
3574800.00
NaN
NaN
NaN
NaN
NaN
32000.00
NaN
1993-02-02
Open
0.00
0.00
0.00
NaN
NaN
NaN
NaN
0.00
0.00
NaN
...
0.00
0.00
0.00
NaN
NaN
NaN
NaN
NaN
0.00
NaN
High
44.38
24.88
6.88
NaN
NaN
NaN
NaN
2.86
20.25
NaN
...
22.12
14.13
2.65
NaN
NaN
NaN
NaN
NaN
11.12
NaN
Low
44.12
24.69
6.53
NaN
NaN
NaN
NaN
2.73
19.38
NaN
...
21.88
13.99
2.60
NaN
NaN
NaN
NaN
NaN
10.88
NaN
Close
44.34
24.72
6.53
NaN
NaN
NaN
NaN
2.84
20.25
NaN
...
22.06
14.09
2.64
NaN
NaN
NaN
NaN
NaN
11.12
NaN
Volume
201300.00
1233600.00
10030000.00
NaN
NaN
NaN
NaN
11491200.00
1418100.00
NaN
...
242200.00
1388598.00
2652396.00
NaN
NaN
NaN
NaN
NaN
251600.00
NaN
1993-02-03
Open
0.00
0.00
0.00
NaN
NaN
NaN
NaN
0.00
0.00
NaN
...
0.00
0.00
0.00
NaN
NaN
NaN
NaN
NaN
0.00
NaN
High
44.84
25.41
6.94
NaN
NaN
NaN
NaN
2.86
20.62
NaN
...
22.38
14.13
2.68
NaN
NaN
NaN
NaN
NaN
11.28
NaN
Low
44.38
24.88
6.50
NaN
NaN
NaN
NaN
2.69
20.12
NaN
...
22.12
14.01
2.62
NaN
NaN
NaN
NaN
NaN
10.88
NaN
Close
44.81
25.19
6.91
NaN
NaN
NaN
NaN
2.70
20.50
NaN
...
22.38
14.03
2.68
NaN
NaN
NaN
NaN
NaN
11.25
NaN
Volume
529400.00
2900400.00
12490000.00
NaN
NaN
NaN
NaN
11788800.00
2163500.00
NaN
...
272200.00
1228200.00
5040396.00
NaN
NaN
NaN
NaN
NaN
254800.00
NaN
1993-02-04
Open
0.00
0.00
0.00
NaN
NaN
NaN
NaN
0.00
0.00
NaN
...
0.00
0.00
0.00
NaN
NaN
NaN
NaN
NaN
0.00
NaN
High
45.09
26.47
6.97
NaN
NaN
NaN
NaN
2.78
20.88
NaN
...
22.81
14.17
2.77
NaN
NaN
NaN
NaN
NaN
11.75
NaN
Low
44.88
25.88
6.78
NaN
NaN
NaN
NaN
2.70
20.12
NaN
...
22.50
14.09
2.66
NaN
NaN
NaN
NaN
NaN
11.44
NaN
Close
45.00
26.06
6.84
NaN
NaN
NaN
NaN
2.73
20.12
NaN
...
22.81
14.15
2.67
NaN
NaN
NaN
NaN
NaN
11.69
NaN
Volume
531500.00
4122400.00
5190800.00
NaN
NaN
NaN
NaN
6441600.00
1330200.00
NaN
...
162800.00
1675602.00
7033200.00
NaN
NaN
NaN
NaN
NaN
317200.00
NaN
25 rows × 503 columns
In [51]:
data_df.unstack().describe()
Out[51]:
SPY
MMM
...
ZION
ZTS
feature
Close
High
Low
Open
Volume
Close
High
Low
Open
Volume
...
Close
High
Low
Open
Volume
Close
High
Low
Open
Volume
count
6024.000000
6005.000000
6005.000000
6005.000000
6.024000e+03
6024.000000
6024.000000
6024.000000
6024.000000
6.024000e+03
...
6023.000000
6022.000000
6022.000000
6022.000000
6.023000e+03
987.000000
987.000000
987.000000
987.000000
9.870000e+02
mean
120.379515
121.140626
119.524448
97.583151
5.931430e+07
74.138275
74.734484
73.493715
63.757797
3.009044e+06
...
38.208409
38.710394
37.703298
29.917592
1.614751e+06
40.142432
40.530537
39.714985
40.149909
4.012389e+06
std
44.215695
44.437438
44.109630
69.694832
7.937370e+07
38.612950
38.757122
38.435405
50.283724
1.932584e+06
...
21.335254
21.495934
21.175324
26.028264
2.159297e+06
7.749647
7.809476
7.676569
7.749595
3.810260e+06
min
43.410000
43.530000
42.810000
0.000000
0.000000e+00
23.620000
23.940000
23.190000
0.000000
0.000000e+00
...
6.480000
7.410000
5.900000
0.000000
0.000000e+00
28.400000
28.570000
28.140000
28.480000
4.386320e+05
25%
92.847500
93.720000
91.600000
0.000000
5.380350e+06
44.802500
45.370000
44.250000
0.000000
1.789800e+06
...
20.625000
20.947500
20.255000
0.000000
2.684500e+05
32.170000
32.500000
31.855000
32.190000
2.378748e+06
50%
119.540000
120.290000
118.770000
115.300000
3.417315e+07
71.500000
72.070000
70.975000
71.530000
2.613242e+06
...
30.900000
31.295000
30.445000
26.565000
6.121840e+05
42.370000
42.960000
41.580000
42.270000
3.242213e+06
75%
141.412500
142.300000
140.570000
141.170000
7.948068e+07
86.860000
87.502500
86.212500
86.810000
3.720424e+06
...
55.100000
55.895000
54.360000
51.920000
2.390842e+06
47.145000
47.540000
46.720000
47.155000
4.516790e+06
max
227.760000
228.340000
227.000000
227.410000
8.141804e+08
181.420000
182.270000
181.320000
181.730000
2.874960e+07
...
88.280000
107.210000
87.810000
88.270000
2.633482e+07
55.380000
55.380000
53.650000
53.930000
6.678948e+07
8 rows × 2515 columns
In [52]:
u_data_df = data_df.unstack()
print(u_data_df.shape)
u_data_df.head()
(6024, 2515)
Out[52]:
SPY
MMM
...
ZION
ZTS
feature
Close
High
Low
Open
Volume
Close
High
Low
Open
Volume
...
Close
High
Low
Open
Volume
Close
High
Low
Open
Volume
date
1993-01-29
43.94
43.97
43.75
0.0
1003200.0
24.50
24.62
24.47
0.0
1242800.0
...
10.94
10.94
10.62
0.0
33600.0
NaN
NaN
NaN
NaN
NaN
1993-02-01
44.25
44.25
43.97
0.0
480500.0
24.69
24.69
24.47
0.0
749600.0
...
11.06
11.25
10.75
0.0
32000.0
NaN
NaN
NaN
NaN
NaN
1993-02-02
44.34
44.38
44.12
0.0
201300.0
24.72
24.88
24.69
0.0
1233600.0
...
11.12
11.12
10.88
0.0
251600.0
NaN
NaN
NaN
NaN
NaN
1993-02-03
44.81
44.84
44.38
0.0
529400.0
25.19
25.41
24.88
0.0
2900400.0
...
11.25
11.28
10.88
0.0
254800.0
NaN
NaN
NaN
NaN
NaN
1993-02-04
45.00
45.09
44.88
0.0
531500.0
26.06
26.47
25.88
0.0
4122400.0
...
11.69
11.75
11.44
0.0
317200.0
NaN
NaN
NaN
NaN
NaN
5 rows × 2515 columns
In [53]:
len(np.unique(u_data_df.columns.get_level_values(0)))
Out[53]:
503
In [54]:
pred_df = u_data_df.loc[:,(slice(None), 'Close')]
pred_df.columns = pred_df.columns.droplevel('feature')
print(pred_df.shape)
pred_df.head()
(6024, 503)
Out[54]:
SPY
MMM
ABT
ABBV
ACN
ATVI
AYI
ADBE
AMD
AAP
...
XEL
XRX
XLNX
XL
XYL
YHOO
YUM
ZBH
ZION
ZTS
date
1993-01-29
43.94
24.50
6.88
NaN
NaN
NaN
NaN
2.59
18.75
NaN
...
22.00
14.28
2.50
NaN
NaN
NaN
NaN
NaN
10.94
NaN
1993-02-01
44.25
24.69
6.88
NaN
NaN
NaN
NaN
2.72
19.12
NaN
...
22.19
14.09
2.62
NaN
NaN
NaN
NaN
NaN
11.06
NaN
1993-02-02
44.34
24.72
6.53
NaN
NaN
NaN
NaN
2.84
20.25
NaN
...
22.06
14.09
2.64
NaN
NaN
NaN
NaN
NaN
11.12
NaN
1993-02-03
44.81
25.19
6.91
NaN
NaN
NaN
NaN
2.70
20.50
NaN
...
22.38
14.03
2.68
NaN
NaN
NaN
NaN
NaN
11.25
NaN
1993-02-04
45.00
26.06
6.84
NaN
NaN
NaN
NaN
2.73
20.12
NaN
...
22.81
14.15
2.67
NaN
NaN
NaN
NaN
NaN
11.69
NaN
5 rows × 503 columns
In [55]:
missing_df = pred_df.isnull().sum() / pred_df.shape[0]
missing_df.hist(bins=200)
plt.xlabel('Missing data')
plt.ylabel('Number of symbols')
plt.axvline(x=0.01, color='r', label='1% missing data level')
plt.legend()
Out[55]:
<matplotlib.legend.Legend at 0x7fb99b11df28>
In [56]:
description_df = pred_df.describe()
description_df
Out[56]:
SPY
MMM
ABT
ABBV
ACN
ATVI
AYI
ADBE
AMD
AAP
...
XEL
XRX
XLNX
XL
XYL
YHOO
YUM
ZBH
ZION
ZTS
count
6024.000000
6024.000000
6024.000000
1023.000000
3886.000000
5837.000000
3798.000000
6023.000000
6022.000000
3797.000000
...
6024.000000
6024.000000
6023.000000
112.000000
1312.000000
5214.000000
4853.000000
3882.000000
6023.000000
987.000000
mean
120.379515
74.138275
23.928069
55.164673
49.421904
8.717500
69.421403
29.287536
15.965330
64.918667
...
23.993572
16.756667
28.025208
35.003839
33.941387
24.188189
36.630474
70.848998
38.208409
40.142432
std
44.215695
38.612950
10.034862
8.991563
28.829258
9.591969
64.673705
24.356283
13.338299
47.194735
...
6.198760
11.937684
16.451688
1.585963
7.540993
16.424537
25.370764
23.899683
21.335254
7.749647
min
43.410000
23.620000
5.750000
33.000000
11.850000
0.290000
9.160000
2.060000
1.620000
12.530000
...
5.660000
4.170000
2.290000
32.810000
23.000000
0.660000
5.940000
25.020000
6.480000
28.400000
25%
92.847500
44.802500
18.840000
48.835000
26.330000
1.130000
27.815000
7.410000
5.870000
32.630000
...
20.250000
9.487500
15.425000
33.707500
27.220000
13.230000
13.950000
53.665000
20.625000
32.170000
50%
119.540000
71.500000
23.420000
56.230000
38.925000
6.040000
44.950000
27.030000
14.030000
42.300000
...
23.230000
13.330000
26.930000
34.320000
34.670000
21.770000
29.480000
68.465000
30.900000
42.370000
75%
141.412500
86.860000
27.720000
62.220000
72.365000
12.030000
71.472500
37.785000
22.347500
82.060000
...
27.460000
17.660000
38.485000
36.425000
37.415000
34.230000
63.060000
84.515000
55.100000
47.145000
max
227.760000
181.420000
51.200000
71.230000
125.400000
45.470000
279.150000
110.810000
94.810000
200.380000
...
45.330000
63.690000
97.940000
38.320000
54.750000
118.750000
94.880000
133.090000
88.280000
55.380000
8 rows × 503 columns
In [127]:
description_df.loc['mean'].plot()
plt.title('Mean Close price')
plt.xlabel('Symbol')
plt.ylabel('Price')
Out[127]:
<matplotlib.text.Text at 0x7fb990f9d6a0>
In [58]:
description_df.loc['std'].plot()
Out[58]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fb9912f0cf8>
In [59]:
description_df.loc['mean'].argmax()
Out[59]:
'RIG'
In [60]:
description_df['RIG']
Out[60]:
count 5941.000000
mean 43135.840766
std 50552.663339
min 8.200000
25% 58.840000
50% 34031.250000
75% 64500.000000
max 242100.000000
Name: RIG, dtype: float64
In [61]:
pred_df['RIG'].plot()
plt.ylabel('Close price')
plt.title('RIG')
Out[61]:
<matplotlib.text.Text at 0x7fb991566be0>
Transocean Ltd., together with its subsidiaries, provides offshore contract drilling services for oil and gas wells worldwide. The company primarily offers deepwater and harsh environment drilling services. As of February 9, 2017, it owned or had partial ownership interests in, and operated 56 mobile offshore drilling units that consist of 30 ultra-deepwater floaters, 7 harsh environment floaters, 3 deepwater floaters, 6 midwater floaters, and 10 high-specification jackups. The company serves government-controlled oil companies and independent oil companies. Transocean Ltd. was founded in 1953 and is based in Vernier, Switzerland.
In [71]:
from utils import data_sources as ds
%config Application.log_level="DEBUG"
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
In [74]:
rig_df = ds.download_ticker('RIG', ds.START_DATE, ds.END_DATE)
DEBUG:utils.data_sources:sd = 1993-01-22 00:00:00 , ed = 2003-01-20 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=RIG&startdate=Jan+22%2C+1993&enddate=Jan+20%2C+2003&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=RIG&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 0 size: 251
DEBUG:utils.data_sources:Batch 1 of 3
DEBUG:utils.data_sources:(251, 5)
DEBUG:utils.data_sources: Open High Low Close Volume
Date
2016-10-20 10.31 10.59 10.27 10.59 9036772
2016-10-21 10.48 10.56 10.38 10.50 8151599
2016-10-24 10.43 10.50 9.97 10.11 13201607
2016-10-25 10.13 10.37 9.99 10.03 8959886
2016-10-26 10.02 10.34 9.86 10.32 11533498
2016-10-27 10.36 10.49 10.24 10.38 11944157
2016-10-28 10.32 10.50 10.06 10.21 12103351
2016-10-31 10.16 10.19 9.60 9.61 13400042
2016-11-01 9.71 9.79 9.40 9.59 11332111
2016-11-02 9.50 9.56 9.10 9.29 20245898
2016-11-03 9.67 10.32 9.57 9.87 27368099
2016-11-04 9.85 9.94 9.65 9.67 16625668
2016-11-07 9.87 9.87 9.60 9.73 11755506
2016-11-08 9.70 9.99 9.57 9.85 12319611
2016-11-09 9.66 10.36 9.66 10.25 14373054
2016-11-10 10.18 10.68 10.18 10.50 15033317
2016-11-11 10.36 10.47 9.94 10.37 14414817
2016-11-14 10.27 10.70 10.14 10.63 21180015
2016-11-15 10.61 11.06 10.61 10.82 12955746
2016-11-16 10.81 10.89 10.53 10.83 12654890
2016-11-17 10.91 11.42 10.91 11.01 14073493
2016-11-18 11.07 11.40 11.00 11.21 14752642
2016-11-21 11.50 11.85 11.45 11.66 16131679
2016-11-22 11.69 11.74 11.31 11.53 14791018
2016-11-23 11.47 11.80 11.42 11.77 7937796
2016-11-25 11.64 11.72 11.50 11.66 5021862
2016-11-28 11.71 11.80 11.33 11.40 11105990
2016-11-29 11.10 11.10 10.62 11.02 22251355
2016-11-30 11.82 13.28 11.82 12.90 43157259
2016-12-01 13.22 14.00 12.96 13.37 32363847
... ... ... ... ... ...
2017-09-07 8.72 8.90 8.54 8.70 12028244
2017-09-08 8.64 8.66 8.35 8.47 12492108
2017-09-11 8.40 8.57 8.37 8.49 9259895
2017-09-12 8.53 9.02 8.50 8.79 16177452
2017-09-13 8.82 9.38 8.80 9.21 23033144
2017-09-14 9.33 9.52 9.10 9.22 22587988
2017-09-15 9.20 9.34 9.07 9.24 19083133
2017-09-18 9.23 9.48 9.10 9.29 13913577
2017-09-19 9.35 9.38 9.19 9.35 12896168
2017-09-20 9.36 9.92 9.36 9.77 24758331
2017-09-21 9.56 9.59 8.92 9.08 28086991
2017-09-22 9.09 9.44 9.04 9.32 16036979
2017-09-25 9.67 10.15 9.67 10.03 34404313
2017-09-26 9.96 10.30 9.86 10.24 18783227
2017-09-27 10.29 10.45 10.11 10.43 14579685
2017-09-28 10.55 10.84 10.46 10.59 24523781
2017-09-29 10.53 10.81 10.46 10.76 18860794
2017-10-02 10.38 10.49 10.20 10.33 16884904
2017-10-03 10.30 10.34 10.06 10.28 11682347
2017-10-04 10.26 10.40 10.14 10.29 12037699
2017-10-05 10.24 10.62 10.24 10.54 13428721
2017-10-06 10.38 10.56 10.20 10.31 16729569
2017-10-09 10.35 10.63 10.35 10.51 13483086
2017-10-10 10.72 10.93 10.51 10.52 14186664
2017-10-11 10.50 10.65 10.31 10.36 1864673
2017-10-12 10.56 10.68 10.27 10.50 15385898
2017-10-13 10.63 11.22 10.63 10.86 20493251
2017-10-16 11.05 11.28 11.03 11.05 16595613
2017-10-17 11.03 11.10 10.71 10.79 13703714
2017-10-18 10.53 10.58 10.25 10.31 18185708
[251 rows x 5 columns]
DEBUG:utils.data_sources:----------------------------------------------------------------------------------------------------
DEBUG:utils.data_sources:sd = 2003-01-21 00:00:00 , ed = 2013-01-17 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=RIG&startdate=Jan+21%2C+2003&enddate=Jan+17%2C+2013&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=RIG&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 1 size: 502
DEBUG:utils.data_sources:Batch 2 of 3
DEBUG:utils.data_sources:(502, 5)
DEBUG:utils.data_sources: Open High Low Close Volume
Date
2016-10-20 10.31 10.59 10.27 10.59 9036772
2016-10-21 10.48 10.56 10.38 10.50 8151599
2016-10-24 10.43 10.50 9.97 10.11 13201607
2016-10-25 10.13 10.37 9.99 10.03 8959886
2016-10-26 10.02 10.34 9.86 10.32 11533498
2016-10-27 10.36 10.49 10.24 10.38 11944157
2016-10-28 10.32 10.50 10.06 10.21 12103351
2016-10-31 10.16 10.19 9.60 9.61 13400042
2016-11-01 9.71 9.79 9.40 9.59 11332111
2016-11-02 9.50 9.56 9.10 9.29 20245898
2016-11-03 9.67 10.32 9.57 9.87 27368099
2016-11-04 9.85 9.94 9.65 9.67 16625668
2016-11-07 9.87 9.87 9.60 9.73 11755506
2016-11-08 9.70 9.99 9.57 9.85 12319611
2016-11-09 9.66 10.36 9.66 10.25 14373054
2016-11-10 10.18 10.68 10.18 10.50 15033317
2016-11-11 10.36 10.47 9.94 10.37 14414817
2016-11-14 10.27 10.70 10.14 10.63 21180015
2016-11-15 10.61 11.06 10.61 10.82 12955746
2016-11-16 10.81 10.89 10.53 10.83 12654890
2016-11-17 10.91 11.42 10.91 11.01 14073493
2016-11-18 11.07 11.40 11.00 11.21 14752642
2016-11-21 11.50 11.85 11.45 11.66 16131679
2016-11-22 11.69 11.74 11.31 11.53 14791018
2016-11-23 11.47 11.80 11.42 11.77 7937796
2016-11-25 11.64 11.72 11.50 11.66 5021862
2016-11-28 11.71 11.80 11.33 11.40 11105990
2016-11-29 11.10 11.10 10.62 11.02 22251355
2016-11-30 11.82 13.28 11.82 12.90 43157259
2016-12-01 13.22 14.00 12.96 13.37 32363847
... ... ... ... ... ...
2017-09-07 8.72 8.90 8.54 8.70 12028244
2017-09-08 8.64 8.66 8.35 8.47 12492108
2017-09-11 8.40 8.57 8.37 8.49 9259895
2017-09-12 8.53 9.02 8.50 8.79 16177452
2017-09-13 8.82 9.38 8.80 9.21 23033144
2017-09-14 9.33 9.52 9.10 9.22 22587988
2017-09-15 9.20 9.34 9.07 9.24 19083133
2017-09-18 9.23 9.48 9.10 9.29 13913577
2017-09-19 9.35 9.38 9.19 9.35 12896168
2017-09-20 9.36 9.92 9.36 9.77 24758331
2017-09-21 9.56 9.59 8.92 9.08 28086991
2017-09-22 9.09 9.44 9.04 9.32 16036979
2017-09-25 9.67 10.15 9.67 10.03 34404313
2017-09-26 9.96 10.30 9.86 10.24 18783227
2017-09-27 10.29 10.45 10.11 10.43 14579685
2017-09-28 10.55 10.84 10.46 10.59 24523781
2017-09-29 10.53 10.81 10.46 10.76 18860794
2017-10-02 10.38 10.49 10.20 10.33 16884904
2017-10-03 10.30 10.34 10.06 10.28 11682347
2017-10-04 10.26 10.40 10.14 10.29 12037699
2017-10-05 10.24 10.62 10.24 10.54 13428721
2017-10-06 10.38 10.56 10.20 10.31 16729569
2017-10-09 10.35 10.63 10.35 10.51 13483086
2017-10-10 10.72 10.93 10.51 10.52 14186664
2017-10-11 10.50 10.65 10.31 10.36 1864673
2017-10-12 10.56 10.68 10.27 10.50 15385898
2017-10-13 10.63 11.22 10.63 10.86 20493251
2017-10-16 11.05 11.28 11.03 11.05 16595613
2017-10-17 11.03 11.10 10.71 10.79 13703714
2017-10-18 10.53 10.58 10.25 10.31 18185708
[502 rows x 5 columns]
DEBUG:utils.data_sources:----------------------------------------------------------------------------------------------------
DEBUG:utils.data_sources:sd = 2013-01-18 00:00:00 , ed = 2017-01-01 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=RIG&startdate=Jan+18%2C+2013&enddate=Jan+01%2C+2017&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=RIG&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 2 size: 753
DEBUG:utils.data_sources: Open High Low Close Volume
Date
2016-10-20 10.31 10.59 10.27 10.59 9036772
2016-10-21 10.48 10.56 10.38 10.50 8151599
2016-10-24 10.43 10.50 9.97 10.11 13201607
2016-10-25 10.13 10.37 9.99 10.03 8959886
2016-10-26 10.02 10.34 9.86 10.32 11533498
2016-10-27 10.36 10.49 10.24 10.38 11944157
2016-10-28 10.32 10.50 10.06 10.21 12103351
2016-10-31 10.16 10.19 9.60 9.61 13400042
2016-11-01 9.71 9.79 9.40 9.59 11332111
2016-11-02 9.50 9.56 9.10 9.29 20245898
2016-11-03 9.67 10.32 9.57 9.87 27368099
2016-11-04 9.85 9.94 9.65 9.67 16625668
2016-11-07 9.87 9.87 9.60 9.73 11755506
2016-11-08 9.70 9.99 9.57 9.85 12319611
2016-11-09 9.66 10.36 9.66 10.25 14373054
2016-11-10 10.18 10.68 10.18 10.50 15033317
2016-11-11 10.36 10.47 9.94 10.37 14414817
2016-11-14 10.27 10.70 10.14 10.63 21180015
2016-11-15 10.61 11.06 10.61 10.82 12955746
2016-11-16 10.81 10.89 10.53 10.83 12654890
2016-11-17 10.91 11.42 10.91 11.01 14073493
2016-11-18 11.07 11.40 11.00 11.21 14752642
2016-11-21 11.50 11.85 11.45 11.66 16131679
2016-11-22 11.69 11.74 11.31 11.53 14791018
2016-11-23 11.47 11.80 11.42 11.77 7937796
2016-11-25 11.64 11.72 11.50 11.66 5021862
2016-11-28 11.71 11.80 11.33 11.40 11105990
2016-11-29 11.10 11.10 10.62 11.02 22251355
2016-11-30 11.82 13.28 11.82 12.90 43157259
2016-12-01 13.22 14.00 12.96 13.37 32363847
... ... ... ... ... ...
2017-09-07 8.72 8.90 8.54 8.70 12028244
2017-09-08 8.64 8.66 8.35 8.47 12492108
2017-09-11 8.40 8.57 8.37 8.49 9259895
2017-09-12 8.53 9.02 8.50 8.79 16177452
2017-09-13 8.82 9.38 8.80 9.21 23033144
2017-09-14 9.33 9.52 9.10 9.22 22587988
2017-09-15 9.20 9.34 9.07 9.24 19083133
2017-09-18 9.23 9.48 9.10 9.29 13913577
2017-09-19 9.35 9.38 9.19 9.35 12896168
2017-09-20 9.36 9.92 9.36 9.77 24758331
2017-09-21 9.56 9.59 8.92 9.08 28086991
2017-09-22 9.09 9.44 9.04 9.32 16036979
2017-09-25 9.67 10.15 9.67 10.03 34404313
2017-09-26 9.96 10.30 9.86 10.24 18783227
2017-09-27 10.29 10.45 10.11 10.43 14579685
2017-09-28 10.55 10.84 10.46 10.59 24523781
2017-09-29 10.53 10.81 10.46 10.76 18860794
2017-10-02 10.38 10.49 10.20 10.33 16884904
2017-10-03 10.30 10.34 10.06 10.28 11682347
2017-10-04 10.26 10.40 10.14 10.29 12037699
2017-10-05 10.24 10.62 10.24 10.54 13428721
2017-10-06 10.38 10.56 10.20 10.31 16729569
2017-10-09 10.35 10.63 10.35 10.51 13483086
2017-10-10 10.72 10.93 10.51 10.52 14186664
2017-10-11 10.50 10.65 10.31 10.36 1864673
2017-10-12 10.56 10.68 10.27 10.50 15385898
2017-10-13 10.63 11.22 10.63 10.86 20493251
2017-10-16 11.05 11.28 11.03 11.05 16595613
2017-10-17 11.03 11.10 10.71 10.79 13703714
2017-10-18 10.53 10.58 10.25 10.31 18185708
[502 rows x 5 columns]
../../utils/data_sources.py:103: PerformanceWarning: indexing past lexsort depth may impact performance.
return data_df
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-74-9468062e1390> in <module>()
----> 1 rig_df = ds.download_ticker('RIG', ds.START_DATE, ds.END_DATE)
/home/miguel/github_repos/Machine-Learning-Nanodegree-Capstone/utils/data_sources.py in download_ticker(symbol, start_date, end_date)
87 log.debug('batch %i size: %i' % (batch_index, raw_df.shape[0]))
88 log.debug(raw_df[raw_df.index.duplicated()])
---> 89 return raw_to_multiindex(raw_df, symbol)
90
91
/home/miguel/github_repos/Machine-Learning-Nanodegree-Capstone/utils/data_sources.py in raw_to_multiindex(raw_df, name)
101 for date in raw_df.index:
102 for col in raw_df.columns:
--> 103 data_df.loc[date, col][name] = raw_df.loc[date, col].copy()
104 return data_df
105
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
2427 else:
2428 # set column
-> 2429 self._set_item(key, value)
2430
2431 def _setitem_slice(self, key, value):
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
2493
2494 self._ensure_valid_index(value)
-> 2495 value = self._sanitize_column(key, value)
2496 NDFrame._set_item(self, key, value)
2497
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in _sanitize_column(self, key, value, broadcast)
2643
2644 if isinstance(value, Series):
-> 2645 value = reindexer(value)
2646
2647 elif isinstance(value, DataFrame):
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in reindexer(value)
2635 # duplicate axis
2636 if not value.index.is_unique:
-> 2637 raise e
2638
2639 # other
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in reindexer(value)
2630 # GH 4107
2631 try:
-> 2632 value = value.reindex(self.index)._values
2633 except Exception as e:
2634
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/series.py in reindex(self, index, **kwargs)
2424 @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
2425 def reindex(self, index=None, **kwargs):
-> 2426 return super(Series, self).reindex(index=index, **kwargs)
2427
2428 @Appender(generic._shared_docs['fillna'] % _shared_doc_kwargs)
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
2402 # perform the reindex on the axes
2403 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 2404 fill_value, copy).__finalize__(self)
2405
2406 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
2415 ax = self._get_axis(a)
2416 new_index, indexer = ax.reindex(labels, level=level, limit=limit,
-> 2417 tolerance=tolerance, method=method)
2418
2419 axis = self._get_axis_number(a)
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/base.py in reindex(self, target, method, level, limit, tolerance)
2836 raise ValueError("cannot reindex a non-unique index "
2837 "with a method or limit")
-> 2838 indexer, missing = self.get_indexer_non_unique(target)
2839
2840 if preserve_names and target.nlevels == 1 and target.name != self.name:
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_indexer_non_unique(self, target)
2657 tgt_values = target._values
2658
-> 2659 indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
2660 return Index(indexer), missing
2661
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_indexer_non_unique (pandas/_libs/index.c:7681)()
TypeError: 'NoneType' object is not iterable
In [75]:
rig_df = ds.download_ticker('SPY', ds.START_DATE, ds.END_DATE)
DEBUG:utils.data_sources:sd = 1993-01-22 00:00:00 , ed = 2003-01-20 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=SPY&startdate=Jan+22%2C+1993&enddate=Jan+20%2C+2003&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=SPY&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 0 size: 251
DEBUG:utils.data_sources:Batch 1 of 3
DEBUG:utils.data_sources:(251, 5)
DEBUG:utils.data_sources: Open High Low Close Volume
Date
2016-10-20 213.87 214.53 213.11 213.88 73639830
2016-10-21 212.96 214.08 212.76 213.98 89089092
2016-10-24 215.00 215.32 214.48 214.89 60146592
2016-10-25 214.68 214.98 213.98 214.17 66542329
2016-10-26 213.21 214.42 212.93 213.74 75705478
2016-10-27 214.58 214.62 213.08 213.17 77220213
2016-10-28 213.14 213.93 211.71 212.54 140623183
2016-10-31 212.93 213.19 212.36 212.55 61272507
2016-11-01 212.93 212.99 209.60 211.01 122781818
2016-11-02 210.65 211.10 209.23 209.74 103330806
2016-11-03 209.99 210.24 208.46 208.78 88939346
2016-11-04 208.91 209.89 208.38 208.55 109122059
2016-11-07 211.45 213.19 211.30 213.15 109794861
2016-11-08 212.69 214.77 212.38 214.11 106772138
2016-11-09 212.37 217.10 212.34 216.38 258428972
2016-11-10 217.30 218.31 215.22 216.92 172113313
2016-11-11 216.08 216.70 215.32 216.42 100552732
2016-11-14 217.03 217.27 215.72 216.59 94579982
2016-11-15 217.04 218.28 216.80 218.28 91652580
2016-11-16 217.56 218.14 217.42 217.87 65617697
2016-11-17 218.05 219.06 217.92 218.99 69797191
2016-11-18 219.07 219.27 218.29 218.50 86265751
2016-11-21 219.17 220.18 219.00 220.15 72402638
2016-11-22 220.51 220.79 219.73 220.58 67428957
2016-11-23 219.98 220.76 219.75 220.70 56620237
2016-11-25 221.10 221.56 221.01 221.52 37872255
2016-11-28 221.16 221.48 220.36 220.48 76572511
2016-11-29 220.52 221.44 220.17 220.91 69886690
2016-11-30 221.63 221.82 220.31 220.38 113291793
2016-12-01 220.73 220.73 219.15 219.57 79040487
... ... ... ... ... ...
2017-09-07 247.25 247.27 246.40 246.87 58034730
2017-09-08 246.54 247.11 246.30 246.58 63832825
2017-09-11 248.04 249.30 248.02 249.21 71364848
2017-09-12 249.63 250.09 249.42 250.05 56896027
2017-09-13 249.72 250.21 249.59 250.17 59228002
2017-09-14 249.80 250.32 249.60 250.09 95446349
2017-09-15 248.69 249.29 248.57 249.19 95432382
2017-09-18 249.61 250.12 249.28 249.72 46235238
2017-09-19 250.00 250.07 249.60 249.97 47108148
2017-09-20 250.07 250.19 248.92 250.06 59574083
2017-09-21 249.88 249.98 249.18 249.39 48211398
2017-09-22 249.05 249.63 249.02 249.44 51214032
2017-09-25 249.15 249.55 248.08 248.93 57064357
2017-09-26 249.42 249.70 248.80 249.08 54081959
2017-09-27 249.88 250.49 248.87 250.05 71852148
2017-09-28 249.73 250.44 249.63 250.35 44778841
2017-09-29 250.34 251.32 250.13 251.23 85578002
2017-10-02 251.49 252.32 251.29 252.32 59022985
2017-10-03 252.46 252.89 252.23 252.86 66810169
2017-10-04 252.69 253.44 252.56 253.16 55953619
2017-10-05 253.54 254.68 253.20 254.66 63522757
2017-10-06 254.15 254.70 253.85 254.37 80645998
2017-10-09 254.63 254.70 253.65 253.95 35803138
2017-10-10 254.60 255.05 253.98 254.62 43057363
2017-10-11 254.51 254.64 254.32 254.32 2963114
2017-10-12 254.66 255.06 254.36 254.64 47065144
2017-10-13 255.14 255.27 254.64 254.95 54800435
2017-10-16 255.21 255.51 254.82 255.29 38221675
2017-10-17 255.23 255.52 254.98 255.47 31560964
2017-10-18 255.90 255.95 255.50 255.72 40888330
[251 rows x 5 columns]
DEBUG:utils.data_sources:----------------------------------------------------------------------------------------------------
DEBUG:utils.data_sources:sd = 2003-01-21 00:00:00 , ed = 2013-01-17 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=SPY&startdate=Jan+21%2C+2003&enddate=Jan+17%2C+2013&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=SPY&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 1 size: 502
DEBUG:utils.data_sources:Batch 2 of 3
DEBUG:utils.data_sources:(502, 5)
DEBUG:utils.data_sources: Open High Low Close Volume
Date
2016-10-20 213.87 214.53 213.11 213.88 73639830
2016-10-21 212.96 214.08 212.76 213.98 89089092
2016-10-24 215.00 215.32 214.48 214.89 60146592
2016-10-25 214.68 214.98 213.98 214.17 66542329
2016-10-26 213.21 214.42 212.93 213.74 75705478
2016-10-27 214.58 214.62 213.08 213.17 77220213
2016-10-28 213.14 213.93 211.71 212.54 140623183
2016-10-31 212.93 213.19 212.36 212.55 61272507
2016-11-01 212.93 212.99 209.60 211.01 122781818
2016-11-02 210.65 211.10 209.23 209.74 103330806
2016-11-03 209.99 210.24 208.46 208.78 88939346
2016-11-04 208.91 209.89 208.38 208.55 109122059
2016-11-07 211.45 213.19 211.30 213.15 109794861
2016-11-08 212.69 214.77 212.38 214.11 106772138
2016-11-09 212.37 217.10 212.34 216.38 258428972
2016-11-10 217.30 218.31 215.22 216.92 172113313
2016-11-11 216.08 216.70 215.32 216.42 100552732
2016-11-14 217.03 217.27 215.72 216.59 94579982
2016-11-15 217.04 218.28 216.80 218.28 91652580
2016-11-16 217.56 218.14 217.42 217.87 65617697
2016-11-17 218.05 219.06 217.92 218.99 69797191
2016-11-18 219.07 219.27 218.29 218.50 86265751
2016-11-21 219.17 220.18 219.00 220.15 72402638
2016-11-22 220.51 220.79 219.73 220.58 67428957
2016-11-23 219.98 220.76 219.75 220.70 56620237
2016-11-25 221.10 221.56 221.01 221.52 37872255
2016-11-28 221.16 221.48 220.36 220.48 76572511
2016-11-29 220.52 221.44 220.17 220.91 69886690
2016-11-30 221.63 221.82 220.31 220.38 113291793
2016-12-01 220.73 220.73 219.15 219.57 79040487
... ... ... ... ... ...
2017-09-07 247.25 247.27 246.40 246.87 58034730
2017-09-08 246.54 247.11 246.30 246.58 63832825
2017-09-11 248.04 249.30 248.02 249.21 71364848
2017-09-12 249.63 250.09 249.42 250.05 56896027
2017-09-13 249.72 250.21 249.59 250.17 59228002
2017-09-14 249.80 250.32 249.60 250.09 95446349
2017-09-15 248.69 249.29 248.57 249.19 95432382
2017-09-18 249.61 250.12 249.28 249.72 46235238
2017-09-19 250.00 250.07 249.60 249.97 47108148
2017-09-20 250.07 250.19 248.92 250.06 59574083
2017-09-21 249.88 249.98 249.18 249.39 48211398
2017-09-22 249.05 249.63 249.02 249.44 51214032
2017-09-25 249.15 249.55 248.08 248.93 57064357
2017-09-26 249.42 249.70 248.80 249.08 54081959
2017-09-27 249.88 250.49 248.87 250.05 71852148
2017-09-28 249.73 250.44 249.63 250.35 44778841
2017-09-29 250.34 251.32 250.13 251.23 85578002
2017-10-02 251.49 252.32 251.29 252.32 59022985
2017-10-03 252.46 252.89 252.23 252.86 66810169
2017-10-04 252.69 253.44 252.56 253.16 55953619
2017-10-05 253.54 254.68 253.20 254.66 63522757
2017-10-06 254.15 254.70 253.85 254.37 80645998
2017-10-09 254.63 254.70 253.65 253.95 35803138
2017-10-10 254.60 255.05 253.98 254.62 43057363
2017-10-11 254.51 254.64 254.32 254.32 2963114
2017-10-12 254.66 255.06 254.36 254.64 47065144
2017-10-13 255.14 255.27 254.64 254.95 54800435
2017-10-16 255.21 255.51 254.82 255.29 38221675
2017-10-17 255.23 255.52 254.98 255.47 31560964
2017-10-18 255.90 255.95 255.50 255.72 40888330
[502 rows x 5 columns]
DEBUG:utils.data_sources:----------------------------------------------------------------------------------------------------
DEBUG:utils.data_sources:sd = 2013-01-18 00:00:00 , ed = 2017-01-01 00:00:00
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=SPY&startdate=Jan+18%2C+2013&enddate=Jan+01%2C+2017&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=SPY&output=csv HTTP/1.1" 200 None
DEBUG:utils.data_sources:batch 2 size: 753
DEBUG:utils.data_sources: Open High Low Close Volume
Date
2016-10-20 213.87 214.53 213.11 213.88 73639830
2016-10-21 212.96 214.08 212.76 213.98 89089092
2016-10-24 215.00 215.32 214.48 214.89 60146592
2016-10-25 214.68 214.98 213.98 214.17 66542329
2016-10-26 213.21 214.42 212.93 213.74 75705478
2016-10-27 214.58 214.62 213.08 213.17 77220213
2016-10-28 213.14 213.93 211.71 212.54 140623183
2016-10-31 212.93 213.19 212.36 212.55 61272507
2016-11-01 212.93 212.99 209.60 211.01 122781818
2016-11-02 210.65 211.10 209.23 209.74 103330806
2016-11-03 209.99 210.24 208.46 208.78 88939346
2016-11-04 208.91 209.89 208.38 208.55 109122059
2016-11-07 211.45 213.19 211.30 213.15 109794861
2016-11-08 212.69 214.77 212.38 214.11 106772138
2016-11-09 212.37 217.10 212.34 216.38 258428972
2016-11-10 217.30 218.31 215.22 216.92 172113313
2016-11-11 216.08 216.70 215.32 216.42 100552732
2016-11-14 217.03 217.27 215.72 216.59 94579982
2016-11-15 217.04 218.28 216.80 218.28 91652580
2016-11-16 217.56 218.14 217.42 217.87 65617697
2016-11-17 218.05 219.06 217.92 218.99 69797191
2016-11-18 219.07 219.27 218.29 218.50 86265751
2016-11-21 219.17 220.18 219.00 220.15 72402638
2016-11-22 220.51 220.79 219.73 220.58 67428957
2016-11-23 219.98 220.76 219.75 220.70 56620237
2016-11-25 221.10 221.56 221.01 221.52 37872255
2016-11-28 221.16 221.48 220.36 220.48 76572511
2016-11-29 220.52 221.44 220.17 220.91 69886690
2016-11-30 221.63 221.82 220.31 220.38 113291793
2016-12-01 220.73 220.73 219.15 219.57 79040487
... ... ... ... ... ...
2017-09-07 247.25 247.27 246.40 246.87 58034730
2017-09-08 246.54 247.11 246.30 246.58 63832825
2017-09-11 248.04 249.30 248.02 249.21 71364848
2017-09-12 249.63 250.09 249.42 250.05 56896027
2017-09-13 249.72 250.21 249.59 250.17 59228002
2017-09-14 249.80 250.32 249.60 250.09 95446349
2017-09-15 248.69 249.29 248.57 249.19 95432382
2017-09-18 249.61 250.12 249.28 249.72 46235238
2017-09-19 250.00 250.07 249.60 249.97 47108148
2017-09-20 250.07 250.19 248.92 250.06 59574083
2017-09-21 249.88 249.98 249.18 249.39 48211398
2017-09-22 249.05 249.63 249.02 249.44 51214032
2017-09-25 249.15 249.55 248.08 248.93 57064357
2017-09-26 249.42 249.70 248.80 249.08 54081959
2017-09-27 249.88 250.49 248.87 250.05 71852148
2017-09-28 249.73 250.44 249.63 250.35 44778841
2017-09-29 250.34 251.32 250.13 251.23 85578002
2017-10-02 251.49 252.32 251.29 252.32 59022985
2017-10-03 252.46 252.89 252.23 252.86 66810169
2017-10-04 252.69 253.44 252.56 253.16 55953619
2017-10-05 253.54 254.68 253.20 254.66 63522757
2017-10-06 254.15 254.70 253.85 254.37 80645998
2017-10-09 254.63 254.70 253.65 253.95 35803138
2017-10-10 254.60 255.05 253.98 254.62 43057363
2017-10-11 254.51 254.64 254.32 254.32 2963114
2017-10-12 254.66 255.06 254.36 254.64 47065144
2017-10-13 255.14 255.27 254.64 254.95 54800435
2017-10-16 255.21 255.51 254.82 255.29 38221675
2017-10-17 255.23 255.52 254.98 255.47 31560964
2017-10-18 255.90 255.95 255.50 255.72 40888330
[502 rows x 5 columns]
../../utils/data_sources.py:103: PerformanceWarning: indexing past lexsort depth may impact performance.
data_df.loc[date, col][name] = raw_df.loc[date, col].copy()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-75-1390cbd86ee7> in <module>()
----> 1 rig_df = ds.download_ticker('SPY', ds.START_DATE, ds.END_DATE)
/home/miguel/github_repos/Machine-Learning-Nanodegree-Capstone/utils/data_sources.py in download_ticker(symbol, start_date, end_date)
87 log.debug('batch %i size: %i' % (batch_index, raw_df.shape[0]))
88 log.debug(raw_df[raw_df.index.duplicated()])
---> 89 return raw_to_multiindex(raw_df, symbol)
90
91
/home/miguel/github_repos/Machine-Learning-Nanodegree-Capstone/utils/data_sources.py in raw_to_multiindex(raw_df, name)
101 for date in raw_df.index:
102 for col in raw_df.columns:
--> 103 data_df.loc[date, col][name] = raw_df.loc[date, col].copy()
104 return data_df
105
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in __setitem__(self, key, value)
2427 else:
2428 # set column
-> 2429 self._set_item(key, value)
2430
2431 def _setitem_slice(self, key, value):
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in _set_item(self, key, value)
2493
2494 self._ensure_valid_index(value)
-> 2495 value = self._sanitize_column(key, value)
2496 NDFrame._set_item(self, key, value)
2497
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in _sanitize_column(self, key, value, broadcast)
2643
2644 if isinstance(value, Series):
-> 2645 value = reindexer(value)
2646
2647 elif isinstance(value, DataFrame):
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in reindexer(value)
2635 # duplicate axis
2636 if not value.index.is_unique:
-> 2637 raise e
2638
2639 # other
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/frame.py in reindexer(value)
2630 # GH 4107
2631 try:
-> 2632 value = value.reindex(self.index)._values
2633 except Exception as e:
2634
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/series.py in reindex(self, index, **kwargs)
2424 @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
2425 def reindex(self, index=None, **kwargs):
-> 2426 return super(Series, self).reindex(index=index, **kwargs)
2427
2428 @Appender(generic._shared_docs['fillna'] % _shared_doc_kwargs)
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
2402 # perform the reindex on the axes
2403 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 2404 fill_value, copy).__finalize__(self)
2405
2406 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/generic.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
2415 ax = self._get_axis(a)
2416 new_index, indexer = ax.reindex(labels, level=level, limit=limit,
-> 2417 tolerance=tolerance, method=method)
2418
2419 axis = self._get_axis_number(a)
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/base.py in reindex(self, target, method, level, limit, tolerance)
2836 raise ValueError("cannot reindex a non-unique index "
2837 "with a method or limit")
-> 2838 indexer, missing = self.get_indexer_non_unique(target)
2839
2840 if preserve_names and target.nlevels == 1 and target.name != self.name:
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_indexer_non_unique(self, target)
2657 tgt_values = target._values
2658
-> 2659 indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
2660 return Index(indexer), missing
2661
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_indexer_non_unique (pandas/_libs/index.c:7681)()
TypeError: 'NoneType' object is not iterable
In [78]:
symbol='SPY'
import datetime as dt
from pandas_datareader import data
raw_df = data.DataReader(name=symbol,
data_source=ds.DATA_SOURCE,
start=ds.START_DATE,
end=ds.START_DATE + dt.timedelta(days=2000))
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=SPY&startdate=Jan+22%2C+1993&enddate=Jul+15%2C+1998&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=SPY&output=csv HTTP/1.1" 200 None
In [79]:
raw_df
Out[79]:
Open
High
Low
Close
Volume
Date
2016-10-20
213.87
214.53
213.11
213.88
73639830
2016-10-21
212.96
214.08
212.76
213.98
89089092
2016-10-24
215.00
215.32
214.48
214.89
60146592
2016-10-25
214.68
214.98
213.98
214.17
66542329
2016-10-26
213.21
214.42
212.93
213.74
75705478
2016-10-27
214.58
214.62
213.08
213.17
77220213
2016-10-28
213.14
213.93
211.71
212.54
140623183
2016-10-31
212.93
213.19
212.36
212.55
61272507
2016-11-01
212.93
212.99
209.60
211.01
122781818
2016-11-02
210.65
211.10
209.23
209.74
103330806
2016-11-03
209.99
210.24
208.46
208.78
88939346
2016-11-04
208.91
209.89
208.38
208.55
109122059
2016-11-07
211.45
213.19
211.30
213.15
109794861
2016-11-08
212.69
214.77
212.38
214.11
106772138
2016-11-09
212.37
217.10
212.34
216.38
258428972
2016-11-10
217.30
218.31
215.22
216.92
172113313
2016-11-11
216.08
216.70
215.32
216.42
100552732
2016-11-14
217.03
217.27
215.72
216.59
94579982
2016-11-15
217.04
218.28
216.80
218.28
91652580
2016-11-16
217.56
218.14
217.42
217.87
65617697
2016-11-17
218.05
219.06
217.92
218.99
69797191
2016-11-18
219.07
219.27
218.29
218.50
86265751
2016-11-21
219.17
220.18
219.00
220.15
72402638
2016-11-22
220.51
220.79
219.73
220.58
67428957
2016-11-23
219.98
220.76
219.75
220.70
56620237
2016-11-25
221.10
221.56
221.01
221.52
37872255
2016-11-28
221.16
221.48
220.36
220.48
76572511
2016-11-29
220.52
221.44
220.17
220.91
69886690
2016-11-30
221.63
221.82
220.31
220.38
113291793
2016-12-01
220.73
220.73
219.15
219.57
79040487
...
...
...
...
...
...
2017-09-07
247.25
247.27
246.40
246.87
58034730
2017-09-08
246.54
247.11
246.30
246.58
63832825
2017-09-11
248.04
249.30
248.02
249.21
71364848
2017-09-12
249.63
250.09
249.42
250.05
56896027
2017-09-13
249.72
250.21
249.59
250.17
59228002
2017-09-14
249.80
250.32
249.60
250.09
95446349
2017-09-15
248.69
249.29
248.57
249.19
95432382
2017-09-18
249.61
250.12
249.28
249.72
46235238
2017-09-19
250.00
250.07
249.60
249.97
47108148
2017-09-20
250.07
250.19
248.92
250.06
59574083
2017-09-21
249.88
249.98
249.18
249.39
48211398
2017-09-22
249.05
249.63
249.02
249.44
51214032
2017-09-25
249.15
249.55
248.08
248.93
57064357
2017-09-26
249.42
249.70
248.80
249.08
54081959
2017-09-27
249.88
250.49
248.87
250.05
71852148
2017-09-28
249.73
250.44
249.63
250.35
44778841
2017-09-29
250.34
251.32
250.13
251.23
85578002
2017-10-02
251.49
252.32
251.29
252.32
59022985
2017-10-03
252.46
252.89
252.23
252.86
66810169
2017-10-04
252.69
253.44
252.56
253.16
55953619
2017-10-05
253.54
254.68
253.20
254.66
63522757
2017-10-06
254.15
254.70
253.85
254.37
80645998
2017-10-09
254.63
254.70
253.65
253.95
35803138
2017-10-10
254.60
255.05
253.98
254.62
43057363
2017-10-11
254.51
254.64
254.32
254.32
2963114
2017-10-12
254.66
255.06
254.36
254.64
47065144
2017-10-13
255.14
255.27
254.64
254.95
54800435
2017-10-16
255.21
255.51
254.82
255.29
38221675
2017-10-17
255.23
255.52
254.98
255.47
31560964
2017-10-18
255.90
255.95
255.50
255.72
40888330
251 rows × 5 columns
In [80]:
raw_2_df = data.DataReader(name=symbol,
data_source=ds.DATA_SOURCE,
start=ds.START_DATE + dt.timedelta(days=2000),
end=ds.START_DATE + dt.timedelta(days=4000))
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): www.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://www.google.com:80 "GET /finance/historical?q=SPY&startdate=Jul+15%2C+1998&enddate=Jan+05%2C+2004&output=csv HTTP/1.1" 302 262
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTP connection (1): finance.google.com
DEBUG:requests.packages.urllib3.connectionpool:http://finance.google.com:80 "GET /finance/historical?q=SPY&output=csv HTTP/1.1" 200 None
In [81]:
raw_2_df
Out[81]:
Open
High
Low
Close
Volume
Date
2016-10-21
212.96
214.08
212.76
213.98
89089092
2016-10-24
215.00
215.32
214.48
214.89
60146592
2016-10-25
214.68
214.98
213.98
214.17
66542329
2016-10-26
213.21
214.42
212.93
213.74
75705478
2016-10-27
214.58
214.62
213.08
213.17
77220213
2016-10-28
213.14
213.93
211.71
212.54
140623183
2016-10-31
212.93
213.19
212.36
212.55
61272507
2016-11-01
212.93
212.99
209.60
211.01
122781818
2016-11-02
210.65
211.10
209.23
209.74
103330806
2016-11-03
209.99
210.24
208.46
208.78
88939346
2016-11-04
208.91
209.89
208.38
208.55
109122059
2016-11-07
211.45
213.19
211.30
213.15
109794861
2016-11-08
212.69
214.77
212.38
214.11
106772138
2016-11-09
212.37
217.10
212.34
216.38
258428972
2016-11-10
217.30
218.31
215.22
216.92
172113313
2016-11-11
216.08
216.70
215.32
216.42
100552732
2016-11-14
217.03
217.27
215.72
216.59
94579982
2016-11-15
217.04
218.28
216.80
218.28
91652580
2016-11-16
217.56
218.14
217.42
217.87
65617697
2016-11-17
218.05
219.06
217.92
218.99
69797191
2016-11-18
219.07
219.27
218.29
218.50
86265751
2016-11-21
219.17
220.18
219.00
220.15
72402638
2016-11-22
220.51
220.79
219.73
220.58
67428957
2016-11-23
219.98
220.76
219.75
220.70
56620237
2016-11-25
221.10
221.56
221.01
221.52
37872255
2016-11-28
221.16
221.48
220.36
220.48
76572511
2016-11-29
220.52
221.44
220.17
220.91
69886690
2016-11-30
221.63
221.82
220.31
220.38
113291793
2016-12-01
220.73
220.73
219.15
219.57
79040487
2016-12-02
219.67
220.25
219.26
219.68
74840347
...
...
...
...
...
...
2017-09-07
247.25
247.27
246.40
246.87
58034730
2017-09-08
246.54
247.11
246.30
246.58
63832825
2017-09-11
248.04
249.30
248.02
249.21
71364848
2017-09-12
249.63
250.09
249.42
250.05
56896027
2017-09-13
249.72
250.21
249.59
250.17
59228002
2017-09-14
249.80
250.32
249.60
250.09
95446349
2017-09-15
248.69
249.29
248.57
249.19
95432382
2017-09-18
249.61
250.12
249.28
249.72
46235238
2017-09-19
250.00
250.07
249.60
249.97
47108148
2017-09-20
250.07
250.19
248.92
250.06
59574083
2017-09-21
249.88
249.98
249.18
249.39
48211398
2017-09-22
249.05
249.63
249.02
249.44
51214032
2017-09-25
249.15
249.55
248.08
248.93
57064357
2017-09-26
249.42
249.70
248.80
249.08
54081959
2017-09-27
249.88
250.49
248.87
250.05
71852148
2017-09-28
249.73
250.44
249.63
250.35
44778841
2017-09-29
250.34
251.32
250.13
251.23
85578002
2017-10-02
251.49
252.32
251.29
252.32
59022985
2017-10-03
252.46
252.89
252.23
252.86
66810169
2017-10-04
252.69
253.44
252.56
253.16
55953619
2017-10-05
253.54
254.68
253.20
254.66
63522757
2017-10-06
254.15
254.70
253.85
254.37
80645998
2017-10-09
254.63
254.70
253.65
253.95
35803138
2017-10-10
254.60
255.05
253.98
254.62
43057363
2017-10-11
254.51
254.64
254.32
254.32
2963114
2017-10-12
254.66
255.06
254.36
254.64
47065144
2017-10-13
255.14
255.27
254.64
254.95
54800435
2017-10-16
255.21
255.51
254.82
255.29
38221675
2017-10-17
255.23
255.52
254.98
255.47
31560964
2017-10-18
255.90
255.95
255.50
255.72
40888330
250 rows × 5 columns
In [95]:
filtered_pred_df = pred_df.loc[:,pred_df.columns != 'RIG']
print(pred_df.shape)
filtered_pred_df.shape
(6024, 503)
Out[95]:
(6024, 502)
In [97]:
description_f_df = filtered_pred_df.describe()
description_f_df
Out[97]:
SPY
MMM
ABT
ABBV
ACN
ATVI
AYI
ADBE
AMD
AAP
...
XEL
XRX
XLNX
XL
XYL
YHOO
YUM
ZBH
ZION
ZTS
count
6024.000000
6024.000000
6024.000000
1023.000000
3886.000000
5837.000000
3798.000000
6023.000000
6022.000000
3797.000000
...
6024.000000
6024.000000
6023.000000
112.000000
1312.000000
5214.000000
4853.000000
3882.000000
6023.000000
987.000000
mean
120.379515
74.138275
23.928069
55.164673
49.421904
8.717500
69.421403
29.287536
15.965330
64.918667
...
23.993572
16.756667
28.025208
35.003839
33.941387
24.188189
36.630474
70.848998
38.208409
40.142432
std
44.215695
38.612950
10.034862
8.991563
28.829258
9.591969
64.673705
24.356283
13.338299
47.194735
...
6.198760
11.937684
16.451688
1.585963
7.540993
16.424537
25.370764
23.899683
21.335254
7.749647
min
43.410000
23.620000
5.750000
33.000000
11.850000
0.290000
9.160000
2.060000
1.620000
12.530000
...
5.660000
4.170000
2.290000
32.810000
23.000000
0.660000
5.940000
25.020000
6.480000
28.400000
25%
92.847500
44.802500
18.840000
48.835000
26.330000
1.130000
27.815000
7.410000
5.870000
32.630000
...
20.250000
9.487500
15.425000
33.707500
27.220000
13.230000
13.950000
53.665000
20.625000
32.170000
50%
119.540000
71.500000
23.420000
56.230000
38.925000
6.040000
44.950000
27.030000
14.030000
42.300000
...
23.230000
13.330000
26.930000
34.320000
34.670000
21.770000
29.480000
68.465000
30.900000
42.370000
75%
141.412500
86.860000
27.720000
62.220000
72.365000
12.030000
71.472500
37.785000
22.347500
82.060000
...
27.460000
17.660000
38.485000
36.425000
37.415000
34.230000
63.060000
84.515000
55.100000
47.145000
max
227.760000
181.420000
51.200000
71.230000
125.400000
45.470000
279.150000
110.810000
94.810000
200.380000
...
45.330000
63.690000
97.940000
38.320000
54.750000
118.750000
94.880000
133.090000
88.280000
55.380000
8 rows × 502 columns
In [99]:
description_f_df.loc['mean'].plot()
plt.figure()
description_f_df.loc['std'].plot()
Out[99]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fb991248160>
In [117]:
description_f_df.loc['mean'].hist(bins=100)
plt.title('Mean Close price')
plt.xlabel('Price value')
plt.ylabel('Number of symbols')
plt.figure()
description_f_df.loc['std'].hist(bins=100)
plt.title('Standard Deviation of the Close price')
plt.xlabel('Std value')
plt.ylabel('Number of symbols')
plt.figure()
description_f_df.loc['max'].hist(bins=100)
plt.title('Maximum Close price')
plt.xlabel('Price value')
plt.ylabel('Number of symbols')
Out[117]:
<matplotlib.text.Text at 0x7fb992c66be0>
In [104]:
description_f_df.loc['mean'].argmax()
Out[104]:
'AIG'
In [119]:
description_f_df.loc['max'].argmax()
Out[119]:
'AIG'
American International Group, Inc. provides insurance products for commercial, institutional, and individual customers primarily in the United States, Europe, and Japan. The companys Commercial Insurance segment offers general liability, environmental, commercial automobile liability, workers compensation, excess casualty, and crisis management insurance products, as well as various risk-sharing and other customized structured programs; commercial, industrial, and energy-related property insurance; aerospace, political risk, trade credit, surety, and marine insurance; and various insurance products for small and medium sized enterprises. It also provides professional liability insurance products for a range of businesses and risks, including directors and officers liability, fidelity, employment practices, fiduciary liability, cybersecurity risk, kidnap and ransom, and errors and omissions insurance. This segment sells its products through independent retail and wholesale brokers. Its Consumer Insurance segment offers fixed annuities, fixed index annuities, variable annuities, and retail mutual funds; group mutual funds, fixed annuities, and variable annuities, as well as individual annuity and investment products, and financial planning and advisory services; and term life and universal life insurance. Its products include personal auto and property insurance, voluntary and sponsor-paid personal accident, and supplemental health products; travel insurance products; and extended warranty insurance. This segment sells its products through independent marketing organizations, independent insurance agents, financial advisors, direct marketing, banks, wirehouses, and broker-dealers. The company also provides stable value wrap products, and structured settlement and terminal funding annuities; and corporate- and bank-owned life insurance and guaranteed investment contracts. American International Group, Inc. was founded in 1919 and is based in New York, New York.
In [108]:
from utils import preprocessing as pp
no_missing_df = pp.drop_irrelevant_symbols(filtered_pred_df, 0.99)
print(no_missing_df.shape)
no_missing_df.head()
(6024, 286)
Out[108]:
SPY
MMM
ABT
ADBE
AMD
AES
AET
AFL
APD
ALK
...
HCN
WDC
WY
WHR
WFM
WMB
XEL
XRX
XLNX
ZION
date
1993-01-29
43.94
24.50
6.88
2.59
18.75
4.41
6.42
4.49
21.94
4.19
...
22.50
4.50
41.50
46.12
1.81
6.88
22.00
14.28
2.50
10.94
1993-02-01
44.25
24.69
6.88
2.72
19.12
4.53
6.64
4.52
22.38
4.19
...
23.00
4.50
42.00
46.50
1.78
7.00
22.19
14.09
2.62
11.06
1993-02-02
44.34
24.72
6.53
2.84
20.25
4.53
6.62
4.57
22.31
4.16
...
22.75
4.62
42.12
46.50
1.78
6.98
22.06
14.09
2.64
11.12
1993-02-03
44.81
25.19
6.91
2.70
20.50
4.49
6.50
4.65
22.69
4.22
...
23.25
4.69
42.00
47.12
1.81
6.84
22.38
14.03
2.68
11.25
1993-02-04
45.00
26.06
6.84
2.73
20.12
4.49
6.73
4.84
23.19
4.38
...
23.00
4.56
42.62
48.38
1.84
6.90
22.81
14.15
2.67
11.69
5 rows × 286 columns
In [120]:
missing_df = no_missing_df.isnull().sum() / no_missing_df.shape[0]
missing_df.hist(bins=200)
plt.xlabel('Missing data')
plt.ylabel('Number of symbols')
plt.axvline(x=0.01, color='r', label='1% missing data level')
plt.legend(loc='upper center')
Out[120]:
<matplotlib.legend.Legend at 0x7fb99359f710>
There are too many symbols for a scatter matrix
In [122]:
auto_data_df = data_df['SPY'].unstack()
print(auto_data_df.shape)
auto_data_df.head()
(6024, 5)
Out[122]:
feature
Close
High
Low
Open
Volume
date
1993-01-29
43.94
43.97
43.75
0.0
1003200.0
1993-02-01
44.25
44.25
43.97
0.0
480500.0
1993-02-02
44.34
44.38
44.12
0.0
201300.0
1993-02-03
44.81
44.84
44.38
0.0
529400.0
1993-02-04
45.00
45.09
44.88
0.0
531500.0
In [123]:
desc_auto_df = auto_data_df.describe()
desc_auto_df
Out[123]:
feature
Close
High
Low
Open
Volume
count
6024.000000
6005.000000
6005.000000
6005.000000
6.024000e+03
mean
120.379515
121.140626
119.524448
97.583151
5.931430e+07
std
44.215695
44.437438
44.109630
69.694832
7.937370e+07
min
43.410000
43.530000
42.810000
0.000000
0.000000e+00
25%
92.847500
93.720000
91.600000
0.000000
5.380350e+06
50%
119.540000
120.290000
118.770000
115.300000
3.417315e+07
75%
141.412500
142.300000
140.570000
141.170000
7.948068e+07
max
227.760000
228.340000
227.000000
227.410000
8.141804e+08
In [126]:
auto_data_df['Close'].hist(bins=100)
plt.title('Close price')
plt.xlabel('Price value')
plt.ylabel('Number of symbols')
plt.figure()
auto_data_df['Volume'].hist(bins=100)
plt.title('Volume')
plt.xlabel('Volume value')
plt.ylabel('Number of symbols')
Out[126]:
<matplotlib.text.Text at 0x7fb9924a5b00>
In [129]:
pd.scatter_matrix(auto_data_df[['Close', 'Volume']], alpha = 0.3, figsize = (14,8), diagonal = 'kde');
/home/miguel/anaconda3/envs/cap_env/lib/python3.6/site-packages/ipykernel_launcher.py:1: FutureWarning: pandas.scatter_matrix is deprecated. Use pandas.plotting.scatter_matrix instead
"""Entry point for launching an IPython kernel.
In [ ]:
Content source: mtasende/Machine-Learning-Nanodegree-Capstone
Similar notebooks: