In [1]:
import subprocess
import os
import warnings
# Silence library warnings so they do not clutter the rendered outputs.
warnings.filterwarnings('ignore')
# Work from the repository root (presumably so the later `from src import ...`
# resolves — confirm). check_output raises CalledProcessError when git fails
# (e.g. when not inside a repository) instead of handing git's error text to
# os.chdir as if it were a path, and the argv list avoids spawning a shell.
_repo_root = subprocess.check_output(
    ["git", "rev-parse", "--show-toplevel"],
    universal_newlines=True,
).rstrip("\n")
os.chdir(_repo_root)
%matplotlib inline

In [2]:
from src import get_data, preprocess, utility, linear_model
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

Fetching and inspecting S&P 500 daily stock values


In [4]:
# Fetch the daily values for the 'sandp500' symbol set; the output below shows
# the result is a pandas Panel (items x dates x symbols).
panel = get_data.symbols2daily_values(kinds='sandp500')
# Bare expression so the notebook echoes the panel's summary.
panel


Out[4]:
<class 'pandas.core.panel.Panel'>
Dimensions: 6 (items) x 1780 (major_axis) x 504 (minor_axis)
Items axis: Open to Adj Close
Major_axis axis: 2010-01-04 00:00:00 to 2017-01-27 00:00:00
Minor_axis axis: A to ZTS
  • dim-1: key(item axis)

In [6]:
# Labels along the items axis: one entry per daily field.
panel.items


Out[6]:
Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'], dtype='object')
  • dim-2: date index (trading days, Monday–Friday, from 2010-01-04 to 2017-01-27)

In [7]:
# Labels along the major axis: the dates covered by the panel.
panel.major_axis


Out[7]:
DatetimeIndex(['2010-01-04', '2010-01-05', '2010-01-06', '2010-01-07',
               '2010-01-08', '2010-01-11', '2010-01-12', '2010-01-13',
               '2010-01-14', '2010-01-15',
               ...
               '2017-01-13', '2017-01-17', '2017-01-18', '2017-01-19',
               '2017-01-20', '2017-01-23', '2017-01-24', '2017-01-25',
               '2017-01-26', '2017-01-27'],
              dtype='datetime64[ns]', name='Date', length=1780, freq=None)
  • dim-3: enterprise lists (S&P500 symbols)

In [8]:
# Labels along the minor axis: the enterprise ticker symbols.
panel.minor_axis


Out[8]:
Index(['A', 'AA', 'AAL', 'AAP', 'AAPL', 'ABBV', 'ABC', 'ABT', 'ACN', 'ADBE',
       ...
       'XLNX', 'XOM', 'XRAY', 'XRX', 'XYL', 'YHOO', 'YUM', 'ZBH', 'ZION',
       'ZTS'],
      dtype='object', length=504)

preprocessing

  • notation
    • $t$: time index
    • $i$: enterprise symbols index
    • $x_{t,i}$: adjusted closing price (`Adj Close`, adjusted for continuity) of the $i$-th enterprise at time $t$
  • to compute the change rate,
    • drop every enterprise whose series contains missing values
    • change rate: $c_{t,i} \leftarrow \log x_{t,i} - \log x_{t-1,i}$
  • summary statistics over all $c_{t,i}$

In [6]:
# Derive the working series from the 'Adj Close' item of the panel.
# NOTE(review): the surrounding notes suggest filter_key_nan drops enterprises
# with missing values and yields log change rates — confirm in src/preprocess.
adj_value = preprocess.filter_key_nan(panel, key='Adj Close')
# Summarise every value at once under a single labelled column.
# NOTE(review): utility.np1 presumably flattens its input to 1-D — confirm.
(
    pd.DataFrame(data=utility.np1(adj_value), columns=['all change rate stats'])
    .describe()
)


Out[6]:
all change rate stats
count 832572.000000
mean 0.000514
std 0.017712
min -0.497378
25% -0.007713
50% 0.000639
75% 0.009030
max 0.481849
  • mean, variance distribution