In [13]:
import pandas as pd
import matplotlib.pyplot as plt

%pylab inline


Populating the interactive namespace from numpy and matplotlib
/usr/local/lib/python2.7/dist-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['close']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"

In [14]:
# Read the ARMH price history from CSV, indexed by the 'Date' column.
# Dates are converted with pd.to_datetime after loading rather than through
# the `pd.datetime` alias and the `date_parser` argument, both of which are
# deprecated/removed in newer pandas releases. The explicit format keeps the
# strict '%Y-%m-%d' parsing of the original lambda.
data = pd.read_csv("ARMH.csv", index_col = 'Date')
data.index = pd.to_datetime(data.index, format = '%Y-%m-%d')

# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print also emitted a stray "None" line.
data.info()
# Single-argument print(...) produces the same text on Python 2 and Python 3.
print("\n\n data shape:  %s" % (data.shape,))


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2466 entries, 2016-09-05 to 2007-03-26
Data columns (total 8 columns):
Open                       2466 non-null float64
High                       2466 non-null float64
Low                        2466 non-null float64
Close                      2466 non-null float64
Volume                     2466 non-null int64
Price to Earnings Ratio    1902 non-null float64
Price to Sales Ratio       1902 non-null float64
Unnamed: 8                 0 non-null float64
dtypes: float64(7), int64(1)
memory usage: 173.4 KB
None


 data shape:  (2466, 8)

In [15]:
# Drop the empty trailing column ('Unnamed: 8', which holds no non-null
# values) and put the rows in chronological order.
# Both steps are idempotent so the cell can be re-run: the previous in-place
# positional drop raised IndexError on a second run, and a bare `iloc[::-1]`
# reversal flips the order every time it executes.
# NOTE(review): sort_index() equals the old reversal as long as the dates in
# the file are unique and strictly descending -- confirm for other inputs.
data = data.drop('Unnamed: 8', axis = 1, errors = 'ignore').sort_index()

# info() prints its own report and returns None, so it is not wrapped in print.
data.info()
print(data.shape)
print(data.head())


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2466 entries, 2007-03-26 to 2016-09-05
Data columns (total 7 columns):
Open                       2466 non-null float64
High                       2466 non-null float64
Low                        2466 non-null float64
Close                      2466 non-null float64
Volume                     2466 non-null int64
Price to Earnings Ratio    1902 non-null float64
Price to Sales Ratio       1902 non-null float64
dtypes: float64(6), int64(1)
memory usage: 154.1 KB
None
(2466, 7)
            Open  High   Low  Close  Volume  Price to Earnings Ratio  \
Date                                                                   
2007-03-26  7.90  7.90  7.77   7.89  237167                      NaN   
2007-03-27  7.71  7.76  7.67   7.73  151323                      NaN   
2007-03-28  7.70  7.79  7.67   7.71  150269                      NaN   
2007-03-29  7.95  7.95  7.77   7.80  648457                      NaN   
2007-03-30  7.77  7.89  7.75   7.85  271083                      NaN   

            Price to Sales Ratio  
Date                              
2007-03-26                   NaN  
2007-03-27                   NaN  
2007-03-28                   NaN  
2007-03-29                   NaN  
2007-03-30                   NaN  

In [16]:
# Closing-price column as a Series; the plots and the stationarity test below
# all start from it.
# NOTE: the name `close` collides with pylab's `close()` pulled in by
# `%pylab inline` (see the UserWarning printed by the first cell).
close = data.loc[:, 'Close']

print(close.head(10))


Date
2007-03-26    7.89
2007-03-27    7.73
2007-03-28    7.71
2007-03-29    7.80
2007-03-30    7.85
2007-04-02    7.84
2007-04-03    7.83
2007-04-04    7.82
2007-04-05    7.77
2007-04-06    7.77
Name: Close, dtype: float64

In [17]:
# Plot the raw closing-price series. A title is set so the figure can be told
# apart from the 'diff' / 'log diff' / 'sqrt diff' plots that follow.
close.plot(title = 'close')


Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f14f2c4c050>

In [18]:
# Line plot of the first difference (lag 1) of the closing price.
close.diff(periods = 1).plot(kind = 'line', title = 'diff')


Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f14f5295b50>

In [19]:
# Natural log of the closing price, then a line plot of its first difference.
ts_log = np.log(close)
ts_log.diff(periods = 1).plot(kind = 'line', title = 'log diff')


Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f14e841c110>

In [20]:
# Square root of the closing price, then a line plot of its first difference.
ts_sqrt = np.sqrt(close)
ts_sqrt.diff(periods = 1).plot(kind = 'line', title = 'sqrt diff')


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f14e82295d0>

In [21]:
from statsmodels.tsa.stattools import adfuller

def test_stationarity(ts):
    rol_mean = ts.rolling(window = 30, center = False).mean()
    rol_std = ts.rolling(window = 30, center = False).std()
    
    org = plt.plot(ts, color = 'blue', label = 'Original')
    mean = plt.plot(rol_mean, color = 'red', label = 'Rolling Mean')
    std = plt.plot(rol_std, color = 'black', label = 'Rolling Std')
    plt.legend(loc = 'best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block = False)
    
    # Dicky-Fuller Test
    dftest = adfuller(ts, autolag = 'AIC')
    dfoutput = pd.Series(dftest[0:4], index = ['Test Statistics', 'p-value', '#Lags Used', 'Number of Observation Used'])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print dfoutput

# Run the stationarity check on the untransformed closing-price series.
test_stationarity(close)


Test Statistics                 -0.102832
p-value                          0.949156
#Lags Used                       0.000000
Number of Observation Used    2465.000000
Critical Value (5%)             -2.862713
Critical Value (1%)             -3.433006
Critical Value (10%)            -2.567395
dtype: float64