In [21]:
import platform

import numpy as np
import pandas as pd

# Record the interpreter and library versions this notebook was run with.
# Each call prints one pre-joined string, so the output is the same whether
# `print` is the Python 2 statement or the Python 3 function.
print('python ' + platform.python_version())
print('numpy ' + np.version.version)
print('pandas ' + pd.__version__)

# Allow wider lines when numpy arrays are printed (150 characters).
np.set_printoptions(linewidth=150)


python 2.7.3
numpy 1.7.1
pandas 0.13.0

In [22]:
# Load the raw Smarket table and discard the 'Unnamed: 0' column
# (presumably a row index that was saved into the CSV -- confirm against the file).
smarket0 = pd.read_csv('data/Smarket.csv').drop('Unnamed: 0', axis=1)
smarket0.head()


Out[22]:
Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today Direction
0 2001 0.381 -0.192 -2.624 -1.055 5.010 1.1913 0.959 Up
1 2001 0.959 0.381 -0.192 -2.624 -1.055 1.2965 1.032 Up
2 2001 1.032 0.959 0.381 -0.192 -2.624 1.4112 -0.623 Down
3 2001 -0.623 1.032 0.959 0.381 -0.192 1.2760 0.614 Up
4 2001 0.614 -0.623 1.032 0.959 0.381 1.2057 0.213 Up

5 rows × 9 columns


In [23]:
# List the distinct labels present in the Direction column.
smarket0['Direction'].unique()


Out[23]:
array(['Up', 'Down'], dtype=object)

In [24]:
# Work on a copy so the raw frame `smarket0` keeps its string labels.
smarket = smarket0.copy()
# Recode Direction as a boolean: True where the label is 'Up', False otherwise.
smarket['Direction'] = (smarket0['Direction'] == 'Up')
smarket.head()


Out[24]:
Year Lag1 Lag2 Lag3 Lag4 Lag5 Volume Today Direction
0 2001 0.381 -0.192 -2.624 -1.055 5.010 1.1913 0.959 True
1 2001 0.959 0.381 -0.192 -2.624 -1.055 1.2965 1.032 True
2 2001 1.032 0.959 0.381 -0.192 -2.624 1.4112 -0.623 False
3 2001 -0.623 1.032 0.959 0.381 -0.192 1.2760 0.614 True
4 2001 0.614 -0.623 1.032 0.959 0.381 1.2057 0.213 True

5 rows × 9 columns


In [25]:
# Split the frame into a feature matrix X and a 1-D target vector y.
outcome = ['Direction']
# Every column that is not the outcome is used as a feature.
# NOTE(review): this keeps 'Today' (and 'Year') as features. In the rows shown
# above, Direction is 'Up' exactly when Today is positive, so 'Today' looks like
# it leaks the target -- confirm whether it should be excluded before modelling.
factors = [col for col in smarket.columns if col not in outcome]
# `.values` replaces the deprecated `DataFrame.as_matrix()`; both return the
# underlying numpy array, but `.values` exists in old and current pandas.
X = smarket[factors].values
# Selecting with a list yields a one-column 2-D array; ravel() flattens it to 1-D.
y = smarket[outcome].values.ravel()

In [26]:
# Pairwise scatter plots of every column in `smarket`.
# `pandas.tools.plotting` was removed in later pandas releases, so look the
# function up in its current home first and fall back to the legacy module.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix

# Assign the returned axes to `_` so the cell does not echo their repr.
_ = scatter_matrix(smarket, figsize=(14, 10))



In [27]:
from sklearn import linear_model

In [28]:
# Fit a logistic regression (library defaults) of y on X; fit() returns the
# estimator itself, so construction and fitting are chained.
clf = linear_model.LogisticRegression().fit(X, y)
# Show the fitted coefficients, one per column of X.
clf.coef_


Out[28]:
array([[ -8.99500259e-05,  -1.89286206e-02,  -2.43928135e-02,   4.97025653e-02,   1.29504251e-02,   8.07052367e-02,   2.16372795e-01,   9.34329171e+00]])

In [ ]: