In [2]:
%pylab inline
In [90]:
# Import libraries
from __future__ import absolute_import, division, print_function
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')
import sys
sys.path.append('/Users/omojumiller/mycode/tools')
import numpy as np
import pandas as pd
import scipy.stats as st
from tools import plot_features_by_target
# Graphing Libraries
import matplotlib.pyplot as pyplt
import seaborn as sns
sns.set_style("whitegrid")
# Configure for presentation
np.set_printoptions(threshold=50, linewidth=50)
import matplotlib as mpl
mpl.rc('font', size=16)
from IPython.display import display
In [91]:
# Read data/Advertising.csv (path is relative to the working directory) into
# a DataFrame, then display its (rows, columns) shape as the cell output.
df = pd.read_csv(filepath_or_buffer='data/Advertising.csv')
df.shape
Out[91]:
In [92]:
# Preview the first five rows of the loaded frame.
df.head(n=5)
Out[92]:
In [93]:
# Column names used as the x (feature) and y (target) variables.
x_vars, y_vars = ['TV', 'Radio', 'Newspaper'], ['Sales']
In [94]:
# Call the project-local plotting helper with the frame, the feature column
# names and the target column name(s).
# NOTE(review): the helper's implementation is not visible here -- presumably
# it draws each x_vars column against the y_vars column; confirm in `tools`.
plot_features_by_target(df, x_vars, y_vars)
In [95]:
# Pairwise view of the columns at positions 1-4 (column 0 is skipped;
# presumably these are TV, Radio, Newspaper and Sales -- confirm via df.head()).
# `DataFrame.ix` was removed in pandas 1.0; `.iloc` is the explicit
# positional indexer and selects the same columns here.
g = sns.PairGrid(df.iloc[:, [1, 2, 3, 4]], diag_sharey=False)
# Lower triangle: 2-D density contours; upper triangle: raw scatter plots;
# diagonal: 1-D density of each column.
g.map_lower(sns.kdeplot, cmap="Blues_d")
g.map_upper(pyplt.scatter)
# Trailing semicolon suppresses the PairGrid repr in the cell output.
g.map_diag(sns.kdeplot, lw=3);
In [96]:
# Positional selection: columns 1-3 become the feature matrix and column 4
# the target. `DataFrame.ix` was removed in pandas 1.0, so the explicit
# positional indexer `.iloc` is used instead.
X = df.iloc[:, [1, 2, 3]]
# The list form `[4]` keeps y as a one-column DataFrame rather than a Series.
y = df.iloc[:, [4]]
In [97]:
# `train_test_split` lives in `sklearn.model_selection` since scikit-learn
# 0.18; the old `sklearn.cross_validation` module was removed in 0.20.
# The fallback keeps the cell working on very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn import linear_model

# Hold out half of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Fit a linear regression on the training half and predict the held-out half.
reg = linear_model.LinearRegression()
reg.fit(X_train, y_train)
y_preds = reg.predict(X_test)
In [102]:
# Gather the fitted parameters and the score on each half of the split first,
# then print the report in the same order as before.
coefficients = reg.coef_
intercept_text = "intercepts of regression is %.2f" % reg.intercept_
score_test = reg.score(X_test, y_test)
score_train = reg.score(X_train, y_train)

print("slope of regression is", coefficients)
print(intercept_text)
print("\n ********stats on dataset********\n")
print("r-squared score on testing data: ", score_test)
print("r-squared score on training data: ", score_train)
In [110]:
# List entries in the current working directory whose name contains an
# underscore followed by a single digit and then a dot (e.g. "name_3.ext").
import glob
glob.glob(pathname='*_[0-9].*')
Out[110]:
In [104]:
ls
In [ ]: