In [ ]:
%pylab
%matplotlib inline
import pandas as pd
from pandas import DataFrame, Series

import json
import seaborn as sns

import IPython
from IPython.display import Image, display

# Let pandas render up to 500 rows / 500 columns before truncating a displayed frame.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# The former pd.set_option('display.mpl_style', 'default') call is gone on purpose:
# pandas deprecated and later dropped that option, so setting it fails on current
# releases. The seaborn style below is what makes the graphs a bit prettier.
sns.set_style("darkgrid")
sns.set_palette("bright")

# Default figure size (in inches) and font family for every plot in the notebook.
plt.rcParams['figure.figsize'] = (13, 9)
plt.rcParams['font.family'] = 'sans-serif'

import statsmodels.api as sm
import statsmodels.formula.api as smf

Toffee Ads


In [ ]:
# Read the tab-separated advert file into a DataFrame; the bare name on the
# last line makes the notebook display the frame.
advert_path = '../../DSUR/06/Advert.dat'
advertdata = pd.read_csv(advert_path, sep='\t')
advertdata

In [ ]:
# Scatter the adverts and packets columns against the row position and draw a
# horizontal line at each column's mean.
n_obs = len(advertdata)           # follow the data instead of assuming exactly 5 rows
positions = np.arange(n_obs)
line_span = [-1, n_obs]           # mean lines extend one unit past the first and last point

# Each mean is computed once and reused for both ends of its line.
adverts_mean = advertdata.adverts.mean()
packets_mean = advertdata.packets.mean()

plt.scatter(positions, advertdata.adverts, label="adverts")
plt.plot(line_span, [adverts_mean, adverts_mean], label="mean(adverts)")
plt.scatter(positions, advertdata.packets, c="red", label="packets")
plt.plot(line_span, [packets_mean, packets_mean], c="red", label="mean(packets)")

# Label the figure so it can be read without the surrounding code.
plt.xlabel("row position")
plt.ylabel("value")
plt.legend()
plt.show()

Covariance

$\operatorname{cov}(x,y)=\frac{\sum_{i=1}^{N}(x_i-\bar{x})(y_i-\bar{y})}{N-1}$


In [ ]:
# Sample covariance by hand: sum the products of each column's deviations
# from its mean, then divide by N - 1.
adverts_dev = advertdata.adverts - advertdata.adverts.mean()
packets_dev = advertdata.packets - advertdata.packets.mean()
(adverts_dev * packets_dev).sum() / (len(advertdata.adverts) - 1)

Correlation Coefficient

$r=\frac{\operatorname{cov}(x,y)}{s_x s_y}$, where $s_x$ and $s_y$ are the sample standard deviations of $x$ and $y$.


In [ ]:
# Pearson r = cov(x, y) / (s_x * s_y).
# The covariance is computed from the data (N - 1 normalisation, pandas' default)
# instead of being typed in as a literal, so the cell stays correct if the data changes.
advertdata.adverts.cov(advertdata.packets) / (
    advertdata.adverts.std(ddof=1) * advertdata.packets.std(ddof=1)
)

In [ ]:
# Print r squared for adverts vs. packets, then display the Pearson correlation matrix.
# print() is called as a function (the statement form is a syntax error on Python 3),
# and r is taken from the data rather than from a hand-copied, rounded value.
r_adverts_packets = advertdata.adverts.corr(advertdata.packets)
print(r_adverts_packets ** 2)
advertdata.corr()

In [ ]:
# Pairwise Spearman rank correlations between the columns of advertdata.
advertdata.corr(method="spearman")

Exam Anxiety


In [ ]:
# Read the tab-separated exam anxiety file and display its first ten rows.
exam_path = '../../DSUR/06/Exam Anxiety.dat'
examdata = pd.read_csv(exam_path, sep='\t')
examdata.head(10)

In [ ]:
# Correlation matrix (corr() defaults to Pearson) restricted to the
# Exam, Anxiety and Revise columns.
examdata[['Exam','Anxiety','Revise']].corr()

In [ ]:
from scipy.stats import pearsonr

# Print scipy's Pearson result for each pair of columns, in the same order as
# before: Exam/Anxiety, Exam/Revise, Revise/Anxiety.
pearson_pairs = [('Exam', 'Anxiety'), ('Exam', 'Revise'), ('Revise', 'Anxiety')]
for first_col, second_col in pearson_pairs:
    print(pearsonr(examdata[first_col], examdata[second_col]))

In [ ]:
# Spearman rank correlation matrix restricted to the Exam, Anxiety and Revise columns.
examdata[['Exam','Anxiety','Revise']].corr(method='spearman')

In [ ]:
from scipy.stats import spearmanr

# Print scipy's Spearman result for each pair of columns, in the same order as
# before: Exam/Anxiety, Exam/Revise, Revise/Anxiety.
spearman_pairs = [('Exam', 'Anxiety'), ('Exam', 'Revise'), ('Revise', 'Anxiety')]
for first_col, second_col in spearman_pairs:
    print(spearmanr(examdata[first_col], examdata[second_col]))

In [ ]: