In [1]:
import pandas as pd
import statsmodels.formula.api as sm
import numpy as np
import ggplot as gg

In [9]:
! pip install ggplot


Requirement already satisfied: ggplot in c:\users\dell\anaconda3\lib\site-packages
Requirement already satisfied: matplotlib in c:\users\dell\anaconda3\lib\site-packages (from ggplot)
Requirement already satisfied: scipy in c:\users\dell\anaconda3\lib\site-packages (from ggplot)
Requirement already satisfied: brewer2mpl in c:\users\dell\anaconda3\lib\site-packages (from ggplot)
Requirement already satisfied: patsy>=0.4 in c:\users\dell\anaconda3\lib\site-packages (from ggplot)
Requirement already satisfied: six in c:\users\dell\anaconda3\lib\site-packages (from ggplot)
Requirement already satisfied: statsmodels in c:\users\dell\anaconda3\lib\site-packages (from ggplot)
Requirement already satisfied: pandas in c:\users\dell\anaconda3\lib\site-packages (from ggplot)
Requirement already satisfied: cycler in c:\users\dell\anaconda3\lib\site-packages (from ggplot)
Requirement already satisfied: numpy in c:\users\dell\anaconda3\lib\site-packages (from ggplot)
Requirement already satisfied: python-dateutil in c:\users\dell\anaconda3\lib\site-packages (from matplotlib->ggplot)
Requirement already satisfied: pytz in c:\users\dell\anaconda3\lib\site-packages (from matplotlib->ggplot)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,>=1.5.6 in c:\users\dell\anaconda3\lib\site-packages (from matplotlib->ggplot)

In [2]:
# Download the Anscombe data set from the Rdatasets CSV collection.
ANSCOMBE_CSV_URL = "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/anscombe.csv"
anscombe = pd.read_csv(ANSCOMBE_CSV_URL)

In [5]:
# Remove the column that read_csv labelled "Unnamed: 0" (presumably the CSV's
# row-number column -- confirm against the source file).
# `axis` is passed by keyword because newer pandas releases no longer accept it
# positionally, and errors="ignore" keeps this cell re-runnable after the
# column has already been dropped.
anscombe = anscombe.drop("Unnamed: 0", axis=1, errors="ignore")

In [6]:
# Display the whole table; as the cell's last expression it is rendered as output.
anscombe


Out[6]:
x1 x2 x3 x4 y1 y2 y3 y4
0 10 10 10 8 8.04 9.14 7.46 6.58
1 8 8 8 8 6.95 8.14 6.77 5.76
2 13 13 13 8 7.58 8.74 12.74 7.71
3 9 9 9 8 8.81 8.77 7.11 8.84
4 11 11 11 8 8.33 9.26 7.81 8.47
5 14 14 14 8 9.96 8.10 8.84 7.04
6 6 6 6 8 7.24 6.13 6.08 5.25
7 4 4 4 19 4.26 3.10 5.39 12.50
8 12 12 12 8 10.84 9.13 8.15 5.56
9 7 7 7 8 4.82 7.26 6.42 7.91
10 5 5 5 8 5.68 4.74 5.73 6.89

In [7]:
# Per-column means of the eight series.
# DataFrame.mean() is used instead of np.mean(DataFrame): NumPy forwards
# axis=None to pandas, and newer pandas releases interpret that as "reduce over
# both axes", returning one scalar instead of one mean per column.
anscombe.mean()


Out[7]:
x1    9.000000
x2    9.000000
x3    9.000000
x4    9.000000
y1    7.500909
y2    7.500909
y3    7.500000
y4    7.500909
dtype: float64

In [8]:
# Per-column standard deviations of the eight series.
# ddof=0 reproduces what np.std computed here (population standard deviation);
# calling the DataFrame method directly avoids NumPy forwarding axis=None, whose
# per-column meaning is deprecated in newer pandas releases.
anscombe.std(ddof=0)


Out[8]:
x1    3.162278
x2    3.162278
x3    3.162278
x4    3.162278
y1    1.937024
y2    1.937109
y3    1.935933
y4    1.936081
dtype: float64

In [10]:
# Fit one ordinary-least-squares line per pair (y1 ~ x1, ..., y4 ~ x4).
# The four fitted results keep their individual names because later cells
# refer to result1 .. result4 directly.
result1, result2, result3, result4 = (
    sm.ols(formula="y{0} ~ x{0} ".format(k), data=anscombe).fit()
    for k in range(1, 5)
)

In [12]:
# Show the regression summary table for the first fit (y1 ~ x1).
result1.summary()


C:\Users\Dell\Anaconda3\lib\site-packages\scipy\stats\stats.py:1327: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=11
  "anyway, n=%i" % int(n))
Out[12]:
OLS Regression Results
Dep. Variable: y1 R-squared: 0.667
Model: OLS Adj. R-squared: 0.629
Method: Least Squares F-statistic: 17.99
Date: Tue, 18 Apr 2017 Prob (F-statistic): 0.00217
Time: 20:21:59 Log-Likelihood: -16.841
No. Observations: 11 AIC: 37.68
Df Residuals: 9 BIC: 38.48
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 3.0001 1.125 2.667 0.026 0.456 5.544
x1 0.5001 0.118 4.241 0.002 0.233 0.767
Omnibus: 0.082 Durbin-Watson: 3.212
Prob(Omnibus): 0.960 Jarque-Bera (JB): 0.289
Skew: -0.122 Prob(JB): 0.865
Kurtosis: 2.244 Cond. No. 29.1

In [13]:
# Print the fitted coefficients (intercept and slope) of each regression, in order.
for fitted in (result1, result2, result3, result4):
    print(fitted.params)


Intercept    3.000091
x1           0.500091
dtype: float64
Intercept    3.000909
x2           0.500000
dtype: float64
Intercept    3.002455
x3           0.499727
dtype: float64
Intercept    3.001727
x4           0.499909
dtype: float64

In [ ]:
# Print the R-squared of each regression, in order.
for fitted in (result1, result2, result3, result4):
    print(fitted.rsquared)

In [14]:
# Render matplotlib-backed figures inline in the notebook output.
%matplotlib inline

In [15]:
# Base plot for the first pair: x1 on the horizontal axis, y1 on the vertical.
mapping_1 = gg.aes(x='x1', y='y1')
p = gg.ggplot(mapping_1, data=anscombe)

In [16]:
# Add a point layer to the base plot; the resulting plot is the cell's output.
p + gg.geom_point()


Out[16]:
<ggplot: (12797788)>

In [17]:
# Scatter plot of the second pair (x2 against y2).
mapping_2 = gg.aes(x='x2', y='y2')
p2 = gg.ggplot(mapping_2, data=anscombe)
p2 + gg.geom_point()


Out[17]:
<ggplot: (12841764)>

In [18]:
# Scatter plot of the third pair (x3 against y3).
mapping_3 = gg.aes(x='x3', y='y3')
p3 = gg.ggplot(mapping_3, data=anscombe)
p3 + gg.geom_point()


Out[18]:
<ggplot: (-9223372036841546566)>

In [19]:
# Scatter plot of the fourth pair (x4 against y4).
mapping_4 = gg.aes(x='x4', y='y4')
p4 = gg.ggplot(mapping_4, data=anscombe)
p4 + gg.geom_point()


Out[19]:
<ggplot: (-9223372036841149899)>

In [ ]: