notebook.community



In [38]:

    
# Tell IPython that we want to show matplotlib plots using the inline renderer.
%matplotlib inline

import pandas as pd
from pandas.stats.api import ols
import numpy as np



In [39]:

    
# Build some fake data on which to do a regression.
x_start = 50
x_stop = 150

# Draw the noise from a dedicated, seeded generator so that a fresh
# "Restart & Run All" reproduces exactly the same fake observations
# (and therefore the same regression and plot) every time.
random_seed = 42
rng = np.random.RandomState(random_seed)

# np.arange(X, Y) returns a numpy array containing integer values in [X,Y).
x_vals = np.arange(x_start, x_stop)

# Build fake observations by taking a linear model
# and adding a random fuzz value to each entry.
# noise_scale multiplies standard-normal draws, i.e. it is the standard
# deviation of the fuzz added to each observation.
noise_scale = 15
base = 5.6 + (3 * x_vals)
observations_0 = base + noise_scale * rng.randn(len(x_vals))
observations_1 = base + noise_scale * rng.randn(len(x_vals))

df = pd.DataFrame(
    {
        'x': x_vals,
        'y0': observations_0,
        'y1': observations_1,
    },
    # This isn't strictly necessary, but will make it easier to align
    # our plots later.
    index=x_vals,
)
df









    Out[39]:






  
    
      
      x
      y0
      y1
    
  
  
    
      50 
        50
       172.854190
       145.772328
    
    
      51 
        51
       146.172851
       177.290998
    
    
      52 
        52
       150.585412
       166.771689
    
    
      53 
        53
       167.788975
       164.723109
    
    
      54 
        54
       192.526672
       176.700035
    
    
      55 
        55
       173.087062
       196.051053
    
    
      56 
        56
       169.319521
       179.012183
    
    
      57 
        57
       171.688592
       174.852734
    
    
      58 
        58
       169.765965
       192.957446
    
    
      59 
        59
       195.677395
       165.677555
    
    
      60 
        60
       211.965887
       187.194226
    
    
      61 
        61
       174.972141
       200.603028
    
    
      62 
        62
       214.138771
       192.090558
    
    
      63 
        63
       206.196159
       209.814485
    
    
      64 
        64
       191.253703
       225.309439
    
    
      65 
        65
       210.448703
       192.892762
    
    
      66 
        66
       211.215431
       206.644236
    
    
      67 
        67
       193.563510
       203.880360
    
    
      68 
        68
       197.270521
       201.113099
    
    
      69 
        69
       220.618016
       201.688256
    
    
      70 
        70
       207.151778
       196.817555
    
    
      71 
        71
       213.060158
       232.184963
    
    
      72 
        72
       247.754713
       211.653061
    
    
      73 
        73
       213.057432
       234.618208
    
    
      74 
        74
       226.267727
       226.889442
    
    
      75 
        75
       232.616644
       243.718635
    
    
      76 
        76
       238.005354
       220.478286
    
    
      77 
        77
       227.736492
       238.212674
    
    
      78 
        78
       263.640435
       261.701072
    
    
      79 
        79
       230.519727
       245.474255
    
    
      ...
      ...
      ...
      ...
    
    
      120
       120
       364.626304
       347.354081
    
    
      121
       121
       398.163848
       372.939704
    
    
      122
       122
       354.719352
       357.856101
    
    
      123
       123
       381.885688
       375.899306
    
    
      124
       124
       386.171630
       366.608254
    
    
      125
       125
       383.883295
       385.406969
    
    
      126
       126
       390.743554
       347.937944
    
    
      127
       127
       408.241602
       377.825537
    
    
      128
       128
       370.128210
       389.405398
    
    
      129
       129
       392.087396
       377.953093
    
    
      130
       130
       390.125402
       396.472032
    
    
      131
       131
       401.071693
       395.520709
    
    
      132
       132
       391.293586
       384.902955
    
    
      133
       133
       419.354889
       398.454692
    
    
      134
       134
       414.409858
       408.179517
    
    
      135
       135
       397.060690
       396.944569
    
    
      136
       136
       423.654914
       372.181352
    
    
      137
       137
       416.444577
       421.441720
    
    
      138
       138
       414.907222
       408.083432
    
    
      139
       139
       438.579730
       418.188859
    
    
      140
       140
       419.688440
       437.514931
    
    
      141
       141
       449.265980
       421.402262
    
    
      142
       142
       424.936533
       421.858118
    
    
      143
       143
       448.530916
       451.793766
    
    
      144
       144
       403.320395
       448.833260
    
    
      145
       145
       448.641680
       438.132560
    
    
      146
       146
       464.186444
       438.631212
    
    
      147
       147
       439.629830
       457.988423
    
    
      148
       148
       449.603870
       459.015180
    
    
      149
       149
       477.391707
       469.187296
    
  

100 rows × 3 columns



In [40]:

    
# Pool the responses of both trials into one series and regress it on x.
# NOTE(review): the stacked y has twice as many rows as x (every index label
# appears twice); this relies on ols matching the two by index label - confirm.
regression = ols(
    y=pd.concat([df['y0'], df['y1']]),
    x=df['x'],
)
regression









    Out[40]:





-------------------------Summary of Regression Analysis-------------------------

Formula: Y ~ <x> + <intercept>

Number of Observations:         200
Number of Degrees of Freedom:   2

R-squared:         0.9741
Adj R-squared:     0.9740

Rmse:             14.0016

F-stat (1, 198):  7446.3785, p-value:     0.0000

Degrees of Freedom: model 1, resid 198

-----------------------Summary of Estimated Coefficients------------------------
      Variable       Coef    Std Err     t-stat    p-value    CI 2.5%   CI 97.5%
--------------------------------------------------------------------------------
             x     2.9597     0.0343      86.29     0.0000     2.8925     3.0269
     intercept     9.9445     3.5534       2.80     0.0056     2.9799    16.9092
---------------------------------End of Summary---------------------------------

Plot our data to get a visual sense of whether this is a reasonable model.



In [65]:

    
import matplotlib.pyplot as plt
from matplotlib import lines

# Scatter the first trial; the returned axes object is reused below so that
# every later layer is drawn onto the same figure.
axis = df.plot(
    x='x',
    y='y0',
    kind='scatter',
    # Everything after this is optional.
    marker='x',
    linewidth=1.5,
    color='red',
    xlim=(45, 155),
    ylim=(125, 485),
    figsize=(12, 7),
    label='Trial 0',
)
df.plot(
    x='x',
    y='y1',
    kind='scatter',
    ax=axis, # This tells matplotlib to add this plot on top of the previous one.
    # Everything after this is optional.
    marker='x',
    linewidth=1.5,
    color='blue',
    label='Trial 1',
)
# Overlay the fitted values. `regression` is the fit object created in the
# regression cell; the name used here must match it, otherwise the cell fails
# with a NameError on a fresh kernel.
regression.y_fitted.plot(
    ax=axis,
    color='purple',
    linewidth=2,
    # See https://docs.python.org/2/library/string.html#format-specification-mini-language
    # for an explanation of the format of the strings inside the curly braces here.
    label='Regression: Y = {m:.4f}X + {b:.5}'.format(
        m=regression.beta.x,
        b=regression.beta.intercept,
    )
)

plt.xlabel('My Awesome X Axis')
plt.ylabel('My Awesome Y Axis')

plt.legend(
    loc='upper left',
    scatterpoints=1,
)

plt.title('My Awesome Regression Analysis')









    Out[65]:





<matplotlib.text.Text at 0x110802bd0>

	x	y0	y1
50	50	172.854190	145.772328
51	51	146.172851	177.290998
52	52	150.585412	166.771689
53	53	167.788975	164.723109
54	54	192.526672	176.700035
55	55	173.087062	196.051053
56	56	169.319521	179.012183
57	57	171.688592	174.852734
58	58	169.765965	192.957446
59	59	195.677395	165.677555
60	60	211.965887	187.194226
61	61	174.972141	200.603028
62	62	214.138771	192.090558
63	63	206.196159	209.814485
64	64	191.253703	225.309439
65	65	210.448703	192.892762
66	66	211.215431	206.644236
67	67	193.563510	203.880360
68	68	197.270521	201.113099
69	69	220.618016	201.688256
70	70	207.151778	196.817555
71	71	213.060158	232.184963
72	72	247.754713	211.653061
73	73	213.057432	234.618208
74	74	226.267727	226.889442
75	75	232.616644	243.718635
76	76	238.005354	220.478286
77	77	227.736492	238.212674
78	78	263.640435	261.701072
79	79	230.519727	245.474255
...	...	...	...
120	120	364.626304	347.354081
121	121	398.163848	372.939704
122	122	354.719352	357.856101
123	123	381.885688	375.899306
124	124	386.171630	366.608254
125	125	383.883295	385.406969
126	126	390.743554	347.937944
127	127	408.241602	377.825537
128	128	370.128210	389.405398
129	129	392.087396	377.953093
130	130	390.125402	396.472032
131	131	401.071693	395.520709
132	132	391.293586	384.902955
133	133	419.354889	398.454692
134	134	414.409858	408.179517
135	135	397.060690	396.944569
136	136	423.654914	372.181352
137	137	416.444577	421.441720
138	138	414.907222	408.083432
139	139	438.579730	418.188859
140	140	419.688440	437.514931
141	141	449.265980	421.402262
142	142	424.936533	421.858118
143	143	448.530916	451.793766
144	144	403.320395	448.833260
145	145	448.641680	438.132560
146	146	464.186444	438.631212
147	147	439.629830	457.988423
148	148	449.603870	459.015180
149	149	477.391707	469.187296