Regression Fundamentals

import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Read the bivariate example dataset from a CSV file into a DataFrame.
# The path is relative, so it resolves against the current working directory.
# NOTE(review): the columns appear to be Case, X, W, Y, judging by the printed
# table that follows in this file -- confirm against the CSV itself.
data = pd.read_csv("data/bivariate.csv")

Case X W Y
0 A 16 48 100
1 B 14 47 92
2 C 16 45 88
3 D 12 45 95
4 E 18 46 98
5 F 18 46 101
6 G 13 47 97
7 H 16 48 98
8 I 18 49 110
9 J 22 49 124
10 K 18 50 102
11 L 19 51 115
12 M 16 52 92
13 N 16 52 102
14 O 22 50 104
15 P 12 51 85
16 Q 20 54 118
17 R 14 53 105
18 S 21 52 111
19 T 17 53 122

# Load data: fetch the "Guerry" dataset of the R package "HistData" through
# statsmodels' Rdatasets helper and keep its `.data` attribute as `dat`.
dat = sm.datasets.get_rdataset("Guerry", "HistData").data

# Fit regression model (using the natural log of one of the regressors).
# Lottery is the response; Literacy and log(Pop1831) are the regressors.
# `np.log` is written inside the formula string, so numpy must be imported
# as `np` in this module for the formula to be evaluated.
results = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=dat).fit()

# Inspect the results.
# print is called as a function: the statement form `print x` is a
# SyntaxError on Python 3, while this form works on both Python 2 and 3.
print(results.summary())