Regression Fundamentals



In [7]:

    
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf



In [8]:

    
# Move up three directory levels so the relative data paths used in later
# cells resolve from there. Written as the explicit `%cd` line magic so the
# cell does not depend on IPython's automagic setting being enabled.
%cd ../../../









    



/home/wmmurrah/FOCAL



In [9]:

    
# Read the bivariate example data into a DataFrame. The path is relative, so it
# is resolved against the current working directory (changed by the `cd` cell
# above); the recorded run failed with "File data/bivariate.csv does not exist".
# NOTE(review): confirm where bivariate.csv actually lives relative to that directory.
data = pd.read_csv("data/bivariate.csv")









    



---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-9-39938acec277> in <module>()
----> 1 data = pd.read_csv("data/bivariate.csv")

/home/wmmurrah/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    496                     skip_blank_lines=skip_blank_lines)
    497 
--> 498         return _read(filepath_or_buffer, kwds)
    499 
    500     parser_f.__name__ = name

/home/wmmurrah/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    273 
    274     # Create the parser.
--> 275     parser = TextFileReader(filepath_or_buffer, **kwds)
    276 
    277     if (nrows is not None) and (chunksize is not None):

/home/wmmurrah/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)
    588             self.options['has_index_names'] = kwds['has_index_names']
    589 
--> 590         self._make_engine(self.engine)
    591 
    592     def _get_options_with_defaults(self, engine):

/home/wmmurrah/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _make_engine(self, engine)
    729     def _make_engine(self, engine='c'):
    730         if engine == 'c':
--> 731             self._engine = CParserWrapper(self.f, **self.options)
    732         else:
    733             if engine == 'python':

/home/wmmurrah/anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, src, **kwds)
   1101         kwds['allow_leading_cols'] = self.index_col is not False
   1102 
-> 1103         self._reader = _parser.TextReader(src, **kwds)
   1104 
   1105         # XXX

pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3246)()

pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:6111)()

IOError: File data/bivariate.csv does not exist



In [6]:

    
# Display the DataFrame bound to `data` by the read_csv cell.
# NOTE(review): `data` only exists once that read_csv call has succeeded.
data



In [ ]:

    
# Load the "Guerry" dataset from the "HistData" R package through statsmodels.
# NOTE(review): get_rdataset presumably downloads the data, so this cell
# likely needs network access on first run — confirm.
dat = sm.datasets.get_rdataset("Guerry", "HistData").data

# Fit an OLS regression of Lottery on Literacy and the natural log of Pop1831
# (the log transform is applied inside the formula itself).
results = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=dat).fit()

# Print the fitted-model summary. The call form of print runs on both
# Python 2 and Python 3; the bare `print x` statement is Python 2 only.
print(results.summary())

	Case	X	W	Y
0	A	16	48	100
1	B	14	47	92
2	C	16	45	88
3	D	12	45	95
4	E	18	46	98
5	F	18	46	101
6	G	13	47	97
7	H	16	48	98
8	I	18	49	110
9	J	22	49	124
10	K	18	50	102
11	L	19	51	115
12	M	16	52	92
13	N	16	52	102
14	O	22	50	104
15	P	12	51	85
16	Q	20	54	118
17	R	14	53	105
18	S	21	52	111
19	T	17	53	122

	Case	X	W	Y
0	A	16	48	100
1	B	14	47	92
2	C	16	45	88
3	D	12	45	95
4	E	18	46	98
5	F	18	46	101
6	G	13	47	97
7	H	16	48	98
8	I	18	49	110
9	J	22	49	124
10	K	18	50	102
11	L	19	51	115
12	M	16	52	92
13	N	16	52	102
14	O	22	50	104
15	P	12	51	85
16	Q	20	54	118
17	R	14	53	105
18	S	21	52	111
19	T	17	53	122

	Case	X	W	Y
0	A	16	48	100
1	B	14	47	92
2	C	16	45	88
3	D	12	45	95
4	E	18	46	98
5	F	18	46	101
6	G	13	47	97
7	H	16	48	98
8	I	18	49	110
9	J	22	49	124
10	K	18	50	102
11	L	19	51	115
12	M	16	52	92
13	N	16	52	102
14	O	22	50	104
15	P	12	51	85
16	Q	20	54	118
17	R	14	53	105
18	S	21	52	111
19	T	17	53	122

	Case	X	W	Y
0	A	16	48	100
1	B	14	47	92
2	C	16	45	88
3	D	12	45	95
4	E	18	46	98
5	F	18	46	101
6	G	13	47	97
7	H	16	48	98
8	I	18	49	110
9	J	22	49	124
10	K	18	50	102
11	L	19	51	115
12	M	16	52	92
13	N	16	52	102
14	O	22	50	104
15	P	12	51	85
16	Q	20	54	118
17	R	14	53	105
18	S	21	52	111
19	T	17	53	122