The dataset below records the average weight of the brain and body for a number of mammal species.
There are 62 rows of data. The 3 data columns include:
  I,  the index;
  A1, the brain weight;
  B,  the body weight.
We seek a model of the form: B = A1 * X1, where X1 is the coefficient to be estimated.
Source: http://people.sc.fsu.edu/~jburkardt/datasets/regression/x01.txt
In [1]:
    
import pandas as pd
import requests
%pylab inline
pylab.style.use('ggplot')
URL = 'http://people.sc.fsu.edu/~jburkardt/datasets/regression/x01.txt'
# Bound the wait so a stalled server cannot hang the kernel, and fail loudly on
# an HTTP error instead of parsing an error page as if it were the dataset.
result = requests.get(URL, timeout=30)
result.raise_for_status()
# Drop the '#' comment lines and strip surrounding whitespace from the rest.
# splitlines() copes with either line-ending convention and does not leave a
# trailing empty entry after the final newline.
data = [line.strip() for line in result.text.splitlines() if not line.startswith('#')]
    
    
In [2]:
    
# Preview only the first few parsed lines rather than echoing the whole file.
data[:10]
    
    Out[2]:
In [3]:
    
# The first two parsed lines each start with an integer count: the number of
# columns, then the number of rows. Take the token before the first space.
ncols, nrows = [int(data[i].partition(' ')[0]) for i in (0, 1)]

# The column names occupy the `ncols` lines immediately after those two lines.
col_slice = slice(2, 2 + ncols)
columns = data[col_slice]
    
In [4]:
    
# Display the column names selected from the parsed lines by col_slice.
columns
    
    Out[4]:
In [5]:
    
# Data rows begin right after the two count lines and the `ncols` column names,
# and run for `nrows` lines.
first_data_line = ncols + 2
row_slice = slice(first_data_line, first_data_line + nrows)
rows = data[row_slice]
    
In [6]:
    
# Show a small sample of the raw data rows instead of echoing all of them.
rows[:5]
    
    Out[6]:
In [7]:
    
from io import StringIO
import re
# Turn each run of spaces into a single comma so the rows can be read as CSV.
space_run = re.compile(r'[ ]+')
csv_data = space_run.sub(',', '\n'.join(rows))

# The rows carry no header line, so the column names are supplied explicitly.
data_df = pd.read_csv(
    StringIO(csv_data),
    header=None,
    names=columns,
)
data_df.head()
    
    Out[7]:
In [9]:
    
# Give the two weight columns identifier-friendly names so they can be used
# directly in a model formula; any other column keeps its original name.
formula_safe_names = {
    'Body Weight': 'body_weight',
    'Brain Weight': 'brain_weight',
}
data_df = data_df.rename(columns=formula_safe_names)

# Scatter of brain weight against body weight.
data_df.plot.scatter(x='body_weight', y='brain_weight')
    
    Out[9]:
    
In [11]:
    
import statsmodels.formula.api as smf
# Ordinary least squares regression of brain weight on body weight.
# NOTE(review): the introduction states the target model as B = A1 * X1 (body
# weight as a multiple of brain weight, no constant term), whereas this formula
# regresses brain weight on body weight with an intercept. Confirm which model
# is intended.
ols_model = smf.ols(formula='brain_weight ~ body_weight', data=data_df)
results = ols_model.fit()
results.summary()
    
    Out[11]:
In [13]:
    
# Display the fitted coefficients; the 'Intercept' and 'body_weight' entries
# are used below to compute the predicted brain weights.
results.params
    
    Out[13]:
In [17]:
    
# Evaluate the fitted line by hand: slope * body_weight + intercept.
slope = results.params['body_weight']
intercept = results.params['Intercept']
fitted_model = data_df['body_weight'] * slope + intercept

# Collect predictions, observations and the regressor side by side, one column
# per dictionary key.
series_by_name = {
    'predicted_brain_weight': fitted_model,
    'actual_brain_weight': data_df['brain_weight'],
    'body_weight': data_df['body_weight'],
}
predicted_df = pd.concat(series_by_name, axis=1)
    
In [22]:
    
# Observed brain weights first, in the default colour.
ax = predicted_df.plot.scatter(x='body_weight', y='actual_brain_weight')
# Overlay the model's predictions in red on the same axes.
predicted_df.plot.scatter(
    x='body_weight',
    y='predicted_brain_weight',
    color='red',
    ax=ax,
)
    
    Out[22]:
    
In [ ]: