In [8]:
%matplotlib inline
import matplotlib

Linear Regression

This is one of the simplest models to use, since it uses linearly correlated data to 'predict' a value given an input.


In [9]:
import pandas as pd
from sklearn import linear_model
import matplotlib.pyplot as plt

We will use pandas to handle reading of the data; pandas is pretty much the de facto standard for data manipulation in Python.


In [13]:
# Read the fixed-width text file into a DataFrame.
df = pd.read_fwf('brain_body.txt')

# Selecting with a list of column names yields single-column DataFrames
# (not Series); these are the inputs/targets handed to the model later on.
x_values, y_values = df[['Brain']], df[['Body']]

# Preview the first rows as the cell output.
df.head()


Out[13]:
Brain Body
0 3.385 44.5
1 0.480 15.5
2 1.350 8.1
3 465.000 423.0
4 36.330 119.5

Now let's train the model using the data.


In [11]:
import warnings

# Ignore warnings issued from scipy whose message starts with "internal gelsd"
# (presumably noise triggered by the least-squares fit below -- confirm).
warnings.filterwarnings(
    action="ignore",
    module="scipy",
    message="^internal gelsd",
)

# Build the estimator and fit it in one step; fit() hands back the estimator
# itself, so the name ends up bound to the fitted model.
body_regression = linear_model.LinearRegression().fit(x_values, y_values)

# Show the fitted estimator as the cell output.
body_regression


Out[11]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [25]:
# Create the figure and its single Axes up front so that the data and the
# title/labels below all land on the same Axes. Plotting through plt.* first
# and calling fig.add_subplot(111) afterwards can, depending on the Matplotlib
# version, produce a second Axes that carries the labels and covers the data.
fig, ax = plt.subplots()

# Observed points, plus the fitted model's predictions over the same inputs.
ax.scatter(x_values, y_values)
ax.plot(x_values, body_regression.predict(x_values))

# Add a figure-level heading, an Axes title and axis labels.
fig.suptitle('Linear Regression', fontsize=14, fontweight='bold')
ax.set_title('Body vs Brain')
# Lower the top edge of the Axes area (presumably to leave room for the
# figure-level heading above the Axes title).
fig.subplots_adjust(top=0.85)

# NOTE(review): x_values is the 'Brain' column and y_values is the 'Body'
# column, yet the labels below say the opposite. Either these labels or the
# column headers of the data file are swapped, and the units cannot be
# verified from this notebook -- confirm against the data source before
# changing either one.
ax.set_xlabel('Body weight (kg)')
ax.set_ylabel('Brain weight (kg)')

plt.show()



In [ ]: