In [8]:
%matplotlib inline
import matplotlib
This is one of the simplest models to use, since it uses linearly correlated data to 'predict' a value given another value.
In [9]:
import pandas as pd
from sklearn import linear_model
import matplotlib.pyplot as plt
We will use pandas to read the data; pandas is pretty much the de facto standard for data manipulation in Python.
In [13]:
# Load the fixed-width text file into a DataFrame.
df = pd.read_fwf('brain_body.txt')

# Double brackets keep each selection as a one-column DataFrame (2-D) rather
# than a Series; these are the regression input and target used in later cells.
x_values, y_values = df[['Brain']], df[['Body']]

# Preview the first five rows as the cell output.
df.head(n=5)
Out[13]:
Now let's train the model using the data.
In [11]:
import warnings

# Suppress warnings whose message starts with "internal gelsd" and that are
# raised from modules matching "scipy".
warnings.filterwarnings("ignore", message="^internal gelsd", module="scipy")

# Build the linear regression estimator, then fit it on the input/target
# frames prepared earlier. The fit call stays the last expression so the
# fitted estimator is shown as the cell output.
body_regression = linear_model.LinearRegression()
body_regression.fit(X=x_values, y=y_values)
Out[11]:
In [25]:
# Create the figure and its single Axes up front and draw everything on that
# Axes. Previously the data was drawn through pyplot and fig.add_subplot(111)
# was called afterwards; on recent Matplotlib releases that adds a second,
# empty Axes on top of the plot, so the labels ended up on the wrong Axes.
fig, ax = plt.subplots()

# Observed data points, plus the fitted line evaluated on the same inputs.
ax.scatter(x_values, y_values)
ax.plot(x_values, body_regression.predict(x_values))

# Figure-level heading and Axes-level title.
fig.suptitle('Linear Regression', fontsize=14, fontweight='bold')
ax.set_title('Body vs Brain')
fig.subplots_adjust(top=0.85)  # leave room below the suptitle

# x_values holds the 'Brain' column and y_values the 'Body' column, so the
# axis labels must follow that order (they were swapped before).
# NOTE(review): the "(kg)" units are carried over from the original labels;
# confirm them against the dataset.
ax.set_xlabel('Brain weight (kg)')
ax.set_ylabel('Body weight (kg)')
plt.show()
In [ ]: