# Linear Regression by Example

``````

In [6]:

%pylab inline
pylab.style.use('ggplot')
import pandas as pd

``````
``````

Populating the interactive namespace from numpy and matplotlib

``````
``````

In [7]:

``````
``````

Out[7]:

      TV  Radio  Newspaper  Sales
1  230.1   37.8       69.2   22.1
2   44.5   39.3       45.1   10.4
3   17.2   45.9       69.3    9.3
4  151.5   41.3       58.5   18.5
5  180.8   10.8       58.4   12.9

``````

### Check for Collinearity

``````

In [8]:

import seaborn as sns

# Pairwise scatterplots of the predictors only: the response column 'Sales'
# is dropped so the grid shows how the features relate to one another.
sns.pairplot(data=data.drop('Sales', axis='columns'))

``````
``````

Out[8]:

<seaborn.axisgrid.PairGrid at 0x28136469208>

``````
``````

In [9]:

# Visualize the relationship between each feature and the response with
# scatterplots, one panel per feature. The y-axis is shared so the panels are
# directly comparable, and the figure size is set once when the grid is created.
fig, axs = plt.subplots(1, 3, sharey=True, figsize=(16, 8))
data.plot(kind='scatter', x='TV', y='Sales', ax=axs[0])
# Middle panel: Radio is used as a predictor in the models below, so plot it too
# instead of leaving axs[1] empty.
data.plot(kind='scatter', x='Radio', y='Sales', ax=axs[1])
data.plot(kind='scatter', x='Newspaper', y='Sales', ax=axs[2])

``````
``````

Out[9]:

<matplotlib.axes._subplots.AxesSubplot at 0x28137205f60>

``````
``````

In [13]:

# Same three panels as the scatterplots, now with a fitted regression line
# drawn by seaborn for each feature against Sales.
# `plt.subplots` is used for consistency with the scatterplot cell.
fig, axs = plt.subplots(1, 3, sharey=True, figsize=(16, 8))
sns.regplot(data=data, x='TV', y='Sales', ax=axs[0])
# Middle panel: include Radio so that axs[1] is not left blank.
sns.regplot(data=data, x='Radio', y='Sales', ax=axs[1])
sns.regplot(data=data, x='Newspaper', y='Sales', ax=axs[2])

``````
``````

Out[13]:

<matplotlib.axes._subplots.AxesSubplot at 0x28137601630>

``````
``````

In [19]:

# statsmodels' formula interface accepts R-style model formulas.
import statsmodels.formula.api as smf

# Ordinary least squares with Sales as the response and TV as the only
# predictor; build and fit in a single expression.
lm1 = smf.ols('Sales ~ TV', data=data).fit()

# Display the estimated intercept and TV slope.
lm1.params

``````
``````

Out[19]:

Intercept    7.032594
TV           0.047537
dtype: float64

``````
``````

In [18]:

# Report the TV slope scaled by 100 and rounded to the nearest whole number.
# NOTE(review): `\$` is not a recognized escape sequence, so the backslash is
# printed literally (see the cell output) - confirm whether that is intended.
print(
    'Spending \$100 on TV sales results in %s unit sales.'
    % round(lm1.params['TV'] * 100)
)

``````
``````

Spending \$100 on TV sales results in 5.0 unit sales.

``````
``````

In [20]:

# R-squared of the TV-only model (lm1): the fraction of the variance in Sales
# accounted for by the fitted line.
lm1.rsquared

``````
``````

Out[20]:

0.61187505085007121

``````
``````

In [21]:

# Second model: Radio joins TV as a predictor of Sales.
lm2 = smf.ols('Sales ~ TV + Radio', data=data).fit()

# Display the fitted coefficients.
lm2.params

``````
``````

Out[21]:

Intercept    2.921100
TV           0.045755
dtype: float64

``````
``````

In [22]:

# R-squared of the TV + Radio model (lm2), for comparison with lm1.rsquared.
lm2.rsquared

``````
``````

Out[22]:

0.89719426108289568

``````
``````

In [23]:

# Third model: all three advertising channels as predictors of Sales.
lm3 = smf.ols('Sales ~ TV + Radio + Newspaper', data=data).fit()

# Display the fitted coefficients.
lm3.params

``````
``````

Out[23]:

Intercept    2.938889
TV           0.045765
Newspaper   -0.001037
dtype: float64

``````
``````

In [24]:

# R-squared of the full model (lm3), for comparison with lm2.rsquared.
# In-sample R-squared cannot decrease when a predictor is added, so a tiny gain
# here is not by itself evidence that Newspaper is a useful predictor.
lm3.rsquared

``````
``````

Out[24]:

0.89721063817895208

``````
``````

In [ ]:

``````