In [1]:
%autosave 0
from IPython.display import HTML, display
display(HTML('<style>.container { width:100% !important; }</style>'))


Autosave disabled

Simple Linear Regression with SciKit-Learn

We import the module pandas. This module implements so-called data frames and is more convenient than the module csv for reading a CSV file.


In [2]:
import pandas as pd

The data we want to read is contained in the CSV file 'cars.csv'.


In [3]:
cars = pd.read_csv('cars.csv')
cars


Out[3]:
mpg cyl displacement hp weight acc year name
0 18.0 8 307.0 130.0 3504.0 12.0 70 chevrolet chevelle malibu
1 15.0 8 350.0 165.0 3693.0 11.5 70 buick skylark 320
2 18.0 8 318.0 150.0 3436.0 11.0 70 plymouth satellite
3 16.0 8 304.0 150.0 3433.0 12.0 70 amc rebel sst
4 17.0 8 302.0 140.0 3449.0 10.5 70 ford torino
5 15.0 8 429.0 198.0 4341.0 10.0 70 ford galaxie 500
6 14.0 8 454.0 220.0 4354.0 9.0 70 chevrolet impala
7 14.0 8 440.0 215.0 4312.0 8.5 70 plymouth fury iii
8 14.0 8 455.0 225.0 4425.0 10.0 70 pontiac catalina
9 15.0 8 390.0 190.0 3850.0 8.5 70 amc ambassador dpl
10 15.0 8 383.0 170.0 3563.0 10.0 70 dodge challenger se
11 14.0 8 340.0 160.0 3609.0 8.0 70 plymouth 'cuda 340
12 15.0 8 400.0 150.0 3761.0 9.5 70 chevrolet monte carlo
13 14.0 8 455.0 225.0 3086.0 10.0 70 buick estate wagon (sw)
14 24.0 4 113.0 95.0 2372.0 15.0 70 toyota corona mark ii
15 22.0 6 198.0 95.0 2833.0 15.5 70 plymouth duster
16 18.0 6 199.0 97.0 2774.0 15.5 70 amc hornet
17 21.0 6 200.0 85.0 2587.0 16.0 70 ford maverick
18 27.0 4 97.0 88.0 2130.0 14.5 70 datsun pl510
19 26.0 4 97.0 46.0 1835.0 20.5 70 volkswagen 1131 deluxe sedan
20 25.0 4 110.0 87.0 2672.0 17.5 70 peugeot 504
21 24.0 4 107.0 90.0 2430.0 14.5 70 audi 100 ls
22 25.0 4 104.0 95.0 2375.0 17.5 70 saab 99e
23 26.0 4 121.0 113.0 2234.0 12.5 70 bmw 2002
24 21.0 6 199.0 90.0 2648.0 15.0 70 amc gremlin
25 10.0 8 360.0 215.0 4615.0 14.0 70 ford f250
26 10.0 8 307.0 200.0 4376.0 15.0 70 chevy c20
27 11.0 8 318.0 210.0 4382.0 13.5 70 dodge d200
28 9.0 8 304.0 193.0 4732.0 18.5 70 hi 1200d
29 27.0 4 97.0 88.0 2130.0 14.5 71 datsun pl510
... ... ... ... ... ... ... ... ...
362 28.0 4 112.0 88.0 2605.0 19.6 82 chevrolet cavalier
363 27.0 4 112.0 88.0 2640.0 18.6 82 chevrolet cavalier wagon
364 34.0 4 112.0 88.0 2395.0 18.0 82 chevrolet cavalier 2-door
365 31.0 4 112.0 85.0 2575.0 16.2 82 pontiac j2000 se hatchback
366 29.0 4 135.0 84.0 2525.0 16.0 82 dodge aries se
367 27.0 4 151.0 90.0 2735.0 18.0 82 pontiac phoenix
368 24.0 4 140.0 92.0 2865.0 16.4 82 ford fairmont futura
369 36.0 4 105.0 74.0 1980.0 15.3 82 volkswagen rabbit l
370 37.0 4 91.0 68.0 2025.0 18.2 82 mazda glc custom l
371 31.0 4 91.0 68.0 1970.0 17.6 82 mazda glc custom
372 38.0 4 105.0 63.0 2125.0 14.7 82 plymouth horizon miser
373 36.0 4 98.0 70.0 2125.0 17.3 82 mercury lynx l
374 36.0 4 120.0 88.0 2160.0 14.5 82 nissan stanza xe
375 36.0 4 107.0 75.0 2205.0 14.5 82 honda accord
376 34.0 4 108.0 70.0 2245.0 16.9 82 toyota corolla
377 38.0 4 91.0 67.0 1965.0 15.0 82 honda civic
378 32.0 4 91.0 67.0 1965.0 15.7 82 honda civic (auto)
379 38.0 4 91.0 67.0 1995.0 16.2 82 datsun 310 gx
380 25.0 6 181.0 110.0 2945.0 16.4 82 buick century limited
381 38.0 6 262.0 85.0 3015.0 17.0 82 oldsmobile cutlass ciera (diesel)
382 26.0 4 156.0 92.0 2585.0 14.5 82 chrysler lebaron medallion
383 22.0 6 232.0 112.0 2835.0 14.7 82 ford granada l
384 32.0 4 144.0 96.0 2665.0 13.9 82 toyota celica gt
385 36.0 4 135.0 84.0 2370.0 13.0 82 dodge charger 2.2
386 27.0 4 151.0 90.0 2950.0 17.3 82 chevrolet camaro
387 27.0 4 140.0 86.0 2790.0 15.6 82 ford mustang gl
388 44.0 4 97.0 52.0 2130.0 24.6 82 vw pickup
389 32.0 4 135.0 84.0 2295.0 11.6 82 dodge rampage
390 28.0 4 120.0 79.0 2625.0 18.6 82 ford ranger
391 31.0 4 119.0 82.0 2720.0 19.4 82 chevy s-10

392 rows × 8 columns
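
As an optional check that is not part of the original analysis, the standard pandas methods info and describe summarize the column types and the numeric columns.


In [ ]:
cars.info()
cars.describe()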

We want to convert the columns containing mpg and displacement into NumPy arrays.


In [4]:
import numpy as np

X = np.array(cars['displacement'])
Y = np.array(cars['mpg'])
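
In recent versions of pandas the same arrays can be obtained with the method to_numpy; the cell below is merely an equivalent spelling of the step above.


In [ ]:
X = cars['displacement'].to_numpy()
Y = cars['mpg'].to_numpy()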

We convert cubic inches into litres.


In [5]:
X = 0.0163871 * X
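
The conversion factor follows from the definition of the inch: $$ 1\,\text{in}^3 = (2.54\,\text{cm})^3 = 16.3871\,\text{cm}^3 = 0.0163871\,\text{l}. $$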

In order to use SciKit-Learn we have to reshape the array X into a matrix with a single column, since SciKit-Learn expects the features as a two-dimensional array of shape (number of samples, number of features).


In [7]:
X = np.reshape(X, (len(X), 1))
X


Out[7]:
array([[5.0308397 ],
       [5.735485  ],
       [5.2110978 ],
       ...,
       [2.2122585 ],
       [1.966452  ],
       [1.9500649 ]])

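The same reshape is often written with -1 for the first dimension, letting NumPy infer the number of rows; the cell below is an equivalent alternative shown only for illustration.


In [ ]:
X = X.reshape(-1, 1)  # -1 lets NumPy infer the number of rows
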
We convert miles per gallon into kilometres per litre.


In [8]:
Y = 1.60934 / 3.78541 * Y
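
Here 1 mile = 1.60934 km and 1 US gallon = 3.78541 litres, so a value in miles per gallon is multiplied by $1.60934 / 3.78541$ to obtain kilometres per litre.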

We convert kilometres per litre into litres per 100 kilometres.


In [9]:
Y = 100 / Y
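
Combining the two conversions, the fuel consumption in litres per 100 km can also be computed directly from the original miles-per-gallon value: $$ \frac{100 \cdot 3.78541}{1.60934 \cdot \texttt{mpg}} \approx \frac{235.21}{\texttt{mpg}}. $$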

We plot fuel consumption versus engine displacement.


In [10]:
import matplotlib.pyplot as plt
import seaborn           as sns
%matplotlib inline

plt.figure(figsize=(12, 10))
sns.set(style='darkgrid')
plt.scatter(X, Y, c='b') # 'b' is blue color
plt.xlabel('engine displacement in litres')
plt.ylabel('litre per 100 km')
plt.title('fuel consumption versus engine displacement')
plt.show()


We import the module linear_model from SciKit-Learn:


In [11]:
import sklearn.linear_model as lm

We create a linear model.


In [12]:
model = lm.LinearRegression()
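
By default, LinearRegression also fits the intercept $\vartheta_0$. This could be made explicit via the parameter fit_intercept, as in the optional cell below.


In [ ]:
model = lm.LinearRegression(fit_intercept=True)  # fit_intercept=True is the default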

We train this model using the data we have.


In [13]:
M = model.fit(X, Y)
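
The method fit returns the fitted estimator itself, so M and model refer to the same object; the optional cell below merely checks this.


In [ ]:
M is model  # evaluates to True, since fit returns self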

The model M represents a linear relationship between X and Y of the form $$ \texttt{Y} = \vartheta_0 + \vartheta_1 \cdot \texttt{X} $$ We extract the coefficients $\vartheta_0$ and $\vartheta_1$.


In [14]:
ϑ0 = M.intercept_
ϑ0


Out[14]:
4.951240667279944

In [15]:
ϑ1 = M.coef_[0]
ϑ1


Out[15]:
1.9766520631043476
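
The quality of the fit can be quantified with the coefficient of determination $R^2$. The cell below uses the standard score method of LinearRegression; it is an optional addition that is not part of the original notebook.


In [ ]:
M.score(X, Y)  # coefficient of determination R²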

The values are, of course, the same as those we had already computed in the notebook Simple-Linear-Regression.ipynb. We plot the data together with the regression line.


In [16]:
xMax = X.max() + 0.2  # maximal displacement plus a small margin; X.max() returns a scalar
plt.figure(figsize=(12, 10))
sns.set(style='darkgrid')
plt.scatter(X, Y, c='b')
plt.plot([0, xMax], [ϑ0, ϑ0 + ϑ1 * xMax], c='r')
plt.xlabel('engine displacement in litres')
plt.ylabel('fuel consumption in litres per 100 km')
plt.title('Fuel Consumption versus Engine Displacement')
plt.show()
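
As an illustration that is not contained in the original notebook, the fitted model can also predict the expected fuel consumption for a single displacement value; here 2.0 litres is chosen as an example.


In [ ]:
M.predict(np.array([[2.0]]))  # expected litres per 100 km for a 2.0 litre engine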



In [ ]: