In [2]:
import pandas as pd
import numpy as np
 
wine = pd.read_csv("winequality-red.csv", sep=";")
wine.head


Out[2]:
<bound method NDFrame.head of       fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0               7.4             0.700         0.00             1.9      0.076   
1               7.8             0.880         0.00             2.6      0.098   
2               7.8             0.760         0.04             2.3      0.092   
3              11.2             0.280         0.56             1.9      0.075   
4               7.4             0.700         0.00             1.9      0.076   
5               7.4             0.660         0.00             1.8      0.075   
6               7.9             0.600         0.06             1.6      0.069   
7               7.3             0.650         0.00             1.2      0.065   
8               7.8             0.580         0.02             2.0      0.073   
9               7.5             0.500         0.36             6.1      0.071   
10              6.7             0.580         0.08             1.8      0.097   
11              7.5             0.500         0.36             6.1      0.071   
12              5.6             0.615         0.00             1.6      0.089   
13              7.8             0.610         0.29             1.6      0.114   
14              8.9             0.620         0.18             3.8      0.176   
15              8.9             0.620         0.19             3.9      0.170   
16              8.5             0.280         0.56             1.8      0.092   
17              8.1             0.560         0.28             1.7      0.368   
18              7.4             0.590         0.08             4.4      0.086   
19              7.9             0.320         0.51             1.8      0.341   
20              8.9             0.220         0.48             1.8      0.077   
21              7.6             0.390         0.31             2.3      0.082   
22              7.9             0.430         0.21             1.6      0.106   
23              8.5             0.490         0.11             2.3      0.084   
24              6.9             0.400         0.14             2.4      0.085   
25              6.3             0.390         0.16             1.4      0.080   
26              7.6             0.410         0.24             1.8      0.080   
27              7.9             0.430         0.21             1.6      0.106   
28              7.1             0.710         0.00             1.9      0.080   
29              7.8             0.645         0.00             2.0      0.082   
...             ...               ...          ...             ...        ...   
1569            6.2             0.510         0.14             1.9      0.056   
1570            6.4             0.360         0.53             2.2      0.230   
1571            6.4             0.380         0.14             2.2      0.038   
1572            7.3             0.690         0.32             2.2      0.069   
1573            6.0             0.580         0.20             2.4      0.075   
1574            5.6             0.310         0.78            13.9      0.074   
1575            7.5             0.520         0.40             2.2      0.060   
1576            8.0             0.300         0.63             1.6      0.081   
1577            6.2             0.700         0.15             5.1      0.076   
1578            6.8             0.670         0.15             1.8      0.118   
1579            6.2             0.560         0.09             1.7      0.053   
1580            7.4             0.350         0.33             2.4      0.068   
1581            6.2             0.560         0.09             1.7      0.053   
1582            6.1             0.715         0.10             2.6      0.053   
1583            6.2             0.460         0.29             2.1      0.074   
1584            6.7             0.320         0.44             2.4      0.061   
1585            7.2             0.390         0.44             2.6      0.066   
1586            7.5             0.310         0.41             2.4      0.065   
1587            5.8             0.610         0.11             1.8      0.066   
1588            7.2             0.660         0.33             2.5      0.068   
1589            6.6             0.725         0.20             7.8      0.073   
1590            6.3             0.550         0.15             1.8      0.077   
1591            5.4             0.740         0.09             1.7      0.089   
1592            6.3             0.510         0.13             2.3      0.076   
1593            6.8             0.620         0.08             1.9      0.068   
1594            6.2             0.600         0.08             2.0      0.090   
1595            5.9             0.550         0.10             2.2      0.062   
1596            6.3             0.510         0.13             2.3      0.076   
1597            5.9             0.645         0.12             2.0      0.075   
1598            6.0             0.310         0.47             3.6      0.067   

      free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                    11.0                  34.0  0.99780  3.51       0.56   
1                    25.0                  67.0  0.99680  3.20       0.68   
2                    15.0                  54.0  0.99700  3.26       0.65   
3                    17.0                  60.0  0.99800  3.16       0.58   
4                    11.0                  34.0  0.99780  3.51       0.56   
5                    13.0                  40.0  0.99780  3.51       0.56   
6                    15.0                  59.0  0.99640  3.30       0.46   
7                    15.0                  21.0  0.99460  3.39       0.47   
8                     9.0                  18.0  0.99680  3.36       0.57   
9                    17.0                 102.0  0.99780  3.35       0.80   
10                   15.0                  65.0  0.99590  3.28       0.54   
11                   17.0                 102.0  0.99780  3.35       0.80   
12                   16.0                  59.0  0.99430  3.58       0.52   
13                    9.0                  29.0  0.99740  3.26       1.56   
14                   52.0                 145.0  0.99860  3.16       0.88   
15                   51.0                 148.0  0.99860  3.17       0.93   
16                   35.0                 103.0  0.99690  3.30       0.75   
17                   16.0                  56.0  0.99680  3.11       1.28   
18                    6.0                  29.0  0.99740  3.38       0.50   
19                   17.0                  56.0  0.99690  3.04       1.08   
20                   29.0                  60.0  0.99680  3.39       0.53   
21                   23.0                  71.0  0.99820  3.52       0.65   
22                   10.0                  37.0  0.99660  3.17       0.91   
23                    9.0                  67.0  0.99680  3.17       0.53   
24                   21.0                  40.0  0.99680  3.43       0.63   
25                   11.0                  23.0  0.99550  3.34       0.56   
26                    4.0                  11.0  0.99620  3.28       0.59   
27                   10.0                  37.0  0.99660  3.17       0.91   
28                   14.0                  35.0  0.99720  3.47       0.55   
29                    8.0                  16.0  0.99640  3.38       0.59   
...                   ...                   ...      ...   ...        ...   
1569                 15.0                  34.0  0.99396  3.48       0.57   
1570                 19.0                  35.0  0.99340  3.37       0.93   
1571                 15.0                  25.0  0.99514  3.44       0.65   
1572                 35.0                 104.0  0.99632  3.33       0.51   
1573                 15.0                  50.0  0.99467  3.58       0.67   
1574                 23.0                  92.0  0.99677  3.39       0.48   
1575                 12.0                  20.0  0.99474  3.26       0.64   
1576                 16.0                  29.0  0.99588  3.30       0.78   
1577                 13.0                  27.0  0.99622  3.54       0.60   
1578                 13.0                  20.0  0.99540  3.42       0.67   
1579                 24.0                  32.0  0.99402  3.54       0.60   
1580                  9.0                  26.0  0.99470  3.36       0.60   
1581                 24.0                  32.0  0.99402  3.54       0.60   
1582                 13.0                  27.0  0.99362  3.57       0.50   
1583                 32.0                  98.0  0.99578  3.33       0.62   
1584                 24.0                  34.0  0.99484  3.29       0.80   
1585                 22.0                  48.0  0.99494  3.30       0.84   
1586                 34.0                  60.0  0.99492  3.34       0.85   
1587                 18.0                  28.0  0.99483  3.55       0.66   
1588                 34.0                 102.0  0.99414  3.27       0.78   
1589                 29.0                  79.0  0.99770  3.29       0.54   
1590                 26.0                  35.0  0.99314  3.32       0.82   
1591                 16.0                  26.0  0.99402  3.67       0.56   
1592                 29.0                  40.0  0.99574  3.42       0.75   
1593                 28.0                  38.0  0.99651  3.42       0.82   
1594                 32.0                  44.0  0.99490  3.45       0.58   
1595                 39.0                  51.0  0.99512  3.52       0.76   
1596                 29.0                  40.0  0.99574  3.42       0.75   
1597                 32.0                  44.0  0.99547  3.57       0.71   
1598                 18.0                  42.0  0.99549  3.39       0.66   

      alcohol  quality  
0         9.4        5  
1         9.8        5  
2         9.8        5  
3         9.8        6  
4         9.4        5  
5         9.4        5  
6         9.4        5  
7        10.0        7  
8         9.5        7  
9        10.5        5  
10        9.2        5  
11       10.5        5  
12        9.9        5  
13        9.1        5  
14        9.2        5  
15        9.2        5  
16       10.5        7  
17        9.3        5  
18        9.0        4  
19        9.2        6  
20        9.4        6  
21        9.7        5  
22        9.5        5  
23        9.4        5  
24        9.7        6  
25        9.3        5  
26        9.5        5  
27        9.5        5  
28        9.4        5  
29        9.8        6  
...       ...      ...  
1569     11.5        6  
1570     12.4        6  
1571     11.1        6  
1572      9.5        5  
1573     12.5        6  
1574     10.5        6  
1575     11.8        6  
1576     10.8        6  
1577     11.9        6  
1578     11.3        6  
1579     11.3        5  
1580     11.9        6  
1581     11.3        5  
1582     11.9        5  
1583      9.8        5  
1584     11.6        7  
1585     11.5        6  
1586     11.4        6  
1587     10.9        6  
1588     12.8        6  
1589      9.2        5  
1590     11.6        6  
1591     11.6        6  
1592     11.0        6  
1593      9.5        6  
1594     10.5        5  
1595     11.2        6  
1596     11.0        6  
1597     10.2        5  
1598     11.0        6  

[1599 rows x 12 columns]>

In [8]:
# http://pythondatascience.plavox.info/scikit-learn/%E7%B7%9A%E5%BD%A2%E5%9B%9E%E5%B8%B0
# sklearn.linear_model.LinearRegression クラスを読み込み
from sklearn import linear_model
clf = linear_model.LinearRegression()
 
# 説明変数に "density (濃度)" を利用
X = wine.loc[:, ['density']].as_matrix()
 
# 目的変数に "alcohol (アルコール度数)" を利用
Y = wine['alcohol'].as_matrix()
 
# 予測モデルを作成
clf.fit(X, Y)
 
# 回帰係数
print(clf.coef_)
 
# 切片 (誤差)
print(clf.intercept_)
 
# 決定係数
print(clf.score(X, Y))


# res [alcohol] = -280.16382307 × [density] + 289.675343383


[-280.16382307]
289.675343383
0.246194364397

In [9]:
# matplotlib パッケージを読み込み
import matplotlib.pyplot as plt
 
# 散布図
plt.scatter(X, Y)
 
# 回帰直線
plt.plot(X, clf.predict(X))


Out[9]:
[<matplotlib.lines.Line2D at 0x7f5994214fd0>]

In [ ]:


In [10]:
from sklearn import linear_model
clf = linear_model.LinearRegression()
 
# 説明変数に "quality (品質スコア以外すべて)" を利用
wine_except_quality = wine.drop("quality", axis=1)
X = wine_except_quality.as_matrix()
 
# 目的変数に "quality (品質スコア)" を利用
Y = wine['quality'].as_matrix()
 
# 予測モデルを作成
clf.fit(X, Y)
 
# 偏回帰係数
print(pd.DataFrame({"Name":wine_except_quality.columns,
                    "Coefficients":clf.coef_}).sort_values(by='Coefficients') )
 
# 切片 (誤差)
print(clf.intercept_)


    Coefficients                  Name
7     -17.881164               density
4      -1.874225             chlorides
1      -1.083590      volatile acidity
8      -0.413653                    pH
2      -0.182564           citric acid
6      -0.003265  total sulfur dioxide
5       0.004361   free sulfur dioxide
3       0.016331        residual sugar
0       0.024991         fixed acidity
10      0.276198               alcohol
9       0.916334             sulphates
21.9652084495

In [12]:
#[quality] = -17.881164 × [density] + -1.874225 × [chlorides] +
#            -1.083590 × [volatile acidity] + -0.413653 × [pH] + 
#            -0.182564 × [citric acid] + -0.003265 × [total sulfur dioxide] + 
#            0.004361 × [free sulfur dioxide] + 0.016331 × [residual sugar] + 
#            0.024991 × [fixed acidity] + 0.276198 × [alcohol] + 
#            0.916334 × [sulphates] + 21.9652084495

In [13]:
from sklearn import linear_model
clf = linear_model.LinearRegression()
 
# データフレームの各列を正規化
wine2 = wine.apply(lambda x: (x - np.mean(x)) / (np.max(x) - np.min(x)))
wine2.head()
 
# 説明変数に "quality (品質スコア以外すべて)" を利用
wine2_except_quality = wine2.drop("quality", axis=1)
X = wine2_except_quality.as_matrix()
 
# 目的変数に "quality (品質スコア)" を利用
Y = wine2['quality'].as_matrix()
 
# 予測モデルを作成
clf.fit(X, Y)
 
# 偏回帰係数
print(pd.DataFrame({"Name":wine2_except_quality.columns,
                    "Coefficients":np.abs(clf.coef_)}).sort_values(by='Coefficients') )
 
# 切片 (誤差)
print(clf.intercept_)


    Coefficients                  Name
2       0.036513           citric acid
3       0.047687        residual sugar
7       0.048708               density
0       0.056479         fixed acidity
5       0.061931   free sulfur dioxide
8       0.105068                    pH
6       0.184775  total sulfur dioxide
4       0.224532             chlorides
9       0.306056             sulphates
1       0.316408      volatile acidity
10      0.359057               alcohol
1.27015586565e-15

In [ ]: