In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
In [2]:
# Base URL of the UCI Machine Learning Repository's wine-quality directory.
# NOTE(review): no visible cell reads this variable; the CSV is loaded from a local path instead.
url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/'
In [4]:
# Load the red-wine quality CSV from the working directory; its fields are separated by ';'.
dframe_wine = pd.read_csv('winequality-red.csv', sep=';')
In [5]:
# Show the loaded frame; the rendered output elides the middle rows.
# NOTE(review): consider dframe_wine.head() to keep the notebook output short.
dframe_wine
Out[5]:
fixed acidity
volatile acidity
citric acid
residual sugar
chlorides
free sulfur dioxide
total sulfur dioxide
density
pH
sulphates
alcohol
quality
0
7.4
0.700
0.00
1.9
0.076
11.0
34.0
0.99780
3.51
0.56
9.4
5
1
7.8
0.880
0.00
2.6
0.098
25.0
67.0
0.99680
3.20
0.68
9.8
5
2
7.8
0.760
0.04
2.3
0.092
15.0
54.0
0.99700
3.26
0.65
9.8
5
3
11.2
0.280
0.56
1.9
0.075
17.0
60.0
0.99800
3.16
0.58
9.8
6
4
7.4
0.700
0.00
1.9
0.076
11.0
34.0
0.99780
3.51
0.56
9.4
5
5
7.4
0.660
0.00
1.8
0.075
13.0
40.0
0.99780
3.51
0.56
9.4
5
6
7.9
0.600
0.06
1.6
0.069
15.0
59.0
0.99640
3.30
0.46
9.4
5
7
7.3
0.650
0.00
1.2
0.065
15.0
21.0
0.99460
3.39
0.47
10.0
7
8
7.8
0.580
0.02
2.0
0.073
9.0
18.0
0.99680
3.36
0.57
9.5
7
9
7.5
0.500
0.36
6.1
0.071
17.0
102.0
0.99780
3.35
0.80
10.5
5
10
6.7
0.580
0.08
1.8
0.097
15.0
65.0
0.99590
3.28
0.54
9.2
5
11
7.5
0.500
0.36
6.1
0.071
17.0
102.0
0.99780
3.35
0.80
10.5
5
12
5.6
0.615
0.00
1.6
0.089
16.0
59.0
0.99430
3.58
0.52
9.9
5
13
7.8
0.610
0.29
1.6
0.114
9.0
29.0
0.99740
3.26
1.56
9.1
5
14
8.9
0.620
0.18
3.8
0.176
52.0
145.0
0.99860
3.16
0.88
9.2
5
15
8.9
0.620
0.19
3.9
0.170
51.0
148.0
0.99860
3.17
0.93
9.2
5
16
8.5
0.280
0.56
1.8
0.092
35.0
103.0
0.99690
3.30
0.75
10.5
7
17
8.1
0.560
0.28
1.7
0.368
16.0
56.0
0.99680
3.11
1.28
9.3
5
18
7.4
0.590
0.08
4.4
0.086
6.0
29.0
0.99740
3.38
0.50
9.0
4
19
7.9
0.320
0.51
1.8
0.341
17.0
56.0
0.99690
3.04
1.08
9.2
6
20
8.9
0.220
0.48
1.8
0.077
29.0
60.0
0.99680
3.39
0.53
9.4
6
21
7.6
0.390
0.31
2.3
0.082
23.0
71.0
0.99820
3.52
0.65
9.7
5
22
7.9
0.430
0.21
1.6
0.106
10.0
37.0
0.99660
3.17
0.91
9.5
5
23
8.5
0.490
0.11
2.3
0.084
9.0
67.0
0.99680
3.17
0.53
9.4
5
24
6.9
0.400
0.14
2.4
0.085
21.0
40.0
0.99680
3.43
0.63
9.7
6
25
6.3
0.390
0.16
1.4
0.080
11.0
23.0
0.99550
3.34
0.56
9.3
5
26
7.6
0.410
0.24
1.8
0.080
4.0
11.0
0.99620
3.28
0.59
9.5
5
27
7.9
0.430
0.21
1.6
0.106
10.0
37.0
0.99660
3.17
0.91
9.5
5
28
7.1
0.710
0.00
1.9
0.080
14.0
35.0
0.99720
3.47
0.55
9.4
5
29
7.8
0.645
0.00
2.0
0.082
8.0
16.0
0.99640
3.38
0.59
9.8
6
...
...
...
...
...
...
...
...
...
...
...
...
...
1569
6.2
0.510
0.14
1.9
0.056
15.0
34.0
0.99396
3.48
0.57
11.5
6
1570
6.4
0.360
0.53
2.2
0.230
19.0
35.0
0.99340
3.37
0.93
12.4
6
1571
6.4
0.380
0.14
2.2
0.038
15.0
25.0
0.99514
3.44
0.65
11.1
6
1572
7.3
0.690
0.32
2.2
0.069
35.0
104.0
0.99632
3.33
0.51
9.5
5
1573
6.0
0.580
0.20
2.4
0.075
15.0
50.0
0.99467
3.58
0.67
12.5
6
1574
5.6
0.310
0.78
13.9
0.074
23.0
92.0
0.99677
3.39
0.48
10.5
6
1575
7.5
0.520
0.40
2.2
0.060
12.0
20.0
0.99474
3.26
0.64
11.8
6
1576
8.0
0.300
0.63
1.6
0.081
16.0
29.0
0.99588
3.30
0.78
10.8
6
1577
6.2
0.700
0.15
5.1
0.076
13.0
27.0
0.99622
3.54
0.60
11.9
6
1578
6.8
0.670
0.15
1.8
0.118
13.0
20.0
0.99540
3.42
0.67
11.3
6
1579
6.2
0.560
0.09
1.7
0.053
24.0
32.0
0.99402
3.54
0.60
11.3
5
1580
7.4
0.350
0.33
2.4
0.068
9.0
26.0
0.99470
3.36
0.60
11.9
6
1581
6.2
0.560
0.09
1.7
0.053
24.0
32.0
0.99402
3.54
0.60
11.3
5
1582
6.1
0.715
0.10
2.6
0.053
13.0
27.0
0.99362
3.57
0.50
11.9
5
1583
6.2
0.460
0.29
2.1
0.074
32.0
98.0
0.99578
3.33
0.62
9.8
5
1584
6.7
0.320
0.44
2.4
0.061
24.0
34.0
0.99484
3.29
0.80
11.6
7
1585
7.2
0.390
0.44
2.6
0.066
22.0
48.0
0.99494
3.30
0.84
11.5
6
1586
7.5
0.310
0.41
2.4
0.065
34.0
60.0
0.99492
3.34
0.85
11.4
6
1587
5.8
0.610
0.11
1.8
0.066
18.0
28.0
0.99483
3.55
0.66
10.9
6
1588
7.2
0.660
0.33
2.5
0.068
34.0
102.0
0.99414
3.27
0.78
12.8
6
1589
6.6
0.725
0.20
7.8
0.073
29.0
79.0
0.99770
3.29
0.54
9.2
5
1590
6.3
0.550
0.15
1.8
0.077
26.0
35.0
0.99314
3.32
0.82
11.6
6
1591
5.4
0.740
0.09
1.7
0.089
16.0
26.0
0.99402
3.67
0.56
11.6
6
1592
6.3
0.510
0.13
2.3
0.076
29.0
40.0
0.99574
3.42
0.75
11.0
6
1593
6.8
0.620
0.08
1.9
0.068
28.0
38.0
0.99651
3.42
0.82
9.5
6
1594
6.2
0.600
0.08
2.0
0.090
32.0
44.0
0.99490
3.45
0.58
10.5
5
1595
5.9
0.550
0.10
2.2
0.062
39.0
51.0
0.99512
3.52
0.76
11.2
6
1596
6.3
0.510
0.13
2.3
0.076
29.0
40.0
0.99574
3.42
0.75
11.0
6
1597
5.9
0.645
0.12
2.0
0.075
32.0
44.0
0.99547
3.57
0.71
10.2
5
1598
6.0
0.310
0.47
3.6
0.067
18.0
42.0
0.99549
3.39
0.66
11.0
6
1599 rows × 12 columns
In [6]:
# Average alcohol value across every row of the data set.
dframe_wine.loc[:, 'alcohol'].mean()
Out[6]:
10.422983114446502
In [7]:
def max_to_min(arr):
    """Return the spread of ``arr``: its maximum value minus its minimum.

    Parameters
    ----------
    arr : object exposing ``max()`` and ``min()`` methods
        For example a pandas Series or a NumPy array.

    Returns
    -------
    The result of ``arr.max() - arr.min()``.
    """
    return arr.max() - arr.min()
In [9]:
# Bucket the wines by quality score; later cells reuse this grouped object.
wino = dframe_wine.groupby(by='quality')
# Per-group summary statistics for each of the remaining columns.
wino.describe()
Out[9]:
alcohol
chlorides
citric acid
density
fixed acidity
free sulfur dioxide
pH
residual sugar
sulphates
total sulfur dioxide
volatile acidity
quality
3
count
10.000000
10.000000
10.000000
10.000000
10.000000
10.000000
10.000000
10.000000
10.000000
10.000000
10.000000
mean
9.955000
0.122500
0.171000
0.997464
8.360000
11.000000
3.398000
2.635000
0.570000
24.900000
0.884500
std
0.818009
0.066241
0.250664
0.002002
1.770875
9.763879
0.144052
1.401596
0.122020
16.828877
0.331256
min
8.400000
0.061000
0.000000
0.994710
6.700000
3.000000
3.160000
1.200000
0.400000
9.000000
0.440000
25%
9.725000
0.079000
0.005000
0.996150
7.150000
5.000000
3.312500
1.875000
0.512500
12.500000
0.647500
50%
9.925000
0.090500
0.035000
0.997565
7.500000
6.000000
3.390000
2.100000
0.545000
15.000000
0.845000
75%
10.575000
0.143000
0.327500
0.998770
9.875000
14.500000
3.495000
3.100000
0.615000
42.500000
1.010000
max
11.000000
0.267000
0.660000
1.000800
11.600000
34.000000
3.630000
5.700000
0.860000
49.000000
1.580000
4
count
53.000000
53.000000
53.000000
53.000000
53.000000
53.000000
53.000000
53.000000
53.000000
53.000000
53.000000
mean
10.265094
0.090679
0.174151
0.996542
7.779245
12.264151
3.381509
2.694340
0.596415
36.245283
0.693962
std
0.934776
0.076192
0.201030
0.001575
1.626624
9.025926
0.181441
1.789436
0.239391
27.583374
0.220110
min
9.000000
0.045000
0.000000
0.993400
4.600000
3.000000
2.740000
1.300000
0.330000
7.000000
0.230000
25%
9.600000
0.067000
0.030000
0.995650
6.800000
6.000000
3.300000
1.900000
0.490000
14.000000
0.530000
50%
10.000000
0.080000
0.090000
0.996500
7.500000
11.000000
3.370000
2.100000
0.560000
26.000000
0.670000
75%
11.000000
0.089000
0.270000
0.997450
8.400000
15.000000
3.500000
2.800000
0.600000
49.000000
0.870000
max
13.100000
0.610000
1.000000
1.001000
12.500000
41.000000
3.900000
12.900000
2.000000
119.000000
1.130000
5
count
681.000000
681.000000
681.000000
681.000000
681.000000
681.000000
681.000000
681.000000
681.000000
681.000000
681.000000
mean
9.899706
0.092736
0.243686
0.997104
8.167254
16.983847
3.304949
2.528855
0.620969
56.513950
0.577041
std
0.736521
0.053707
0.180003
0.001589
1.563988
10.955446
0.150618
1.359753
0.171062
36.993116
0.164801
min
8.500000
0.039000
0.000000
0.992560
5.000000
3.000000
2.880000
1.200000
0.370000
6.000000
0.180000
25%
9.400000
0.074000
0.090000
0.996200
7.100000
9.000000
3.200000
1.900000
0.530000
26.000000
0.460000
50%
9.700000
0.081000
0.230000
0.997000
7.800000
15.000000
3.300000
2.200000
0.580000
47.000000
0.580000
75%
10.200000
0.094000
0.360000
0.997900
8.900000
23.000000
3.400000
2.600000
0.660000
84.000000
0.670000
max
14.900000
0.611000
0.790000
1.003150
15.900000
68.000000
3.740000
15.500000
1.980000
155.000000
1.330000
6
count
638.000000
638.000000
638.000000
638.000000
638.000000
638.000000
638.000000
638.000000
638.000000
638.000000
638.000000
mean
10.629519
0.084956
0.273824
0.996615
8.347179
15.711599
3.318072
2.477194
0.675329
40.869906
0.497484
std
1.049639
0.039563
0.195108
0.002000
1.797849
9.940911
0.153995
1.441576
0.158650
25.038250
0.160962
min
8.400000
0.034000
0.000000
0.990070
4.700000
1.000000
2.860000
0.900000
0.400000
6.000000
0.160000
25%
9.800000
0.068250
0.090000
0.995402
7.000000
8.000000
3.220000
1.900000
0.580000
23.000000
0.380000
50%
10.500000
0.078000
0.260000
0.996560
7.900000
14.000000
3.320000
2.200000
0.640000
35.000000
0.490000
75%
11.300000
0.088000
0.430000
0.997893
9.400000
21.000000
3.410000
2.500000
0.750000
54.000000
0.600000
max
14.000000
0.415000
0.780000
1.003690
14.300000
72.000000
4.010000
15.400000
1.950000
165.000000
1.040000
7
count
199.000000
199.000000
199.000000
199.000000
199.000000
199.000000
199.000000
199.000000
199.000000
199.000000
199.000000
mean
11.465913
0.076588
0.375176
0.996104
8.872362
14.045226
3.290754
2.720603
0.741256
35.020101
0.403920
std
0.961933
0.029456
0.194432
0.002176
1.992483
10.175255
0.150101
1.371509
0.135639
33.191206
0.145224
min
9.200000
0.012000
0.000000
0.990640
4.900000
3.000000
2.920000
1.200000
0.390000
7.000000
0.120000
25%
10.800000
0.062000
0.305000
0.994765
7.400000
6.000000
3.200000
2.000000
0.650000
17.500000
0.300000
50%
11.500000
0.073000
0.400000
0.995770
8.800000
11.000000
3.280000
2.300000
0.740000
27.000000
0.370000
75%
12.100000
0.087000
0.490000
0.997360
10.100000
18.000000
3.380000
2.750000
0.830000
43.000000
0.485000
max
14.000000
0.358000
0.760000
1.003200
15.600000
54.000000
3.780000
8.900000
1.360000
289.000000
0.915000
8
count
18.000000
18.000000
18.000000
18.000000
18.000000
18.000000
18.000000
18.000000
18.000000
18.000000
18.000000
mean
12.094444
0.068444
0.391111
0.995212
8.566667
13.277778
3.267222
2.577778
0.767778
33.444444
0.423333
std
1.224011
0.011678
0.199526
0.002378
2.119656
11.155613
0.200640
1.295038
0.115379
25.433240
0.144914
min
9.800000
0.044000
0.030000
0.990800
5.000000
3.000000
2.880000
1.400000
0.630000
12.000000
0.260000
25%
11.325000
0.062000
0.302500
0.994175
7.250000
6.000000
3.162500
1.800000
0.690000
16.000000
0.335000
50%
12.150000
0.070500
0.420000
0.994940
8.250000
7.500000
3.230000
2.100000
0.740000
21.500000
0.370000
75%
12.875000
0.075500
0.530000
0.997200
10.225000
16.500000
3.350000
2.600000
0.820000
43.000000
0.472500
max
14.000000
0.086000
0.720000
0.998800
12.600000
42.000000
3.720000
6.400000
1.100000
88.000000
0.850000
In [10]:
# Max-minus-min range of every column within each quality group.
wino.aggregate(max_to_min)
Out[10]:
fixed acidity
volatile acidity
citric acid
residual sugar
chlorides
free sulfur dioxide
total sulfur dioxide
density
pH
sulphates
alcohol
quality
3
4.9
1.140
0.66
4.5
0.206
31.0
40.0
0.00609
0.47
0.46
2.6
4
7.9
0.900
1.00
11.6
0.565
38.0
112.0
0.00760
1.16
1.67
4.1
5
10.9
1.150
0.79
14.3
0.572
65.0
149.0
0.01059
0.86
1.61
6.4
6
9.6
0.880
0.78
14.5
0.381
71.0
159.0
0.01362
1.15
1.55
5.6
7
10.7
0.795
0.76
7.7
0.346
51.0
282.0
0.01256
0.86
0.97
4.8
8
7.6
0.590
0.69
5.0
0.042
39.0
76.0
0.00800
0.84
0.47
4.2
In [11]:
# Mean of every column within each quality group.
wino.mean()
Out[11]:
fixed acidity
volatile acidity
citric acid
residual sugar
chlorides
free sulfur dioxide
total sulfur dioxide
density
pH
sulphates
alcohol
quality
3
8.360000
0.884500
0.171000
2.635000
0.122500
11.000000
24.900000
0.997464
3.398000
0.570000
9.955000
4
7.779245
0.693962
0.174151
2.694340
0.090679
12.264151
36.245283
0.996542
3.381509
0.596415
10.265094
5
8.167254
0.577041
0.243686
2.528855
0.092736
16.983847
56.513950
0.997104
3.304949
0.620969
9.899706
6
8.347179
0.497484
0.273824
2.477194
0.084956
15.711599
40.869906
0.996615
3.318072
0.675329
10.629519
7
8.872362
0.403920
0.375176
2.720603
0.076588
14.045226
35.020101
0.996104
3.290754
0.741256
11.465913
8
8.566667
0.423333
0.391111
2.577778
0.068444
13.277778
33.444444
0.995212
3.267222
0.767778
12.094444
In [12]:
# Add a derived column: quality score divided by alcohol, computed row by row.
dframe_wine['qual/alc ratio'] = dframe_wine['quality'].div(dframe_wine['alcohol'])
In [13]:
# Preview the first rows to confirm the 'qual/alc ratio' column is present.
dframe_wine.head()
Out[13]:
fixed acidity
volatile acidity
citric acid
residual sugar
chlorides
free sulfur dioxide
total sulfur dioxide
density
pH
sulphates
alcohol
quality
qual/alc ratio
0
7.4
0.70
0.00
1.9
0.076
11.0
34.0
0.9978
3.51
0.56
9.4
5
0.531915
1
7.8
0.88
0.00
2.6
0.098
25.0
67.0
0.9968
3.20
0.68
9.8
5
0.510204
2
7.8
0.76
0.04
2.3
0.092
15.0
54.0
0.9970
3.26
0.65
9.8
5
0.510204
3
11.2
0.28
0.56
1.9
0.075
17.0
60.0
0.9980
3.16
0.58
9.8
6
0.612245
4
7.4
0.70
0.00
1.9
0.076
11.0
34.0
0.9978
3.51
0.56
9.4
5
0.531915
In [14]:
# Pivot on quality; with no aggfunc given, each cell holds the column's mean for that group.
pd.pivot_table(dframe_wine, index=['quality'])
Out[14]:
alcohol
chlorides
citric acid
density
fixed acidity
free sulfur dioxide
pH
qual/alc ratio
residual sugar
sulphates
total sulfur dioxide
volatile acidity
quality
3
9.955000
0.122500
0.171000
0.997464
8.360000
11.000000
3.398000
0.303286
2.635000
0.570000
24.900000
0.884500
4
10.265094
0.090679
0.174151
0.996542
7.779245
12.264151
3.381509
0.392724
2.694340
0.596415
36.245283
0.693962
5
9.899706
0.092736
0.243686
0.997104
8.167254
16.983847
3.304949
0.507573
2.528855
0.620969
56.513950
0.577041
6
10.629519
0.084956
0.273824
0.996615
8.347179
15.711599
3.318072
0.569801
2.477194
0.675329
40.869906
0.497484
7
11.465913
0.076588
0.375176
0.996104
8.872362
14.045226
3.290754
0.614855
2.720603
0.741256
35.020101
0.403920
8
12.094444
0.068444
0.391111
0.995212
8.566667
13.277778
3.267222
0.668146
2.577778
0.767778
33.444444
0.423333
In [15]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [16]:
# Scatter plot of alcohol (y-axis) against quality score (x-axis).
dframe_wine.plot.scatter(x='quality', y='alcohol')
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x1158bbd90>
In [ ]:
Content source: thatguyandy27/python-sandbox
Similar notebooks: