In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

In [3]:
dframe_wine = pd.read_csv('winequality-red.csv', sep=';')
dframe_wine


Out[3]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality
0 7.4 0.700 0.00 1.9 0.076 11.0 34.0 0.99780 3.51 0.56 9.4 5
1 7.8 0.880 0.00 2.6 0.098 25.0 67.0 0.99680 3.20 0.68 9.8 5
2 7.8 0.760 0.04 2.3 0.092 15.0 54.0 0.99700 3.26 0.65 9.8 5
3 11.2 0.280 0.56 1.9 0.075 17.0 60.0 0.99800 3.16 0.58 9.8 6
4 7.4 0.700 0.00 1.9 0.076 11.0 34.0 0.99780 3.51 0.56 9.4 5
5 7.4 0.660 0.00 1.8 0.075 13.0 40.0 0.99780 3.51 0.56 9.4 5
6 7.9 0.600 0.06 1.6 0.069 15.0 59.0 0.99640 3.30 0.46 9.4 5
7 7.3 0.650 0.00 1.2 0.065 15.0 21.0 0.99460 3.39 0.47 10.0 7
8 7.8 0.580 0.02 2.0 0.073 9.0 18.0 0.99680 3.36 0.57 9.5 7
9 7.5 0.500 0.36 6.1 0.071 17.0 102.0 0.99780 3.35 0.80 10.5 5
10 6.7 0.580 0.08 1.8 0.097 15.0 65.0 0.99590 3.28 0.54 9.2 5
11 7.5 0.500 0.36 6.1 0.071 17.0 102.0 0.99780 3.35 0.80 10.5 5
12 5.6 0.615 0.00 1.6 0.089 16.0 59.0 0.99430 3.58 0.52 9.9 5
13 7.8 0.610 0.29 1.6 0.114 9.0 29.0 0.99740 3.26 1.56 9.1 5
14 8.9 0.620 0.18 3.8 0.176 52.0 145.0 0.99860 3.16 0.88 9.2 5
15 8.9 0.620 0.19 3.9 0.170 51.0 148.0 0.99860 3.17 0.93 9.2 5
16 8.5 0.280 0.56 1.8 0.092 35.0 103.0 0.99690 3.30 0.75 10.5 7
17 8.1 0.560 0.28 1.7 0.368 16.0 56.0 0.99680 3.11 1.28 9.3 5
18 7.4 0.590 0.08 4.4 0.086 6.0 29.0 0.99740 3.38 0.50 9.0 4
19 7.9 0.320 0.51 1.8 0.341 17.0 56.0 0.99690 3.04 1.08 9.2 6
20 8.9 0.220 0.48 1.8 0.077 29.0 60.0 0.99680 3.39 0.53 9.4 6
21 7.6 0.390 0.31 2.3 0.082 23.0 71.0 0.99820 3.52 0.65 9.7 5
22 7.9 0.430 0.21 1.6 0.106 10.0 37.0 0.99660 3.17 0.91 9.5 5
23 8.5 0.490 0.11 2.3 0.084 9.0 67.0 0.99680 3.17 0.53 9.4 5
24 6.9 0.400 0.14 2.4 0.085 21.0 40.0 0.99680 3.43 0.63 9.7 6
25 6.3 0.390 0.16 1.4 0.080 11.0 23.0 0.99550 3.34 0.56 9.3 5
26 7.6 0.410 0.24 1.8 0.080 4.0 11.0 0.99620 3.28 0.59 9.5 5
27 7.9 0.430 0.21 1.6 0.106 10.0 37.0 0.99660 3.17 0.91 9.5 5
28 7.1 0.710 0.00 1.9 0.080 14.0 35.0 0.99720 3.47 0.55 9.4 5
29 7.8 0.645 0.00 2.0 0.082 8.0 16.0 0.99640 3.38 0.59 9.8 6
... ... ... ... ... ... ... ... ... ... ... ... ...
1569 6.2 0.510 0.14 1.9 0.056 15.0 34.0 0.99396 3.48 0.57 11.5 6
1570 6.4 0.360 0.53 2.2 0.230 19.0 35.0 0.99340 3.37 0.93 12.4 6
1571 6.4 0.380 0.14 2.2 0.038 15.0 25.0 0.99514 3.44 0.65 11.1 6
1572 7.3 0.690 0.32 2.2 0.069 35.0 104.0 0.99632 3.33 0.51 9.5 5
1573 6.0 0.580 0.20 2.4 0.075 15.0 50.0 0.99467 3.58 0.67 12.5 6
1574 5.6 0.310 0.78 13.9 0.074 23.0 92.0 0.99677 3.39 0.48 10.5 6
1575 7.5 0.520 0.40 2.2 0.060 12.0 20.0 0.99474 3.26 0.64 11.8 6
1576 8.0 0.300 0.63 1.6 0.081 16.0 29.0 0.99588 3.30 0.78 10.8 6
1577 6.2 0.700 0.15 5.1 0.076 13.0 27.0 0.99622 3.54 0.60 11.9 6
1578 6.8 0.670 0.15 1.8 0.118 13.0 20.0 0.99540 3.42 0.67 11.3 6
1579 6.2 0.560 0.09 1.7 0.053 24.0 32.0 0.99402 3.54 0.60 11.3 5
1580 7.4 0.350 0.33 2.4 0.068 9.0 26.0 0.99470 3.36 0.60 11.9 6
1581 6.2 0.560 0.09 1.7 0.053 24.0 32.0 0.99402 3.54 0.60 11.3 5
1582 6.1 0.715 0.10 2.6 0.053 13.0 27.0 0.99362 3.57 0.50 11.9 5
1583 6.2 0.460 0.29 2.1 0.074 32.0 98.0 0.99578 3.33 0.62 9.8 5
1584 6.7 0.320 0.44 2.4 0.061 24.0 34.0 0.99484 3.29 0.80 11.6 7
1585 7.2 0.390 0.44 2.6 0.066 22.0 48.0 0.99494 3.30 0.84 11.5 6
1586 7.5 0.310 0.41 2.4 0.065 34.0 60.0 0.99492 3.34 0.85 11.4 6
1587 5.8 0.610 0.11 1.8 0.066 18.0 28.0 0.99483 3.55 0.66 10.9 6
1588 7.2 0.660 0.33 2.5 0.068 34.0 102.0 0.99414 3.27 0.78 12.8 6
1589 6.6 0.725 0.20 7.8 0.073 29.0 79.0 0.99770 3.29 0.54 9.2 5
1590 6.3 0.550 0.15 1.8 0.077 26.0 35.0 0.99314 3.32 0.82 11.6 6
1591 5.4 0.740 0.09 1.7 0.089 16.0 26.0 0.99402 3.67 0.56 11.6 6
1592 6.3 0.510 0.13 2.3 0.076 29.0 40.0 0.99574 3.42 0.75 11.0 6
1593 6.8 0.620 0.08 1.9 0.068 28.0 38.0 0.99651 3.42 0.82 9.5 6
1594 6.2 0.600 0.08 2.0 0.090 32.0 44.0 0.99490 3.45 0.58 10.5 5
1595 5.9 0.550 0.10 2.2 0.062 39.0 51.0 0.99512 3.52 0.76 11.2 6
1596 6.3 0.510 0.13 2.3 0.076 29.0 40.0 0.99574 3.42 0.75 11.0 6
1597 5.9 0.645 0.12 2.0 0.075 32.0 44.0 0.99547 3.57 0.71 10.2 5
1598 6.0 0.310 0.47 3.6 0.067 18.0 42.0 0.99549 3.39 0.66 11.0 6

1599 rows × 12 columns


In [4]:
def ranker(df):
    df['alc_content_rank'] = np.arange(len(df)) + 1
    return df

In [6]:
dframe_wine.sort_values('alcohol', ascending = False, inplace = True)

In [7]:
dframe_wine = dframe_wine.groupby('quality').apply(ranker)

In [8]:
dframe_wine.head()


Out[8]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality alc_content_rank
652 15.9 0.36 0.65 7.5 0.096 22.0 71.0 0.99760 2.98 0.84 14.9 5 1
588 5.0 0.42 0.24 2.0 0.060 19.0 50.0 0.99170 3.72 0.74 14.0 8 1
142 5.2 0.34 0.00 1.8 0.050 27.0 63.0 0.99160 3.68 0.79 14.0 6 1
144 5.2 0.34 0.00 1.8 0.050 27.0 63.0 0.99160 3.68 0.79 14.0 6 2
1270 5.0 0.38 0.01 1.6 0.048 26.0 60.0 0.99084 3.70 0.75 14.0 6 3

In [9]:
num_of_qual = dframe_wine['quality'].value_counts()

In [10]:
num_of_qual


Out[10]:
5    681
6    638
7    199
4     53
8     18
3     10
Name: quality, dtype: int64

In [12]:
dframe_wine[dframe_wine.alc_content_rank == 1].head(len(num_of_qual))


Out[12]:
fixed acidity volatile acidity citric acid residual sugar chlorides free sulfur dioxide total sulfur dioxide density pH sulphates alcohol quality alc_content_rank
652 15.9 0.36 0.65 7.5 0.096 22.0 71.0 0.99760 2.98 0.84 14.9 5 1
588 5.0 0.42 0.24 2.0 0.060 19.0 50.0 0.99170 3.72 0.74 14.0 8 1
142 5.2 0.34 0.00 1.8 0.050 27.0 63.0 0.99160 3.68 0.79 14.0 6 1
821 4.9 0.42 0.00 2.1 0.048 16.0 42.0 0.99154 3.71 0.74 14.0 7 1
45 4.6 0.52 0.15 2.1 0.054 8.0 65.0 0.99340 3.90 0.56 13.1 4 1
899 8.3 1.02 0.02 3.4 0.084 6.0 11.0 0.99892 3.48 0.49 11.0 3 1

In [ ]: