My notebook to practics Pandas

This is some notes



In [1]:

    
import pandas as pd

Working with series

loc uses the specified index and it is inclusive
iloc uses the python index and is exclusive



In [2]:

    
## difference between loc and iloc
vals = [0, 1, 2]
idx = [10, 11, 12]
ser = pd.Series(vals, index=idx)

print("...using loc")
print(ser.loc[10:11])
print("\n...using iloc")
print(ser.iloc[0:2])









    



...using loc
10    0
11    1
dtype: int64

...using iloc
10    0
11    1
dtype: int64



In [3]:

    
## creating series
vals = [0, 1, 2, 3]
idx = ['a', 'b', 'c', 'd']
ser = pd.Series(vals, index=idx)
print(ser)
print("\n...single index")
print(ser.loc['a'])
print("\n...a slice")
print(ser.loc['a':'c'])









    



a    0
b    1
c    2
d    3
dtype: int64

...single index
0

...a slice
a    0
b    1
c    2
dtype: int64

Working with Data Frames

can create from a list of lists
can do it from a dictionaries
or you can import from somewhere like a csv



In [4]:

    
## from a list of lists
vals = [[1,2,3], [4,5,6]]
cols = ['a', 'b', 'c']
df = pd.DataFrame(data=vals, columns=cols)
df



In [5]:

    
## from a list of dictionaried
rows = [{'a': 1, 'b': 2, 'c':3}, {'a': 4, 'b': 5, 'c':6}]
df = pd.DataFrame(rows)
df



In [6]:

    
## how does pandas deal with missing numeric values
vals = [[10, 11], [20, 21, 22]]
cols = ['a', 'c', 'd']
df = pd.DataFrame(data=vals, columns=cols)
df



In [7]:

    
## how does pandas deal with missing string values
vals = [['z', 'y'], ['x', 'w', 'v']]
cols = ['a', 'c', 'd']
df = pd.DataFrame(data=vals, columns=cols)
df



In [8]:

    
df.transpose()



In [9]:

    
df



In [10]:

    
## dealing with missing data
vals = [[10, 11], [20, 21, 22]]
cols = ['a', 'c', 'd']
df1 = pd.DataFrame(data=vals, columns=cols)
df1.dropna(inplace=True)
df1



In [11]:

    
## fill them in with something
df2 = pd.DataFrame(data=vals, columns=cols)
df2.fillna(-1, inplace=True)
df2



In [ ]:

reading csv file



In [12]:

    
df = pd.read_csv('winequality-red.csv', delimiter=';')
df.head()









    Out[12]:







  
    
      
      fixed acidity
      volatile acidity
      citric acid
      residual sugar
      chlorides
      free sulfur dioxide
      total sulfur dioxide
      density
      pH
      sulphates
      alcohol
      quality
    
  
  
    
      0
      7.4
      0.70
      0.00
      1.9
      0.076
      11.0
      34.0
      0.9978
      3.51
      0.56
      9.4
      5
    
    
      1
      7.8
      0.88
      0.00
      2.6
      0.098
      25.0
      67.0
      0.9968
      3.20
      0.68
      9.8
      5
    
    
      2
      7.8
      0.76
      0.04
      2.3
      0.092
      15.0
      54.0
      0.9970
      3.26
      0.65
      9.8
      5
    
    
      3
      11.2
      0.28
      0.56
      1.9
      0.075
      17.0
      60.0
      0.9980
      3.16
      0.58
      9.8
      6
    
    
      4
      7.4
      0.70
      0.00
      1.9
      0.076
      11.0
      34.0
      0.9978
      3.51
      0.56
      9.4
      5



In [13]:

    
import pprint
print("Shape of my data frame: {} x {}".format(df.shape[0],df.shape[1]))
df.describe()









    



Shape of my data frame: 1599 x 12






    Out[13]:







  
    
      
      fixed acidity
      volatile acidity
      citric acid
      residual sugar
      chlorides
      free sulfur dioxide
      total sulfur dioxide
      density
      pH
      sulphates
      alcohol
      quality
    
  
  
    
      count
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
    
    
      mean
      8.319637
      0.527821
      0.270976
      2.538806
      0.087467
      15.874922
      46.467792
      0.996747
      3.311113
      0.658149
      10.422983
      5.636023
    
    
      std
      1.741096
      0.179060
      0.194801
      1.409928
      0.047065
      10.460157
      32.895324
      0.001887
      0.154386
      0.169507
      1.065668
      0.807569
    
    
      min
      4.600000
      0.120000
      0.000000
      0.900000
      0.012000
      1.000000
      6.000000
      0.990070
      2.740000
      0.330000
      8.400000
      3.000000
    
    
      25%
      7.100000
      0.390000
      0.090000
      1.900000
      0.070000
      7.000000
      22.000000
      0.995600
      3.210000
      0.550000
      9.500000
      5.000000
    
    
      50%
      7.900000
      0.520000
      0.260000
      2.200000
      0.079000
      14.000000
      38.000000
      0.996750
      3.310000
      0.620000
      10.200000
      6.000000
    
    
      75%
      9.200000
      0.640000
      0.420000
      2.600000
      0.090000
      21.000000
      62.000000
      0.997835
      3.400000
      0.730000
      11.100000
      6.000000
    
    
      max
      15.900000
      1.580000
      1.000000
      15.500000
      0.611000
      72.000000
      289.000000
      1.003690
      4.010000
      2.000000
      14.900000
      8.000000



In [12]:

    
import re

## clean up the column names
cols = df.columns.tolist()
df.columns = [re.sub("\s+","_",col) for col in df.columns.tolist()]



In [3]:

    
## print the columns description
print("Shape of my data frame: {} x {}".format(df.shape[0],df.shape[1]))
pp = pprint.PrettyPrinter(indent=2)
pp.pprint(list(df.columns))









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-64478d119b93> in <module>()
      1 ## print the columns description
----> 2 print("Shape of my data frame: {} x {}".format(df.shape[0],df.shape[1]))
      3 pp = pprint.PrettyPrinter(indent=2)
      4 pp.pprint(list(df.columns))

NameError: name 'df' is not defined



In [14]:

    
## basic statistical calculations
df.describe()









    Out[14]:







  
    
      
      fixed_acidity
      volatile_acidity
      citric_acid
      residual_sugar
      chlorides
      free_sulfur_dioxide
      total_sulfur_dioxide
      density
      pH
      sulphates
      alcohol
      quality
    
  
  
    
      count
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
      1599.000000
    
    
      mean
      8.319637
      0.527821
      0.270976
      2.538806
      0.087467
      15.874922
      46.467792
      0.996747
      3.311113
      0.658149
      10.422983
      5.636023
    
    
      std
      1.741096
      0.179060
      0.194801
      1.409928
      0.047065
      10.460157
      32.895324
      0.001887
      0.154386
      0.169507
      1.065668
      0.807569
    
    
      min
      4.600000
      0.120000
      0.000000
      0.900000
      0.012000
      1.000000
      6.000000
      0.990070
      2.740000
      0.330000
      8.400000
      3.000000
    
    
      25%
      7.100000
      0.390000
      0.090000
      1.900000
      0.070000
      7.000000
      22.000000
      0.995600
      3.210000
      0.550000
      9.500000
      5.000000
    
    
      50%
      7.900000
      0.520000
      0.260000
      2.200000
      0.079000
      14.000000
      38.000000
      0.996750
      3.310000
      0.620000
      10.200000
      6.000000
    
    
      75%
      9.200000
      0.640000
      0.420000
      2.600000
      0.090000
      21.000000
      62.000000
      0.997835
      3.400000
      0.730000
      11.100000
      6.000000
    
    
      max
      15.900000
      1.580000
      1.000000
      15.500000
      0.611000
      72.000000
      289.000000
      1.003690
      4.010000
      2.000000
      14.900000
      8.000000

Slicing



In [15]:

    
## single column reference returns a slice
a_column = df['fixed_acidity']
print(type(a_column))

## multiple column references returns another dataframe
some_columns = df[['fixed_acidity','citric_acid']]
print(type(some_columns))









    



<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>



In [16]:

    
## get rows from a given column using loc
df.loc[100:102, 'fixed_acidity']









    Out[16]:





100    8.3
101    7.8
102    8.1
Name: fixed_acidity, dtype: float64



In [17]:

    
## get rows from a given column using iloc
df['fixed_acidity'].iloc[100:102]









    Out[17]:





100    8.3
101    7.8
Name: fixed_acidity, dtype: float64



In [18]:

    
## get rows from a multiple columns using iloc
df[['fixed_acidity','chlorides']].iloc[100:102]









    Out[18]:







  
    
      
      fixed_acidity
      chlorides
    
  
  
    
      100
      8.3
      0.084
    
    
      101
      7.8
      0.075

Groupbys



In [19]:

    
df_quality = df.groupby("quality")
df_quality.mean()









    Out[19]:







  
    
      
      fixed_acidity
      volatile_acidity
      citric_acid
      residual_sugar
      chlorides
      free_sulfur_dioxide
      total_sulfur_dioxide
      density
      pH
      sulphates
      alcohol
    
    
      quality
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      3
      8.360000
      0.884500
      0.171000
      2.635000
      0.122500
      11.000000
      24.900000
      0.997464
      3.398000
      0.570000
      9.955000
    
    
      4
      7.779245
      0.693962
      0.174151
      2.694340
      0.090679
      12.264151
      36.245283
      0.996542
      3.381509
      0.596415
      10.265094
    
    
      5
      8.167254
      0.577041
      0.243686
      2.528855
      0.092736
      16.983847
      56.513950
      0.997104
      3.304949
      0.620969
      9.899706
    
    
      6
      8.347179
      0.497484
      0.273824
      2.477194
      0.084956
      15.711599
      40.869906
      0.996615
      3.318072
      0.675329
      10.629519
    
    
      7
      8.872362
      0.403920
      0.375176
      2.720603
      0.076588
      14.045226
      35.020101
      0.996104
      3.290754
      0.741256
      11.465913
    
    
      8
      8.566667
      0.423333
      0.391111
      2.577778
      0.068444
      13.277778
      33.444444
      0.995212
      3.267222
      0.767778
      12.094444

Conditionals

Note that we use the & and | to string conditionals together because we use NumPy's masking rules



In [20]:

    
mask = df['alcohol'] > 12.0
df1 = df[mask]
df1.groupby("quality").mean()









    Out[20]:







  
    
      
      fixed_acidity
      volatile_acidity
      citric_acid
      residual_sugar
      chlorides
      free_sulfur_dioxide
      total_sulfur_dioxide
      density
      pH
      sulphates
      alcohol
    
    
      quality
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      4
      4.600000
      0.520000
      0.150000
      2.100000
      0.054000
      8.000000
      65.000000
      0.993400
      3.900000
      0.560000
      13.100000
    
    
      5
      7.790909
      0.584091
      0.203636
      3.604545
      0.068273
      13.363636
      45.090909
      0.994658
      3.420909
      0.590909
      12.945455
    
    
      6
      7.601587
      0.454762
      0.288413
      2.512698
      0.069841
      15.841270
      40.253968
      0.994317
      3.399206
      0.632857
      12.673016
    
    
      7
      7.817544
      0.428509
      0.333684
      2.846491
      0.070702
      13.666667
      43.456140
      0.994437
      3.359474
      0.719649
      12.606433
    
    
      8
      7.822222
      0.506667
      0.314444
      3.000000
      0.066444
      14.888889
      41.333333
      0.994416
      3.383333
      0.766667
      13.111111



In [21]:

    
## multiple conditionals 
df[(df['alcohol'] > 12.0) & (df['quality'] > 5)].groupby("quality").mean()









    Out[21]:







  
    
      
      fixed_acidity
      volatile_acidity
      citric_acid
      residual_sugar
      chlorides
      free_sulfur_dioxide
      total_sulfur_dioxide
      density
      pH
      sulphates
      alcohol
    
    
      quality
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      6
      7.601587
      0.454762
      0.288413
      2.512698
      0.069841
      15.841270
      40.253968
      0.994317
      3.399206
      0.632857
      12.673016
    
    
      7
      7.817544
      0.428509
      0.333684
      2.846491
      0.070702
      13.666667
      43.456140
      0.994437
      3.359474
      0.719649
      12.606433
    
    
      8
      7.822222
      0.506667
      0.314444
      3.000000
      0.066444
      14.888889
      41.333333
      0.994416
      3.383333
      0.766667
      13.111111



In [22]:

    
## Can also use query to evaluate conditionals
result_df = df.query('alcohol >= 9.10 and pH < 3.5')
result_df.loc[1:6, ['alcohol', 'pH']]

Sorting



In [23]:

    
## single column sort
df.sort_values(by='fixed_acidity', ascending=True)









    Out[23]:







  
    
      
      fixed_acidity
      volatile_acidity
      citric_acid
      residual_sugar
      chlorides
      free_sulfur_dioxide
      total_sulfur_dioxide
      density
      pH
      sulphates
      alcohol
      quality
    
  
  
    
      45
      4.6
      0.520
      0.15
      2.10
      0.054
      8.0
      65.0
      0.99340
      3.90
      0.56
      13.100000
      4
    
    
      95
      4.7
      0.600
      0.17
      2.30
      0.058
      17.0
      106.0
      0.99320
      3.85
      0.60
      12.900000
      6
    
    
      821
      4.9
      0.420
      0.00
      2.10
      0.048
      16.0
      42.0
      0.99154
      3.71
      0.74
      14.000000
      7
    
    
      588
      5.0
      0.420
      0.24
      2.00
      0.060
      19.0
      50.0
      0.99170
      3.72
      0.74
      14.000000
      8
    
    
      94
      5.0
      1.020
      0.04
      1.40
      0.045
      41.0
      85.0
      0.99380
      3.75
      0.48
      10.500000
      4
    
    
      553
      5.0
      1.040
      0.24
      1.60
      0.050
      32.0
      96.0
      0.99340
      3.74
      0.62
      11.500000
      5
    
    
      1270
      5.0
      0.380
      0.01
      1.60
      0.048
      26.0
      60.0
      0.99084
      3.70
      0.75
      14.000000
      6
    
    
      1114
      5.0
      0.400
      0.50
      4.30
      0.046
      29.0
      80.0
      0.99020
      3.49
      0.66
      13.600000
      6
    
    
      1321
      5.0
      0.740
      0.00
      1.20
      0.041
      16.0
      46.0
      0.99258
      4.01
      0.59
      12.500000
      6
    
    
      1157
      5.1
      0.510
      0.18
      2.10
      0.042
      16.0
      101.0
      0.99240
      3.46
      0.87
      12.900000
      7
    
    
      695
      5.1
      0.470
      0.02
      1.30
      0.034
      18.0
      44.0
      0.99210
      3.90
      0.62
      12.800000
      6
    
    
      1228
      5.1
      0.420
      0.00
      1.80
      0.044
      18.0
      88.0
      0.99157
      3.68
      0.73
      13.600000
      7
    
    
      802
      5.1
      0.585
      0.00
      1.70
      0.044
      14.0
      86.0
      0.99264
      3.56
      0.94
      12.900000
      7
    
    
      1300
      5.2
      0.645
      0.00
      2.15
      0.080
      15.0
      28.0
      0.99444
      3.78
      0.61
      12.500000
      6
    
    
      1377
      5.2
      0.490
      0.26
      2.30
      0.090
      23.0
      74.0
      0.99530
      3.71
      0.62
      12.200000
      6
    
    
      34
      5.2
      0.320
      0.25
      1.80
      0.103
      13.0
      50.0
      0.99570
      3.38
      0.55
      9.200000
      5
    
    
      230
      5.2
      0.480
      0.04
      1.60
      0.054
      19.0
      106.0
      0.99270
      3.54
      0.62
      12.200000
      7
    
    
      144
      5.2
      0.340
      0.00
      1.80
      0.050
      27.0
      63.0
      0.99160
      3.68
      0.79
      14.000000
      6
    
    
      142
      5.2
      0.340
      0.00
      1.80
      0.050
      27.0
      63.0
      0.99160
      3.68
      0.79
      14.000000
      6
    
    
      444
      5.3
      0.570
      0.01
      1.70
      0.054
      5.0
      27.0
      0.99340
      3.57
      0.84
      12.500000
      7
    
    
      1477
      5.3
      0.470
      0.11
      2.20
      0.048
      16.0
      89.0
      0.99182
      3.54
      0.88
      13.600000
      7
    
    
      916
      5.3
      0.715
      0.19
      1.50
      0.161
      7.0
      62.0
      0.99395
      3.62
      0.61
      11.000000
      5
    
    
      1475
      5.3
      0.470
      0.11
      2.20
      0.048
      16.0
      89.0
      0.99182
      3.54
      0.88
      13.566667
      7
    
    
      198
      5.4
      0.835
      0.08
      1.20
      0.046
      13.0
      93.0
      0.99240
      3.57
      0.85
      13.000000
      7
    
    
      1316
      5.4
      0.740
      0.00
      1.20
      0.041
      16.0
      46.0
      0.99258
      4.01
      0.59
      12.500000
      6
    
    
      1537
      5.4
      0.580
      0.08
      1.90
      0.059
      20.0
      31.0
      0.99484
      3.50
      0.64
      10.200000
      6
    
    
      1111
      5.4
      0.420
      0.27
      2.00
      0.092
      23.0
      55.0
      0.99471
      3.78
      0.64
      12.300000
      7
    
    
      1591
      5.4
      0.740
      0.09
      1.70
      0.089
      16.0
      26.0
      0.99402
      3.67
      0.56
      11.600000
      6
    
    
      1269
      5.5
      0.490
      0.03
      1.80
      0.044
      28.0
      87.0
      0.99080
      3.50
      0.82
      14.000000
      8
    
    
      1178
      5.6
      0.915
      0.00
      2.10
      0.041
      17.0
      78.0
      0.99346
      3.68
      0.73
      11.400000
      5
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      366
      12.8
      0.615
      0.66
      5.80
      0.083
      7.0
      42.0
      1.00220
      3.07
      0.73
      10.000000
      7
    
    
      205
      12.8
      0.300
      0.74
      2.60
      0.095
      9.0
      28.0
      0.99940
      3.20
      0.77
      10.800000
      7
    
    
      206
      12.8
      0.300
      0.74
      2.60
      0.095
      9.0
      28.0
      0.99940
      3.20
      0.77
      10.800000
      7
    
    
      429
      12.8
      0.840
      0.63
      2.40
      0.088
      13.0
      35.0
      0.99970
      3.10
      0.60
      10.400000
      6
    
    
      364
      12.8
      0.615
      0.66
      5.80
      0.083
      7.0
      42.0
      1.00220
      3.07
      0.73
      10.000000
      7
    
    
      811
      12.9
      0.500
      0.55
      2.80
      0.072
      7.0
      24.0
      1.00012
      3.09
      0.68
      10.900000
      6
    
    
      538
      12.9
      0.350
      0.49
      5.80
      0.066
      5.0
      35.0
      1.00140
      3.20
      0.66
      12.000000
      7
    
    
      564
      13.0
      0.470
      0.49
      4.30
      0.085
      6.0
      47.0
      1.00210
      3.30
      0.68
      12.700000
      6
    
    
      470
      13.0
      0.320
      0.65
      2.60
      0.093
      15.0
      47.0
      0.99960
      3.05
      0.61
      10.600000
      5
    
    
      559
      13.0
      0.470
      0.49
      4.30
      0.085
      6.0
      47.0
      1.00210
      3.30
      0.68
      12.700000
      6
    
    
      603
      13.2
      0.460
      0.52
      2.20
      0.071
      12.0
      35.0
      1.00060
      3.10
      0.56
      9.000000
      6
    
    
      611
      13.2
      0.380
      0.55
      2.70
      0.081
      5.0
      16.0
      1.00060
      2.98
      0.54
      9.400000
      5
    
    
      601
      13.2
      0.460
      0.52
      2.20
      0.071
      12.0
      35.0
      1.00060
      3.10
      0.56
      9.000000
      6
    
    
      509
      13.3
      0.290
      0.75
      2.80
      0.084
      23.0
      43.0
      0.99860
      3.04
      0.68
      11.400000
      7
    
    
      680
      13.3
      0.430
      0.58
      1.90
      0.070
      15.0
      40.0
      1.00040
      3.06
      0.49
      9.000000
      5
    
    
      294
      13.3
      0.340
      0.52
      3.20
      0.094
      17.0
      53.0
      1.00140
      3.05
      0.81
      9.500000
      6
    
    
      328
      13.4
      0.270
      0.62
      2.60
      0.082
      6.0
      21.0
      1.00020
      3.16
      0.67
      9.700000
      6
    
    
      353
      13.5
      0.530
      0.79
      4.80
      0.120
      23.0
      77.0
      1.00180
      3.18
      0.77
      13.000000
      5
    
    
      381
      13.7
      0.415
      0.68
      2.90
      0.085
      17.0
      43.0
      1.00140
      3.06
      0.80
      10.000000
      6
    
    
      391
      13.7
      0.415
      0.68
      2.90
      0.085
      17.0
      43.0
      1.00140
      3.06
      0.80
      10.000000
      6
    
    
      347
      13.8
      0.490
      0.67
      3.00
      0.093
      6.0
      15.0
      0.99860
      3.02
      0.93
      12.000000
      6
    
    
      374
      14.0
      0.410
      0.63
      3.80
      0.089
      6.0
      47.0
      1.00140
      3.01
      0.81
      10.800000
      6
    
    
      544
      14.3
      0.310
      0.74
      1.80
      0.075
      6.0
      15.0
      1.00080
      2.86
      0.79
      8.400000
      6
    
    
      244
      15.0
      0.210
      0.44
      2.20
      0.075
      10.0
      24.0
      1.00005
      3.07
      0.84
      9.200000
      7
    
    
      243
      15.0
      0.210
      0.44
      2.20
      0.075
      10.0
      24.0
      1.00005
      3.07
      0.84
      9.200000
      7
    
    
      555
      15.5
      0.645
      0.49
      4.20
      0.095
      10.0
      23.0
      1.00315
      2.92
      0.74
      11.100000
      5
    
    
      554
      15.5
      0.645
      0.49
      4.20
      0.095
      10.0
      23.0
      1.00315
      2.92
      0.74
      11.100000
      5
    
    
      442
      15.6
      0.685
      0.76
      3.70
      0.100
      6.0
      43.0
      1.00320
      2.95
      0.68
      11.200000
      7
    
    
      557
      15.6
      0.645
      0.49
      4.20
      0.095
      10.0
      23.0
      1.00315
      2.92
      0.74
      11.100000
      5
    
    
      652
      15.9
      0.360
      0.65
      7.50
      0.096
      22.0
      71.0
      0.99760
      2.98
      0.84
      14.900000
      5
    
  

1599 rows × 12 columns



In [25]:

    
## multicolumn sort
df.sort_values(by=['fixed_acidity', 'volatile_acidity'], ascending=True)









    Out[25]:







  
    
      
      fixed_acidity
      volatile_acidity
      citric_acid
      residual_sugar
      chlorides
      free_sulfur_dioxide
      total_sulfur_dioxide
      density
      pH
      sulphates
      alcohol
      quality
    
  
  
    
      45
      4.6
      0.520
      0.15
      2.10
      0.054
      8.0
      65.0
      0.99340
      3.90
      0.56
      13.100000
      4
    
    
      95
      4.7
      0.600
      0.17
      2.30
      0.058
      17.0
      106.0
      0.99320
      3.85
      0.60
      12.900000
      6
    
    
      821
      4.9
      0.420
      0.00
      2.10
      0.048
      16.0
      42.0
      0.99154
      3.71
      0.74
      14.000000
      7
    
    
      1270
      5.0
      0.380
      0.01
      1.60
      0.048
      26.0
      60.0
      0.99084
      3.70
      0.75
      14.000000
      6
    
    
      1114
      5.0
      0.400
      0.50
      4.30
      0.046
      29.0
      80.0
      0.99020
      3.49
      0.66
      13.600000
      6
    
    
      588
      5.0
      0.420
      0.24
      2.00
      0.060
      19.0
      50.0
      0.99170
      3.72
      0.74
      14.000000
      8
    
    
      1321
      5.0
      0.740
      0.00
      1.20
      0.041
      16.0
      46.0
      0.99258
      4.01
      0.59
      12.500000
      6
    
    
      94
      5.0
      1.020
      0.04
      1.40
      0.045
      41.0
      85.0
      0.99380
      3.75
      0.48
      10.500000
      4
    
    
      553
      5.0
      1.040
      0.24
      1.60
      0.050
      32.0
      96.0
      0.99340
      3.74
      0.62
      11.500000
      5
    
    
      1228
      5.1
      0.420
      0.00
      1.80
      0.044
      18.0
      88.0
      0.99157
      3.68
      0.73
      13.600000
      7
    
    
      695
      5.1
      0.470
      0.02
      1.30
      0.034
      18.0
      44.0
      0.99210
      3.90
      0.62
      12.800000
      6
    
    
      1157
      5.1
      0.510
      0.18
      2.10
      0.042
      16.0
      101.0
      0.99240
      3.46
      0.87
      12.900000
      7
    
    
      802
      5.1
      0.585
      0.00
      1.70
      0.044
      14.0
      86.0
      0.99264
      3.56
      0.94
      12.900000
      7
    
    
      34
      5.2
      0.320
      0.25
      1.80
      0.103
      13.0
      50.0
      0.99570
      3.38
      0.55
      9.200000
      5
    
    
      142
      5.2
      0.340
      0.00
      1.80
      0.050
      27.0
      63.0
      0.99160
      3.68
      0.79
      14.000000
      6
    
    
      144
      5.2
      0.340
      0.00
      1.80
      0.050
      27.0
      63.0
      0.99160
      3.68
      0.79
      14.000000
      6
    
    
      230
      5.2
      0.480
      0.04
      1.60
      0.054
      19.0
      106.0
      0.99270
      3.54
      0.62
      12.200000
      7
    
    
      1377
      5.2
      0.490
      0.26
      2.30
      0.090
      23.0
      74.0
      0.99530
      3.71
      0.62
      12.200000
      6
    
    
      1300
      5.2
      0.645
      0.00
      2.15
      0.080
      15.0
      28.0
      0.99444
      3.78
      0.61
      12.500000
      6
    
    
      1475
      5.3
      0.470
      0.11
      2.20
      0.048
      16.0
      89.0
      0.99182
      3.54
      0.88
      13.566667
      7
    
    
      1477
      5.3
      0.470
      0.11
      2.20
      0.048
      16.0
      89.0
      0.99182
      3.54
      0.88
      13.600000
      7
    
    
      444
      5.3
      0.570
      0.01
      1.70
      0.054
      5.0
      27.0
      0.99340
      3.57
      0.84
      12.500000
      7
    
    
      916
      5.3
      0.715
      0.19
      1.50
      0.161
      7.0
      62.0
      0.99395
      3.62
      0.61
      11.000000
      5
    
    
      1111
      5.4
      0.420
      0.27
      2.00
      0.092
      23.0
      55.0
      0.99471
      3.78
      0.64
      12.300000
      7
    
    
      1537
      5.4
      0.580
      0.08
      1.90
      0.059
      20.0
      31.0
      0.99484
      3.50
      0.64
      10.200000
      6
    
    
      1316
      5.4
      0.740
      0.00
      1.20
      0.041
      16.0
      46.0
      0.99258
      4.01
      0.59
      12.500000
      6
    
    
      1591
      5.4
      0.740
      0.09
      1.70
      0.089
      16.0
      26.0
      0.99402
      3.67
      0.56
      11.600000
      6
    
    
      198
      5.4
      0.835
      0.08
      1.20
      0.046
      13.0
      93.0
      0.99240
      3.57
      0.85
      13.000000
      7
    
    
      1269
      5.5
      0.490
      0.03
      1.80
      0.044
      28.0
      87.0
      0.99080
      3.50
      0.82
      14.000000
      8
    
    
      49
      5.6
      0.310
      0.37
      1.40
      0.074
      12.0
      96.0
      0.99540
      3.32
      0.58
      9.200000
      5
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      205
      12.8
      0.300
      0.74
      2.60
      0.095
      9.0
      28.0
      0.99940
      3.20
      0.77
      10.800000
      7
    
    
      206
      12.8
      0.300
      0.74
      2.60
      0.095
      9.0
      28.0
      0.99940
      3.20
      0.77
      10.800000
      7
    
    
      364
      12.8
      0.615
      0.66
      5.80
      0.083
      7.0
      42.0
      1.00220
      3.07
      0.73
      10.000000
      7
    
    
      366
      12.8
      0.615
      0.66
      5.80
      0.083
      7.0
      42.0
      1.00220
      3.07
      0.73
      10.000000
      7
    
    
      429
      12.8
      0.840
      0.63
      2.40
      0.088
      13.0
      35.0
      0.99970
      3.10
      0.60
      10.400000
      6
    
    
      538
      12.9
      0.350
      0.49
      5.80
      0.066
      5.0
      35.0
      1.00140
      3.20
      0.66
      12.000000
      7
    
    
      811
      12.9
      0.500
      0.55
      2.80
      0.072
      7.0
      24.0
      1.00012
      3.09
      0.68
      10.900000
      6
    
    
      470
      13.0
      0.320
      0.65
      2.60
      0.093
      15.0
      47.0
      0.99960
      3.05
      0.61
      10.600000
      5
    
    
      559
      13.0
      0.470
      0.49
      4.30
      0.085
      6.0
      47.0
      1.00210
      3.30
      0.68
      12.700000
      6
    
    
      564
      13.0
      0.470
      0.49
      4.30
      0.085
      6.0
      47.0
      1.00210
      3.30
      0.68
      12.700000
      6
    
    
      611
      13.2
      0.380
      0.55
      2.70
      0.081
      5.0
      16.0
      1.00060
      2.98
      0.54
      9.400000
      5
    
    
      601
      13.2
      0.460
      0.52
      2.20
      0.071
      12.0
      35.0
      1.00060
      3.10
      0.56
      9.000000
      6
    
    
      603
      13.2
      0.460
      0.52
      2.20
      0.071
      12.0
      35.0
      1.00060
      3.10
      0.56
      9.000000
      6
    
    
      509
      13.3
      0.290
      0.75
      2.80
      0.084
      23.0
      43.0
      0.99860
      3.04
      0.68
      11.400000
      7
    
    
      294
      13.3
      0.340
      0.52
      3.20
      0.094
      17.0
      53.0
      1.00140
      3.05
      0.81
      9.500000
      6
    
    
      680
      13.3
      0.430
      0.58
      1.90
      0.070
      15.0
      40.0
      1.00040
      3.06
      0.49
      9.000000
      5
    
    
      328
      13.4
      0.270
      0.62
      2.60
      0.082
      6.0
      21.0
      1.00020
      3.16
      0.67
      9.700000
      6
    
    
      353
      13.5
      0.530
      0.79
      4.80
      0.120
      23.0
      77.0
      1.00180
      3.18
      0.77
      13.000000
      5
    
    
      381
      13.7
      0.415
      0.68
      2.90
      0.085
      17.0
      43.0
      1.00140
      3.06
      0.80
      10.000000
      6
    
    
      391
      13.7
      0.415
      0.68
      2.90
      0.085
      17.0
      43.0
      1.00140
      3.06
      0.80
      10.000000
      6
    
    
      347
      13.8
      0.490
      0.67
      3.00
      0.093
      6.0
      15.0
      0.99860
      3.02
      0.93
      12.000000
      6
    
    
      374
      14.0
      0.410
      0.63
      3.80
      0.089
      6.0
      47.0
      1.00140
      3.01
      0.81
      10.800000
      6
    
    
      544
      14.3
      0.310
      0.74
      1.80
      0.075
      6.0
      15.0
      1.00080
      2.86
      0.79
      8.400000
      6
    
    
      243
      15.0
      0.210
      0.44
      2.20
      0.075
      10.0
      24.0
      1.00005
      3.07
      0.84
      9.200000
      7
    
    
      244
      15.0
      0.210
      0.44
      2.20
      0.075
      10.0
      24.0
      1.00005
      3.07
      0.84
      9.200000
      7
    
    
      554
      15.5
      0.645
      0.49
      4.20
      0.095
      10.0
      23.0
      1.00315
      2.92
      0.74
      11.100000
      5
    
    
      555
      15.5
      0.645
      0.49
      4.20
      0.095
      10.0
      23.0
      1.00315
      2.92
      0.74
      11.100000
      5
    
    
      557
      15.6
      0.645
      0.49
      4.20
      0.095
      10.0
      23.0
      1.00315
      2.92
      0.74
      11.100000
      5
    
    
      442
      15.6
      0.685
      0.76
      3.70
      0.100
      6.0
      43.0
      1.00320
      2.95
      0.68
      11.200000
      7
    
    
      652
      15.9
      0.360
      0.65
      7.50
      0.096
      22.0
      71.0
      0.99760
      2.98
      0.84
      14.900000
      5
    
  

1599 rows × 12 columns

Dropping and adding columns

The argument axis=1 means that we are droppinng a column.
The argument inplace=True tells it to modify the dataframe

If we used inplace==False then it would return a copy that we could use, but the original would remain unmodified.



In [ ]:



In [26]:

    
df.drop('sulphates', axis=1, inplace=True)
df.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 11 columns):
fixed_acidity           1599 non-null float64
volatile_acidity        1599 non-null float64
citric_acid             1599 non-null float64
residual_sugar          1599 non-null float64
chlorides               1599 non-null float64
free_sulfur_dioxide     1599 non-null float64
total_sulfur_dioxide    1599 non-null float64
density                 1599 non-null float64
pH                      1599 non-null float64
alcohol                 1599 non-null float64
quality                 1599 non-null int64
dtypes: float64(10), int64(1)
memory usage: 137.5 KB



In [27]:

    
## Using eval
df.eval('total_acidity = volatile_acidity + fixed_acidity', inplace=True)
df[['total_acidity', 'volatile_acidity', 'fixed_acidity']].head()









    Out[27]:







  
    
      
      total_acidity
      volatile_acidity
      fixed_acidity
    
  
  
    
      0
      8.10
      0.70
      7.4
    
    
      1
      8.68
      0.88
      7.8
    
    
      2
      8.56
      0.76
      7.8
    
    
      3
      11.48
      0.28
      11.2
    
    
      4
      8.10
      0.70
      7.4



In [ ]:



In [ ]:

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol	quality
0	7.4	0.70	0.00	1.9	0.076	11.0	34.0	0.9978	3.51	0.56	9.4	5
1	7.8	0.88	0.00	2.6	0.098	25.0	67.0	0.9968	3.20	0.68	9.8	5
2	7.8	0.76	0.04	2.3	0.092	15.0	54.0	0.9970	3.26	0.65	9.8	5
3	11.2	0.28	0.56	1.9	0.075	17.0	60.0	0.9980	3.16	0.58	9.8	6
4	7.4	0.70	0.00	1.9	0.076	11.0	34.0	0.9978	3.51	0.56	9.4	5

	fixed acidity	volatile acidity	citric acid	residual sugar	chlorides	free sulfur dioxide	total sulfur dioxide	density	pH	sulphates	alcohol	quality
count	1599.000000	1599.000000	1599.000000	1599.000000	1599.000000	1599.000000	1599.000000	1599.000000	1599.000000	1599.000000	1599.000000	1599.000000
mean	8.319637	0.527821	0.270976	2.538806	0.087467	15.874922	46.467792	0.996747	3.311113	0.658149	10.422983	5.636023
std	1.741096	0.179060	0.194801	1.409928	0.047065	10.460157	32.895324	0.001887	0.154386	0.169507	1.065668	0.807569
min	4.600000	0.120000	0.000000	0.900000	0.012000	1.000000	6.000000	0.990070	2.740000	0.330000	8.400000	3.000000
25%	7.100000	0.390000	0.090000	1.900000	0.070000	7.000000	22.000000	0.995600	3.210000	0.550000	9.500000	5.000000
50%	7.900000	0.520000	0.260000	2.200000	0.079000	14.000000	38.000000	0.996750	3.310000	0.620000	10.200000	6.000000
75%	9.200000	0.640000	0.420000	2.600000	0.090000	21.000000	62.000000	0.997835	3.400000	0.730000	11.100000	6.000000
max	15.900000	1.580000	1.000000	15.500000	0.611000	72.000000	289.000000	1.003690	4.010000	2.000000	14.900000	8.000000

	fixed_acidity	volatile_acidity	citric_acid	residual_sugar	chlorides	free_sulfur_dioxide	total_sulfur_dioxide	density	pH	sulphates	alcohol
quality
3	8.360000	0.884500	0.171000	2.635000	0.122500	11.000000	24.900000	0.997464	3.398000	0.570000	9.955000
4	7.779245	0.693962	0.174151	2.694340	0.090679	12.264151	36.245283	0.996542	3.381509	0.596415	10.265094
5	8.167254	0.577041	0.243686	2.528855	0.092736	16.983847	56.513950	0.997104	3.304949	0.620969	9.899706
6	8.347179	0.497484	0.273824	2.477194	0.084956	15.711599	40.869906	0.996615	3.318072	0.675329	10.629519
7	8.872362	0.403920	0.375176	2.720603	0.076588	14.045226	35.020101	0.996104	3.290754	0.741256	11.465913
8	8.566667	0.423333	0.391111	2.577778	0.068444	13.277778	33.444444	0.995212	3.267222	0.767778	12.094444

	fixed_acidity	volatile_acidity	citric_acid	residual_sugar	chlorides	free_sulfur_dioxide	total_sulfur_dioxide	density	pH	sulphates	alcohol
quality
6	7.601587	0.454762	0.288413	2.512698	0.069841	15.841270	40.253968	0.994317	3.399206	0.632857	12.673016
7	7.817544	0.428509	0.333684	2.846491	0.070702	13.666667	43.456140	0.994437	3.359474	0.719649	12.606433
8	7.822222	0.506667	0.314444	3.000000	0.066444	14.888889	41.333333	0.994416	3.383333	0.766667	13.111111

	fixed_acidity	volatile_acidity	citric_acid	residual_sugar	chlorides	free_sulfur_dioxide	total_sulfur_dioxide	density	pH	sulphates	alcohol	quality
45	4.6	0.520	0.15	2.10	0.054	8.0	65.0	0.99340	3.90	0.56	13.100000	4
95	4.7	0.600	0.17	2.30	0.058	17.0	106.0	0.99320	3.85	0.60	12.900000	6
821	4.9	0.420	0.00	2.10	0.048	16.0	42.0	0.99154	3.71	0.74	14.000000	7
588	5.0	0.420	0.24	2.00	0.060	19.0	50.0	0.99170	3.72	0.74	14.000000	8
94	5.0	1.020	0.04	1.40	0.045	41.0	85.0	0.99380	3.75	0.48	10.500000	4
553	5.0	1.040	0.24	1.60	0.050	32.0	96.0	0.99340	3.74	0.62	11.500000	5
1270	5.0	0.380	0.01	1.60	0.048	26.0	60.0	0.99084	3.70	0.75	14.000000	6
1114	5.0	0.400	0.50	4.30	0.046	29.0	80.0	0.99020	3.49	0.66	13.600000	6
1321	5.0	0.740	0.00	1.20	0.041	16.0	46.0	0.99258	4.01	0.59	12.500000	6
1157	5.1	0.510	0.18	2.10	0.042	16.0	101.0	0.99240	3.46	0.87	12.900000	7
695	5.1	0.470	0.02	1.30	0.034	18.0	44.0	0.99210	3.90	0.62	12.800000	6
1228	5.1	0.420	0.00	1.80	0.044	18.0	88.0	0.99157	3.68	0.73	13.600000	7
802	5.1	0.585	0.00	1.70	0.044	14.0	86.0	0.99264	3.56	0.94	12.900000	7
1300	5.2	0.645	0.00	2.15	0.080	15.0	28.0	0.99444	3.78	0.61	12.500000	6
1377	5.2	0.490	0.26	2.30	0.090	23.0	74.0	0.99530	3.71	0.62	12.200000	6
34	5.2	0.320	0.25	1.80	0.103	13.0	50.0	0.99570	3.38	0.55	9.200000	5
230	5.2	0.480	0.04	1.60	0.054	19.0	106.0	0.99270	3.54	0.62	12.200000	7
144	5.2	0.340	0.00	1.80	0.050	27.0	63.0	0.99160	3.68	0.79	14.000000	6
142	5.2	0.340	0.00	1.80	0.050	27.0	63.0	0.99160	3.68	0.79	14.000000	6
444	5.3	0.570	0.01	1.70	0.054	5.0	27.0	0.99340	3.57	0.84	12.500000	7
1477	5.3	0.470	0.11	2.20	0.048	16.0	89.0	0.99182	3.54	0.88	13.600000	7
916	5.3	0.715	0.19	1.50	0.161	7.0	62.0	0.99395	3.62	0.61	11.000000	5
1475	5.3	0.470	0.11	2.20	0.048	16.0	89.0	0.99182	3.54	0.88	13.566667	7
198	5.4	0.835	0.08	1.20	0.046	13.0	93.0	0.99240	3.57	0.85	13.000000	7
1316	5.4	0.740	0.00	1.20	0.041	16.0	46.0	0.99258	4.01	0.59	12.500000	6
1537	5.4	0.580	0.08	1.90	0.059	20.0	31.0	0.99484	3.50	0.64	10.200000	6
1111	5.4	0.420	0.27	2.00	0.092	23.0	55.0	0.99471	3.78	0.64	12.300000	7
1591	5.4	0.740	0.09	1.70	0.089	16.0	26.0	0.99402	3.67	0.56	11.600000	6
1269	5.5	0.490	0.03	1.80	0.044	28.0	87.0	0.99080	3.50	0.82	14.000000	8
1178	5.6	0.915	0.00	2.10	0.041	17.0	78.0	0.99346	3.68	0.73	11.400000	5
...	...	...	...	...	...	...	...	...	...	...	...	...
366	12.8	0.615	0.66	5.80	0.083	7.0	42.0	1.00220	3.07	0.73	10.000000	7
205	12.8	0.300	0.74	2.60	0.095	9.0	28.0	0.99940	3.20	0.77	10.800000	7
206	12.8	0.300	0.74	2.60	0.095	9.0	28.0	0.99940	3.20	0.77	10.800000	7
429	12.8	0.840	0.63	2.40	0.088	13.0	35.0	0.99970	3.10	0.60	10.400000	6
364	12.8	0.615	0.66	5.80	0.083	7.0	42.0	1.00220	3.07	0.73	10.000000	7
811	12.9	0.500	0.55	2.80	0.072	7.0	24.0	1.00012	3.09	0.68	10.900000	6
538	12.9	0.350	0.49	5.80	0.066	5.0	35.0	1.00140	3.20	0.66	12.000000	7
564	13.0	0.470	0.49	4.30	0.085	6.0	47.0	1.00210	3.30	0.68	12.700000	6
470	13.0	0.320	0.65	2.60	0.093	15.0	47.0	0.99960	3.05	0.61	10.600000	5
559	13.0	0.470	0.49	4.30	0.085	6.0	47.0	1.00210	3.30	0.68	12.700000	6
603	13.2	0.460	0.52	2.20	0.071	12.0	35.0	1.00060	3.10	0.56	9.000000	6
611	13.2	0.380	0.55	2.70	0.081	5.0	16.0	1.00060	2.98	0.54	9.400000	5
601	13.2	0.460	0.52	2.20	0.071	12.0	35.0	1.00060	3.10	0.56	9.000000	6
509	13.3	0.290	0.75	2.80	0.084	23.0	43.0	0.99860	3.04	0.68	11.400000	7
680	13.3	0.430	0.58	1.90	0.070	15.0	40.0	1.00040	3.06	0.49	9.000000	5
294	13.3	0.340	0.52	3.20	0.094	17.0	53.0	1.00140	3.05	0.81	9.500000	6
328	13.4	0.270	0.62	2.60	0.082	6.0	21.0	1.00020	3.16	0.67	9.700000	6
353	13.5	0.530	0.79	4.80	0.120	23.0	77.0	1.00180	3.18	0.77	13.000000	5
381	13.7	0.415	0.68	2.90	0.085	17.0	43.0	1.00140	3.06	0.80	10.000000	6
391	13.7	0.415	0.68	2.90	0.085	17.0	43.0	1.00140	3.06	0.80	10.000000	6
347	13.8	0.490	0.67	3.00	0.093	6.0	15.0	0.99860	3.02	0.93	12.000000	6
374	14.0	0.410	0.63	3.80	0.089	6.0	47.0	1.00140	3.01	0.81	10.800000	6
544	14.3	0.310	0.74	1.80	0.075	6.0	15.0	1.00080	2.86	0.79	8.400000	6
244	15.0	0.210	0.44	2.20	0.075	10.0	24.0	1.00005	3.07	0.84	9.200000	7
243	15.0	0.210	0.44	2.20	0.075	10.0	24.0	1.00005	3.07	0.84	9.200000	7
555	15.5	0.645	0.49	4.20	0.095	10.0	23.0	1.00315	2.92	0.74	11.100000	5
554	15.5	0.645	0.49	4.20	0.095	10.0	23.0	1.00315	2.92	0.74	11.100000	5
442	15.6	0.685	0.76	3.70	0.100	6.0	43.0	1.00320	2.95	0.68	11.200000	7
557	15.6	0.645	0.49	4.20	0.095	10.0	23.0	1.00315	2.92	0.74	11.100000	5
652	15.9	0.360	0.65	7.50	0.096	22.0	71.0	0.99760	2.98	0.84	14.900000	5

	total_acidity	volatile_acidity	fixed_acidity
0	8.10	0.70	7.4
1	8.68	0.88	7.8
2	8.56	0.76	7.8
3	11.48	0.28	11.2
4	8.10	0.70	7.4