Pandas Crosstab Explained

Notebook that accompanies article on Practical Business Python



In [1]:

    
import pandas as pd
import seaborn as sns



In [2]:

    
%matplotlib inline

Import the data



In [3]:

    
# Define the headers since the data does not have any
headers = ["symboling", "normalized_losses", "make", "fuel_type", "aspiration",
           "num_doors", "body_style", "drive_wheels", "engine_location",
           "wheel_base", "length", "width", "height", "curb_weight",
           "engine_type", "num_cylinders", "engine_size", "fuel_system",
           "bore", "stroke", "compression_ratio", "horsepower", "peak_rpm",
           "city_mpg", "highway_mpg", "price"]



In [4]:

    
# Read in the CSV file and convert "?" to NaN
df_raw = pd.read_csv("http://mlr.cs.umass.edu/ml/machine-learning-databases/autos/imports-85.data",
                  header=None, names=headers, na_values="?" )
df_raw.head()









    Out[4]:







  
    
      
      symboling
      normalized_losses
      make
      fuel_type
      aspiration
      num_doors
      body_style
      drive_wheels
      engine_location
      wheel_base
      ...
      engine_size
      fuel_system
      bore
      stroke
      compression_ratio
      horsepower
      peak_rpm
      city_mpg
      highway_mpg
      price
    
  
  
    
      0
      3
      NaN
      alfa-romero
      gas
      std
      two
      convertible
      rwd
      front
      88.6
      ...
      130
      mpfi
      3.47
      2.68
      9.0
      111.0
      5000.0
      21
      27
      13495.0
    
    
      1
      3
      NaN
      alfa-romero
      gas
      std
      two
      convertible
      rwd
      front
      88.6
      ...
      130
      mpfi
      3.47
      2.68
      9.0
      111.0
      5000.0
      21
      27
      16500.0
    
    
      2
      1
      NaN
      alfa-romero
      gas
      std
      two
      hatchback
      rwd
      front
      94.5
      ...
      152
      mpfi
      2.68
      3.47
      9.0
      154.0
      5000.0
      19
      26
      16500.0
    
    
      3
      2
      164.0
      audi
      gas
      std
      four
      sedan
      fwd
      front
      99.8
      ...
      109
      mpfi
      3.19
      3.40
      10.0
      102.0
      5500.0
      24
      30
      13950.0
    
    
      4
      2
      164.0
      audi
      gas
      std
      four
      sedan
      4wd
      front
      99.4
      ...
      136
      mpfi
      3.19
      3.40
      8.0
      115.0
      5500.0
      18
      22
      17450.0
    
  

5 rows × 26 columns



In [5]:

    
# Take a quick look at all the values in the data
df_raw.describe()









    Out[5]:







  
    
      
      symboling
      normalized_losses
      wheel_base
      length
      width
      height
      curb_weight
      engine_size
      bore
      stroke
      compression_ratio
      horsepower
      peak_rpm
      city_mpg
      highway_mpg
      price
    
  
  
    
      count
      205.000000
      164.000000
      205.000000
      205.000000
      205.000000
      205.000000
      205.000000
      205.000000
      201.000000
      201.000000
      205.000000
      203.000000
      203.000000
      205.000000
      205.000000
      201.000000
    
    
      mean
      0.834146
      122.000000
      98.756585
      174.049268
      65.907805
      53.724878
      2555.565854
      126.907317
      3.329751
      3.255423
      10.142537
      104.256158
      5125.369458
      25.219512
      30.751220
      13207.129353
    
    
      std
      1.245307
      35.442168
      6.021776
      12.337289
      2.145204
      2.443522
      520.680204
      41.642693
      0.273539
      0.316717
      3.972040
      39.714369
      479.334560
      6.542142
      6.886443
      7947.066342
    
    
      min
      -2.000000
      65.000000
      86.600000
      141.100000
      60.300000
      47.800000
      1488.000000
      61.000000
      2.540000
      2.070000
      7.000000
      48.000000
      4150.000000
      13.000000
      16.000000
      5118.000000
    
    
      25%
      0.000000
      94.000000
      94.500000
      166.300000
      64.100000
      52.000000
      2145.000000
      97.000000
      3.150000
      3.110000
      8.600000
      70.000000
      4800.000000
      19.000000
      25.000000
      7775.000000
    
    
      50%
      1.000000
      115.000000
      97.000000
      173.200000
      65.500000
      54.100000
      2414.000000
      120.000000
      3.310000
      3.290000
      9.000000
      95.000000
      5200.000000
      24.000000
      30.000000
      10295.000000
    
    
      75%
      2.000000
      150.000000
      102.400000
      183.100000
      66.900000
      55.500000
      2935.000000
      141.000000
      3.590000
      3.410000
      9.400000
      116.000000
      5500.000000
      30.000000
      34.000000
      16500.000000
    
    
      max
      3.000000
      256.000000
      120.900000
      208.100000
      72.300000
      59.800000
      4066.000000
      326.000000
      3.940000
      4.170000
      23.000000
      288.000000
      6600.000000
      49.000000
      54.000000
      45400.000000



In [6]:

    
# Filter out the top 8 manufacturers
models = ["toyota","nissan","mazda", "honda", "mitsubishi", "subaru", "volkswagen", "volvo"]



In [7]:

    
df = df_raw[df_raw.make.isin(models)].copy()



In [8]:

    
df.head()









    Out[8]:







  
    
      
      symboling
      normalized_losses
      make
      fuel_type
      aspiration
      num_doors
      body_style
      drive_wheels
      engine_location
      wheel_base
      ...
      engine_size
      fuel_system
      bore
      stroke
      compression_ratio
      horsepower
      peak_rpm
      city_mpg
      highway_mpg
      price
    
  
  
    
      30
      2
      137.0
      honda
      gas
      std
      two
      hatchback
      fwd
      front
      86.6
      ...
      92
      1bbl
      2.91
      3.41
      9.6
      58.0
      4800.0
      49
      54
      6479.0
    
    
      31
      2
      137.0
      honda
      gas
      std
      two
      hatchback
      fwd
      front
      86.6
      ...
      92
      1bbl
      2.91
      3.41
      9.2
      76.0
      6000.0
      31
      38
      6855.0
    
    
      32
      1
      101.0
      honda
      gas
      std
      two
      hatchback
      fwd
      front
      93.7
      ...
      79
      1bbl
      2.91
      3.07
      10.1
      60.0
      5500.0
      38
      42
      5399.0
    
    
      33
      1
      101.0
      honda
      gas
      std
      two
      hatchback
      fwd
      front
      93.7
      ...
      92
      1bbl
      2.91
      3.41
      9.2
      76.0
      6000.0
      30
      34
      6529.0
    
    
      34
      1
      101.0
      honda
      gas
      std
      two
      hatchback
      fwd
      front
      93.7
      ...
      92
      1bbl
      2.91
      3.41
      9.2
      76.0
      6000.0
      30
      34
      7129.0
    
  

5 rows × 26 columns



In [9]:

    
### Basic Crosstab functions



In [10]:

    
# Create a simple crosstab that counts the number of occurences of each combination
pd.crosstab(df.make, df.num_doors)



In [11]:

    
# Add a subtotal
pd.crosstab(df.make, df.num_doors, margins=True, margins_name="Total")



In [12]:

    
# Another example, this time of make and body_style
pd.crosstab(df.make, df.body_style)









    Out[12]:







  
    
      body_style
      convertible
      hardtop
      hatchback
      sedan
      wagon
    
    
      make
      
      
      
      
      
    
  
  
    
      honda
      0
      0
      7
      5
      1
    
    
      mazda
      0
      0
      10
      7
      0
    
    
      mitsubishi
      0
      0
      9
      4
      0
    
    
      nissan
      0
      1
      5
      9
      3
    
    
      subaru
      0
      0
      3
      5
      4
    
    
      toyota
      1
      3
      14
      10
      4
    
    
      volkswagen
      1
      0
      1
      9
      1
    
    
      volvo
      0
      0
      0
      8
      3



In [13]:

    
# Add custom names for the rows and columns?
pd.crosstab(df.make, df.body_style, rownames=['Auto Manufacturer'], colnames=['Body Style'])









    Out[13]:







  
    
      Body Style
      convertible
      hardtop
      hatchback
      sedan
      wagon
    
    
      Auto Manufacturer
      
      
      
      
      
    
  
  
    
      honda
      0
      0
      7
      5
      1
    
    
      mazda
      0
      0
      10
      7
      0
    
    
      mitsubishi
      0
      0
      9
      4
      0
    
    
      nissan
      0
      1
      5
      9
      3
    
    
      subaru
      0
      0
      3
      5
      4
    
    
      toyota
      1
      3
      14
      10
      4
    
    
      volkswagen
      1
      0
      1
      9
      1
    
    
      volvo
      0
      0
      0
      8
      3

Normalizing Results



In [14]:

    
# Convert the occurrences to percentages
pd.crosstab(df.make, df.body_style, normalize=True)









    Out[14]:







  
    
      body_style
      convertible
      hardtop
      hatchback
      sedan
      wagon
    
    
      make
      
      
      
      
      
    
  
  
    
      honda
      0.000000
      0.000000
      0.054688
      0.039062
      0.007812
    
    
      mazda
      0.000000
      0.000000
      0.078125
      0.054688
      0.000000
    
    
      mitsubishi
      0.000000
      0.000000
      0.070312
      0.031250
      0.000000
    
    
      nissan
      0.000000
      0.007812
      0.039062
      0.070312
      0.023438
    
    
      subaru
      0.000000
      0.000000
      0.023438
      0.039062
      0.031250
    
    
      toyota
      0.007812
      0.023438
      0.109375
      0.078125
      0.031250
    
    
      volkswagen
      0.007812
      0.000000
      0.007812
      0.070312
      0.007812
    
    
      volvo
      0.000000
      0.000000
      0.000000
      0.062500
      0.023438



In [15]:

    
# Convert the occurrences to percentages for each row
pd.crosstab(df.make, df.body_style, normalize='index')









    Out[15]:







  
    
      body_style
      convertible
      hardtop
      hatchback
      sedan
      wagon
    
    
      make
      
      
      
      
      
    
  
  
    
      honda
      0.000000
      0.000000
      0.538462
      0.384615
      0.076923
    
    
      mazda
      0.000000
      0.000000
      0.588235
      0.411765
      0.000000
    
    
      mitsubishi
      0.000000
      0.000000
      0.692308
      0.307692
      0.000000
    
    
      nissan
      0.000000
      0.055556
      0.277778
      0.500000
      0.166667
    
    
      subaru
      0.000000
      0.000000
      0.250000
      0.416667
      0.333333
    
    
      toyota
      0.031250
      0.093750
      0.437500
      0.312500
      0.125000
    
    
      volkswagen
      0.083333
      0.000000
      0.083333
      0.750000
      0.083333
    
    
      volvo
      0.000000
      0.000000
      0.000000
      0.727273
      0.272727



In [16]:

    
# Convert the occurrences to percentages for each column
pd.crosstab(df.make, df.body_style, normalize='columns')









    Out[16]:







  
    
      body_style
      convertible
      hardtop
      hatchback
      sedan
      wagon
    
    
      make
      
      
      
      
      
    
  
  
    
      honda
      0.0
      0.00
      0.142857
      0.087719
      0.0625
    
    
      mazda
      0.0
      0.00
      0.204082
      0.122807
      0.0000
    
    
      mitsubishi
      0.0
      0.00
      0.183673
      0.070175
      0.0000
    
    
      nissan
      0.0
      0.25
      0.102041
      0.157895
      0.1875
    
    
      subaru
      0.0
      0.00
      0.061224
      0.087719
      0.2500
    
    
      toyota
      0.5
      0.75
      0.285714
      0.175439
      0.2500
    
    
      volkswagen
      0.5
      0.00
      0.020408
      0.157895
      0.0625
    
    
      volvo
      0.0
      0.00
      0.000000
      0.140351
      0.1875



In [17]:

    
# If you want to make the percentages a little easier to see, multiple all values by 100
pd.crosstab(df.make, df.body_style, normalize='columns').mul(100).round(0)









    Out[17]:







  
    
      body_style
      convertible
      hardtop
      hatchback
      sedan
      wagon
    
    
      make
      
      
      
      
      
    
  
  
    
      honda
      0.0
      0.0
      14.0
      9.0
      6.0
    
    
      mazda
      0.0
      0.0
      20.0
      12.0
      0.0
    
    
      mitsubishi
      0.0
      0.0
      18.0
      7.0
      0.0
    
    
      nissan
      0.0
      25.0
      10.0
      16.0
      19.0
    
    
      subaru
      0.0
      0.0
      6.0
      9.0
      25.0
    
    
      toyota
      50.0
      75.0
      29.0
      18.0
      25.0
    
    
      volkswagen
      50.0
      0.0
      2.0
      16.0
      6.0
    
    
      volvo
      0.0
      0.0
      0.0
      14.0
      19.0

Custom Aggregations and Grouping



In [18]:

    
# Perform aggregation functions - not just a simple count
pd.crosstab(df.make, df.body_style, values=df.curb_weight, aggfunc='mean').round(0)









    Out[18]:







  
    
      body_style
      convertible
      hardtop
      hatchback
      sedan
      wagon
    
    
      make
      
      
      
      
      
    
  
  
    
      honda
      NaN
      NaN
      1970.0
      2289.0
      2024.0
    
    
      mazda
      NaN
      NaN
      2254.0
      2361.0
      NaN
    
    
      mitsubishi
      NaN
      NaN
      2377.0
      2394.0
      NaN
    
    
      nissan
      NaN
      2008.0
      2740.0
      2238.0
      2452.0
    
    
      subaru
      NaN
      NaN
      2137.0
      2314.0
      2454.0
    
    
      toyota
      2975.0
      2585.0
      2370.0
      2338.0
      2708.0
    
    
      volkswagen
      2254.0
      NaN
      2221.0
      2342.0
      2563.0
    
    
      volvo
      NaN
      NaN
      NaN
      3023.0
      3078.0



In [19]:

    
pd.crosstab(df.make, [df.body_style, df.drive_wheels], values=df.curb_weight, aggfunc='mean').fillna('-')









    Out[19]:







  
    
      body_style
      convertible
      hardtop
      hatchback
      sedan
      wagon
    
    
      drive_wheels
      fwd
      rwd
      fwd
      rwd
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
    
    
      make
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      honda
      -
      -
      -
      -
      -
      1970
      -
      -
      2288.8
      -
      -
      2024
      -
    
    
      mazda
      -
      -
      -
      -
      -
      2148.33
      2411.25
      -
      2231.6
      2685
      -
      -
      -
    
    
      mitsubishi
      -
      -
      -
      -
      -
      2376.56
      -
      -
      2394
      -
      -
      -
      -
    
    
      nissan
      -
      -
      2008
      -
      -
      2176
      3116.33
      -
      2237.89
      -
      -
      2452.33
      -
    
    
      subaru
      -
      -
      -
      -
      2240
      2085
      -
      2447.5
      2225
      -
      2535
      2372.5
      -
    
    
      toyota
      -
      2975
      -
      2585
      -
      2177.25
      2626.83
      -
      2258.57
      2521.67
      2700
      2280
      3151
    
    
      volkswagen
      2254
      -
      -
      -
      -
      2221
      -
      -
      2342.22
      -
      -
      2563
      -
    
    
      volvo
      -
      -
      -
      -
      -
      -
      -
      -
      -
      3023
      -
      -
      3077.67



In [20]:

    
# Crosstab supports grouping as well. In this case, group the columns
pd.crosstab(df.make, [df.body_style, df.drive_wheels])









    Out[20]:







  
    
      body_style
      convertible
      hardtop
      hatchback
      sedan
      wagon
    
    
      drive_wheels
      fwd
      rwd
      fwd
      rwd
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
    
    
      make
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      honda
      0
      0
      0
      0
      0
      7
      0
      0
      5
      0
      0
      1
      0
    
    
      mazda
      0
      0
      0
      0
      0
      6
      4
      0
      5
      2
      0
      0
      0
    
    
      mitsubishi
      0
      0
      0
      0
      0
      9
      0
      0
      4
      0
      0
      0
      0
    
    
      nissan
      0
      0
      1
      0
      0
      2
      3
      0
      9
      0
      0
      3
      0
    
    
      subaru
      0
      0
      0
      0
      1
      2
      0
      2
      3
      0
      2
      2
      0
    
    
      toyota
      0
      1
      0
      3
      0
      8
      6
      0
      7
      3
      2
      1
      1
    
    
      volkswagen
      1
      0
      0
      0
      0
      1
      0
      0
      9
      0
      0
      1
      0
    
    
      volvo
      0
      0
      0
      0
      0
      0
      0
      0
      0
      8
      0
      0
      3



In [21]:

    
# A more complex example showing the grouping of rows and columns
pd.crosstab([df.make, df.num_doors], [df.body_style, df.drive_wheels], 
            rownames=['Auto Manufacturer', "Doors"], 
            colnames=['Body Style', "Drive Type"], 
            dropna=False)









    Out[21]:







  
    
      
      Body Style
      convertible
      hardtop
      hatchback
      sedan
      wagon
    
    
      
      Drive Type
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
    
    
      Auto Manufacturer
      Doors
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      honda
      four
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      4
      0
      0
      1
      0
    
    
      two
      0
      0
      0
      0
      0
      0
      0
      7
      0
      0
      1
      0
      0
      0
      0
    
    
      mazda
      four
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      4
      2
      0
      0
      0
    
    
      two
      0
      0
      0
      0
      0
      0
      0
      5
      4
      0
      0
      0
      0
      0
      0
    
    
      mitsubishi
      four
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      4
      0
      0
      0
      0
    
    
      two
      0
      0
      0
      0
      0
      0
      0
      9
      0
      0
      0
      0
      0
      0
      0
    
    
      nissan
      four
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
      5
      0
      0
      3
      0
    
    
      two
      0
      0
      0
      0
      1
      0
      0
      1
      3
      0
      4
      0
      0
      0
      0
    
    
      subaru
      four
      0
      0
      0
      0
      0
      0
      0
      0
      0
      2
      3
      0
      2
      2
      0
    
    
      two
      0
      0
      0
      0
      0
      0
      1
      2
      0
      0
      0
      0
      0
      0
      0
    
    
      toyota
      four
      0
      0
      0
      0
      0
      0
      0
      6
      0
      0
      7
      1
      2
      1
      1
    
    
      two
      0
      0
      1
      0
      0
      3
      0
      2
      6
      0
      0
      2
      0
      0
      0
    
    
      volkswagen
      four
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      7
      0
      0
      1
      0
    
    
      two
      0
      1
      0
      0
      0
      0
      0
      1
      0
      0
      2
      0
      0
      0
      0
    
    
      volvo
      four
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      8
      0
      0
      3
    
    
      two
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0



In [22]:

    
# You can also use agg functions when grouping
pd.crosstab(df.make, [df.body_style, df.drive_wheels], values=df.curb_weight, aggfunc='mean').fillna('-')









    Out[22]:







  
    
      body_style
      convertible
      hardtop
      hatchback
      sedan
      wagon
    
    
      drive_wheels
      fwd
      rwd
      fwd
      rwd
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
    
    
      make
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      honda
      -
      -
      -
      -
      -
      1970
      -
      -
      2288.8
      -
      -
      2024
      -
    
    
      mazda
      -
      -
      -
      -
      -
      2148.33
      2411.25
      -
      2231.6
      2685
      -
      -
      -
    
    
      mitsubishi
      -
      -
      -
      -
      -
      2376.56
      -
      -
      2394
      -
      -
      -
      -
    
    
      nissan
      -
      -
      2008
      -
      -
      2176
      3116.33
      -
      2237.89
      -
      -
      2452.33
      -
    
    
      subaru
      -
      -
      -
      -
      2240
      2085
      -
      2447.5
      2225
      -
      2535
      2372.5
      -
    
    
      toyota
      -
      2975
      -
      2585
      -
      2177.25
      2626.83
      -
      2258.57
      2521.67
      2700
      2280
      3151
    
    
      volkswagen
      2254
      -
      -
      -
      -
      2221
      -
      -
      2342.22
      -
      -
      2563
      -
    
    
      volvo
      -
      -
      -
      -
      -
      -
      -
      -
      -
      3023
      -
      -
      3077.67



In [23]:

    
# You can also use margins when grouping
pd.crosstab(df.make, [df.body_style, df.drive_wheels], 
            values=df.curb_weight, aggfunc='mean', margins=True,
            margins_name='Average').fillna('-').round(0)









    Out[23]:







  
    
      body_style
      convertible
      hardtop
      hatchback
      sedan
      wagon
      Average
    
    
      drive_wheels
      fwd
      rwd
      fwd
      rwd
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
      4wd
      fwd
      rwd
      
    
    
      make
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      honda
      -
      -
      -
      -
      -
      1970
      -
      -
      2288.8
      -
      -
      2024
      -
      2097.0
    
    
      mazda
      -
      -
      -
      -
      -
      2148.33
      2411.25
      -
      2231.6
      2685
      -
      -
      -
      2298.0
    
    
      mitsubishi
      -
      -
      -
      -
      -
      2376.56
      -
      -
      2394
      -
      -
      -
      -
      2382.0
    
    
      nissan
      -
      -
      2008
      -
      -
      2176
      3116.33
      -
      2237.89
      -
      -
      2452.33
      -
      2400.0
    
    
      subaru
      -
      -
      -
      -
      2240
      2085
      -
      2447.5
      2225
      -
      2535
      2372.5
      -
      2316.0
    
    
      toyota
      -
      2975
      -
      2585
      -
      2177.25
      2626.83
      -
      2258.57
      2521.67
      2700
      2280
      3151
      2441.0
    
    
      volkswagen
      2254
      -
      -
      -
      -
      2221
      -
      -
      2342.22
      -
      -
      2563
      -
      2343.0
    
    
      volvo
      -
      -
      -
      -
      -
      -
      -
      -
      -
      3023
      -
      -
      3077.67
      3038.0
    
    
      Average
      2254
      2975
      2008
      2585
      2240
      2178
      2673.46
      2447.5
      2282.95
      2855.31
      2617.5
      2371.12
      3096
      2406.0

Visualizing results with Seaborn



In [24]:

    
# Seaborn's heatmap can visualize the final results of the crosstab
sns.heatmap(pd.crosstab(df.drive_wheels, df.make))









    Out[24]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe3115831d0>



In [25]:

    
# This is a more complex customization of a heatmap
sns.heatmap(pd.crosstab([df.make, df.num_doors], [df.body_style, df.drive_wheels]), cmap="YlGnBu", 
            annot=True, cbar=False)









    Out[25]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe308feba58>



In [26]:

    
sns.heatmap(pd.crosstab(df.make, df.body_style, values=df.curb_weight, aggfunc='mean').round(0))









    Out[26]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe308dae358>



In [27]:

    
sns.heatmap(pd.crosstab(df.make, [df.body_style, df.drive_wheels], 
            values=df.curb_weight, aggfunc='mean', margins=True, margins_name='Average'),
           cmap="YlGnBu", annot=True, cbar=False, fmt='.0f')









    Out[27]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe308dd47f0>

	symboling	normalized_losses	make	fuel_type	aspiration	num_doors	body_style	drive_wheels	engine_location	wheel_base	...	engine_size	fuel_system	bore	stroke	compression_ratio	horsepower	peak_rpm	city_mpg	highway_mpg	price
0	3	NaN	alfa-romero	gas	std	two	convertible	rwd	front	88.6	...	130	mpfi	3.47	2.68	9.0	111.0	5000.0	21	27	13495.0
1	3	NaN	alfa-romero	gas	std	two	convertible	rwd	front	88.6	...	130	mpfi	3.47	2.68	9.0	111.0	5000.0	21	27	16500.0
2	1	NaN	alfa-romero	gas	std	two	hatchback	rwd	front	94.5	...	152	mpfi	2.68	3.47	9.0	154.0	5000.0	19	26	16500.0
3	2	164.0	audi	gas	std	four	sedan	fwd	front	99.8	...	109	mpfi	3.19	3.40	10.0	102.0	5500.0	24	30	13950.0
4	2	164.0	audi	gas	std	four	sedan	4wd	front	99.4	...	136	mpfi	3.19	3.40	8.0	115.0	5500.0	18	22	17450.0

	symboling	normalized_losses	wheel_base	length	width	height	curb_weight	engine_size	bore	stroke	compression_ratio	horsepower	peak_rpm	city_mpg	highway_mpg	price
count	205.000000	164.000000	205.000000	205.000000	205.000000	205.000000	205.000000	205.000000	201.000000	201.000000	205.000000	203.000000	203.000000	205.000000	205.000000	201.000000
mean	0.834146	122.000000	98.756585	174.049268	65.907805	53.724878	2555.565854	126.907317	3.329751	3.255423	10.142537	104.256158	5125.369458	25.219512	30.751220	13207.129353
std	1.245307	35.442168	6.021776	12.337289	2.145204	2.443522	520.680204	41.642693	0.273539	0.316717	3.972040	39.714369	479.334560	6.542142	6.886443	7947.066342
min	-2.000000	65.000000	86.600000	141.100000	60.300000	47.800000	1488.000000	61.000000	2.540000	2.070000	7.000000	48.000000	4150.000000	13.000000	16.000000	5118.000000
25%	0.000000	94.000000	94.500000	166.300000	64.100000	52.000000	2145.000000	97.000000	3.150000	3.110000	8.600000	70.000000	4800.000000	19.000000	25.000000	7775.000000
50%	1.000000	115.000000	97.000000	173.200000	65.500000	54.100000	2414.000000	120.000000	3.310000	3.290000	9.000000	95.000000	5200.000000	24.000000	30.000000	10295.000000
75%	2.000000	150.000000	102.400000	183.100000	66.900000	55.500000	2935.000000	141.000000	3.590000	3.410000	9.400000	116.000000	5500.000000	30.000000	34.000000	16500.000000
max	3.000000	256.000000	120.900000	208.100000	72.300000	59.800000	4066.000000	326.000000	3.940000	4.170000	23.000000	288.000000	6600.000000	49.000000	54.000000	45400.000000

	symboling	normalized_losses	make	fuel_type	aspiration	num_doors	body_style	drive_wheels	engine_location	wheel_base	...	engine_size	fuel_system	bore	stroke	compression_ratio	horsepower	peak_rpm	city_mpg	highway_mpg	price
30	2	137.0	honda	gas	std	two	hatchback	fwd	front	86.6	...	92	1bbl	2.91	3.41	9.6	58.0	4800.0	49	54	6479.0
31	2	137.0	honda	gas	std	two	hatchback	fwd	front	86.6	...	92	1bbl	2.91	3.41	9.2	76.0	6000.0	31	38	6855.0
32	1	101.0	honda	gas	std	two	hatchback	fwd	front	93.7	...	79	1bbl	2.91	3.07	10.1	60.0	5500.0	38	42	5399.0
33	1	101.0	honda	gas	std	two	hatchback	fwd	front	93.7	...	92	1bbl	2.91	3.41	9.2	76.0	6000.0	30	34	6529.0
34	1	101.0	honda	gas	std	two	hatchback	fwd	front	93.7	...	92	1bbl	2.91	3.41	9.2	76.0	6000.0	30	34	7129.0

num_doors	four	two
make
honda	5	8
mazda	7	9
mitsubishi	4	9
nissan	9	9
subaru	9	3
toyota	18	14
volkswagen	8	4
volvo	11	0

Body Style	convertible	hardtop	hatchback	sedan	wagon
Auto Manufacturer
honda	0	0	7	5	1
mazda	0	0	10	7	0
mitsubishi	0	0	9	4	0
nissan	0	1	5	9	3
subaru	0	0	3	5	4
toyota	1	3	14	10	4
volkswagen	1	0	1	9	1
volvo	0	0	0	8	3

body_style	convertible	hardtop	hatchback	sedan	wagon
make
honda	0.0	0.00	0.142857	0.087719	0.0625
mazda	0.0	0.00	0.204082	0.122807	0.0000
mitsubishi	0.0	0.00	0.183673	0.070175	0.0000
nissan	0.0	0.25	0.102041	0.157895	0.1875
subaru	0.0	0.00	0.061224	0.087719	0.2500
toyota	0.5	0.75	0.285714	0.175439	0.2500
volkswagen	0.5	0.00	0.020408	0.157895	0.0625
volvo	0.0	0.00	0.000000	0.140351	0.1875

body_style	convertible	hardtop	hatchback	sedan	wagon
make
honda	0.0	0.0	14.0	9.0	6.0
mazda	0.0	0.0	20.0	12.0	0.0
mitsubishi	0.0	0.0	18.0	7.0	0.0
nissan	0.0	25.0	10.0	16.0	19.0
subaru	0.0	0.0	6.0	9.0	25.0
toyota	50.0	75.0	29.0	18.0	25.0
volkswagen	50.0	0.0	2.0	16.0	6.0
volvo	0.0	0.0	0.0	14.0	19.0

body_style	convertible	hardtop	hatchback	sedan	wagon
make
honda	NaN	NaN	1970.0	2289.0	2024.0
mazda	NaN	NaN	2254.0	2361.0	NaN
mitsubishi	NaN	NaN	2377.0	2394.0	NaN
nissan	NaN	2008.0	2740.0	2238.0	2452.0
subaru	NaN	NaN	2137.0	2314.0	2454.0
toyota	2975.0	2585.0	2370.0	2338.0	2708.0
volkswagen	2254.0	NaN	2221.0	2342.0	2563.0
volvo	NaN	NaN	NaN	3023.0	3078.0

body_style	convertible		hardtop		hatchback			sedan			wagon
drive_wheels	fwd	rwd	fwd	rwd	4wd	fwd	rwd	4wd	fwd	rwd	4wd	fwd	rwd
make
honda	-	-	-	-	-	1970	-	-	2288.8	-	-	2024	-
mazda	-	-	-	-	-	2148.33	2411.25	-	2231.6	2685	-	-	-
mitsubishi	-	-	-	-	-	2376.56	-	-	2394	-	-	-	-
nissan	-	-	2008	-	-	2176	3116.33	-	2237.89	-	-	2452.33	-
subaru	-	-	-	-	2240	2085	-	2447.5	2225	-	2535	2372.5	-
toyota	-	2975	-	2585	-	2177.25	2626.83	-	2258.57	2521.67	2700	2280	3151
volkswagen	2254	-	-	-	-	2221	-	-	2342.22	-	-	2563	-
volvo	-	-	-	-	-	-	-	-	-	3023	-	-	3077.67

	Body Style	convertible			hardtop			hatchback			sedan			wagon
	Drive Type	4wd	fwd	rwd	4wd	fwd	rwd	4wd	fwd	rwd	4wd	fwd	rwd	4wd	fwd	rwd
Auto Manufacturer	Doors
honda	four	0	0	0	0	0	0	0	0	0	0	4	0	0	1	0
honda	two	0	0	0	0	0	0	0	7	0	0	1	0	0	0	0
mazda	four	0	0	0	0	0	0	0	1	0	0	4	2	0	0	0
mazda	two	0	0	0	0	0	0	0	5	4	0	0	0	0	0	0
mitsubishi	four	0	0	0	0	0	0	0	0	0	0	4	0	0	0	0
mitsubishi	two	0	0	0	0	0	0	0	9	0	0	0	0	0	0	0
nissan	four	0	0	0	0	0	0	0	1	0	0	5	0	0	3	0
nissan	two	0	0	0	0	1	0	0	1	3	0	4	0	0	0	0
subaru	four	0	0	0	0	0	0	0	0	0	2	3	0	2	2	0
subaru	two	0	0	0	0	0	0	1	2	0	0	0	0	0	0	0
toyota	four	0	0	0	0	0	0	0	6	0	0	7	1	2	1	1
toyota	two	0	0	1	0	0	3	0	2	6	0	0	2	0	0	0
volkswagen	four	0	0	0	0	0	0	0	0	0	0	7	0	0	1	0
volkswagen	two	0	1	0	0	0	0	0	1	0	0	2	0	0	0	0
volvo	four	0	0	0	0	0	0	0	0	0	0	0	8	0	0	3
volvo	two	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0