notebook.community

Edit and run



In [1]:

    
import pandas as pd #importing packages
import os as os



In [2]:

    
pd.describe_option() #describe options for customizing









    



display.chop_threshold : float or None
    if set to a float value, all float values smaller then the given threshold
    will be displayed as exactly 0 by repr and friends.
    [default: None] [currently: None]

display.colheader_justify : 'left'/'right'
    Controls the justification of column headers. used by DataFrameFormatter.
    [default: right] [currently: right]

display.column_space No description available.
    [default: 12] [currently: 12]

display.date_dayfirst : boolean
    When True, prints and parses dates with the day first, eg 20/01/2005
    [default: False] [currently: False]

display.date_yearfirst : boolean
    When True, prints and parses dates with the year first, eg 2005/01/20
    [default: False] [currently: False]

display.encoding : str/unicode
    Defaults to the detected encoding of the console.
    Specifies the encoding to be used for strings returned by to_string,
    these are generally strings meant to be displayed on the console.
    [default: UTF-8] [currently: UTF-8]

display.expand_frame_repr : boolean
    Whether to print out the full DataFrame repr for wide DataFrames across
    multiple lines, `max_columns` is still respected, but the output will
    wrap-around across multiple "pages" if its width exceeds `display.width`.
    [default: True] [currently: True]

display.float_format : callable
    The callable should accept a floating point number and return
    a string with the desired format of the number. This is used
    in some places like SeriesFormatter.
    See formats.format.EngFormatter for an example.
    [default: None] [currently: None]

display.height : int
    Deprecated.
    [default: 60] [currently: 60]
    (Deprecated, use `display.max_rows` instead.)

display.large_repr : 'truncate'/'info'
    For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can
    show a truncated table (the default from 0.13), or switch to the view from
    df.info() (the behaviour in earlier versions of pandas).
    [default: truncate] [currently: truncate]

display.latex.escape : bool
    This specifies if the to_latex method of a Dataframe uses escapes special
    characters.
    method. Valid values: False,True
    [default: True] [currently: True]

display.latex.longtable :bool
    This specifies if the to_latex method of a Dataframe uses the longtable
    format.
    method. Valid values: False,True
    [default: False] [currently: False]

display.latex.repr : boolean
    Whether to produce a latex DataFrame representation for jupyter
    environments that support it.
    (default: False)
    [default: False] [currently: False]

display.line_width : int
    Deprecated.
    [default: 80] [currently: 80]
    (Deprecated, use `display.width` instead.)

display.max_categories : int
    This sets the maximum number of categories pandas should output when
    printing out a `Categorical` or a Series of dtype "category".
    [default: 8] [currently: 8]

display.max_columns : int
    If max_cols is exceeded, switch to truncate view. Depending on
    `large_repr`, objects are either centrally truncated or printed as
    a summary view. 'None' value means unlimited.

    In case python/IPython is running in a terminal and `large_repr`
    equals 'truncate' this can be set to 0 and pandas will auto-detect
    the width of the terminal and print a truncated object which fits
    the screen width. The IPython notebook, IPython qtconsole, or IDLE
    do not run in a terminal and hence it is not possible to do
    correct auto-detection.
    [default: 20] [currently: 20]

display.max_colwidth : int
    The maximum width in characters of a column in the repr of
    a pandas data structure. When the column overflows, a "..."
    placeholder is embedded in the output.
    [default: 50] [currently: 50]

display.max_info_columns : int
    max_info_columns is used in DataFrame.info method to decide if
    per column information will be printed.
    [default: 100] [currently: 100]

display.max_info_rows : int or None
    df.info() will usually show null-counts for each column.
    For large frames this can be quite slow. max_info_rows and max_info_cols
    limit this null check only to frames with smaller dimensions than
    specified.
    [default: 1690785] [currently: 1690785]

display.max_rows : int
    If max_rows is exceeded, switch to truncate view. Depending on
    `large_repr`, objects are either centrally truncated or printed as
    a summary view. 'None' value means unlimited.

    In case python/IPython is running in a terminal and `large_repr`
    equals 'truncate' this can be set to 0 and pandas will auto-detect
    the height of the terminal and print a truncated object which fits
    the screen height. The IPython notebook, IPython qtconsole, or
    IDLE do not run in a terminal and hence it is not possible to do
    correct auto-detection.
    [default: 60] [currently: 60]

display.max_seq_items : int or None
    when pretty-printing a long sequence, no more then `max_seq_items`
    will be printed. If items are omitted, they will be denoted by the
    addition of "..." to the resulting string.

    If set to None, the number of items to be printed is unlimited.
    [default: 100] [currently: 100]

display.memory_usage : bool, string or None
    This specifies if the memory usage of a DataFrame should be displayed when
    df.info() is called. Valid values True,False,'deep'
    [default: True] [currently: True]

display.mpl_style : bool
    Setting this to 'default' will modify the rcParams used by matplotlib
    to give plots a more pleasing visual style by default.
    Setting this to None/False restores the values to their initial value.
    [default: None] [currently: None]

display.multi_sparse : boolean
    "sparsify" MultiIndex display (don't display repeated
    elements in outer levels within groups)
    [default: True] [currently: True]

display.notebook_repr_html : boolean
    When True, IPython notebook will use html representation for
    pandas objects (if it is available).
    [default: True] [currently: True]

display.pprint_nest_depth : int
    Controls the number of nested levels to process when pretty-printing
    [default: 3] [currently: 3]

display.precision : int
    Floating point output precision (number of significant digits). This is
    only a suggestion
    [default: 6] [currently: 6]

display.show_dimensions : boolean or 'truncate'
    Whether to print out dimensions at the end of DataFrame repr.
    If 'truncate' is specified, only print out the dimensions if the
    frame is truncated (e.g. not display all rows and/or columns)
    [default: truncate] [currently: truncate]

display.unicode.ambiguous_as_wide : boolean
    Whether to use the Unicode East Asian Width to calculate the display text
    width.
    Enabling this may affect to the performance (default: False)
    [default: False] [currently: False]

display.unicode.east_asian_width : boolean
    Whether to use the Unicode East Asian Width to calculate the display text
    width.
    Enabling this may affect to the performance (default: False)
    [default: False] [currently: False]

display.width : int
    Width of the display in characters. In case python/IPython is running in
    a terminal this can be set to None and pandas will correctly auto-detect
    the width.
    Note that the IPython notebook, IPython qtconsole, or IDLE do not run in a
    terminal and hence it is not possible to correctly detect the width.
    [default: 80] [currently: 80]

io.excel.xls.writer : string
    The default Excel writer engine for 'xls' files. Available options:
    'xlwt' (the default).
    [default: xlwt] [currently: xlwt]

io.excel.xlsm.writer : string
    The default Excel writer engine for 'xlsm' files. Available options:
    'openpyxl' (the default).
    [default: openpyxl] [currently: openpyxl]

io.excel.xlsx.writer : string
    The default Excel writer engine for 'xlsx' files. Available options:
    'xlsxwriter' (the default), 'openpyxl'.
    [default: xlsxwriter] [currently: xlsxwriter]

io.hdf.default_format : format
    default format writing format, if None, then
    put will default to 'fixed' and append will default to 'table'
    [default: None] [currently: None]

io.hdf.dropna_table : boolean
    drop ALL nan rows when appending to a table
    [default: False] [currently: False]

mode.chained_assignment : string
    Raise an exception, warn, or no action if trying to use chained assignment,
    The default is warn
    [default: warn] [currently: warn]

mode.sim_interactive : boolean
    Whether to simulate interactive mode for purposes of testing
    [default: False] [currently: False]

mode.use_inf_as_null : boolean
    True means treat None, NaN, INF, -INF as null (old way),
    False means None and NaN are null, but INF, -INF are not null
    (new way).
    [default: False] [currently: False]



In [3]:

    
pd.get_option("display.memory_usage")#setting some options









    Out[3]:





True



In [5]:

    
adult=pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",header=None)



In [6]:

    
adult.head()









    Out[6]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      12
      13
      14
    
  
  
    
      0
      39
      State-gov
      77516
      Bachelors
      13
      Never-married
      Adm-clerical
      Not-in-family
      White
      Male
      2174
      0
      40
      United-States
      <=50K
    
    
      1
      50
      Self-emp-not-inc
      83311
      Bachelors
      13
      Married-civ-spouse
      Exec-managerial
      Husband
      White
      Male
      0
      0
      13
      United-States
      <=50K
    
    
      2
      38
      Private
      215646
      HS-grad
      9
      Divorced
      Handlers-cleaners
      Not-in-family
      White
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      3
      53
      Private
      234721
      11th
      7
      Married-civ-spouse
      Handlers-cleaners
      Husband
      Black
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      4
      28
      Private
      338409
      Bachelors
      13
      Married-civ-spouse
      Prof-specialty
      Wife
      Black
      Female
      0
      0
      40
      Cuba
      <=50K



In [7]:

    
names2=["age","workclass","fnlwgt","education","education-num","marital-status","occupation","relationship","race","sex","capital-gain","capital-loss","hours-per-week","native-country","income"]



In [8]:

    
adult.columns=names2



In [9]:

    
adult.head()









    Out[9]:






  
    
      
      age
      workclass
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      capital-gain
      capital-loss
      hours-per-week
      native-country
      income
    
  
  
    
      0
      39
      State-gov
      77516
      Bachelors
      13
      Never-married
      Adm-clerical
      Not-in-family
      White
      Male
      2174
      0
      40
      United-States
      <=50K
    
    
      1
      50
      Self-emp-not-inc
      83311
      Bachelors
      13
      Married-civ-spouse
      Exec-managerial
      Husband
      White
      Male
      0
      0
      13
      United-States
      <=50K
    
    
      2
      38
      Private
      215646
      HS-grad
      9
      Divorced
      Handlers-cleaners
      Not-in-family
      White
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      3
      53
      Private
      234721
      11th
      7
      Married-civ-spouse
      Handlers-cleaners
      Husband
      Black
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      4
      28
      Private
      338409
      Bachelors
      13
      Married-civ-spouse
      Prof-specialty
      Wife
      Black
      Female
      0
      0
      40
      Cuba
      <=50K



In [10]:

    
adult.describe()









    Out[10]:






  
    
      
      age
      fnlwgt
      education-num
      capital-gain
      capital-loss
      hours-per-week
    
  
  
    
      count
      32561.000000
      3.256100e+04
      32561.000000
      32561.000000
      32561.000000
      32561.000000
    
    
      mean
      38.581647
      1.897784e+05
      10.080679
      1077.648844
      87.303830
      40.437456
    
    
      std
      13.640433
      1.055500e+05
      2.572720
      7385.292085
      402.960219
      12.347429
    
    
      min
      17.000000
      1.228500e+04
      1.000000
      0.000000
      0.000000
      1.000000
    
    
      25%
      28.000000
      1.178270e+05
      9.000000
      0.000000
      0.000000
      40.000000
    
    
      50%
      37.000000
      1.783560e+05
      10.000000
      0.000000
      0.000000
      40.000000
    
    
      75%
      48.000000
      2.370510e+05
      12.000000
      0.000000
      0.000000
      45.000000
    
    
      max
      90.000000
      1.484705e+06
      16.000000
      99999.000000
      4356.000000
      99.000000



In [22]:

    
adult.quantile([.1,.5])









    Out[22]:






  
    
      
      age
      fnlwgt
      education-num
      capital-gain
      capital-loss
      hours-per-week
    
  
  
    
      0.1
      22.0
      65716.0
      7.0
      0.0
      0.0
      24.0
    
    
      0.5
      37.0
      178356.0
      10.0
      0.0
      0.0
      40.0



In [21]:

    
adult.quantile([.1,.5,.10,.25,.50,.75,.90,.95,.99])









    Out[21]:






  
    
      
      age
      fnlwgt
      education-num
      capital-gain
      capital-loss
      hours-per-week
    
  
  
    
      0.10
      22.0
      65716.0
      7.0
      0.0
      0.0
      24.0
    
    
      0.50
      37.0
      178356.0
      10.0
      0.0
      0.0
      40.0
    
    
      0.10
      22.0
      65716.0
      7.0
      0.0
      0.0
      24.0
    
    
      0.25
      28.0
      117827.0
      9.0
      0.0
      0.0
      40.0
    
    
      0.50
      37.0
      178356.0
      10.0
      0.0
      0.0
      40.0
    
    
      0.75
      48.0
      237051.0
      12.0
      0.0
      0.0
      45.0
    
    
      0.90
      58.0
      329054.0
      13.0
      0.0
      0.0
      55.0
    
    
      0.95
      63.0
      379682.0
      14.0
      5013.0
      0.0
      60.0
    
    
      0.99
      74.0
      510072.0
      16.0
      15024.0
      1980.0
      80.0



In [ ]:



In [15]:

    
adult.corr()









    Out[15]:






  
    
      
      age
      fnlwgt
      education-num
      capital-gain
      capital-loss
      hours-per-week
    
  
  
    
      age
      1.000000
      -0.076646
      0.036527
      0.077674
      0.057775
      0.068756
    
    
      fnlwgt
      -0.076646
      1.000000
      -0.043195
      0.000432
      -0.010252
      -0.018768
    
    
      education-num
      0.036527
      -0.043195
      1.000000
      0.122630
      0.079923
      0.148123
    
    
      capital-gain
      0.077674
      0.000432
      0.122630
      1.000000
      -0.031615
      0.078409
    
    
      capital-loss
      0.057775
      -0.010252
      0.079923
      -0.031615
      1.000000
      0.054256
    
    
      hours-per-week
      0.068756
      -0.018768
      0.148123
      0.078409
      0.054256
      1.000000



In [14]:

    
adult.corr(method='pearson', min_periods=1)









    Out[14]:






  
    
      
      age
      fnlwgt
      education-num
      capital-gain
      capital-loss
      hours-per-week
    
  
  
    
      age
      1.000000
      -0.076646
      0.036527
      0.077674
      0.057775
      0.068756
    
    
      fnlwgt
      -0.076646
      1.000000
      -0.043195
      0.000432
      -0.010252
      -0.018768
    
    
      education-num
      0.036527
      -0.043195
      1.000000
      0.122630
      0.079923
      0.148123
    
    
      capital-gain
      0.077674
      0.000432
      0.122630
      1.000000
      -0.031615
      0.078409
    
    
      capital-loss
      0.057775
      -0.010252
      0.079923
      -0.031615
      1.000000
      0.054256
    
    
      hours-per-week
      0.068756
      -0.018768
      0.148123
      0.078409
      0.054256
      1.000000



In [12]:

    
adult.race.value_counts()









    Out[12]:





 White                 27816
 Black                  3124
 Asian-Pac-Islander     1039
 Amer-Indian-Eskimo      311
 Other                   271
Name: race, dtype: int64



In [13]:

    
adult.sex.value_counts()









    Out[13]:





 Male      21790
 Female    10771
Name: sex, dtype: int64



In [23]:

    
pd.crosstab(adult.race,adult.sex)









    Out[23]:






  
    
      sex
      Female
      Male
    
    
      race
      
      
    
  
  
    
      Amer-Indian-Eskimo
      119
      192
    
    
      Asian-Pac-Islander
      346
      693
    
    
      Black
      1555
      1569
    
    
      Other
      109
      162
    
    
      White
      8642
      19174



In [24]:

    
pd.crosstab(adult.race,adult.income)









    Out[24]:






  
    
      income
      <=50K
      >50K
    
    
      race
      
      
    
  
  
    
      Amer-Indian-Eskimo
      275
      36
    
    
      Asian-Pac-Islander
      763
      276
    
    
      Black
      2737
      387
    
    
      Other
      246
      25
    
    
      White
      20699
      7117



In [25]:

    
workclass=adult.groupby("workclass")



In [26]:

    
workclass.count()









    Out[26]:






  
    
      
      age
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      capital-gain
      capital-loss
      hours-per-week
      native-country
      income
    
    
      workclass
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      ?
      1836
      1836
      1836
      1836
      1836
      1836
      1836
      1836
      1836
      1836
      1836
      1836
      1836
      1836
    
    
      Federal-gov
      960
      960
      960
      960
      960
      960
      960
      960
      960
      960
      960
      960
      960
      960
    
    
      Local-gov
      2093
      2093
      2093
      2093
      2093
      2093
      2093
      2093
      2093
      2093
      2093
      2093
      2093
      2093
    
    
      Never-worked
      7
      7
      7
      7
      7
      7
      7
      7
      7
      7
      7
      7
      7
      7
    
    
      Private
      22696
      22696
      22696
      22696
      22696
      22696
      22696
      22696
      22696
      22696
      22696
      22696
      22696
      22696
    
    
      Self-emp-inc
      1116
      1116
      1116
      1116
      1116
      1116
      1116
      1116
      1116
      1116
      1116
      1116
      1116
      1116
    
    
      Self-emp-not-inc
      2541
      2541
      2541
      2541
      2541
      2541
      2541
      2541
      2541
      2541
      2541
      2541
      2541
      2541
    
    
      State-gov
      1298
      1298
      1298
      1298
      1298
      1298
      1298
      1298
      1298
      1298
      1298
      1298
      1298
      1298
    
    
      Without-pay
      14
      14
      14
      14
      14
      14
      14
      14
      14
      14
      14
      14
      14
      14



In [27]:

    
workclass.mean()









    Out[27]:






  
    
      
      age
      fnlwgt
      education-num
      capital-gain
      capital-loss
      hours-per-week
    
    
      workclass
      
      
      
      
      
      
    
  
  
    
      ?
      40.960240
      188516.338235
      9.260349
      606.795752
      60.760349
      31.919390
    
    
      Federal-gov
      42.590625
      185221.243750
      10.973958
      833.232292
      112.268750
      41.379167
    
    
      Local-gov
      41.751075
      188639.712852
      11.042045
      880.202580
      109.854276
      40.982800
    
    
      Never-worked
      20.571429
      225989.571429
      7.428571
      0.000000
      0.000000
      28.428571
    
    
      Private
      36.797585
      192764.114734
      9.879714
      889.217792
      80.008724
      40.267096
    
    
      Self-emp-inc
      46.017025
      175981.344086
      11.137097
      4875.693548
      155.138889
      48.818100
    
    
      Self-emp-not-inc
      44.969697
      175608.641480
      10.226289
      1886.061787
      116.631641
      44.421881
    
    
      State-gov
      39.436055
      184136.613251
      11.375963
      701.699538
      83.256549
      39.031587
    
    
      Without-pay
      47.785714
      174267.500000
      9.071429
      487.857143
      0.000000
      32.714286



In [28]:

    
adult.transpose()









    Out[28]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      32551
      32552
      32553
      32554
      32555
      32556
      32557
      32558
      32559
      32560
    
  
  
    
      age
      39
      50
      38
      53
      28
      37
      49
      52
      31
      42
      ...
      32
      43
      32
      53
      22
      27
      40
      58
      22
      52
    
    
      workclass
      State-gov
      Self-emp-not-inc
      Private
      Private
      Private
      Private
      Private
      Self-emp-not-inc
      Private
      Private
      ...
      Private
      Private
      Private
      Private
      Private
      Private
      Private
      Private
      Private
      Self-emp-inc
    
    
      fnlwgt
      77516
      83311
      215646
      234721
      338409
      284582
      160187
      209642
      45781
      159449
      ...
      34066
      84661
      116138
      321865
      310152
      257302
      154374
      151910
      201490
      287927
    
    
      education
      Bachelors
      Bachelors
      HS-grad
      11th
      Bachelors
      Masters
      9th
      HS-grad
      Masters
      Bachelors
      ...
      10th
      Assoc-voc
      Masters
      Masters
      Some-college
      Assoc-acdm
      HS-grad
      HS-grad
      HS-grad
      HS-grad
    
    
      education-num
      13
      13
      9
      7
      13
      14
      5
      9
      14
      13
      ...
      6
      11
      14
      14
      10
      12
      9
      9
      9
      9
    
    
      marital-status
      Never-married
      Married-civ-spouse
      Divorced
      Married-civ-spouse
      Married-civ-spouse
      Married-civ-spouse
      Married-spouse-absent
      Married-civ-spouse
      Never-married
      Married-civ-spouse
      ...
      Married-civ-spouse
      Married-civ-spouse
      Never-married
      Married-civ-spouse
      Never-married
      Married-civ-spouse
      Married-civ-spouse
      Widowed
      Never-married
      Married-civ-spouse
    
    
      occupation
      Adm-clerical
      Exec-managerial
      Handlers-cleaners
      Handlers-cleaners
      Prof-specialty
      Exec-managerial
      Other-service
      Exec-managerial
      Prof-specialty
      Exec-managerial
      ...
      Handlers-cleaners
      Sales
      Tech-support
      Exec-managerial
      Protective-serv
      Tech-support
      Machine-op-inspct
      Adm-clerical
      Adm-clerical
      Exec-managerial
    
    
      relationship
      Not-in-family
      Husband
      Not-in-family
      Husband
      Wife
      Wife
      Not-in-family
      Husband
      Not-in-family
      Husband
      ...
      Husband
      Husband
      Not-in-family
      Husband
      Not-in-family
      Wife
      Husband
      Unmarried
      Own-child
      Wife
    
    
      race
      White
      White
      White
      Black
      Black
      White
      Black
      White
      White
      White
      ...
      Amer-Indian-Eskimo
      White
      Asian-Pac-Islander
      White
      White
      White
      White
      White
      White
      White
    
    
      sex
      Male
      Male
      Male
      Male
      Female
      Female
      Female
      Male
      Female
      Male
      ...
      Male
      Male
      Male
      Male
      Male
      Female
      Male
      Female
      Male
      Female
    
    
      capital-gain
      2174
      0
      0
      0
      0
      0
      0
      0
      14084
      5178
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      15024
    
    
      capital-loss
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      hours-per-week
      40
      13
      40
      40
      40
      40
      16
      45
      50
      40
      ...
      40
      45
      11
      40
      40
      38
      40
      40
      20
      40
    
    
      native-country
      United-States
      United-States
      United-States
      United-States
      Cuba
      United-States
      Jamaica
      United-States
      United-States
      United-States
      ...
      United-States
      United-States
      Taiwan
      United-States
      United-States
      United-States
      United-States
      United-States
      United-States
      United-States
    
    
      income
      <=50K
      <=50K
      <=50K
      <=50K
      <=50K
      <=50K
      <=50K
      >50K
      >50K
      >50K
      ...
      <=50K
      <=50K
      <=50K
      >50K
      <=50K
      <=50K
      >50K
      <=50K
      <=50K
      >50K
    
  

15 rows × 32561 columns



In [29]:

    
e=adult.groupby(['sex', "race"]).age.median().reset_index()
e.pivot(index='sex', columns='race', values='age')









    Out[29]:






  
    
      race
      Amer-Indian-Eskimo
      Asian-Pac-Islander
      Black
      Other
      White
    
    
      sex
      
      
      
      
      
    
  
  
    
      Female
      36
      33
      37
      29
      35
    
    
      Male
      35
      37
      36
      32
      38



In [30]:

    
import pandasql as pdsql



In [32]:

    
str1="select *  from adult limit 3;"



In [33]:

    
df1=pdsql.sqldf(str1)
df1









    Out[33]:






  
    
      
      age
      workclass
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      capital-gain
      capital-loss
      hours-per-week
      native-country
      income
    
  
  
    
      0
      39
      State-gov
      77516
      Bachelors
      13
      Never-married
      Adm-clerical
      Not-in-family
      White
      Male
      2174
      0
      40
      United-States
      <=50K
    
    
      1
      50
      Self-emp-not-inc
      83311
      Bachelors
      13
      Married-civ-spouse
      Exec-managerial
      Husband
      White
      Male
      0
      0
      13
      United-States
      <=50K
    
    
      2
      38
      Private
      215646
      HS-grad
      9
      Divorced
      Handlers-cleaners
      Not-in-family
      White
      Male
      0
      0
      40
      United-States
      <=50K



In [ ]:

	0	1	2	3	4	5	6	7	8	9	10	12	13	14
0	39	State-gov	77516	Bachelors	13	Never-married	Adm-clerical	Not-in-family	White	Male	2174	40	United-States	<=50K
1	50	Self-emp-not-inc	83311	Bachelors	13	Married-civ-spouse	Exec-managerial	Husband	White	Male	0	13	United-States	<=50K
2	38	Private	215646	HS-grad	9	Divorced	Handlers-cleaners	Not-in-family	White	Male	0	40	United-States	<=50K
3	53	Private	234721	11th	7	Married-civ-spouse	Handlers-cleaners	Husband	Black	Male	0	40	United-States	<=50K
4	28	Private	338409	Bachelors	13	Married-civ-spouse	Prof-specialty	Wife	Black	Female	0	40	Cuba	<=50K

	age	fnlwgt	education-num	capital-gain	capital-loss	hours-per-week
count	32561.000000	3.256100e+04	32561.000000	32561.000000	32561.000000	32561.000000
mean	38.581647	1.897784e+05	10.080679	1077.648844	87.303830	40.437456
std	13.640433	1.055500e+05	2.572720	7385.292085	402.960219	12.347429
min	17.000000	1.228500e+04	1.000000	0.000000	0.000000	1.000000
25%	28.000000	1.178270e+05	9.000000	0.000000	0.000000	40.000000
50%	37.000000	1.783560e+05	10.000000	0.000000	0.000000	40.000000
75%	48.000000	2.370510e+05	12.000000	0.000000	0.000000	45.000000
max	90.000000	1.484705e+06	16.000000	99999.000000	4356.000000	99.000000

	age	fnlwgt	education-num	capital-gain	capital-loss	hours-per-week
0.10	22.0	65716.0	7.0	0.0	0.0	24.0
0.50	37.0	178356.0	10.0	0.0	0.0	40.0
0.10	22.0	65716.0	7.0	0.0	0.0	24.0
0.25	28.0	117827.0	9.0	0.0	0.0	40.0
0.50	37.0	178356.0	10.0	0.0	0.0	40.0
0.75	48.0	237051.0	12.0	0.0	0.0	45.0
0.90	58.0	329054.0	13.0	0.0	0.0	55.0
0.95	63.0	379682.0	14.0	5013.0	0.0	60.0
0.99	74.0	510072.0	16.0	15024.0	1980.0	80.0

	age	fnlwgt	education-num	capital-gain	capital-loss	hours-per-week
age	1.000000	-0.076646	0.036527	0.077674	0.057775	0.068756
fnlwgt	-0.076646	1.000000	-0.043195	0.000432	-0.010252	-0.018768
education-num	0.036527	-0.043195	1.000000	0.122630	0.079923	0.148123
capital-gain	0.077674	0.000432	0.122630	1.000000	-0.031615	0.078409
capital-loss	0.057775	-0.010252	0.079923	-0.031615	1.000000	0.054256
hours-per-week	0.068756	-0.018768	0.148123	0.078409	0.054256	1.000000

sex	Female	Male
race
Amer-Indian-Eskimo	119	192
Asian-Pac-Islander	346	693
Black	1555	1569
Other	109	162
White	8642	19174

	age	fnlwgt	education	education-num	marital-status	occupation	relationship	race	sex	capital-gain	capital-loss	hours-per-week	native-country	income
workclass
?	1836	1836	1836	1836	1836	1836	1836	1836	1836	1836	1836	1836	1836	1836
Federal-gov	960	960	960	960	960	960	960	960	960	960	960	960	960	960
Local-gov	2093	2093	2093	2093	2093	2093	2093	2093	2093	2093	2093	2093	2093	2093
Never-worked	7	7	7	7	7	7	7	7	7	7	7	7	7	7
Private	22696	22696	22696	22696	22696	22696	22696	22696	22696	22696	22696	22696	22696	22696
Self-emp-inc	1116	1116	1116	1116	1116	1116	1116	1116	1116	1116	1116	1116	1116	1116
Self-emp-not-inc	2541	2541	2541	2541	2541	2541	2541	2541	2541	2541	2541	2541	2541	2541
State-gov	1298	1298	1298	1298	1298	1298	1298	1298	1298	1298	1298	1298	1298	1298
Without-pay	14	14	14	14	14	14	14	14	14	14	14	14	14	14

	age	fnlwgt	education-num	capital-gain	capital-loss	hours-per-week
workclass
?	40.960240	188516.338235	9.260349	606.795752	60.760349	31.919390
Federal-gov	42.590625	185221.243750	10.973958	833.232292	112.268750	41.379167
Local-gov	41.751075	188639.712852	11.042045	880.202580	109.854276	40.982800
Never-worked	20.571429	225989.571429	7.428571	0.000000	0.000000	28.428571
Private	36.797585	192764.114734	9.879714	889.217792	80.008724	40.267096
Self-emp-inc	46.017025	175981.344086	11.137097	4875.693548	155.138889	48.818100
Self-emp-not-inc	44.969697	175608.641480	10.226289	1886.061787	116.631641	44.421881
State-gov	39.436055	184136.613251	11.375963	701.699538	83.256549	39.031587
Without-pay	47.785714	174267.500000	9.071429	487.857143	0.000000	32.714286