Tabular example



In [ ]:

    
from fastai.tabular import *  # Quick access to tabular functionality

Tabular data should be in a Pandas DataFrame.



In [ ]:

    
# untar_data returns the location of the ADULT_SAMPLE dataset; it is used as a
# path below, both to find adult.csv and as the `path` argument of TabularList.
path = untar_data(URLs.ADULT_SAMPLE)
# Load the CSV into a pandas DataFrame.
df = pd.read_csv(path/'adult.csv')



In [ ]:

    
# List the distinct values of the target column; the recorded output shows two classes.
df['salary'].unique()









    Out[ ]:





array(['>=50k', '<50k'], dtype=object)



In [ ]:

    
# Import fastai's memory utilities. This star import is presumably what provides
# gpu_with_max_free_mem(), called in the next cell — TODO confirm.
from fastai.utils.mem import *



In [ ]:

    
# Test another helper function: gpu_with_max_free_mem().
# NOTE(review): the recorded output (0, 7812) looks like (GPU id, free memory) —
# confirm the meaning and units against the fastai docs.
gpu_with_max_free_mem()









    Out[ ]:





(0, 7812)



In [ ]:

    
# test reduce_mem_usage(df)



In [ ]:

    
# Preview the first rows; the recorded output shows NaN values in the
# `occupation` and `education-num` columns.
df.head()









    Out[ ]:







  
    
      
      age
      workclass
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      capital-gain
      capital-loss
      hours-per-week
      native-country
      salary
    
  
  
    
      0
      49
      Private
      101320
      Assoc-acdm
      12.0
      Married-civ-spouse
      NaN
      Wife
      White
      Female
      0
      1902
      40
      United-States
      >=50k
    
    
      1
      44
      Private
      236746
      Masters
      14.0
      Divorced
      Exec-managerial
      Not-in-family
      White
      Male
      10520
      0
      45
      United-States
      >=50k
    
    
      2
      38
      Private
      96185
      HS-grad
      NaN
      Divorced
      NaN
      Unmarried
      Black
      Female
      0
      0
      32
      United-States
      <50k
    
    
      3
      38
      Self-emp-inc
      112847
      Prof-school
      15.0
      Married-civ-spouse
      Prof-specialty
      Husband
      Asian-Pac-Islander
      Male
      0
      0
      40
      United-States
      >=50k
    
    
      4
      42
      Self-emp-not-inc
      82297
      7th-8th
      NaN
      Married-civ-spouse
      Other-service
      Wife
      Black
      Female
      0
      0
      50
      United-States
      <50k



In [ ]:

    
# Target column; passed to label_from_df(cols=...) when the DataBunch is built.
dep_var = 'salary'

# Columns handed to TabularList as categorical features.
cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
]

# Columns handed to TabularList as continuous features.
cont_names = [
    'age',
    'fnlwgt',
    'education-num',
]

# Preprocessing steps passed to TabularList via `procs`, in this order.
procs = [
    FillMissing,
    Categorify,
    Normalize,
]



In [ ]:

    
# Test items built from a copy of rows 800-999 of `df` — the same 800-999 range
# that is passed to split_by_idx when `data` is built. No `procs` are passed here.
test = TabularList.from_df(
    df.iloc[800:1000].copy(),
    path=path,
    cat_names=cat_names,
    cont_names=cont_names,
)



In [ ]:

    
# Assemble the DataBunch with fastai's data block API, one step per line.
data = (
    TabularList
    # Items come from the full DataFrame, with the column roles and procs defined above.
    .from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
    # Indices 800-999 are split off from the rest (the held-out split).
    .split_by_idx(list(range(800,1000)))
    # Labels are read from the `dep_var` column.
    .label_from_df(cols=dep_var)
    # Attach the separately built `test` items.
    .add_test(test)
    .databunch()
)



In [ ]:

    
# Display 10 rows of a batch. The recorded output shows an added
# `education-num_na` column and rescaled values in the continuous columns.
data.show_batch(rows=10)









    





  
    
      workclass
      education
      marital-status
      occupation
      relationship
      race
      education-num_na
      age
      fnlwgt
      education-num
      target
    
  
  
    
      Private
      Bachelors
      Never-married
      Exec-managerial
      Not-in-family
      White
      False
      -1.1425
      -0.9280
      1.1422
      <50k
    
    
      Private
      HS-grad
      Never-married
      Other-service
      Not-in-family
      White
      False
      -0.5561
      0.7244
      -0.4224
      <50k
    
    
      Private
      Some-college
      Never-married
      Other-service
      Own-child
      White
      False
      -1.5090
      -0.1673
      -0.0312
      <50k
    
    
      Private
      HS-grad
      Divorced
      Adm-clerical
      Other-relative
      Amer-Indian-Eskimo
      False
      1.2763
      -0.8370
      -0.4224
      <50k
    
    
      Local-gov
      Bachelors
      Divorced
      Transport-moving
      Not-in-family
      White
      False
      0.2502
      -1.3617
      1.1422
      <50k
    
    
      Private
      Some-college
      Married-civ-spouse
      Prof-specialty
      Husband
      White
      False
      -0.8493
      -0.3286
      -0.0312
      >=50k
    
    
      Private
      11th
      Never-married
      Prof-specialty
      Own-child
      White
      False
      -1.5090
      0.8521
      -1.2046
      <50k
    
    
      Private
      7th-8th
      Married-civ-spouse
      Tech-support
      Husband
      White
      False
      -0.2629
      0.0550
      -2.3781
      <50k
    
    
      Private
      HS-grad
      Never-married
      Transport-moving
      Not-in-family
      White
      False
      -0.8493
      -0.3286
      -0.4224
      <50k
    
    
      Private
      Bachelors
      Never-married
      Adm-clerical
      Not-in-family
      White
      False
      -0.7760
      1.3159
      1.1422
      <50k



In [ ]:

    
# Create a tabular learner on `data` with layers=[200,100], reporting accuracy as the metric.
learn = tabular_learner(data, layers=[200,100], metrics=accuracy)
# Train for 1 epoch (the recorded output has a single epoch row).
# NOTE(review): 1e-2 is presumably the learning rate — confirm against Learner.fit.
learn.fit(1, 1e-2)









    




Total time: 00:02 

  
    
      epoch
      train_loss
      valid_loss
      accuracy
      time
    
  
  
    
      0
      0.357122
      0.381649
      0.790000
      00:02

Inference



In [ ]:

    
# Take the first row of the raw DataFrame as a single example for inference.
row = df.iloc[0]



In [ ]:

    
# Predict on the single row. The recorded output is a 3-tuple: the predicted
# Category, a tensor holding its index, and a tensor of two values that look
# like per-class probabilities.
learn.predict(row)









    Out[ ]:





(Category >=50k, tensor(1), tensor([0.3581, 0.6419]))



In [ ]:

	age	workclass	fnlwgt	education	education-num	marital-status	occupation	relationship	race	sex	capital-gain	capital-loss	hours-per-week	native-country	salary
0	49	Private	101320	Assoc-acdm	12.0	Married-civ-spouse	NaN	Wife	White	Female	0	1902	40	United-States	>=50k
1	44	Private	236746	Masters	14.0	Divorced	Exec-managerial	Not-in-family	White	Male	10520	0	45	United-States	>=50k
2	38	Private	96185	HS-grad	NaN	Divorced	NaN	Unmarried	Black	Female	0	0	32	United-States	<50k
3	38	Self-emp-inc	112847	Prof-school	15.0	Married-civ-spouse	Prof-specialty	Husband	Asian-Pac-Islander	Male	0	0	40	United-States	>=50k
4	42	Self-emp-not-inc	82297	7th-8th	NaN	Married-civ-spouse	Other-service	Wife	Black	Female	0	0	50	United-States	<50k

workclass	education	marital-status	occupation	relationship	race	education-num_na	age	fnlwgt	education-num	target
Private	Bachelors	Never-married	Exec-managerial	Not-in-family	White	False	-1.1425	-0.9280	1.1422	<50k
Private	HS-grad	Never-married	Other-service	Not-in-family	White	False	-0.5561	0.7244	-0.4224	<50k
Private	Some-college	Never-married	Other-service	Own-child	White	False	-1.5090	-0.1673	-0.0312	<50k
Private	HS-grad	Divorced	Adm-clerical	Other-relative	Amer-Indian-Eskimo	False	1.2763	-0.8370	-0.4224	<50k
Local-gov	Bachelors	Divorced	Transport-moving	Not-in-family	White	False	0.2502	-1.3617	1.1422	<50k
Private	Some-college	Married-civ-spouse	Prof-specialty	Husband	White	False	-0.8493	-0.3286	-0.0312	>=50k
Private	11th	Never-married	Prof-specialty	Own-child	White	False	-1.5090	0.8521	-1.2046	<50k
Private	7th-8th	Married-civ-spouse	Tech-support	Husband	White	False	-0.2629	0.0550	-2.3781	<50k
Private	HS-grad	Never-married	Transport-moving	Not-in-family	White	False	-0.8493	-0.3286	-0.4224	<50k
Private	Bachelors	Never-married	Adm-clerical	Not-in-family	White	False	-0.7760	1.3159	1.1422	<50k