Tabular example


In [ ]:
from fastai.tabular import *  # Quick access to tabular functionality

Tabular data should be in a Pandas DataFrame.


In [ ]:
# Fetch the Adult sample dataset via untar_data, then read its CSV into a DataFrame.
path = untar_data(URLs.ADULT_SAMPLE)
df = pd.read_csv(path.joinpath('adult.csv'))

In [ ]:
# List the distinct values found in the 'salary' column.
df.salary.unique()


Out[ ]:
array(['>=50k', '<50k'], dtype=object)

In [ ]:
# Import fastai's memory utilities; the next cell calls gpu_with_max_free_mem().
# NOTE(review): notebook convention is to keep all imports in the first cell — consider moving this up.
from fastai.utils.mem import *

In [ ]:
# Test another helper function from the import above.
# NOTE(review): the recorded output is a 2-tuple; presumably (GPU id, free memory) — confirm
# the element meanings and units against the fastai.utils.mem documentation.
gpu_with_max_free_mem()


Out[ ]:
(0, 7812)

In [ ]:
# test reduce_mem_usage(df)

In [ ]:
# Preview the first five rows of the raw DataFrame.
df.head(n=5)


Out[ ]:
age workclass fnlwgt education education-num marital-status occupation relationship race sex capital-gain capital-loss hours-per-week native-country salary
0 49 Private 101320 Assoc-acdm 12.0 Married-civ-spouse NaN Wife White Female 0 1902 40 United-States >=50k
1 44 Private 236746 Masters 14.0 Divorced Exec-managerial Not-in-family White Male 10520 0 45 United-States >=50k
2 38 Private 96185 HS-grad NaN Divorced NaN Unmarried Black Female 0 0 32 United-States <50k
3 38 Self-emp-inc 112847 Prof-school 15.0 Married-civ-spouse Prof-specialty Husband Asian-Pac-Islander Male 0 0 40 United-States >=50k
4 42 Self-emp-not-inc 82297 7th-8th NaN Married-civ-spouse Other-service Wife Black Female 0 0 50 United-States <50k

In [ ]:
# Column that the model is trained to predict.
dep_var = 'salary'

# Columns handled as categorical variables.
cat_names = [
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
]

# Columns handled as continuous variables.
cont_names = [
    'age',
    'fnlwgt',
    'education-num',
]

# Preprocessing steps handed to TabularList, in this order.
procs = [
    FillMissing,
    Categorify,
    Normalize,
]

In [ ]:
# Build the test item list from a copy of rows 800-999 of the DataFrame.
test_rows = df.iloc[800:1000].copy()
test = TabularList.from_df(
    test_rows,
    path=path,
    cat_names=cat_names,
    cont_names=cont_names,
)

In [ ]:
# Rows 800-999 are held out as the validation set; every other row is used for training.
valid_idx = list(range(800, 1000))

# Assemble the DataBunch: items -> train/valid split -> labels -> test set -> batches.
data = (
    TabularList.from_df(df, path=path, cat_names=cat_names, cont_names=cont_names, procs=procs)
    .split_by_idx(valid_idx)
    .label_from_df(cols=dep_var)
    .add_test(test)
    .databunch()
)

In [ ]:
# Display ten rows of a batch to sanity-check the processed data; the recorded output
# shows the continuous columns rescaled and an added `education-num_na` column.
data.show_batch(rows=10)


workclass education marital-status occupation relationship race education-num_na age fnlwgt education-num target
Private Bachelors Never-married Exec-managerial Not-in-family White False -1.1425 -0.9280 1.1422 <50k
Private HS-grad Never-married Other-service Not-in-family White False -0.5561 0.7244 -0.4224 <50k
Private Some-college Never-married Other-service Own-child White False -1.5090 -0.1673 -0.0312 <50k
Private HS-grad Divorced Adm-clerical Other-relative Amer-Indian-Eskimo False 1.2763 -0.8370 -0.4224 <50k
Local-gov Bachelors Divorced Transport-moving Not-in-family White False 0.2502 -1.3617 1.1422 <50k
Private Some-college Married-civ-spouse Prof-specialty Husband White False -0.8493 -0.3286 -0.0312 >=50k
Private 11th Never-married Prof-specialty Own-child White False -1.5090 0.8521 -1.2046 <50k
Private 7th-8th Married-civ-spouse Tech-support Husband White False -0.2629 0.0550 -2.3781 <50k
Private HS-grad Never-married Transport-moving Not-in-family White False -0.8493 -0.3286 -0.4224 <50k
Private Bachelors Never-married Adm-clerical Not-in-family White False -0.7760 1.3159 1.1422 <50k

In [ ]:
# Create a tabular learner with layers=[200, 100] that reports accuracy,
# then train it for a single epoch at a learning rate of 1e-2.
learn = tabular_learner(data, layers=[200, 100], metrics=accuracy)
learn.fit(epochs=1, lr=1e-2)


Total time: 00:02

epoch train_loss valid_loss accuracy time
0 0.357122 0.381649 0.790000 00:02

Inference


In [ ]:
# Take the first row of the raw DataFrame as a single example for inference.
# Row 0 is outside the 800-999 validation range, so it was part of the training split.
row = df.iloc[0]

In [ ]:
# Predict the label for the selected row. Per the recorded output, the result is a
# 3-tuple: (predicted Category, class-index tensor, tensor of per-class values).
# NOTE(review): the last tensor looks like class probabilities — confirm.
learn.predict(row)


Out[ ]:
(Category >=50k, tensor(1), tensor([0.3581, 0.6419]))

In [ ]: