Firebase Import Census Data

This is a companion notebook for the new Data Science Solutions book. The code is explained in the book.



In [22]:

    
import os

import numpy as np
import pandas as pd

# Column labels applied to both census files; the files are read with
# header=None, so pandas takes its column names from this list.
column_names = [
    'age', 'workclass', 'fnlwgt',
    'education', 'education-num', 'marital-status',
    'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week',
    'native-country', 'salary']


def read_census_csv(path, skiprows=None):
    """Read one headerless census CSV into a DataFrame labelled with column_names.

    Fields in the files are separated by a comma followed by a space.
    Using sep=',' with skipinitialspace=True strips that space while
    staying on pandas' default C parser; a multi-character separator
    such as ', ' is treated as a regular expression and forces the
    slower Python engine.

    Args:
        path: Location of the CSV file to load.
        skiprows: Passed straight through to pandas.read_csv.

    Returns:
        A DataFrame with one column per entry in column_names.
    """
    return pd.read_csv(
        path,
        header=None, names=column_names,
        sep=',', skipinitialspace=True,
        skiprows=skiprows)


train_df = read_census_csv('data/aws/census/adult.data')

# The first line of adult.test is skipped — presumably a non-data
# banner line; TODO confirm against the raw file.
test_df = read_census_csv('data/aws/census/adult.test', skiprows=1)

# Last expression of the cell: display (rows, columns) for both splits.
train_df.shape, test_df.shape









    Out[22]:





((32561, 15), (16281, 15))



In [23]:

    
# Preview the first five rows of the training split.
train_df.head(n=5)









    Out[23]:






  
    
      
      age
      workclass
      fnlwgt
      education
      education-num
      marital-status
      occupation
      relationship
      race
      sex
      capital-gain
      capital-loss
      hours-per-week
      native-country
      salary
    
  
  
    
      0
      39
      State-gov
      77516
      Bachelors
      13
      Never-married
      Adm-clerical
      Not-in-family
      White
      Male
      2174
      0
      40
      United-States
      <=50K
    
    
      1
      50
      Self-emp-not-inc
      83311
      Bachelors
      13
      Married-civ-spouse
      Exec-managerial
      Husband
      White
      Male
      0
      0
      13
      United-States
      <=50K
    
    
      2
      38
      Private
      215646
      HS-grad
      9
      Divorced
      Handlers-cleaners
      Not-in-family
      White
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      3
      53
      Private
      234721
      11th
      7
      Married-civ-spouse
      Handlers-cleaners
      Husband
      Black
      Male
      0
      0
      40
      United-States
      <=50K
    
    
      4
      28
      Private
      338409
      Bachelors
      13
      Married-civ-spouse
      Prof-specialty
      Wife
      Black
      Female
      0
      0
      40
      Cuba
      <=50K



In [24]:

    
# Export both splits as JSON files under a common output directory.
firebase_dir = 'data/firebase/census'

# DataFrame.to_json does not create missing directories, so make sure
# the target exists before writing (no-op when it is already there).
os.makedirs(firebase_dir, exist_ok=True)

# orient='index' writes one JSON object per row, keyed by the row's
# index label.
train_df.to_json(
    path_or_buf=firebase_dir + '/census.json',
    orient='index')
test_df.to_json(
    path_or_buf=firebase_dir + '/census_test.json',
    orient='index')

	age	workclass	fnlwgt	education	education-num	marital-status	occupation	relationship	race	sex	capital-gain	capital-loss	hours-per-week	native-country	salary
0	39	State-gov	77516	Bachelors	13	Never-married	Adm-clerical	Not-in-family	White	Male	2174	0	40	United-States	<=50K
1	50	Self-emp-not-inc	83311	Bachelors	13	Married-civ-spouse	Exec-managerial	Husband	White	Male	0	0	13	United-States	<=50K
2	38	Private	215646	HS-grad	9	Divorced	Handlers-cleaners	Not-in-family	White	Male	0	0	40	United-States	<=50K
3	53	Private	234721	11th	7	Married-civ-spouse	Handlers-cleaners	Husband	Black	Male	0	0	40	United-States	<=50K
4	28	Private	338409	Bachelors	13	Married-civ-spouse	Prof-specialty	Wife	Black	Female	0	0	40	Cuba	<=50K