In [2]:
import pandas as pd
In [3]:
DATA_DIR = '/Users/joe/Dropbox/SFI_CensusData/UnitedStates/'
acs_df = pd.read_csv(DATA_DIR + '2010acs.csv')
acs_df['ID'] = acs_df.apply(lambda x:
str(x['STATE_ID']).zfill(2) + str(x['COUNTY_ID']).zfill(3) + str(x['TRACT_ID']).zfill(5) + str(x['BKGP_ID']).zfill(1), axis=1)
acs_df = acs_df.set_index('ID', drop=False) # Set index but don't drop
In [4]:
acs_df.head()
Out[4]:
In [9]:
edu_vars = {'B15002_001E': 'Edu Total',
'B15002_002E': 'Edu Male Total',
'B15002_003E': 'Edu Male Level 0',
'B15002_004E': 'Edu Male Level 1',
'B15002_005E': 'Edu Male Level 2',
'B15002_006E': 'Edu Male Level 3',
'B15002_007E': 'Edu Male Level 4',
'B15002_008E': 'Edu Male Level 5',
'B15002_009E': 'Edu Male Level 6',
'B15002_010E': 'Edu Male Level 7',
'B15002_011E': 'Edu Male Level 8',
'B15002_012E': 'Edu Male Level 9',
'B15002_013E': 'Edu Male Level 10',
'B15002_014E': 'Edu Male Level 11',
'B15002_015E': 'Edu Male Level 12',
'B15002_016E': 'Edu Male Level 13',
'B15002_017E': 'Edu Male Level 14',
'B15002_018E': 'Edu Male Level 15',
'B15002_020E': 'Edu Female Level 0',
'B15002_021E': 'Edu Female Level 1',
'B15002_022E': 'Edu Female Level 2',
'B15002_023E': 'Edu Female Level 3',
'B15002_024E': 'Edu Female Level 4',
'B15002_025E': 'Edu Female Level 5',
'B15002_026E': 'Edu Female Level 6',
'B15002_027E': 'Edu Female Level 7',
'B15002_028E': 'Edu Female Level 8',
'B15002_029E': 'Edu Female Level 9',
'B15002_030E': 'Edu Female Level 10',
'B15002_031E': 'Edu Female Level 11',
'B15002_032E': 'Edu Female Level 12',
'B15002_033E': 'Edu Female Level 13',
'B15002_034E': 'Edu Female Level 14',
'B15002_035E': 'Edu Female Level 15',
'GEOID': 'GeoId'}
In [10]:
edu_df = pd.read_csv(DATA_DIR + '2010acs_edu.csv', encoding='utf-8', dtype={'ID':'str'})
edu_df['ID'] = edu_df.apply(lambda x:
str(x['state']).zfill(2) + str(x['county']).zfill(3) + str(x['tract']).zfill(5) + str(x['block group']).zfill(1), axis=1)
edu_df = edu_df.rename(columns=edu_vars)
edu_df = edu_df.set_index('ID', drop=True) # Set index but don't drop
edu_df = edu_df.drop(['state', 'county', 'tract', 'block group', 'GeoId'], axis=1)
In [11]:
edu_df.head()
Out[11]:
In [12]:
df = acs_df.join(edu_df)
In [13]:
df.head()
Out[13]:
In [27]:
df = df.drop(df.filter(regex='(M|m)ale').columns, axis=1)
#df = df.drop(['ID.1', 'DKL(y|n)', 'H(y|n)'], axis=1)
df.info()
In [28]:
df.to_csv('/Users/joe/Dropbox/SFI_CensusData/UnitedStates/2010acs_edu.csv')
In [ ]: