In [1]:
import seaborn as sns
import metapack as mp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's 'notebook' plotting context for the figures that follow.
sns.set_context('notebook')
# NOTE(review): presumably initialises metapack's Jupyter integration —
# confirm against the metapack documentation.
mp.jupyter.init()
In [2]:
# Open the metapack package through its Jupyter helper and bind it to `pkg`.
# The source-package opener is the one in use; the commented-out line is the
# alternative opener, kept for reference.
# NOTE(review): the difference between the two openers is not visible in this
# notebook — confirm which one is intended before publishing.
#pkg = mp.jupyter.open_package()
pkg = mp.jupyter.open_source_package()
# Bare expression so the cell displays the package.
pkg
Out[2]:
In [3]:
# Look up the 'homeless_survey_source' reference in the package and
# materialise it as a DataFrame bound to `df`.
survey_reference = pkg.reference('homeless_survey_source')
df = survey_reference.dataframe()

# Preview the first rows.
df.head()
Out[3]:
In [4]:
# Recode the Gender responses into single-letter codes; both 'Unknown' and
# 'Transgender' are mapped to 'U'. Values not listed here pass through as-is.
df['sex'] = df.Gender.replace({
    'Male': 'M',
    'Female': 'F',
    'Unknown': 'U',
    'Transgender': 'U'
})

# Trim stray whitespace from the free-text race response before matching.
df['Race_Full'] = df.Race_Full.str.strip()

# Case-insensitive keyword patterns, one per temporary 0/1 flag column.
#  * 'asian' is anchored at a word start: a plain substring test also fires
#    on "caucasian" (which ends in "...asian") and would mis-flag those rows.
#  * 'white' accepts the correct spelling "caucasian"; the misspelling
#    "causian" is kept as well in case the raw responses contain it.
# NOTE(review): 'indian' also matches responses such as "Asian Indian" —
# confirm against the actual Race_Full values whether that is intended.
race_patterns = {
    'black': r'black|african',
    'white': r'european|white|caucasian|causian',
    'asian': r'\basian',
    'aian': r'indian|alaska',
    'nhopi': r'hawaii|pacific',
    'hisp': r'hisp|latin',
}

# astype(str) turns missing responses into the text 'nan', which matches
# none of the patterns, so every flag is a clean 0/1 integer.
race_text = df.Race_Full.astype(str)
for flag, pattern in race_patterns.items():
    df[flag] = race_text.str.contains(pattern, case=False, regex=True).astype(int)

# Number of flags set per row; raceeth() treats a value above 1 as
# multi-racial.
df['other'] = (df.black + df.white + df.asian + df.aian + df.hisp + df.nhopi)
def raceeth(r):
    """Collapse one row's race/ethnicity flags into a single label.

    ``r`` is a row exposing the flag attributes ``hisp``, ``black``,
    ``aian``, ``nhopi``, ``asian``, ``white``, the flag count ``other`` and
    the ``Ethnicity`` value.

    Returns one of 'hisp', 'black', 'aian', 'nhopi', 'asian', 'nhwhite' or
    'other'.
    """
    # Hispanic/Latino takes precedence over every race flag.
    if r.hisp or r.Ethnicity == 'Latino':
        return 'hisp'

    # More than one flag set: report as 'other'.
    if r.other > 1:
        return 'other'

    # Otherwise the first flag that is set, checked in this order, decides.
    ordered_labels = (
        ('black', 'black'),
        ('aian', 'aian'),
        ('nhopi', 'nhopi'),
        ('asian', 'asian'),
        ('white', 'nhwhite'),
    )
    for flag_name, label in ordered_labels:
        if getattr(r, flag_name):
            return label

    # No flag set at all.
    return 'other'
from geoid.acs import Tract

# Collapse the temporary race flags into one label per row, then discard the
# flags; reassigning (rather than dropping in place) keeps the step explicit.
df['raceeth'] = df.apply(raceeth, axis=1)
df = df.drop(columns='black white asian aian nhopi hisp other'.split())

# Leading components passed to Tract() ahead of the tract number.
# NOTE(review): presumably state FIPS 6 (California) and county FIPS 37
# (Los Angeles County) — confirm this matches the survey's coverage.
TRACT_STATE = 6
TRACT_COUNTY = 37


def tract_geoid(v):
    """Return the tract geoid string for a Census_Tract value.

    Missing or zero tract values yield None.
    """
    if pd.isna(v) or not v:
        return None
    # NOTE(review): int() truncates any fractional part of the tract value —
    # confirm Census_Tract holds whole tract codes.
    return str(Tract(TRACT_STATE, TRACT_COUNTY, int(v)))


df['geoid'] = df.Census_Tract.apply(tract_geoid)

# Survey_Year is split on whitespace: first word -> survey_type, second word
# -> year, both lower-cased. The .str accessors leave missing or one-word
# values as NaN instead of raising part-way through the column.
survey_parts = df.Survey_Year.str.strip().str.split()
df['survey_type'] = survey_parts.str[0].str.lower()
df['year'] = survey_parts.str[1].str.lower()

# Final column selection and order.
cols = ['geoid', 'Survey_Year', 'survey_type','year', 'Birth_Year', 'Age', 'Gender', 'sex',
        'Ethnicity', 'Race_Full', 'Race_Recode', 'raceeth', 'Veteran', 'Chronic_Time', 'Chronic_Condition',
        'Chronic', 'Adult_With_Child', 'Times_Homeless_3yrs', 'Times_Homeless_Past_Year', 'Current_Stint_Duration',
        'SPA', 'Census_Tract', 'Physical_Sexual_Abuse', 'Physical_Disability', 'Mental_Illness', 'Alcohol_Abuse',
        'Drug_Abuse', 'Drug_Alcohol_History', 'HIV_Positive', 'Part_Time', 'Full_Time', 'Unemployed_Looking',
        'Unemployed_Not_Looking' ]

# .copy() gives an independent frame, so later column assignments do not
# trigger SettingWithCopyWarning.
df = df[cols].copy()
df.head()
Out[4]: