In [1]:
import seaborn as sns
import metapack as mp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display 

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's 'notebook' plotting context for figure element scaling.
sns.set_context('notebook')
# NOTE(review): presumably sets up metapack's Jupyter integration used by the
# mp.jupyter.open_* calls in the next cell; confirm against the metapack docs.
mp.jupyter.init()

In [2]:
# Open the metapack package that the rest of the notebook reads from.
# Alternative opener, left disabled:
#pkg = mp.jupyter.open_package()
# NOTE(review): open_source_package() presumably opens the source (unbuilt)
# package rather than a built one; confirm against the metapack docs.
pkg = mp.jupyter.open_source_package()
# Bare expression so the package summary is rendered as the cell output.
pkg


Out[2]:

LA County Homeless Survey

economicrt.org-homeless_survey-2011e2017-4 Last Update: 2019-01-08T04:17:54

Data from Demographic Surveys and HMIS Records 2011 to 2017

This dataset consists of records of in-person surveys of homeless individuals in Los Angeles County, from 2011 to 2017, inclusive. The original data is collected from annual surveys that are part of the annual Point In Time Count of homeless individuals conducted by the Los Angeles Homeless Services Authority. The data is published by The Economic Roundtable.

The data in this package is significantly altered from the source file to make analysis easier. Changes include:

  • Added 'raceeth' field, which recodes the very many race/ethnicity values to a much simpler set that harmonizes with US Census categories.
  • Added 'sex' field, which recodes the 'gender' field.
  • Broke out the type+year combination in 'survey_year' into two values.

The Race/Ethnicity categories are:

  • hisp: Hispanic or Latino, of any race
  • nhwhite: Non hispanic white
  • black: Non hispanic Black or African American
  • aian: Non hispanic American Indian / Alaskan Native
  • asian: Non hispanic Asian
  • nhopi: Non hispanic Native Hawaiian / Other Pacific Islander
  • other: Other race or multiple races

Versions

  1. Initial Version
  2. Added schema descriptions
  3. Broke out the survey_year into two fields, survey_type and year
  4. Improved Metadata

Documentation Links

Contacts

Resources

  • homeless_survey. Homeless survey data, with recoded race, ethnicity, and sex

References


In [3]:
# Load the package reference named 'homeless_survey_source' as a DataFrame;
# this is the raw survey table that the next cell recodes.
df = pkg.reference('homeless_survey_source').dataframe()
# Preview the first rows.
df.head()


Out[3]:
Unnamed: 0 Survey_Year Birth_Year Age Gender Ethnicity Race_Full Race_Recode Veteran Chronic_Time ... Physical_Disability Mental_Illness Alcohol_Abuse Drug_Abuse Drug_Alcohol_History HIV_Positive Part_Time Full_Time Unemployed_Looking Unemployed_Not_Looking
0 1 Unsheltered 2011 1993.0 18.0 Female European American White European American 0 0 ... 0 0 0.0 0.0 1 0 0.0 0.0 1.0 0.0
1 2 Unsheltered 2011 1964.0 46.0 Female African American Black-African-American African American 0 0 ... 0 1 0.0 0.0 0 0 0.0 0.0 1.0 0.0
2 3 Unsheltered 2011 1956.0 55.0 Male European American White European American 0 1 ... 1 0 0.0 0.0 0 0 0.0 0.0 1.0 0.0
3 4 Unsheltered 2011 1960.0 50.0 Male European American White European American 1 1 ... 0 0 1.0 0.0 1 0 0.0 0.0 1.0 0.0
4 5 Unsheltered 2011 1979.0 31.0 Male Latino BLANK Unknown 0 0 ... 0 0 0.0 0.0 0 0 0.0 0.0 1.0 0.0

5 rows × 29 columns


In [4]:
from geoid.acs import Tract

# FIPS codes passed to Tract() for every survey record: state 06 (California),
# county 037 (Los Angeles County).
STATE_FIPS = 6
COUNTY_FIPS = 37

# Substrings, matched case-insensitively against Race_Full, that assign a
# free-text race value to each simplified category.
RACE_KEYWORDS = {
    'black': ('black', 'african'),
    # 'causian' is kept from the original keyword list; 'caucasian' is added
    # because 'causian' is not actually a substring of 'caucasian'.
    'white': ('european', 'white', 'causian', 'caucasian'),
    'asian': ('asian',),
    'aian': ('indian', 'alaska'),
    'nhopi': ('hawaii', 'pacific'),
    'hisp': ('hisp', 'latin'),
}


def race_flags(race_full):
    """Build 0/1 indicator columns, one per key of RACE_KEYWORDS.

    Parameters
    ----------
    race_full : pandas.Series
        Free-text race descriptions. Missing values match no category.

    Returns
    -------
    pandas.DataFrame
        Integer (0/1) columns named after the RACE_KEYWORDS keys, indexed like
        ``race_full``. A value may set several indicators at once.
    """
    text = race_full.fillna('').astype(str).str.lower()

    # 'caucasian' contains the substring 'asian'; blank it out before testing
    # for Asian so Caucasian respondents are not also flagged as Asian.
    text_without_caucasian = text.str.replace('caucasian', ' ', regex=False)

    flags = {}
    for category, keywords in RACE_KEYWORDS.items():
        haystack = text_without_caucasian if category == 'asian' else text
        hit = pd.Series(False, index=text.index)
        for keyword in keywords:
            hit = hit | haystack.str.contains(keyword, regex=False)
        flags[category] = hit.astype(int)

    return pd.DataFrame(flags, index=text.index)


def raceeth(r):
    """Collapse one row of race indicators into a single race/ethnicity code.

    ``r`` must provide the 0/1 fields ``hisp``, ``black``, ``aian``, ``nhopi``,
    ``asian`` and ``white``; the field ``other``, which holds the number of
    those indicators that are set; and the survey's ``Ethnicity`` value.

    Precedence: Hispanic/Latino first (of any race), then 'other' when more
    than one category matched, then the single matching race, and finally
    'other' when nothing matched.
    """
    if r.hisp or r.Ethnicity == 'Latino':
        return 'hisp'
    if r.other > 1:
        # More than one category matched: treated as multiple races.
        return 'other'
    if r.black:
        return 'black'
    if r.aian:
        return 'aian'
    if r.nhopi:
        return 'nhopi'
    if r.asian:
        return 'asian'
    if r.white:
        return 'nhwhite'
    return 'other'


def tract_geoid(tract):
    """Return the geoid string for a tract number, or None when it is 0 or missing."""
    if not tract or np.isnan(tract):
        return None
    return str(Tract(STATE_FIPS, COUNTY_FIPS, int(tract)))


# Recode gender to one-letter codes. 'Transgender' is folded into 'U' along
# with 'Unknown'; any label not listed here is passed through unchanged.
df['sex'] = df.Gender.replace({
    'Male': 'M',
    'Female': 'F',
    'Unknown': 'U',
    'Transgender': 'U'
})

df['Race_Full'] = df.Race_Full.str.strip()

# Keep the indicator columns in a scratch frame so nothing temporary is
# written to (and later dropped from) df.
race = race_flags(df.Race_Full)
race['other'] = race.sum(axis=1)  # number of categories matched, read by raceeth()
race['Ethnicity'] = df.Ethnicity
df['raceeth'] = race.apply(raceeth, axis=1)

df['geoid'] = df.Census_Tract.apply(tract_geoid)

# Survey_Year looks like 'Unsheltered 2011': first token is the survey type,
# second token is the year (kept as a string). Missing or single-token values
# yield NaN instead of raising.
survey_parts = df.Survey_Year.str.strip().str.split()
df['survey_type'] = survey_parts.str[0].str.lower()
df['year'] = survey_parts.str[1].str.lower()

cols = ['geoid', 'Survey_Year', 'survey_type','year', 'Birth_Year', 'Age', 'Gender', 'sex', 
 'Ethnicity', 'Race_Full', 'Race_Recode', 'raceeth', 'Veteran', 'Chronic_Time', 'Chronic_Condition', 
 'Chronic', 'Adult_With_Child', 'Times_Homeless_3yrs', 'Times_Homeless_Past_Year', 'Current_Stint_Duration',
 'SPA', 'Census_Tract', 'Physical_Sexual_Abuse', 'Physical_Disability', 'Mental_Illness', 'Alcohol_Abuse',
 'Drug_Abuse', 'Drug_Alcohol_History', 'HIV_Positive', 'Part_Time', 'Full_Time', 'Unemployed_Looking', 
 'Unemployed_Not_Looking' ]

# Select and order the published columns; copy so later assignments act on an
# independent frame rather than a slice of the source table.
df = df[cols].copy()

df.head()


Out[4]:
geoid Survey_Year survey_type year Birth_Year Age Gender sex Ethnicity Race_Full ... Physical_Disability Mental_Illness Alcohol_Abuse Drug_Abuse Drug_Alcohol_History HIV_Positive Part_Time Full_Time Unemployed_Looking Unemployed_Not_Looking
0 None Unsheltered 2011 unsheltered 2011 1993.0 18.0 Female F European American White ... 0 0 0.0 0.0 1 0 0.0 0.0 1.0 0.0
1 None Unsheltered 2011 unsheltered 2011 1964.0 46.0 Female F African American Black-African-American ... 0 1 0.0 0.0 0 0 0.0 0.0 1.0 0.0
2 None Unsheltered 2011 unsheltered 2011 1956.0 55.0 Male M European American White ... 1 0 0.0 0.0 0 0 0.0 0.0 1.0 0.0
3 None Unsheltered 2011 unsheltered 2011 1960.0 50.0 Male M European American White ... 0 0 1.0 0.0 1 0 0.0 0.0 1.0 0.0
4 None Unsheltered 2011 unsheltered 2011 1979.0 31.0 Male M Latino BLANK ... 0 0 0.0 0.0 0 0 0.0 0.0 1.0 0.0

5 rows × 33 columns