Idea #1 - comparison of segregation in Boston vs Philly (now and prior)

Ways of measuring:

*Compare percent of different geographies (from blocks to ZIP code) that are predominately one race or ethnicity

*The same but white vs minority in general

*The same as either but also considering population of a tract, and distance to close tract with a predominance that is not your own. Classify the tracts



In [3]:

    
import charts_function_list
import os
base,data,outputs = charts_function_list.folder_setup()
from census import Census
from us import states
key = key
c = Census(key)



In [ ]:

    
class state_things(object):
    def __init__(self,state_name):
        
        temp_df = pd.DataFrame(c.acs5.state('NAME', Census.ALL))
        self.state_name = state_name
        self.state_code = temp_df[temp_df['NAME']==self.state_name]['state'].values[0]
        self.county_codes_full = pd.DataFrame(c.acs5.get('NAME', geo={'for': 'county:*',
                       'in': 'state:'+self.state_code}))
        
        self.county_codes = {k:v for k,v in zip(self.county_codes_full['county'],self.county_codes_full['NAME'])}
        
        
    def search_for_county(self,county_name):
        return self.county_codes_full[self.county_codes_full['NAME'].str.contains(county_name)]
    
    
        
    def get_by_tract(self,county_code,additional_fields=None):
        if additional_fields == None:
            tract_df = pd.DataFrame(c.acs5.state_county_tract('NAME', self.state_code,county_code, Census.ALL))
            
        else:
            tract_df = pd.DataFrame(c.acs5.state_county_tract(tuple(['NAME'])+tuple(additional_fields), self.state_code,county_code, Census.ALL))
      
        tract_df['county_map'] = tract_df['county'].map(self.county_codes)
        return tract_df
        
    def get_places(self,additional_fields=None):
        if additional_fields == None:
            place_df = pd.DataFrame(c.acs5.state_place('NAME',self.state_code, place=Census.ALL))

        else:
            place_df = pd.DataFrame(c.acs5.state_place((tuple(['NAME'])+tuple(additional_fields)),
                                                    self.state_code, place= Census.ALL))
        return place_df

#find fields in a particular table and return as a dictionary
def fields_in_table(table_number):
    table_list =  list(filter(lambda x: table_number in x, c.acs5.fields().keys()))
    labels = [c.acs5.fields()[x]['label'] for x in table_list]
    return {k:v for k,v in zip(table_list,labels)}



In [705]:

    
state_codes = c.acs5.state('NAME', Census.ALL)
state_code_frame = pd.DataFrame(state_codes)
d = {name:state_things(name) for name in state_code_frame['NAME']}



In [709]:

    
us_places = []
us_tracts = []

#create dictionary to get fields in the Race by Hispanic Origin table
hisp_race_dict = fields_in_table(table_number='B03002')

#take just the keys to generate the tables
fields_we_want = list(hisp_race_dict.keys())

for states in list(d.keys()):
    #pull all the places for each state
    us_places.append(d[states].get_places(additional_fields=fields_we_want))
    
    for counties in list(d[states].county_codes.keys()):
        us_tracts.append(d[states].get_by_tract(county_code=counties,additional_fields=fields_we_want))

place_race_frame = pd.concat(us_places).reset_index(drop=True)
tract_race_frame = pd.concat(us_tracts).reset_index(drop=True)



In [963]:

    
def reformat_race(df,geo):
    race_mapping = {'B03002_001E':'TOTAL',
                       'B03002_003E':'WHITE', #White = White NH
                       'B03002_004E':'BLACK', #Black = Black NH
                       'B03002_005E':'AK_NH',
                       'B03002_006E':'ASIAN_NH',
                       'B03002_007E':'PI_NH',
                       'B03002_008E':'OTHER_NH',
                       'B03002_009E':'MIX_NH',
                       'B03002_012E':'HISP_ALL',
                       'B03002_013E':'WHITE_H',
                       'B03002_014E':'BLACK_H',
                       'B03002_015E':'AK_H',
                       'B03002_016E':'ASIAN_H',
                       'B03002_017E':'PI_H',
                       'B03002_018E':'OTHER_H',
                       'B03002_019E':'MIX_H'}
    df = df.rename(columns=race_mapping)
    if geo == 'place':
        df = df[list(race_mapping.values())+['NAME', 'place', 'state']]
    elif geo == 'tract':
         df = df[list(race_mapping.values())+['NAME', 'tract', 'state','county','county_map']]
    
    df['AK'] = df['AK_NH']+df['AK_H']
    df['API']= df['ASIAN_H']+df['ASIAN_NH']+df['PI_H']+df['PI_NH']
    df['OTHER_MIX']=df['OTHER_NH']+df['MIX_NH']
    df['HISP'] = df['HISP_ALL']-df['AK_H']-df['PI_H']-df['ASIAN_H']
    def percent_group(column):
        return df[column]/df['TOTAL']
    
    percent_frame = pd.DataFrame({'PER_WHITE':percent_group('WHITE'),
                                 'PER_BLACK':percent_group('BLACK'),
                                 'PER_API':percent_group('API'),
                                 'PER_AKNA':percent_group('AK'),
                                  'PER_HISP':percent_group('HISP'),
                                  'PER_OTHER':percent_group('OTHER_MIX')
                                 
                                 })
    
    df = pd.merge(df,percent_frame,left_index=True,right_index=True)
    df['MAX_ETH_PER']= percent_frame.max(axis=1)
    df['MAX_ETH']= percent_frame.idxmax(axis=1)
    
    def threshold_percent(threshold):
         return np.where(df['MAX_ETH_PER']>=threshold,1,0)
    
    df['Above 70'],df['Above 80'],df['Above 90']=threshold_percent(.7),threshold_percent(.8),threshold_percent(.9)
    
    return df



In [964]:

    
place_race_frame_full = reformat_race(place_race_frame,geo='place').sort_values(by='MAX_ETH_PER',ascending=False)
tract_race_frame_full = reformat_race(tract_race_frame,geo='tract').sort_values(by='MAX_ETH_PER',ascending=False)
tract_race_frame_full['GEO_ID']=tract_race_frame_full[['state','county','tract']].astype('str').apply(lambda x: x.sum(),axis=1)



In [1008]:

    
os.chdir(data)
place_race_frame_full.to_csv('2016_ACS_race_place_full.csv')
tract_race_frame_full.to_csv('2016_ACS_race_tract_full.csv')



In [1012]:

    
tract_race_frame_full.describe()









    Out[1012]:







  
    
      
      TOTAL
      WHITE
      BLACK
      AK_NH
      ASIAN_NH
      PI_NH
      OTHER_NH
      MIX_NH
      HISP_ALL
      WHITE_H
      BLACK_H
      AK_H
      ASIAN_H
      PI_H
      OTHER_H
      MIX_H
      AK
      API
      OTHER_MIX
      HISP
      PER_AKNA
      PER_API
      PER_BLACK
      PER_HISP
      PER_OTHER
      PER_WHITE
      MAX_ETH_PER
      Above 70
      Above 80
      Above 90
    
  
  
    
      count
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      74001.000000
      73306.000000
      73306.000000
      73306.000000
      73306.000000
      73306.000000
      73306.000000
      73306.000000
      74001.000000
      74001.000000
      74001.000000
    
    
      mean
      4352.475602
      2667.386549
      528.386441
      28.167403
      221.992784
      6.878421
      9.158255
      97.391157
      793.114593
      523.180646
      19.740004
      7.097404
      2.655518
      0.691788
      201.767746
      37.981487
      35.264807
      232.218511
      106.549411
      782.669883
      0.008880
      0.048667
      0.132879
      0.168072
      0.024345
      0.617157
      0.746134
      0.614816
      0.456359
      0.256497
    
    
      std
      2175.316924
      1889.675151
      902.984218
      178.352648
      486.204507
      50.717061
      33.412553
      123.721987
      1264.186601
      930.526358
      75.115015
      25.951491
      12.788123
      8.874696
      434.157866
      90.022092
      183.869191
      503.464694
      131.112594
      1253.893232
      0.046403
      0.091730
      0.217001
      0.228248
      0.026013
      0.306745
      0.177724
      0.486642
      0.498095
      0.436702
    
    
      min
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.231494
      0.000000
      0.000000
      0.000000
    
    
      25%
      2893.000000
      1260.000000
      27.000000
      0.000000
      6.000000
      0.000000
      0.000000
      23.000000
      86.000000
      49.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      8.000000
      26.000000
      84.000000
      0.000000
      0.002585
      0.007570
      0.024418
      0.007686
      0.386574
      0.606651
      0.000000
      0.000000
      0.000000
    
    
      50%
      4080.000000
      2482.000000
      149.000000
      0.000000
      54.000000
      0.000000
      0.000000
      63.000000
      283.000000
      175.000000
      0.000000
      0.000000
      0.000000
      0.000000
      37.000000
      7.000000
      2.000000
      59.000000
      70.000000
      276.000000
      0.000778
      0.015626
      0.037615
      0.070142
      0.018087
      0.709287
      0.777951
      1.000000
      0.000000
      0.000000
    
    
      75%
      5465.000000
      3777.000000
      606.000000
      15.000000
      216.000000
      0.000000
      1.000000
      131.000000
      883.000000
      552.000000
      11.000000
      0.000000
      0.000000
      0.000000
      188.000000
      42.000000
      24.000000
      228.000000
      143.000000
      869.000000
      0.006017
      0.051446
      0.144482
      0.204618
      0.033377
      0.881449
      0.904068
      1.000000
      1.000000
      1.000000
    
    
      max
      61133.000000
      37457.000000
      18513.000000
      9607.000000
      12452.000000
      3520.000000
      1149.000000
      3516.000000
      27381.000000
      23937.000000
      2998.000000
      674.000000
      819.000000
      921.000000
      8676.000000
      3281.000000
      9655.000000
      12452.000000
      3516.000000
      27360.000000
      1.000000
      0.911438
      1.000000
      1.000000
      1.000000
      1.000000
      1.000000
      1.000000
      1.000000
      1.000000

Selecting what cities to feature



In [5]:

    
os.chdir(data)



In [6]:

    
place_race_frame_full = pd.read_csv('2016_ACS_race_place_full.csv')
ak_max = place_race_frame_full[(place_race_frame_full['TOTAL']>500000)]['PER_AKNA'].max()
per_other_max = place_race_frame_full[(place_race_frame_full['TOTAL']>500000)]['PER_OTHER'].max()


def max_print(group, minimum):
    location = place_race_frame_full[(place_race_frame_full['MAX_ETH']==group)&(place_race_frame_full['TOTAL']>minimum)].reset_index().loc[0]['NAME']
    value = place_race_frame_full[(place_race_frame_full['MAX_ETH']==group)&(place_race_frame_full['TOTAL']>minimum)].reset_index().loc[0]['MAX_ETH_PER']
    print('MOST '+group+": "+location+" "+format(value*100,'.2f')+'%')
for item in ['PER_WHITE','PER_BLACK','PER_HISP','PER_API']:
    max_print(item,500000)
    
print("---")    
print("No place over 500,000K with majorirty AK/Native American or Other/Mixed Race. Finding the max perctages instead")
print("---")
print('PER_AK: ',place_race_frame_full[place_race_frame_full['PER_AKNA']==ak_max].reset_index().loc[0]['NAME'])
print('PER_OTHER: ',place_race_frame_full[place_race_frame_full['PER_OTHER']==per_other_max].reset_index().loc[0]['NAME'])









    



MOST PER_WHITE: Portland city, Oregon 71.55%
MOST PER_BLACK: Detroit city, Michigan 79.40%
MOST PER_HISP: El Paso city, Texas 79.76%
MOST PER_API: San Jose city, California 34.54%
---
No place over 500,000K with majorirty AK/Native American or Other/Mixed Race. Finding the max perctages instead
---
PER_AK:  Albuquerque city, New Mexico
PER_OTHER:  Seattle city, Washington



In [11]:

    
place_race_frame_full[place_race_frame_full['NAME']=='El Paso city, Texas']









    Out[11]:







  
    
      
      Unnamed: 0
      TOTAL
      WHITE
      BLACK
      AK_NH
      ASIAN_NH
      PI_NH
      OTHER_NH
      MIX_NH
      HISP_ALL
      WHITE_H
      BLACK_H
      AK_H
      ASIAN_H
      PI_H
      OTHER_H
      MIX_H
      NAME
      place
      state
      AK
      API
      OTHER_MIX
      HISP
      PER_AKNA
      PER_API
      PER_BLACK
      PER_HISP
      PER_OTHER
      PER_WHITE
      MAX_ETH_PER
      MAX_ETH
      Above 70
      Above 80
      Above 90
    
  
  
    
      20125
      24973
      678058.0
      94654.0
      22609.0
      1594.0
      7984.0
      991.0
      629.0
      5576.0
      544021.0
      472521.0
      3313.0
      2412.0
      590.0
      182.0
      55500.0
      9503.0
      El Paso city, Texas
      24000
      48
      4006.0
      9747.0
      6205.0
      540837.0
      0.005908
      0.014375
      0.033344
      0.797626
      0.009151
      0.139596
      0.797626
      PER_HISP
      1
      0
      0



In [17]:

    
reload_tract = pd.read_csv('2016_ACS_race_tract_full.csv',index_col=0)



In [20]:

    
reload_tract[reload_tract['GEO_ID']==41051007400]









    Out[20]:







  
    
      
      TOTAL
      WHITE
      BLACK
      AK_NH
      ASIAN_NH
      PI_NH
      OTHER_NH
      MIX_NH
      HISP_ALL
      WHITE_H
      BLACK_H
      AK_H
      ASIAN_H
      PI_H
      OTHER_H
      MIX_H
      NAME
      tract
      state
      county
      county_map
      AK
      API
      OTHER_MIX
      HISP
      PER_AKNA
      PER_API
      PER_BLACK
      PER_HISP
      PER_OTHER
      PER_WHITE
      MAX_ETH_PER
      MAX_ETH
      Above 70
      Above 80
      Above 90
      GEO_ID
    
  
  
    
      55105
      3995.0
      1591.0
      846.0
      45.0
      122.0
      19.0
      0.0
      283.0
      1089.0
      482.0
      0.0
      1.0
      0.0
      0.0
      77.0
      529.0
      Census Tract 74, Multnomah County, Oregon
      7400
      41
      51
      Multnomah County, Oregon
      46.0
      141.0
      283.0
      1088.0
      0.011514
      0.035294
      0.211765
      0.27234
      0.070839
      0.398248
      0.398248
      PER_WHITE
      0
      0
      0
      41051007400



In [510]:

    
#percent of population that lives in a tract that is >=x% of one ethnic group
def homogenous_pop(df,threshold_column):
    percent = (df[df[threshold_column]==1]['TOTAL'].sum())/df['TOTAL'].sum()
    return format(percent*100,'.1f')

Some extra stuff



In [ ]:

    
#For those who are non-white and Hispanic, which to include within the "hispanic" group? Let's assess

for counties in [reformat_race(la_county_data), reformat_race(philly_county_data),reformat_race(suffolk_county_data)]:
    for item in ['White_H','Black_H','AK_H','Asian_H','PI_H','OTHER_H','MIX_H']:
        print(item+' '+str(format(((counties[item]/counties['HISP_All']).mean())*100,'.1f'))+'%')
    print('------')



In [ ]:

    
## extra tutorial stuff

c.acs5.get(('NAME', table_name),
          {'for': 'state:{}'.format(states.MD.fips)},year=2011) #set different years



table_name ='B25034_010E'

#The get method is the core data access method on both the ACS and SF1 data sets. 
#The first parameter is either a single string column or a tuple of columns. 
#The second parameter is a geoemtry dict with a for key and on option in key. 
c.acs5.get(('NAME', table_name),
          {'for': 'state:{}'.format(states.MD.fips)},year=2011) #set different years



In [ ]:

    
Some info on Census and US libraries from 

https://github.com/datamade/census

## Description

#### ACS5 
state(fields, state_fips)


state_county(fields, state_fips, county_fips)


state_county_blockgroup(fields, state_fips, county_fips, blockgroup)


state_county_subdivision(fields, state_fips, county_fips, subdiv_fips)


state_county_tract(fields, state_fips, county_fips, tract)


state_place(fields, state_fips, place)


state_district(fields, state_fips, district)


us(fields)

zipcode(fields, zip5)

#### ACS1

ACS1 Geographies

state(fields, state_fips)

state_district(fields, state_fips, district)
us(fields)


## Datasets
*acs5: ACS 5 Year Estimates (2016, 2015, 2014, 2013, 2012, 2011, 2010)

*acs1dp: ACS 1 Year Estimates, Data Profiles (2016, 2015, 2014, 2013, 2012)

*sf1: Census Summary File 1 (2010, 2000, 1990)

*sf3: Census Summary File 3 (2000, 1990)

	TOTAL	WHITE	BLACK	AK_NH	ASIAN_NH	PI_NH	OTHER_NH	MIX_NH	HISP_ALL	WHITE_H	BLACK_H	AK_H	ASIAN_H	PI_H	OTHER_H	MIX_H	AK	API	OTHER_MIX	HISP	PER_AKNA	PER_API	PER_BLACK	PER_HISP	PER_OTHER	PER_WHITE	MAX_ETH_PER	Above 70	Above 80	Above 90
count	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	74001.000000	73306.000000	73306.000000	73306.000000	73306.000000	73306.000000	73306.000000	73306.000000	74001.000000	74001.000000	74001.000000
mean	4352.475602	2667.386549	528.386441	28.167403	221.992784	6.878421	9.158255	97.391157	793.114593	523.180646	19.740004	7.097404	2.655518	0.691788	201.767746	37.981487	35.264807	232.218511	106.549411	782.669883	0.008880	0.048667	0.132879	0.168072	0.024345	0.617157	0.746134	0.614816	0.456359	0.256497
std	2175.316924	1889.675151	902.984218	178.352648	486.204507	50.717061	33.412553	123.721987	1264.186601	930.526358	75.115015	25.951491	12.788123	8.874696	434.157866	90.022092	183.869191	503.464694	131.112594	1253.893232	0.046403	0.091730	0.217001	0.228248	0.026013	0.306745	0.177724	0.486642	0.498095	0.436702
min	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.231494	0.000000	0.000000	0.000000
25%	2893.000000	1260.000000	27.000000	0.000000	6.000000	0.000000	0.000000	23.000000	86.000000	49.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	8.000000	26.000000	84.000000	0.000000	0.002585	0.007570	0.024418	0.007686	0.386574	0.606651	0.000000	0.000000	0.000000
50%	4080.000000	2482.000000	149.000000	0.000000	54.000000	0.000000	0.000000	63.000000	283.000000	175.000000	0.000000	0.000000	0.000000	0.000000	37.000000	7.000000	2.000000	59.000000	70.000000	276.000000	0.000778	0.015626	0.037615	0.070142	0.018087	0.709287	0.777951	1.000000	0.000000	0.000000
75%	5465.000000	3777.000000	606.000000	15.000000	216.000000	0.000000	1.000000	131.000000	883.000000	552.000000	11.000000	0.000000	0.000000	0.000000	188.000000	42.000000	24.000000	228.000000	143.000000	869.000000	0.006017	0.051446	0.144482	0.204618	0.033377	0.881449	0.904068	1.000000	1.000000	1.000000
max	61133.000000	37457.000000	18513.000000	9607.000000	12452.000000	3520.000000	1149.000000	3516.000000	27381.000000	23937.000000	2998.000000	674.000000	819.000000	921.000000	8676.000000	3281.000000	9655.000000	12452.000000	3516.000000	27360.000000	1.000000	0.911438	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000	1.000000

Idea #1 - comparison of segregation in Boston vs Philly (now and prior)

Get necessary fields related to Race by Hispanic/Latino Origin table, B03002

Selecting what cities to feature

Some extra stuff