notebook.community

Edit and run



In [1]:

    
import seaborn as sns
import metapack as mp
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display 

%matplotlib inline
# Use seaborn's 'notebook' plotting context for any figures drawn below.
sns.set_context('notebook')
# NOTE(review): presumably sets up metapack's Jupyter integration — confirm against the metapack docs.
mp.jupyter.init()



In [2]:

    
# Open the metapack package this notebook works from; the source-package opener is the one in use.
# The commented-out call below is the alternative opener.
# NOTE(review): presumably open_package() targets a built package rather than the source one — confirm.
#pkg = mp.jupyter.open_package()
pkg = mp.jupyter.open_source_package()
# Bare last expression: display the package summary.
pkg









    Out[2]:




Small Area Estimates for Diabetes in San Diego
sandiegodata.org-diabetes_sae-1 Last Update: 2018-12-05T23:54:30
__
Contacts

Wrangler Eric Busboom, Civic Knowledge

References

rasp_diabetes. Diabetes probabilities, by age, race, sex and poverty status
rasp_tracts. Race, age, sex and poverty status of population by tract in San Diego County



In [3]:

    
# Load the two reference datasets listed in the package metadata.
dia = pkg.reference('rasp_diabetes').dataframe()
tracts = pkg.reference('rasp_tracts').dataframe()

# Fold the small race categories into 'other' so the tract categories can line up
# with the coarser race/ethnicity categories of the diabetes rate table.
other_races = {race: 'other' for race in ('aian', 'many', 'nhopi')}
tracts['raceeth'] = tracts['raceeth'].replace(other_races)



In [4]:

    
# Adult population of California, at least according to the diabetes dataset.
dia_adult_pop = dia['group_pop'].sum()

# The Census ACS has a different idea of the adult population of San Diego County:
# sum the estimates of the adult (age_min >= 18), non-overlapping tract cells.
is_adult = tracts['age_min'] >= 18
is_disjoint = tracts['overlapping'] == 0
tracts_adult_pop = tracts.loc[is_adult & is_disjoint, 'value_est'].sum()

dia_adult_pop, tracts_adult_pop









    Out[4]:





(29309514, 2643923)



In [5]:

    
# Race/ethnicity categories present in the diabetes rate table, with row counts.
dia['raceeth'].value_counts()









    Out[5]:





black      28
other      28
asian      28
hisp       28
nhwhite    28
Name: raceeth, dtype: int64



In [6]:

    
# Race/ethnicity categories present in the tract table, for comparison with the diabetes table.
tracts['raceeth'].value_counts()









    Out[6]:





other      148208
black       37052
hisp        37052
all         37052
asian       37052
white       37052
nhwhite     37052
Name: raceeth, dtype: int64



In [7]:

    
# Preview the first five rows of the tract table.
tracts.head(n=5)









    Out[7]:







  
    
      
      geoid
      col_name
      value_est
      value_margin
      sex
      raceeth
      age_range
      pov
      age_min
      age_max
      overlapping
    
  
  
    
      0
      14000US06073000100
      B17001A_001
      2455
      214
      both
      white
      00-120
      all
      0
      120
      1
    
    
      1
      14000US06073000100
      B17001A_002
      102
      70
      both
      white
      00-120
      below
      0
      120
      1
    
    
      2
      14000US06073000100
      B17001A_003
      12
      16
      male
      white
      00-120
      below
      0
      120
      1
    
    
      3
      14000US06073000100
      B17001A_004
      0
      12
      male
      white
      00-004
      below
      0
      4
      1
    
    
      4
      14000US06073000100
      B17001A_005
      0
      12
      male
      white
      05-005
      below
      5
      5
      1



In [8]:

    
# Recode the diabetes table so its key columns use the same labels as the tract table.

# The top age bracket is relabelled so it matches the tract table's open-ended bracket.
age_range = dia['age_group'].replace({'75-085': '75-120'})
# Code 1 maps to 'male' / 'below'; every other value maps to 'female' / 'above'.
sex_label = dia['sex'].eq(1).map({True: 'male', False: 'female'})
pov_label = dia['pov'].eq(1).map({True: 'below', False: 'above'})

dia_merge = pd.DataFrame({
    'raceeth': dia['raceeth'],
    'age_range': age_range,
    'sex': sex_label,
    'pov': pov_label,
    'rate': dia['imputed_rate'],
})

dia_merge.head()









    Out[8]:







  
    
      
      raceeth
      age_range
      sex
      pov
      rate
    
  
  
    
      0
      asian
      18-024
      female
      above
      0.007575
    
    
      1
      asian
      18-024
      female
      below
      0.007575
    
    
      2
      asian
      18-024
      male
      above
      0.007575
    
    
      3
      asian
      18-024
      male
      below
      0.007575
    
    
      4
      asian
      25-034
      female
      above
      0.000067



In [9]:

    
# Drop the 'white' rows before merging: the diabetes table has an 'nhwhite' category
# but no 'white' one, so those rows could never pick up a rate.
not_white = tracts['raceeth'] != 'white'
tracts_merge = tracts.loc[not_white].copy()
tracts_merge.head()









    Out[9]:







  
    
      
      geoid
      col_name
      value_est
      value_margin
      sex
      raceeth
      age_range
      pov
      age_min
      age_max
      overlapping
    
  
  
    
      37052
      14000US06073000100
      B17001B_001
      0
      12
      both
      black
      00-120
      all
      0
      120
      1
    
    
      37053
      14000US06073000100
      B17001B_002
      0
      12
      both
      black
      00-120
      below
      0
      120
      1
    
    
      37054
      14000US06073000100
      B17001B_003
      0
      12
      male
      black
      00-120
      below
      0
      120
      1
    
    
      37055
      14000US06073000100
      B17001B_004
      0
      12
      male
      black
      00-004
      below
      0
      4
      0
    
    
      37056
      14000US06073000100
      B17001B_005
      0
      12
      male
      black
      05-005
      below
      5
      5
      0



In [10]:

    
# Total estimated population over all non-overlapping tract cells (all ages).
tracts.loc[tracts['overlapping'] == 0, 'value_est'].sum()









    Out[10]:





3452837



In [11]:

    
# Attach a diabetes rate to each tract population cell by matching on the four
# demographic keys. validate='many_to_one' makes the merge fail loudly if the rate
# table ever carries duplicate keys, which would otherwise silently duplicate
# population rows and inflate every downstream sum.
m = tracts_merge.merge(
    dia_merge,
    on=['raceeth', 'age_range', 'sex', 'pov'],
    how='left',
    validate='many_to_one',
)

# Keep only cells that found a rate and are flagged as non-overlapping.
# .copy() gives an independent frame, so columns assigned to m in later cells
# do not trigger SettingWithCopyWarning on a filtered slice.
m = m[m.rate.notna() & (m.overlapping == 0)].copy()
m.head()









    Out[11]:







  
    
      
      geoid
      col_name
      value_est
      value_margin
      sex
      raceeth
      age_range
      pov
      age_min
      age_max
      overlapping
      rate
    
  
  
    
      9
      14000US06073000100
      B17001B_010
      0
      12
      male
      black
      18-024
      below
      18
      24
      0
      0.004529
    
    
      10
      14000US06073000100
      B17001B_011
      0
      12
      male
      black
      25-034
      below
      25
      34
      0
      0.024107
    
    
      11
      14000US06073000100
      B17001B_012
      0
      12
      male
      black
      35-044
      below
      35
      44
      0
      0.079815
    
    
      12
      14000US06073000100
      B17001B_013
      0
      12
      male
      black
      45-054
      below
      45
      54
      0
      0.124868
    
    
      13
      14000US06073000100
      B17001B_014
      0
      12
      male
      black
      55-064
      below
      55
      64
      0
      0.265294



In [12]:

    
# Total population covered by the merged cells; compare with the adult tract population above.
m['value_est'].sum()









    Out[12]:





2643923



In [13]:

    
# Sanity check. For 2017 data the adult population should be about
# 3337685 (whole population) * 78.2% (share aged 18 or older) == 2610069.
expected_adult_pop = 2610069

s = m.loc[m['overlapping'] == 0, 'value_est'].sum()
excess = s - expected_adult_pop

# (our total, absolute difference, relative difference)
s, excess, excess / expected_adult_pop









    Out[13]:





(2643923, 33854, 0.012970538326764541)



In [14]:

    
# The raw percentage here, 8.6%, does not match the AskCHIS value for San Diego County
# of 9.1%, but after adjusting for the difference in population between the two
# datasets it almost does (9.3%).

# NOTE(review): presumably the adult population behind the AskCHIS figure — confirm the source.
askchis_adult_pop = 2430000

t = m[m.overlapping == 0].copy()

# Expected number of adults with diabetes: each cell's rate times its population, summed.
d = (t.rate * t.value_est).sum()

# (estimated cases, raw rate, rate rescaled to the AskCHIS population).
# The rescaling uses the computed total s (from the population check cell) rather than
# a hard-coded copy of it, so it cannot go stale when the data changes.
d, d / s, (d / s) * (s / askchis_adult_pop)









    Out[14]:





(226252.36856408906, 0.08557449236006082, 0.0931079706025058)



In [26]:

    
# Expected number of people with diabetes in each population cell.
# assign() builds a new frame instead of writing a column into a filtered slice.
m = m.assign(diabetes_est=m.value_est * m.rate)

# Aggregate only the two additive columns per tract. Summing every column would also
# add up rates and age bounds (meaningless) and would concatenate or reject the string
# columns on recent pandas versions.
t = m.groupby('geoid')[['value_est', 'diabetes_est']].sum()

# Tract rate as a whole-number percentage; a tract with zero population yields NaN.
t['tract_diabetes_rate'] = (t.diabetes_est / t.value_est * 100).round(0)



In [28]:

    
# Write the per-tract estimated case count and rate to CSV; the geoid index is written
# as the first column. Relies on t as built by the per-tract aggregation cell — t is
# rebound elsewhere in this notebook, so run that cell immediately before this one.
t.loc[:, ['diabetes_est', 'tract_diabetes_rate']].to_csv('tract_diabetes.csv')



In [73]:

    
# Diabetes rate per tract, broken out by race/ethnicity plus a 'total' column.
# Requires m to already carry the diabetes_est column.

# Select the additive columns *before* aggregating, so string and rate columns are
# never summed (meaningless, and concatenated or rejected on recent pandas versions).
est_cols = ['value_est', 'diabetes_est']

# Per tract and race/ethnicity.
t1 = m.groupby(['geoid', 'raceeth'])[est_cols].sum()

# Per tract over all races, labelled 'total' so it stacks with the per-race rows.
t2 = m.groupby('geoid')[est_cols].sum()
t2['raceeth'] = 'total'

t1 = t1.reset_index()
t2 = t2.reset_index()[t1.columns]  # same column order as t1

t = pd.concat([t1, t2], ignore_index=True, sort=False).sort_values(['geoid', 'raceeth'])
# Rate is NaN where a tract has no population in that group (0 / 0).
t['rate'] = t.diabetes_est / t.value_est

# Wide table: one row per tract, one column per race/ethnicity (and 'total').
tract_diabetes = t.set_index(['geoid', 'raceeth'])['rate'].unstack()
tract_diabetes.head()









    Out[73]:







  
    
      raceeth
      asian
      black
      hisp
      nhwhite
      other
      total
    
    
      geoid
      
      
      
      
      
      
    
  
  
    
      14000US06073000100
      0.090034
      NaN
      0.240076
      0.100720
      0.149703
      0.117236
    
    
      14000US06073000201
      0.083256
      0.101111
      0.093961
      0.085595
      0.094269
      0.087723
    
    
      14000US06073000202
      0.084176
      0.073590
      0.096333
      0.054982
      0.084927
      0.065198
    
    
      14000US06073000300
      0.045533
      0.108271
      0.086672
      0.069047
      0.025907
      0.068951
    
    
      14000US06073000400
      0.048468
      0.028781
      0.045808
      0.056943
      0.049220
      0.053447



In [ ]:

	geoid	col_name	value_est	value_margin	sex	raceeth	age_range	pov	age_min	age_max	overlapping
0	14000US06073000100	B17001A_001	2455	214	both	white	00-120	all	0	120	1
1	14000US06073000100	B17001A_002	102	70	both	white	00-120	below	0	120	1
2	14000US06073000100	B17001A_003	12	16	male	white	00-120	below	0	120	1
3	14000US06073000100	B17001A_004	0	12	male	white	00-004	below	0	4	1
4	14000US06073000100	B17001A_005	0	12	male	white	05-005	below	5	5	1

	raceeth	age_range	sex	pov	rate
0	asian	18-024	female	above	0.007575
1	asian	18-024	female	below	0.007575
2	asian	18-024	male	above	0.007575
3	asian	18-024	male	below	0.007575
4	asian	25-034	female	above	0.000067

	geoid	col_name	value_margin	sex	raceeth	age_range	pov	age_min	age_max	overlapping
37052	14000US06073000100	B17001B_001	12	both	black	00-120	all	0	120	1
37053	14000US06073000100	B17001B_002	12	both	black	00-120	below	0	120	1
37054	14000US06073000100	B17001B_003	12	male	black	00-120	below	0	120	1
37055	14000US06073000100	B17001B_004	12	male	black	00-004	below	0	4	0
37056	14000US06073000100	B17001B_005	12	male	black	05-005	below	5	5	0

	geoid	col_name	value_margin	sex	raceeth	age_range	pov	age_min	age_max	rate
9	14000US06073000100	B17001B_010	12	male	black	18-024	below	18	24	0.004529
10	14000US06073000100	B17001B_011	12	male	black	25-034	below	25	34	0.024107
11	14000US06073000100	B17001B_012	12	male	black	35-044	below	35	44	0.079815
12	14000US06073000100	B17001B_013	12	male	black	45-054	below	45	54	0.124868
13	14000US06073000100	B17001B_014	12	male	black	55-064	below	55	64	0.265294

raceeth	asian	black	hisp	nhwhite	other	total
geoid
14000US06073000100	0.090034	NaN	0.240076	0.100720	0.149703	0.117236
14000US06073000201	0.083256	0.101111	0.093961	0.085595	0.094269	0.087723
14000US06073000202	0.084176	0.073590	0.096333	0.054982	0.084927	0.065198
14000US06073000300	0.045533	0.108271	0.086672	0.069047	0.025907	0.068951
14000US06073000400	0.048468	0.028781	0.045808	0.056943	0.049220	0.053447