Clean census data


In [1]:
import requests
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import csv
import seaborn as sns
font = {'family' : 'Arial',
        'weight' : 'bold',
        'size'   : 25}
matplotlib.rc('font', **font)
from census import Census
from us import states
import csv

In [56]:
# load the zip code file 
import pickle
with open('zipcode_final.txt', 'rb') as f:
    zip_codes = pickle.load(f)

In [69]:
# table list: total demographics by age
tables = ['B01001_001E',
'B01001_002E',
'B01001_007E',
'B01001_008E',
'B01001_009E',
'B01001_010E',
'B01001_011E',
'B01001_012E', 
'B01001_026E',
'B01001_031E',
'B01001_032E',
'B01001_033E',
'B01001_034E',
'B01001_035E',
'B01001_036E',  
'B15002_001E',
'B15002_002E',
'B15002_015E',
'B15002_016E',
'B15002_017E',
'B15002_018E', 
'B15002_019E',
'B15002_032E',
'B15002_033E',
'B15002_034E',
'B15002_035E',  
'B17001_002E',
'C24010_015E',
'C24010_051E']
len(tables)


Out[69]:
29

In [228]:
columns = ['TotalPop',
'M_Total',
'M_18to19',
'M_20',
'M_21',
'M_22to24',
'M_25to29',
'M_30to34',     
'F_Total',
'F_18to19',
'F_20',
'F_21',
'F_22to24',
'F_25to29',
'F_30to34',   
'Total_Edu',
'M_Total_Edu',
'M_BA',
'M_MA',
'M_Prof',
'M_Doc',
'F_Total_Edu',
'F_BA',
'F_MA',
'F_Prof',
'F_Doc',
'Total_poor',
'M_Arts',
'F_Arts']
len(columns)


Out[228]:
29

In [74]:
# estimate time to collect the data
len(zip_codes)*len(tables)/3600*3


Out[74]:
12

In [90]:
# per year
year = 2013
columns = tables + ['zip_code','year']
df_ = pd.DataFrame(columns=columns)
df = pd.DataFrame(columns=tables)
c = Census(api_key,year=year)
for table in tables:
    data = [dict(d = c.acs.zipcode(table,zip_code)[0][table]) for zip_code in zip_codes]
    temp = pd.DataFrame(data)
    df[table]=temp
#     print data
df['year']=year
df_ = pd.concat([df_,df],ignore_index=True)

# If you want to use a for loop...
"""# df for census data
years = range(2011,2014)
columns = tables + ['zip_code','year']
df_ = pd.DataFrame(columns=columns)
for year in years:
    df = pd.DataFrame(columns=tables)
    c = Census(api_key,year=year)
    for table in tables:
        data = [dict(zip_code = c.acs.zipcode(table,zip_code)[0][table]) for zip_code in zip_codes]
        temp = pd.DataFrame(data)
        df[table]=temp
    df['year']=year
    df_ = pd.concat([df_,df],ignore_index=True)"""

In [91]:
df_2013 = df_

In [98]:
df_2013['zip_code']=pd.DataFrame(zip_codes)
df_2012['zip_code']=pd.DataFrame(zip_codes)
df_2011['zip_code']=pd.DataFrame(zip_codes)

In [97]:
df_2013.head()


Out[97]:
B01001_001E B01001_002E B01001_007E B01001_008E B01001_009E B01001_010E B01001_011E B01001_012E B01001_026E B01001_031E ... B15002_019E B15002_032E B15002_033E B15002_034E B15002_035E B17001_002E C24010_015E C24010_051E year zip_code
0 15867 6859 159 82 17 892 1374 653 9008 284 ... 6512 1185 751 557 164 5651 156 183 2013 21201
1 22700 13027 505 403 495 1359 1993 1078 9673 222 ... 6540 1108 660 412 152 6154 82 83 2013 21202
2 16100 7332 340 206 182 326 606 390 8768 374 ... 5283 251 153 30 0 5379 11 0 2013 21205
3 50182 22690 649 331 200 789 1579 1794 27492 911 ... 19089 2069 1341 125 25 7706 185 116 2013 21206
4 50107 22089 586 422 275 703 1291 1441 28018 633 ... 19404 3028 1643 155 105 6675 159 178 2013 21207

5 rows × 31 columns


In [89]:
df_2012.head()


Out[89]:
B01001_001E B01001_002E B01001_007E B01001_008E B01001_009E B01001_010E B01001_011E B01001_012E B01001_026E B01001_031E ... B15002_019E B15002_032E B15002_033E B15002_034E B15002_035E B17001_002E C24010_015E C24010_051E year zip_code
0 15547 6954 154 98 47 1042 1110 666 8593 290 ... 5817 1126 768 490 140 5911 208 227 2012 NaN
1 22054 12562 546 331 447 1201 1931 1070 9492 231 ... 6508 1096 674 364 158 6075 128 60 2012 NaN
2 17042 8036 332 129 175 361 685 600 9006 399 ... 5426 204 181 33 0 5855 9 7 2012 NaN
3 50147 23175 828 266 368 787 1748 1614 26972 1037 ... 18719 1713 1113 101 38 6964 152 103 2012 NaN
4 49520 21948 686 395 195 772 1365 1289 27572 550 ... 19045 2601 1648 160 89 6809 348 166 2012 NaN

5 rows × 31 columns


In [88]:
df_2011.head()


Out[88]:
B01001_001E B01001_002E B01001_007E B01001_008E B01001_009E B01001_010E B01001_011E B01001_012E B01001_026E B01001_031E ... B15002_019E B15002_032E B15002_033E B15002_034E B15002_035E B17001_002E C24010_015E C24010_051E year zip_code
0 15844 6780 143 78 94 962 1076 546 9064 256 ... 6297 1220 800 565 143 5912 161 255 2011 NaN
1 21652 12620 526 266 449 1228 2040 1061 9032 246 ... 6146 1053 626 322 142 5855 152 67 2011 NaN
2 16549 7773 311 139 152 396 600 498 8776 374 ... 5475 170 143 50 0 4995 0 8 2011 NaN
3 51179 23446 714 216 356 797 2094 1583 27733 1157 ... 18519 1968 1219 73 44 6617 202 125 2011 NaN
4 50049 22417 527 557 198 843 1438 1346 27632 573 ... 18673 2529 1676 158 48 6575 339 142 2011 NaN

5 rows × 31 columns


In [99]:
df_census = pd.concat([df_2011,df_2012,df_2013],ignore_index=True)

In [101]:
df_census.head()


Out[101]:
B01001_001E B01001_002E B01001_007E B01001_008E B01001_009E B01001_010E B01001_011E B01001_012E B01001_026E B01001_031E ... B15002_019E B15002_032E B15002_033E B15002_034E B15002_035E B17001_002E C24010_015E C24010_051E year zip_code
0 15844 6780 143 78 94 962 1076 546 9064 256 ... 6297 1220 800 565 143 5912 161 255 2011 21201
1 21652 12620 526 266 449 1228 2040 1061 9032 246 ... 6146 1053 626 322 142 5855 152 67 2011 21202
2 16549 7773 311 139 152 396 600 498 8776 374 ... 5475 170 143 50 0 4995 0 8 2011 21205
3 51179 23446 714 216 356 797 2094 1583 27733 1157 ... 18519 1968 1219 73 44 6617 202 125 2011 21206
4 50049 22417 527 557 198 843 1438 1346 27632 573 ... 18673 2529 1676 158 48 6575 339 142 2011 21207

5 rows × 31 columns


In [102]:
# save
df_census.to_csv('census_final.csv',encoding='utf-8',index=False)

In [290]:
df2 = pd.read_csv('census_final.csv')

In [291]:
len(df2.columns)
column_names = columns + ['year','zip_code']

In [292]:
df2.columns = column_names
df2.head()


Out[292]:
TotalPop M_Total M_18to19 M_20 M_21 M_22to24 M_25to29 M_30to34 F_Total F_18to19 ... F_Total_Edu F_BA F_MA F_Prof F_Doc Total_poor M_Arts F_Arts year zip_code
0 15844 6780 143 78 94 962 1076 546 9064 256 ... 6297 1220 800 565 143 5912 161 255 2011 21201
1 21652 12620 526 266 449 1228 2040 1061 9032 246 ... 6146 1053 626 322 142 5855 152 67 2011 21202
2 16549 7773 311 139 152 396 600 498 8776 374 ... 5475 170 143 50 0 4995 0 8 2011 21205
3 51179 23446 714 216 356 797 2094 1583 27733 1157 ... 18519 1968 1219 73 44 6617 202 125 2011 21206
4 50049 22417 527 557 198 843 1438 1346 27632 573 ... 18673 2529 1676 158 48 6575 339 142 2011 21207

5 rows × 31 columns


In [ ]:
# deal with nan
np.isnan(df_new.M_18to19.values)

In [308]:
# normalize: do I have to?
df_new = pd.DataFrame()
# male age group
for i in range(2,8):
    df_new[columns[i]]=df2.ix[:,i].values/df2.ix[:,1]
# female age group
for i in range(9,15):
    df_new[columns[i]]=df2.ix[:,i].values/df2.ix[:,8]
# male edu
for i in range(17,21):
    df_new[columns[i]]=df2.ix[:,i].values/df2.ix[:,16]
# female edu
for i in range(22,26):
    df_new[columns[i]]=df2.ix[:,i].values/df2.ix[:,21]
# poor
df_new[columns[26]]=df2.ix[:,26].values/df2.ix[:,0]
# male art
df_new[columns[27]]=df2.ix[:,27].values/df2.ix[:,1]
# female art
df_new[columns[28]]=df2.ix[:,27].values/df2.ix[:,8]

In [309]:
df_new.head()


Out[309]:
M_18to19 M_20 M_21 M_22to24 M_25to29 M_30to34 F_18to19 F_20 F_21 F_22to24 ... M_MA M_Prof M_Doc F_BA F_MA F_Prof F_Doc Total_poor M_Arts F_Arts
0 0.021091 0.011504 0.013864 0.141888 0.158702 0.080531 0.028244 0.011253 0.018645 0.107789 ... 0.120744 0.058103 0.054471 0.193743 0.127045 0.089725 0.022709 0.373138 0.023746 0.017763
1 0.041680 0.021078 0.035578 0.097306 0.161648 0.084073 0.027236 0.011847 0.038530 0.067538 ... 0.059464 0.033712 0.024699 0.171331 0.101855 0.052392 0.023104 0.270414 0.012044 0.016829
2 0.040010 0.017882 0.019555 0.050946 0.077190 0.064068 0.042616 0.027347 0.019599 0.035324 ... 0.002452 0.000000 0.001337 0.031050 0.026119 0.009132 0.000000 0.301831 0.000000 0.000000
3 0.030453 0.009213 0.015184 0.033993 0.089312 0.067517 0.041719 0.017705 0.011755 0.040854 ... 0.042170 0.009075 0.005738 0.106269 0.065824 0.003942 0.002376 0.129291 0.008616 0.007284
4 0.023509 0.024847 0.008833 0.037605 0.064148 0.060044 0.020737 0.013282 0.016285 0.043862 ... 0.038467 0.008556 0.005586 0.135436 0.089755 0.008461 0.002571 0.131371 0.015122 0.012268

5 rows × 23 columns


In [314]:
gb2 = df2.groupby(('year','zip_code'))

In [315]:
(prev_year+.0)/this_year


Out[315]:
array([[ 1.01910336,  0.97497843,  0.92857143,  0.79591837,  2.        ,
         0.92322457,  0.96936937,  0.81981982,  1.05481206,  0.88275862,
         1.61904762,  1.06289308,  0.99897751,  1.00736067,  1.19007092,
         1.02834358,  0.95970377,  0.93971061,  1.1901566 ,  0.7032967 ,
         0.76677316,  1.08251676,  1.08348135,  1.04166667,  1.15306122,
         1.02142857,  1.00016918,  0.77403846,  1.12334802]])

In [316]:
pd.DataFrame((this_year - prev_year+.0)/prev_year)


Out[316]:
0 1 2 3 4 5 6 7 8 9 ... 19 20 21 22 23 24 25 26 27 28
0 -0.018745 0.025664 0.076923 0.25641 -0.5 0.08316 0.031599 0.21978 -0.051964 0.132812 ... 0.421875 0.304167 -0.076227 -0.077049 -0.04 -0.132743 -0.020979 -0.000169 0.291925 -0.109804

1 rows × 29 columns


In [319]:
growth = pd.DataFrame((this_year - prev_year+.0)/prev_year)
growth


Out[319]:
0 1 2 3 4 5 6 7 8 9 ... 19 20 21 22 23 24 25 26 27 28
0 -0.018745 0.025664 0.076923 0.25641 -0.5 0.08316 0.031599 0.21978 -0.051964 0.132812 ... 0.421875 0.304167 -0.076227 -0.077049 -0.04 -0.132743 -0.020979 -0.000169 0.291925 -0.109804

1 rows × 29 columns


In [322]:
gb2.get_group((ref_year,int(zip_code)))[]


Out[322]:
TotalPop M_Total M_18to19 M_20 M_21 M_22to24 M_25to29 M_30to34 F_Total F_18to19 ... F_Total_Edu F_BA F_MA F_Prof F_Doc Total_poor M_Arts F_Arts year zip_code
0 15844 6780 143 78 94 962 1076 546 9064 256 ... 6297 1220 800 565 143 5912 161 255 2011 21201

1 rows × 31 columns


In [325]:
ref_years = [2011, 2012]
df_cen_growth = pd.DataFrame()
for ref_year in ref_years:
    for zip_code in zip_codes:
        try:
            prev_year = gb2.get_group((ref_year,int(zip_code))).ix[:,:-2].values
            this_year = gb2.get_group((ref_year+1,int(zip_code))).ix[:,:-2].values
            growth = pd.DataFrame((this_year - prev_year+.0)/prev_year)
            growth.columns = columns
            growth['year']=ref_year
            growth['zip_code']=zip_code
            df_cen_growth = pd.concat([df_cen_growth,growth],ignore_index=True)
        except KeyError:
            continue

In [327]:
df_cen_growth.head()


Out[327]:
TotalPop M_Total M_18to19 M_20 M_21 M_22to24 M_25to29 M_30to34 F_Total F_18to19 ... F_Total_Edu F_BA F_MA F_Prof F_Doc Total_poor M_Arts F_Arts year zip_code
0 -0.018745 0.025664 0.076923 0.256410 -0.500000 0.083160 0.031599 0.219780 -0.051964 0.132812 ... -0.076227 -0.077049 -0.040000 -0.132743 -0.020979 -0.000169 0.291925 -0.109804 2011 21201
1 0.018566 -0.004596 0.038023 0.244361 -0.004454 -0.021987 -0.053431 0.008483 0.050930 -0.060976 ... 0.058900 0.040836 0.076677 0.130435 0.112676 0.037575 -0.157895 -0.104478 2011 21202
2 0.029790 0.033835 0.067524 -0.071942 0.151316 -0.088384 0.141667 0.204819 0.026208 0.066845 ... -0.008950 0.200000 0.265734 -0.340000 NaN 0.172172 inf -0.125000 2011 21205
3 -0.020165 -0.011558 0.159664 0.231481 0.033708 -0.012547 -0.165234 0.019583 -0.027440 -0.103717 ... 0.010800 -0.129573 -0.086957 0.383562 -0.136364 0.052441 -0.247525 -0.176000 2011 21206
4 -0.010570 -0.020922 0.301708 -0.290844 -0.015152 -0.084223 -0.050765 -0.042348 -0.002171 -0.040140 ... 0.019922 0.028470 -0.016706 0.012658 0.854167 0.035589 0.026549 0.169014 2011 21207

5 rows × 31 columns


In [328]:
# Selecting non-nan rows (total population)
df_cen_growth = df_cen_growth[np.isfinite(df_cen_growth['TotalPop'])]

In [330]:
df_cen_growth.head()


Out[330]:
TotalPop M_Total M_18to19 M_20 M_21 M_22to24 M_25to29 M_30to34 F_Total F_18to19 ... F_Total_Edu F_BA F_MA F_Prof F_Doc Total_poor M_Arts F_Arts year zip_code
0 -0.018745 0.025664 0.076923 0.256410 -0.500000 0.083160 0.031599 0.219780 -0.051964 0.132812 ... -0.076227 -0.077049 -0.040000 -0.132743 -0.020979 -0.000169 0.291925 -0.109804 2011 21201
1 0.018566 -0.004596 0.038023 0.244361 -0.004454 -0.021987 -0.053431 0.008483 0.050930 -0.060976 ... 0.058900 0.040836 0.076677 0.130435 0.112676 0.037575 -0.157895 -0.104478 2011 21202
2 0.029790 0.033835 0.067524 -0.071942 0.151316 -0.088384 0.141667 0.204819 0.026208 0.066845 ... -0.008950 0.200000 0.265734 -0.340000 NaN 0.172172 inf -0.125000 2011 21205
3 -0.020165 -0.011558 0.159664 0.231481 0.033708 -0.012547 -0.165234 0.019583 -0.027440 -0.103717 ... 0.010800 -0.129573 -0.086957 0.383562 -0.136364 0.052441 -0.247525 -0.176000 2011 21206
4 -0.010570 -0.020922 0.301708 -0.290844 -0.015152 -0.084223 -0.050765 -0.042348 -0.002171 -0.040140 ... 0.019922 0.028470 -0.016706 0.012658 0.854167 0.035589 0.026549 0.169014 2011 21207

5 rows × 31 columns


In [380]:
# filtering
df_cen_growth2 = df_cen_growth[np.isfinite(df_cen_growth['F_Total_Edu'])]
df_cen_growth2 = df_cen_growth2[np.isfinite(df_cen_growth2['M_Total_Edu'])]
df_cen_growth2 = df_cen_growth2[np.isfinite(df_cen_growth2['M_Total'])]
df_cen_growth2 = df_cen_growth2[np.isfinite(df_cen_growth2['F_Total'])]
len(df_cen_growth2)


Out[380]:
1034

In [378]:
df_cen_growth2


Out[378]:
TotalPop M_Total M_18to19 M_20 M_21 M_22to24 M_25to29 M_30to34 F_Total F_18to19 ... F_Total_Edu F_BA F_MA F_Prof F_Doc Total_poor M_Arts F_Arts year zip_code
0 -0.018745 0.025664 0.076923 0.256410 -0.500000 0.083160 0.031599 0.219780 -0.051964 0.132812 ... -0.076227 -0.077049 -0.040000 -0.132743 -0.020979 -0.000169 0.291925 -0.109804 2011 21201
1 0.018566 -0.004596 0.038023 0.244361 -0.004454 -0.021987 -0.053431 0.008483 0.050930 -0.060976 ... 0.058900 0.040836 0.076677 0.130435 0.112676 0.037575 -0.157895 -0.104478 2011 21202
2 0.029790 0.033835 0.067524 -0.071942 0.151316 -0.088384 0.141667 0.204819 0.026208 0.066845 ... -0.008950 0.200000 0.265734 -0.340000 NaN 0.172172 inf -0.125000 2011 21205
3 -0.020165 -0.011558 0.159664 0.231481 0.033708 -0.012547 -0.165234 0.019583 -0.027440 -0.103717 ... 0.010800 -0.129573 -0.086957 0.383562 -0.136364 0.052441 -0.247525 -0.176000 2011 21206
4 -0.010570 -0.020922 0.301708 -0.290844 -0.015152 -0.084223 -0.050765 -0.042348 -0.002171 -0.040140 ... 0.019922 0.028470 -0.016706 0.012658 0.854167 0.035589 0.026549 0.169014 2011 21207
5 0.020031 0.016146 0.106101 0.191919 0.228216 0.113786 -0.207598 0.110000 0.023175 0.092466 ... 0.026763 -0.037296 0.014811 0.261146 0.152648 0.056545 -0.145695 -0.390071 2011 21208
6 0.033684 0.039363 0.187097 -0.008889 -0.007692 0.103870 0.307323 -0.027263 0.028755 0.308411 ... 0.023072 0.082446 -0.046942 -0.184911 0.079327 0.046669 0.044983 0.026820 2011 21209
7 0.041244 0.066353 0.066107 -0.168116 -0.086420 0.363905 0.109533 0.429864 0.017267 0.155779 ... 0.015640 -0.039740 -0.072173 0.126126 0.232727 0.060799 -0.022857 -0.047244 2011 21210
8 -0.012600 -0.035369 -0.321429 -0.309859 -0.120690 -0.056198 0.138776 0.120347 0.010295 1.066667 ... 0.000994 -0.033079 0.001403 -0.033784 0.191011 0.006897 0.350515 -0.109589 2011 21211
9 -0.003995 -0.005620 0.033898 -0.317919 0.676991 -0.103914 -0.073373 -0.154561 -0.002572 -0.057971 ... 0.001080 -0.001403 -0.058177 0.055108 -0.052469 0.175210 -0.160458 -0.057692 2011 21212
10 -0.024523 -0.039008 -0.019397 -0.073654 -0.043478 -0.146565 -0.005769 -0.177507 -0.012705 -0.187500 ... 0.009346 0.146835 0.062842 0.785714 -0.863636 0.105351 -0.285714 NaN 2011 21213
11 -0.023917 -0.024098 -0.024138 0.016807 -0.166667 -0.225191 0.074653 -0.051870 -0.023758 0.149123 ... -0.036884 -0.005316 -0.025000 0.305556 -0.083916 -0.079327 -0.146667 -0.202586 2011 21214
12 -0.021898 -0.026632 -0.179840 0.072464 -0.166667 0.138204 0.050857 0.010296 -0.018072 0.031153 ... -0.012677 -0.124884 -0.128167 -0.027237 -0.098361 0.050043 1.155172 -0.038462 2011 21215
13 -0.020980 0.007382 -0.059649 0.188000 0.508772 -0.049658 0.435315 -0.024096 -0.043691 -0.182556 ... -0.035336 -0.165537 -0.048110 0.075472 -0.148148 -0.050315 1.216216 0.000000 2011 21216
14 0.014120 0.002005 0.009772 0.159875 0.045894 -0.077939 -0.090844 0.210526 0.023834 -0.118699 ... 0.027525 0.032282 0.026764 0.327869 0.553191 0.001368 -0.150215 0.364964 2011 21217
15 0.007662 0.016950 0.028470 -0.095890 -0.167813 -0.000578 0.035442 -0.009259 -0.000594 -0.100138 ... 0.018216 0.008345 0.009699 0.146341 0.102828 0.050553 0.142169 -0.090164 2011 21218
16 0.003566 0.011170 0.027875 0.238095 -0.204762 0.169145 -0.019358 0.190051 -0.003375 -0.207521 ... -0.013446 0.113813 -0.073379 -0.175000 -0.500000 0.068036 -0.300000 0.463415 2011 21222
17 -0.021263 -0.055028 -0.182039 -0.268657 -0.112782 -0.134571 0.108259 0.044571 0.011621 0.302326 ... 0.002720 0.000000 -0.073171 -0.200000 0.107143 -0.015401 0.320000 0.100000 2011 21223
18 0.040774 0.053528 0.116456 0.097222 0.215488 0.130307 0.075039 0.064107 0.027678 -0.057471 ... 0.024625 0.137421 -0.037781 -0.099338 0.041494 0.090606 0.368932 0.376068 2011 21224
19 0.016540 0.020416 -0.029484 -0.011811 0.190476 0.219048 0.103882 -0.031946 0.012988 0.000000 ... 0.015574 0.086957 0.752294 0.500000 inf 0.090558 0.000000 0.120000 2011 21225
20 -0.041473 -0.051168 -0.070175 0.050000 -0.179487 -0.138889 -0.088398 -0.259414 -0.031492 0.156250 ... -0.064145 -0.024055 -0.423729 -0.100000 -0.363636 -0.189944 inf -0.535714 2011 21226
21 0.022463 0.011504 0.000000 0.364103 0.378205 0.070395 -0.035082 0.056202 0.034185 -0.448560 ... 0.041540 0.063974 0.090909 0.043568 0.123418 0.026566 0.140000 -0.045139 2011 21230
22 -0.002924 -0.022876 -0.029126 -0.638889 0.135593 0.058304 0.008633 -0.097837 0.016451 0.061069 ... 0.073294 0.207241 0.142415 0.378151 -0.009950 0.004019 -0.313043 -0.020478 2011 21231
23 0.008034 0.004986 -0.101719 -0.180435 0.001546 -0.104907 -0.066127 0.025801 0.010651 0.116250 ... -0.002729 0.034199 0.129428 -0.134921 -0.128655 0.145078 0.069343 0.337621 2011 21234
24 -0.013590 -0.006539 0.211137 -0.220339 -0.036458 0.024000 -0.058824 -0.041106 -0.019729 -0.014870 ... -0.012510 -0.054391 0.093860 0.021127 0.358696 0.063104 0.178082 -0.268817 2011 21236
25 0.008755 -0.012251 0.088983 0.496063 0.037879 0.019820 -0.070949 -0.012801 0.027656 0.493617 ... 0.022590 0.008228 -0.130560 -0.120301 0.188034 0.096596 0.128788 -0.065789 2011 21237
26 0.018607 0.042117 0.053957 0.192073 -0.032986 0.605996 0.044593 -0.059638 -0.000434 0.131661 ... 0.012396 0.021150 0.052632 0.020243 0.139535 0.287282 0.909091 0.065574 2011 21239
28 0.059138 0.064752 0.638889 0.500000 inf -0.305085 0.276265 0.013274 0.053785 0.750000 ... 0.062718 0.090663 0.111446 0.194969 0.304348 -0.032787 3.625000 -0.231884 2011 02108
29 0.058140 0.160180 0.000000 -0.076923 0.066667 -0.300000 -0.011696 0.449761 -0.021595 0.000000 ... -0.061798 -0.075630 0.116667 -0.031646 -0.166667 0.039474 NaN 0.243243 2011 02109
30 -0.063052 -0.103784 0.250000 NaN NaN 0.370370 0.048780 -0.142857 -0.006051 NaN ... 0.003125 0.058824 -0.353448 1.189189 0.023810 -0.180791 1.000000 -1.000000 2011 02110
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1113 0.105263 0.113636 NaN NaN NaN 0.000000 0.200000 NaN 0.076923 NaN ... 0.000000 NaN NaN NaN NaN 0.117647 NaN NaN 2012 98174
1114 -0.042822 -0.051336 -0.125581 0.877551 2.814815 0.249084 -0.249147 0.115789 -0.034704 0.218750 ... -0.030379 -0.018762 -0.068203 0.020460 -0.022472 -0.148976 -0.078788 -0.016129 2012 98177
1115 1.416667 1.363636 NaN NaN NaN NaN NaN NaN 2.000000 NaN ... 2.000000 2.000000 NaN NaN NaN NaN NaN NaN 2012 98195
1116 0.015511 0.028168 -0.506329 -0.320000 0.000000 -0.195021 0.324031 0.118474 0.004117 -0.015873 ... -0.018111 0.006938 -0.093333 0.005634 -0.050179 -0.150508 0.426316 0.274143 2012 98199
1117 0.058102 0.040674 0.042796 -0.145558 -0.020243 -0.085962 0.099969 0.053705 0.075424 -0.092852 ... 0.079340 0.042732 0.190141 0.039927 0.140449 0.082903 0.087059 0.032982 2012 20001
1118 0.023772 0.000326 0.030349 -0.052314 -0.164114 -0.195836 0.052069 0.040682 0.047449 0.144424 ... 0.077684 0.130186 0.227258 0.008772 0.163755 -0.017760 0.041111 0.198381 2012 20002
1119 0.005582 0.008190 -0.130178 -0.072539 0.078947 -0.033376 0.030635 0.133520 0.002805 0.462963 ... 0.013565 -0.046213 0.135808 0.058527 0.019386 0.002606 -0.086400 0.064275 2012 20003
1120 -0.036501 -0.034562 NaN NaN 0.100000 -0.263158 -0.121495 -0.121212 -0.038835 NaN ... -0.065502 -0.199052 -0.120192 -0.004762 0.702703 0.527778 0.042553 0.036364 2012 20004
1121 -0.016913 -0.006831 -0.516588 0.263158 -0.500000 0.036184 0.119671 0.062823 -0.030297 -0.342105 ... 0.033325 0.328157 0.054675 0.303393 -0.318996 -0.045562 -0.059701 0.336364 2012 20005
1122 0.036722 0.149821 -0.004255 -0.098901 0.934307 0.214286 0.618557 0.173913 -0.065330 0.018667 ... -0.114754 -0.288288 0.208333 -0.151515 -0.125000 0.031955 -0.096154 -0.027778 2012 20006
1123 0.013505 0.020748 -0.144404 -0.092949 0.060976 0.429775 -0.010230 -0.013346 0.007429 -0.091065 ... 0.016662 -0.076142 0.087134 0.035016 -0.013783 0.060950 -0.228814 0.045312 2012 20007
1124 0.002289 0.001387 -0.378049 -0.075269 -0.448980 -0.174731 -0.054545 0.125173 0.003013 -0.166667 ... -0.000245 0.042704 0.062567 -0.037350 -0.135484 0.075912 -0.154381 -0.043053 2012 20008
1125 0.051371 0.059250 -0.176471 0.607143 0.333333 -0.053369 0.093594 0.021614 0.043128 -0.067164 ... 0.049707 -0.002011 0.109026 0.236383 0.086900 0.029827 0.013889 -0.036675 2012 20009
1126 0.030348 0.057659 0.104046 0.046414 0.236264 -0.059182 0.083080 0.105894 0.004269 0.226087 ... 0.028387 0.101491 0.083226 0.035225 -0.102871 -0.129381 0.098535 0.021144 2012 20010
1127 0.053723 0.057124 0.199021 0.134948 -0.226950 -0.009031 0.108530 0.228043 0.050661 -0.029921 ... 0.035452 0.061126 0.093458 -0.029412 0.131805 0.027297 0.126404 -0.079032 2012 20011
1128 -0.013593 -0.016645 0.323077 -0.053333 -0.200000 -0.084459 -0.066815 0.162011 -0.010910 -0.405063 ... 0.008213 0.177553 0.056397 -0.093301 -0.089109 -0.103205 -0.250000 0.107692 2012 20012
1129 -0.011522 -0.009904 0.031915 -0.119048 -0.500000 0.073733 -0.021645 0.074324 -0.012938 0.100000 ... -0.007651 -0.025558 0.030109 0.112266 -0.065163 0.067329 0.115702 -0.021786 2012 20015
1130 0.004501 0.003835 -0.042995 0.032720 0.160112 0.026702 0.004762 0.048698 0.005052 -0.130061 ... 0.038142 0.072068 0.052823 0.005773 -0.072331 0.214286 -0.165094 0.205722 2012 20016
1131 0.046848 0.057826 -0.171141 -0.056180 1.386861 0.219731 0.216981 0.165517 0.037628 -0.025105 ... 0.024685 0.032774 0.062287 0.304054 0.000000 0.094582 -0.160870 0.390625 2012 20017
1132 0.053492 0.110942 0.882883 0.546875 1.314607 0.079167 -0.016051 0.306818 0.012352 0.094444 ... -0.003437 0.116856 0.195775 -0.065990 0.077586 0.192919 0.040816 0.000000 2012 20018
1133 0.009001 -0.012519 -0.068427 -0.071186 0.403846 0.116779 -0.056396 -0.077419 0.025391 -0.093290 ... 0.040800 -0.108061 0.054505 0.573333 -0.072727 0.033741 -0.144828 0.132948 2012 20019
1134 -0.000722 0.026623 -0.104631 0.225722 0.003597 0.131484 -0.003909 0.017555 -0.021959 -0.097453 ... -0.025962 -0.076844 0.023810 0.009217 -0.510490 0.015000 0.095238 -0.259740 2012 20020
1135 -0.002988 -0.009762 -0.380952 2.277778 -0.063636 1.029557 -0.058520 -0.018349 0.002864 -0.382353 ... 0.007317 0.009580 0.063529 0.008671 -0.141463 0.112063 -0.169935 -0.125984 2012 20024
1136 0.040256 0.035626 0.095775 -0.151724 -0.132104 -0.012956 0.030233 0.116766 0.044195 0.308661 ... 0.037448 -0.027751 -0.215017 0.250000 0.535714 0.112315 -0.036842 0.236111 2012 20032
1137 -0.081982 -0.173742 -0.142857 -0.400000 NaN -0.089947 -0.112861 -0.105769 0.007668 NaN ... 0.023622 0.054896 -0.001416 -0.089286 0.114833 -0.081531 -0.029412 0.196429 2012 20036
1138 0.013842 -0.027238 -0.024164 -0.023739 -0.132616 -0.260417 0.005587 0.117066 0.050985 -0.084652 ... 0.122787 0.196956 0.063181 0.070740 -0.033582 -0.128609 -0.275449 0.253731 2012 20037
1142 -0.046146 -0.034146 -0.065952 0.010526 0.493671 -0.241935 0.000000 NaN -0.056079 -0.110974 ... -0.658537 NaN NaN -0.548387 NaN -0.306452 NaN NaN 2012 20057
1143 0.023159 0.048780 0.102871 -0.080851 0.114286 0.044944 -0.324324 NaN -0.001157 -0.008929 ... 0.000000 0.666667 NaN NaN NaN -0.320000 -0.357143 0.266667 2012 20064
1151 0.175926 0.204082 NaN -0.111111 -0.200000 inf 0.550000 0.241379 -0.100000 NaN ... -0.100000 NaN NaN NaN NaN NaN NaN NaN 2012 20307
1152 0.020115 0.089431 NaN NaN NaN NaN NaN NaN -0.147059 NaN ... -0.147059 -0.107143 NaN NaN NaN -0.144928 NaN NaN 2012 20317

1034 rows × 31 columns


In [381]:
df_cen_growth2.to_csv('census_filtered.csv',encoding='utf-8',index=False)