In [1]:
from sqlalchemy import create_engine
import dbconfig
import pandas as pd

In [2]:
engine = create_engine('postgresql://{conf.user}:{conf.password}@{conf.host}:5432/{conf.database}'.format(conf=dbconfig))

In [3]:
pop_housing_sql = """SELECT census.*, groups.area from shape_files.census_pop_housing as census
                    JOIN shape_files.census_blocks_groups as groups
                    on census.tract = groups.tract
                    and census.block_group = groups.blkgrp;"""

In [4]:
pop_housing_raw = pd.read_sql_query(pop_housing_sql, con=engine)

Raw census data


In [5]:
pop_housing_raw.head()


Out[5]:
index tract block_group H0030001 H0030002 H0030003 H0040002 H0040004 H0050002 H0060002 ... P0120045 P0120046 P0120047 P0120048 P0120049 P0180001 P0180002 P0180003 P0180004 area
0 0 000200 1 576 516 60 1 513 50 9 ... 13 12 17 13 7 516 255 34 221 3123974.646
1 1 000700 1 142 94 48 0 94 16 77 ... 1 2 2 1 0 94 29 25 4 1076454.735
2 2 000700 2 1182 974 208 28 944 119 726 ... 7 7 1 14 14 974 114 93 21 3162576.446
3 3 000700 3 607 512 95 39 468 27 307 ... 7 7 2 2 8 512 42 33 9 4203521.971
4 4 000900 1 684 148 536 11 136 43 42 ... 6 2 6 0 1 148 58 17 41 1459837.484

5 rows × 115 columns

Calculating census features

list of feature description and calculation can be found in folder docs/data_dictionaries

features are claculated for each pair of census tract and block


In [5]:
features = pd.DataFrame({   'tract' : pop_housing_raw.tract,
                            'block_group' : pop_housing_raw.block_group,
                            'housing_density': pop_housing_raw.H0030001/pop_housing_raw.area,
                            'rate_occupied_units': pop_housing_raw.H0030002/pop_housing_raw.H0030001,
                            'rate_vacant_units': pop_housing_raw.H0030003/pop_housing_raw.H0030001,
                            'rate_mortgage_or_loan' : pop_housing_raw.H0040002/pop_housing_raw.H0030001,
                            'rate_renter_occupied' : pop_housing_raw.H0040004/pop_housing_raw.H0030001,
                            'rate_for_rent' : pop_housing_raw.H0050002/pop_housing_raw.H0030001,
                            'rate_white_householder' : pop_housing_raw.H0060002/pop_housing_raw.P0180001,
                            'rate_black_householder' : pop_housing_raw.H0060003/pop_housing_raw.P0180001,
                            'rate_native_householder' : (pop_housing_raw.H0060004+pop_housing_raw.H0060006)/pop_housing_raw.P0180001,
                            'rate_asian_householder' : pop_housing_raw.H0060005/pop_housing_raw.P0180001,
                            'rate_other_race_householder' : pop_housing_raw.H0060007/pop_housing_raw.P0180001,
                            'rate_pop_occupied_units' : pop_housing_raw.H0100001/pop_housing_raw.P0010001,
                            'rate_1_per_household' : pop_housing_raw.H0130002/pop_housing_raw.P0180001,
                            'rate_2_per_household' : pop_housing_raw.H0130003/pop_housing_raw.P0180001,
                            'rate_3_per_household' : pop_housing_raw.H0130004/pop_housing_raw.P0180001,
                            'rate_4_per_household' : pop_housing_raw.H0130005/pop_housing_raw.P0180001,
                            'rate_5_per_household' : pop_housing_raw.H0130006/pop_housing_raw.P0180001,
                            'rate_6_per_household' : pop_housing_raw.H0130007/pop_housing_raw.P0180001,
                            'rate_7_plus_per_household' : pop_housing_raw.H0130008/pop_housing_raw.P0180001,
                            'rate_owner_occupied' : pop_housing_raw.H0140002/pop_housing_raw.H0030001,
                            'rate_owner_occupied_white' : pop_housing_raw.H0140003/pop_housing_raw.H0140002,
                            'rate_owner_occupied_black' : pop_housing_raw.H0140004/pop_housing_raw.H0140002,
                            'rate_owner_occupied_native' : (pop_housing_raw.H0140005+pop_housing_raw.H0140007)/pop_housing_raw.H0140002,
                            'rate_owner_occupied_asian' : pop_housing_raw.H0140006/pop_housing_raw.H0140002,
                            'rate_owner_occupied_other_race' : pop_housing_raw.H0140008/pop_housing_raw.H0140002,
                            'rate_renter_occupied_white' : pop_housing_raw.H0140011/pop_housing_raw.H0040004,
                            'rate_renter_occupied_black' : pop_housing_raw.H0140012/pop_housing_raw.H0040004,
                            'rate_renter_occupied_native' : (pop_housing_raw.H0140013+pop_housing_raw.H0140015)/pop_housing_raw.H0040004,
                            'rate_renter_occupied_asian' : pop_housing_raw.H0140014/pop_housing_raw.H0040004,
                            'rate_renter_occupied_other' : pop_housing_raw.H0140016/pop_housing_raw.H0040004,
                            'rate_owner_occupied_hispanic' : pop_housing_raw.H0150004/pop_housing_raw.H0140002,
                            #'rate_renter_occupied_hispanic' : pop_housing_raw.H0150005/pop_housing_raw.H0040004,
                            'rate_owner_occupied_w_children' : pop_housing_raw.H0190003/pop_housing_raw.H0140002,
                            'rate_owner_occupied_no_children' : pop_housing_raw.H0190004/pop_housing_raw.H0140002,
                            'rate_renter_occupied_no_children' : 1-(pop_housing_raw.H0190006/pop_housing_raw.H0040004),
                            'rate_renter_occupied_w_children' : pop_housing_raw.H0190006/pop_housing_raw.H0040004,
                            'population_density' : pop_housing_raw.P0010001/pop_housing_raw.area,
                            'rate_white_pop' : pop_housing_raw.P0030002/pop_housing_raw.P0010001,
                            'rate_black_pop' : pop_housing_raw.P0030003/pop_housing_raw.P0010001,
                            'rate_native_pop' : (pop_housing_raw.P0030006+pop_housing_raw.P0030004)/pop_housing_raw.P0010001,
                            'rate_asian_pop' : pop_housing_raw.P0030005/pop_housing_raw.P0010001,
                            'rate_other_race_pop' : pop_housing_raw.P0030007/pop_housing_raw.P0010001,
                            'rate_pop_over_18' : pop_housing_raw.P0110001/pop_housing_raw.P0010001,
                            'rate_male_under_18' : (pop_housing_raw.P0120003+pop_housing_raw.P0120004+pop_housing_raw.P0120005+pop_housing_raw.P0120006)/pop_housing_raw.P0010001,                         
                            'rate_male_18_35' : pop_housing_raw[['P0120007','P0120008','P0120009','P0120010','P0120011','P0120012']].sum(axis=1)/pop_housing_raw.P0010001,                         
                            'rate_male_35_50' : pop_housing_raw[['P0120013','P0120014','P0120015']].sum(axis=1)/pop_housing_raw.P0010001,
                            'rate_male_50_75' : pop_housing_raw[['P0120016',	'P0120017',	'P0120018',	'P0120019',	'P0120020',	'P0120021',	'P0120022']].sum(axis=1)/pop_housing_raw.P0010001,
                            'rate_male_over_75' : pop_housing_raw[['P0120023','P0120024','P0120025']].sum(axis=1)/pop_housing_raw.P0010001,                         
                            'rate_female_under_18' : pop_housing_raw[['P0120027','P0120028','P0120029','P0120030']].sum(axis=1)/pop_housing_raw.P0010001,  
                            'rate_female_18_35' : pop_housing_raw[['P0120031',	'P0120032',	'P0120033',	'P0120034',	'P0120035',	'P0120036']].sum(axis=1)/pop_housing_raw.P0010001,                         
                            'rate_female_35_50' : pop_housing_raw[['P0120037',	'P0120038',	'P0120039']].sum(axis=1)/pop_housing_raw.P0010001,
                            'rate_female_50_75' : pop_housing_raw[['P0120040',	'P0120041',	'P0120042',	'P0120043',	'P0120044',	'P0120045',	'P0120046']].sum(axis=1)/pop_housing_raw.P0010001,
                            'rate_male_over_75' : pop_housing_raw[['P0120047','P0120048','P0120049']].sum(axis=1)/pop_housing_raw.P0010001,
                            'rate_households' : pop_housing_raw.P0180001/pop_housing_raw.H0030001})

In [7]:
features


Out[7]:
block_group housing_density population_density rate_1_per_household rate_2_per_household rate_3_per_household rate_4_per_household rate_5_per_household rate_6_per_household rate_7_plus_per_household ... rate_renter_occupied_black rate_renter_occupied_native rate_renter_occupied_no_children rate_renter_occupied_other rate_renter_occupied_w_children rate_renter_occupied_white rate_vacant_units rate_white_householder rate_white_pop tract
0 1 0.000184 0.000342 0.445736 0.265504 0.176357 0.065891 0.019380 0.019380 0.007752 ... 0.951267 0.001949 0.711501 0.001949 0.288499 0.017544 0.104167 0.017442 0.022472 000200
1 1 0.000132 0.001477 0.563830 0.329787 0.063830 0.031915 0.010638 0.000000 0.000000 ... 0.117021 0.000000 0.936170 0.010638 0.063830 0.819149 0.338028 0.819149 0.380503 000700
2 2 0.000374 0.000398 0.757700 0.211499 0.021561 0.004107 0.002053 0.001027 0.002053 ... 0.181144 0.000000 0.979873 0.009534 0.020127 0.741525 0.175973 0.745380 0.746032 000700
3 3 0.000144 0.000154 0.859375 0.121094 0.011719 0.001953 0.005859 0.000000 0.000000 ... 0.371795 0.004274 0.985043 0.010684 0.014957 0.574786 0.156507 0.599609 0.584877 000700
4 1 0.000469 0.000290 0.486486 0.310811 0.135135 0.047297 0.013514 0.006757 0.000000 ... 0.720588 0.007353 0.764706 0.000000 0.235294 0.264706 0.783626 0.283784 0.285377 000900
5 2 0.000268 0.000688 0.611276 0.240356 0.074184 0.047478 0.011869 0.002967 0.011869 ... 0.672727 0.003636 0.778182 0.000000 0.221818 0.301818 0.296451 0.418398 0.340391 000900
6 2 0.000003 0.000008 0.164384 0.497717 0.146119 0.118721 0.050228 0.018265 0.004566 ... 0.000000 0.000000 0.666667 0.000000 0.333333 1.000000 0.102459 1.000000 0.988889 020501
7 1 0.000003 0.000008 0.185606 0.429924 0.168561 0.142045 0.045455 0.018939 0.009470 ... 0.000000 0.035714 0.857143 0.000000 0.142857 0.964286 0.045208 0.992424 0.985778 020502
8 1 0.000488 0.000607 0.475771 0.207048 0.176211 0.070485 0.039648 0.017621 0.013216 ... 0.843318 0.000000 0.668203 0.004608 0.331797 0.115207 0.428212 0.132159 0.101215 001000
9 2 0.000386 0.000509 0.614964 0.293796 0.049270 0.029197 0.007299 0.000000 0.005474 ... 0.344828 0.000000 0.868966 0.004598 0.131034 0.597701 0.234637 0.653285 0.559915 001000
10 1 0.000157 0.000216 0.465394 0.252983 0.138425 0.062053 0.054893 0.016706 0.009547 ... 0.816901 0.008451 0.611268 0.005634 0.388732 0.157746 0.358346 0.262530 0.214444 001100
11 1 0.000460 0.000681 0.570248 0.169421 0.165289 0.037190 0.045455 0.004132 0.008264 ... 0.918803 0.004274 0.683761 0.000000 0.316239 0.076923 0.622465 0.082645 0.135933 001600
12 2 0.000182 0.000234 0.578358 0.190299 0.082090 0.048507 0.052239 0.041045 0.007463 ... 0.901288 0.000000 0.763948 0.000000 0.236052 0.090129 0.358852 0.134328 0.131970 001600
13 2 0.000014 0.000031 0.287215 0.412434 0.128722 0.103327 0.044658 0.014886 0.008757 ... 0.111111 0.000000 0.569444 0.000000 0.430556 0.875000 0.047540 0.937828 0.921486 020502
14 3 0.000004 0.000010 0.191558 0.392857 0.159091 0.146104 0.071429 0.025974 0.012987 ... 0.000000 0.000000 0.500000 0.000000 0.500000 1.000000 0.052308 0.980519 0.981618 020502
15 1 0.000394 0.000609 0.448214 0.239286 0.144643 0.082143 0.051786 0.016071 0.017857 ... 0.834308 0.001949 0.633528 0.001949 0.366472 0.148148 0.418484 0.173214 0.135081 001700
16 1 0.000169 0.000255 0.485788 0.356589 0.103359 0.041344 0.007752 0.002584 0.002584 ... 0.504464 0.004464 0.843750 0.000000 0.156250 0.473214 0.147577 0.620155 0.582482 001800
17 2 0.000151 0.000253 0.423181 0.309973 0.121294 0.067385 0.048518 0.018868 0.010782 ... 0.622807 0.000000 0.710526 0.000000 0.289474 0.364035 0.210638 0.401617 0.428934 001800
18 1 0.000157 0.000193 0.581081 0.299228 0.065637 0.034749 0.011583 0.005792 0.001931 ... 0.607670 0.002950 0.849558 0.002950 0.150442 0.342183 0.248186 0.488417 0.454976 001900
19 2 0.000364 0.000421 0.648393 0.277883 0.032136 0.028355 0.009452 0.003781 0.000000 ... 0.315789 0.000000 0.936288 0.008310 0.063712 0.645429 0.258065 0.705104 0.661818 002000
20 2 0.000042 0.000069 0.501558 0.342679 0.093458 0.049844 0.009346 0.003115 0.000000 ... 0.370787 0.000000 0.803371 0.005618 0.196629 0.584270 0.118132 0.676012 0.613977 001900
21 1 0.000106 0.000136 0.545161 0.348387 0.077419 0.019355 0.006452 0.003226 0.000000 ... 0.313131 0.000000 0.878788 0.000000 0.121212 0.606061 0.247573 0.696774 0.690702 002000
22 3 0.000195 0.000303 0.542625 0.282815 0.106901 0.051421 0.010825 0.005413 0.000000 ... 0.056948 0.006834 0.963554 0.009112 0.036446 0.879271 0.096577 0.910690 0.902516 005200
23 1 0.000150 0.000272 0.333333 0.271429 0.161905 0.104762 0.047619 0.047619 0.033333 ... 0.892157 0.000000 0.715686 0.009804 0.284314 0.078431 0.290541 0.100000 0.074627 002200
24 2 0.000154 0.000270 0.454545 0.272727 0.117424 0.079545 0.037879 0.018939 0.018939 ... 0.823834 0.005181 0.751295 0.010363 0.248705 0.145078 0.278689 0.166667 0.168487 002200
25 3 0.000272 0.000474 0.378882 0.275362 0.163561 0.097308 0.045549 0.024845 0.014493 ... 0.613514 0.002703 0.810811 0.010811 0.189189 0.324324 0.241758 0.289855 0.291629 002200
26 1 0.000115 0.000162 0.364035 0.280702 0.155702 0.111842 0.048246 0.019737 0.019737 ... 0.806061 0.006061 0.636364 0.006061 0.363636 0.166667 0.437037 0.243421 0.213473 002300
27 1 0.000269 0.000489 0.452883 0.322082 0.106892 0.066104 0.026723 0.019691 0.005626 ... 0.219662 0.004608 0.897081 0.006144 0.102919 0.645161 0.082581 0.666667 0.657427 002500
28 2 0.000224 0.000446 0.396104 0.305195 0.121212 0.088745 0.047619 0.034632 0.006494 ... 0.102302 0.000000 0.969309 0.010230 0.030691 0.787724 0.104651 0.798701 0.817739 002500
29 1 0.000340 0.000745 0.360767 0.249154 0.154453 0.118377 0.075536 0.029312 0.012401 ... 0.094458 0.002519 0.940806 0.011335 0.059194 0.838791 0.106747 0.844419 0.850184 002600
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
667 1 0.000184 0.000258 0.721340 0.181658 0.070547 0.015873 0.007055 0.001764 0.001764 ... 0.908915 0.001938 0.848837 0.000000 0.151163 0.067829 0.012195 0.135802 0.158612 026500
668 2 0.000059 0.000071 0.688229 0.265111 0.038176 0.006363 0.002121 0.000000 0.000000 ... 0.194993 0.003953 0.974967 0.005270 0.025033 0.718050 0.157283 0.752916 0.746302 026500
669 1 0.000028 0.000041 0.384892 0.471223 0.089928 0.032374 0.007194 0.007194 0.007194 ... 0.238095 0.000000 0.634921 0.000000 0.365079 0.698413 0.198847 0.874101 0.833333 026600
670 2 0.000063 0.000115 0.570588 0.208824 0.094118 0.058824 0.044118 0.011765 0.011765 ... 0.812057 0.007092 0.801418 0.007092 0.198582 0.106383 0.207459 0.091176 0.089514 027000
671 2 0.000022 0.000040 0.342541 0.419890 0.099448 0.060773 0.038674 0.016575 0.022099 ... 0.325301 0.000000 0.734940 0.000000 0.265060 0.662651 0.173516 0.751381 0.777778 026600
672 3 0.000026 0.000053 0.391791 0.309701 0.138060 0.070896 0.033582 0.029851 0.026119 ... 0.078947 0.000000 0.796053 0.000000 0.203947 0.914474 0.106667 0.936567 0.914191 026600
673 1 0.000052 0.000090 0.309278 0.278351 0.171821 0.123711 0.072165 0.030928 0.013746 ... 0.894444 0.000000 0.566667 0.000000 0.433333 0.088889 0.315294 0.075601 0.061911 026700
674 2 0.000152 0.000202 0.401985 0.287841 0.138958 0.079404 0.052109 0.017370 0.022333 ... 0.919505 0.003096 0.625387 0.003096 0.374613 0.068111 0.423462 0.081886 0.065380 026700
675 1 0.000175 0.000223 0.613892 0.295199 0.063330 0.022472 0.004086 0.001021 0.000000 ... 0.037398 0.000000 0.977236 0.008130 0.022764 0.904065 0.156761 0.928498 0.928427 026800
676 1 0.000119 0.000167 0.375000 0.348214 0.111607 0.071429 0.040179 0.017857 0.035714 ... 0.784431 0.005988 0.670659 0.017964 0.329341 0.191617 0.421189 0.218750 0.231193 026900
677 2 0.000489 0.000686 0.740426 0.097872 0.070213 0.061702 0.023404 0.002128 0.004255 ... 0.964365 0.006682 0.841871 0.000000 0.158129 0.020045 0.096154 0.021277 0.020548 026900
678 3 0.000099 0.000108 0.488000 0.248000 0.136000 0.060000 0.056000 0.008000 0.004000 ... 0.877551 0.000000 0.693878 0.005102 0.306122 0.102041 0.465812 0.108000 0.105469 026900
679 4 0.000268 0.000382 0.434286 0.177143 0.097143 0.125714 0.114286 0.034286 0.017143 ... 0.932331 0.015038 0.578947 0.000000 0.421053 0.030075 0.440895 0.062857 0.065022 026900
680 1 0.000209 0.000343 0.556172 0.162275 0.117892 0.080444 0.038835 0.029126 0.015257 ... 0.972927 0.003384 0.714044 0.003384 0.285956 0.016920 0.207692 0.019417 0.016064 027000
681 3 0.000090 0.000130 0.484932 0.205479 0.158904 0.076712 0.038356 0.016438 0.019178 ... 0.935849 0.000000 0.716981 0.000000 0.283019 0.045283 0.344704 0.038356 0.032338 027000
682 4 0.000110 0.000249 0.261146 0.261146 0.203822 0.121019 0.095541 0.031847 0.025478 ... 0.947826 0.000000 0.434783 0.000000 0.565217 0.026087 0.190722 0.019108 0.018265 027000
683 1 0.000137 0.000276 0.407713 0.312672 0.146006 0.078512 0.035813 0.013774 0.005510 ... 0.852713 0.003876 0.810078 0.000000 0.189922 0.135659 0.047244 0.152893 0.120678 027100
684 2 0.000118 0.000172 0.711009 0.157492 0.059633 0.045872 0.013761 0.006116 0.006116 ... 0.857364 0.004651 0.849612 0.004651 0.150388 0.116279 0.118598 0.116208 0.088889 027100
685 3 0.000017 0.000050 0.444898 0.269388 0.151020 0.077551 0.040816 0.016327 0.000000 ... 0.961207 0.004310 0.607759 0.008621 0.392241 0.017241 0.152249 0.016327 0.178114 027100
686 4 0.000181 0.000283 0.526316 0.277512 0.126794 0.045455 0.011962 0.009569 0.002392 ... 0.825397 0.003968 0.873016 0.000000 0.126984 0.158730 0.123690 0.177033 0.158602 027100
687 1 0.000083 0.000166 0.369748 0.294118 0.131653 0.084034 0.075630 0.030812 0.014006 ... 0.624390 0.000000 0.668293 0.029268 0.331707 0.317073 0.227273 0.406162 0.346609 027200
688 2 0.000076 0.000136 0.299180 0.258197 0.135246 0.122951 0.077869 0.049180 0.057377 ... 0.668571 0.000000 0.508571 0.034286 0.491429 0.291429 0.420428 0.401639 0.351926 027200
689 3 0.000119 0.000177 0.585075 0.188060 0.062687 0.053731 0.047761 0.023881 0.038806 ... 0.425926 0.000000 0.811111 0.000000 0.188889 0.544444 0.273319 0.555224 0.434402 027200
690 1 0.000004 0.000008 0.329730 0.324324 0.167568 0.097297 0.059459 0.021622 0.000000 ... 0.250000 0.000000 0.659091 0.000000 0.340909 0.704545 0.093137 0.789189 0.778824 027300
691 2 0.000021 0.000056 0.173797 0.264706 0.187166 0.203209 0.120321 0.048128 0.002674 ... 0.000000 0.000000 0.388889 0.000000 0.611111 1.000000 0.085575 0.989305 0.983900 027300
692 3 0.000026 0.000074 0.161458 0.307292 0.177083 0.192708 0.122396 0.031250 0.007812 ... 0.000000 0.000000 0.370370 0.037037 0.629630 0.962963 0.032746 0.992188 0.988526 027300
693 1 0.000042 0.000093 0.285372 0.333333 0.201439 0.081535 0.052758 0.028777 0.016787 ... 0.100000 0.010000 0.590000 0.000000 0.410000 0.860000 0.081498 0.940048 0.915521 027400
694 2 0.000052 0.000099 0.365025 0.300509 0.157895 0.089983 0.054329 0.015280 0.016978 ... 0.379404 0.013550 0.653117 0.005420 0.346883 0.563686 0.176224 0.694397 0.701767 027400
695 3 0.000050 0.000098 0.349544 0.361702 0.088146 0.121581 0.051672 0.012158 0.015198 ... 0.194118 0.005882 0.682353 0.017647 0.317647 0.735294 0.138743 0.832827 0.806711 027400
696 4 0.000062 0.000116 0.370614 0.285088 0.153509 0.105263 0.039474 0.021930 0.024123 ... 0.717514 0.000000 0.703390 0.033898 0.296610 0.234463 0.198594 0.331140 0.315005 027400

697 rows × 54 columns


In [10]:
features.to_sql('census_features', engine, schema='shape_files', if_exists='replace', index=False)