Data to compile
geographies:
csa
populations
under census_2010_sf1/
In [ ]:
import census_api_utils as utils
import csv
import os
import codecs
import pandas as pd
from pandas import DataFrame, Series
In [ ]:
from census_api_utils import (states, counties, msas, csas,
districts, zip_code_tabulation_areas, places,
tracts, census_labels, rdot_labels, diversity, FINAL_LABELS)
# De-duplicated union of the labels returned by rdot_labels() and
# census_labels() together with FINAL_LABELS.
CORE_LABELS = set(rdot_labels()).union(census_labels(), FINAL_LABELS)

# (file-name suffix, column labels) pairs.  write_csvs() iterates this list
# and writes one CSV per pair, keeping only the listed columns.
CSVS_TO_WRITE = [
    ('population', ['NAME', 'P0050001', 'FIPS']),
    ('P005', census_labels() + ['FIPS']),
    ('five_categories', ['NAME', 'Total'] + rdot_labels() + ['FIPS']),
    ('diversity_measures', FINAL_LABELS + ['FIPS']),
]

# Presumably the total U.S. population counted in the 2010 census
# (going by the name) -- confirm against the source of the figure.
US_POPULATION_2010 = 308745538
def rename_column(df, rename_dict):
    """Rename the columns of *df* in place and return it.

    Args:
        df: DataFrame whose column labels are rewritten (the object passed
            in is mutated, not copied).
        rename_dict: Mapping of existing column label -> new label.  Labels
            that are not keys of the mapping are left unchanged; keys that
            match no column are ignored.

    Returns:
        The same ``df`` object, so calls can be chained.
    """
    df.columns = [rename_dict.get(col, col) for col in df.columns]
    return df
def calc_diversity(geog):
    """Fetch the census rows for one geography and add diversity measures.

    Args:
        geog: Callable that is given the list returned by census_labels()
            and returns an iterable of rows suitable for building a
            DataFrame (e.g. ``states`` or ``counties``).

    Returns:
        Whatever diversity() returns for the DataFrame built from those rows.
    """
    # Materialise the rows first so DataFrame gets a concrete list.
    records = list(geog(census_labels()))
    return diversity(DataFrame(records))
def write_csvs(df, geog_name, dirpath="census_2010_sf1"):
    """Write one CSV file per entry of CSVS_TO_WRITE for a geography.

    Each file is named ``<geog_name>_<csv_label>.csv`` inside *dirpath* and
    contains only the columns listed for that entry.  The DataFrame index is
    not written and every non-numeric field is quoted.

    Args:
        df: DataFrame holding (at least) every column named in
            CSVS_TO_WRITE.
        geog_name: Prefix for the output file names (e.g. ``'state'``).
        dirpath: Directory the files are written to; created if missing.
    """
    # Create the output directory up front so the first to_csv call does not
    # fail on a fresh checkout.  (isdir + makedirs rather than exist_ok=True
    # to stay compatible with older Python versions.)
    if dirpath and not os.path.isdir(dirpath):
        os.makedirs(dirpath)

    for csv_label, df_labels in CSVS_TO_WRITE:
        fname = "{}_{}.csv".format(geog_name, csv_label)
        df[df_labels].to_csv(
            path_or_buf=os.path.join(dirpath, fname),
            index=False,  # documented way to omit the index (was index=None)
            quoting=csv.QUOTE_NONNUMERIC)
In [ ]:
# state
df = calc_diversity(states)
# The identifier for a state row is simply its 'state' column, so copy the
# column directly instead of calling a Python lambda once per row.
df['FIPS'] = df['state']
write_csvs(df, 'state')
In [ ]:
# counties
df = calc_diversity(counties)
# Identifier = 'state' column followed by 'county' column.  Column-wise
# addition gives the same result as the former per-row lambda without a
# Python call per row (assumes both columns hold strings, as the row-wise
# version did -- confirm).
df["FIPS"] = df["state"] + df["county"]
write_csvs(df, 'county')
In [ ]:
# msas
# special case: this cell does not go through calc_diversity(); rows that
# share the same MSA code are merged into one row before diversity() runs.
# NOTE(review): presumably an MSA can come back as several rows (e.g. one per
# state it spans) -- confirm against census_api_utils.msas.
r = list(msas(census_labels()))
df=DataFrame(r)
# Cast the label columns (everything census_labels() returns except the name)
# to int so they can be summed per group.
df[census_labels(include_name=False)] = df[census_labels(include_name=False)].astype('int')
msas_grouped = df.groupby('metropolitan statistical area/micropolitan statistical area')
# deal with names in df1 and populations in df2
# df1: one row per group with a single column 'msas' holding the list of the
# group's NAME values.
df1 = msas_grouped.apply(lambda x:Series((list(x['NAME']), ),
index=['msas']))
# df2: per-group sums of the columns.
df2 = msas_grouped.sum()
# Put the name lists and the sums side by side (both are indexed by the
# group key).
df3 = pd.concat((df1,df2), axis=1)
# Display name of the merged row: the member names joined with "; ".
df3['NAME'] = df3.apply(lambda x: "; ".join(x['msas']), axis=1)
# The group key (the index) doubles as the identifier column.
df3['FIPS'] = df3.index
msas_df = diversity(df3)
write_csvs(msas_df,'msa')
In [ ]:
#csas
# Like the MSA cell, this one does not go through calc_diversity(); rows that
# share the same 'combined statistical area' code are merged into one row
# before diversity() runs.
# NOTE(review): presumably a CSA can come back as several rows -- confirm
# against census_api_utils.csas.
r = list(csas(census_labels()))
df=DataFrame(r)
# Cast the label columns (everything census_labels() returns except the name)
# to int so they can be summed per group.
df[census_labels(include_name=False)] = df[census_labels(include_name=False)].astype('int')
csas_grouped = df.groupby('combined statistical area')
# deal with names in df1 and populations in df2
# df1: one row per group with a single column 'csas' holding the list of the
# group's NAME values.
df1 = csas_grouped.apply(lambda x:Series((list(x['NAME']), ),
index=['csas']))
# df2: per-group sums of the columns.
df2 = csas_grouped.sum()
# Put the name lists and the sums side by side (both are indexed by the
# group key).
df3 = pd.concat((df1,df2), axis=1)
# Display name of the merged row: the member names joined with "; ".
df3['NAME'] = df3.apply(lambda x: "; ".join(x['csas']), axis=1)
# The group key (the index) doubles as the identifier column.
df3['FIPS'] = df3.index
csas_df = diversity(df3)
write_csvs(csas_df,'csa')
In [ ]:
# districts
df = calc_diversity(districts)
# not right to call it FIPS but I will
# Identifier = 'state' column followed by 'congressional district' column,
# built column-wise instead of with a per-row lambda (assumes both columns
# hold strings, as the row-wise version did -- confirm).
df["FIPS"] = df["state"] + df["congressional district"]
write_csvs(df, 'district')
In [ ]:
# tract
df = calc_diversity(tracts)
# Identifier = 'state' + 'county' + 'tract' columns, built column-wise
# instead of with a per-row lambda (assumes the columns hold strings, as the
# row-wise version did -- confirm).
df["FIPS"] = df["state"] + df["county"] + df["tract"]
write_csvs(df, 'tract')
# Display the tracts from highest to lowest 'entropy5'.  DataFrame.sort was
# removed from pandas; sort_values is its replacement.
df.sort_values('entropy5', ascending=False)[['entropy5', 'FIPS', 'NAME', 'Total']]
In [ ]:
# place
df = calc_diversity(places)
# Identifier = 'state' column followed by 'place' column, built column-wise
# instead of with a per-row lambda (assumes both columns hold strings, as
# the row-wise version did -- confirm).
df["FIPS"] = df["state"] + df["place"]
write_csvs(df, 'place')