Data to compile

geographies:

  • states
  • counties
  • msa
  • csa

  • populations

  • populations with P005*
  • populations and 5 categories
  • populations with entropy calcs

under census_2010_sf1/

  • state_population.csv
  • state_P005.csv
  • state_five_categories.csv
  • state_diversity_measures.csv
  • populations ['NAME', 'P0050001', 'FIPS']
  • populations with P005* default-labels
  • populations and 5 categories
  • populations with entropy calcs

In [ ]:
import census_api_utils as utils
import csv
import os
import codecs

import pandas as pd
from pandas import DataFrame, Series

In [ ]:
from census_api_utils import (states, counties, msas, csas, 
                              districts, zip_code_tabulation_areas, places,
                              tracts, census_labels, rdot_labels, diversity, FINAL_LABELS)

# Union of the rdot labels, the census labels and the final (diversity) labels.
CORE_LABELS = set(rdot_labels()).union(census_labels(), FINAL_LABELS)

# (file-name suffix, columns to export) pairs; write_csvs() emits one CSV per
# pair, named "<geography>_<suffix>.csv".
CSVS_TO_WRITE = [
    ('population', ['NAME', 'P0050001', 'FIPS']),
    ('P005', census_labels() + ['FIPS']),
    ('five_categories', ['NAME', 'Total'] + rdot_labels() + ['FIPS']),
    ('diversity_measures', FINAL_LABELS + ['FIPS']),
]

# NOTE(review): presumably the total US population from the 2010 census --
# confirm against the official figure before relying on it.
US_POPULATION_2010 = 308745538

def rename_column(df, rename_dict):
    """Rename the columns of *df* in place and return the same frame.

    Every column label found as a key in *rename_dict* is replaced by the
    mapped value; labels without an entry are kept unchanged. Column order
    is preserved.
    """
    df.columns = [rename_dict.get(label, label) for label in df.columns]
    return df

def calc_diversity(geog):
    """Build a DataFrame for one geography and pass it through diversity().

    *geog* is a callable that accepts the list of census labels and returns
    an iterable of rows (e.g. ``states`` or ``counties``). The rows are
    materialised, loaded into a DataFrame and handed to ``diversity``, whose
    result is returned.
    """
    rows = list(geog(census_labels()))
    return diversity(DataFrame(rows))


def write_csvs(df, geog_name, dirpath="census_2010_sf1"):
    """Write one CSV file per entry of CSVS_TO_WRITE for a geography.

    For every ``(csv_label, df_labels)`` pair the columns *df_labels* of
    *df* are written to ``<dirpath>/<geog_name>_<csv_label>.csv``. The index
    is not written and every non-numeric field is quoted.

    *dirpath* is created when it does not exist yet. A ``KeyError`` from
    pandas propagates if *df* lacks one of the requested columns.
    """
    # to_csv() does not create missing directories, so make sure the target
    # exists (an empty dirpath means "current directory" and needs nothing).
    if dirpath and not os.path.isdir(dirpath):
        os.makedirs(dirpath)

    for csv_label, df_labels in CSVS_TO_WRITE:
        fname = "{}_{}.csv".format(geog_name, csv_label)
        df[df_labels].to_csv(path_or_buf=os.path.join(dirpath, fname),
                             index=False,
                             quoting=csv.QUOTE_NONNUMERIC)

In [ ]:
# states
df = calc_diversity(states)

# The FIPS column for a state row is just its 'state' value; copy the column
# directly instead of evaluating a lambda once per row.
df['FIPS'] = df['state']

write_csvs(df, 'state')

In [ ]:
# counties
df = calc_diversity(counties)
# FIPS = 'state' value followed by 'county' value, combined column-wise (the
# same element-wise "+" the per-row lambda applied, without the Python loop).
df["FIPS"] = df["state"] + df["county"]
write_csvs(df, 'county')

In [ ]:
# msas
# special case: the rows returned by msas() are grouped by their MSA code and
# aggregated before diversity() is applied, instead of going through
# calc_diversity().
# NOTE(review): presumably one MSA can span several input rows -- confirm
# against census_api_utils.msas.

r = list(msas(census_labels()))
df=DataFrame(r)
# cast the columns named by census_labels(include_name=False) to int so that
# the group-wise sum below adds them numerically
df[census_labels(include_name=False)] = df[census_labels(include_name=False)].astype('int')

msas_grouped = df.groupby('metropolitan statistical area/micropolitan statistical area')

# deal with names in df1 and populations in df2

# df1: for each group, a single 'msas' entry holding the list of NAME values
df1 = msas_grouped.apply(lambda x:Series((list(x['NAME']), ), 
                                         index=['msas']))


# df2: per-group column sums
# NOTE(review): how sum() treats the non-numeric columns depends on the
# pandas version -- confirm the resulting columns when upgrading.
df2 = msas_grouped.sum()
df3 = pd.concat((df1,df2), axis=1)
# join each group's list of names into one "; "-separated NAME string
df3['NAME'] = df3.apply(lambda x: "; ".join(x['msas']), axis=1)

# the group key (now the index) is stored in the FIPS column
df3['FIPS'] = df3.index
msas_df = diversity(df3)
write_csvs(msas_df,'msa')

In [ ]:
#csas
# The rows returned by csas() are grouped by their 'combined statistical
# area' value and aggregated before diversity() is applied, instead of going
# through calc_diversity().
# NOTE(review): presumably one CSA can span several input rows -- confirm
# against census_api_utils.csas.

r = list(csas(census_labels()))
df=DataFrame(r)
# cast the columns named by census_labels(include_name=False) to int so that
# the group-wise sum below adds them numerically
df[census_labels(include_name=False)] = df[census_labels(include_name=False)].astype('int')

csas_grouped = df.groupby('combined statistical area')

# deal with names in df1 and populations in df2

# df1: for each group, a single 'csas' entry holding the list of NAME values
df1 = csas_grouped.apply(lambda x:Series((list(x['NAME']), ), 
                                         index=['csas']))


# df2: per-group column sums
# NOTE(review): how sum() treats the non-numeric columns depends on the
# pandas version -- confirm the resulting columns when upgrading.
df2 = csas_grouped.sum()
df3 = pd.concat((df1,df2), axis=1)
# join each group's list of names into one "; "-separated NAME string
df3['NAME'] = df3.apply(lambda x: "; ".join(x['csas']), axis=1)

# the group key (now the index) is stored in the FIPS column
df3['FIPS'] = df3.index
csas_df = diversity(df3)
write_csvs(csas_df,'csa')

In [ ]:
# districts

df = calc_diversity(districts)
# Not strictly a FIPS code, but the same column name is reused so that
# write_csvs() can treat every geography alike. The identifier is the 'state'
# value followed by the 'congressional district' value, combined column-wise
# instead of with a per-row lambda.
df["FIPS"] = df["state"] + df["congressional district"]
write_csvs(df, 'district')

In [ ]:
# tract

df = calc_diversity(tracts)
# FIPS = 'state' + 'county' + 'tract' values, combined column-wise; a per-row
# lambda is needlessly slow when the frame has many rows.
df["FIPS"] = df["state"] + df["county"] + df["tract"]
write_csvs(df, 'tract')
# Display the tracts ordered from highest to lowest entropy5.
# DataFrame.sort() was removed from pandas; sort_values() is its replacement.
df.sort_values('entropy5', ascending=False)[['entropy5', 'FIPS', 'NAME', 'Total']]

In [ ]:
# place

df = calc_diversity(places)
# FIPS = 'state' value followed by 'place' value, combined column-wise
# instead of with a per-row lambda.
df["FIPS"] = df["state"] + df["place"]
write_csvs(df, 'place')