Let's work with the collection of state-level CSV files in `census_2010_sf1/`.
In [24]:
import csv
import numpy as np
import pandas as pd
from pandas import (DataFrame, Series)
In [7]:
import glob

# glob.glob returns matches in arbitrary (filesystem-dependent) order, so sort
# them to keep the displayed listing stable across runs and machines.
sorted(glob.glob("census_2010_sf1/state*"))
Out[7]:
In [14]:
# Read the file with pd.read_csv rather than DataFrame.from_csv, because
# read_csv accepts a `dtype` mapping; here FIPS is read as a string instead
# of being parsed as a number.
# Docs: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html
df = pd.read_csv(
    "census_2010_sf1/state_population.csv",
    dtype={"FIPS": str},
)
df.head()
Out[14]:
In [33]:
# use some of the pre-written code
from census_api_utils import entropy
In [19]:
# Load the state diversity measures, reading FIPS as a string rather than
# letting pandas infer a numeric dtype. Note: this rebinds `df`.
df = pd.read_csv(
    "census_2010_sf1/state_diversity_measures.csv",
    dtype={"FIPS": str},
)
df.head()
Out[19]:
In [26]:
# Sanity check: apply `entropy` to each row of the five group columns and
# confirm the result matches the `entropy5` column already in the frame.
# https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_array_almost_equal.html
group_columns = ["White", "Black", "Asian", "Hispanic", "Other"]
recomputed_entropy = df[group_columns].apply(entropy, axis=1)
np.testing.assert_array_almost_equal(recomputed_entropy, df.entropy5)
In [34]:
# DataFrame.sort was deprecated in pandas 0.17 and removed in 0.20;
# sort_values is its replacement for ordering rows by a column.
# Show the first rows of the frame ordered by entropy5, highest first.
df.sort_values('entropy5', ascending=False).head()
Out[34]:
In [ ]: