Goals

Learn about how to use the Census variables around Hispanic origin to calculate quantities around diversity (remembering the Racial Dot Map as our framing example)



In [1]:

    
%pylab --no-import-all inline









    



Populating the interactive namespace from numpy and matplotlib



In [2]:

    
import numpy as np
import matplotlib.pyplot as plt
from pandas import DataFrame, Series, Index
import pandas as pd

from itertools import islice



In [3]:

    
import census
import us

import settings

The census documentation has example URLs but needs your API key to work. In this notebook, we'll use the IPython notebook HTML display mechanism to help out.



In [4]:

    
# Census API client used by all of the geography generators in this notebook;
# the API key is read from the local `settings` module rather than hard-coded here.
c = census.Census(key=settings.CENSUS_KEY)



In [5]:

    
# generators for the various census geographic entities of interest

def states(variables='NAME'):
    """Yield one SF1 record (a dict) per state listed in ``us.states.STATES``.

    variables -- field name(s) passed straight through to ``c.sf1.get``.

    A single ``state:*`` query is issued; records whose ``state`` FIPS code
    is not among ``us.states.STATES`` are dropped (the non-states).
    """
    known_fips = {s.fips for s in us.states.STATES}
    all_records = c.sf1.get(variables, geo={'for': 'state:*'})
    for record in all_records:
        if record['state'] not in known_fips:
            continue
        yield record
            
def counties(variables='NAME'):
    """Yield an SF1 record for every county, fetched with a single API call.

    variables -- field name(s) passed straight through to ``c.sf1.get``.

    Counties whose ``state`` FIPS code is not among ``us.states.STATES``
    are skipped.
    """
    # FIPS codes of the states we want to keep
    known_fips = {s.fips for s in us.states.STATES}

    query = {'for': 'county:*', 'in': 'state:*'}
    for record in c.sf1.get(variables, geo=query):
        if record['state'] not in known_fips:
            continue
        yield record
        

def counties2(variables='NAME'):
    """Yield an SF1 record for every county, querying one state at a time.

    All counties can be fetched in one call (see ``counties``); this variant
    exists to demonstrate walking through the states to reach the counties.
    """
    for state in us.states.STATES:
        within = 'state:{fips}'.format(fips=state.fips)
        records = c.sf1.get(variables, geo={'for': 'county:*', 'in': within})
        for record in records:
            yield record

            
def tracts(variables='NAME'):
    """Yield an SF1 record for every census tract.

    Walks state -> county -> tract: the county level is queried for 'NAME'
    only (just the county FIPS code is needed), and ``variables`` is
    requested at the tract level.
    """
    for state in us.states.STATES:
        # uncomment to monitor progress:
        # print state.fips, state
        state_clause = 'state:{fips}'.format(fips=state.fips)
        county_query = {'for': 'county:*', 'in': state_clause}

        for county in c.sf1.get('NAME', geo=county_query):
            # print county['state'], county['NAME']
            parent = 'state:{s_fips} county:{c_fips}'.format(s_fips=state.fips,
                                                             c_fips=county['county'])
            tract_query = {'for': 'tract:*', 'in': parent}

            for record in c.sf1.get(variables, geo=tract_query):
                yield record



In [6]:

    
def block_groups(variables='NAME'):
    """Yield an SF1 record for every block group, one county at a time.

    variables -- field name(s) requested for each block group.

    Example query:
    http://api.census.gov/data/2010/sf1?get=P0010001&for=block+group:*&in=state:02+county:170
    """
    # The county records are only used for their state/county FIPS codes, so
    # fetch them with the default 'NAME' field instead of downloading every
    # requested variable at the county level as well (same as tracts() does).
    for county in counties():
        geo = {'for':'block group:*',
               'in':'state:{state} county:{county}'.format(state=county['state'],
                                                county=county['county'])
               }
        for block_group in c.sf1.get(variables, geo=geo):
            yield block_group
    
    
def blocks(variables='NAME'):
    """Yield an SF1 record for every census block, one tract at a time.

    variables -- field name(s) requested for each block.

    Example query:
    http://api.census.gov/data/2010/sf1?get=P0010001&for=block:*&in=state:02+county:290+tract:00100
    """
    # The tract records are only used for their state/county/tract codes, so
    # fetch them with the default 'NAME' field rather than pulling every
    # requested variable for every tract before reaching the blocks.
    for tract in tracts():
        geo={'for':'block:*',
             'in':'state:{state} county:{county} tract:{tract}'.format(state=tract['state'],
                                                                       county=tract['county'],
                                                                       tract=tract['tract'])
             }
        for block in c.sf1.get(variables, geo=geo):
            yield block



In [7]:

    
# msa, csas, districts, zip_codes

def _geographies_by_state(geography, variables):
    """Yield every SF1 record of the ``geography`` type, querying state by state.

    geography -- the Census API geography name used in the ``for`` clause,
                 e.g. 'congressional district'.
    variables -- field name(s) passed straight through to ``c.sf1.get``.
    """
    for state in us.STATES:
        geo = {'for': '{geography}:*'.format(geography=geography),
               'in': 'state:{state_fips}'.format(state_fips=state.fips)
               }

        for record in c.sf1.get(variables, geo=geo):
            yield record


def msas(variables="NAME"):
    """Yield the metropolitan/micropolitan statistical area records of each state."""
    geography = 'metropolitan statistical area/micropolitan statistical area'
    for msa in _geographies_by_state(geography, variables):
        yield msa

def csas(variables="NAME"):
    """Yield the combined statistical area records of each state."""
    # http://api.census.gov/data/2010/sf1?get=P0010001&for=combined+statistical+area:*&in=state:24
    for csa in _geographies_by_state('combined statistical area', variables):
        yield csa

def districts(variables="NAME"):
    """Yield the congressional district records of each state."""
    # http://api.census.gov/data/2010/sf1?get=P0010001&for=congressional+district:*&in=state:24
    for district in _geographies_by_state('congressional district', variables):
        yield district

def zip_code_tabulation_areas(variables="NAME"):
    """Yield the ZIP code tabulation area records of each state."""
    # http://api.census.gov/data/2010/sf1?get=P0010001&for=zip+code+tabulation+area:*&in=state:02
    for zip_code_tabulation_area in _geographies_by_state('zip code tabulation area', variables):
        yield zip_code_tabulation_area



In [8]:

    
# sanity check: islice stops the generator after the first MSA record
list(islice(msas(), 1))









    Out[8]:





[{u'NAME': u'Albertville, AL Micro Area',
  u'metropolitan statistical area/micropolitan statistical area': u'10700',
  u'state': u'01'}]



In [9]:

    
# sanity check: first combined statistical area record only
list(islice(csas(), 1))









    Out[9]:





[{u'NAME': u'Atlanta-Sandy Springs-Gainesville, GA-AL CSA (part)',
  u'combined statistical area': u'122',
  u'state': u'01'}]



In [10]:

    
# keep the first congressional district record in a variable, then display it
districts_list = list(islice(districts(), 1))
districts_list









    Out[10]:





[{u'NAME': u'Congressional District 1',
  u'congressional district': u'01',
  u'state': u'01'}]



In [11]:

    
# sanity check: first ZIP code tabulation area record only
list(islice(zip_code_tabulation_areas(), 1))









    Out[11]:





[{u'NAME': u'ZCTA5 30165 (part)',
  u'state': u'01',
  u'zip code tabulation area': u'30165'}]

Note: There are definitely improvements to be made in these generators. One of the most important would be to limit the generators to specific geographies -- typically, we don't want all the blocks in the country, just the ones in a specific area. A good exercise would be to rewrite our generators to allow for a limited geography.

We can compare the total number of tracts we calculate to:

https://www.census.gov/geo/maps-data/data/tallies/tractblock.html

and

https://www.census.gov/geo/maps-data/data/docs/geo_tallies/Tract_Block2010.txt

Hispanic or Latino Origin and Racial Subcategories

http://www.census.gov/developers/data/sf1.xml

compare to http://www.census.gov/prod/cen2010/briefs/c2010br-02.pdf

I think the P0050001 might be the key category

P0010001 = P0050001
P0050001 = P0050002 + P0050010

P0050002 Not Hispanic or Latino (total) =

P0050003 Not Hispanic White only
P0050004 Not Hispanic Black only
P0050006 Not Hispanic Asian only
Not Hispanic Other (should also be P0050002 - (P0050003 + P0050004 + P0050006))
- P0050005 Not Hispanic: American Indian and Alaska Native alone
- P0050007 Not Hispanic: Native Hawaiian and Other Pacific Islander alone
- P0050008 Not Hispanic: Some Other Race alone
- P0050009 Not Hispanic: Two or More Races
P0050010 Hispanic or Latino

P0050010 = P0050011...P0050017

From Hispanic and Latino Americans (Wikipedia):

While the two terms are sometimes used interchangeably, Hispanic is a narrower term which mostly refers to persons of Spanish speaking origin or ancestry, while Latino is more frequently used to refer more generally to anyone of Latin American origin or ancestry, including Brazilians.

and

The Census Bureau's 2010 census does provide a definition of the terms Latino or Hispanic and is as follows: “Hispanic or Latino” refers to a person of Cuban, Mexican, Puerto Rican, South or Central American, or other Spanish culture or origin regardless of race. It allows respondents to self-define whether they were Latino or Hispanic and then identify their specific country or place of origin.[52] On its website, the Census Bureau defines "Hispanic" or "Latino" persons as being "persons who trace their origin [to]... Spanish speaking Central and South America countries, and other Spanish cultures".

In the Racial Dot Map: "Whites are coded as blue; African-Americans, green; Asians, red; Hispanics, orange; and all other racial categories are coded as brown."

In this notebook, we will relate the Racial Dot Map 5-category scheme to the P005* variables.



In [12]:

    
# let's get the total population -- tabulated in two variables: P0010001, P0050001
# P0050002 Not Hispanic or Latino (total)
# P0050010 Hispanic or Latino

# r is a list with one dict per state; show the first five records
r = list(states(('NAME','P0010001','P0050001','P0050002','P0050010')))
r[:5]









    Out[12]:





[{u'NAME': u'Alabama',
  u'P0010001': u'4779736',
  u'P0050001': u'4779736',
  u'P0050002': u'4594134',
  u'P0050010': u'185602',
  u'state': u'01'},
 {u'NAME': u'Alaska',
  u'P0010001': u'710231',
  u'P0050001': u'710231',
  u'P0050002': u'670982',
  u'P0050010': u'39249',
  u'state': u'02'},
 {u'NAME': u'Arizona',
  u'P0010001': u'6392017',
  u'P0050001': u'6392017',
  u'P0050002': u'4496868',
  u'P0050010': u'1895149',
  u'state': u'04'},
 {u'NAME': u'Arkansas',
  u'P0010001': u'2915918',
  u'P0050001': u'2915918',
  u'P0050002': u'2729868',
  u'P0050010': u'186050',
  u'state': u'05'},
 {u'NAME': u'California',
  u'P0010001': u'37253956',
  u'P0050001': u'37253956',
  u'P0050002': u'23240237',
  u'P0050010': u'14013719',
  u'state': u'06'}]



In [13]:

    
# Hispanic/Latino origin vs not-Hispanic/Latino
# Compare with http://www.census.gov/prod/cen2010/briefs/c2010br-02.pdf Table 1
# Hispanic/Latino: 50477594
# non-Hispanic/Latino: 258267944

df=DataFrame(r)
# the API returns the counts as strings, so cast them to int before summing
df[['P0010001', 'P0050001','P0050002','P0050010']] = \
    df[['P0010001', 'P0050001','P0050002','P0050010']].astype('int')
# national totals (column sums over the state rows)
df[['P0010001', 'P0050001', 'P0050002', 'P0050010']].sum()









    Out[13]:





P0010001    308745538
P0050001    308745538
P0050002    258267944
P0050010     50477594
dtype: int64



In [14]:

    
# Are the total Hispanic/Latino and non-Hispanic/Latino populations the same as those reported in
# http://www.census.gov/prod/cen2010/briefs/c2010br-02.pdf Table 1 ?
# (evaluates to a pair of booleans: Hispanic/Latino match, non-Hispanic/Latino match)
(df['P0050010'].sum() == 50477594,
 df['P0050002'].sum() == 258267944)









    Out[14]:





(True, True)



In [15]:

    
# How about the non-Hispanic/Latino White only category?
# P0050003
# total should be 196817552

# NOTE: this rebinds `df`, replacing the frame built in the earlier cell
df = DataFrame(list(states('NAME,P0050003')))
# counts arrive as strings; cast to int so that sum() adds numbers
df['P0050003'] = df['P0050003'].astype('int')
df.P0050003.sum()









    Out[15]:





196817552

Converting to Racial Dot Map Categories

SUGGESTED EXERCISE: write a function convert_to_rdotmap(row) that takes an input Python dict that has the keys:

* NAME
* P0050001, P0050002, ..., P0050016, P0050017

and that returns a Pandas Series with the following columns:

* Total
* White
* Black
* Asian
* Hispanic
* Other
* Name  (note lowercase)

that correspond to those used in the Racial Dot Map.

Also write a function def convert_P005_to_int(df) that converts all the P005* columns to int



In [16]:

    
# Use a little convenience function to calculate the variable names to be used

def P005_range(n0,n1):
    """Return the P005 variable names for indices n0 <= i < n1 as a tuple.

    Each name is 'P005' followed by the index zero-padded to four digits,
    e.g. P005_range(1, 3) -> ('P0050001', 'P0050002').  Like range(), the
    upper bound n1 is exclusive.
    """
    # range() rather than the Python 2-only xrange() so this runs on Python 2 and 3
    return tuple('P005' + "{i:04d}".format(i=i) for i in range(n0, n1))

# P0050001 .. P0050017 (the upper bound passed to P005_range is exclusive)
P005_vars = P005_range(1,18)
# the same names as a single comma-separated string
P005_vars_str = ",".join(P005_vars)
# the variable list with NAME prepended, for passing to the generators
P005_vars_with_name = ['NAME'] + list(P005_vars)

P005_vars_with_name









    Out[16]:





['NAME',
 'P0050001',
 'P0050002',
 'P0050003',
 'P0050004',
 'P0050005',
 'P0050006',
 'P0050007',
 'P0050008',
 'P0050009',
 'P0050010',
 'P0050011',
 'P0050012',
 'P0050013',
 'P0050014',
 'P0050015',
 'P0050016',
 'P0050017']



In [17]:

    
# HAVE YOU TRIED THE EXERCISE? IF NOT, TRY IT FIRST. HERE'S ONE POSSIBLE ANSWER:

# http://manishamde.github.io/blog/2013/03/07/pandas-and-python-top-10/#create

def convert_P005_to_int(df):
    """Cast every column named in ``P005_vars`` to int and return ``df``.

    The cast is written back into ``df`` itself, so the caller's frame is
    modified; the same object is returned for convenience.
    """
    p005_columns = list(P005_vars)
    df[p005_columns] = df[p005_columns].astype('int')
    return df

def convert_to_rdotmap(row):
    """Collapse one row of P005 variables into the Racial Dot Map categories.

    row -- mapping with the keys NAME, P0050001 and P0050003..P0050010.

    Returns a pandas Series indexed, in this order, by
    Name, Total, White, Black, Hispanic, Asian, Other.
    """
    # "Other" lumps together the remaining not-Hispanic categories
    other = row['P0050005'] + row['P0050007'] + row['P0050008'] + row['P0050009']

    record = {
        'Name': row['NAME'],
        'Total': row['P0050001'],
        'White': row['P0050003'],
        'Black': row['P0050004'],
        'Hispanic': row['P0050010'],
        'Asian': row['P0050006'],
        'Other': other,
    }
    column_order = ['Name', 'Total', 'White', 'Black', 'Hispanic', 'Asian', 'Other']
    return pd.Series(record, index=column_order)



In [18]:

    
from census import Census

import settings
from settings import CENSUS_KEY

import time
from itertools import islice

def P005_range(n0,n1):
    """Return the P005 variable names for indices n0 <= i < n1 as a tuple.

    Each name is 'P005' followed by the index zero-padded to four digits,
    e.g. P005_range(1, 3) -> ('P0050001', 'P0050002').  Like range(), the
    upper bound n1 is exclusive.
    """
    # range() rather than the Python 2-only xrange() so this runs on Python 2 and 3
    return tuple('P005' + "{i:04d}".format(i=i) for i in range(n0, n1))

# P0050001 .. P0050017 (the upper bound passed to P005_range is exclusive)
P005_vars = P005_range(1,18)
# the same names as a single comma-separated string
P005_vars_str = ",".join(P005_vars)


# http://manishamde.github.io/blog/2013/03/07/pandas-and-python-top-10/#create
def convert_to_rdotmap(row):
    """Collapse one row of P005 variables into the Racial Dot Map categories.

    row -- mapping with the keys NAME, P0050001 and P0050003..P0050010.

    Returns a pandas Series indexed, in this order, by
    Name, Total, White, Black, Hispanic, Asian, Other.
    """
    # "Other" lumps together the remaining not-Hispanic categories
    other = row['P0050005'] + row['P0050007'] + row['P0050008'] + row['P0050009']

    record = {
        'Name': row['NAME'],
        'Total': row['P0050001'],
        'White': row['P0050003'],
        'Black': row['P0050004'],
        'Hispanic': row['P0050010'],
        'Asian': row['P0050006'],
        'Other': other,
    }
    column_order = ['Name', 'Total', 'White', 'Black', 'Hispanic', 'Asian', 'Other']
    return pd.Series(record, index=column_order)


def normalize(s):
    """Scale a Series so that its entries add up to 1.0.

    Each item is converted to float and divided by the sum of the series.
    """
    as_float = s.astype('float')
    return as_float / np.sum(s)


def entropy(series):
    """Normalized Shannon index of the counts in ``series``.

    The Shannon entropy of the non-zero entries is divided by the maximum
    entropy possible for ``len(series)`` categories, so a series whose
    entries are all equal scores 1.0.  A series with fewer than two entries
    scores 0.0.
    """
    # zero counts contribute nothing to the entropy (and log(0) is undefined)
    nonzero = series[series != 0]

    n_categories = len(series)
    if n_categories <= 1:
        return 0.0

    # the ceiling is based on the full number of categories, zeros included
    max_entropy = -np.log(1.0 / n_categories)

    shares = nonzero.astype('float') / float(sum(nonzero))
    return sum(-shares * np.log(shares)) / max_entropy

    
def convert_P005_to_int(df):
    """Cast every column named in ``P005_vars`` to int and return ``df``.

    The cast is written back into ``df`` itself, so the caller's frame is
    modified; the same object is returned for convenience.
    """
    p005_columns = list(P005_vars)
    df[p005_columns] = df[p005_columns].astype('int')
    return df
    

def diversity(r):
    """Build a Racial Dot Map style table with diversity scores.

    r -- data accepted by ``DataFrame(...)``; it must provide NAME and the
         P005 variable columns.

    Returns a DataFrame with the columns produced by ``convert_to_rdotmap``
    (Name, Total, White, Black, Hispanic, Asian, Other) plus:

    * entropy5 -- ``entropy`` over Asian, Black, Hispanic, White and Other
    * entropy4 -- ``entropy`` over Asian, Black, Hispanic and White
    """
    counts = convert_P005_to_int(DataFrame(r))
    rdotmap = counts.apply(convert_to_rdotmap, axis=1)

    four_groups = ['Asian','Black','Hispanic','White']
    five_groups = four_groups + ['Other']

    rdotmap['entropy5'] = rdotmap[five_groups].apply(entropy,axis=1)
    rdotmap['entropy4'] = rdotmap[four_groups].apply(entropy,axis=1)
    return rdotmap



In [19]:

    
# states: fetch NAME plus all P005 variables for each state, then compute
# the Racial Dot Map categories and the two entropy scores

r=list(states(P005_vars_with_name))
diversity(r)









    Out[19]:






  
    
      
      Name
      Total
      White
      Black
      Hispanic
      Asian
      Other
      entropy5
      entropy4
    
  
  
    
      0 
                    Alabama
        4779736
        3204402
       1244437
         185602
         52937
         92358
       0.541001
       0.570292
    
    
      1 
                     Alaska
         710231
         455320
         21949
          39249
         37459
        156254
       0.646677
       0.475235
    
    
      2 
                    Arizona
        6392017
        3695647
        239101
        1895149
        170509
        391611
       0.663524
       0.643529
    
    
      3 
                   Arkansas
        2915918
        2173469
        447102
         186050
         35647
         73650
       0.515025
       0.526205
    
    
      4 
                 California
       37253956
       14956253
       2163804
       14013719
       4775070
       1345110
       0.796994
       0.843670
    
    
      5 
                   Colorado
        5029196
        3520793
        188778
        1038687
        135564
        145374
       0.558232
       0.570130
    
    
      6 
                Connecticut
        3574097
        2546262
        335119
         479087
        134091
         79538
       0.584509
       0.615330
    
    
      7 
                   Delaware
         897934
         586752
        186782
          73221
         28308
         22871
       0.628490
       0.660917
    
    
      8 
       District of Columbia
         601723
         209464
        301053
          54749
         20818
         15639
       0.710288
       0.757369
    
    
      9 
                    Florida
       18801310
       10884722
       2851100
        4223806
        445216
        396466
       0.688393
       0.741076
    
    
      10
                    Georgia
        9687653
        5413920
       2910800
         853689
        311692
        197552
       0.677545
       0.729666
    
    
      11
                     Hawaii
        1360301
         309343
         19904
         120842
        513294
        396918
       0.833108
       0.750762
    
    
      12
                      Idaho
        1567582
        1316243
          8875
         175901
         18529
         48034
       0.360829
       0.330227
    
    
      13
                   Illinois
       12830632
        8167753
       1832924
        2027578
        580586
        221791
       0.663131
       0.719347
    
    
      14
                    Indiana
        6483802
        5286453
        582140
         389707
        101444
        124058
       0.430342
       0.439752
    
    
      15
                       Iowa
        3046355
        2701123
         86906
         151544
         52597
         54185
       0.310137
       0.300998
    
    
      16
                     Kansas
        2853118
        2230539
        162700
         300042
         66967
         92870
       0.492215
       0.483675
    
    
      17
                   Kentucky
        4339367
        3745655
        333075
         132836
         48338
         79463
       0.344293
       0.340010
    
    
      18
                  Louisiana
        4533372
        2734884
       1442420
         192560
         69327
         94181
       0.588919
       0.623788
    
    
      19
                      Maine
        1328361
        1254297
         15154
          16935
         13442
         28533
       0.180061
       0.137155
    
    
      20
                   Maryland
        5773552
        3157958
       1674229
         470632
        316694
        154039
       0.714090
       0.760596
    
    
      21
              Massachusetts
        6547629
        4984800
        391693
         627654
        347495
        195987
       0.535423
       0.540767
    
    
      22
                   Michigan
        9883640
        7569939
       1383756
         436358
        236490
        257097
       0.498010
       0.504299
    
    
      23
                  Minnesota
        5303925
        4405142
        269141
         250258
        212996
        166388
       0.427024
       0.407947
    
    
      24
                Mississippi
        2967297
        1722287
       1093512
          81481
         25477
         44540
       0.550642
       0.591949
    
    
      25
                   Missouri
        5988927
        4850748
        687149
         212470
         97221
        141339
       0.430525
       0.429356
    
    
      26
                    Montana
         989415
         868628
          3743
          28565
          6138
         82341
       0.295872
       0.149198
    
    
      27
                   Nebraska
        1826341
        1499753
         80959
         167405
         31919
         46305
       0.424281
       0.417907
    
    
      28
                     Nevada
        2700551
        1462081
        208058
         716501
        191047
        122864
       0.751622
       0.774363
    
    
      29
              New Hampshire
        1316470
        1215050
         13625
          36704
         28241
         22850
       0.232308
       0.210183
    
    
      30
                 New Jersey
        8791894
        5214878
       1125401
        1555144
        719827
        176644
       0.722462
       0.783517
    
    
      31
                 New Mexico
        2059179
         833810
         35462
         953403
         26305
        210199
       0.671781
       0.603770
    
    
      32
                   New York
       19378102
       11304247
       2783857
        3416922
       1406194
        466882
       0.732727
       0.787727
    
    
      33
             North Carolina
        9535483
        6223995
       2019854
         800120
        206579
        284935
       0.623233
       0.645955
    
    
      34
               North Dakota
         672591
         598007
          7720
          13467
          6839
         46558
       0.289289
       0.165826
    
    
      35
                       Ohio
       11536504
        9359263
       1389115
         354674
        190765
        242687
       0.422934
       0.426370
    
    
      36
                   Oklahoma
        3751351
        2575381
        272071
         332007
         64154
        507738
       0.623426
       0.506346
    
    
      37
                     Oregon
        3831074
        3005848
         64984
         450062
        139436
        170744
       0.478609
       0.444008
    
    
      38
               Pennsylvania
       12702379
       10094652
       1327091
         719660
        346288
        214688
       0.465015
       0.486249
    
    
      39
               Rhode Island
        1052567
         803685
         51560
         130655
         29988
         36679
       0.516377
       0.508129
    
    
      40
             South Carolina
        4625364
        2962740
       1279998
         235682
         58307
         88637
       0.573768
       0.609445
    
    
      41
               South Dakota
         814180
         689502
          9959
          22119
          7553
         85047
       0.355383
       0.191061
    
    
      42
                  Tennessee
        6346105
        4800782
       1049391
         290059
         90311
        115562
       0.486619
       0.508575
    
    
      43
                      Texas
       25145561
       11397345
       2886825
        9460921
        948426
        452044
       0.727466
       0.793870
    
    
      44
                       Utah
        2763885
        2221719
         25951
         358340
         54176
        103699
       0.425283
       0.393087
    
    
      45
                    Vermont
         625741
         590223
          5943
           9208
          7875
         12492
       0.183061
       0.144800
    
    
      46
                   Virginia
        8001024
        5186450
       1523704
         631825
        436298
        222747
       0.655915
       0.688954
    
    
      47
                 Washington
        6724540
        4876804
        229603
         755790
        475634
        386709
       0.587508
       0.555274
    
    
      48
              West Virginia
        1852994
        1726256
         62122
          22268
         12285
         30063
       0.206960
       0.183409
    
    
      49
                  Wisconsin
        5686986
        4738411
        350898
         336056
        128052
        133569
       0.412929
       0.408698
    
    
      50
                    Wyoming
         563626
         483874
          4351
          50231
          4279
         20891
       0.337501
       0.288172
    
  

51 rows × 9 columns



In [20]:

    
# counties: same variables as for the states, one record per county
# NOTE: this rebinds `r`, replacing the state-level records

r = list(counties(P005_vars_with_name))



In [21]:

    
# county-level Racial Dot Map categories and entropy scores
df2 = diversity(r)



In [22]:

    
# most diverse counties first (highest five-category entropy);
# sort_values replaces the removed sort_index(by=...) spelling
df2.sort_values(by='entropy5', ascending=False)









    Out[22]:






  
    
      
      Name
      Total
      White
      Black
      Hispanic
      Asian
      Other
      entropy5
      entropy4
    
  
  
    
      1868
                    Queens County
       2230722
        616727
        395881
        613750
        508334
        96030
       0.925644
       0.989171
    
    
      68  
       Aleutians West Census Area
          5561
          1745
           318
           726
          1575
         1197
       0.920216
       0.882623
    
    
      186 
                   Alameda County
       1510271
        514559
        184126
        339889
        390524
        81173
       0.910834
       0.957875
    
    
      233 
                    Solano County
        413344
        168628
         58743
         99356
         59027
        27590
       0.897416
       0.926901
    
    
      67  
           Aleutians East Borough
          3141
           425
           212
           385
          1113
         1006
       0.896064
       0.864996
    
    
      2601
                 Fort Bend County
        585375
        211680
        123267
        138967
         98762
        12699
       0.882673
       0.970379
    
    
      1851
                     Kings County
       2504700
        893306
        799066
        496285
        260129
        55914
       0.853105
       0.934130
    
    
      219 
                Sacramento County
       1418788
        687166
        139949
        306196
        198944
        86533
       0.842896
       0.865689
    
    
      453 
                  Gwinnett County
        805321
        354316
        184122
        162035
         84763
        20085
       0.838965
       0.912596
    
    
      550 
                      Maui County
        154834
         49193
           818
         15710
         43384
        45729
       0.833108
       0.751354
    
    
      546 
                    Hawaii County
        185079
         57831
           899
         21383
         39588
        65378
       0.830209
       0.765941
    
    
      192 
              Contra Costa County
       1049025
        500923
         93604
        255560
        148881
        50057
       0.829415
       0.865931
    
    
      1207
                Montgomery County
        971777
        478765
        161689
        165398
        134677
        31248
       0.828220
       0.887528
    
    
      224 
               San Joaquin County
        685306
        245919
         48540
        266341
         94547
        29959
       0.828052
       0.869824
    
    
      1782
                    Hudson County
        634266
        195510
         71315
        267853
         83825
        15763
       0.827493
       0.899094
    
    
      1229
                   Suffolk County
        722023
        346979
        142980
        143455
         58963
        29646
       0.826117
       0.871302
    
    
      549 
                     Kauai County
         67091
         20611
           258
          6315
         20296
        19611
       0.824890
       0.737361
    
    
      2936
               Manassas Park city
         14273
          6070
          1784
          4645
          1261
          513
       0.821891
       0.873913
    
    
      2892
            Prince William County
        402002
        195656
         78492
         81460
         29986
        16408
       0.818339
       0.862245
    
    
      547 
                  Honolulu County
        953207
        181684
         17929
         77433
        410019
       266142
       0.816249
       0.722054
    
    
      223 
             San Francisco County
        805235
        337451
         46781
        121774
        265700
        33529
       0.816230
       0.858482
    
    
      226 
                 San Mateo County
        718451
        303609
         18763
        182502
        175934
        37643
       0.811677
       0.837993
    
    
      1967
                   Robeson County
        134168
         36160
         32347
         10932
           971
        53758
       0.809458
       0.757652
    
    
      1936
                      Hoke County
         46952
         19142
         15392
          5823
           467
         6128
       0.808919
       0.758726
    
    
      1858
                  New York County
       1585873
        761493
        205340
        403577
        177624
        37839
       0.807452
       0.877051
    
    
      228 
               Santa Clara County
       1781642
        626909
         42331
        479210
        565466
        67726
       0.806568
       0.852239
    
    
      1780
                     Essex County
        783969
        260177
        308358
        159117
         35292
        21025
       0.803622
       0.867143
    
    
      1748
                     Clark County
       1951269
        935955
        194821
        568644
        165121
        86728
       0.800982
       0.835903
    
    
      1785
                 Middlesex County
        809858
        398724
         71557
        148975
        172534
        18068
       0.800874
       0.872133
    
    
      610 
                      Cook County
       5194675
       2278358
       1265778
       1244762
        318869
        86908
       0.800036
       0.882240
    
    
      2579
                    Dallas County
       2368139
        784693
        518732
        905940
        117797
        40977
       0.798837
       0.879632
    
    
      204 
               Los Angeles County
       9818605
       2728321
        815086
       4687889
       1325671
       261638
       0.796781
       0.859287
    
    
      2623
                    Harris County
       4092459
       1349646
        754258
       1671540
        249853
        67162
       0.796176
       0.878354
    
    
      367 
                    Orange County
       1145956
        526754
        223200
        308244
         55541
        32217
       0.792964
       0.852138
    
    
      325 
                   Broward County
       1748066
        760817
        449677
        438247
         55692
        43633
       0.782936
       0.845847
    
    
      992 
                 Wyandotte County
        157505
         68170
         39046
         41633
          3886
         4770
       0.781122
       0.834209
    
    
      2848
                   Fairfax County
       1081726
        590622
         96078
        168482
        188737
        37807
       0.780967
       0.826231
    
    
      1793
                     Union County
        536499
        243312
        111705
        146704
         24496
        10282
       0.780779
       0.854469
    
    
      2294
              Philadelphia County
       1526006
        562585
        644287
        187611
         95521
        36002
       0.777585
       0.842034
    
    
      2935
                    Manassas city
         37821
         17994
          4905
         11876
          1861
         1185
       0.769676
       0.818704
    
    
      2536
                      Bell County
        310235
        157289
         63380
         67010
          8350
        14206
       0.769433
       0.795595
    
    
      2915
                  Alexandria city
        139966
         74878
         29778
         22524
          8351
         4435
       0.767634
       0.815651
    
    
      1921
                    Durham County
        267587
        112697
        100260
         36077
         12180
         6373
       0.765370
       0.827083
    
    
      222 
                 San Diego County
       3095313
       1500047
        146600
        991348
        328058
       129260
       0.764654
       0.795817
    
    
      1784
                    Mercer County
        366513
        199909
         71378
         55318
         32545
         7363
       0.763097
       0.831665
    
    
      2645
                 Jefferson County
        252273
        112503
         84500
         42899
          8525
         3846
       0.749335
       0.825618
    
    
      2742
                   Tarrant County
       1809034
        937135
        262522
        482977
         83378
        43022
       0.748173
       0.806698
    
    
      215 
                    Orange County
       3010232
       1328499
         44000
       1012973
        532477
        92283
       0.747159
       0.792896
    
    
      80  
            Kodiak Island Borough
         13592
          7137
            85
           996
          2620
         2754
       0.747039
       0.631715
    
    
      242 
                      Yolo County
        200849
        100240
          4752
         60953
         25640
         9264
       0.746835
       0.767605
    
    
      221 
            San Bernardino County
       2035210
        677598
        170700
       1001145
        123978
        61789
       0.745345
       0.791244
    
    
      236 
                    Sutter County
         94737
         47782
          1713
         27251
         13442
         4549
       0.745009
       0.762589
    
    
      1949
               Mecklenburg County
        919628
        465372
        278042
        111944
         41991
        22279
       0.741724
       0.798176
    
    
      1915
                Cumberland County
        319431
        150749
        113939
         30190
          6885
        17668
       0.738064
       0.743731
    
    
      1789
                   Passaic County
        501226
        227144
         55480
        185677
         24556
         8369
       0.737074
       0.807972
    
    
      2146
                  Comanche County
        124098
         73122
         20794
         13896
          2663
        13623
       0.733880
       0.676671
    
    
      2185
                  Oklahoma County
        718633
        425791
        108954
        108543
         21151
        54194
       0.733371
       0.712225
    
    
      195 
                    Fresno County
        930450
        304522
         45005
        468070
         86856
        25997
       0.732562
       0.780302
    
    
      2542
                  Brazoria County
        313166
        166674
         36880
         86643
         17013
         5956
       0.731108
       0.795982
    
    
      1774
                  Atlantic County
        274549
        160871
         40882
         46241
         20419
         6136
       0.730078
       0.787988
    
    
      
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
  

3143 rows × 9 columns



In [ ]:

	Name	Total	White	Black	Hispanic	Asian	Other	entropy5	entropy4
0	Alabama	4779736	3204402	1244437	185602	52937	92358	0.541001	0.570292
1	Alaska	710231	455320	21949	39249	37459	156254	0.646677	0.475235
2	Arizona	6392017	3695647	239101	1895149	170509	391611	0.663524	0.643529
3	Arkansas	2915918	2173469	447102	186050	35647	73650	0.515025	0.526205
4	California	37253956	14956253	2163804	14013719	4775070	1345110	0.796994	0.843670
5	Colorado	5029196	3520793	188778	1038687	135564	145374	0.558232	0.570130
6	Connecticut	3574097	2546262	335119	479087	134091	79538	0.584509	0.615330
7	Delaware	897934	586752	186782	73221	28308	22871	0.628490	0.660917
8	District of Columbia	601723	209464	301053	54749	20818	15639	0.710288	0.757369
9	Florida	18801310	10884722	2851100	4223806	445216	396466	0.688393	0.741076
10	Georgia	9687653	5413920	2910800	853689	311692	197552	0.677545	0.729666
11	Hawaii	1360301	309343	19904	120842	513294	396918	0.833108	0.750762
12	Idaho	1567582	1316243	8875	175901	18529	48034	0.360829	0.330227
13	Illinois	12830632	8167753	1832924	2027578	580586	221791	0.663131	0.719347
14	Indiana	6483802	5286453	582140	389707	101444	124058	0.430342	0.439752
15	Iowa	3046355	2701123	86906	151544	52597	54185	0.310137	0.300998
16	Kansas	2853118	2230539	162700	300042	66967	92870	0.492215	0.483675
17	Kentucky	4339367	3745655	333075	132836	48338	79463	0.344293	0.340010
18	Louisiana	4533372	2734884	1442420	192560	69327	94181	0.588919	0.623788
19	Maine	1328361	1254297	15154	16935	13442	28533	0.180061	0.137155
20	Maryland	5773552	3157958	1674229	470632	316694	154039	0.714090	0.760596
21	Massachusetts	6547629	4984800	391693	627654	347495	195987	0.535423	0.540767
22	Michigan	9883640	7569939	1383756	436358	236490	257097	0.498010	0.504299
23	Minnesota	5303925	4405142	269141	250258	212996	166388	0.427024	0.407947
24	Mississippi	2967297	1722287	1093512	81481	25477	44540	0.550642	0.591949
25	Missouri	5988927	4850748	687149	212470	97221	141339	0.430525	0.429356
26	Montana	989415	868628	3743	28565	6138	82341	0.295872	0.149198
27	Nebraska	1826341	1499753	80959	167405	31919	46305	0.424281	0.417907
28	Nevada	2700551	1462081	208058	716501	191047	122864	0.751622	0.774363
29	New Hampshire	1316470	1215050	13625	36704	28241	22850	0.232308	0.210183
30	New Jersey	8791894	5214878	1125401	1555144	719827	176644	0.722462	0.783517
31	New Mexico	2059179	833810	35462	953403	26305	210199	0.671781	0.603770
32	New York	19378102	11304247	2783857	3416922	1406194	466882	0.732727	0.787727
33	North Carolina	9535483	6223995	2019854	800120	206579	284935	0.623233	0.645955
34	North Dakota	672591	598007	7720	13467	6839	46558	0.289289	0.165826
35	Ohio	11536504	9359263	1389115	354674	190765	242687	0.422934	0.426370
36	Oklahoma	3751351	2575381	272071	332007	64154	507738	0.623426	0.506346
37	Oregon	3831074	3005848	64984	450062	139436	170744	0.478609	0.444008
38	Pennsylvania	12702379	10094652	1327091	719660	346288	214688	0.465015	0.486249
39	Rhode Island	1052567	803685	51560	130655	29988	36679	0.516377	0.508129
40	South Carolina	4625364	2962740	1279998	235682	58307	88637	0.573768	0.609445
41	South Dakota	814180	689502	9959	22119	7553	85047	0.355383	0.191061
42	Tennessee	6346105	4800782	1049391	290059	90311	115562	0.486619	0.508575
43	Texas	25145561	11397345	2886825	9460921	948426	452044	0.727466	0.793870
44	Utah	2763885	2221719	25951	358340	54176	103699	0.425283	0.393087
45	Vermont	625741	590223	5943	9208	7875	12492	0.183061	0.144800
46	Virginia	8001024	5186450	1523704	631825	436298	222747	0.655915	0.688954
47	Washington	6724540	4876804	229603	755790	475634	386709	0.587508	0.555274
48	West Virginia	1852994	1726256	62122	22268	12285	30063	0.206960	0.183409
49	Wisconsin	5686986	4738411	350898	336056	128052	133569	0.412929	0.408698
50	Wyoming	563626	483874	4351	50231	4279	20891	0.337501	0.288172

	Name	Total	White	Black	Hispanic	Asian	Other	entropy5	entropy4
1868	Queens County	2230722	616727	395881	613750	508334	96030	0.925644	0.989171
68	Aleutians West Census Area	5561	1745	318	726	1575	1197	0.920216	0.882623
186	Alameda County	1510271	514559	184126	339889	390524	81173	0.910834	0.957875
233	Solano County	413344	168628	58743	99356	59027	27590	0.897416	0.926901
67	Aleutians East Borough	3141	425	212	385	1113	1006	0.896064	0.864996
2601	Fort Bend County	585375	211680	123267	138967	98762	12699	0.882673	0.970379
1851	Kings County	2504700	893306	799066	496285	260129	55914	0.853105	0.934130
219	Sacramento County	1418788	687166	139949	306196	198944	86533	0.842896	0.865689
453	Gwinnett County	805321	354316	184122	162035	84763	20085	0.838965	0.912596
550	Maui County	154834	49193	818	15710	43384	45729	0.833108	0.751354
546	Hawaii County	185079	57831	899	21383	39588	65378	0.830209	0.765941
192	Contra Costa County	1049025	500923	93604	255560	148881	50057	0.829415	0.865931
1207	Montgomery County	971777	478765	161689	165398	134677	31248	0.828220	0.887528
224	San Joaquin County	685306	245919	48540	266341	94547	29959	0.828052	0.869824
1782	Hudson County	634266	195510	71315	267853	83825	15763	0.827493	0.899094
1229	Suffolk County	722023	346979	142980	143455	58963	29646	0.826117	0.871302
549	Kauai County	67091	20611	258	6315	20296	19611	0.824890	0.737361
2936	Manassas Park city	14273	6070	1784	4645	1261	513	0.821891	0.873913
2892	Prince William County	402002	195656	78492	81460	29986	16408	0.818339	0.862245
547	Honolulu County	953207	181684	17929	77433	410019	266142	0.816249	0.722054
223	San Francisco County	805235	337451	46781	121774	265700	33529	0.816230	0.858482
226	San Mateo County	718451	303609	18763	182502	175934	37643	0.811677	0.837993
1967	Robeson County	134168	36160	32347	10932	971	53758	0.809458	0.757652
1936	Hoke County	46952	19142	15392	5823	467	6128	0.808919	0.758726
1858	New York County	1585873	761493	205340	403577	177624	37839	0.807452	0.877051
228	Santa Clara County	1781642	626909	42331	479210	565466	67726	0.806568	0.852239
1780	Essex County	783969	260177	308358	159117	35292	21025	0.803622	0.867143
1748	Clark County	1951269	935955	194821	568644	165121	86728	0.800982	0.835903
1785	Middlesex County	809858	398724	71557	148975	172534	18068	0.800874	0.872133
610	Cook County	5194675	2278358	1265778	1244762	318869	86908	0.800036	0.882240
2579	Dallas County	2368139	784693	518732	905940	117797	40977	0.798837	0.879632
204	Los Angeles County	9818605	2728321	815086	4687889	1325671	261638	0.796781	0.859287
2623	Harris County	4092459	1349646	754258	1671540	249853	67162	0.796176	0.878354
367	Orange County	1145956	526754	223200	308244	55541	32217	0.792964	0.852138
325	Broward County	1748066	760817	449677	438247	55692	43633	0.782936	0.845847
992	Wyandotte County	157505	68170	39046	41633	3886	4770	0.781122	0.834209
2848	Fairfax County	1081726	590622	96078	168482	188737	37807	0.780967	0.826231
1793	Union County	536499	243312	111705	146704	24496	10282	0.780779	0.854469
2294	Philadelphia County	1526006	562585	644287	187611	95521	36002	0.777585	0.842034
2935	Manassas city	37821	17994	4905	11876	1861	1185	0.769676	0.818704
2536	Bell County	310235	157289	63380	67010	8350	14206	0.769433	0.795595
2915	Alexandria city	139966	74878	29778	22524	8351	4435	0.767634	0.815651
1921	Durham County	267587	112697	100260	36077	12180	6373	0.765370	0.827083
222	San Diego County	3095313	1500047	146600	991348	328058	129260	0.764654	0.795817
1784	Mercer County	366513	199909	71378	55318	32545	7363	0.763097	0.831665
2645	Jefferson County	252273	112503	84500	42899	8525	3846	0.749335	0.825618
2742	Tarrant County	1809034	937135	262522	482977	83378	43022	0.748173	0.806698
215	Orange County	3010232	1328499	44000	1012973	532477	92283	0.747159	0.792896
80	Kodiak Island Borough	13592	7137	85	996	2620	2754	0.747039	0.631715
242	Yolo County	200849	100240	4752	60953	25640	9264	0.746835	0.767605
221	San Bernardino County	2035210	677598	170700	1001145	123978	61789	0.745345	0.791244
236	Sutter County	94737	47782	1713	27251	13442	4549	0.745009	0.762589
1949	Mecklenburg County	919628	465372	278042	111944	41991	22279	0.741724	0.798176
1915	Cumberland County	319431	150749	113939	30190	6885	17668	0.738064	0.743731
1789	Passaic County	501226	227144	55480	185677	24556	8369	0.737074	0.807972
2146	Comanche County	124098	73122	20794	13896	2663	13623	0.733880	0.676671
2185	Oklahoma County	718633	425791	108954	108543	21151	54194	0.733371	0.712225
195	Fresno County	930450	304522	45005	468070	86856	25997	0.732562	0.780302
2542	Brazoria County	313166	166674	36880	86643	17013	5956	0.731108	0.795982
1774	Atlantic County	274549	160871	40882	46241	20419	6136	0.730078	0.787988
	...	...	...	...	...	...	...	...	...