In [2]:
import numpy as np
import pandas as pd
import requests
from urllib.parse import quote_plus, urlencode
In [3]:
# Helper Functions
def check_nulls(df, col):
'''
returns df with NaN in specified column
'''
return df.ix[df.ix[:,col].isnull()]
def get_notnulls(df, col):
'''
returns rows of df where specified column is not null
'''
return df.ix[df.ix[:,col].notnull()]
def clean_data(df, cleaning_tuples):
'''
replace a string in a column (pat) with a clean string (repl):
e.g. cleaning_tuples = [(col, pat, repl)]
'''
for col, pat, repl in cleaning_tuples:
df.ix[:,col] = df.ix[:,col].str.replace(pat, repl)
def clean_grouped_data(grouped_df):
'''
returns df with counts that result from groupby
'''
counts = pd.DataFrame(grouped_df.count().ix[:,"service_request_number"])
counts = counts.unstack()
counts.columns = counts.columns.droplevel()
counts.columns.name = None
counts.index.name = None
counts.fillna(0, inplace=True)
return counts
In [4]:
BASE_URL = "https://data.cityofchicago.org/resource/"
def make_data_frame(verbose=True):
'''
Idiosyncratic helper that collects 311 service request data and cleans up the pandas DataFrame
'''
date = ["creation_date","completion_date","type_of_service_request"]
date2 = ["date_service_request_was_received","service_request_type"]
endpoints = {"787j-mys9.json?":{"SELECT": date, "WHERE": date[0]},
"cdmx-wzbz.json?":{"SELECT": date, "WHERE": date[0]},
"kcdz-f29q.json?":{"SELECT": date, "WHERE": date[0]},
"yama-9had.json?":{"SELECT": date2, "WHERE": date2[0]}}
base_query = {"SELECT": ["service_request_number",
"community_area",
"latitude",
"longitude"],
"WHERE": " between '2017-03-21T00:00:00' and '2017-03-31T00:00:00'"}
city_data = get_city_data(endpoints, base_query,verbose=verbose)
combine_cols(city_data, date[0], date2[0])
combine_cols(city_data, date[2], date2[1])
return city_data
def get_city_data(endpoints, base_query,verbose=False):
'''
Collects 311 response data based on SoQL query
returns data (pd.DataFrame)
'''
data = pd.DataFrame()
for endpoint, vals in endpoints.items():
query = dict(base_query)
query["SELECT"] = query["SELECT"] + vals["SELECT"]
query["WHERE"] = vals["WHERE"] + query["WHERE"]
new_data = get_data(endpoint, query, verbose=verbose)
data = pd.concat([data, new_data], ignore_index=True)
return data
def get_data(endpoint, query, BASE_URL=BASE_URL, verbose=False):
'''
Inputs: endpoint (string) (e.g.: "yama-9had.json?")
query (dictionary) (SoQL statements divided into constituent parts)
'''
url = BASE_URL + endpoint
count_query = url + "$query=SELECT%20count(*)"
r = requests.get(count_query)
query["LIMIT"] = r.json()[0]["count"]
query = encode_query(query)
query_url = url + "$query=" + query
if verbose:
print("query url: ", query_url)
return pd.read_json(query_url)
def encode_query(query_dict):
'''
Takes a dictionary with SoQL statement parts and combines them into a web-safe string
'''
encoded = ""
for key, val in query_dict.items():
if isinstance(val, list):
try:
val = ", ".join(val)
except:
return "Lists must contain strings only"
try:
encoded += key + " " + val + " "
except:
return "Dictionary values must be strings or list of strings"
return quote_plus(encoded)
def combine_cols(df, col, extra):
'''
Inputs:
df (pd.DataFrame)
col,extra (string) column names
Combines columns with similar information into a single column and drops extra.
'''
df.ix[:,col] = df.ix[:,col].where(df.ix[:,col].notnull(), df.ix[:,extra])
df.drop(extra, axis=1, inplace=True)
def get_com_areas(df):
'''
downloads community area information from the data portal and merges it into the data frame
'''
try:
community_areas=pd.read_json("https://data.cityofchicago.org/resource/igwz-8jzy.json")
except:
return "download failed"
com_areas = community_areas.ix[:,["area_numbe","community"]]
df = pd.merge(df, com_areas, left_on="community_area", right_on="area_numbe")
return df
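For illustration, here is a minimal sketch of how encode_query assembles the string appended after "$query=" in get_data. The fields and date filter mirror those used above, the endpoint is one of the four listed in make_data_frame, and the LIMIT value is a stand-in; no request is issued, the URL is only printed for inspection.

example_query = {"SELECT": ["service_request_number", "creation_date", "completion_date"],
                 "WHERE": "creation_date between '2017-03-21T00:00:00' and '2017-03-31T00:00:00'",
                 "LIMIT": "1000"}
# encode_query joins list values with ", ", concatenates "KEY value" pairs, and percent-encodes the result
print(BASE_URL + "kcdz-f29q.json?" + "$query=" + encode_query(example_query))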
In [5]:
# To re-download data from city portal uncomment below.
# city_data = make_data_frame()
In [6]:
FIPS_URL = "https://www.broadbandmap.gov/broadbandmap/census/block?format=json&"
def fips_from_broadbandmapAPI(url):
'''
Pulls FIPS code from census json
e.g. https://www.broadbandmap.gov/broadbandmap/census/block?format=json&latitude=&longitude=
'''
if pd.notnull(url):
try:
r = requests.get(url).json()
except:
return None
if r['status']=='OK':
return r.get("Results").get("block")[0].get("FIPS")
else:
return r['status']
def add_latlng(lat,lng):
if pd.notnull(lat) and pd.notnull(lng):
return "latitude={}&longitude={}".format(lat, lng)
def add_lnglat_url(df,base_url = FIPS_URL):
'''
Add url for API to dataframe with latitude and longitude
'''
URLS = pd.DataFrame(base_url + pd.Series(map(add_latlng, df.ix[:,"latitude"], df.ix[:,"longitude"])))
df = df.assign(URLS=URLS)
return df
def batch_FIPS_from_API(df):
FIPS = pd.DataFrame(df.ix[:,"URLS"]).applymap(fips_from_broadbandmapAPI)
FIPS = FIPS.rename(columns={"URLS": "FIPS"})
return FIPS
In [ ]:
#Remove triple quotes to pull FIPS data.
'''
df = batch_FIPS_from_API(city_data.ix[city_data.loc[:,'type_of_service_request']=='Vacant/Abandoned Building'])
df = df.rename(columns={"URLS": "FIPS"})
sanitation_df = batch_FIPS_from_API(city_data.ix[city_data.loc[:,'type_of_service_request']=="Sanitation Code Violation"])
sanitation_df = sanitation_df.rename(columns={"URLS": "FIPS"})
city_data = pd.merge(city_data, df, left_index=True, right_index=True, how='left')
city_data = pd.merge(city_data, sanitation_df, left_index=True, right_index=True, how='left')
city_data.ix[:,"FIPS_x"] = city_data.ix[:,"FIPS_x"].combine_first(city_data.ix[:,"FIPS_y"])
city_data = city_data.rename(columns={"FIPS_x":"FIPS"})
city_data.drop("FIPS_y",axis=1,inplace=True)
city_data.to_csv("Safety_csv_1.csv")
del df, sanitation_df
missing_fips = check_nulls(city_data.ix[(city_data.loc[:,'type_of_service_request'] == "Sanitation Code Violation") | (city_data.loc[:,'type_of_service_request'] == 'Vacant/Abandoned Building')], "FIPS")
df = batch_FIPS_from_API(missing_fips)
city_data.ix[:,"FIPS"] = city_data.ix[:,"FIPS"].combine_first(df.ix[:,"FIPS"])
del df
'''
At this point my kernel crashed and I had to reload the data from CSV. This served as a natural break point, because the API heavy lifting had just finished.
In [24]:
#http://stackoverflow.com/questions/21269399/datetime-dtypes-in-pandas-read-csv
col_dtypes = {"":int,"community_area":float,"completion_date":object,
"creation_date":object,"latitude":float,
"longitude":float,"service_request_number":object,
"type_of_service_request":str,"URLS":str,"FIPS":object}
parse_dates = ["completion_date","creation_date"]
city_data = pd.read_csv("Safety_csv_1.csv",dtype=col_dtypes, parse_dates=parse_dates)
city_data.drop('Unnamed: 0',axis=1,inplace=True)
city_data.drop('URLS',axis=1,inplace=True)
city_data.ix[:,"time_to_completion"] = city_data.ix[:,"completion_date"] - city_data.ix[:,"creation_date"]
city_data.ix[:,"time_to_completion"] = city_data.ix[:,"time_to_completion"]/ np.timedelta64(1, 'D')
city_data = get_com_areas(city_data)
########
In [9]:
CENSUS_URL = "http://api.census.gov/data/2015/pdb/blockgroup?get={}&for=block+group:*&in=state:17+county:031"
census_fields = ["pct_NH_Blk_alone_ACS_09_13",
"pct_NH_White_alone_ACS_09_13",
"pct_Hispanic_ACS_09_13",
"pct_College_ACS_09_13",
"pct_Renter_Occp_HU_ACS_09_13",
"pct_Males_ACS_09_13",
"pct_Prs_Blw_Pov_Lev_ACS_09_13",
"pct_Diff_HU_1yr_Ago_ACS_09_13",
"Tot_Population_ACS_09_13",
"GIDBG"]
fields = len(census_fields) - 1
uscensus = pd.read_json(CENSUS_URL.format(",".join(census_fields)))
uscensus.columns = uscensus.ix[0]
uscensus = uscensus.rename(columns={"GIDBG": "FIPS"})
uscensus = uscensus.ix[1:]
uscensus.ix[:,0:fields] = uscensus.ix[:,0:fields].apply(pd.to_numeric, axis=1)
cnsdf = get_notnulls(city_data, "FIPS")
cnsdf=cnsdf.groupby(["FIPS", "type_of_service_request"])
counts = clean_grouped_data(cnsdf)
counts.ix[:,"FIPS"] = counts.index
counts.ix[:,"FIPS"] = pd.DataFrame(counts.ix[:,"FIPS"]).applymap(lambda x: x[:-3])
census_by_311 = pd.merge(counts, uscensus, left_on="FIPS", right_on="FIPS")
for col in census_fields[:-1]:
census_by_311[col] = pd.to_numeric(census_by_311[col])
census_by_311[census_by_311.columns[0] + " per 10000"]=10000*census_by_311.ix[:,0].fillna(0)/pd.to_numeric(census_by_311.Tot_Population_ACS_09_13)
census_by_311[census_by_311.columns[1] + " per 10000"]=10000*census_by_311.ix[:,1].fillna(0)/pd.to_numeric(census_by_311.Tot_Population_ACS_09_13)
In [26]:
from bokeh.plotting import figure
from bokeh.io import show, output_notebook,save
from bokeh.charts import Histogram
from bokeh.layouts import gridplot
output_notebook()
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from matplotlib.patches import PathPatch
%matplotlib inline
In [11]:
df = city_data
In [12]:
gdf = df.groupby("type_of_service_request")
totals = pd.DataFrame(gdf.count().ix[:,"service_request_number"])
totals.index.name=None
totals.columns
TYPES = totals.index
totals
Out[12]:
In [13]:
s = df.ix[:,"time_to_completion"].describe(percentiles=np.linspace(0,1,11))
s[1:]
Out[13]:
In [14]:
g2df = df.groupby(["community","type_of_service_request"])
counts = clean_grouped_data(g2df)
community_area_descriptives=round(counts.describe(),1)
community_area_descriptives
Out[14]:
In [15]:
means = counts.mean()
counts.ix[(counts.ix[:,0]>means[0]) & (counts.ix[:,1]>means[1]) & (counts.ix[:,2]>means[2]) & (counts.ix[:,3]>means[3])]
Out[15]:
In [27]:
p1 = Histogram(counts.ix[:,0], title = counts.columns[0])
p2 = Histogram(counts.ix[:,1], title = counts.columns[1])
p3 = Histogram(counts.ix[:,2], title = counts.columns[2])
p4 = Histogram(counts.ix[:,3], title = counts.columns[3])
row1 = [p1,p2]
row2 = [p3,p4]
layout = gridplot([[p1,p2],[p3,p4]], plot_width=250, plot_height = 250)
show(layout)
save(layout)
Out[27]:
In [30]:
def graph_over_time(col = 'type_of_service_request',request_type = "Sanitation Code Violation"):
a = df[df.ix[:,col]==request_type]
g = a.groupby("completion_date").count().ix[:,"service_request_number"]
p = figure(plot_width=400, plot_height=400,x_axis_type="datetime",title=request_type)
p.line(g.index, g)
save(p, filename= quote_plus(request_type) + ".html")
return p
for t in TYPES:
show(graph_over_time(request_type = t))
Of 77 community areas, only Ashburn and Chicago Lawn had above-average call-ins for all four 311 categories.
In Chicago, graffiti is far more common than the other disturbances, with twice as many calls as potholes, six times as many as sanitation code violations, and 24 times as many as vacant buildings. As a Chicago driver, I find this result surprising. On the other hand, people may have given up on potholes and may not call 311 as often as they encounter them.
If the completion data is to be trusted, the city resolved over 60 percent of the 200,000 complaints within one day, and over 80 percent within a week. After that the wait times increase sharply: requests in the top decile wait at least three weeks.
Each call type has its own seasonality, but calls seem to decrease in December, especially the vacant building calls. Potholes have the most distinct shape, with a bimodal distribution that peaks in July and February.
At the neighborhood level the variances and means are close, and the histograms show the probability mass concentrated at low counts for all four categories. This suggests a Poisson process, which matches what theory says we should see when we are counting events that arrive over time.
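That Poisson intuition can be checked crudely: a Poisson distribution has equal mean and variance, so the variance-to-mean ratio for each request type should sit near 1. A minimal sketch, assuming the community-level counts DataFrame built in In [14] above:

# Index of dispersion per request type; values near 1 are consistent with Poisson counts,
# values well above 1 indicate overdispersion across community areas.
dispersion = counts.var() / counts.mean()
print(round(dispersion, 2))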
In [18]:
census_by_311.ix[:,-2:].describe()
Out[18]:
In [19]:
vacants = census_by_311.ix[census_by_311.ix[:,"Vacant/Abandoned Building per 10000"] > 5]
vd = vacants.describe(percentiles=[]).ix[1:,2:10]
vd
Out[19]:
In [20]:
sanitation_violation = census_by_311.ix[census_by_311.ix[:,'Sanitation Code Violation per 10000'] > 19]
sv = sanitation_violation.describe(percentiles=[]).ix[1:,2:10]
sv
Out[20]:
In [21]:
sv - vd
Out[21]:
Vacant property and sanitation complaints are common 311 complaints. Over the past year, the City received roughly 5,000 vacant property complaints and 20,000 sanitation complaints. With block-level Census data, we can characterize the communities that generate these respective complaints. The following analysis describes the average census block originating these 311 calls. To draw sharper contrasts, I describe census blocks in the top quarter of the distribution after adjusting for population.
The average block made 4.4 calls per 10,000 residents to 311 about vacant property; however, the median census block had 0 calls, which suggests these calls are concentrated in certain areas. On average, the top quarter, those with 5 or more calls per 10,000 residents over the last year, have large African American populations and relatively high levels of poverty compared to the sanitation violation blocks. These blocks average 54 percent renter occupancy, and 15 percent of households moved in the past year, both very close to the sanitation violation blocks. The sanitation violation blocks have larger white and Latino populations and higher levels of college attainment: 25 percent versus 15 percent.
In [22]:
prob = counts.ix[["WEST ENGLEWOOD", "AUBURN GRESHAM"],:]
s = prob.sum()
pr = s/s.sum()
pr
Out[22]:
In [23]:
prob = counts.ix[["SOUTH LAWNDALE","UPTOWN"],:]
SLAWNDALE = prob.ix[0]/prob.T.sum().T[0]
UPTOWN = prob.ix[1]/prob.T.sum().T[1]
prob.T.sum().T/prob.T.sum().T.sum()
Out[23]:
Graffiti Removal 0.150587
Pothole in Street 0.422058
Sanitation Code Violation 0.245452
Vacant/Abandoned Building 0.181902
Pr(SOUTH LAWNDALE) = 0.714, Pr(Graffiti Removal | SOUTH LAWNDALE) = 0.787102
Pr(UPTOWN) = 0.28579, Pr(Graffiti Removal | UPTOWN) = 0.661727
Pr(Graffiti Removal) = 0.75127
Pr(SOUTH LAWNDALE | Graffiti): $P(A \mid B) = \frac{0.787102 \times 0.714}{0.75127} = 0.748$
Pr(UPTOWN | Graffiti): $P(A \mid B) = \frac{0.661727 \times 0.28579}{0.75127} = 0.25$
So, given a graffiti call, South Lawndale is about 50 percentage points (roughly three times) more likely than Uptown to be the source of the call.
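The same posterior can be computed directly from the community-level counts table instead of by hand. A minimal sketch, assuming the counts DataFrame from In [14] and its "Graffiti Removal" column; .loc is used here in place of the .ix indexing above:

prob = counts.loc[["SOUTH LAWNDALE", "UPTOWN"], :]
totals = prob.sum(axis=1)                       # total calls per community
prior = totals / totals.sum()                   # Pr(community)
likelihood = prob["Graffiti Removal"] / totals  # Pr(Graffiti | community)
evidence = (prior * likelihood).sum()           # Pr(Graffiti)
posterior = prior * likelihood / evidence       # Pr(community | Graffiti), ~0.75 vs ~0.25
print(posterior)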
By the same reasoning, the probability that a call for Graffiti Removal originated in Englewood is $P(A \mid B) = \frac{0.6 \times \frac{1}{6}}{0.24} = \frac{5}{12}$.
And so the probability it originated in Uptown is $\frac{7}{12}$, which is $\frac{1}{6}$ more likely.
In [ ]: