notebook.community

Edit and run



In [1]:

    
import pandas as pd

#All 311 requests for 2016 -import zip as a string
quality = pd.io.parsers.read_csv('finalProjectDoomed/311_Service_Requests_from_2010_to_Present.csv', dtype={'Incident Zip': 'str'})









    



C:\Users\David\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py:2717: DtypeWarning: Columns (17,40,41,43,44,45,47,48,49) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)



In [2]:

    
# Get all New York City Zip Codes
stuff = pd.io.parsers.read_csv('finalProjectDoomed/nycZip.csv',dtype={'ZipNy': 'str'}) # Get list of all New York City Zip Codes
zipNy= stuff[['ZipNy','Borough']] # Clean up Data
zipNy = zipNy.ZipNy.unique() # remove double entriesy
#zipNy



In [3]:

    
# Remove all non New York City Zips
realNYC = quality[quality['Incident Zip'].isin(zipNy)] # remove outliers from the data, Only New York City Zips
realNYC = realNYC.reset_index(drop=True)
#realNYC



In [4]:

    
# Get Show Zip occurences
test = realNYC
test = test.rename(columns={'Incident Zip':'ZIP'})
test = test['ZIP']
test = test.value_counts()
test









    Out[4]:





11226    51099
10467    43086
10453    39814
10458    39157
10468    36876
11207    36728
11385    36032
10452    34797
10031    34661
10456    34143
11208    33193
10457    32925
11221    32676
10472    32041
11213    29780
11225    29486
11216    28792
11212    28692
11234    28441
11233    28260
11230    27534
10032    27429
10462    27293
11203    26879
11206    26609
10025    26484
10463    26241
10033    25924
11235    25812
11238    25785
         ...  
10172       22
10123       21
10165       20
10111       20
11359       18
10167       18
10106       18
10107       18
10153       18
10174       17
11241       16
10105       15
10158       15
10120       14
10173       13
10170       13
10177       12
10048       11
10115       11
11242       11
10155       10
10178       10
10176        9
10151        7
10045        7
10152        7
10122        6
10055        3
10166        3
10171        2
Name: ZIP, dtype: int64



In [5]:

    
#get Lat Lon for Zips Codes
zipy =  pd.io.parsers.read_csv('finalProjectDoomed/zipLatLon.csv',dtype={'ZIP': 'str'})# get Lat Lon of Zip 
realZip = zipy[zipy['ZIP'].isin(zipNy)]# Only Ny City ones
realZip = realZip.reset_index(drop=True)
realZip = realZip.set_index('ZIP')
#realZip



In [6]:

    
# Convert back to Dataframe
test = pd.DataFrame(test).reset_index()
test = test.rename(columns={'index':'ZIP','ZIP':'NUM'})
test = test.set_index('ZIP')
#test



In [7]:

    
# Concat together with Lat Lon
tried = [test,realZip]
graphOut = pd.concat(tried, axis=1)



In [8]:

    
import numpy as np
# rename columns and remove misformed Zip codes
graphOut = graphOut.rename(columns={'LNG':'LON'})
graphOut.index.name = 'ZIP'
graphOut = graphOut[np.isfinite(graphOut['LON'])]
graphOut = graphOut[np.isfinite(graphOut['LAT'])]
graphOut = graphOut[np.isfinite(graphOut['NUM'])]



In [9]:

    
#Save dataset
graphOut.to_csv("graphOut.csv", sep=',')

Kmeans Finding Clusters in the Data



In [10]:

    
# Get unique Complaint type occurences
get = realNYC['Complaint Type'].value_counts()
get = pd.DataFrame(get).reset_index()
get = get.rename(columns={'index':'Complaint Type','Complaint Type':'index'})
#get



In [11]:

    
# Create a dictionary to convert complaint types to numerical value
dictGet = {}

for index, row in get.iterrows():
    dictGet[row["Complaint Type"]] = index

#dictGet



In [14]:

    
# Pass all complaints through dictionary and store values
qualCom = realNYC['Complaint Type'].values
for (i, n) in enumerate(realNYC['Complaint Type']):
         qualCom[i] = dictGet[n]



In [15]:

    
# rename ZIP
q = realNYC
q = q.rename(columns={'Incident Zip':'ZIP'})
q = q[q['ZIP'] >= 0] # remove malformed zip code



In [16]:

    
import numpy as np
from sklearn.cluster import KMeans

# Perform Kmeans 2-10
clustZip = q['ZIP'].values

X=np.matrix(zip(qualCom,clustZip))
numK = range(1,11)
results = []

for i in numK:
        results.append(KMeans(n_clusters=i).fit(X))



In [17]:

    
#Create a new Panda with Zip column for storing kmeans output 
groupPlot = q[["ZIP"]]
groupPlot = groupPlot.reset_index(drop=True)



In [19]:

    
#Create new columns and assign data
groupPlot["cc"] = results[0].labels_ # Array of 0's
groupPlot["k2"] = results[1].labels_
groupPlot["k3"] = results[2].labels_
groupPlot["k4"] = results[3].labels_
groupPlot["k5"] = results[4].labels_
groupPlot["k6"] = results[5].labels_
groupPlot["k7"] = results[6].labels_
groupPlot["k8"] = results[7].labels_
groupPlot["k9"] = results[8].labels_
groupPlot["k10"] = results[9].labels_
groupPlot



In [ ]:

    
#Add Clusters

#for i in range(1,6):
  #  for j, cluster in enumerate(results[i].cluster_centers_) :
       # rowAdd = [cluster[1],cluster[0],1,0,0,0,0,0,0,0,0,0]
        #rowAdd[i+2] = j;
        #rowAdd[2] = i+1;
        #groupPlot.loc[len(prostitution)] = rowAdd



In [21]:

    
#remove duplicates from the data so only 1 for each zip
outy = groupPlot.drop_duplicates(['ZIP'])
outy['ZipCode'] = outy['ZIP'] 
outy = outy.set_index('ZIP')
outy









    



C:\Users\David\Anaconda2\lib\site-packages\ipykernel\__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()






    Out[21]:






  
    
      
      cc
      k2
      k3
      k4
      k5
      k6
      k7
      k8
      k9
      k10
      ZipCode
    
    
      ZIP
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      11204
      0
      0
      1
      3
      3
      1
      0
      1
      3
      3
      11204
    
    
      11205
      0
      0
      1
      3
      3
      1
      0
      1
      3
      3
      11205
    
    
      11211
      0
      0
      1
      3
      3
      1
      0
      1
      3
      3
      11211
    
    
      11234
      0
      0
      1
      3
      3
      1
      0
      1
      3
      3
      11234
    
    
      11422
      0
      0
      1
      0
      0
      3
      3
      3
      6
      0
      11422
    
    
      11413
      0
      0
      1
      0
      0
      3
      3
      3
      6
      0
      11413
    
    
      11358
      0
      0
      1
      0
      0
      3
      3
      7
      0
      6
      11358
    
    
      11355
      0
      0
      1
      0
      0
      3
      3
      7
      0
      6
      11355
    
    
      10003
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10003
    
    
      10025
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10025
    
    
      10128
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10128
    
    
      11225
      0
      0
      1
      3
      3
      1
      0
      1
      3
      3
      11225
    
    
      10001
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10001
    
    
      10470
      0
      1
      2
      2
      2
      0
      4
      2
      2
      2
      10470
    
    
      10452
      0
      1
      2
      2
      2
      0
      4
      2
      2
      2
      10452
    
    
      11210
      0
      0
      1
      3
      3
      1
      0
      1
      3
      3
      11210
    
    
      11207
      0
      0
      1
      3
      3
      1
      0
      1
      3
      3
      11207
    
    
      11361
      0
      0
      1
      0
      0
      3
      3
      7
      0
      6
      11361
    
    
      11435
      0
      0
      1
      0
      0
      3
      3
      3
      6
      0
      11435
    
    
      11419
      0
      0
      1
      0
      0
      3
      3
      3
      6
      0
      11419
    
    
      10467
      0
      1
      2
      2
      2
      0
      4
      2
      2
      2
      10467
    
    
      10014
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10014
    
    
      11411
      0
      0
      1
      0
      0
      3
      3
      3
      6
      0
      11411
    
    
      11691
      0
      0
      1
      0
      4
      4
      5
      4
      4
      5
      11691
    
    
      10306
      0
      1
      2
      2
      2
      5
      1
      5
      5
      4
      10306
    
    
      11421
      0
      0
      1
      0
      0
      3
      3
      3
      6
      0
      11421
    
    
      11368
      0
      0
      1
      0
      0
      3
      3
      7
      0
      6
      11368
    
    
      11235
      0
      0
      1
      3
      3
      1
      0
      1
      8
      9
      11235
    
    
      10005
      0
      1
      0
      1
      1
      2
      2
      0
      1
      8
      10005
    
    
      10032
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10032
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      10278
      0
      1
      2
      2
      2
      5
      1
      5
      5
      4
      10278
    
    
      10105
      0
      1
      0
      1
      1
      2
      2
      0
      1
      8
      10105
    
    
      11359
      0
      0
      1
      0
      0
      3
      3
      7
      0
      6
      11359
    
    
      10122
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10122
    
    
      10111
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10111
    
    
      10168
      0
      1
      0
      1
      1
      5
      1
      5
      5
      4
      10168
    
    
      10120
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10120
    
    
      10176
      0
      1
      0
      1
      1
      5
      1
      5
      5
      4
      10176
    
    
      10110
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10110
    
    
      10048
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10048
    
    
      10174
      0
      1
      0
      1
      1
      5
      1
      5
      5
      4
      10174
    
    
      10177
      0
      1
      0
      1
      1
      5
      1
      5
      5
      4
      10177
    
    
      10165
      0
      1
      0
      1
      1
      5
      1
      5
      5
      8
      10165
    
    
      10151
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10151
    
    
      10121
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10121
    
    
      10167
      0
      1
      0
      1
      1
      5
      1
      5
      5
      4
      10167
    
    
      10279
      0
      1
      2
      2
      2
      5
      1
      5
      5
      4
      10279
    
    
      10152
      0
      1
      0
      1
      1
      2
      2
      0
      1
      8
      10152
    
    
      10169
      0
      1
      0
      1
      1
      5
      1
      5
      5
      4
      10169
    
    
      10173
      0
      1
      0
      1
      1
      5
      1
      5
      5
      4
      10173
    
    
      10055
      0
      1
      0
      1
      1
      2
      2
      0
      1
      8
      10055
    
    
      10158
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10158
    
    
      10155
      0
      1
      0
      1
      1
      2
      2
      0
      1
      8
      10155
    
    
      10172
      0
      1
      0
      1
      1
      5
      1
      5
      5
      4
      10172
    
    
      10170
      0
      1
      0
      1
      1
      5
      1
      5
      5
      8
      10170
    
    
      11242
      0
      0
      1
      3
      3
      1
      0
      1
      3
      3
      11242
    
    
      10106
      0
      1
      0
      1
      1
      2
      2
      0
      1
      1
      10106
    
    
      10178
      0
      1
      0
      1
      1
      5
      1
      5
      5
      4
      10178
    
    
      10171
      0
      1
      0
      1
      1
      5
      1
      5
      5
      4
      10171
    
    
      10166
      0
      1
      0
      1
      1
      5
      1
      5
      5
      4
      10166
    
  

224 rows × 11 columns



In [22]:

    
import numpy as np
#Combine with Zip Lat Lon data
graphStep = graphOut 
fram = [graphStep,outy]
graphKm = pd.concat(fram, axis=1)
graphKm = graphKm[np.isfinite(graphKm['k2'])] #Remove misformed zipcodes data
graphKm = graphKm[np.isfinite(graphKm['LAT'])]

graphKm.to_csv("graphKm.csv", sep=',')
#graphKm



In [23]:

    
#Save dataset
graphKm.to_csv("graphKm.csv", sep=',')

Cluster Income Data



In [ ]:

    
income = pd.read_csv('incomeData.csv')



In [ ]:

    
incomeNy = income[income['STATE'] == "NY"] 
incomeNy.to_csv("incomeNy.csv", sep=',')



In [29]:

    
import pandas as pd
income = pd.io.parsers.read_csv('finalProjectDoomed/incomeNy.csv', dtype={'zipcode': 'str'})



In [122]:

    
#income



In [30]:

    
incomeInc = income[income['zipcode'].isin(zipNy)] # Just New York City Zip



In [31]:

    
# Create data set to be used for CLustering
dfAgi = incomeInc['agi_stub'].values
dfZip = incomeInc['zipcode'].values
dfNum = incomeInc['N1'].values
print dfNum









    



[ 3880.  2530.  1850. ...,   240.   490.   180.]



In [32]:

    
# Create dataset for clustering
incY = []
incX = []

rani = len(dfAgi)

for i in range(0,rani):
    ranj = int(dfNum[i]) 
    #print ranj
    for j in range(0,ranj):
        incX.append(dfZip[i])
        incY.append(dfAgi[i])



In [34]:

    
import numpy as np
from sklearn.cluster import KMeans
 
clustZip = incY

X=np.matrix(zip(incX,clustZip))
numK = range(1,11)
resultsInc = []

for i in numK:
        resultsInc.append(KMeans(n_clusters=i).fit(X))



In [40]:

    
groupPlotInc = graphOut
del groupPlotInc['NUM']
groupPlotInc



In [49]:

    
groupInc = incX
groupInc = pd.DataFrame(groupInc).reset_index()
del groupInc['index']
groupInc = groupInc.rename(columns={'0':'Zip'})
groupInc = groupInc.reset_index(drop=True)
groupInc.columns = ['ZIP']
groupInc['ZipCode'] = groupInc['ZIP']
groupInc = groupInc.set_index('ZIP')
groupInc



In [44]:

    
#Create new columns and assign data
groupInc["cc"] = resultsInc[0].labels_ # Array of 0's
groupInc["k2"] = resultsInc[1].labels_
groupInc["k3"] = resultsInc[2].labels_
groupInc["k4"] = resultsInc[3].labels_
groupInc["k5"] = resultsInc[4].labels_
groupInc["k6"] = resultsInc[5].labels_
groupInc["k7"] = resultsInc[6].labels_
groupInc["k8"] = resultsInc[7].labels_
groupInc["k9"] = resultsInc[8].labels_
groupInc["k10"] = resultsInc[9].labels_



In [51]:

    
outInc = groupInc.drop_duplicates(['ZipCode']).reset_index()
outInc = outInc.set_index('ZIP')
#del outInc['index']
outInc



In [52]:

    
import numpy as np

fram = [groupPlotInc,outInc]
graphInc = pd.concat(fram, axis=1)
graphInc = graphInc[np.isfinite(graphInc['k2'])] # remove zip codes no data for

graphInc.to_csv("graphInc.csv", sep=',')
#graphInc

'Noise - Vehicle' 'Noise' 'Noise - Residential' 'Noise - Street/Sidewalk'



In [65]:

    
# Get all noise complaints for New York City Only
noise1 = realNYC[realNYC['Complaint Type'] == 'Noise - Vehicle']
noise2 = realNYC[realNYC['Complaint Type'] == 'Noise']
noise3 = realNYC[realNYC['Complaint Type'] == 'Noise - Residential']
noise4 = realNYC[realNYC['Complaint Type'] == 'Noise - Street/Sidewalk']
noise5 = realNYC[realNYC['Complaint Type'] == 'Noise - Commercial']



In [66]:

    
# Combine the different noise complaints
frames = [noise1,noise2,noise3,noise4,noise5]
noise = pd.concat(frames)
noise = noise.reset_index(drop=True)



In [150]:

    
noiseOut = noise.rename(columns={'Incident Zip':'ZIP'})
noiseOut = noiseOut['ZIP']
noiseOut = noiseOut.value_counts()

# Convert back to Dataframe
noiseOut = pd.DataFrame(noiseOut).reset_index()
noiseOut = noiseOut.rename(columns={'index':'ZIP','ZIP':'Noise'})

noiseOut['ZipCode'] = noiseOut['ZIP']
noiseOut = noiseOut.set_index('ZIP')
noiseOut

noiseStep = graphOut
# Concat together
noiseTry = [noiseStep,noiseOut]
noiseOutp = pd.concat(noiseTry, axis=1)
#noiseOutp



In [151]:

    
# Get  food complaints
foodPosioning = realNYC[realNYC['Complaint Type'] == 'Food Poisoning']
foodEstablishment = realNYC[realNYC['Complaint Type'] == 'Food Establishment']
frames = [foodPosioning,foodEstablishment]
food = pd.concat(frames)
food = food.reset_index(drop=True)



In [152]:

    
foodOut = food.rename(columns={'Incident Zip':'ZIP'})
foodOut = foodOut['ZIP']
foodOut = foodOut.value_counts()

# Convert back to Dataframe
foodOut = pd.DataFrame(foodOut).reset_index()
foodOut = foodOut.rename(columns={'index':'ZIP','ZIP':'Food'})

foodOut = foodOut.set_index('ZIP')
foodOut

foodStep = graphOut
# Concat together
foodTry = [foodStep,foodOut]
foodOutp = pd.concat(foodTry, axis=1)
#foodOutp



In [140]:

    
# Homeless People
homelessPerson = realNYC[realNYC['Complaint Type'] == 'Homeless Person Assistance'] 
HomelessEncampment = realNYC[realNYC['Complaint Type'] == 'Homeless Encampment'] 
frame = [homelessPerson,HomelessEncampment]
homeless = pd.concat(frame)
homeless = homeless.reset_index(drop=True)



In [153]:

    
homeOut = homeless.rename(columns={'Incident Zip':'ZIP'})
homeOut = homeOut['ZIP']
homeOut = homeOut.value_counts()

# Convert back to Dataframe
homeOut = pd.DataFrame(homeOut).reset_index()
homeOut = homeOut.rename(columns={'index':'ZIP','ZIP':'Homeless'})

homeOut = homeOut.set_index('ZIP')
homeOut

homeStep = graphOut
# Concat together
homeTry = [homeStep,homeOut]
homeOutp = pd.concat(homeTry, axis=1)
#homeOutp



In [74]:

    
# Neighbourhood Condition

streetRoad  = realNYC[realNYC['Complaint Type'] == 'Street Condition']
streetLight  = realNYC[realNYC['Complaint Type'] == 'Street Light Condition']
sweeping = realNYC[realNYC['Complaint Type'] == 'Sweeping/Inadequate']
graffiti = realNYC[realNYC['Complaint Type'] == 'Graffiti']
derelictV = realNYC[realNYC['Complaint Type'] == 'Derelict Vehicle']

frames = [streetRoad,streetLight,derelictV,sweeping,graffiti]
neighbourhood = pd.concat(frames)
neighbourhood = neighbourhood.reset_index(drop=True)



In [154]:

    
neighOut = neighbourhood.rename(columns={'Incident Zip':'ZIP'})
neighOut = neighOut['ZIP']
neighOut = neighOut.value_counts()

# Convert back to Dataframe
neighOut = pd.DataFrame(neighOut).reset_index()
neighOut = neighOut.rename(columns={'index':'ZIP','ZIP':'Neighbourhood'})
neighOut = neighOut.set_index('ZIP')
neighOut

neighStep = graphOut
# Concat together
neighTry = [neighStep,neighOut]
neighOutp = pd.concat(neighTry, axis=1)
#neighOutp



In [142]:

    
# Sanitation
rodent = realNYC[realNYC['Complaint Type'] == 'Rodent']
dirty = realNYC[realNYC['Complaint Type'] == 'Dirty Conditions']
sanitationC = realNYC[realNYC['Complaint Type'] == 'Sanitation Condition']
sewer = realNYC[realNYC['Complaint Type'] == 'Sewer']
bask = realNYC[realNYC['Complaint Type'] == 'Overflowing Recycling Baskets']
unsan = realNYC[realNYC['Complaint Type'] == 'UNSANITARY CONDITION']
missed = realNYC[realNYC['Complaint Type'] == 'Missed Collection (All Materials)']

frames = [rodent,dirty,sanitationC,sewer,bask,unsan,missed]
sanitation = pd.concat(frames)
sanitation = sanitation.reset_index(drop=True)



In [211]:

    
sanOut = sanitation.rename(columns={'Incident Zip':'ZIP'})
sanOut = sanOut['ZIP']
sanOut = sanOut.value_counts()
#len(sanOut)

# Convert back to Dataframe
sanOut = pd.DataFrame(sanOut).reset_index()
sanOut = sanOut.rename(columns={'index':'ZIP','ZIP':'Sanitation'})
sanOut = sanOut.set_index('ZIP')
sanOut

sanStep = graphOut
# Concat together
sanTry = [sanStep,sanOut]
sanOutp = pd.concat(sanTry, axis=1)
#sanOutp



In [328]:

    
frameB = [sanOut,foodOut,homeOut,neighOut,noiseOut]
graphMove = pd.concat(frameB, axis=1)



In [330]:

    
#Fill in the holes
g2 = graphMove.fillna(0)









    Out[330]:






  
    
      
      Sanitation
      Food
      Homeless
      Neighbourhood
      Noise
      ZipCode
    
  
  
    
      10001
      993.0
      281.0
      3501.0
      815.0
      4882.0
      10001
    
    
      10002
      1756.0
      247.0
      625.0
      1960.0
      8631.0
      10002
    
    
      10003
      1465.0
      399.0
      4377.0
      1209.0
      6344.0
      10003
    
    
      10004
      97.0
      70.0
      311.0
      260.0
      533.0
      10004
    
    
      10005
      71.0
      35.0
      321.0
      272.0
      1022.0
      10005
    
    
      10006
      46.0
      21.0
      145.0
      144.0
      567.0
      10006
    
    
      10007
      155.0
      88.0
      360.0
      461.0
      1179.0
      10007
    
    
      10009
      1481.0
      144.0
      1348.0
      816.0
      7564.0
      10009
    
    
      10010
      706.0
      186.0
      1106.0
      506.0
      2838.0
      10010
    
    
      10011
      1654.0
      328.0
      2610.0
      1019.0
      4141.0
      10011
    
    
      10012
      926.0
      233.0
      1575.0
      781.0
      3929.0
      10012
    
    
      10013
      1092.0
      304.0
      859.0
      1191.0
      3412.0
      10013
    
    
      10014
      1292.0
      186.0
      896.0
      1027.0
      3719.0
      10014
    
    
      10016
      1157.0
      310.0
      2455.0
      1058.0
      5824.0
      10016
    
    
      10017
      289.0
      235.0
      1994.0
      462.0
      1464.0
      10017
    
    
      10018
      394.0
      188.0
      1933.0
      393.0
      1364.0
      10018
    
    
      10019
      1246.0
      295.0
      3009.0
      1208.0
      4490.0
      10019
    
    
      10020
      17.0
      12.0
      58.0
      41.0
      21.0
      10020
    
    
      10021
      760.0
      134.0
      553.0
      774.0
      1764.0
      10021
    
    
      10022
      757.0
      252.0
      1478.0
      1046.0
      2041.0
      10022
    
    
      10023
      909.0
      186.0
      3492.0
      845.0
      2805.0
      10023
    
    
      10024
      1134.0
      161.0
      1654.0
      884.0
      3267.0
      10024
    
    
      10025
      2483.0
      215.0
      1895.0
      1081.0
      6061.0
      10025
    
    
      10026
      1432.0
      113.0
      653.0
      462.0
      4426.0
      10026
    
    
      10027
      2039.0
      174.0
      1184.0
      800.0
      6886.0
      10027
    
    
      10028
      935.0
      156.0
      588.0
      578.0
      2654.0
      10028
    
    
      10029
      2066.0
      474.0
      554.0
      734.0
      6511.0
      10029
    
    
      10030
      1513.0
      45.0
      167.0
      276.0
      3891.0
      10030
    
    
      10031
      2699.0
      116.0
      479.0
      467.0
      10029.0
      10031
    
    
      10032
      2330.0
      82.0
      144.0
      571.0
      7603.0
      10032
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      11379
      1251.0
      32.0
      21.0
      1699.0
      716.0
      11379
    
    
      11385
      4361.0
      179.0
      104.0
      3706.0
      5261.0
      11385
    
    
      11411
      808.0
      18.0
      10.0
      1141.0
      411.0
      11411
    
    
      11412
      2026.0
      24.0
      14.0
      1589.0
      1012.0
      11412
    
    
      11413
      1608.0
      32.0
      17.0
      2058.0
      716.0
      11413
    
    
      11414
      1651.0
      51.0
      31.0
      1471.0
      759.0
      11414
    
    
      11415
      606.0
      29.0
      53.0
      630.0
      1510.0
      11415
    
    
      11416
      1033.0
      30.0
      28.0
      1249.0
      1278.0
      11416
    
    
      11417
      2403.0
      52.0
      40.0
      1715.0
      1084.0
      11417
    
    
      11418
      1601.0
      71.0
      62.0
      1737.0
      1737.0
      11418
    
    
      11419
      2208.0
      92.0
      16.0
      1534.0
      2452.0
      11419
    
    
      11420
      2543.0
      74.0
      20.0
      2592.0
      2596.0
      11420
    
    
      11421
      1236.0
      35.0
      20.0
      1583.0
      1900.0
      11421
    
    
      11422
      1197.0
      29.0
      9.0
      1583.0
      623.0
      11422
    
    
      11423
      1330.0
      25.0
      37.0
      1155.0
      1074.0
      11423
    
    
      11426
      528.0
      15.0
      2.0
      787.0
      278.0
      11426
    
    
      11427
      653.0
      24.0
      17.0
      1028.0
      754.0
      11427
    
    
      11428
      632.0
      25.0
      17.0
      1006.0
      642.0
      11428
    
    
      11429
      939.0
      10.0
      20.0
      1033.0
      436.0
      11429
    
    
      11430
      13.0
      25.0
      8.0
      65.0
      8.0
      11430
    
    
      11432
      2241.0
      136.0
      215.0
      1762.0
      1926.0
      11432
    
    
      11433
      2392.0
      29.0
      82.0
      1407.0
      1352.0
      11433
    
    
      11434
      2843.0
      78.0
      31.0
      2531.0
      1733.0
      11434
    
    
      11435
      2114.0
      56.0
      107.0
      1861.0
      2133.0
      11435
    
    
      11436
      901.0
      16.0
      13.0
      989.0
      726.0
      11436
    
    
      11691
      2723.0
      45.0
      33.0
      2034.0
      2225.0
      11691
    
    
      11692
      699.0
      23.0
      4.0
      667.0
      862.0
      11692
    
    
      11693
      498.0
      20.0
      11.0
      505.0
      790.0
      11693
    
    
      11694
      895.0
      28.0
      9.0
      962.0
      991.0
      11694
    
    
      11697
      14.0
      4.0
      4.0
      37.0
      11.0
      11697
    
  

222 rows × 6 columns



In [304]:

    
# Remove any misformed zip codes
g2 = g2[g2['ZipCode'] > 0]
g2









    Out[304]:






  
    
      
      Sanitation
      Food
      Homeless
      Neighbourhood
      Noise
      ZipCode
    
  
  
    
      10001
      993.0
      281.0
      3501.0
      815.0
      4882.0
      10001
    
    
      10002
      1756.0
      247.0
      625.0
      1960.0
      8631.0
      10002
    
    
      10003
      1465.0
      399.0
      4377.0
      1209.0
      6344.0
      10003
    
    
      10004
      97.0
      70.0
      311.0
      260.0
      533.0
      10004
    
    
      10005
      71.0
      35.0
      321.0
      272.0
      1022.0
      10005
    
    
      10006
      46.0
      21.0
      145.0
      144.0
      567.0
      10006
    
    
      10007
      155.0
      88.0
      360.0
      461.0
      1179.0
      10007
    
    
      10009
      1481.0
      144.0
      1348.0
      816.0
      7564.0
      10009
    
    
      10010
      706.0
      186.0
      1106.0
      506.0
      2838.0
      10010
    
    
      10011
      1654.0
      328.0
      2610.0
      1019.0
      4141.0
      10011
    
    
      10012
      926.0
      233.0
      1575.0
      781.0
      3929.0
      10012
    
    
      10013
      1092.0
      304.0
      859.0
      1191.0
      3412.0
      10013
    
    
      10014
      1292.0
      186.0
      896.0
      1027.0
      3719.0
      10014
    
    
      10016
      1157.0
      310.0
      2455.0
      1058.0
      5824.0
      10016
    
    
      10017
      289.0
      235.0
      1994.0
      462.0
      1464.0
      10017
    
    
      10018
      394.0
      188.0
      1933.0
      393.0
      1364.0
      10018
    
    
      10019
      1246.0
      295.0
      3009.0
      1208.0
      4490.0
      10019
    
    
      10020
      17.0
      12.0
      58.0
      41.0
      21.0
      10020
    
    
      10021
      760.0
      134.0
      553.0
      774.0
      1764.0
      10021
    
    
      10022
      757.0
      252.0
      1478.0
      1046.0
      2041.0
      10022
    
    
      10023
      909.0
      186.0
      3492.0
      845.0
      2805.0
      10023
    
    
      10024
      1134.0
      161.0
      1654.0
      884.0
      3267.0
      10024
    
    
      10025
      2483.0
      215.0
      1895.0
      1081.0
      6061.0
      10025
    
    
      10026
      1432.0
      113.0
      653.0
      462.0
      4426.0
      10026
    
    
      10027
      2039.0
      174.0
      1184.0
      800.0
      6886.0
      10027
    
    
      10028
      935.0
      156.0
      588.0
      578.0
      2654.0
      10028
    
    
      10029
      2066.0
      474.0
      554.0
      734.0
      6511.0
      10029
    
    
      10030
      1513.0
      45.0
      167.0
      276.0
      3891.0
      10030
    
    
      10031
      2699.0
      116.0
      479.0
      467.0
      10029.0
      10031
    
    
      10032
      2330.0
      82.0
      144.0
      571.0
      7603.0
      10032
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      11379
      1251.0
      32.0
      21.0
      1699.0
      716.0
      11379
    
    
      11385
      4361.0
      179.0
      104.0
      3706.0
      5261.0
      11385
    
    
      11411
      808.0
      18.0
      10.0
      1141.0
      411.0
      11411
    
    
      11412
      2026.0
      24.0
      14.0
      1589.0
      1012.0
      11412
    
    
      11413
      1608.0
      32.0
      17.0
      2058.0
      716.0
      11413
    
    
      11414
      1651.0
      51.0
      31.0
      1471.0
      759.0
      11414
    
    
      11415
      606.0
      29.0
      53.0
      630.0
      1510.0
      11415
    
    
      11416
      1033.0
      30.0
      28.0
      1249.0
      1278.0
      11416
    
    
      11417
      2403.0
      52.0
      40.0
      1715.0
      1084.0
      11417
    
    
      11418
      1601.0
      71.0
      62.0
      1737.0
      1737.0
      11418
    
    
      11419
      2208.0
      92.0
      16.0
      1534.0
      2452.0
      11419
    
    
      11420
      2543.0
      74.0
      20.0
      2592.0
      2596.0
      11420
    
    
      11421
      1236.0
      35.0
      20.0
      1583.0
      1900.0
      11421
    
    
      11422
      1197.0
      29.0
      9.0
      1583.0
      623.0
      11422
    
    
      11423
      1330.0
      25.0
      37.0
      1155.0
      1074.0
      11423
    
    
      11426
      528.0
      15.0
      2.0
      787.0
      278.0
      11426
    
    
      11427
      653.0
      24.0
      17.0
      1028.0
      754.0
      11427
    
    
      11428
      632.0
      25.0
      17.0
      1006.0
      642.0
      11428
    
    
      11429
      939.0
      10.0
      20.0
      1033.0
      436.0
      11429
    
    
      11430
      13.0
      25.0
      8.0
      65.0
      8.0
      11430
    
    
      11432
      2241.0
      136.0
      215.0
      1762.0
      1926.0
      11432
    
    
      11433
      2392.0
      29.0
      82.0
      1407.0
      1352.0
      11433
    
    
      11434
      2843.0
      78.0
      31.0
      2531.0
      1733.0
      11434
    
    
      11435
      2114.0
      56.0
      107.0
      1861.0
      2133.0
      11435
    
    
      11436
      901.0
      16.0
      13.0
      989.0
      726.0
      11436
    
    
      11691
      2723.0
      45.0
      33.0
      2034.0
      2225.0
      11691
    
    
      11692
      699.0
      23.0
      4.0
      667.0
      862.0
      11692
    
    
      11693
      498.0
      20.0
      11.0
      505.0
      790.0
      11693
    
    
      11694
      895.0
      28.0
      9.0
      962.0
      991.0
      11694
    
    
      11697
      14.0
      4.0
      4.0
      37.0
      11.0
      11697
    
  

203 rows × 6 columns



In [305]:

    
# Save dataset
g2.to_csv("graphMove.csv", sep=',')



In [326]:

    
# Heat Maps
geo_data = {'lat':neighbourhood['Latitude'].values, 'lon':neighbourhood['Longitude'].values}
#geo_data2 = {'lat':sanitation['Latitude'].values, 'lon':sanitation['Longitude'].values}
#geo_data3 = {'lat':neighbourhood['Latitude'].values, 'lon':neighbourhood['Longitude'].values}
#geo_data



In [327]:

    
import geoplotlib as gp
from geoplotlib.utils import BoundingBox

 # Prepare data for bounding box
max_lat = max(geo_data['lat'])
print max_lat
min_lon = min(geo_data['lon'])
    #print min_lon
min_lat = min(geo_data['lat'])
    #print min_lat
max_lon = max(geo_data['lon'])
    #print max_lon
        
    
    # Create and set bounding box for map of San Francisko
bbox = BoundingBox(north=max_lat, west=min_lon, south=min_lat, east=max_lon)
gp.set_bbox(bbox)
gp.kde(geo_data,5)
gp.show()









    



40.9128687953
('smallest non-zero count', 2.1494359633152135e-09)
('max count:', 14.263694107127527)
('smallest non-zero count', 5.8081207521849619e-08)
('max count:', 5.2529070043199892)
('smallest non-zero count', 9.8343065261074206e-09)
('max count:', 5.2529070043199892)
('smallest non-zero count', 7.7441610029132826e-08)
('max count:', 5.2529070043199892)
('smallest non-zero count', 4.2292940046479448e-06)
('max count:', 1.7508267523673384)



In [ ]:

    
# Prepare data for bounding box
max_lat2 = max(geo_data2['lat'])
   
min_lon2 = min(geo_data2['lon'])
    #print min_lon
min_lat2 = min(geo_data2['lat'])
    #print min_lat
max_lon2 = max(geo_data2['lon'])
    #print max_lon
        
    
    # Create and set bounding box for map of San Francisko
bbox = BoundingBox(north=max_lat2, west=min_lon2, south=min_lat2, east=max_lon2)
gp.set_bbox(bbox)
gp.kde(geo_data2,5)
gp.show()



In [ ]:

    
import geoplotlib as gp
from geoplotlib.utils import BoundingBox

 # Prepare data for bounding box
max_lat3 = max(geo_data['lat'])
   
min_lon3 = min(geo_data['lon'])
    #print min_lon
min_lat3 = min(geo_data['lat'])
    #print min_lat
max_lon3 = max(geo_data['lon'])
    #print max_lon
        
    
    # Create and set bounding box for map of San Francisko
bbox = BoundingBox(north=max_lat3, west=min_lon3, south=min_lat3, east=max_lon3)
gp.set_bbox(bbox)
gp.kde(geo_data3,3)
gp.show()

	cc	k2	k3	k4	k5	k6	k7	k8	k9	k10	ZipCode
ZIP
11204	0	0	1	3	3	1	0	1	3	3	11204
11205	0	0	1	3	3	1	0	1	3	3	11205
11211	0	0	1	3	3	1	0	1	3	3	11211
11234	0	0	1	3	3	1	0	1	3	3	11234
11422	0	0	1	0	0	3	3	3	6	0	11422
11413	0	0	1	0	0	3	3	3	6	0	11413
11358	0	0	1	0	0	3	3	7	0	6	11358
11355	0	0	1	0	0	3	3	7	0	6	11355
10003	0	1	0	1	1	2	2	0	1	1	10003
10025	0	1	0	1	1	2	2	0	1	1	10025
10128	0	1	0	1	1	2	2	0	1	1	10128
11225	0	0	1	3	3	1	0	1	3	3	11225
10001	0	1	0	1	1	2	2	0	1	1	10001
10470	0	1	2	2	2	0	4	2	2	2	10470
10452	0	1	2	2	2	0	4	2	2	2	10452
11210	0	0	1	3	3	1	0	1	3	3	11210
11207	0	0	1	3	3	1	0	1	3	3	11207
11361	0	0	1	0	0	3	3	7	0	6	11361
11435	0	0	1	0	0	3	3	3	6	0	11435
11419	0	0	1	0	0	3	3	3	6	0	11419
10467	0	1	2	2	2	0	4	2	2	2	10467
10014	0	1	0	1	1	2	2	0	1	1	10014
11411	0	0	1	0	0	3	3	3	6	0	11411
11691	0	0	1	0	4	4	5	4	4	5	11691
10306	0	1	2	2	2	5	1	5	5	4	10306
11421	0	0	1	0	0	3	3	3	6	0	11421
11368	0	0	1	0	0	3	3	7	0	6	11368
11235	0	0	1	3	3	1	0	1	8	9	11235
10005	0	1	0	1	1	2	2	0	1	8	10005
10032	0	1	0	1	1	2	2	0	1	1	10032
...	...	...	...	...	...	...	...	...	...	...	...
10278	0	1	2	2	2	5	1	5	5	4	10278
10105	0	1	0	1	1	2	2	0	1	8	10105
11359	0	0	1	0	0	3	3	7	0	6	11359
10122	0	1	0	1	1	2	2	0	1	1	10122
10111	0	1	0	1	1	2	2	0	1	1	10111
10168	0	1	0	1	1	5	1	5	5	4	10168
10120	0	1	0	1	1	2	2	0	1	1	10120
10176	0	1	0	1	1	5	1	5	5	4	10176
10110	0	1	0	1	1	2	2	0	1	1	10110
10048	0	1	0	1	1	2	2	0	1	1	10048
10174	0	1	0	1	1	5	1	5	5	4	10174
10177	0	1	0	1	1	5	1	5	5	4	10177
10165	0	1	0	1	1	5	1	5	5	8	10165
10151	0	1	0	1	1	2	2	0	1	1	10151
10121	0	1	0	1	1	2	2	0	1	1	10121
10167	0	1	0	1	1	5	1	5	5	4	10167
10279	0	1	2	2	2	5	1	5	5	4	10279
10152	0	1	0	1	1	2	2	0	1	8	10152
10169	0	1	0	1	1	5	1	5	5	4	10169
10173	0	1	0	1	1	5	1	5	5	4	10173
10055	0	1	0	1	1	2	2	0	1	8	10055
10158	0	1	0	1	1	2	2	0	1	1	10158
10155	0	1	0	1	1	2	2	0	1	8	10155
10172	0	1	0	1	1	5	1	5	5	4	10172
10170	0	1	0	1	1	5	1	5	5	8	10170
11242	0	0	1	3	3	1	0	1	3	3	11242
10106	0	1	0	1	1	2	2	0	1	1	10106
10178	0	1	0	1	1	5	1	5	5	4	10178
10171	0	1	0	1	1	5	1	5	5	4	10171
10166	0	1	0	1	1	5	1	5	5	4	10166

	Sanitation	Food	Homeless	Neighbourhood	Noise	ZipCode
10001	993.0	281.0	3501.0	815.0	4882.0	10001
10002	1756.0	247.0	625.0	1960.0	8631.0	10002
10003	1465.0	399.0	4377.0	1209.0	6344.0	10003
10004	97.0	70.0	311.0	260.0	533.0	10004
10005	71.0	35.0	321.0	272.0	1022.0	10005
10006	46.0	21.0	145.0	144.0	567.0	10006
10007	155.0	88.0	360.0	461.0	1179.0	10007
10009	1481.0	144.0	1348.0	816.0	7564.0	10009
10010	706.0	186.0	1106.0	506.0	2838.0	10010
10011	1654.0	328.0	2610.0	1019.0	4141.0	10011
10012	926.0	233.0	1575.0	781.0	3929.0	10012
10013	1092.0	304.0	859.0	1191.0	3412.0	10013
10014	1292.0	186.0	896.0	1027.0	3719.0	10014
10016	1157.0	310.0	2455.0	1058.0	5824.0	10016
10017	289.0	235.0	1994.0	462.0	1464.0	10017
10018	394.0	188.0	1933.0	393.0	1364.0	10018
10019	1246.0	295.0	3009.0	1208.0	4490.0	10019
10020	17.0	12.0	58.0	41.0	21.0	10020
10021	760.0	134.0	553.0	774.0	1764.0	10021
10022	757.0	252.0	1478.0	1046.0	2041.0	10022
10023	909.0	186.0	3492.0	845.0	2805.0	10023
10024	1134.0	161.0	1654.0	884.0	3267.0	10024
10025	2483.0	215.0	1895.0	1081.0	6061.0	10025
10026	1432.0	113.0	653.0	462.0	4426.0	10026
10027	2039.0	174.0	1184.0	800.0	6886.0	10027
10028	935.0	156.0	588.0	578.0	2654.0	10028
10029	2066.0	474.0	554.0	734.0	6511.0	10029
10030	1513.0	45.0	167.0	276.0	3891.0	10030
10031	2699.0	116.0	479.0	467.0	10029.0	10031
10032	2330.0	82.0	144.0	571.0	7603.0	10032
...	...	...	...	...	...	...
11379	1251.0	32.0	21.0	1699.0	716.0	11379
11385	4361.0	179.0	104.0	3706.0	5261.0	11385
11411	808.0	18.0	10.0	1141.0	411.0	11411
11412	2026.0	24.0	14.0	1589.0	1012.0	11412
11413	1608.0	32.0	17.0	2058.0	716.0	11413
11414	1651.0	51.0	31.0	1471.0	759.0	11414
11415	606.0	29.0	53.0	630.0	1510.0	11415
11416	1033.0	30.0	28.0	1249.0	1278.0	11416
11417	2403.0	52.0	40.0	1715.0	1084.0	11417
11418	1601.0	71.0	62.0	1737.0	1737.0	11418
11419	2208.0	92.0	16.0	1534.0	2452.0	11419
11420	2543.0	74.0	20.0	2592.0	2596.0	11420
11421	1236.0	35.0	20.0	1583.0	1900.0	11421
11422	1197.0	29.0	9.0	1583.0	623.0	11422
11423	1330.0	25.0	37.0	1155.0	1074.0	11423
11426	528.0	15.0	2.0	787.0	278.0	11426
11427	653.0	24.0	17.0	1028.0	754.0	11427
11428	632.0	25.0	17.0	1006.0	642.0	11428
11429	939.0	10.0	20.0	1033.0	436.0	11429
11430	13.0	25.0	8.0	65.0	8.0	11430
11432	2241.0	136.0	215.0	1762.0	1926.0	11432
11433	2392.0	29.0	82.0	1407.0	1352.0	11433
11434	2843.0	78.0	31.0	2531.0	1733.0	11434
11435	2114.0	56.0	107.0	1861.0	2133.0	11435
11436	901.0	16.0	13.0	989.0	726.0	11436
11691	2723.0	45.0	33.0	2034.0	2225.0	11691
11692	699.0	23.0	4.0	667.0	862.0	11692
11693	498.0	20.0	11.0	505.0	790.0	11693
11694	895.0	28.0	9.0	962.0	991.0	11694
11697	14.0	4.0	4.0	37.0	11.0	11697