In [1]:
import pandas as pd

#All 311 requests for 2016 -import zip as a string
quality = pd.io.parsers.read_csv('finalProjectDoomed/311_Service_Requests_from_2010_to_Present.csv', dtype={'Incident Zip': 'str'})


C:\Users\David\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py:2717: DtypeWarning: Columns (17,40,41,43,44,45,47,48,49) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [2]:
# Get all New York City Zip Codes
stuff = pd.io.parsers.read_csv('finalProjectDoomed/nycZip.csv',dtype={'ZipNy': 'str'}) # Get list of all New York City Zip Codes
zipNy= stuff[['ZipNy','Borough']] # Clean up Data
zipNy = zipNy.ZipNy.unique() # remove double entriesy
#zipNy

In [3]:
# Remove all non New York City Zips
realNYC = quality[quality['Incident Zip'].isin(zipNy)] # remove outliers from the data, Only New York City Zips
realNYC = realNYC.reset_index(drop=True)
#realNYC

In [4]:
# Get Show Zip occurences
test = realNYC
test = test.rename(columns={'Incident Zip':'ZIP'})
test = test['ZIP']
test = test.value_counts()
test


Out[4]:
11226    51099
10467    43086
10453    39814
10458    39157
10468    36876
11207    36728
11385    36032
10452    34797
10031    34661
10456    34143
11208    33193
10457    32925
11221    32676
10472    32041
11213    29780
11225    29486
11216    28792
11212    28692
11234    28441
11233    28260
11230    27534
10032    27429
10462    27293
11203    26879
11206    26609
10025    26484
10463    26241
10033    25924
11235    25812
11238    25785
         ...  
10172       22
10123       21
10165       20
10111       20
11359       18
10167       18
10106       18
10107       18
10153       18
10174       17
11241       16
10105       15
10158       15
10120       14
10173       13
10170       13
10177       12
10048       11
10115       11
11242       11
10155       10
10178       10
10176        9
10151        7
10045        7
10152        7
10122        6
10055        3
10166        3
10171        2
Name: ZIP, dtype: int64

In [5]:
#get Lat Lon for Zips Codes
zipy =  pd.io.parsers.read_csv('finalProjectDoomed/zipLatLon.csv',dtype={'ZIP': 'str'})# get Lat Lon of Zip 
realZip = zipy[zipy['ZIP'].isin(zipNy)]# Only Ny City ones
realZip = realZip.reset_index(drop=True)
realZip = realZip.set_index('ZIP')
#realZip

In [6]:
# Convert back to Dataframe
test = pd.DataFrame(test).reset_index()
test = test.rename(columns={'index':'ZIP','ZIP':'NUM'})
test = test.set_index('ZIP')
#test

In [7]:
# Concat together with Lat Lon
tried = [test,realZip]
graphOut = pd.concat(tried, axis=1)

In [8]:
import numpy as np
# rename columns and remove misformed Zip codes
graphOut = graphOut.rename(columns={'LNG':'LON'})
graphOut.index.name = 'ZIP'
graphOut = graphOut[np.isfinite(graphOut['LON'])]
graphOut = graphOut[np.isfinite(graphOut['LAT'])]
graphOut = graphOut[np.isfinite(graphOut['NUM'])]

In [9]:
#Save dataset
graphOut.to_csv("graphOut.csv", sep=',')

Kmeans Finding Clusters in the Data


In [10]:
# Get unique Complaint type occurences
get = realNYC['Complaint Type'].value_counts()
get = pd.DataFrame(get).reset_index()
get = get.rename(columns={'index':'Complaint Type','Complaint Type':'index'})
#get

In [11]:
# Create a dictionary to convert complaint types to numerical value
dictGet = {}

for index, row in get.iterrows():
    dictGet[row["Complaint Type"]] = index

#dictGet

In [14]:
# Pass all complaints through dictionary and store values
qualCom = realNYC['Complaint Type'].values
for (i, n) in enumerate(realNYC['Complaint Type']):
         qualCom[i] = dictGet[n]

In [15]:
# rename ZIP
q = realNYC
q = q.rename(columns={'Incident Zip':'ZIP'})
q = q[q['ZIP'] >= 0] # remove malformed zip code

In [16]:
import numpy as np
from sklearn.cluster import KMeans

# Perform Kmeans 2-10
clustZip = q['ZIP'].values

X=np.matrix(zip(qualCom,clustZip))
numK = range(1,11)
results = []

for i in numK:
        results.append(KMeans(n_clusters=i).fit(X))

In [17]:
#Create a new Panda with Zip column for storing kmeans output 
groupPlot = q[["ZIP"]]
groupPlot = groupPlot.reset_index(drop=True)

In [19]:
#Create new columns and assign data
groupPlot["cc"] = results[0].labels_ # Array of 0's
groupPlot["k2"] = results[1].labels_
groupPlot["k3"] = results[2].labels_
groupPlot["k4"] = results[3].labels_
groupPlot["k5"] = results[4].labels_
groupPlot["k6"] = results[5].labels_
groupPlot["k7"] = results[6].labels_
groupPlot["k8"] = results[7].labels_
groupPlot["k9"] = results[8].labels_
groupPlot["k10"] = results[9].labels_
groupPlot

In [ ]:
#Add Clusters

#for i in range(1,6):
  #  for j, cluster in enumerate(results[i].cluster_centers_) :
       # rowAdd = [cluster[1],cluster[0],1,0,0,0,0,0,0,0,0,0]
        #rowAdd[i+2] = j;
        #rowAdd[2] = i+1;
        #groupPlot.loc[len(prostitution)] = rowAdd

In [21]:
#remove duplicates from the data so only 1 for each zip
outy = groupPlot.drop_duplicates(['ZIP'])
outy['ZipCode'] = outy['ZIP'] 
outy = outy.set_index('ZIP')
outy


C:\Users\David\Anaconda2\lib\site-packages\ipykernel\__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
Out[21]:
cc k2 k3 k4 k5 k6 k7 k8 k9 k10 ZipCode
ZIP
11204 0 0 1 3 3 1 0 1 3 3 11204
11205 0 0 1 3 3 1 0 1 3 3 11205
11211 0 0 1 3 3 1 0 1 3 3 11211
11234 0 0 1 3 3 1 0 1 3 3 11234
11422 0 0 1 0 0 3 3 3 6 0 11422
11413 0 0 1 0 0 3 3 3 6 0 11413
11358 0 0 1 0 0 3 3 7 0 6 11358
11355 0 0 1 0 0 3 3 7 0 6 11355
10003 0 1 0 1 1 2 2 0 1 1 10003
10025 0 1 0 1 1 2 2 0 1 1 10025
10128 0 1 0 1 1 2 2 0 1 1 10128
11225 0 0 1 3 3 1 0 1 3 3 11225
10001 0 1 0 1 1 2 2 0 1 1 10001
10470 0 1 2 2 2 0 4 2 2 2 10470
10452 0 1 2 2 2 0 4 2 2 2 10452
11210 0 0 1 3 3 1 0 1 3 3 11210
11207 0 0 1 3 3 1 0 1 3 3 11207
11361 0 0 1 0 0 3 3 7 0 6 11361
11435 0 0 1 0 0 3 3 3 6 0 11435
11419 0 0 1 0 0 3 3 3 6 0 11419
10467 0 1 2 2 2 0 4 2 2 2 10467
10014 0 1 0 1 1 2 2 0 1 1 10014
11411 0 0 1 0 0 3 3 3 6 0 11411
11691 0 0 1 0 4 4 5 4 4 5 11691
10306 0 1 2 2 2 5 1 5 5 4 10306
11421 0 0 1 0 0 3 3 3 6 0 11421
11368 0 0 1 0 0 3 3 7 0 6 11368
11235 0 0 1 3 3 1 0 1 8 9 11235
10005 0 1 0 1 1 2 2 0 1 8 10005
10032 0 1 0 1 1 2 2 0 1 1 10032
... ... ... ... ... ... ... ... ... ... ... ...
10278 0 1 2 2 2 5 1 5 5 4 10278
10105 0 1 0 1 1 2 2 0 1 8 10105
11359 0 0 1 0 0 3 3 7 0 6 11359
10122 0 1 0 1 1 2 2 0 1 1 10122
10111 0 1 0 1 1 2 2 0 1 1 10111
10168 0 1 0 1 1 5 1 5 5 4 10168
10120 0 1 0 1 1 2 2 0 1 1 10120
10176 0 1 0 1 1 5 1 5 5 4 10176
10110 0 1 0 1 1 2 2 0 1 1 10110
10048 0 1 0 1 1 2 2 0 1 1 10048
10174 0 1 0 1 1 5 1 5 5 4 10174
10177 0 1 0 1 1 5 1 5 5 4 10177
10165 0 1 0 1 1 5 1 5 5 8 10165
10151 0 1 0 1 1 2 2 0 1 1 10151
10121 0 1 0 1 1 2 2 0 1 1 10121
10167 0 1 0 1 1 5 1 5 5 4 10167
10279 0 1 2 2 2 5 1 5 5 4 10279
10152 0 1 0 1 1 2 2 0 1 8 10152
10169 0 1 0 1 1 5 1 5 5 4 10169
10173 0 1 0 1 1 5 1 5 5 4 10173
10055 0 1 0 1 1 2 2 0 1 8 10055
10158 0 1 0 1 1 2 2 0 1 1 10158
10155 0 1 0 1 1 2 2 0 1 8 10155
10172 0 1 0 1 1 5 1 5 5 4 10172
10170 0 1 0 1 1 5 1 5 5 8 10170
11242 0 0 1 3 3 1 0 1 3 3 11242
10106 0 1 0 1 1 2 2 0 1 1 10106
10178 0 1 0 1 1 5 1 5 5 4 10178
10171 0 1 0 1 1 5 1 5 5 4 10171
10166 0 1 0 1 1 5 1 5 5 4 10166

224 rows × 11 columns


In [22]:
import numpy as np
#Combine with Zip Lat Lon data
graphStep = graphOut 
fram = [graphStep,outy]
graphKm = pd.concat(fram, axis=1)
graphKm = graphKm[np.isfinite(graphKm['k2'])] #Remove misformed zipcodes data
graphKm = graphKm[np.isfinite(graphKm['LAT'])]

graphKm.to_csv("graphKm.csv", sep=',')
#graphKm

In [23]:
#Save dataset
graphKm.to_csv("graphKm.csv", sep=',')

Cluster Income Data


In [ ]:
income = pd.read_csv('incomeData.csv')

In [ ]:
incomeNy = income[income['STATE'] == "NY"] 
incomeNy.to_csv("incomeNy.csv", sep=',')

In [29]:
import pandas as pd
income = pd.io.parsers.read_csv('finalProjectDoomed/incomeNy.csv', dtype={'zipcode': 'str'})

In [122]:
#income

In [30]:
incomeInc = income[income['zipcode'].isin(zipNy)] # Just New York City Zip

In [31]:
# Create data set to be used for CLustering
dfAgi = incomeInc['agi_stub'].values
dfZip = incomeInc['zipcode'].values
dfNum = incomeInc['N1'].values
print dfNum


[ 3880.  2530.  1850. ...,   240.   490.   180.]

In [32]:
# Create dataset for clustering
incY = []
incX = []

rani = len(dfAgi)

for i in range(0,rani):
    ranj = int(dfNum[i]) 
    #print ranj
    for j in range(0,ranj):
        incX.append(dfZip[i])
        incY.append(dfAgi[i])

In [34]:
import numpy as np
from sklearn.cluster import KMeans
 
clustZip = incY

X=np.matrix(zip(incX,clustZip))
numK = range(1,11)
resultsInc = []

for i in numK:
        resultsInc.append(KMeans(n_clusters=i).fit(X))

In [40]:
groupPlotInc = graphOut
del groupPlotInc['NUM']
groupPlotInc

In [49]:
groupInc = incX
groupInc = pd.DataFrame(groupInc).reset_index()
del groupInc['index']
groupInc = groupInc.rename(columns={'0':'Zip'})
groupInc = groupInc.reset_index(drop=True)
groupInc.columns = ['ZIP']
groupInc['ZipCode'] = groupInc['ZIP']
groupInc = groupInc.set_index('ZIP')
groupInc

In [44]:
#Create new columns and assign data
groupInc["cc"] = resultsInc[0].labels_ # Array of 0's
groupInc["k2"] = resultsInc[1].labels_
groupInc["k3"] = resultsInc[2].labels_
groupInc["k4"] = resultsInc[3].labels_
groupInc["k5"] = resultsInc[4].labels_
groupInc["k6"] = resultsInc[5].labels_
groupInc["k7"] = resultsInc[6].labels_
groupInc["k8"] = resultsInc[7].labels_
groupInc["k9"] = resultsInc[8].labels_
groupInc["k10"] = resultsInc[9].labels_

In [51]:
outInc = groupInc.drop_duplicates(['ZipCode']).reset_index()
outInc = outInc.set_index('ZIP')
#del outInc['index']
outInc

In [52]:
import numpy as np

fram = [groupPlotInc,outInc]
graphInc = pd.concat(fram, axis=1)
graphInc = graphInc[np.isfinite(graphInc['k2'])] # remove zip codes no data for

graphInc.to_csv("graphInc.csv", sep=',')
#graphInc

'Noise - Vehicle' 'Noise' 'Noise - Residential' 'Noise - Street/Sidewalk'


In [65]:
# Get all noise complaints for New York City Only
noise1 = realNYC[realNYC['Complaint Type'] == 'Noise - Vehicle']
noise2 = realNYC[realNYC['Complaint Type'] == 'Noise']
noise3 = realNYC[realNYC['Complaint Type'] == 'Noise - Residential']
noise4 = realNYC[realNYC['Complaint Type'] == 'Noise - Street/Sidewalk']
noise5 = realNYC[realNYC['Complaint Type'] == 'Noise - Commercial']

In [66]:
# Combine the different noise complaints
frames = [noise1,noise2,noise3,noise4,noise5]
noise = pd.concat(frames)
noise = noise.reset_index(drop=True)

In [150]:
noiseOut = noise.rename(columns={'Incident Zip':'ZIP'})
noiseOut = noiseOut['ZIP']
noiseOut = noiseOut.value_counts()

# Convert back to Dataframe
noiseOut = pd.DataFrame(noiseOut).reset_index()
noiseOut = noiseOut.rename(columns={'index':'ZIP','ZIP':'Noise'})

noiseOut['ZipCode'] = noiseOut['ZIP']
noiseOut = noiseOut.set_index('ZIP')
noiseOut

noiseStep = graphOut
# Concat together
noiseTry = [noiseStep,noiseOut]
noiseOutp = pd.concat(noiseTry, axis=1)
#noiseOutp

In [151]:
# Get  food complaints
foodPosioning = realNYC[realNYC['Complaint Type'] == 'Food Poisoning']
foodEstablishment = realNYC[realNYC['Complaint Type'] == 'Food Establishment']
frames = [foodPosioning,foodEstablishment]
food = pd.concat(frames)
food = food.reset_index(drop=True)

In [152]:
foodOut = food.rename(columns={'Incident Zip':'ZIP'})
foodOut = foodOut['ZIP']
foodOut = foodOut.value_counts()

# Convert back to Dataframe
foodOut = pd.DataFrame(foodOut).reset_index()
foodOut = foodOut.rename(columns={'index':'ZIP','ZIP':'Food'})

foodOut = foodOut.set_index('ZIP')
foodOut

foodStep = graphOut
# Concat together
foodTry = [foodStep,foodOut]
foodOutp = pd.concat(foodTry, axis=1)
#foodOutp

In [140]:
# Homeless People
homelessPerson = realNYC[realNYC['Complaint Type'] == 'Homeless Person Assistance'] 
HomelessEncampment = realNYC[realNYC['Complaint Type'] == 'Homeless Encampment'] 
frame = [homelessPerson,HomelessEncampment]
homeless = pd.concat(frame)
homeless = homeless.reset_index(drop=True)

In [153]:
homeOut = homeless.rename(columns={'Incident Zip':'ZIP'})
homeOut = homeOut['ZIP']
homeOut = homeOut.value_counts()

# Convert back to Dataframe
homeOut = pd.DataFrame(homeOut).reset_index()
homeOut = homeOut.rename(columns={'index':'ZIP','ZIP':'Homeless'})

homeOut = homeOut.set_index('ZIP')
homeOut

homeStep = graphOut
# Concat together
homeTry = [homeStep,homeOut]
homeOutp = pd.concat(homeTry, axis=1)
#homeOutp

In [74]:
# Neighbourhood Condition

streetRoad  = realNYC[realNYC['Complaint Type'] == 'Street Condition']
streetLight  = realNYC[realNYC['Complaint Type'] == 'Street Light Condition']
sweeping = realNYC[realNYC['Complaint Type'] == 'Sweeping/Inadequate']
graffiti = realNYC[realNYC['Complaint Type'] == 'Graffiti']
derelictV = realNYC[realNYC['Complaint Type'] == 'Derelict Vehicle']

frames = [streetRoad,streetLight,derelictV,sweeping,graffiti]
neighbourhood = pd.concat(frames)
neighbourhood = neighbourhood.reset_index(drop=True)

In [154]:
neighOut = neighbourhood.rename(columns={'Incident Zip':'ZIP'})
neighOut = neighOut['ZIP']
neighOut = neighOut.value_counts()

# Convert back to Dataframe
neighOut = pd.DataFrame(neighOut).reset_index()
neighOut = neighOut.rename(columns={'index':'ZIP','ZIP':'Neighbourhood'})
neighOut = neighOut.set_index('ZIP')
neighOut

neighStep = graphOut
# Concat together
neighTry = [neighStep,neighOut]
neighOutp = pd.concat(neighTry, axis=1)
#neighOutp

In [142]:
# Sanitation
rodent = realNYC[realNYC['Complaint Type'] == 'Rodent']
dirty = realNYC[realNYC['Complaint Type'] == 'Dirty Conditions']
sanitationC = realNYC[realNYC['Complaint Type'] == 'Sanitation Condition']
sewer = realNYC[realNYC['Complaint Type'] == 'Sewer']
bask = realNYC[realNYC['Complaint Type'] == 'Overflowing Recycling Baskets']
unsan = realNYC[realNYC['Complaint Type'] == 'UNSANITARY CONDITION']
missed = realNYC[realNYC['Complaint Type'] == 'Missed Collection (All Materials)']

frames = [rodent,dirty,sanitationC,sewer,bask,unsan,missed]
sanitation = pd.concat(frames)
sanitation = sanitation.reset_index(drop=True)

In [211]:
sanOut = sanitation.rename(columns={'Incident Zip':'ZIP'})
sanOut = sanOut['ZIP']
sanOut = sanOut.value_counts()
#len(sanOut)

# Convert back to Dataframe
sanOut = pd.DataFrame(sanOut).reset_index()
sanOut = sanOut.rename(columns={'index':'ZIP','ZIP':'Sanitation'})
sanOut = sanOut.set_index('ZIP')
sanOut

sanStep = graphOut
# Concat together
sanTry = [sanStep,sanOut]
sanOutp = pd.concat(sanTry, axis=1)
#sanOutp

In [328]:
frameB = [sanOut,foodOut,homeOut,neighOut,noiseOut]
graphMove = pd.concat(frameB, axis=1)

In [330]:
#Fill in the holes
g2 = graphMove.fillna(0)


Out[330]:
Sanitation Food Homeless Neighbourhood Noise ZipCode
10001 993.0 281.0 3501.0 815.0 4882.0 10001
10002 1756.0 247.0 625.0 1960.0 8631.0 10002
10003 1465.0 399.0 4377.0 1209.0 6344.0 10003
10004 97.0 70.0 311.0 260.0 533.0 10004
10005 71.0 35.0 321.0 272.0 1022.0 10005
10006 46.0 21.0 145.0 144.0 567.0 10006
10007 155.0 88.0 360.0 461.0 1179.0 10007
10009 1481.0 144.0 1348.0 816.0 7564.0 10009
10010 706.0 186.0 1106.0 506.0 2838.0 10010
10011 1654.0 328.0 2610.0 1019.0 4141.0 10011
10012 926.0 233.0 1575.0 781.0 3929.0 10012
10013 1092.0 304.0 859.0 1191.0 3412.0 10013
10014 1292.0 186.0 896.0 1027.0 3719.0 10014
10016 1157.0 310.0 2455.0 1058.0 5824.0 10016
10017 289.0 235.0 1994.0 462.0 1464.0 10017
10018 394.0 188.0 1933.0 393.0 1364.0 10018
10019 1246.0 295.0 3009.0 1208.0 4490.0 10019
10020 17.0 12.0 58.0 41.0 21.0 10020
10021 760.0 134.0 553.0 774.0 1764.0 10021
10022 757.0 252.0 1478.0 1046.0 2041.0 10022
10023 909.0 186.0 3492.0 845.0 2805.0 10023
10024 1134.0 161.0 1654.0 884.0 3267.0 10024
10025 2483.0 215.0 1895.0 1081.0 6061.0 10025
10026 1432.0 113.0 653.0 462.0 4426.0 10026
10027 2039.0 174.0 1184.0 800.0 6886.0 10027
10028 935.0 156.0 588.0 578.0 2654.0 10028
10029 2066.0 474.0 554.0 734.0 6511.0 10029
10030 1513.0 45.0 167.0 276.0 3891.0 10030
10031 2699.0 116.0 479.0 467.0 10029.0 10031
10032 2330.0 82.0 144.0 571.0 7603.0 10032
... ... ... ... ... ... ...
11379 1251.0 32.0 21.0 1699.0 716.0 11379
11385 4361.0 179.0 104.0 3706.0 5261.0 11385
11411 808.0 18.0 10.0 1141.0 411.0 11411
11412 2026.0 24.0 14.0 1589.0 1012.0 11412
11413 1608.0 32.0 17.0 2058.0 716.0 11413
11414 1651.0 51.0 31.0 1471.0 759.0 11414
11415 606.0 29.0 53.0 630.0 1510.0 11415
11416 1033.0 30.0 28.0 1249.0 1278.0 11416
11417 2403.0 52.0 40.0 1715.0 1084.0 11417
11418 1601.0 71.0 62.0 1737.0 1737.0 11418
11419 2208.0 92.0 16.0 1534.0 2452.0 11419
11420 2543.0 74.0 20.0 2592.0 2596.0 11420
11421 1236.0 35.0 20.0 1583.0 1900.0 11421
11422 1197.0 29.0 9.0 1583.0 623.0 11422
11423 1330.0 25.0 37.0 1155.0 1074.0 11423
11426 528.0 15.0 2.0 787.0 278.0 11426
11427 653.0 24.0 17.0 1028.0 754.0 11427
11428 632.0 25.0 17.0 1006.0 642.0 11428
11429 939.0 10.0 20.0 1033.0 436.0 11429
11430 13.0 25.0 8.0 65.0 8.0 11430
11432 2241.0 136.0 215.0 1762.0 1926.0 11432
11433 2392.0 29.0 82.0 1407.0 1352.0 11433
11434 2843.0 78.0 31.0 2531.0 1733.0 11434
11435 2114.0 56.0 107.0 1861.0 2133.0 11435
11436 901.0 16.0 13.0 989.0 726.0 11436
11691 2723.0 45.0 33.0 2034.0 2225.0 11691
11692 699.0 23.0 4.0 667.0 862.0 11692
11693 498.0 20.0 11.0 505.0 790.0 11693
11694 895.0 28.0 9.0 962.0 991.0 11694
11697 14.0 4.0 4.0 37.0 11.0 11697

222 rows × 6 columns


In [304]:
# Remove any misformed zip codes
g2 = g2[g2['ZipCode'] > 0]
g2


Out[304]:
Sanitation Food Homeless Neighbourhood Noise ZipCode
10001 993.0 281.0 3501.0 815.0 4882.0 10001
10002 1756.0 247.0 625.0 1960.0 8631.0 10002
10003 1465.0 399.0 4377.0 1209.0 6344.0 10003
10004 97.0 70.0 311.0 260.0 533.0 10004
10005 71.0 35.0 321.0 272.0 1022.0 10005
10006 46.0 21.0 145.0 144.0 567.0 10006
10007 155.0 88.0 360.0 461.0 1179.0 10007
10009 1481.0 144.0 1348.0 816.0 7564.0 10009
10010 706.0 186.0 1106.0 506.0 2838.0 10010
10011 1654.0 328.0 2610.0 1019.0 4141.0 10011
10012 926.0 233.0 1575.0 781.0 3929.0 10012
10013 1092.0 304.0 859.0 1191.0 3412.0 10013
10014 1292.0 186.0 896.0 1027.0 3719.0 10014
10016 1157.0 310.0 2455.0 1058.0 5824.0 10016
10017 289.0 235.0 1994.0 462.0 1464.0 10017
10018 394.0 188.0 1933.0 393.0 1364.0 10018
10019 1246.0 295.0 3009.0 1208.0 4490.0 10019
10020 17.0 12.0 58.0 41.0 21.0 10020
10021 760.0 134.0 553.0 774.0 1764.0 10021
10022 757.0 252.0 1478.0 1046.0 2041.0 10022
10023 909.0 186.0 3492.0 845.0 2805.0 10023
10024 1134.0 161.0 1654.0 884.0 3267.0 10024
10025 2483.0 215.0 1895.0 1081.0 6061.0 10025
10026 1432.0 113.0 653.0 462.0 4426.0 10026
10027 2039.0 174.0 1184.0 800.0 6886.0 10027
10028 935.0 156.0 588.0 578.0 2654.0 10028
10029 2066.0 474.0 554.0 734.0 6511.0 10029
10030 1513.0 45.0 167.0 276.0 3891.0 10030
10031 2699.0 116.0 479.0 467.0 10029.0 10031
10032 2330.0 82.0 144.0 571.0 7603.0 10032
... ... ... ... ... ... ...
11379 1251.0 32.0 21.0 1699.0 716.0 11379
11385 4361.0 179.0 104.0 3706.0 5261.0 11385
11411 808.0 18.0 10.0 1141.0 411.0 11411
11412 2026.0 24.0 14.0 1589.0 1012.0 11412
11413 1608.0 32.0 17.0 2058.0 716.0 11413
11414 1651.0 51.0 31.0 1471.0 759.0 11414
11415 606.0 29.0 53.0 630.0 1510.0 11415
11416 1033.0 30.0 28.0 1249.0 1278.0 11416
11417 2403.0 52.0 40.0 1715.0 1084.0 11417
11418 1601.0 71.0 62.0 1737.0 1737.0 11418
11419 2208.0 92.0 16.0 1534.0 2452.0 11419
11420 2543.0 74.0 20.0 2592.0 2596.0 11420
11421 1236.0 35.0 20.0 1583.0 1900.0 11421
11422 1197.0 29.0 9.0 1583.0 623.0 11422
11423 1330.0 25.0 37.0 1155.0 1074.0 11423
11426 528.0 15.0 2.0 787.0 278.0 11426
11427 653.0 24.0 17.0 1028.0 754.0 11427
11428 632.0 25.0 17.0 1006.0 642.0 11428
11429 939.0 10.0 20.0 1033.0 436.0 11429
11430 13.0 25.0 8.0 65.0 8.0 11430
11432 2241.0 136.0 215.0 1762.0 1926.0 11432
11433 2392.0 29.0 82.0 1407.0 1352.0 11433
11434 2843.0 78.0 31.0 2531.0 1733.0 11434
11435 2114.0 56.0 107.0 1861.0 2133.0 11435
11436 901.0 16.0 13.0 989.0 726.0 11436
11691 2723.0 45.0 33.0 2034.0 2225.0 11691
11692 699.0 23.0 4.0 667.0 862.0 11692
11693 498.0 20.0 11.0 505.0 790.0 11693
11694 895.0 28.0 9.0 962.0 991.0 11694
11697 14.0 4.0 4.0 37.0 11.0 11697

203 rows × 6 columns


In [305]:
# Save dataset
g2.to_csv("graphMove.csv", sep=',')

In [326]:
# Heat Maps
geo_data = {'lat':neighbourhood['Latitude'].values, 'lon':neighbourhood['Longitude'].values}
#geo_data2 = {'lat':sanitation['Latitude'].values, 'lon':sanitation['Longitude'].values}
#geo_data3 = {'lat':neighbourhood['Latitude'].values, 'lon':neighbourhood['Longitude'].values}
#geo_data

In [327]:
import geoplotlib as gp
from geoplotlib.utils import BoundingBox

 # Prepare data for bounding box
max_lat = max(geo_data['lat'])
print max_lat
min_lon = min(geo_data['lon'])
    #print min_lon
min_lat = min(geo_data['lat'])
    #print min_lat
max_lon = max(geo_data['lon'])
    #print max_lon
        
    
    # Create and set bounding box for map of San Francisko
bbox = BoundingBox(north=max_lat, west=min_lon, south=min_lat, east=max_lon)
gp.set_bbox(bbox)
gp.kde(geo_data,5)
gp.show()


40.9128687953
('smallest non-zero count', 2.1494359633152135e-09)
('max count:', 14.263694107127527)
('smallest non-zero count', 5.8081207521849619e-08)
('max count:', 5.2529070043199892)
('smallest non-zero count', 9.8343065261074206e-09)
('max count:', 5.2529070043199892)
('smallest non-zero count', 7.7441610029132826e-08)
('max count:', 5.2529070043199892)
('smallest non-zero count', 4.2292940046479448e-06)
('max count:', 1.7508267523673384)

In [ ]:
# Prepare data for bounding box
max_lat2 = max(geo_data2['lat'])
   
min_lon2 = min(geo_data2['lon'])
    #print min_lon
min_lat2 = min(geo_data2['lat'])
    #print min_lat
max_lon2 = max(geo_data2['lon'])
    #print max_lon
        
    
    # Create and set bounding box for map of San Francisko
bbox = BoundingBox(north=max_lat2, west=min_lon2, south=min_lat2, east=max_lon2)
gp.set_bbox(bbox)
gp.kde(geo_data2,5)
gp.show()

In [ ]:
import geoplotlib as gp
from geoplotlib.utils import BoundingBox

 # Prepare data for bounding box
max_lat3 = max(geo_data['lat'])
   
min_lon3 = min(geo_data['lon'])
    #print min_lon
min_lat3 = min(geo_data['lat'])
    #print min_lat
max_lon3 = max(geo_data['lon'])
    #print max_lon
        
    
    # Create and set bounding box for map of San Francisko
bbox = BoundingBox(north=max_lat3, west=min_lon3, south=min_lat3, east=max_lon3)
gp.set_bbox(bbox)
gp.kde(geo_data3,3)
gp.show()