notebook.community

Edit and run



In [1]:

    
import pandas as pd
from os import listdir
from os.path import isfile, join



In [2]:

    
# Load a single month of City of London street-level records as a small
# sample for exploring the schema before reading the full data set.
sample_csv_path = "data/2015-04/2015-04-city-of-london-street.csv"
data_example = pd.read_csv(sample_csv_path)



In [3]:

    
# Preview the first five rows (head() defaults to n=5).
data_example.head()









    Out[3]:






  
    
      
      Crime ID
      Month
      Reported by
      Falls within
      Longitude
      Latitude
      Location
      LSOA code
      LSOA name
      Crime type
      Last outcome category
      Context
    
  
  
    
      0
      9d28c69cc3695dc7972f8a990b8f2224d4aa10dcf87f8e...
      2015-04
      City of London Police
      City of London Police
      -0.111497
      51.518226
      On or near Pedestrian Subway
      E01000914
      Camden 028B
      Other theft
      Investigation complete; no suspect identified
      NaN
    
    
      1
      b8571f69a113df635ce0911394273418481246a7536de8...
      2015-04
      City of London Police
      City of London Police
      -0.113767
      51.517372
      On or near Stone Buildings
      E01000914
      Camden 028B
      Theft from the person
      Investigation complete; no suspect identified
      NaN
    
    
      2
      NaN
      2015-04
      City of London Police
      City of London Police
      -0.097736
      51.520206
      On or near Conference/Exhibition Centre
      E01000001
      City of London 001A
      Anti-social behaviour
      NaN
      NaN
    
    
      3
      NaN
      2015-04
      City of London Police
      City of London Police
      -0.097601
      51.520699
      On or near Carthusian Street
      E01000001
      City of London 001A
      Anti-social behaviour
      NaN
      NaN
    
    
      4
      05f9ae9d8f9c2d06457fa33060ceabb7fc6108d2ba8f37...
      2015-04
      City of London Police
      City of London Police
      -0.098572
      51.516767
      On or near King Edward Street
      E01000001
      City of London 001A
      Bicycle theft
      Investigation complete; no suspect identified
      NaN



In [4]:

    
# Distinct values of the "Crime type" column, in order of first appearance.
data_example.loc[:, "Crime type"].unique()









    Out[4]:





array(['Other theft', 'Theft from the person', 'Anti-social behaviour',
       'Bicycle theft', 'Drugs', 'Public order', 'Shoplifting',
       'Violence and sexual offences', 'Other crime',
       'Criminal damage and arson', 'Burglary', 'Robbery', 'Vehicle crime',
       'Possession of weapons'], dtype=object)



In [5]:

    
# Read every "*street.csv" file found in the 2015 month folders under data/
# and combine them into one frame.
#
# Frames are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0, and appending inside the loop re-copied all rows on
# every iteration. Each file's own row index is kept, as append did.
dirs = listdir("data")

street_frames = []
# Sorted so the row order does not depend on the OS directory listing order.
for month_dir in sorted(dirs):
    # Only 2015 folders; this also skips stray entries such as ".DS_Store".
    if not month_dir.startswith("2015"):
        continue
    for file_name in sorted(listdir(join("data", month_dir))):
        if file_name.endswith("street.csv"):
            street_frames.append(pd.read_csv(join("data", month_dir, file_name)))

# pd.concat raises on an empty list, so fall back to an empty frame
# (what the original loop produced when nothing matched).
data = pd.concat(street_frames) if street_frames else pd.DataFrame()



In [6]:

    
# Sanity check: (rows, columns) of the combined frame built from the street CSVs.
data.shape









    Out[6]:





(3827573, 12)



In [7]:

    
# Keep only the two coordinate columns for the spatial analysis.
coord = data.loc[:, ["Longitude", "Latitude"]]



In [8]:

    
from math import radians, cos, sin, asin, sqrt
# Default reference point used by haversine(), in decimal degrees.
london_geo = {"Latitude": 51.507, "Longitude": -0.127}


def haversine(coord, origin=None):
    """
    Great-circle distance, in kilometres, from a reference point to `coord`.

    Parameters
    ----------
    coord : mapping
        Anything indexable by "Longitude" and "Latitude" (a dict or a
        DataFrame row), with both values in decimal degrees.
    origin : mapping, optional
        Reference point with the same two keys. Defaults to `london_geo`,
        which keeps the one-argument call `df.apply(haversine, axis=1)`
        working unchanged.

    Returns
    -------
    float
        Distance in km on a sphere of radius 6367 km. NaN coordinates
        propagate to a NaN result.
    """
    if origin is None:
        origin = london_geo
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(
        radians,
        [origin["Longitude"], origin["Latitude"], coord["Longitude"], coord["Latitude"]],
    )
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Rounding can push `a` a hair above 1 for near-antipodal points, which
    # would make asin() raise; clamp it. The comparison is False for NaN,
    # so missing coordinates still yield NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * asin(sqrt(a))
    km = 6367 * c
    return km



In [9]:

    
# Drop records that have no latitude; they cannot be placed on the map.
has_latitude = coord["Latitude"].notna()
new_coord = coord.loc[has_latitude]



In [10]:

    
# Give every record a weight of 1 so that summing per location later yields
# a crime count. assign() returns a new frame with "Weight" as the last
# column, so re-running this cell is safe; insert() mutated the frame in
# place and raised ValueError on a second run because the column existed.
new_coord = new_coord.assign(Weight=1)
new_coord.head()



In [11]:

    
# Collapse duplicate coordinates: one row per (Latitude, Longitude) pair,
# with Weight holding the number of records at that point.
new_coord = (
    new_coord
    .groupby(["Latitude", "Longitude"], as_index=False)["Weight"]
    .sum()
)



In [12]:

    
# Print column dtypes, non-null counts and memory usage of the aggregated frame.
new_coord.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 585740 entries, 0 to 585739
Data columns (total 3 columns):
Latitude     585740 non-null float64
Longitude    585740 non-null float64
Weight       585740 non-null int64
dtypes: float64(2), int64(1)
memory usage: 17.9 MB



In [13]:

    
# Add each location's distance (km) from the haversine() reference point as
# the last column. assign() makes the cell safe to re-run; insert() raised
# ValueError once "Distance" already existed.
new_coord = new_coord.assign(Distance=new_coord.apply(haversine, axis=1))



In [14]:

    
# How many distinct locations lie within 30 km of the reference point?
new_coord.loc[new_coord["Distance"] <= 30].shape









    Out[14]:





(68963, 4)



In [15]:

    
# Export the locations within 30 km as a JSON array of row objects.
(
    new_coord
    .loc[new_coord["Distance"] <= 30]
    .to_json("coord.json", orient="records")
)



In [ ]:

	Longitude	Latitude	Weight
0	-2.509930	51.410873	1
1	-2.512153	51.412941	1
2	-2.515816	51.408717	1
3	-2.511761	51.409966	1
4	-2.509126	51.416137	1

	Crime ID	Month	Reported by	Falls within	Longitude	Latitude	Location	LSOA code	LSOA name	Crime type	Last outcome category	Context
0	9d28c69cc3695dc7972f8a990b8f2224d4aa10dcf87f8e...	2015-04	City of London Police	City of London Police	-0.111497	51.518226	On or near Pedestrian Subway	E01000914	Camden 028B	Other theft	Investigation complete; no suspect identified	NaN
1	b8571f69a113df635ce0911394273418481246a7536de8...	2015-04	City of London Police	City of London Police	-0.113767	51.517372	On or near Stone Buildings	E01000914	Camden 028B	Theft from the person	Investigation complete; no suspect identified	NaN
2	NaN	2015-04	City of London Police	City of London Police	-0.097736	51.520206	On or near Conference/Exhibition Centre	E01000001	City of London 001A	Anti-social behaviour	NaN	NaN
3	NaN	2015-04	City of London Police	City of London Police	-0.097601	51.520699	On or near Carthusian Street	E01000001	City of London 001A	Anti-social behaviour	NaN	NaN
4	05f9ae9d8f9c2d06457fa33060ceabb7fc6108d2ba8f37...	2015-04	City of London Police	City of London Police	-0.098572	51.516767	On or near King Edward Street	E01000001	City of London 001A	Bicycle theft	Investigation complete; no suspect identified	NaN