In [4]:
import json
import pandas as pd
import urllib.request
import urllib.parse
In [5]:
def latlon_from_address(address):
url = 'http://loc.geopunt.be/v2/Location?'
#strip spaces for more robustness
values = {'q' : address.strip()}
data = urllib.parse.urlencode(values)
#get request is url + data
#for POST request you need data.encode('utf8') first and use binary_data as a second arg instead of a '+'
req = urllib.request.Request(url + data)
req.add_header('Content-Type', 'text/json')
response = urllib.request.urlopen(req)
binary_response = response.read()
decoded = binary_response.decode('utf8')
response.close()
jsonobj = json.loads(decoded)
results = jsonobj['LocationResult']
if len(results) == 0:
return None
location = results[0]['Location']
latkeyID = 'Lat_WGS84'
lonkeyID = 'Lon_WGS84'
return (location[latkeyID], location[lonkeyID])
In [6]:
all_data = pd.read_csv('EVA_restoswithlocations.csv')
all_data.head(n=3)
Out[6]:
In [7]:
df_notfound = all_data[all_data['missing']]
print(df_notfound.shape)
print(all_data.shape)
In [8]:
def overwriteAddress(full_address, index, dataframe):
parts = full_address.split(' ')
city = parts[-1]
zipcode = int(parts[-2])
street = " ".join(parts[:len(parts)-2])
latlon = latlon_from_address(full_address)
dataframe.set_value(index, 'street', street)
dataframe.set_value(index, 'city', city)
dataframe.set_value(index, 'zipcode', zipcode)
dataframe.set_value(index, 'full_address', full_address)
dataframe.set_value(index, 'latlon', latlon)
dataframe.set_value(index, 'missing', False)
def overwriteAddress2(street,zipcode,city, index, dataframe):
full_address = " ".join([street,zipcode,city])
latlon = latlon_from_address(full_address)
dataframe.set_value(index, 'street', street)
dataframe.set_value(index, 'city', city)
dataframe.set_value(index, 'zipcode', zipcode)
dataframe.set_value(index, 'full_address', full_address)
dataframe.set_value(index, 'latlon', latlon)
dataframe.set_value(index, 'missing', False)
In [9]:
df_notfound[0:10]
Out[9]:
In [10]:
overwriteAddress('Camille Huysmansstraat 126 3128 Tremelo',7,df_notfound)
overwriteAddress('Leeuwenstraat 23 2000 Antwerpen',16,df_notfound)
overwriteAddress('Rijselstraat 42 8900 Ieper',24,df_notfound)
overwriteAddress('Victor Van Sandelaan 33 9230 Wetteren',32,df_notfound)
overwriteAddress('Mirakelstraat 104A 8790 Waregem',46,df_notfound)
overwriteAddress('Sacramentsstraat 3 9100 Sint-Niklaas',62,df_notfound)
overwriteAddress('Koolstraat 107 9300 Aalst',68,df_notfound)
overwriteAddress('Adolf Buylstraat 44B 8400 Oostende',81,df_notfound)
overwriteAddress('Ferdinand Lousbergkaai 99 9000 Gent',91,df_notfound)
overwriteAddress('Koningin Maria-Hendrikaplein 65a 9000 Gent',98,df_notfound)
df_notfound[0:10]
Out[10]:
In [11]:
df_notfound[10:20]
Out[11]:
In [12]:
# spelfout, fr/nl, fr/nl, spelfout, postcode
overwriteAddress('Philippe de Champagnestraat 23 1000 Brussel',100,df_notfound)
overwriteAddress('Marie Collartstraat 1620 Drogenbos',124,df_notfound)
overwriteAddress('Rue Américaine 117 1050 Elsene',128,df_notfound)
overwriteAddress('Keizerstraat 23 2800 Mechelen',132,df_notfound)
overwriteAddress('Ulbeekstraat 21 3832 Wellen',139,df_notfound)
# afkorting+spelfouten, spelfout, stad, bad record, contractie
overwriteAddress('Sint Jakobstraat 63 8000 Brugge',140,df_notfound)
overwriteAddress('Zwartzustersstraat 16 3000 Leuven',147,df_notfound)
overwriteAddress('Gobbelsrode 68a 3220 Holsbeek',153,df_notfound)
#bad record
overwriteAddress2('Maalse Steenweg 438', '8310', 'Brugge-Sint Kruis',163,df_notfound)
df_notfound[10:20]
Out[12]:
In [13]:
df_notfound[20:30]
Out[13]:
In [15]:
overwriteAddress('Rijselstraat 5 8800 Rumbeke',167,df_notfound)
overwriteAddress('Dikstraat 3 2430 Laakdal',172,df_notfound)
overwriteAddress('Veemarkt 30 8500 Kortrijk',189,df_notfound)
overwriteAddress('Sint Jacobsstraat 36 8000 Brugge',192,df_notfound)
overwriteAddress('Reygaerdijkstraat 22 8630 Zoutenaaie',194,df_notfound)
overwriteAddress('Sint Jozefsplein 13 3511 Stokrooie',197,df_notfound)
overwriteAddress('Coupure Links 497 9000 Gent',201,df_notfound)
overwriteAddress('Genkersteenweg 214 3500 Hasselt',224,df_notfound)
overwriteAddress('Kapucijnenstraat 44 8400 Oostende',232,df_notfound)
overwriteAddress('Dorpsstraat 30 9667 Sint-Maria-Horebeke',239,df_notfound)
df_notfound[20:30]
Out[15]:
In [16]:
df_notfound[30:40]
Out[16]:
In [18]:
overwriteAddress('IJzerenleen 10 2800 Mechelen',240,df_notfound)
#print(latlon_from_address('')) #NIET GEVONDEN
overwriteAddress("Chaussée D'Ixelles 12 1050 Ixelles",247,df_notfound)
overwriteAddress('Lousbergskaai 134 9000 Gent',254,df_notfound)
overwriteAddress('Rooigemlaan 389 9000 Gent',259,df_notfound)
overwriteAddress('Recollettenlei 10 9000 Gent',269,df_notfound)
overwriteAddress('Tervuursevest 3000 Leuven',287,df_notfound)
#print(latlon_from_address('')) #FAILLIET
overwriteAddress('Emile Jacqmainlaan 7 1000 Brussel',309,df_notfound)
overwriteAddress('Baron August de Becker Remyplein 19 3010 Leuven',317,df_notfound)
df_notfound[30:40]
Out[18]:
In [19]:
df_notfound[40:50]
Out[19]:
In [20]:
overwriteAddress('Sacramentsstraat 9100 Sint-Niklaas',321,df_notfound)
overwriteAddress('Dries 29 8956 Nieuwkerke',322,df_notfound)
#(latlon_from_address('')) # Wallonia 50.262275, 5.656895
df_notfound.set_value(337, 'latlon', (50.262275, 5.656895))
df_notfound.set_value(337, 'missing', False)
overwriteAddress('Wulvestraat 1 8951 Heuvelland',354,df_notfound)
overwriteAddress('Dorpsstraat 36 3500 Hasselt',358,df_notfound)
overwriteAddress('Vissersstraat 23 8370 Blankenberge',384,df_notfound)
overwriteAddress('Pelgrimstraat 13 2000 Antwerpen',396,df_notfound)
#('Rue du purnalet 9 6980 La Roche-en-Ardenne')) #50.181260, 5.575643
df_notfound.set_value(411, 'latlon', (50.181260, 5.575643))
df_notfound.set_value(411, 'missing', False)
overwriteAddress('Louizalaan 455 1050 Elsene',414,df_notfound)
overwriteAddress('Wandelingstraat 8 3000 Leuven',419,df_notfound)
df_notfound[40:50]
Out[20]:
In [21]:
df_notfound[50:60]
Out[21]:
In [22]:
overwriteAddress('Stationsstraat 220 9450 Heldergem',424,df_notfound)
overwriteAddress('Frederik de Merodestraat 59 2800 Mechelen',425,df_notfound)
overwriteAddress('Rue Borgvalstraat 11 1000 Brussel',428,df_notfound)
#latlon_from_address('')) #DUPLICATE
overwriteAddress('Chaussée de Wavre 335 1040 Etterbeek',430,df_notfound)
overwriteAddress('Chaussée de Boondael 339a 1050 Ixelles',432,df_notfound)
overwriteAddress('Herdersstraat 17 1050 Elsene',449,df_notfound)
overwriteAddress('Dambruggestraat 36/1 2060 Antwerpen',452,df_notfound)
#('')) #eerder een verhuisservice!?
overwriteAddress('Congresstraat 2060 Laar',459,df_notfound)
df_notfound[50:60]
Out[22]:
In [23]:
df_notfound[60:70]
Out[23]:
In [24]:
overwriteAddress('Wapenmakersstraat 5 8000 Brugge',462,df_notfound)
overwriteAddress('Rich. van Britsomstraat 18, 9100 Sint-Niklaas',497,df_notfound)
overwriteAddress('Koloniënstraat 58 Brussel',498,df_notfound)
#print(latlon_from_address('')) #BAD RECORD
overwriteAddress('Chaussée de Vleurgat 324 1050 Ixelles',523,df_notfound)
overwriteAddress('Jean Volderslaan 1060 Sint-Gillis',536,df_notfound)
overwriteAddress('Dweersstraat 26 8000 Brugge',544,df_notfound)
overwriteAddress('Sint-Amandstraat 17 9000 Gent',546,df_notfound)
overwriteAddress('Sint-Amandstraat 15 9000 Gent',556,df_notfound)
overwriteAddress('Dorpsstraat 42 3500 Hasselt',563,df_notfound)
df_notfound[60:70]
Out[24]:
In [25]:
df_notfound[70:80]
Out[25]:
In [28]:
overwriteAddress('Rovorst 33 9660 Zegelsem',566,df_notfound)
overwriteAddress('Stationsstraat 13 9300 Aalst',577,df_notfound)
overwriteAddress('Engelse Wandeling 2 8500 Kortrijk',586,df_notfound)
overwriteAddress('Schildknaaps Straat 31 1000 Brussel',589,df_notfound)
overwriteAddress('Jan Hoetplein 1 9000 Gent',597,df_notfound)
overwriteAddress('Geelsebaan 29/2 2460 Kasterlee',608,df_notfound)
overwriteAddress('Rue Royale 165 1190 Vorst',610,df_notfound)
overwriteAddress('Joseph Stevensstraat 16-18 1000 Brussel',612,df_notfound)
overwriteAddress('Makelaarsstraat 24 9000 Gent',616,df_notfound)
overwriteAddress('Kortrijksestraat 405 8020 Waardamme',646,df_notfound)
df_notfound[70:80]
Out[28]:
In [29]:
df_notfound[80:95]
Out[29]:
In [30]:
overwriteAddress('Waterleidingsstraat 95 1050 Elsene',653,df_notfound)
overwriteAddress('Emile Jacqmainlaan 56 1000 Brussel',655,df_notfound)
#print(latlon_from_address('')) #wallonie => 50°38'33.6"N 5°34'09.1"E
df_notfound.set_value(663, 'latlon', (50.642521, 5.569165))
df_notfound.set_value(663, 'missing', False)
overwriteAddress('Alfons Smetsplein 3000 Leuven',670,df_notfound)
overwriteAddress('Boondaalse Steenweg 341 1050 Elsene',687,df_notfound)
overwriteAddress('Lakensestraat 34 1000 Brussel',688,df_notfound)
overwriteAddress('Kortrijksesteenweg 573 9000 Gent',692,df_notfound)
#print(latlon_from_address('')) #BAD Record
overwriteAddress('Hundelgemsesteenweg 182 9820 Merelbeke',710,df_notfound)
overwriteAddress('Kraankindersstraat 2 9000 Gent',721,df_notfound)
overwriteAddress('Pelgrimstraat 2 2000 Antwerpen',733,df_notfound)
In [31]:
df_notfound[80:95]
Out[31]:
In [37]:
df_found_final = all_data[~all_data['missing']]
df_found_final.shape
Out[37]:
In [38]:
df_notfound_final = df_notfound[~df_notfound['missing']]
df_notfound_final.shape
Out[38]:
In [41]:
df_all_final = pd.concat([df_found_final, df_notfound_final])
In [42]:
df_all_final.shape
Out[42]:
In [43]:
df_all_final.to_csv('EVA_restoswithlocationsANDfixes.csv', index=False)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: