Part 1 - Extracting Table from Wiki Page


In [2]:
import requests
import lxml

import pandas as pd
import numpy as np

from bs4 import BeautifulSoup

wiki_page = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(wiki_page, 'lxml')
table = soup.find('table')
# table

toronto_table = soup.find('table',{'class':'wikitable sortable'})
links = toronto_table.findAll('td')

pincodes = []
count = 0
for x in links:
    if count == 0:
        x1 = x.text
        count += 1
    elif count == 1:
        x2 = x.text
        count +=1
    elif count == 2:
        x3 = x.text
        x3 = x3.replace('\n','')
        count = 0
        if x3 == 'Not assigned':
            x3 = x2
        if x2 != 'Not assigned':            
            pincodes.append((x1,x2,x3))
# print (pincodes)

result = {}
for x in pincodes:
    if x[0] in result:
        result[x[0]] = [x[0], x[1], result[x[0]][1] + ', ' + x[2]]
    else:
        result[x[0]] = [x[0], x[1], x[2]]
            
results = {}
for count, x in enumerate(result):
    results[count] = [x, result[x][1], result[x][2]]
    
# print(results)

toronto_data = pd.DataFrame.from_dict(results, orient='index', columns=['PostalCode', 'Borough', 'Neighborhood'])
toronto_data


Out[2]:
PostalCode Borough Neighborhood
0 M4S Central Toronto Davisville
1 M5L Downtown Toronto Downtown Toronto, Victoria Hotel
2 M4L East Toronto East Toronto, India Bazaar
3 M8V Etobicoke Etobicoke, New Toronto
4 M5K Downtown Toronto Downtown Toronto, Toronto Dominion Centre
5 M3A North York Parkwoods
6 M1H Scarborough Cedarbrae
7 M6C York Humewood-Cedarvale
8 M1R Scarborough Scarborough, Wexford
9 M6R West Toronto West Toronto, Roncesvalles
10 M1P Scarborough Scarborough, Wexford Heights
11 M4X Downtown Toronto Downtown Toronto, St. James Town
12 M5B Downtown Toronto Downtown Toronto, Garden District
13 M5M North York North York, Lawrence Manor East
14 M9M North York North York, Humberlea
15 M4H East York Thorncliffe Park
16 M2P North York York Mills West
17 M6B North York Glencairn
18 M1J Scarborough Scarborough Village
19 M5X Downtown Toronto Downtown Toronto, Underground city
20 M2R North York Willowdale West
21 M5C Downtown Toronto St. James Town
22 M1B Scarborough Scarborough, Malvern
23 M4W Downtown Toronto Rosedale
24 M4M East Toronto Studio District
25 M5G Downtown Toronto Central Bay Street
26 M8W Etobicoke Etobicoke, Long Branch
27 M6H West Toronto West Toronto, Dufferin
28 M8Z Etobicoke Etobicoke, South of Bloor
29 M3B North York Don Mills North
... ... ... ...
73 M1C Scarborough Scarborough, Port Union
74 M3N North York Downsview Northwest
75 M6N York York, Runnymede
76 M5V Downtown Toronto Downtown Toronto, South Niagara
77 M1N Scarborough Scarborough, Cliffside West
78 M4T Central Toronto Central Toronto, Summerhill East
79 M3K North York North York, Downsview East
80 M9R Etobicoke Etobicoke, St. Phillips
81 M3M North York Downsview Central
82 M6E York Caledonia-Fairbanks
83 M5A Downtown Toronto Downtown Toronto, Regent Park
84 M6G Downtown Toronto Christie
85 M6S West Toronto West Toronto, Swansea
86 M6K West Toronto West Toronto, Parkdale Village
87 M4G East York Leaside
88 M6J West Toronto West Toronto, Trinity
89 M9P Etobicoke Westmount
90 M5R Central Toronto Central Toronto, Yorkville
91 M6A North York North York, Lawrence Manor
92 M1K Scarborough Scarborough, Kennedy Park
93 M9A Etobicoke Islington Avenue
94 M2J North York North York, Oriole
95 M9C Etobicoke Etobicoke, Old Burnhamthorpe
96 M2N North York Willowdale South
97 M5J Downtown Toronto Downtown Toronto, Union Station
98 M7A Queen's Park Queen's Park
99 M3C North York North York, Don Mills South
100 M5N Central Toronto Roselawn
101 M1X Scarborough Upper Rouge
102 M1L Scarborough Scarborough, Oakridge

103 rows × 3 columns


In [ ]:
# Trail - Not WOrking or taking too long time
import geocoder # import geocoder

update_results = {}
for postal_code in toronto_data['PostalCode']:

    lat_lng_coords = None
    while(lat_lng_coords is None):
      geo_info = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
      lat_lng_coords = geo_info.latlng

    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    update_results[postal_code] = {"latitude":latitude, "longitude":longitude}

In [20]:
toronto_data['PostalCode']


Out[20]:
103

Part 2 - Adding Latitude and Longitude


In [4]:
coordinates = pd.read_csv('http://cocl.us/Geospatial_data')
coordinates.rename(columns={'Postal Code': 'PostalCode'}, inplace=True)
final_result = pd.merge(toronto_data, coordinates, on='PostalCode')
final_result


Out[4]:
PostalCode Borough Neighborhood Latitude Longitude
0 M4S Central Toronto Davisville 43.704324 -79.388790
1 M5L Downtown Toronto Downtown Toronto, Victoria Hotel 43.648198 -79.379817
2 M4L East Toronto East Toronto, India Bazaar 43.668999 -79.315572
3 M8V Etobicoke Etobicoke, New Toronto 43.605647 -79.501321
4 M5K Downtown Toronto Downtown Toronto, Toronto Dominion Centre 43.647177 -79.381576
5 M3A North York Parkwoods 43.753259 -79.329656
6 M1H Scarborough Cedarbrae 43.773136 -79.239476
7 M6C York Humewood-Cedarvale 43.693781 -79.428191
8 M1R Scarborough Scarborough, Wexford 43.750072 -79.295849
9 M6R West Toronto West Toronto, Roncesvalles 43.648960 -79.456325
10 M1P Scarborough Scarborough, Wexford Heights 43.757410 -79.273304
11 M4X Downtown Toronto Downtown Toronto, St. James Town 43.667967 -79.367675
12 M5B Downtown Toronto Downtown Toronto, Garden District 43.657162 -79.378937
13 M5M North York North York, Lawrence Manor East 43.733283 -79.419750
14 M9M North York North York, Humberlea 43.724766 -79.532242
15 M4H East York Thorncliffe Park 43.705369 -79.349372
16 M2P North York York Mills West 43.752758 -79.400049
17 M6B North York Glencairn 43.709577 -79.445073
18 M1J Scarborough Scarborough Village 43.744734 -79.239476
19 M5X Downtown Toronto Downtown Toronto, Underground city 43.648429 -79.382280
20 M2R North York Willowdale West 43.782736 -79.442259
21 M5C Downtown Toronto St. James Town 43.651494 -79.375418
22 M1B Scarborough Scarborough, Malvern 43.806686 -79.194353
23 M4W Downtown Toronto Rosedale 43.679563 -79.377529
24 M4M East Toronto Studio District 43.659526 -79.340923
25 M5G Downtown Toronto Central Bay Street 43.657952 -79.387383
26 M8W Etobicoke Etobicoke, Long Branch 43.602414 -79.543484
27 M6H West Toronto West Toronto, Dufferin 43.669005 -79.442259
28 M8Z Etobicoke Etobicoke, South of Bloor 43.628841 -79.520999
29 M3B North York Don Mills North 43.745906 -79.352188
... ... ... ... ... ...
73 M1C Scarborough Scarborough, Port Union 43.784535 -79.160497
74 M3N North York Downsview Northwest 43.761631 -79.520999
75 M6N York York, Runnymede 43.673185 -79.487262
76 M5V Downtown Toronto Downtown Toronto, South Niagara 43.628947 -79.394420
77 M1N Scarborough Scarborough, Cliffside West 43.692657 -79.264848
78 M4T Central Toronto Central Toronto, Summerhill East 43.689574 -79.383160
79 M3K North York North York, Downsview East 43.737473 -79.464763
80 M9R Etobicoke Etobicoke, St. Phillips 43.688905 -79.554724
81 M3M North York Downsview Central 43.728496 -79.495697
82 M6E York Caledonia-Fairbanks 43.689026 -79.453512
83 M5A Downtown Toronto Downtown Toronto, Regent Park 43.654260 -79.360636
84 M6G Downtown Toronto Christie 43.669542 -79.422564
85 M6S West Toronto West Toronto, Swansea 43.651571 -79.484450
86 M6K West Toronto West Toronto, Parkdale Village 43.636847 -79.428191
87 M4G East York Leaside 43.709060 -79.363452
88 M6J West Toronto West Toronto, Trinity 43.647927 -79.419750
89 M9P Etobicoke Westmount 43.696319 -79.532242
90 M5R Central Toronto Central Toronto, Yorkville 43.672710 -79.405678
91 M6A North York North York, Lawrence Manor 43.718518 -79.464763
92 M1K Scarborough Scarborough, Kennedy Park 43.727929 -79.262029
93 M9A Etobicoke Islington Avenue 43.667856 -79.532242
94 M2J North York North York, Oriole 43.778517 -79.346556
95 M9C Etobicoke Etobicoke, Old Burnhamthorpe 43.643515 -79.577201
96 M2N North York Willowdale South 43.770120 -79.408493
97 M5J Downtown Toronto Downtown Toronto, Union Station 43.640816 -79.381752
98 M7A Queen's Park Queen's Park 43.662301 -79.389494
99 M3C North York North York, Don Mills South 43.725900 -79.340923
100 M5N Central Toronto Roselawn 43.711695 -79.416936
101 M1X Scarborough Upper Rouge 43.836125 -79.205636
102 M1L Scarborough Scarborough, Oakridge 43.711112 -79.284577

103 rows × 5 columns

Part 3 - Clustering


In [14]:
import matplotlib.pyplot as plt

lat_lons = []
lats = []
lons = []
for index, row in final_result.iterrows():
    lat_lons.append([row['Longitude'], row['Latitude']])
    lats.append(row['Latitude'])
    lons.append(row['Longitude'])

plt.scatter(lons, lats)
plt.xlabel("Longitude")
plt.ylabel("Latitude")
plt.title("Toronto Postal Codes Geo Location")
plt.show()


Above plots shows the regions in Toronto. However the clusters are not visible clearly through visual analysis. It requires detailes Clusteing algorithms like k-Means for a good analysis. Please refer the following code for more info.


In [18]:
# I have Referred some clustering examples from Kaggle
# https://www.kaggle.com/xxing9703/kmean-clustering-of-latitude-and-longitude

import folium 

toronto_latitude = 43.6532; toronto_longitude = -79.3832
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10.7)

# adding markers to map
for lat, lng, borough, neighborhood in zip(final_result['Latitude'], final_result['Longitude'], final_result['Borough'], final_result['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='#110000',
        fill_opacity=0.7).add_to(map_toronto)  
    

map_toronto


Out[18]:


In [ ]: