In [2]:
import requests
import lxml
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
# Download the Wikipedia page that lists the Canadian postal codes starting with "M".
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as if it were data.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()
wiki_page = response.text

soup = BeautifulSoup(wiki_page, 'lxml')
table = soup.find('table')  # first <table> on the page, kept for ad-hoc inspection

# The postal-code table is looked up by its class attribute.
toronto_table = soup.find('table', {'class': 'wikitable sortable'})
if toronto_table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError("No table with class 'wikitable sortable' found on the page")

# Flat list of every <td> cell in the table, in document order.
links = toronto_table.find_all('td')
# Turn the flat list of <td> cells into (postal code, borough, neighbourhood)
# tuples. The cells are consumed three at a time, in the order they appear;
# a trailing incomplete group (fewer than three cells) is ignored.
pincodes = []
for start in range(0, len(links) - 2, 3):
    # Remove newlines and surrounding whitespace from every cell, not only the
    # last one, so comparisons and later joins on the postal code are reliable.
    postal_code, borough, neighborhood = (
        cell.text.replace('\n', '').strip() for cell in links[start:start + 3]
    )
    if borough == 'Not assigned':
        # Rows without a borough are dropped.
        continue
    if neighborhood == 'Not assigned':
        # A row with a borough but no neighbourhood reuses the borough name.
        neighborhood = borough
    pincodes.append((postal_code, borough, neighborhood))
# Merge rows that share a postal code into one record whose neighbourhood
# field is the comma-separated list of every neighbourhood seen for that code.
result = {}
for postal_code, borough, neighborhood in pincodes:
    if postal_code in result:
        # Each record is [postal code, borough, neighbourhoods]; index 2 holds
        # the neighbourhoods accumulated so far. (Reading index 1 here would
        # splice the borough name in and discard the earlier neighbourhoods.)
        merged = result[postal_code][2] + ', ' + neighborhood
        result[postal_code] = [postal_code, borough, merged]
    else:
        result[postal_code] = [postal_code, borough, neighborhood]

# Re-key the records by running position so the DataFrame gets a 0..n-1 index.
results = {position: list(record) for position, record in enumerate(result.values())}

toronto_data = pd.DataFrame.from_dict(
    results, orient='index', columns=['PostalCode', 'Borough', 'Neighborhood']
)
toronto_data
Out[2]:
In [ ]:
# Trial - not working, or taking too long: the geocoder often returns no
# coordinates, so lookups are retried a bounded number of times instead of
# looping forever.
import time

import geocoder  # third-party geocoding client

MAX_GEOCODE_ATTEMPTS = 5     # give up on a postal code after this many tries
GEOCODE_RETRY_DELAY = 1.0    # seconds to pause between tries

# Maps postal code -> {"latitude": ..., "longitude": ...}; both values are
# None when no coordinates could be obtained within the attempt budget.
update_results = {}
for postal_code in toronto_data['PostalCode']:
    lat_lng_coords = None
    for attempt in range(MAX_GEOCODE_ATTEMPTS):
        geo_info = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = geo_info.latlng
        if lat_lng_coords:
            break
        # A missing or empty latlng is treated as a failed lookup.
        lat_lng_coords = None
        if attempt < MAX_GEOCODE_ATTEMPTS - 1:
            time.sleep(GEOCODE_RETRY_DELAY)

    if lat_lng_coords is None:
        latitude = None
        longitude = None
    else:
        latitude = lat_lng_coords[0]
        longitude = lat_lng_coords[1]
    update_results[postal_code] = {"latitude": latitude, "longitude": longitude}
In [20]:
# Display the PostalCode column of the scraped table.
toronto_data.loc[:, 'PostalCode']
Out[20]:
In [4]:
# Load the per-postal-code coordinate table and rename its key column so it
# matches the 'PostalCode' column of toronto_data.
coordinates = pd.read_csv('http://cocl.us/Geospatial_data').rename(
    columns={'Postal Code': 'PostalCode'}
)

# Join the scraped table with the coordinates on the shared postal-code key.
final_result = toronto_data.merge(coordinates, on='PostalCode')
final_result
Out[4]:
In [14]:
import matplotlib.pyplot as plt
# Pull the coordinate columns out as plain lists for plotting.
lons = list(final_result['Longitude'])
lats = list(final_result['Latitude'])
# One [longitude, latitude] pair per row, in row order.
lat_lons = [[lon, lat] for lon, lat in zip(lons, lats)]

# Scatter plot of every postal code's location (x = longitude, y = latitude).
plt.scatter(lons, lats)
plt.title("Toronto Postal Codes Geo Location")
plt.xlabel("Longitude")
plt.ylabel("Latitude")
plt.show()
The plot above shows the postal-code regions of Toronto. However, clusters are not clearly visible from visual inspection alone; a dedicated clustering algorithm such as k-means is needed for a proper analysis. Please refer to the following code for more information.
In [18]:
# I have Referred some clustering examples from Kaggle
# https://www.kaggle.com/xxing9703/kmean-clustering-of-latitude-and-longitude
import folium
# Centre point used for the initial map view.
toronto_latitude = 43.6532
toronto_longitude = -79.3832
map_toronto = folium.Map(
    location=[toronto_latitude, toronto_longitude],
    zoom_start=10.7,
)

# Add one circle marker per row, with a popup reading "<neighborhood>, <borough>".
marker_rows = zip(
    final_result['Latitude'],
    final_result['Longitude'],
    final_result['Borough'],
    final_result['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_rows:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='red',
        fill=True,
        fill_color='#110000',
        fill_opacity=0.7,
    )
    marker.add_to(map_toronto)

map_toronto
Out[18]:
In [ ]: