In [249]:
import requests, pandas as pd, numpy as np, json
from requests import session
from bs4 import BeautifulSoup
In [250]:
# Load the metro systems dictionary (its keys are metrolinemap.com URLs).
# The context manager closes the file handle; the previous one-liner
# `json.loads(open(...).read())` left it open until garbage collection.
with open('metrosy.json', 'r', encoding='utf-8') as f:
    metros = json.load(f)
In [251]:
# Pull out the Budapest entry on its own; note that `metro` is reassigned
# by the per-system loops further down.
metro=metros['https://www.metrolinemap.com/metro/budapest/']
In [252]:
!pip install area
In [253]:
from area import area
# Sanity check of area(): a polygon spanning the full longitude range
# (-180..180) and latitude range (-90..90) should give the Earth's surface.
obj = {'type':'Polygon','coordinates':[[[-180,-90],[-180,90],[180,90],[180,-90],[-180,-90]]]}
area(obj) #earth area m^2, should be 511207893395811.06
Out[253]:
In [35]:
from math import radians, cos, sin, asin, sqrt
def haversine(lon1, lat1, lon2, lat2, radius=6371):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    radius : float, optional
        Radius of the sphere; the result is expressed in the same unit.
        Defaults to 6371 (radius of earth in kilometers). Use 3956 for miles.

    Returns
    -------
    float
        Great circle distance between the two points.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt()/asin() raise
    # ValueError ("math domain error"); clamp it back into range.
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    return c * radius
In [38]:
# Sanity check of haversine() (arguments are lon1, lat1, lon2, lat2); the
# coordinates look like Melbourne and London, and the expected result is in
# kilometers.
haversine( 144.963165,-37.814251,-0.126236,51.500153) #should be 16903
Out[38]:
In [229]:
# Per-system results, filled by the loop below and keyed like `metros`.
systemlengths={}  # summed haversine() length of the unique path segments
areas={}  # area() result for the polygon built from segment start points
In [241]:
# For every metro system: total length of its unique path segments and the
# area() of a polygon built from the segment start points.
for m in metros:
    metro = metros[m]
    # GeoJSON-style polygon whose single ring collects the start point of
    # every unique segment as [lng, lat]; it is handed to area() below.
    segarea = {'type': 'Polygon', 'coordinates': [[]]}
    systemlength = 0
    # One list per line, holding the (point, next point) pairs of its path.
    segments = []
    for line in metro['lines']:
        path = line['path']
        segments.append([(path[i], path[i+1]) for i in range(len(path)-1)])
    # Segments are compared by their string form, so a stretch that appears
    # identically in several lines is counted once.  dict.fromkeys keeps the
    # first-seen order; the previous list(set(...)) iterated in string-hash
    # order, which changes between interpreter runs, so the polygon ring --
    # and therefore the computed area -- was not reproducible.
    segmentlist = list(dict.fromkeys(str(j) for i in segments for j in i))
    for s in segmentlist:
        # Strip everything except the numbers from the string form and parse
        # them.  Presumably this yields [lat1, lng1, lat2, lng2]; that relies
        # on each point printing 'lat' before 'lng' -- verify against the
        # contents of metrosy.json.
        x = [float(i.replace('"','').replace('{','').replace('}','').replace('lat','').replace('lng','')
                   .replace('(','').replace(')','').replace(':','').replace("'",'').strip())
             for i in s.split(',')]
        # haversine() takes (lon1, lat1, lon2, lat2).
        l = haversine(x[1], x[0], x[3], x[2])
        systemlength += l
        segarea['coordinates'][0].append([x[1], x[0]])
    systemlengths[m] = systemlength
    areas[m] = area(segarea)
In [242]:
# Show the raw area() result for every system (divided by 1000000 when the
# export rows are built further down).
areas
Out[242]:
Population
In [114]:
pop={}  # population per metro URL key, filled by the scraping loop below
errors=[]  # city slugs for which the scrape found no population figure
In [115]:
# Look up each system's city population on population.city.
for m in metros:
    # The keys of `metros` end in ".../<city>/", so [-2] is the city slug.
    city = m.split('/')[-2]
    country = metros[m]['geo']['country'].lower()
    url3 = 'http://population.city/' + country + '/' + city + '/'
    #print(url3)
    try:
        # Without a timeout a single stalled connection hangs the whole loop.
        response = requests.get(url3, timeout=30)
    except requests.RequestException as exc:
        # Record network failures like any other miss instead of aborting.
        print('ERROR', city, exc)
        errors.append(city)
        continue
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.content, 'html.parser')
    em = soup.find_all('em')
    if em:
        # First <em> element: drop its last character and the spaces used as
        # digit separators, then parse the remainder as a number.
        pop[m] = float(em[0].text[:-1].replace(' ', ''))
        print('OK', city)
    else:
        print('ERROR', city)
        errors.append(city)
In [144]:
def cc(c):
    """Return `c` with every space replaced by a hyphen."""
    pieces = c.split(' ')
    return '-'.join(pieces)
def cy(c):
    """Map a city slug to its alternative spelling; other slugs pass through."""
    respelled = {
        'kochi': 'cochin',
        'milan': 'milano',
        'nuremberg': 'nuernberg',
        'naples': 'napoli',
    }
    return respelled.get(c, c)
In [159]:
# Populations entered by hand for individual systems; the figures are
# presumably taken from https://all-populations.com/
pop.update({
    'https://www.metrolinemap.com/metro/baku/': 2150000.0,
    'https://www.metrolinemap.com/metro/hong-kong/': 7496981.0,
    'https://www.metrolinemap.com/metro/urumqi/': 3112559.0,
    'https://www.metrolinemap.com/metro/tbilisi/': 1062282.0,
    'https://www.metrolinemap.com/metro/isfahan/': 1602110.0,
    'https://www.metrolinemap.com/metro/mashhad/': 2427316.0,
    'https://www.metrolinemap.com/metro/shiraz/': 1227331.0,
    'https://www.metrolinemap.com/metro/tabriz/': 1398060.0,
    'https://www.metrolinemap.com/metro/tehran/': 7797520.0,
    'https://www.metrolinemap.com/metro/almaty/': 1534353.0,
    'https://www.metrolinemap.com/metro/pyongyang/': 3255288.0,
    'https://www.metrolinemap.com/metro/kuala-lumpur/': 1809699.0,
    'https://www.metrolinemap.com/metro/doha/': 796947.0,
    'https://www.metrolinemap.com/metro/mecca/': 1534731.0,
    'https://www.metrolinemap.com/metro/singapore/': 5469724.0,
    'https://www.metrolinemap.com/metro/kaohsiung/': 2769072.0,
    'https://www.metrolinemap.com/metro/taipei/': 2612605.0,
    'https://www.metrolinemap.com/metro/taoyuan/': 2245162.0,
    'https://www.metrolinemap.com/metro/tashkent/': 2135700.0,
    'https://www.metrolinemap.com/metro/nizhny-novgorod/': 1250615.0,
    'https://www.metrolinemap.com/metro/bilbao/': 353173.0,
    'https://www.metrolinemap.com/metro/dnipro/': 984423.0,
    'https://www.metrolinemap.com/metro/newcastle/': 271600.0,
    'https://www.metrolinemap.com/metro/santo-domingo/': 3339410.0,
    'https://www.metrolinemap.com/metro/puerto-rico/': 395326.0,
    'https://www.metrolinemap.com/metro/medellin/': 2486723.0,
    'https://www.metrolinemap.com/metro/caracas/': 1836286.0,
    'https://www.metrolinemap.com/metro/washington/': 702455.0,
})
In [160]:
# Second pass: retry the systems that still have no population, this time
# with hyphenated country names (cc) and alternative city spellings (cy).
for m in metros:
    # The guard used to be `if e in errors:`, but no cell in this notebook
    # defines `e`, so a fresh kernel raised NameError.  `m not in pop`
    # selects exactly the systems that are still missing a figure.
    if m not in pop:
        city = cy(m.split('/')[-2])
        country = cc(metros[m]['geo']['country'].lower())
        url3 = 'http://population.city/' + country + '/' + city + '/'
        #print(url3)
        try:
            # Without a timeout a single stalled connection hangs the loop.
            response = requests.get(url3, timeout=30)
        except requests.RequestException as exc:
            # Report network failures and move on to the next system.
            print('ERROR', city, country, m, exc)
            continue
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(response.content, 'html.parser')
        em = soup.find_all('em')
        if em:
            # First <em> element: drop its last character and the spaces
            # used as digit separators, then parse the rest as a number.
            pop[m] = float(em[0].text[:-1].replace(' ', ''))
            print('OK', city)
        else:
            print('ERROR', city, country, m)
In [246]:
# One dict per metro system; the rows are appended by the loop below and
# then written to CSV via a DataFrame.
data=[]
In [247]:
# Words removed from the official name (in this order) to get a short label.
name_noise = ('Metropolitan', 'Metro', 'Subway', 'Rail', 'Transit',
              'Municipal', 'U-Bahn', 'Underground', 'City')

# Assemble one export row per metro system.
for m in metros:
    dummy = {}
    dummy['Population (M)'] = np.round(pop[m] / 1000000, 2)
    dummy['System length (km)'] = np.round(systemlengths[m], 2)
    metro = metros[m]

    # Short display name: official name minus the generic words above.
    short_name = metro['name']
    for word in name_noise:
        short_name = short_name.replace(word, '')
    dummy['Name'] = short_name.strip()

    # Fields copied (and lightly normalized) from the source record.
    dummy.update({
        'Year started': metro['year'],
        'Description': metro['desc'],
        'Continent': metro['geo']['continent'].replace('Australia / Oceania', 'Australia'),
        'Country': metro['geo']['country'].replace('United States', 'USA').replace('United Kingdom', 'UK'),
        '# of lines': len(metro['lines']),
        '# of stations': len(metro['stations']),
        'Full name': metro['name'],
    })

    # Derived ratios are computed from the already-rounded values stored above.
    population_m = dummy['Population (M)']
    length_km = dummy['System length (km)']
    line_count = dummy['# of lines']
    station_count = dummy['# of stations']
    dummy.update({
        '(km) of lines/1M people': np.round(length_km / population_m, 2),
        '# of stations/1M people': np.round(station_count / population_m, 2),
        'Average line length (km)': np.round(length_km / line_count, 2),
        'Average distance between stations (km)': np.round(length_km / station_count, 2),
        'Total area covered (km²)': np.round(areas[m] / 1000000, 2),
    })
    data.append(dummy)
In [248]:
# Write all collected rows to data.csv in the working directory.
pd.DataFrame(data).to_csv('data.csv')
In [ ]: