In [50]:
import requests, pandas as pd, numpy as np, json
from requests import session
from bs4 import BeautifulSoup
In [51]:
# Root page of the site; the per-city page links are collected from it.
url = "https://www.metrolinemap.com/"
In [52]:
# Download the root page and keep the href of every anchor except the first
# five and the last four (presumably site navigation rather than city pages --
# TODO confirm against the live page layout).
response = requests.get(url)
soup = BeautifulSoup(response.content)
links = [anchor['href'] for anchor in soup.find_all('a')[5:-4]]
In [142]:
# metros: page URL -> scraped record for that page.
# good:   page URLs that have already been processed, so the scraping loop
#         can be re-run without repeating finished pages.
metros = dict()
good = list()
In [145]:
# Scrape every page in `links` into `metros[link]`.
#
# For each page the record holds: 'name' (the <h1> text), 'url', 'desc',
# 'lines' (one dict per path with its coordinates, colour, name, ends, branch
# label and id) and 'stations' (one dict per station).
# A link is appended to `good` only after its page was processed completely,
# so re-running the cell after a failure skips the finished pages.
#
# NOTE(review): BeautifulSoup is called without an explicit parser, so the
# parse tree depends on which parser libraries are installed.
# for link in links[138:140]:
for link in links[:]:
    if link not in good:
        response = requests.get(link)
        soup = BeautifulSoup(response.content)
        # soup.text re-walks the whole tree on every access; read it once.
        page_text = soup.text
        name = soup.find('h1').text
        print(name)
        metros[link] = {'name': name, 'url': link}
        metros[link]['desc'] = soup.find('div', {'class': 'callout-card-content'}).text.replace('\n', '')

        # Every piece of page text following an occurrence of 'pathCoordinates'.
        # The text before the first '=' of each piece is either a bare path id
        # or an id followed by ',' and further text containing a '#rrggbb' colour.
        path_chunks = page_text.split('pathCoordinates')[1:]
        path_IDs = [i[:i.find('=')].strip() for i in path_chunks if '=' in i]
        # path id -> 7-character colour string starting at the first '#'.
        colors = {i[:i.find(',')]: i[i.find('#'):i.find('#')+7] for i in path_IDs if '#' in i}
        #ends={i.text[:i.text.rfind('(')-1].strip():i.text[i.text.find('(')+1:i.text.find(')')] for i in soup.find_all('button')}

        # path id -> text inside the first parentheses that follow the
        # 'highlightedPoly<id> = poly<id>;' statement (double quotes removed).
        lines = {}
        for k in colors:
            after_marker = page_text.split('highlightedPoly' + k + ' = poly' + k + ';')[1]
            lines[k] = after_marker[after_marker.find('(')+1:after_marker.find(')')].replace('"', '').strip()
        linelist = list(lines.keys())

        # Button text -> the part after its first '(' with the final character dropped.
        ends = {i.text: i.text[i.text.find('(')+1:-1] for i in soup.find_all('button')}
        # Buttons are paired with path ids purely by position: the n-th distinct
        # button text is assigned to the n-th path id.
        branches = {linelist[i]: e for (i, e) in enumerate(ends)}
        ends = {linelist[i]: ends[e] for (i, e) in enumerate(ends)}

        # Disabled manual per-city overrides, kept for reference. They index
        # `ends` by line name, whereas the live code above keys it by path id.
        #
        # for i in lines:
        #     if '(' in lines[i]: lines[i]+=')'
        # if (link=='https://www.metrolinemap.com/metro/tehran/'):
        #     lines['168']='Line 1 (Kahrizak Branch)'
        #     lines['169']='Line 1 (Shahr-e Aftab Branch)'
        #     ends['Line 1 (Kahrizak Branch)']='Tajrish - Kahrizak'
        #     ends['Line 1 (Shahr-e Aftab Branch)']='Tajrish - Shahr-e Aftab'
        #     ends['Line 2']='Tehran (Sadeghiyeh) - Farhangsara'
        #     ends['Line 5']='Golshahr - Tehran (Sadeghiyeh)'
        # if (link=='https://www.metrolinemap.com/metro/nagoya/'):
        #     lines['203']='Meikō Line (Nagoya Port)'
        #     ends['Meikō Line (Nagoya Port)']='Kanayama - Nagoyakō'
        # if (link=='https://www.metrolinemap.com/metro/brussels/'):
        #     ends['Line 2']='Simonis (Leopold II) - Simonis (Elisabeth)'
        #     ends['Line 6']='Roi Baudouin/Koning Boudewijn - Simonis (Elisabeth)'
        # if (link=='https://www.metrolinemap.com/metro/copenhagen/'):
        #     ends['M2']='Vanløse - Lufthavnen (Copenhagen Airport)'
        #     ends['M3']='København H - København H (Circle Line)'
        # if (link=='https://www.metrolinemap.com/metro/barcelona/'):
        #     ends['FM']='Paral - lel (Funicular)'
        # if (link=='https://www.metrolinemap.com/metro/istanbul/'):
        #     ends['M1a']='Yenikapı - Atatürk Havalimanı (Airport)'
        # if (link=='https://www.metrolinemap.com/metro/san-francisco/'):
        #     ends['Richmond–Warm Springs/South Fremont line']='Richmond - Warm Springs (South Fremont)'

        # path id -> text of the first '[...]' literal in the piece, with a
        # trailing comma before the closing bracket removed. Only pieces
        # without a '#' are used, i.e. the ones not consumed by `colors`.
        spaths = {i[:i.find('=')].strip(): (i[i.find('['):i.find(']')].strip() + ']').replace(',]', ']')
                  for i in path_chunks if '=' in i and '#' not in i}
        # The bare lat/lng keys are quoted so the literal parses as JSON.
        metros[link]['lines'] = [{'path': json.loads(spaths[i].replace('lat', '"lat"').replace('lng', '"lng"')),
                                  'color': colors[i],
                                  'name': lines[i],
                                  'ends': ends[i],
                                  'branch': branches[i],
                                  'id': i} for i in spaths]

        # The value assigned to 'var stations =' up to and including the first
        # ']\r\n]'; line breaks and single quotes are removed before parsing.
        stations_start = page_text.find('var stations =') + 15
        stations_end = page_text.find(']\r\n]') + 4
        stations = list(json.loads(page_text[stations_start:stations_end]
                                   .replace('\r', '').replace('\n', '').replace("\'", '')))

        # The n-th button of the second callout card is paired with the n-th
        # div.panel; each anchor href inside that panel gets the button's text.
        buttonlines = [i.text for i in soup.find_all('div', {'class': 'callout-card-content'})[1].find_all('button')]
        panels = soup.find_all('div', {'class': 'panel'})
        buttonstations = {}
        for i, line in enumerate(buttonlines):
            for s in [k['href'] for k in panels[i].find_all('a')]:
                buttonstations.setdefault(s, set()).add(line)

        # Fix: the original statement ended with a trailing comma, which stored
        # a 1-tuple wrapping this list instead of the list itself.
        metros[link]['stations'] = [{'name': s[0], 'lat': s[1], 'lon': s[2], 'url': s[3],
                                     'lines': list(set([i[1:i.find('class=color')-1] for i in s[4].split('title')[1:]])),
                                     'branches': list(buttonstations[s[3]])} for s in stations]
        good.append(link)
In [167]:
# Scratch cell: rebuilds the per-line lookup tables from whatever `path_IDs`
# and `soup` currently hold in the kernel (both are assigned by the scraping
# loop), so the intermediate values can be inspected for a single page.

# path id -> 7-character colour string starting at the first '#'.
colors = dict(
    (piece[:piece.find(',')], piece[piece.find('#'):piece.find('#') + 7])
    for piece in path_IDs
    if '#' in piece
)
# ends={i.text[:i.text.rfind('(')-1].strip():i.text[i.text.find('(')+1:i.text.find(')')] for i in soup.find_all('button')}

# path id -> text inside the first parentheses after 'highlightedPoly<id> = poly<id>;'.
lines = dict(
    (
        path_id,
        [
            part[part.find('(') + 1:part.find(')')].replace('"', '').strip()
            for part in soup.text.split('highlightedPoly' + path_id + ' = poly' + path_id + ';')
        ][1],
    )
    for path_id in colors
)
linelist = list(lines)

# Button text -> the part after its first '(' with the final character dropped.
ends = dict(
    (button.text, button.text[button.text.find('(') + 1:-1])
    for button in soup.find_all('button')
)
# Positional pairing: n-th path id -> n-th distinct button text.
branches = {linelist[position]: label for position, label in enumerate(ends)}
# ends={linelist[i]:ends[e] for (i,e) in enumerate(ends)}
In [171]:
Out[171]:
In [ ]:
# Save the scraped records as JSON. The context manager closes (and therefore
# flushes) the file deterministically; the original left the handle open.
with open('metros.json', 'w') as out_file:
    json.dump(metros, out_file)
In [ ]:
# Fetch the root page again: `soup` was last bound to a city page by the
# scraping loop, and the cell that follows parses the root page's text.
response = requests.get(url)
soup = BeautifulSoup(markup=response.content)
In [ ]:
# Build `geo`: city/system name -> {'continent': ..., 'country': ...} from the
# text of the root page.
#
# The page text is split on triple newlines and only chunks 35..225 are used
# (hard-coded offsets -- presumably tied to the page layout at scrape time,
# TODO confirm). Continent and country carry over from earlier chunks until a
# chunk supplies a new value.
#
# Fix: `continent` is now initialised like `country` and `city`; previously a
# first chunk that supplied no continent raised NameError on a fresh kernel.
continent = ''
country = ''
city = ''
geo = {}
s = soup.text.split('\n\n\n')[35:226]
for k, i in enumerate(s):
    if i:
        #print(repr(i))
        parts = i.split('\n')
        if i[0] == '\n':
            # Chunk opens with a newline: continent, country and city are all present.
            continent = parts[1].strip()
            country = parts[3].strip()
            city = parts[-1].replace('Map', '').strip()
        elif not s[k-1]:
            # Previous chunk was empty: same three fields, shifted up by one line.
            # NOTE(review): for k == 0 this inspects the last chunk (s[-1]).
            continent = parts[0]
            country = parts[2].strip()
            city = parts[-1].replace('Map', '').strip()
        elif i[0] == ' ':
            # Chunk opens with a space: new country, same continent.
            country = parts[0].strip()
            city = parts[-1].replace('Map', '').strip()
        else:
            # Anything else is another city in the current country.
            city = i.replace('Map', '').strip()
        #print(continent,country,city)
        geo[city] = {'continent': continent, 'country': country}
In [ ]:
# Manual aliases for `geo`; presumably these are names that differ between the
# root-page listing and the city-page titles -- verify against the unmatched
# names printed by the merge step.
# NOTE(review): this first statement assigns a key to itself, so it changes
# nothing (and raises KeyError if the key is absent). The right-hand key was
# probably meant to be a different listing name -- TODO confirm and correct.
geo['Cleveland RTA Rapid Transit']=geo['Cleveland RTA Rapid Transit']
# Make the 'London Underground' entry also available under the longer name.
geo['London Underground and DLR']=geo['London Underground']
In [ ]:
# Attach the continent/country entry to every scraped record whose name is a
# key of `geo`; names without an entry are printed so they can be aliased.
for metro, record in metros.items():
    name = record['name']
    if name in geo:
        record['geo'] = geo[name]
    else:
        print(name)
In [ ]:
# Save the geo-enriched records as JSON. The context manager guarantees the
# file is closed and fully written before the next cell reads it from disk;
# the original left the handle open.
with open('metrosg.json', 'w') as out_file:
    json.dump(metros, out_file)
In [ ]:
import zipfile
In [ ]:
# Compress metrosg.json into metrosg.zip. A zip archive is only finalised when
# the ZipFile is closed; the original never closed it explicitly, so the
# context manager is used to do that deterministically.
with zipfile.ZipFile('metrosg.zip', "w", zipfile.ZIP_DEFLATED) as archive:
    archive.write('metrosg.json')