In [1]:
import requests, pandas as pd, numpy as np, json
from requests import session
from bs4 import BeautifulSoup
In [2]:
# Root of the site being scraped; every other cell derives its requests from it.
url = "https://www.metrolinemap.com/"
In [3]:
# Download the index page and collect the per-city metro page URLs.
response = requests.get(url)
response.raise_for_status()  # fail fast on HTTP errors instead of parsing an error page
# Explicit parser: without it BeautifulSoup guesses from installed libraries,
# which warns and can change results between environments.
soup = BeautifulSoup(response.content, 'html.parser')
# The slice drops leading/trailing anchors that are not metro pages
# (nav/footer links, presumably — verify if the site layout changes).
links = soup.find_all('a')[5:-4]
links = [a['href'] for a in links]
In [4]:
# Accumulators for the scrape: `metros` maps page URL -> parsed record;
# `good` lists links already scraped, so the loop cell can be re-run
# after a failure without refetching finished pages.
metros = dict()
good = list()
In [5]:
# Scrape every per-city page: for each link, pull the page's inline
# JavaScript apart with string offsets to recover line paths, colors,
# names, branch labels, and the station list, storing everything in
# `metros[link]`. Links already in `good` are skipped so the cell can
# resume after a failure. NOTE(review): all of the parsing below is
# positional/offset based and will break if the site's JS changes.
# for link in links[139:140]:
for link in links[:]:
    if link not in good:
        response = requests.get(link)
        soup = BeautifulSoup(response.content)
        # Page title (<h1>) doubles as the metro's display name.
        name=soup.find('h1').text
        print(name)
        metros[link]={'name':name,'url':link}
        metros[link]['desc']=soup.find('div',{'class':'callout-card-content'}).text.replace('\n','')
        # Each 'pathCoordinates' occurrence in the page script is followed by
        # a JS assignment; the text up to '=' is the path's variable id.
        path_IDs=[i[:i.find('=')].strip() for i in soup.text.split('pathCoordinates')[1:] if '=' in i]
        # path id -> 7-char hex color ('#rrggbb') found in the same snippet.
        colors={i[:i.find(',')]:i[i.find('#'):i.find('#')+7] for i in path_IDs if '#' in i}
        # path id -> line display name, extracted from the parenthesised text
        # after each 'highlightedPoly<k> = poly<k>;' statement (presumably the
        # popup label — verify against the live page).
        lines={k:[i[i.find('(')+1:i.find(')')].replace('"','').strip() \
               for i in soup.text.split('highlightedPoly'+k+' = poly'+k+';')][1] for k in colors}
        linelist=list(lines.keys())
        # Parenthesised part of each <button> label; paired with linelist by
        # position below, so buttons are assumed to appear in the same order
        # as the path variables — TODO confirm.
        ends=[i.text[i.text.find('(')+1:-1] for i in soup.find_all('button')]
        branches={linelist[i]:e for (i,e) in enumerate(ends)}
        # path id -> raw '[...]' coordinate-array text, trailing comma removed
        # so it can be json-decoded ('#'-containing snippets are colors, not paths).
        spaths={i[:i.find('=')].strip():(i[i.find('['):i.find(']')].strip()+']').replace(',]',']') \
                for i in soup.text.split('pathCoordinates')[1:] if '=' in i and '#' not in i}
        # Quote the bare lat/lng keys so the JS object literal parses as JSON.
        metros[link]['lines']=[{'path':json.loads(spaths[i].replace('lat','"lat"').replace('lng','"lng"')),
                                'color':colors[i],
                                'name':lines[i],
                                'branch':branches[i],
                                'id':i} for i in spaths]
        # Decode the page's 'var stations = [...]' JS array: strip CR/LF and
        # single quotes until json.loads accepts it.
        stations=[i for i in json.loads(soup.text[soup.text.find('var stations =')+15:\
                  soup.text.find(']\r\n]')+4].replace('\r','').replace('\n','').replace("\'",''))]
        # station URL -> set of line ids serving it, read from the per-line
        # 'panel' divs (panel i is assumed to correspond to linelist[i]).
        buttonstations={}
        #buttonlines=[i.text for i in soup.findAll('div',{'class':'callout-card-content'})[1].findAll('button')]
        #for i in range(len(buttonlines)):
        #line=buttonlines[i]
        for i in range(len(linelist)):
            line=linelist[i]
            for s in [k['href'] for k in soup.findAll('div',{'class':'panel'})[i].findAll('a')]:
                if s not in buttonstations: buttonstations[s]=set()
                buttonstations[s].add(line)
        # Station records: s is a positional list — presumably
        # [name, lat, lon, url, marker-html] — TODO confirm against the page JS.
        metros[link]['stations']=[{'name':s[0],'lat':s[1],'lon':s[2],'url':s[3],
                                   'lines':list(set([i[1:i.find('class=color')-1] for i in s[4].split('title')[1:]])),
                                   'branches':list(buttonstations[s[3]])} for s in stations]
        # Mark this link done only after a fully successful parse.
        good.append(link)
In [95]:
# Persist the scraped data. The original left the file handle open
# (relying on GC to flush/close); a `with` block guarantees both.
with open('metros.json', 'w') as f:
    f.write(json.dumps(metros))
Out[95]:
In [96]:
# Re-fetch the index page: the scraping loop overwrote `soup` with the
# last city page, and the geography parser below needs the index again.
response = requests.get(url)
response.raise_for_status()  # fail fast on HTTP errors
# Explicit parser for deterministic, warning-free parsing (matches the
# recommended BeautifulSoup usage).
soup = BeautifulSoup(response.content, 'html.parser')
In [97]:
# Build a {city: {'continent': ..., 'country': ...}} lookup from the
# index page text. The listing renders as continent/country headings
# followed by "<City> Map" entries; chunks are separated by blank
# lines. The slice [35:226] trims text before and after the listing —
# brittle, tied to the current page layout (verify if the site changes).
country=''
city=''
geo={}
s=soup.text.split('\n\n\n')[35:226]
for k in range(len(s)):
    i=s[k]
    if i:
        #print(repr(i))
        # The branch chosen depends on how the chunk starts; the heading
        # positions below are empirical (presumably continent at one
        # offset, country at another — confirm against the rendered page).
        if i[0]=='\n':
            # Leading newline: chunk carries continent + country headings.
            continent=i.split('\n')[1].strip()
            country=i.split('\n')[3].strip()
            city=i.split('\n')[-1].replace('Map','').strip()
        elif not s[k-1]:
            # Previous chunk empty: headings start at the first line.
            continent=i.split('\n')[0]
            country=i.split('\n')[2].strip()
            city=i.split('\n')[-1].replace('Map','').strip()
        elif i[0]==' ':
            # Leading space: new country under the current continent.
            country=i.split('\n')[0].strip()
            city=i.split('\n')[-1].replace('Map','').strip()
        else:
            # Otherwise the chunk is just another city in the same country.
            city=i.replace('Map','').strip()
        #print(continent,country,city)
        # continent/country persist across iterations until a new heading
        # chunk overwrites them.
        geo[city]={'continent':continent,'country':country}
In [98]:
# Manual aliases for metros whose page <h1> differs from the city name
# used in the index listing.
# FIXME(review): the Cleveland line is a no-op self-assignment — it can
# only raise KeyError if the key is missing. The intended source key was
# probably a different spelling (cf. the London line below); confirm and fix.
geo['Cleveland RTA Rapid Transit']=geo['Cleveland RTA Rapid Transit']
geo['London Underground and DLR']=geo['London Underground']
In [99]:
# Attach continent/country info to every scraped metro; print the names
# of any metros whose <h1> title has no entry in the geo lookup so the
# alias cell above can be extended.
for link, record in metros.items():
    metro_name = record['name']
    if metro_name in geo:
        record['geo'] = geo[metro_name]
    else:
        print(metro_name)
In [100]:
# Persist the geo-enriched data. The original left the file handle open
# (relying on GC to flush/close); a `with` block guarantees both.
with open('metrosg.json', 'w') as f:
    f.write(json.dumps(metros))
Out[100]:
In [101]:
import zipfile
In [102]:
# Compress the JSON for distribution. The original never closed the
# ZipFile, leaving the central directory write to the GC finalizer; a
# context manager guarantees the archive is finalized and the handle closed.
with zipfile.ZipFile('metrosg.zip', 'w', zipfile.ZIP_DEFLATED) as zf:
    zf.write('metrosg.json')