In [1]:
import requests, pandas as pd, numpy as np, json
from requests import session
from bs4 import BeautifulSoup

In [2]:
url='https://www.metrolinemap.com/'

In [3]:
response = requests.get(url)
soup = BeautifulSoup(response.content)
links=soup.find_all('a')[5:-4]
links=[i['href'] for i in links]

In [4]:
metros={}
good=[]

In [5]:
# for link in links[139:140]:
for link in links[:]:
    if link not in good:
        response = requests.get(link)
        soup = BeautifulSoup(response.content)
        name=soup.find('h1').text
        print(name)
        metros[link]={'name':name,'url':link}
        metros[link]['desc']=soup.find('div',{'class':'callout-card-content'}).text.replace('\n','')
        path_IDs=[i[:i.find('=')].strip() for i in soup.text.split('pathCoordinates')[1:] if '=' in i]
        colors={i[:i.find(',')]:i[i.find('#'):i.find('#')+7] for i in path_IDs if '#' in i}
        lines={k:[i[i.find('(')+1:i.find(')')].replace('"','').strip() \
            for i in soup.text.split('highlightedPoly'+k+' = poly'+k+';')][1] for k in colors}
        linelist=list(lines.keys())
        ends=[i.text[i.text.find('(')+1:-1] for i in soup.find_all('button')]
        branches={linelist[i]:e for (i,e) in enumerate(ends)}        
        spaths={i[:i.find('=')].strip():(i[i.find('['):i.find(']')].strip()+']').replace(',]',']') \
            for i in soup.text.split('pathCoordinates')[1:] if '=' in i and '#' not in i}
        metros[link]['lines']=[{'path':json.loads(spaths[i].replace('lat','"lat"').replace('lng','"lng"')),
                                'color':colors[i],
                                'name':lines[i],
                                'branch':branches[i],
                                'id':i} for i in spaths]
        stations=[i for i in json.loads(soup.text[soup.text.find('var stations =')+15:\
                     soup.text.find(']\r\n]')+4].replace('\r','').replace('\n','').replace("\'",''))]
        buttonstations={}
        #buttonlines=[i.text for i in soup.findAll('div',{'class':'callout-card-content'})[1].findAll('button')]
        #for i in range(len(buttonlines)):
            #line=buttonlines[i]
        for i in range(len(linelist)):
            line=linelist[i]
            for s in [k['href'] for k in soup.findAll('div',{'class':'panel'})[i].findAll('a')]:
                if s not in buttonstations: buttonstations[s]=set()
                buttonstations[s].add(line)
        metros[link]['stations']=[{'name':s[0],'lat':s[1],'lon':s[2],'url':s[3],
                              'lines':list(set([i[1:i.find('class=color')-1] for i in s[4].split('title')[1:]])),
                              'branches':list(buttonstations[s[3]])} for s in stations]
        good.append(link)


London Underground and DLR

In [95]:
open('metros.json','w').write(json.dumps(metros))


Out[95]:
6422042

In [96]:
response = requests.get(url)
soup = BeautifulSoup(response.content)

In [97]:
country=''
city=''   
geo={}
s=soup.text.split('\n\n\n')[35:226]
for k in range(len(s)):
    i=s[k]
    if i:
        #print(repr(i))
        if i[0]=='\n':
            continent=i.split('\n')[1].strip()
            country=i.split('\n')[3].strip()
            city=i.split('\n')[-1].replace('Map','').strip()
        elif not s[k-1]:
            continent=i.split('\n')[0]
            country=i.split('\n')[2].strip()
            city=i.split('\n')[-1].replace('Map','').strip()
        elif i[0]==' ':
            country=i.split('\n')[0].strip()
            city=i.split('\n')[-1].replace('Map','').strip()
        else:
            city=i.replace('Map','').strip()
        #print(continent,country,city)
        geo[city]={'continent':continent,'country':country}

In [98]:
geo['Cleveland  RTA Rapid Transit']=geo['Cleveland RTA Rapid Transit']
geo['London Underground and DLR']=geo['London Underground']

In [99]:
for metro in metros:
    name=metros[metro]['name']
    if name not in geo:
        print(name)
    else:
        metros[metro]['geo']=geo[name]

In [100]:
open('metrosg.json','w').write(json.dumps(metros))


Out[100]:
6431391

In [101]:
import zipfile

In [102]:
zipfile.ZipFile('metrosg.zip', "w", zipfile.ZIP_DEFLATED).write('metrosg.json')