In [474]:
import requests, pandas as pd, numpy as np, json
from requests import session
from bs4 import BeautifulSoup

In [475]:
# Homepage of the site being scraped; the anchors on this page are the source
# of the per-city page links fetched later in the notebook.
url='https://www.metrolinemap.com/'

In [479]:
# Download the homepage and collect the href of every city-page anchor.
# A timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() fails loudly instead of slicing an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
# NOTE(review): no parser is named here, so bs4 picks one of the installed
# parsers; the positional slice below may depend on which one was used.
soup = BeautifulSoup(response.content)
# The first 5 and the last 4 anchors are dropped - presumably navigation and
# footer links rather than city pages (TODO confirm against the live page).
city_anchors = soup.find_all('a')[5:-4]
links = [anchor['href'] for anchor in city_anchors]

In [587]:
# `metros` collects the scrape results keyed by city-page URL; `good` lists the
# URLs that were scraped completely, so the scraping loop can skip them when
# it is re-run.
metros, good = {}, []

In [588]:
# Scrape every city page in `links` into `metros`, keyed by the page URL.
# URLs already listed in `good` are skipped, so this cell can be re-run to
# resume after a failure without refetching the cities that already succeeded.
# (To debug a single city, loop over a slice such as links[168:169].)
for link in links:
    if link in good:
        continue

    # A timeout stops one stalled connection from hanging the whole crawl;
    # raise_for_status() fails with a clear HTTP error instead of an obscure
    # AttributeError further down when an error page lacks the expected tags.
    response = requests.get(link, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content)
    # soup.text rebuilds the full page text on every access; extract it once.
    page_text = soup.text
    # Pieces of the page text that follow each 'pathCoordinates' occurrence.
    coord_chunks = page_text.split('pathCoordinates')[1:]

    name = soup.find('h1').text
    print(name)
    metros[link] = {'name': name, 'url': link}
    metros[link]['desc'] = soup.find('div', {'class': 'callout-card-content'}).text.replace('\n', '')

    # For every chunk containing '=', keep the text in front of the first '='.
    path_IDs = [chunk[:chunk.find('=')].strip() for chunk in coord_chunks if '=' in chunk]
    # Entries containing '#' carry a colour: the key is the text up to the
    # first comma, the value the 7 characters starting at '#'.
    colors = {p[:p.find(',')]: p[p.find('#'):p.find('#')+7] for p in path_IDs if '#' in p}

    # Line name per id: the text between the first '(' and the first ')' that
    # follows the 'highlightedPoly<id> = poly<id>;' marker, with double quotes
    # removed. Only the piece after the first marker occurrence is needed.
    lines = {}
    for k in colors:
        after_marker = page_text.split('highlightedPoly'+k+' = poly'+k+';')[1]
        lines[k] = after_marker[after_marker.find('(')+1:after_marker.find(')')].replace('"', '').strip()

    linelist = list(lines.keys())
    # Line ids ordered alphabetically by line name.
    linelist2 = [k for k, _ in sorted(lines.items(), key=lambda item: item[1])]

    # Text after the first '(' of every button label, minus its last character.
    ends = [button.text[button.text.find('(')+1:-1] for button in soup.find_all('button')]
    # The i-th button is paired with the i-th line id; this raises IndexError
    # when the page has more buttons than lines.
    branches = {linelist[i]: e for (i, e) in enumerate(ends)}

    # Coordinate literal per path id (chunks with '=' but without '#'): the
    # text from the first '[' up to the first ']', re-closed with ']' after
    # dropping a trailing comma.
    spaths = {chunk[:chunk.find('=')].strip(): (chunk[chunk.find('['):chunk.find(']')].strip()+']').replace(',]', ']')
              for chunk in coord_chunks if '=' in chunk and '#' not in chunk}
    # The literals use bare lat/lng keys; quote them so json.loads accepts them.
    metros[link]['lines'] = [{'path': json.loads(spaths[i].replace('lat', '"lat"').replace('lng', '"lng"')),
                              'color': colors[i],
                              'name': lines[i],
                              'branch': branches[i],
                              'id': i} for i in spaths]

    # The 'var stations = [...]' literal, with line breaks and single quotes
    # removed so that it parses as JSON.
    stations_start = page_text.find('var stations =')+15
    stations_end = page_text.find(']\r\n]')+4
    stations = list(json.loads(page_text[stations_start:stations_end]
                               .replace('\r', '').replace('\n', '').replace("'", '')))

    # Map each station URL to the set of line ids whose panel links to it.
    # The i-th 'chkMetroLine' checkbox is paired with the i-th 'panel' div.
    # Alternatives that were tried: iterating `linelist` (good for London,
    # branches) or `linelist2` (good for Santiago, non-ABC).
    buttonstations = {}
    buttonlines = [box['id'].replace('chkMetroLine', '') for box in soup.find_all('input', {'name': 'chkMetroLine'})]
    panels = soup.find_all('div', {'class': 'panel'})  # looked up once, not once per checkbox
    for i, line in enumerate(buttonlines):
        for anchor in panels[i].find_all('a'):
            buttonstations.setdefault(anchor['href'], set()).add(line)

    # Each station row is indexed positionally: 0 name, 1 lat, 2 lon, 3 url,
    # 4 a markup string whose 'title' attributes are parsed into line names.
    metros[link]['stations'] = [{'name': s[0], 'lat': s[1], 'lon': s[2], 'url': s[3],
                                 'lines': list(set([t[1:t.find('class=color')-1].strip() for t in s[4].split('title')[1:]])),
                                 'branches': list(buttonstations[s[3]])} for s in stations]
    # Only reached when every step above succeeded.
    good.append(link)


Algiers Metro
Cairo Metro
Yerevan Metro
Baku Metro
Beijing Subway
Changchun Subway
Changsha Metro
Chengdu Metro
Chongqing Rail Transit
Dalian Metro
Dongguan Rail Transit
Fuzhou Metro
Guangzhou Metro
Guiyang Metro
Hangzhou Metro
Harbin Metro
Hefei Metro
Hong Kong MTR
Jinan Metro
Kunming Rail Transit
Nanchang Metro
Nanjing Metro
Nanning Rail Transit
Ningbo Rail Transit
Qingdao Metro
Shanghai Metro
Shenyang Metro
Shenzhen Metro
Shijiazhuang Metro
Suzhou Rail Transit
Tianjin Metro
Ürümqi Metro
Wenzhou Metro
Wuhan Metro
Wuxi Metro
Xiamen Metro
Xi'an Metro
Zhengzhou Metro
Tbilisi Metro
Ahmedabad Metro
Bangalore Metro
Chennai Metro
Delhi Metro
Hyderabad Metro
Jaipur Metro
Kochi Metro
Kolkata Metro
Lucknow Metro
Mumbai Metro
Nagpur Metro
Noida Metro
Jakarta MRT
Isfahan Metro
Mashhad Urban Railway
Shiraz Metro
Tabriz Metro
Tehran Metro
Fukuoka City Subway
Hiroshima Rapid Transit Line
Kobe Municipal Subway
Kyoto Municipal Subway
Nagoya Municipal Subway
Osaka Municipal Subway
Sapporo Municipal Subway
Sendai Subway
Tokyo Metro
Yokohama Municipal Subway
Almaty Metro
Pyongyang Metro
Busan Metro
Daegu Metro
Daejeon Metro
Gwangju Metro
Seoul Metropolitan Subway
Kuala Lumpur Rapid Rail
Manila Metro Rail Transit
Doha Metro
Mecca Metro
Singapore Mass Rapid Transit
Kaohsiung Mass Rapid Transit
Taipei Metro
Taoyuan Metro
Bangkok Metro
Dubai Metro
Tashkent Metro
Sydney Metro
Vienna U-Bahn
Minsk Metro
Brussels Metro
Sofia Metro
Prague Metro
Copenhagen Metro
Helsinki Metro
Lille Metro
Lyon Metro
Marseille Metro
Paris Metro
Rennes Metro
Toulouse Metro
Berlin U-Bahn
Hamburg U-Bahn
Munich U-Bahn
Nuremberg U-Bahn
Athens Metro
Budapest Metro
Brescia Metro
Catania Metro
Genoa Metro
Milan Metro
Naples Metro
Rome Metro
Turin Metro
Amsterdam Metro
Rotterdam Metro
Oslo Metro
Warsaw Metro
Lisbon Metro
Bucharest Metro
Kazan Metro
Moscow Metro
Nizhny Novgorod Metro
Novosibirsk Metro
Saint Petersburg Metro
Samara Metro
Yekaterinburg Metro
Barcelona Metro
Metro Bilbao
Madrid Metro
Stockholm Metro
Lausanne Metro
Adana Metro
Ankara Metro
Bursaray
Istanbul Metro
İzmir Metro
Dnipro Metro
Kharkiv Metro
Kiev Metro
Glasgow Subway
London Underground and DLR
Tyne and Wear Metro
Montreal Metro
Toronto subway
Vancouver SkyTrain
Santo Domingo Metro
Mexico City Metro
Monterrey Metro
Panama Metro
Puerto Rico Tren Urbano
Atlanta Metro (MARTA)
Baltimore Metro Subway
Boston MBTA Subway
Chicago "L"
Cleveland  RTA Rapid Transit
Los Angeles Metro
Miami Metrorail
New York City Subway
Philadelphia SEPTA and PATCO
San Francisco BART
Washington Metro
Buenos Aires Underground
Belo Horizonte Metro
Brasília Metro
Porto Alegre Metro
Recife Metro
Rio de Janeiro Metro
Salvador Metro
São Paulo Metro
Santiago Metro
Medellín Metro
Lima Metro
Caracas Metro

In [589]:
# Save the scraped data. The context manager closes (and therefore flushes)
# the file deterministically instead of leaving that to garbage collection.
# The trailing expression keeps the written character count as cell output.
with open('metros.json','w') as out_file:
    chars_written = out_file.write(json.dumps(metros))
chars_written


Out[589]:
6421999

In [590]:
# Re-download the homepage: the scraping loop above rebinds `response` and
# `soup` to the last city page it fetched.
# A timeout stops a stalled connection from hanging the kernel, and
# raise_for_status() fails loudly instead of parsing an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content)

In [591]:
# Build `geo`: a map from the system name shown on the homepage to its
# continent and country, parsed from the homepage text.
# `continent` is now initialised alongside `country` and `city`; previously a
# first chunk that did not set it raised NameError on a fresh kernel (or
# silently reused a stale value from an earlier run).
continent=''
country=''
city=''
geo={}
# The page text is cut at triple newlines. The [35:226] bounds are positional -
# presumably found by inspecting the page - and will need adjusting if the
# homepage layout changes.
s=soup.text.split('\n\n\n')[35:226]
for k, chunk in enumerate(s):
    if not chunk:
        continue
    rows=chunk.split('\n')
    if chunk[0]=='\n':
        # Chunk opens with a blank row: continent on row 1, country on row 3.
        continent=rows[1].strip()
        country=rows[3].strip()
        city=rows[-1].replace('Map','').strip()
    elif not s[k-1]:
        # Previous chunk was empty: continent on row 0, country on row 2.
        # NOTE(review): for k == 0 this reads s[-1] (the last chunk), not the
        # chunk in front of the slice - confirm the first chunk never gets here.
        continent=rows[0].strip()  # stripped like the branch above
        country=rows[2].strip()
        city=rows[-1].replace('Map','').strip()
    elif chunk[0]==' ':
        # Chunk opens with a space: new country, continent carried over.
        country=rows[0].strip()
        city=rows[-1].replace('Map','').strip()
    else:
        # Anything else is just another city of the current country.
        city=chunk.replace('Map','').strip()
    geo[city]={'continent':continent,'country':country}

In [592]:
# Two city-page titles differ from the names parsed off the homepage; register
# each page title as an alias that shares the same geo record.
for page_title, homepage_name in (
    ('Cleveland  RTA Rapid Transit', 'Cleveland RTA Rapid Transit'),
    ('London Underground and DLR', 'London Underground'),
):
    geo[page_title] = geo[homepage_name]

In [593]:
# Attach the continent/country record to every scraped system; print the name
# of any system that has no geo entry so it can be aliased by hand.
for record in metros.values():
    title = record['name']
    if title in geo:
        record['geo'] = geo[title]
    else:
        print(title)

In [594]:
# Save the geo-annotated data. The context manager closes (and therefore
# flushes) the file deterministically instead of leaving that to garbage
# collection. The trailing expression keeps the written character count as
# cell output.
with open('metrosg.json','w') as out_file:
    chars_written = out_file.write(json.dumps(metros))
chars_written


Out[594]:
6431348

In [595]:
import zipfile

In [596]:
# Compress the JSON export. A zip archive is only finalised (central directory
# written) when the ZipFile is closed, so close it explicitly via the context
# manager rather than relying on garbage collection of the temporary object.
with zipfile.ZipFile('metrosg.zip', "w", zipfile.ZIP_DEFLATED) as archive:
    archive.write('metrosg.json')