In [50]:
import requests, pandas as pd, numpy as np, json
from requests import session
from bs4 import BeautifulSoup

In [51]:
# Root page of the site being scraped; the later cells download it with requests.get(url).
url = "https://www.metrolinemap.com/"

In [52]:
# Download the landing page and keep the href of every anchor on it, leaving out
# the first five and the last four anchors.
# NOTE(review): the skipped anchors are presumably site navigation/footer links
# rather than metro pages — confirm against the live page.
response = requests.get(url)
soup = BeautifulSoup(response.content)
links = [anchor['href'] for anchor in soup.find_all('a')[5:-4]]

In [142]:
# metros: scrape results, keyed by page URL (filled in by the scraping loop).
# good:   URLs that were scraped successfully, so a re-run of the loop skips them.
metros = dict()
good = list()

In [145]:
# Scrape every metro page that has not been scraped yet and store the result in
# metros[link]. `good` records finished links, so re-running this cell after a
# failure resumes where it stopped. To debug a subset, slice `links`
# (e.g. links[138:140]).
# NOTE(review): all string slicing below assumes soup.text contains the page's
# inline JavaScript — confirm with the installed bs4 version/parser.
for link in links[:]:
    if link not in good:
        response = requests.get(link)
        soup = BeautifulSoup(response.content)
        page_text=soup.text  # computed once; every extraction below slices this same string
        name=soup.find('h1').text
        print(name)
        metros[link]={'name':name,'url':link}
        metros[link]['desc']=soup.find('div',{'class':'callout-card-content'}).text.replace('\n','')
        # For each occurrence of 'pathCoordinates': the text between it and the next '='.
        path_IDs=[i[:i.find('=')].strip() for i in page_text.split('pathCoordinates')[1:] if '=' in i]
        # Chunks containing a '#': text before the first ',' -> the 7 characters starting
        # at the first '#' (presumably a '#RRGGBB' colour).
        colors={i[:i.find(',')]:i[i.find('#'):i.find('#')+7] for i in path_IDs if '#' in i}
        #ends={i.text[:i.text.rfind('(')-1].strip():i.text[i.text.find('(')+1:i.text.find(')')] for i in soup.find_all('button')}
        # Line id -> text inside the first parentheses that follow the
        # 'highlightedPoly<id> = poly<id>;' marker, with double quotes removed.
        lines={k:[i[i.find('(')+1:i.find(')')].replace('"','').strip() \
            for i in page_text.split('highlightedPoly'+k+' = poly'+k+';')][1] for k in colors}
        linelist=list(lines.keys())
        # Button text -> whatever follows its first '(' minus the final character.
        ends={i.text:i.text[i.text.find('(')+1:-1] for i in soup.find_all('button')}
        # Pair the i-th line id with the i-th distinct button text / ends value.
        # NOTE(review): the recorded run of this cell stopped with KeyError: '114' on the
        # Paris page inside the metros[link]['lines'] comprehension below. Buttons with
        # identical text collapse into one key of `ends`, which would leave later line ids
        # without an entry — TODO confirm the root cause before relying on this pairing.
        branches={linelist[i]:e for (i,e) in enumerate(ends)}
        ends={linelist[i]:ends[e] for (i,e) in enumerate(ends)}

        # The string literal below is a no-op: it holds disabled manual overrides that
        # are kept for reference only.
        '''
        for i in lines: 
            if '(' in lines[i]: lines[i]+=')'
            if (link=='https://www.metrolinemap.com/metro/tehran/'):
                lines['168']='Line 1 (Kahrizak Branch)'
                lines['169']='Line 1 (Shahr-e Aftab Branch)'
                ends['Line 1 (Kahrizak Branch)']='Tajrish - Kahrizak'
                ends['Line 1 (Shahr-e Aftab Branch)']='Tajrish - Shahr-e Aftab'
                ends['Line 2']='Tehran (Sadeghiyeh) - Farhangsara'
                ends['Line 5']='Golshahr - Tehran (Sadeghiyeh)'
            if (link=='https://www.metrolinemap.com/metro/nagoya/'):
                lines['203']='Meikō Line (Nagoya Port)'
                ends['Meikō Line (Nagoya Port)']='Kanayama - Nagoyakō'
            if (link=='https://www.metrolinemap.com/metro/brussels/'):
                ends['Line 2']='Simonis (Leopold II) - Simonis (Elisabeth)'
                ends['Line 6']='Roi Baudouin/Koning Boudewijn - Simonis (Elisabeth)'
            if (link=='https://www.metrolinemap.com/metro/copenhagen/'):
                ends['M2']='Vanløse - Lufthavnen (Copenhagen Airport)'
                ends['M3']='København H - København H (Circle Line)'
            if (link=='https://www.metrolinemap.com/metro/barcelona/'):
                ends['FM']='Paral - lel (Funicular)'
            if (link=='https://www.metrolinemap.com/metro/istanbul/'):
                ends['M1a']='Yenikapı - Atatürk Havalimanı (Airport)'
            if (link=='https://www.metrolinemap.com/metro/san-francisco/'):
                ends['Richmond–Warm Springs/South Fremont line']='Richmond - Warm Springs (South Fremont)'
        '''

        # Chunks with an '=' and no '#': id -> the text from the first '[' up to the first
        # ']', re-closed with ']' and with any ',]' turned into ']'.
        spaths={i[:i.find('=')].strip():(i[i.find('['):i.find(']')].strip()+']').replace(',]',']') \
            for i in page_text.split('pathCoordinates')[1:] if '=' in i and '#' not in i}
        # Quote the bare lat/lng keys so the path literal parses as JSON.
        metros[link]['lines']=[{'path':json.loads(spaths[i].replace('lat','"lat"').replace('lng','"lng"')),
                                'color':colors[i],
                                'name':lines[i],
                                'ends':ends[i],
                                'branch':branches[i],
                                'id':i} for i in spaths]
        # Slice starting 15 characters after the start of 'var stations =' and ending
        # 4 characters after the start of the first ']\r\n]'; CR, LF and single quotes
        # are removed before parsing it as JSON.
        stations=[i for i in json.loads(page_text[page_text.find('var stations =')+15:\
                     page_text.find(']\r\n]')+4].replace('\r','').replace('\n','').replace("\'",''))]
        # Texts of the buttons inside the second 'callout-card-content' div.
        buttonlines=[i.text for i in soup.find_all('div',{'class':'callout-card-content'})[1].find_all('button')]
        # Station href -> set of button texts; the i-th 'panel' div is matched with the
        # i-th button.
        panels=soup.find_all('div',{'class':'panel'})
        buttonstations={}
        for i in range(len(buttonlines)):
            line=buttonlines[i]
            for s in [k['href'] for k in panels[i].find_all('a')]:
                if s not in buttonstations: buttonstations[s]=set()
                buttonstations[s].add(line)
        # Fix: the original statement ended with a trailing comma, which stored a 1-tuple
        # wrapping this list (serialised as [[...]]) instead of the list itself.
        metros[link]['stations']=[{'name':s[0],'lat':s[1],'lon':s[2],'url':s[3],
                              'lines':list(set([i[1:i.find('class=color')-1] for i in s[4].split('title')[1:]])),
                              'branches':list(buttonstations[s[3]])} for s in stations]

        good.append(link)


Paris Metro
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-145-f7d579ba3251> in <module>
     52                                 'ends':ends[i],
     53                                 'branch':branches[i],
---> 54                                 'id':i} for i in spaths]
     55         stations=[i for i in json.loads(soup.text[soup.text.find('var stations =')+15:\
     56                      soup.text.find(']\r\n]')+4].replace('\r','').replace('\n','').replace("\'",''))]

<ipython-input-145-f7d579ba3251> in <listcomp>(.0)
     52                                 'ends':ends[i],
     53                                 'branch':branches[i],
---> 54                                 'id':i} for i in spaths]
     55         stations=[i for i in json.loads(soup.text[soup.text.find('var stations =')+15:\
     56                      soup.text.find(']\r\n]')+4].replace('\r','').replace('\n','').replace("\'",''))]

KeyError: '114'

In [167]:
# Scratch cell: repeat the line/branch extraction on the page currently held in
# `soup`, using the `path_IDs` left over from the scraping loop.

# Text before the first ',' -> the 7 characters starting at the first '#'.
colors = {
    chunk[:chunk.find(',')]: chunk[chunk.find('#'):chunk.find('#') + 7]
    for chunk in path_IDs
    if '#' in chunk
}
# ends={i.text[:i.text.rfind('(')-1].strip():i.text[i.text.find('(')+1:i.text.find(')')] for i in soup.find_all('button')}
# Line id -> text inside the first parentheses after its 'highlightedPoly' marker.
lines = {
    line_id: [
        piece[piece.find('(') + 1:piece.find(')')].replace('"', '').strip()
        for piece in soup.text.split('highlightedPoly' + line_id + ' = poly' + line_id + ';')
    ][1]
    for line_id in colors
}
linelist = list(lines)
# Button text -> whatever follows its first '(' minus the final character.
ends = {
    button.text: button.text[button.text.find('(') + 1:-1]
    for button in soup.find_all('button')
}
# i-th line id -> i-th distinct button text.
branches = {linelist[position]: label for position, label in enumerate(ends)}
# ends={linelist[i]:ends[e] for (i,e) in enumerate(ends)}

In [171]:



Out[171]:
['La Défense - Château de Vincennes',
 'Porte Dauphine - Nation',
 'Pont de Levallois Bécon - Gallieni',
 'Porte des Lilas - Gambetta',
 'Porte de Clignancourt - Mairie de Montrouge',
 'Bobigny Pablo Picasso - Place d’Italie',
 'Charles de Gaulle Etoile - Nation',
 'La Courneuve - 8 Mai 1945 - Villejuif - Louis Aragon',
 "La Courneuve - 8 Mai 1945 - Mairie d'Ivry",
 'Louis Blanc - Pré Saint-Gervais ',
 'Balard - Créteil – Pointe du Lac',
 'Pont de Sèvres - Mairie de Montreuil',
 "Boulogne – Pont de Saint-Cloud - Gare d'Austerlitz",
 "Boulogne – Pont de Saint-Cloud - Gare d'Austerlitz",
 'Mairie des Lilas -   Châtelet',
 "Front Populaire - Mairie d'Issy",
 'Saint-Denis — Université - Châtillon — Montrouge',
 'Asnières Gennevilliers Les Courtilles - Châtillon — Montrouge',
 'Saint-Lazare - Olympiades']

In [ ]:
# Save the scraped data as JSON. The `with` block closes (and flushes) the file
# deterministically instead of leaving that to garbage collection.
with open('metros.json','w') as f:
    f.write(json.dumps(metros))

In [ ]:
# Download the landing page again: the scraping loop rebinds `soup` to each metro
# page it visits, while the geo-parsing cell that follows reads `soup.text`.
response = requests.get(url)
soup = BeautifulSoup(response.content)

In [ ]:
# Build `geo`: listing name -> {'continent': ..., 'country': ...} from the landing
# page text. The text is cut on triple newlines and only chunks 35..225 are used.
# NOTE(review): the 35:226 window is tied to the page layout at scrape time — confirm.
continent=''  # fix: was never initialised, unlike country/city (NameError or stale kernel value)
country=''
city=''
geo={}
s=soup.text.split('\n\n\n')[35:226]
for k,i in enumerate(s):
    if not i:
        continue
    parts=i.split('\n')
    if i[0]=='\n':
        # Chunk opens with a newline: continent on line 1, country on line 3, listing name last.
        continent=parts[1].strip()
        country=parts[3].strip()
        city=parts[-1].replace('Map','').strip()
    elif not s[k-1]:
        # Previous chunk was empty: continent on line 0, country on line 2, listing name last.
        # NOTE(review): at k == 0 this inspects s[-1] (the last chunk) — confirm that is intended.
        continent=parts[0].strip()  # fix: strip for consistency with the branch above
        country=parts[2].strip()
        city=parts[-1].replace('Map','').strip()
    elif i[0]==' ':
        # Chunk opens with a space: new country under the current continent.
        country=parts[0].strip()
        city=parts[-1].replace('Map','').strip()
    else:
        # Anything else: another listing under the current continent and country.
        city=i.replace('Map','').strip()
    #print(continent,country,city)
    geo[city]={'continent':continent,'country':country}

In [ ]:
# Register extra spellings as additional keys of `geo`, each pointing at the same
# entry as an existing key.
for _alias, _existing in (
    ('Cleveland  RTA Rapid Transit', 'Cleveland RTA Rapid Transit'),
    ('London Underground and DLR', 'London Underground'),
):
    geo[_alias] = geo[_existing]

In [ ]:
# Attach the continent/country entry to every scraped metro whose name is a key
# of `geo`; names without a match are printed so they can be aliased by hand.
for metro in metros:
    name = metros[metro]['name']
    if name in geo:
        metros[metro]['geo'] = geo[name]
        continue
    print(name)

In [ ]:
# Save the geo-annotated data as JSON. The `with` block closes (and flushes) the
# file deterministically instead of leaving that to garbage collection.
with open('metrosg.json','w') as f:
    f.write(json.dumps(metros))

In [ ]:
import zipfile

In [ ]:
# Compress metrosg.json into metrosg.zip. The archive is opened in a `with` block
# so it is closed explicitly; closing is what finalises the zip file, and the
# original left that to garbage collection.
with zipfile.ZipFile('metrosg.zip', "w", zipfile.ZIP_DEFLATED) as archive:
    archive.write('metrosg.json')