In [1]:
import pandas as pd, numpy as np, json

In [2]:
# Load the metro records; the context manager closes the file handle
# deterministically instead of leaving it to garbage collection.
with open('metrosg.json', 'r') as src_file:
    metros = json.load(src_file)

In [27]:
# Maps each metro key to the year detected for it (filled in by later cells).
years = dict()

In [28]:
# For every metro, record the first year in 1830..2020 (tried in ascending
# order) whose digits occur anywhere in the description text. Metros whose
# description contains none of those years get no entry in `years`.
for key, info in metros.items():
    hit = next((y for y in range(1830, 2021) if str(y) in info['desc']), None)
    if hit is not None:
        years[key] = hit

In [30]:
# List the metros that still have no year: name first, then the metro's key.
for key, info in metros.items():
    if key in years:
        continue
    print(info['name'], key)


Hong Kong MTR https://www.metrolinemap.com/metro/hong-kong/
Bangkok Metro https://www.metrolinemap.com/metro/bangkok/
Copenhagen Metro https://www.metrolinemap.com/metro/copenhagen/
Toulouse Metro https://www.metrolinemap.com/metro/toulouse/
Philadelphia SEPTA and PATCO https://www.metrolinemap.com/metro/philadelphia/

In [31]:
# Hand-entered years for the metros the listing above reported as missing.
years.update({
    'https://www.metrolinemap.com/metro/hong-kong/': 1979,
    'https://www.metrolinemap.com/metro/bangkok/': 2004,
    'https://www.metrolinemap.com/metro/copenhagen/': 2002,
    'https://www.metrolinemap.com/metro/toulouse/': 1993,
    'https://www.metrolinemap.com/metro/philadelphia/': 1928,
})

In [35]:
# Copy each collected year onto its metro record under the 'year' key.
# A metro without an entry in `years` raises KeyError here.
for key, info in metros.items():
    info['year'] = years[key]

In [40]:
# Manually prettify the display names of a few selected metros.
pretty_names = {
    'https://www.metrolinemap.com/metro/london/': 'London Underground',
    'https://www.metrolinemap.com/metro/atlanta/': 'Atlanta MARTA',
    'https://www.metrolinemap.com/metro/cleveland/': 'Cleveland RTA',
    'https://www.metrolinemap.com/metro/boston/': 'Boston T',
    'https://www.metrolinemap.com/metro/chicago/': 'Chicago L',
}
for key, label in pretty_names.items():
    metros[key]['name'] = label

In [41]:
# Serialise the enriched records. The context manager guarantees the file is
# flushed and closed before anything reads it again (it is zipped further down).
with open('metrosy.json', 'w') as out_file:
    chars_written = out_file.write(json.dumps(metros))
chars_written  # still display the number of characters written as cell output


Out[41]:
6433754

In [42]:
import zipfile

In [43]:
# Close the archive explicitly: ZipFile only writes its essential records on
# close(), so leaving that to garbage collection risks an unreadable zip.
with zipfile.ZipFile('metrosy.zip', "w", zipfile.ZIP_DEFLATED) as archive:
    archive.write('metrosy.json')

In [ ]: