In [77]:
import pandas as pd, numpy as np, json

In [78]:
# Load the metro records from disk. The context manager guarantees the file
# handle is closed as soon as parsing finishes instead of relying on garbage
# collection of an anonymous file object.
with open('metrosg.json','r') as src:
    metros=json.load(src)

In [79]:
# Lookup table filled in by the following cells: metro key -> year (int).
years = dict()

In [80]:
# For every metro, record the smallest year in 1830..2020 whose digits occur
# anywhere in its description text (plain substring match). Metros whose
# description mentions no such year get no entry in `years`.
# NOTE(review): presumably this approximates the opening year — confirm.
for key, record in metros.items():
    hit = next(
        (candidate for candidate in range(1830, 2021)
         if str(candidate) in record['desc']),
        None,
    )
    if hit is not None:
        years[key] = hit

In [81]:
# List the metros for which no year has been recorded yet (name, then key).
unresolved = [key for key in metros if key not in years]
for key in unresolved:
    print(metros[key]['name'], key)


Hong Kong MTR https://www.metrolinemap.com/metro/hong-kong/
Bangkok Metro https://www.metrolinemap.com/metro/bangkok/
Copenhagen Metro https://www.metrolinemap.com/metro/copenhagen/
Toulouse Metro https://www.metrolinemap.com/metro/toulouse/
Philadelphia SEPTA and PATCO https://www.metrolinemap.com/metro/philadelphia/

In [82]:
# Hand-entered years for the metros whose description contained no year.
years.update({
    'https://www.metrolinemap.com/metro/hong-kong/': 1979,
    'https://www.metrolinemap.com/metro/bangkok/': 2004,
    'https://www.metrolinemap.com/metro/copenhagen/': 2002,
    'https://www.metrolinemap.com/metro/toulouse/': 1993,
    'https://www.metrolinemap.com/metro/philadelphia/': 1928,
})

In [83]:
# Copy the collected year onto each metro record under the 'year' key.
# Raises KeyError if any metro still has no entry in `years`.
for key, record in metros.items():
    record['year'] = years[key]

In [84]:
# Manually prettify the display names of a handful of systems.
pretty_names = {
    'https://www.metrolinemap.com/metro/london/': 'London Underground',
    'https://www.metrolinemap.com/metro/atlanta/': 'Atlanta MARTA',
    'https://www.metrolinemap.com/metro/cleveland/': 'Cleveland RTA',
    'https://www.metrolinemap.com/metro/boston/': 'Boston T',
    'https://www.metrolinemap.com/metro/chicago/': 'Chicago L',
}
for key, label in pretty_names.items():
    metros[key]['name'] = label

In [85]:
# Serialize the enriched metro records to 'metrosy.json'. The context manager
# flushes and closes the file deterministically, so the complete file is on
# disk before anything else reads it.
with open('metrosy.json','w') as out:
    chars_written = out.write(json.dumps(metros))
# Keep showing the number of characters written as the cell's output.
chars_written


Out[85]:
6433711

In [86]:
import zipfile

In [87]:
# Pack 'metrosy.json' into a deflate-compressed archive. A ZipFile only writes
# its central directory when it is closed, so use a context manager to make
# sure the archive is finalized rather than left to garbage collection.
with zipfile.ZipFile('metrosy.zip', "w", zipfile.ZIP_DEFLATED) as archive:
    archive.write('metrosy.json')

In [ ]: