In [2]:
import pandas as pd, numpy as np

In [36]:
# Download the Wikipedia list of Grand Prix motorcycle circuits (circuits plus
# the seasons in which they hosted races). read_html returns a list with one
# DataFrame per HTML table on the page; header=0 uses each table's first row
# as its column names.
wiki_url='https://en.wikipedia.org/wiki/List_of_Grand_Prix_motorcycle_circuits'
df=pd.read_html(wiki_url, header=0)

In [38]:
# Keep only the columns used downstream from the first table on the page.
# pd.read_html returns a list of tables, but this cell rebinds `df` to a single
# DataFrame; accepting either form makes the cell safe to re-run instead of
# failing with KeyError: 0 on the second execution.
circuit_table=df[0] if isinstance(df, list) else df
df=circuit_table[['Circuit','Location','Season(s)','Type']]

In [ ]:
#geocode circuit names and create list with circuits
import os
from pygeocoder import Geocoder

# Location strings that are replaced by a more specific query before geocoding
# (presumably because the bare town name is ambiguous -- confirm if extending).
PLACE_OVERRIDES={
    'Silverstone':'Silverstone, United Kingdom',
    'Elroy':'Elroy, Texas',
    'Ventnor':'Ventnor, Australia',
    'Le Mans':'Le Mans, France',
    'Koppl':'Koppl, Austria',
    'Monza':'Monza, Italy',
    'San Carlos':'San Carlos, Venezuela',
    'Oyama':'Oyama, Japan',
    'Orival':'Orival, France',
}

# The API key is read from the environment rather than committed to the
# notebook. Export GOOGLE_MAPS_API_KEY before running; a missing variable
# raises KeyError. NOTE(review): the key that used to be hard-coded here is in
# the file history and should be revoked.
apikey=os.environ['GOOGLE_MAPS_API_KEY']
geocoder=Geocoder(apikey)  # built once and reused for every lookup

# circs: one dict per circuit with keys 'name', 'races', 'place' and 'coord'.
circs=[]
for _,row in df.iterrows():
    if pd.isnull(row['Season(s)']):
        # Row without a 'Season(s)' value: treated as a continuation of the
        # previous circuit. Its 'Type' cell is appended to that circuit's
        # season list (the captured output shows further year lists arriving
        # this way -- presumably cells shifted by the table's row spans).
        circs[-1]['races']=circs[-1]['races']+', '+row['Type']
    else:
        place=PLACE_OVERRIDES.get(row['Location'],row['Location'])
        circs.append({
            'name':row['Circuit'],
            'races':row['Season(s)'],
            'place':place,
            'coord':geocoder.geocode(place).coordinates,
        })
    # Progress trace: show the (possibly just extended) latest circuit.
    print(circs[-1])


{'races': u'2000\u20132012', 'place': u'Cascais', 'name': u'Aut\xf3dromo do Estoril', 'coord': (38.6967571, -9.4207438)}
{'races': u'1969, 1972, 1974\u20131975, 1977, 1979, 1988,', 'place': u'Imola', 'name': u'Autodromo Enzo e Dino Ferrari', 'coord': (44.35999959999999, 11.7124294)}
{'races': u'1969, 1972, 1974\u20131975, 1977, 1979, 1988,, 1981, 1983', 'place': u'Imola', 'name': u'Autodromo Enzo e Dino Ferrari', 'coord': (44.35999959999999, 11.7124294)}
{'races': u'1969, 1972, 1974\u20131975, 1977, 1979, 1988,, 1981, 1983, 1996\u20131999', 'place': u'Imola', 'name': u'Autodromo Enzo e Dino Ferrari', 'coord': (44.35999959999999, 11.7124294)}
{'races': u'1987\u20131989', 'place': u'Goi\xe2nia', 'name': u'Aut\xf3dromo Internacional Ayrton Senna', 'coord': (-16.6868824, -49.26478849999999)}
{'races': u'1995\u20131997, 1999\u20132004', 'place': u'Rio de Janeiro', 'name': u'Aut\xf3dromo Internacional Nelson Piquet', 'coord': (-22.9068467, -43.1728965)}
{'races': u'1992', 'place': u'S\xe3o Paulo', 'name': u'Aut\xf3dromo Jos\xe9 Carlos Pace', 'coord': (-23.5505199, -46.63330939999999)}

In [ ]:
#manual fixes
# Overwrite the season list of the circuit stored at position 61 of `circs`.
# NOTE(review): the index is positional, so it silently targets a different
# circuit if the scraped table changes -- confirm which circuit is meant.
entry_to_fix=circs[61]
entry_to_fix['races']='1996, 1997, 2017'

In [ ]:
# calendar maps every season from 1949 to 2017 to the list of indices into
# `circs` of the circuits that hosted a race that year.
calendar={year:[] for year in range(1949,2018)}
for idx,circ in enumerate(circs):
    # Normalise en dashes to '-' and turn commas into spaces, then split on
    # whitespace. Doubled or trailing separators (e.g. '1988,, 1981') then
    # vanish instead of yielding empty tokens that int() would reject.
    tokens=circ['races'].replace(u'\u2013','-').replace(',',' ').split()
    for token in tokens:
        first,dash,last=token.partition('-')
        # 'YYYY-YYYY' expands to every season in the inclusive range; a lone
        # 'YYYY' is handled as a one-season range. Plain int() replaces the
        # np.int alias, which newer NumPy releases no longer provide.
        final=int(last) if dash else int(first)
        for year in range(int(first),final+1):
            # A season outside 1949-2017 raises KeyError here.
            calendar[year].append(idx)

In [ ]:
#save data
import json
# Write both structures as JSON into the current working directory. open() in
# a with-block replaces the Python-2-only file() builtin and guarantees the
# handles are flushed and closed. The integer year keys of `calendar` are
# written as strings, as JSON object keys must be.
with open('calendar_mgp2018.json','w') as out:
    json.dump(calendar,out)
with open('circs_mgp2018.json','w') as out:
    json.dump(circs,out)

In [ ]: