In [1]:
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from jupyterthemes import jtplot
# Select the 'solarized-light' jupyterthemes style for subsequent plots.
# NOTE(review): presumably this only changes figure appearance — confirm against
# the jupyterthemes documentation; nothing else in the visible cells depends on it.
jtplot.style('solarized-light')
In [2]:
# Download the Wikipedia article and parse its HTML tables; `read_html` returns
# them as a list of DataFrames, which later cells index by position.
wiki_url='https://en.wikipedia.org/wiki/List_of_the_most_intense_tropical_cyclones'
w=pd.read_html(wiki_url)
In [3]:
def _promote_first_row_to_header(table):
    """Return *table* with the row labelled ``0`` turned into its column labels.

    The transpose / ``set_index(0)`` / transpose round trip moves the values
    stored under label ``0`` out of the data and into the column labels;
    ``dropna()`` then discards every remaining row that has a missing value.

    NOTE(review): this assumes ``read_html`` delivered the header as data row 0
    (default integer labels) — confirm with the pandas version in use.
    """
    return table.T.set_index(0).T.dropna()


# One cleaned table per basin, taken from positions 0-7 of `w`.
# NOTE(review): the position -> basin mapping mirrors the table order of the
# article at the time of writing; verify it against the live page.
(n_atl, e_pac, nw_pac, n_ind,
 sw_ind, aus, s_pac, s_atl) = [_promote_first_row_to_header(w[i]) for i in range(8)]
In [11]:
# Cleaned per-basin tables. The order matters: the record-building cell pairs
# position j of this list with position j of its `regions` labels.
df=[
    n_atl, s_atl,    # North Atlantic, South Atlantic
    e_pac, nw_pac,   # East Pacific, Northwest Pacific
    n_ind, sw_ind,   # North Indian, South Indian
    aus, s_pac,      # Australian, South Pacific
]
In [27]:
def _leading_digits(text):
    """Return the run of ASCII digits at the start of *text* ('' if none)."""
    return text[:len(text) - len(text.lstrip('0123456789'))]


def _trailing_digits(text):
    """Return the run of ASCII digits at the end of *text* ('' if none)."""
    return text[len(text.rstrip('0123456789')):]


def _parse_year(season):
    """Return the first year of a season label as an int.

    Accepts a plain year or a range whose two years are separated by an en dash
    or a hyphen; for a range, the part before the separator is used.
    Raises ValueError if that part is not an integer.
    """
    for dash in (u'\u2013', u'-'):
        if dash in season:
            return int(season[:season.find(dash)])
    return int(season)


def _parse_pressure(field):
    """Return the integer written immediately before 'hPa' in *field*.

    Only the digits directly preceding the unit are used, so a hidden sort key
    in front of the value is ignored and the number may have any width.
    Raises ValueError when no such number is present.
    """
    unit_at = field.find('hPa')
    digits = _trailing_digits(field[:unit_at].strip()) if unit_at != -1 else ''
    if not digits:
        raise ValueError('no hPa pressure found in %r' % (field,))
    return int(digits)


def _parse_speed(field):
    """Return the leading wind-speed number of *field*, or None if it has none.

    Text up to and including a '!' is skipped, then the first space-delimited
    token is taken; if that token contains a spade character, only the part
    after it is kept. The number is the run of digits that starts that token.
    """
    # Slice and search the *same* string: mixing offsets of the full field and
    # of its tail is what used to produce empty or truncated tokens.
    tail = field[field.find('!') + 1:].strip()
    token = tail.split(' ', 1)[0]
    if u'\u2660' in token:
        token = token[token.find(u'\u2660') + 1:]
    digits = _leading_digits(token)
    return int(digits) if digits else None


regions=['North Atlantic','South Atlantic','East Pacific','Northwest Pacific','North Indian','South Indian','Australian','South Pacific']

# Every table must have a label; zip() would otherwise silently drop the excess.
assert len(regions) == len(df), "regions and df must list the same basins"

# Flatten all basin tables into one list of plain dicts (one per table row).
data=[]
for region, table in zip(regions, df):
    # iterrows() yields the same (label, row) pairs as iterating the columns of
    # the transposed frame, and exists in both old and current pandas.
    for _, row in table.iterrows():
        # Columns are addressed by position: 0 name, 1 season, 2 wind, 3 pressure.
        record={"region":region}
        record["name"]=row.iloc[0].replace('"','')
        record["pressure"]=_parse_pressure(row.iloc[3])
        record["year"]=_parse_year(row.iloc[1])
        speed=_parse_speed(row.iloc[2])
        if speed is not None:
            # 'speed' stays optional: rows without a numeric value omit the key.
            record["speed"]=speed
        data.append(record)
In [29]:
import json
In [30]:
# Serialise the parsed records to data.json. `open` works on Python 2 and 3
# (the `file` builtin exists only on Python 2), and the context manager
# guarantees the handle is flushed and closed even if serialisation fails.
with open('data.json','w') as out:
    json.dump(data, out)