In [1]:
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
#from jupyterthemes import jtplot
#jtplot.style('solarized-light')
In [2]:
# Load the track table; skiprows=1 skips the first line of the file so that the
# following line is used as the header row.
w=pd.read_csv('Allstorms.ibtracs_wmo.v03r09.csv',skiprows=1)
In [3]:
# Discard the first data row and index the remaining rows by 'Serial_Num'.
# NOTE(review): presumably the discarded row holds units rather than an
# observation -- confirm against the CSV.
# This cell cannot be re-run on its own: 'Serial_Num' is no longer a column
# once it has become the index.
w=w[1:].set_index('Serial_Num')
In [4]:
# Parse the 'ISO_time' values into a datetime column called 'time'.
w['time']=pd.to_datetime(w['ISO_time'])
In [5]:
# Drop the columns that are not used below ('ISO_time' is superseded by 'time').
w=w.drop(['Num','ISO_time','Nature','Center','Wind(WMO) Percentile','Pres(WMO) Percentile','Track_type'],axis=1)
In [6]:
# Preview the first ten rows of the pruned table.
w.head(10)
Out[6]:
In [7]:
from math import radians, cos, sin, asin, sqrt
def haversine(lon1, lat1, lon2, lat2, radius=3956):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    radius : float, optional
        Sphere radius; the result is expressed in the same unit.
        The default 3956 is the earth radius in miles; pass 6371 for km.

    Returns
    -------
    float
        Great circle distance between the two points, in the unit of `radius`.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push `a` marginally above 1 for near-antipodal points, which
    # would make asin() raise a domain error. NaN inputs still propagate as NaN
    # because the comparison is False for NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * asin(sqrt(a))
    return c * radius
In [8]:
# Per-row distance travelled since the previous fix of the same storm.
# The first row of every storm (and of the table) gets 0.
#
# The columns are pulled out once as positional arrays: w is indexed by
# 'Serial_Num', so w['Longitude'][i] with an integer i relies on the deprecated
# positional fallback of Series.__getitem__ and is also very slow per element.
serials = w.index.to_numpy()
lons = w['Longitude'].astype(float).to_numpy()
lats = w['Latitude'].astype(float).to_numpy()

f = [0]
for i in range(1, len(w)):
    if serials[i] == serials[i - 1]:
        # consecutive rows of the same storm: distance between the two fixes
        h = haversine(lons[i], lats[i], lons[i - 1], lats[i - 1])
    else:
        # a new storm starts here, so there is no previous fix to measure from
        h = 0
    f.append(h)
In [9]:
# Store the per-row step distances collected in f as a new 'travel' column.
w['travel']=f
In [10]:
# Display w with the new column.
w
Out[10]:
In [11]:
# z is w without the name, basin and position columns.
z=w.drop(['Name','Basin','Sub_basin','Latitude','Longitude'],axis=1)
In [334]:
def converter(df):
    """
    Summarise the time series of one group of rows.

    The frame is indexed by its 'time' column, the 'Season' column (and an
    'ind' column, if present) is dropped, and the remaining columns are cast
    to float, resampled onto a 6-hourly grid with interpolation, and have
    zeros replaced by NaN. Summary statistics are then taken of the values,
    of their first differences, and of their percentage changes.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'time' and 'Season' columns; every other column
        (apart from an optional 'ind') must be convertible to float.

    Returns
    -------
    pandas.DataFrame
        One row per remaining input column and twelve columns:
        max/min/mean/median of the values, dmax/dmin/dmean/dmedian of the
        first differences, pmax/pmin/pmean/pmedian of the percentage changes.
    """
    if 'ind' in df.columns:
        df = df.drop('ind', axis=1)

    # Build the cleaned 6-hourly series once; every statistic below reuses it
    # instead of repeating the whole pipeline twelve times.
    series = (
        df.set_index('time')
        .drop('Season', axis=1)
        .astype('float')
        .resample('6h')
        .interpolate()
        .replace(0, np.nan)
    )
    steps = series.diff()
    # The first two rows of the percentage-change table are skipped.
    rel = series.pct_change()[2:]

    stats = ('max', 'min', 'mean', 'median')
    parts = [getattr(table, stat)() for table in (series, steps, rel) for stat in stats]

    temp = pd.concat(parts, axis=1)
    labels = ['max', 'min', 'mean', 'median',
              'dmax', 'dmin', 'dmean', 'dmedian',
              'pmax', 'pmin', 'pmean', 'pmedian']
    temp.columns = labels
    return temp
In [315]:
# Copy the index into an ordinary column so it can be used as the groupby key.
z['ind']=z.index
In [316]:
# Try converter on the rows that share the first index label.
converter(z.loc[z.index[0]])
Out[316]:
In [317]:
%%time
# Run converter once per distinct 'ind' value; the result is indexed by
# ('ind', original column name).
a=z.groupby('ind').apply(converter)
In [318]:
# Save the grouped summary table.
a.to_csv('a.csv')
In [319]:
# Move the inner index level into the columns so each 'ind' value is one row.
b=a.unstack()
In [320]:
# Flatten the two-level column labels into single space-joined strings.
b.columns=[' '.join(col).strip() for col in b.columns.values]
In [321]:
# Clear the index label. Assigning None works on every pandas version, whereas
# `del b.index.name` raises AttributeError on pandas >= 1.0.
b.index.name = None
In [322]:
# Basin of each storm = 'Basin' of the first row in w carrying that index label.
# Selecting the first row per label once and reindexing to b's order replaces
# one full .loc scan of w per storm.
b['basin']=w.loc[~w.index.duplicated(keep='first'),'Basin'].reindex(b.index).to_numpy()
In [323]:
# Sub-basin of each storm = 'Sub_basin' of the first row in w carrying that
# index label. Selecting the first row per label once and reindexing to b's
# order replaces one full .loc scan of w per storm.
b['subbasin']=w.loc[~w.index.duplicated(keep='first'),'Sub_basin'].reindex(b.index).to_numpy()
In [324]:
# Season of each storm, parsed as a year into a timestamp (January 1st of that
# year). The first 'Season' value per index label is selected once and
# reindexed to b's order instead of scanning z once per storm.
b['year']=pd.to_datetime(z.loc[~z.index.duplicated(keep='first'),'Season'].reindex(b.index),format='%Y').to_numpy()
In [325]:
# Write b (index included) to b.csv so later cells can reload it without
# recomputing the summary.
b.to_csv('b.csv')
In [2]:
# Reload the saved summary table.
# - index_col=0 restores the first (unlabelled) CSV column as the index.
# - parse_dates restores 'year' as datetimes; a plain read leaves it as strings.
# - The index label is cleared by assignment, because `del b.index.name`
#   raises AttributeError on pandas >= 1.0.
b = pd.read_csv('b.csv', index_col=0, parse_dates=['year'])
b.index.name = None
In [22]:
# Show the first rows of b.
b.head()
Out[22]:
In [327]:
# One jittered point per row of b: 'max Wind(WMO)' split by basin.
sns.stripplot(x='basin',y='max Wind(WMO)',data=b,jitter=True,size=3)
plt.show()
In [337]:
# 'mean travel' against 'min Pres(WMO)', coloured by basin.
# Only the first 5000 rows of b are plotted.
sns.swarmplot(x='min Pres(WMO)',y='mean travel',data=b.head(5000),hue='basin')
plt.show()
In [338]:
# Distribution of 'dmax Wind(WMO)' within each basin.
sns.violinplot(x='basin', y='dmax Wind(WMO)', data=b)
plt.show()
In [341]:
# Distribution of 'max Wind(WMO)' within each basin.
sns.violinplot(x='basin', y='max Wind(WMO)', data=b)
plt.show()
In [356]:
# Wind/pressure joint plots for storms before 1950 and from 1950 onwards.
#
# The cutoff is an explicit calendar date: pd.to_datetime(1950) is 1950
# nanoseconds after the Unix epoch, which silently split the data at 1970.
# 'year' is converted first so the comparison also works when the column holds
# date strings (as it does after a plain CSV reload).
# The second plot uses >= so storms from the 1950 season appear in one plot.
season_start = pd.to_datetime(b['year'])
cutoff = pd.Timestamp('1950-01-01')

# Axis limits are passed to jointplot so they apply to the joint axes of each
# figure, rather than to whichever axes pyplot treats as current.
sns.jointplot(x='max Wind(WMO)', y='min Pres(WMO)',
              data=b[season_start < cutoff].replace(0, np.nan),
              xlim=(10, 200), ylim=(800, 1100))
sns.jointplot(x='max Wind(WMO)', y='min Pres(WMO)',
              data=b[season_start >= cutoff].replace(0, np.nan),
              xlim=(10, 200), ylim=(800, 1100))
plt.show()
In [3]:
# Hex-binned joint plot of 'max Wind(WMO)' against 'min Pres(WMO)', restricted
# to rows with 'min Pres(WMO)' > 800 and 'max Wind(WMO)' > 10; any remaining
# zeros are turned into NaN before plotting.
sns.jointplot(x='max Wind(WMO)',y='min Pres(WMO)',data=b[(b['min Pres(WMO)']>800)&\
(b['max Wind(WMO)']>10)].replace(0,np.nan),kind='hex')
#plt.xlim(10,200)
#plt.ylim(800,1100)
plt.show()
In [4]:
# Same row filter as above, but plotting the median columns.
sns.jointplot(x='median Wind(WMO)',y='median Pres(WMO)',data=b[(b['min Pres(WMO)']>800)&\
(b['max Wind(WMO)']>10)].replace(0,np.nan),kind='hex')
#plt.xlim(10,200)
#plt.ylim(800,1100)
plt.show()
In [12]:
# Largest wind difference against smallest pressure difference, keeping only
# rows with 'pmin Pres(WMO)' < 0.1 and 'pmax Wind(WMO)' < 0.5.
sns.jointplot(x='dmax Wind(WMO)',y='dmin Pres(WMO)',data=b[(b['pmin Pres(WMO)']<0.1)&\
(b['pmax Wind(WMO)']<0.5)].replace(0,np.nan),kind='hex')
#plt.xlim(10,200)
#plt.ylim(800,1100)
plt.show()
In [26]:
# Pairwise plots of 'max Wind(WMO)' and 'mean travel'.
sns.pairplot(b[['max Wind(WMO)','mean travel']])
In [24]:
# Pairwise regression plots of the selected columns, coloured by basin.
sns.pairplot(b[['max Wind(WMO)','basin','year','min Pres(WMO)','mean travel']],kind='reg',hue='basin')
Out[24]:
In [ ]:
# NOTE(review): this passes the raw per-row values of the three columns to
# heatmap, not a correlation matrix -- confirm that is the intent (e.g. versus
# calling .corr() first).
sns.heatmap(b[['max Wind(WMO)','min Pres(WMO)','mean travel']])