In [1]:
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
#from jupyterthemes import jtplot
#jtplot.style('solarized-light')

In [2]:
w=pd.read_csv('Allstorms.ibtracs_wmo.v03r09.csv',skiprows=1)


C:\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py:2717: DtypeWarning: Columns (1,2,8,9,10,11,13,14) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [3]:
w=w[1:].set_index('Serial_Num')

In [4]:
w['time']=pd.to_datetime(w['ISO_time'])

In [5]:
w=w.drop(['Num','ISO_time','Nature','Center','Wind(WMO) Percentile','Pres(WMO) Percentile','Track_type'],axis=1)

In [6]:
w.head(10)


Out[6]:
Season Basin Sub_basin Name Latitude Longitude Wind(WMO) Pres(WMO) time
Serial_Num
1848011S09080 1848 SI MM XXXX848003 -8.60 79.80 0.0 0.0 1848-01-11 06:00:00
1848011S09080 1848 SI MM XXXX848003 -9.00 78.90 0.0 0.0 1848-01-12 06:00:00
1848011S09080 1848 SI MM XXXX848003 -10.40 73.20 0.0 0.0 1848-01-13 06:00:00
1848011S09080 1848 SI MM XXXX848003 -12.80 69.90 0.0 0.0 1848-01-14 06:00:00
1848011S09080 1848 SI MM XXXX848003 -13.90 68.90 0.0 0.0 1848-01-15 06:00:00
1848011S09080 1848 SI MM XXXX848003 -15.30 67.70 0.0 0.0 1848-01-16 06:00:00
1848011S09080 1848 SI MM XXXX848003 -16.50 67.00 0.0 0.0 1848-01-17 06:00:00
1848011S09080 1848 SI MM XXXX848003 -18.00 67.40 0.0 0.0 1848-01-18 06:00:00
1848011S09080 1848 SI MM XXXX848003 -20.60 69.80 0.0 0.0 1848-01-19 06:00:00
1848011S09080 1848 SI MM XXXX848003 -22.80 72.00 0.0 0.0 1848-01-20 06:00:00

In [7]:
from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 3956 # Radius of earth in kilometers. Use 3956 for miles, 6371 for km
    return c * r

In [8]:
f=[0]
for i in range(1,len(w)):
    if w.index[i]==w.index[i-1]:
        h=haversine(float(w['Longitude'][i]),\
                    float(w['Latitude'][i]),\
                    float(w['Longitude'][i-1]),\
                    float(w['Latitude'][i-1]))
    else: h=0
    f.append(h)

In [9]:
w['travel']=f

In [10]:
w


Out[10]:
Season Basin Sub_basin Name Latitude Longitude Wind(WMO) Pres(WMO) time travel
Serial_Num
1848011S09080 1848 SI MM XXXX848003 -8.60 79.80 0.0 0.0 1848-01-11 06:00:00 0.000000
1848011S09080 1848 SI MM XXXX848003 -9.00 78.90 0.0 0.0 1848-01-12 06:00:00 67.333730
1848011S09080 1848 SI MM XXXX848003 -10.40 73.20 0.0 0.0 1848-01-13 06:00:00 399.778434
1848011S09080 1848 SI MM XXXX848003 -12.80 69.90 0.0 0.0 1848-01-14 06:00:00 277.969027
1848011S09080 1848 SI MM XXXX848003 -13.90 68.90 0.0 0.0 1848-01-15 06:00:00 101.396632
1848011S09080 1848 SI MM XXXX848003 -15.30 67.70 0.0 0.0 1848-01-16 06:00:00 125.586793
1848011S09080 1848 SI MM XXXX848003 -16.50 67.00 0.0 0.0 1848-01-17 06:00:00 95.001881
1848011S09080 1848 SI MM XXXX848003 -18.00 67.40 0.0 0.0 1848-01-18 06:00:00 106.873429
1848011S09080 1848 SI MM XXXX848003 -20.60 69.80 0.0 0.0 1848-01-19 06:00:00 238.075831
1848011S09080 1848 SI MM XXXX848003 -22.80 72.00 0.0 0.0 1848-01-20 06:00:00 207.336767
1848011S09080 1848 SI MM XXXX848003 -27.20 75.80 0.0 0.0 1848-01-21 06:00:00 385.730622
1848011S15057 1848 SI MM XXXX848002 -15.20 57.40 0.0 0.0 1848-01-11 06:00:00 0.000000
1848011S15057 1848 SI MM XXXX848002 -15.80 56.60 0.0 0.0 1848-01-12 06:00:00 67.448604
1848011S15057 1848 SI MM XXXX848002 -20.30 49.90 0.0 0.0 1848-01-13 06:00:00 538.368465
1848011S15057 1848 SI MM XXXX848002 -26.10 46.70 0.0 0.0 1848-01-14 06:00:00 448.948169
1848011S15057 1848 SI MM XXXX848002 -33.20 50.40 0.0 0.0 1848-01-15 06:00:00 538.033216
1848011S15057 1848 SI MM XXXX848002 -34.10 51.10 0.0 0.0 1848-01-16 06:00:00 74.027605
1848061S12075 1848 SI MM XXXX848001 -11.40 75.50 0.0 0.0 1848-03-01 06:00:00 0.000000
1848061S12075 1848 SI MM XXXX848001 -11.70 74.00 0.0 0.0 1848-03-02 06:00:00 103.562974
1848061S12075 1848 SI MM XXXX848001 -14.40 64.60 0.0 0.0 1848-03-03 06:00:00 659.073528
1848061S12075 1848 SI MM XXXX848001 -17.60 57.60 0.0 0.0 1848-03-04 06:00:00 514.367072
1848061S12075 1848 SI MM XXXX848001 -24.50 56.50 0.0 0.0 1848-03-05 06:00:00 481.646993
1848061S12075 1848 SI MM XXXX848001 -29.60 59.90 0.0 0.0 1848-03-06 06:00:00 409.455541
1848061S12075 1848 SI MM XXXX848001 -34.50 69.30 0.0 0.0 1848-03-07 06:00:00 645.354943
1851080S15063 1851 SI MM XXXX851006 -15.10 62.50 0.0 0.0 1851-03-21 06:00:00 0.000000
1851080S15063 1851 SI MM XXXX851006 -15.90 61.00 0.0 0.0 1851-03-22 06:00:00 114.066033
1851080S15063 1851 SI MM XXXX851006 -16.80 59.00 0.0 0.0 1851-03-23 06:00:00 146.351394
1851080S15063 1851 SI MM XXXX851006 -17.10 57.00 0.0 0.0 1851-03-24 06:00:00 133.705188
1851080S15063 1851 SI MM XXXX851006 -17.40 54.60 0.0 0.0 1851-03-25 06:00:00 159.603536
1851080S15063 1851 SI MM XXXX851006 -18.00 52.00 0.0 0.0 1851-03-26 06:00:00 175.963235
... ... ... ... ... ... ... ... ... ... ...
2016040S17154 2016 SP EA TATIANA -16.48 156.82 30 998 2016-02-10 18:00:00 32.401859
2016040S17154 2016 SP EA TATIANA -16.75 157.38 30 996 2016-02-11 00:00:00 41.476520
2016040S17154 2016 SP EA TATIANA -16.92 157.9 35 992 2016-02-11 06:00:00 36.314053
2016040S17154 2016 SP EA TATIANA -16.8 158.26 35 996 2016-02-11 12:00:00 25.189491
2016040S17154 2016 SP EA TATIANA -17.08 158.75 40 990 2016-02-11 18:00:00 37.698672
2016040S17154 2016 SP EA TATIANA -17.23 159.38 45 987 2016-02-12 00:00:00 42.834152
2016040S17154 2016 SP EA TATIANA -18.15 159.8 50 983 2016-02-12 06:00:00 69.269518
2016040S17154 2016 SP MM TATIANA -18.95 160 50 982 2016-02-12 12:00:00 56.766383
2016040S17154 2016 SP MM TATIANA -20.01 160.21 50 983 2016-02-12 18:00:00 74.453494
2016040S17154 2016 SP MM TATIANA -21 160.5 50 984 2016-02-13 00:00:00 70.880837
2016040S17154 2016 SP MM TATIANA -22.64 160.27 40 990 2016-02-13 06:00:00 114.189760
2016040S17154 2016 SP MM TATIANA -24.09 160.29 30 997 2016-02-13 12:00:00 100.123601
2016040S17154 2016 SP EA TATIANA -25.32 159.71 30 996 2016-02-13 18:00:00 92.389635
2016040S17154 2016 SP EA TATIANA -26.02 159.2 30 995 2016-02-14 00:00:00 57.820469
2016040S17154 2016 SP EA TATIANA -26 159 -1 -1 2016-02-14 06:00:00 12.487019
2016040S17154 2016 SP EA TATIANA -25.6 158.65 -1 -1 2016-02-14 12:00:00 35.158501
2016040S17154 2016 SP EA TATIANA -25.2 158.3 -1 -1 2016-02-14 18:00:00 35.203658
2016040S17154 2016 SP EA TATIANA -24.8 158.4 -1 -1 2016-02-15 00:00:00 28.318130
2016040S17154 2016 SP EA TATIANA -24.4 158.7 -1 -1 2016-02-15 06:00:00 33.428401
2016040S17154 2016 SP EA TATIANA -24.2 158.5 -1 -1 2016-02-15 12:00:00 18.683870
2016074S16137 2016 SP EA NONAME -15.5 137.4 -1 1007 2016-03-14 00:00:00 0.000000
2016074S16137 2016 SP EA NONAME -15.8 137.5 -1 1003 2016-03-14 06:00:00 21.754426
2016074S16137 2016 SP EA NONAME -15.9 136.9 -1 1005 2016-03-14 12:00:00 40.445752
2016074S16137 2016 SP EA NONAME -15.8 137.2 -1 1002 2016-03-14 18:00:00 21.088367
2016074S16137 2016 SP EA NONAME -15.8 137.8 -1 1003 2016-03-15 00:00:00 39.861921
2016074S16137 2016 SP EA NONAME -15.5 137.9 25 999 2016-03-15 06:00:00 21.754426
2016074S16137 2016 SP EA NONAME -15.6 138.4 25 1000 2016-03-15 12:00:00 33.968090
2016074S16137 2016 SP EA NONAME -16 139.4 25 998 2016-03-15 18:00:00 71.948208
2016074S16137 2016 SP EA NONAME -16.4 140.3 30 998 2016-03-16 00:00:00 65.754397
2016074S16137 2016 SP EA NONAME -16.55 141.1 30 999 2016-03-16 06:00:00 53.971349

196684 rows × 10 columns


In [11]:
z=w.drop(['Name','Basin','Sub_basin','Latitude','Longitude'],axis=1)

In [334]:
def converter(df):
    if 'ind' in df.columns:
        df=df.drop('ind',axis=1)
    temp=pd.concat([df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).max(),\
    df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).min(),\
    df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).mean(),\
    df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).median(),\
    df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).diff().max(),\
    df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).diff().min(),\
    df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).diff().mean(),\
    df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).diff().median(),\
    df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).pct_change()[2:].max(),\
    df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).pct_change()[2:].min(),\
    df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).pct_change()[2:].mean(),\
    df.set_index('time').drop('Season',axis=1).astype('float').resample('6h').interpolate().replace(0,np.nan).pct_change()[2:].median(),\
    ],axis=1)
    labels=['max','min','mean','median','dmax','dmin','dmean','dmedian','pmax','pmin','pmean','pmedian']
    temp.columns=labels
    return temp

In [315]:
z['ind']=z.index

In [316]:
converter(z.loc[z.index[0]])


Out[316]:
max min mean median dmax dmin dmean dmedian pmax pmin pmean pmedian
Wind(WMO) 0.000000 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
Pres(WMO) 0.000000 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
travel 643.829223 27.109656 299.616432 260.025324 133.84765 -71.090921 15.233232 4.779678 1.234317 -0.303306 0.115549 0.03124

In [317]:
%%time
a=z.groupby('ind').apply(converter)


Wall time: 9min 55s

In [318]:
a.to_csv('a.csv')

In [319]:
b=a.unstack()

In [320]:
b.columns=[' '.join(col).strip() for col in b.columns.values]

In [321]:
del b.index.name

In [322]:
b['basin']=[w.loc[[i]]['Basin'].unique()[0] for i in b.index]

In [323]:
b['subbasin']=[w.loc[[i]]['Sub_basin'].unique()[0] for i in b.index]

In [324]:
b['year']=[pd.to_datetime(z.loc[[i]]['Season'].unique()[0],format='%Y') for i in b.index]

In [325]:
b.to_csv('b.csv')

In [2]:
b=pd.read_csv('b.csv')
b=b.set_index('Unnamed: 0')
del b.index.name

In [22]:
b.head()


Out[22]:
max Wind(WMO) max Pres(WMO) max travel min Wind(WMO) min Pres(WMO) min travel mean Wind(WMO) mean Pres(WMO) mean travel median Wind(WMO) ... pmin travel pmean Wind(WMO) pmean Pres(WMO) pmean travel pmedian Wind(WMO) pmedian Pres(WMO) pmedian travel basin subbasin year
1848011S09080 0.0 0.0 643.829223 NaN NaN 27.109656 NaN NaN 299.616432 NaN ... -0.303306 NaN NaN 0.115549 NaN NaN 0.031240 SI MM 1848-01-01 00:00:00
1848011S15057 0.0 0.0 867.023632 NaN NaN 27.155906 NaN NaN 527.931628 NaN ... -0.610441 NaN NaN 0.179515 NaN NaN 0.045130 SI MM 1848-01-01 00:00:00
1848061S12075 0.0 0.0 1061.414925 NaN NaN 41.696139 NaN NaN 690.205878 NaN ... -0.065711 NaN NaN 0.191131 NaN NaN -0.015903 SI MM 1848-01-01 00:00:00
1851080S15063 0.0 0.0 331.878643 NaN NaN 45.924842 NaN NaN 225.080577 NaN ... -0.336216 NaN NaN 0.055033 NaN NaN 0.039411 SI MM 1851-01-01 00:00:00
1851080S21060 0.0 0.0 482.626910 NaN NaN 47.257844 NaN NaN 245.336597 NaN ... 0.179359 NaN NaN 0.414176 NaN NaN 0.333333 SI MM 1851-01-01 00:00:00

5 rows × 39 columns


In [327]:
sns.stripplot(x='basin',y='max Wind(WMO)',data=b,jitter=True,size=3)
plt.show()



In [337]:
sns.swarmplot(x='min Pres(WMO)',y='mean travel',data=b.head(5000),hue='basin')
plt.show()



In [338]:
sns.violinplot(x='basin', y='dmax Wind(WMO)', data=b)
plt.show()



In [341]:
sns.violinplot(x='basin', y='max Wind(WMO)', data=b)
plt.show()



In [356]:
sns.jointplot(x='max Wind(WMO)',y='min Pres(WMO)',data=b[b['year']<pd.to_datetime(1950)].replace(0,np.nan))
plt.xlim(10,200)
plt.ylim(800,1100)
sns.jointplot(x='max Wind(WMO)',y='min Pres(WMO)',data=b[b['year']>pd.to_datetime(1950)].replace(0,np.nan))
plt.xlim(10,200)
plt.ylim(800,1100)
plt.show()



In [3]:
sns.jointplot(x='max Wind(WMO)',y='min Pres(WMO)',data=b[(b['min Pres(WMO)']>800)&\
                                                          (b['max Wind(WMO)']>10)].replace(0,np.nan),kind='hex')
#plt.xlim(10,200)
#plt.ylim(800,1100)
plt.show()



In [4]:
sns.jointplot(x='median Wind(WMO)',y='median Pres(WMO)',data=b[(b['min Pres(WMO)']>800)&\
                                                          (b['max Wind(WMO)']>10)].replace(0,np.nan),kind='hex')
#plt.xlim(10,200)
#plt.ylim(800,1100)
plt.show()



In [12]:
sns.jointplot(x='dmax Wind(WMO)',y='dmin Pres(WMO)',data=b[(b['pmin Pres(WMO)']<0.1)&\
                                                          (b['pmax Wind(WMO)']<0.5)].replace(0,np.nan),kind='hex')
#plt.xlim(10,200)
#plt.ylim(800,1100)
plt.show()



In [26]:
sns.pairplot(b[['max Wind(WMO)','mean travel']])


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-26-d986e33f3dc9> in <module>()
----> 1 sns.pairplot(b[['max Wind(WMO)','mean travel']])

C:\Anaconda2\lib\site-packages\seaborn\linearmodels.pyc in pairplot(data, hue, hue_order, palette, vars, x_vars, y_vars, kind, diag_kind, markers, size, aspect, dropna, plot_kws, diag_kws, grid_kws)
   1607     if grid.square_grid:
   1608         if diag_kind == "hist":
-> 1609             grid.map_diag(plt.hist, **diag_kws)
   1610         elif diag_kind == "kde":
   1611             diag_kws["legend"] = False

C:\Anaconda2\lib\site-packages\seaborn\axisgrid.pyc in map_diag(self, func, **kwargs)
   1346                 else:
   1347                     func(vals, color=self.palette, histtype="barstacked",
-> 1348                          **kwargs)
   1349             else:
   1350                 for k, label_k in enumerate(self.hue_names):

C:\Anaconda2\lib\site-packages\matplotlib\pyplot.pyc in hist(x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, hold, data, **kwargs)
   2956                       histtype=histtype, align=align, orientation=orientation,
   2957                       rwidth=rwidth, log=log, color=color, label=label,
-> 2958                       stacked=stacked, data=data, **kwargs)
   2959     finally:
   2960         ax.hold(washold)

C:\Anaconda2\lib\site-packages\matplotlib\__init__.pyc in inner(ax, *args, **kwargs)
   1809                     warnings.warn(msg % (label_namer, func.__name__),
   1810                                   RuntimeWarning, stacklevel=2)
-> 1811             return func(ax, *args, **kwargs)
   1812         pre_doc = inner.__doc__
   1813         if pre_doc is None:

C:\Anaconda2\lib\site-packages\matplotlib\axes\_axes.pyc in hist(self, x, bins, range, normed, weights, cumulative, bottom, histtype, align, orientation, rwidth, log, color, label, stacked, **kwargs)
   6008             # this will automatically overwrite bins,
   6009             # so that each histogram uses the same bins
-> 6010             m, bins = np.histogram(x[i], bins, weights=w[i], **hist_kwargs)
   6011             m = m.astype(float)  # causes problems later if it's an int
   6012             if mlast is None:

C:\Anaconda2\lib\site-packages\numpy\lib\function_base.pyc in histogram(a, bins, range, normed, weights, density)
    500     if mn > mx:
    501         raise ValueError(
--> 502             'max must be larger than min in range parameter.')
    503     if not np.all(np.isfinite([mn, mx])):
    504         raise ValueError(

ValueError: max must be larger than min in range parameter.

In [24]:
sns.pairplot(b[['max Wind(WMO)','basin','year','min Pres(WMO)','mean travel']],kind='reg',hue='basin')


C:\Anaconda2\lib\site-packages\numpy\lib\function_base.py:583: RuntimeWarning: invalid value encountered in greater_equal
  keep = (tmp_a >= mn)
C:\Anaconda2\lib\site-packages\numpy\lib\function_base.py:584: RuntimeWarning: invalid value encountered in less_equal
  keep &= (tmp_a <= mx)
Out[24]:
<seaborn.axisgrid.PairGrid at 0xe754fd0>

In [ ]:
sns.heatmap(b[['max Wind(WMO)','min Pres(WMO)','mean travel']])


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-29-36104855bd1d> in <module>()
----> 1 sns.heatmap(b[['max Wind(WMO)','min Pres(WMO)','mean travel']])

C:\Anaconda2\lib\site-packages\seaborn\matrix.pyc in heatmap(data, vmin, vmax, cmap, center, robust, annot, fmt, annot_kws, linewidths, linecolor, cbar, cbar_kws, cbar_ax, square, ax, xticklabels, yticklabels, mask, **kwargs)
    494     if square:
    495         ax.set_aspect("equal")
--> 496     plotter.plot(ax, cbar_ax, kwargs)
    497     return ax
    498 

C:\Anaconda2\lib\site-packages\seaborn\matrix.pyc in plot(self, ax, cax, kws)
    272         if axis_ticklabels_overlap(xtl):
    273             plt.setp(xtl, rotation="vertical")
--> 274         if axis_ticklabels_overlap(ytl):
    275             plt.setp(ytl, rotation="horizontal")
    276 

C:\Anaconda2\lib\site-packages\seaborn\utils.pyc in axis_ticklabels_overlap(labels)
    472     try:
    473         bboxes = [l.get_window_extent() for l in labels]
--> 474         overlaps = [b.count_overlaps(bboxes) for b in bboxes]
    475         return max(overlaps) > 1
    476     except RuntimeError:

C:\Anaconda2\lib\site-packages\matplotlib\transforms.pyc in count_overlaps(self, bboxes)
    668         """
    669         return count_bboxes_overlapping_bbox(
--> 670             self, np.atleast_3d([np.array(x) for x in bboxes]))
    671 
    672     def expanded(self, sw, sh):

KeyboardInterrupt: