Please see the previous notebook, "N-Year Storms", for how the N-year storms were calculated and for a little more information about how often they occur. Building on that, this notebook breaks the timeframe into buckets and uses them to see whether these storms are happening more or less frequently.
In [118]:
from __future__ import absolute_import, division, print_function, unicode_literals
import pandas as pd
from datetime import datetime, timedelta
import operator
import matplotlib.pyplot as plt
import numpy as np
from collections import namedtuple
%matplotlib inline
In [9]:
n_year_storms = pd.read_csv('data/n_year_storms_ohare_noaa.csv')
n_year_storms['start_time'] = pd.to_datetime(n_year_storms['start_time'])
n_year_storms['end_time'] = pd.to_datetime(n_year_storms['end_time'])
n_year_storms = n_year_storms.set_index('start_time')
n_year_storms.head()
Out[9]:
In [10]:
# Based on previous notebooks, we should have 83 n-year events in this timeframe.
len(n_year_storms)
Out[10]:
In [15]:
ns_by_year = {year: {n: 0 for n in n_year_storms['n'].unique()} for year in range(1970, 2017)}
for index, event in n_year_storms.iterrows():
    ns_by_year[event['year']][int(event['n'])] += 1
ns_by_year = pd.DataFrame(ns_by_year).transpose()
ns_by_year.head()
Out[15]:
In [16]:
# Double check that we still have 83 events
ns_by_year.sum().sum()
Out[16]:
Looking at the "N-Year Storms" notebook, it is pretty obvious when the big storms happen: for the most part, recently. However, there are so many 1- and 2-year events that it is tough to tell when they are happening. Let's create a graph with only those events.
In [21]:
all_years = list(range(1970, 2016))
small_events = ns_by_year[(ns_by_year[1] > 0) | (ns_by_year[2] > 0)][[1,2]]
small_events = small_events.reindex(all_years, fill_value=0)
small_events.columns = [str(n) + '-year' for n in small_events.columns]
small_events.head()
Out[21]:
In [29]:
# Cumulative number of 1- and 2-year events over time
small_events.cumsum().plot(kind='line', title="1- and 2-year Storms by Year - Cumulative Total over Time")
Out[29]:
In [89]:
# Divide into buckets using resampling (the how= argument was removed from
# resample in newer pandas; use .agg instead)
n_year_storms.resample('15A').agg({'year': 'count'})
Out[89]:
In [81]:
# The resample method isn't really giving me what I want, so do this by brute force
# TODO: Play around with resample to do this more efficiently
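Before the brute-force version, a hedged sketch of one alternative: pd.cut can assign uneven, explicitly labeled bins in a single call. This is not from the original analysis; it assumes the year column runs 1970-2015 and reproduces the 1970-1985 / 1986-2000 / 2001-2015 split used below.
In [ ]:
# Sketch: count storms per bucket with pd.cut instead of a hand-rolled
# function. Bins are right-closed, so (1969, 1985] means 1970 through 1985.
bins = [1969, 1985, 2000, 2015]
labels = ['1970-1985', '1986-2000', '2001-2015']
pd.cut(n_year_storms['year'], bins=bins, labels=labels).value_counts().sort_index()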
In [129]:
# I'd like to be a little more explicit about how I'm breaking this up
def find_bucket(year):
    if year < 1986:
        return '1970-1985'
    elif year < 2001:
        return '1986-2000'
    else:
        return '2001-2015'

ns_by_year['year'] = ns_by_year.index.values
ns_by_year['bucket3'] = ns_by_year['year'].apply(find_bucket)
ns_by_year = ns_by_year.drop(columns='year')
ns_by_year.head()
Out[129]:
In [131]:
bucket3 = ns_by_year.groupby('bucket3').sum()
bucket3.head()
Out[131]:
In [133]:
# Make sure there are 83 storms
bucket3.sum().sum()
Out[133]:
In [134]:
bucket3.plot(kind='bar', stacked=True, title="N-Year Storms across 3 time intervals")
Out[134]:
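One caveat worth flagging: the first interval covers 16 years while the other two cover 15, so raw counts slightly favor it. A quick sketch, not part of the original analysis, of normalizing to storms per year:
In [ ]:
# Hypothetical normalization: storms per year in each bucket, since the
# buckets are not all the same length (16 vs. 15 years)
years_per_bucket = pd.Series({'1970-1985': 16, '1986-2000': 15, '2001-2015': 15})
bucket3.div(years_per_bucket, axis=0)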
In [136]:
ns_by_year.head()
Out[136]:
In [137]:
def find_bucket(year):
    # Note: despite the name "bucket8" below, this yields nine buckets,
    # and the first one spans six years (1970-1975) rather than five
    if year < 1976:
        return '1970-1975'
    elif year < 1981:
        return '1976-1980'
    elif year < 1986:
        return '1981-1985'
    elif year < 1991:
        return '1986-1990'
    elif year < 1996:
        return '1991-1995'
    elif year < 2001:
        return '1996-2000'
    elif year < 2006:
        return '2001-2005'
    elif year < 2011:
        return '2006-2010'
    else:
        return '2011-2015'

ns_by_year['year'] = ns_by_year.index.values
ns_by_year['bucket8'] = ns_by_year['year'].apply(find_bucket)
ns_by_year = ns_by_year.drop(columns='year')
ns_by_year.head()
Out[137]:
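As a hedged aside, the same pd.cut idea sketched earlier scales to this finer split if the bin edges and labels are generated programmatically, which avoids the elif ladder. A sketch only, assuming the 1970-2015 range with the six-year first bucket; bucket_labels is a hypothetical variable and is not used below.
In [ ]:
# Sketch: build the bin edges (first bucket is six years, the rest five)
# and derive the matching labels from them, then cut the year index.
# Years outside the edges (e.g., a stray 2016 row) come out as NaN.
edges = [1969, 1975] + list(range(1980, 2016, 5))
labels = ['{}-{}'.format(lo + 1, hi) for lo, hi in zip(edges[:-1], edges[1:])]
bucket_labels = pd.cut(ns_by_year.index, bins=edges, labels=labels)
bucket_labels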
In [138]:
bucket8 = ns_by_year.drop(columns='bucket3').groupby('bucket8').sum()
bucket8.head()
Out[138]:
In [139]:
# Make sure there are still 83 storms
bucket8.sum().sum()
Out[139]:
In [140]:
bucket8.plot(kind='bar', stacked=True, title="N-Year Storms across 9 Time Intervals")
Out[140]: