This notebook outlines the data processing approach for the JOC article.

To access the data and run the notebooks, you must perform the following steps:

1) Navigate to http://tiny.cc/JOC-XX-XXXX, where XX-XXXX is the manuscript number on Manuscript Central.

2) Download the file named data.tar.gz. It has the following checksum information:

3) Gunzip and untar the 'data' folder into the mcs_future folder (a Python sketch follows the directory list below). These files are post-processed data rather than the entire dataset, because the three raw datasets and the derived data are very large and the training and classification steps take a long time to run. However, applying the methods of this paper to the publicly available data will give you the same results. If you downloaded the entire GitHub project, the data folder and its subfolders should be:

/MCS/mcs_future/data/raw_data

/MCS/mcs_future/data/shapefiles

/MCS/mcs_future/data/slice_data

/MCS/mcs_future/data/wrf_data
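For convenience, here is a minimal Python sketch of steps 2 and 3. The helper name verify_and_extract is illustrative, and the checksum algorithm and value are placeholders until the published checksum information is substituted:

import hashlib
import tarfile

def verify_and_extract(archive="data.tar.gz", expected_sha256=None, dest="mcs_future"):

    #Compute the SHA-256 digest of the downloaded archive
    sha256 = hashlib.sha256()
    with open(archive, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha256.update(chunk)
    digest = sha256.hexdigest()
    print("SHA-256:", digest)

    #Compare against the published checksum, if one was supplied
    if expected_sha256 is not None and digest != expected_sha256:
        raise ValueError("Checksum mismatch -- re-download data.tar.gz")

    #Extract the 'data' folder into the mcs_future directory
    with tarfile.open(archive, "r:gz") as tar:
        tar.extractall(path=dest)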


In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import sys
sys.path.append('C:/users/ahaberlie1/documents/github/MCS/')

data_dir = "../data/slice_data/"

#Nested dictionary: morphology subset -> run (OBS, CTRL, PGW) -> slice DataFrame
data = {'mcs':{'OBS':None, 'CTRL':None, 'PGW':None},
        'qlcs':{'OBS':None, 'CTRL':None, 'PGW':None},
        'non_qlcs':{'OBS':None, 'CTRL':None, 'PGW':None}}

for subset in ['mcs', 'qlcs', 'non_qlcs']:
    
    filename = data_dir + "new_{}_project.csv".format(subset)
    
    #Read the slice table for this subset and index it by time
    df = pd.read_csv(filename)
    df['datetime'] = pd.to_datetime(df.datetime)
    df = df.set_index('datetime')
    
    #Split the table by run and store a copy in the nested dictionary
    for dataset in ['OBS', 'CTRL', 'PGW']:

        data[subset][dataset] = df[df.run.values == dataset].copy()
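A quick, optional sanity check on the nested dictionary built above (a sketch that uses only the names defined in this cell):

#Confirm that every subset/run combination was populated and report its size
for subset_name, runs in data.items():
    for run_name, frame in runs.items():
        n = 0 if frame is None else len(frame)
        print("{:9s} {:5s} {:7d} slices".format(subset_name, run_name, n))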

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

import cartopy
import cartopy.crs as ccrs
import cartopy.io.shapereader as shpreader

#Overlay 50-m Natural Earth U.S. state borders on the given GeoAxes
def draw_states(ax):
        
    shapename = 'admin_1_states_provinces_lakes_shp'
    states_shp = shpreader.natural_earth(resolution='50m',
                                         category='cultural', name=shapename)

    for state, info in zip(shpreader.Reader(states_shp).geometries(), shpreader.Reader(states_shp).records()):
        if info.attributes['admin'] == 'United States of America':

            ax.add_geometries([state], ccrs.PlateCarree(),
                              facecolor='None', edgecolor='k')
                              
#Overlay the Midwest analysis-region outline (thick black line) on the given GeoAxes
def draw_midwest(ax):
    
    shapename = "../data/shapefiles/map/midwest_outline_latlon_grids"
    shp = shpreader.Reader(shapename)
    for outline, info in zip(shp.geometries(), shp.records()):
        ax.add_geometries([outline], ccrs.PlateCarree(),
                          facecolor='None', edgecolor='k', linewidth=4)
        
#Create a full-figure GeoAxes in to_proj, set its lon/lat extent, and draw U.S. state borders
def generate_view(w_lon, e_lon, n_lat, s_lat, from_proj, to_proj):

    view = plt.axes([0, 0, 1, 1], projection=to_proj)

    view.set_extent([w_lon, e_lon, s_lat, n_lat])

    shapename = 'admin_1_states_provinces_lakes_shp'
    states_shp = shpreader.natural_earth(resolution='50m',
                                         category='cultural', name=shapename)

    for state, info in zip(shpreader.Reader(states_shp).geometries(), shpreader.Reader(states_shp).records()):
        if info.attributes['admin'] == 'United States of America':

            view.add_geometries([state], ccrs.PlateCarree(),
                              facecolor='None', edgecolor='k')
            
    return view

Draw all June - August OBS tracks


In [3]:
from netCDF4 import Dataset

#WRF grid latitudes and longitudes (2-D arrays from the constants file)
nc = Dataset("../data/wrf_data/RALconus4km_wrf_constants.nc")
lons = nc.variables['XLONG'][:,:]
lats = nc.variables['XLAT'][:,:]

In [4]:
sys.path.append('C:/users/ahaberlie/documents/github/MCS/mcs_future')

import cartopy
import cartopy.crs as ccrs
from mcs_future.utils.mapping_help import wrf_to_lon_lat
import matplotlib.pyplot as plt
from netCDF4 import Dataset
%matplotlib inline
plt.rcParams['figure.figsize'] = 10, 10

from_proj = ccrs.PlateCarree()
to_proj = ccrs.AlbersEqualArea(central_longitude=-95, central_latitude=38.0000)

view = generate_view(-120, -73, 20, 50, from_proj, to_proj)

subset = 'mcs'
dataset = 'OBS'
df_ = data[subset][dataset].copy()
df_ = df_[df_.index.month.isin([6,7,8])]

for sid, swath in df_.groupby('storm_num'):

    #Track centroid in grid coordinates: the midpoint of each slice's bounding box
    xp = np.array([np.mean([x1, x2]) for (x1, x2) in zip(swath.xmin.values, swath.xmax.values)])
    yp = np.array([np.mean([y1, y2]) for (y1, y2) in zip(swath.ymin.values, swath.ymax.values)])

    #Convert WRF grid indices to longitude/latitude
    xp, yp = wrf_to_lon_lat(lons, lats, xp.astype(int), yp.astype(int))

    view.plot(xp, yp, 'k-', lw=0.5, transform=from_proj)
    view.set_title("Subset: " + subset + " Dataset: " + dataset)


Draw all June - August CTRL tracks


In [5]:
import cartopy
import cartopy.crs as ccrs
from mcs_future.utils.mapping_help import wrf_to_lon_lat
import matplotlib.pyplot as plt
from netCDF4 import Dataset
%matplotlib inline
plt.rcParams['figure.figsize'] = 10, 10

from_proj = ccrs.PlateCarree()
to_proj = ccrs.AlbersEqualArea(central_longitude=-95, central_latitude=38.0000)

view = generate_view(-120, -73, 20, 50, from_proj, to_proj)

subset = 'mcs'
dataset = 'CTRL'
df_ = data[subset][dataset]
df_ = df_[df_.index.month.isin([6,7,8])]

for sid, swath in df_.groupby('storm_num'):

    xp = np.array([np.mean([x1, x2]) for (x1, x2) in zip(swath.xmin.values, swath.xmax.values)])
    yp = np.array([np.mean([y1, y2]) for (y1, y2) in zip(swath.ymin.values, swath.ymax.values)])

    if dataset == 'OBS':
        #Note: NOWrad_to_lon_lat is not imported in this notebook; it would need
        #to be imported (presumably from mcs_future.utils.mapping_help) before
        #running the OBS branch of this cell
        xp, yp = NOWrad_to_lon_lat(xp, yp)
    else:
        xp, yp = wrf_to_lon_lat(lons, lats, xp.astype(int), yp.astype(int))

    view.plot(xp, yp, 'k-', lw=0.5, transform=from_proj)
    view.set_title("Subset: " + subset + " Dataset: " + dataset)


Get only those tracks that are within the Midwest


In [6]:
import sys
import geopandas as gpd
from mcs_future.utils.mapping_help import get_point_subset

mw_shp_dir = "../data/shapefiles/map/"
wrf_dir = "../data/wrf_data/"
wrf_data_file = "RALconus4km_wrf_constants.nc"

mw_data = {'mcs':{'OBS':None, 'CTRL':None, 'PGW':None},
           'qlcs':{'OBS':None, 'CTRL':None, 'PGW':None},
           'non_qlcs':{'OBS':None, 'CTRL':None, 'PGW':None}}

outline = gpd.read_file(mw_shp_dir + "midwest_outline_latlon_grids.shp")

#Restrict every subset/run to slices located within the Midwest outline
for sub_key, sub_value in data.items():
    
    for dset_key, dset_val in sub_value.items():
        mw_data[sub_key][dset_key] = get_point_subset(dset_val.copy(), 
                                                      outline, wrf_dir + wrf_data_file)
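An optional check (a sketch using only the dictionaries defined above) that shows how strongly the Midwest filter reduces each subset:

#Slice counts before and after restricting to the Midwest outline
for subset_name in ['mcs', 'qlcs', 'non_qlcs']:
    for run_name in ['OBS', 'CTRL', 'PGW']:
        print(subset_name, run_name,
              len(data[subset_name][run_name]), "->",
              len(mw_data[subset_name][run_name]))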

Draw OBS QLCS tracks that are within the Midwest


In [7]:
view = generate_view(-120, -73, 20, 50, from_proj, to_proj)

subset = 'qlcs'
dataset = 'OBS'
df_ = mw_data[subset][dataset].copy()
df_ = df_[df_.index.month.isin([6,7,8])]

for sid, swath in df_.groupby('storm_num'):

    xp = np.array([np.mean([x1, x2]) for (x1, x2) in zip(swath.xmin.values, swath.xmax.values)])
    yp = np.array([np.mean([y1, y2]) for (y1, y2) in zip(swath.ymin.values, swath.ymax.values)])

    xp, yp = wrf_to_lon_lat(lons, lats, xp.astype(int), yp.astype(int))

    view.plot(xp, yp, 'k-', lw=0.5, transform=from_proj)
    view.set_title("Midwest -- Subset: " + subset + " Dataset: " + dataset)
    
    draw_midwest(view)


Draw CTRL QLCS tracks that intersect the Midwest


In [8]:
view = generate_view(-120, -73, 20, 50, from_proj, to_proj)

subset = 'qlcs'
dataset = 'CTRL'
df_ = mw_data[subset][dataset]
df_ = df_[df_.index.month.isin([6,7,8])]

for sid, swath in df_.groupby('storm_num'):

    xp = np.array([np.mean([x1, x2]) for (x1, x2) in zip(swath.xmin.values, swath.xmax.values)])
    yp = np.array([np.mean([y1, y2]) for (y1, y2) in zip(swath.ymin.values, swath.ymax.values)])

    if dataset == 'OBS':
        xp, yp = NOWrad_to_lon_lat(xp, yp)
    else:
        xp, yp = wrf_to_lon_lat(lons, lats, xp.astype(int), yp.astype(int))

    view.plot(xp, yp, 'k-', lw=0.5, transform=from_proj)
    view.set_title("Midwest -- Subset: " + subset + " Dataset: " + dataset)
    draw_midwest(view)


Get counts of days for the Midwest


In [9]:
import datetime

def calc_day_count(dframe):
    
    #Resample to daily frequency and count how many rows fall in each day
    days = dframe.resample('D').count()
    
    #Return the number of days with at least one row
    return np.sum(days['storm_num'].values > 0)
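As a quick illustration with synthetic timestamps (not project data), three slices spread across two distinct calendar days yield a day count of 2:

#Minimal example of calc_day_count on a toy DataFrame
toy = pd.DataFrame({'storm_num': ['a', 'a', 'b']},
                   index=pd.to_datetime(['2010-06-10 19:00',
                                         '2010-06-10 22:00',
                                         '2010-06-12 03:00']))
print(calc_day_count(toy))  #-> 2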

Example of how day counts vs. event counts are calculated


In [233]:
#Select all observed QLCS events
dataset = 'OBS'
subset = 'qlcs'

#Work on a copy of this dataset because we will modify the datetime index
#to simplify the day-count calculation
a = mw_data[subset][dataset].copy()

#Add 6 hours to each timestamp so that an 18Z-18Z "convective day"
#lines up with a 00Z-00Z calendar day
a.index = a.index + datetime.timedelta(hours=6)
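#For example, a slice at 2010-06-10 20:00 UTC becomes 2010-06-11 02:00 UTC
#after the shift, so all activity from 18 UTC onward is credited to the
#following calendar day when the index is resampled daily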

#set year, month, and range of days
year = 2010
month = 6

#look at days 10-15 of the chosen month
day_range = np.array(list(range(10, 16)))

#make temporary month/year dataframe
b = a[(a.index.year==year) & ((a.index.month==month)) & (a.index.day.isin(day_range))]

#select 8 or fewer days; otherwise the color list below runs out
colors = ['b', 'c', 'g', 'k', 'm', 'r', 'grey', 'y']

lines = []
labels = []

#set up view centered on midwest
view = generate_view(-105, -80, 30, 50, from_proj, to_proj)

#plot tracks by day
for color, day in zip(colors, day_range):

    df_ = b[b.index.day==day]
    for sid, sw in df_.groupby('storm_num'):

        if len(sw) > 0:
            xp = np.array([np.mean([x1, x2]) for (x1, x2) in zip(sw.xmin.values, sw.xmax.values)])
            yp = np.array([np.mean([y1, y2]) for (y1, y2) in zip(sw.ymin.values, sw.ymax.values)])

            xp, yp = wrf_to_lon_lat(lons, lats, xp.astype(int), yp.astype(int))
            
            view.plot(xp, yp, 'x', markersize=15, color=color, transform=from_proj)

            view.plot(xp, yp, '-', lw=3, color=color, transform=from_proj, 
                      label=subset.upper() + " " + sid + " " + str(year) + "-" + str(month).zfill(2) + "-" + str(day).zfill(2))

    view.set_title(subset.upper() + " tracks (colored by day)")

    draw_midwest(view)

c = a[(a.index.year==year) & (a.index.month==month) & (a.index.day.isin(day_range))]

print(subset + " day counts:", calc_day_count(c), "; percent of days examined:", calc_day_count(c) / len(day_range))
print(subset + " swath counts:", c.storm_num.nunique())

view.legend(loc=3)


qlcs day counts: 6 ; percent of days examined: 1.0
qlcs swath counts: 10
Out[233]:
<matplotlib.legend.Legend at 0x1482ebc2a90>
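As an optional cross-check (a sketch using only pandas built-ins), the day count equals the number of unique shifted calendar dates in the selection, and the swath count is the number of unique storm identifiers:

#Equivalent counts computed directly from the index and storm_num column
print("days:", c.index.normalize().nunique())   #matches calc_day_count(c)
print("swaths:", c.storm_num.nunique())         #matches the swath count above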

In [199]:
import numpy as np

for sub_key in ['mcs', 'qlcs', 'non_qlcs']:
    
    for dset_key in ['OBS', 'CTRL', 'PGW']:
        
        #copy the desired dataset and subset
        dset_val = mw_data[sub_key][dset_key].copy()
        
        #Get running count of slices per storm
        dset_val['storm_run'] = dset_val.groupby(['storm_num']).cumcount()
        
        #Shift 18z to 00z for analysis purposes
        dset_val.index = dset_val.index + datetime.timedelta(hours=6)
        
        total_count = calc_day_count(dset_val)
        
        seasons = {'djf':(12, 1, 2), 'mam':(3, 4, 5), 'jja':(6, 7, 8), 'son':(9, 10, 11)}
        
        for season_id, season_vals in seasons.items():
            
            dset_ = dset_val[dset_val.index.month.isin(season_vals)].copy()
            perc = "{0:.2f}".format(100*(calc_day_count(dset_) / total_count)) 
            seasons[season_id] = perc
        
        print(sub_key, dset_key, "Total Count:", total_count, 
              ", % DJF", seasons['djf'], ', % MAM', seasons['mam'], 
              ', % JJA', seasons['jja'], ', % SON', seasons['son'], '\n')


mcs OBS Total Count: 1800 , % DJF 3.61 , % MAM 28.89 , % JJA 49.39 , % SON 18.11 

mcs CTRL Total Count: 1738 , % DJF 4.32 , % MAM 33.37 , % JJA 46.66 , % SON 15.65 

mcs PGW Total Count: 1850 , % DJF 6.76 , % MAM 35.14 , % JJA 38.59 , % SON 19.51 

qlcs OBS Total Count: 1200 , % DJF 3.67 , % MAM 28.25 , % JJA 52.42 , % SON 15.67 

qlcs CTRL Total Count: 743 , % DJF 3.90 , % MAM 38.49 , % JJA 44.15 , % SON 13.46 

qlcs PGW Total Count: 848 , % DJF 6.49 , % MAM 43.87 , % JJA 33.37 , % SON 16.27 

non_qlcs OBS Total Count: 1180 , % DJF 2.46 , % MAM 28.73 , % JJA 51.10 , % SON 17.71 

non_qlcs CTRL Total Count: 1445 , % DJF 3.60 , % MAM 31.76 , % JJA 49.69 , % SON 14.95 

non_qlcs PGW Total Count: 1584 , % DJF 6.25 , % MAM 34.09 , % JJA 40.66 , % SON 19.00 

Show monthly percentages of events


In [12]:
from calendar import month_name as mname

col_names = ['key'] + [str(x) for x in range(1, 13)]

months = {x:[] for x in col_names}

for sub_key in ['mcs', 'qlcs', 'non_qlcs']:
    for dset_key in ['OBS', 'CTRL', 'PGW']:

        dset_val = mw_data[sub_key][dset_key].copy()
        
        #Shift 18z to 00z for analysis purposes
        dset_val.index = dset_val.index + datetime.timedelta(hours=6)
        
        #Total count of event days in each subset / dataset key pair
        total_count = calc_day_count(dset_val)

        for month in list(range(1, 13)):

            #only data from the particular month
            dset_ = dset_val[dset_val.index.month==month].copy()
            
            #append the percentage to the appropriate dictionary location
            months[str(month)].append(100*(calc_day_count(dset_) / total_count))
            
        
        months['key'].append(sub_key + "_" + dset_key)

df = pd.DataFrame.from_dict(months)
df = df.set_index('key')

for gid, group in df.groupby(df.index):
    
    max_idx = mname[int(group.idxmax(axis=1).values[0])]
    min_idx = mname[int(group.idxmin(axis=1).values[0])]
    
    max_val = group.max(axis=1).values[0]
    min_val = group.min(axis=1).values[0]
    
    print(gid, "\nMax Month:", max_idx, "\nMax Percentage:", "{:0.2f}".format(max_val), "%",
               "\nMin Month:", min_idx, "\nMin Percentage:", "{:0.2f}".format(min_val), "%", 
               "\nMax Percentage Diff", "{:0.2f}".format(max_val - min_val), "%\n")
    
df[col_names[1:]]


mcs_CTRL 
Max Month: June 
Max Percentage: 17.61 % 
Min Month: January 
Min Percentage: 1.27 % 
Max Percentage Diff 16.34 %

mcs_OBS 
Max Month: July 
Max Percentage: 16.83 % 
Min Month: December 
Min Percentage: 1.11 % 
Max Percentage Diff 15.72 %

mcs_PGW 
Max Month: June 
Max Percentage: 16.76 % 
Min Month: January 
Min Percentage: 1.89 % 
Max Percentage Diff 14.86 %

non_qlcs_CTRL 
Max Month: June 
Max Percentage: 18.34 % 
Min Month: January 
Min Percentage: 0.90 % 
Max Percentage Diff 17.44 %

non_qlcs_OBS 
Max Month: July 
Max Percentage: 18.81 % 
Min Month: February 
Min Percentage: 0.59 % 
Max Percentage Diff 18.22 %

non_qlcs_PGW 
Max Month: June 
Max Percentage: 16.79 % 
Min Month: January 
Min Percentage: 1.77 % 
Max Percentage Diff 15.03 %

qlcs_CTRL 
Max Month: June 
Max Percentage: 25.44 % 
Min Month: December 
Min Percentage: 1.08 % 
Max Percentage Diff 24.36 %

qlcs_OBS 
Max Month: June 
Max Percentage: 19.25 % 
Min Month: December 
Min Percentage: 0.92 % 
Max Percentage Diff 18.33 %

qlcs_PGW 
Max Month: June 
Max Percentage: 21.11 % 
Min Month: January 
Min Percentage: 1.89 % 
Max Percentage Diff 19.22 %

Out[12]:
1 2 3 4 5 6 7 8 9 10 11 12
key
mcs_OBS 1.333333 1.166667 5.222222 9.500000 14.166667 16.722222 16.833333 15.833333 10.333333 5.277778 2.500000 1.111111
mcs_CTRL 1.265823 1.611047 6.271577 11.449942 15.650173 17.606444 15.937860 13.118527 7.364787 4.833142 3.452244 1.438435
mcs_PGW 1.891892 2.432432 7.675676 11.891892 15.567568 16.756757 12.054054 9.783784 8.324324 6.918919 4.270270 2.432432
qlcs_OBS 1.333333 1.416667 4.500000 8.833333 14.916667 19.250000 16.916667 16.250000 8.666667 4.750000 2.250000 0.916667
qlcs_CTRL 1.345895 1.480485 6.729475 11.843876 19.919246 25.437416 9.959623 8.748318 4.979812 5.652759 2.826380 1.076716
qlcs_PGW 1.886792 2.476415 8.254717 15.448113 20.165094 21.108491 7.311321 4.952830 6.014151 6.132075 4.127358 2.122642
non_qlcs_OBS 0.932203 0.593220 4.661017 9.661017 14.406780 15.593220 18.813559 16.694915 11.016949 4.745763 1.949153 0.932203
non_qlcs_CTRL 0.899654 1.384083 5.467128 10.657439 15.640138 18.339100 17.301038 14.048443 7.889273 3.875433 3.183391 1.314879
non_qlcs_PGW 1.767677 2.209596 6.944444 11.489899 15.656566 16.792929 13.194444 10.669192 8.522727 6.628788 3.851010 2.272727

Draw cumulative count plots


In [191]:
import datetime
from pandas.plotting import register_matplotlib_converters as pdtc
import matplotlib.dates as mdates
import warnings
import matplotlib.cbook
warnings.filterwarnings("ignore",category=matplotlib.cbook.mplDeprecation)
pdtc()
import matplotlib.pyplot as plt
%matplotlib inline

plt.rcParams['figure.figsize'] = 15, 30
plt.rcParams['xtick.labelsize'] = 20
plt.rcParams['ytick.labelsize'] = 20
plt.rcParams['axes.labelsize'] = 20

month_doy = {'Jan':1, 'Feb':32, 'Mar':60, 
             'Apr':91, 'May':121, 'Jun':152, 
             'Jul':182, 'Aug':213, 'Sep':244, 
             'Oct':274, 'Nov':305, 'Dec':335}

ax = plt.subplot(1, 1, 1)

drange = pd.date_range(start=datetime.datetime(2000, 10, 1), end=datetime.datetime(2013, 12, 31), freq='D')

colors = [plt.cm.plasma(x/3) for x in range(0, 3)]

def draw_cumu_labels(ax):
    
    ax.legend(loc=4, prop={'size': 20})
    
    ax.axvspan(datetime.datetime(2012, 1, 1), datetime.datetime(2012, 3, 1), facecolor='0.5', alpha=0.1)
    ax.axvspan(datetime.datetime(2012, 6, 1), datetime.datetime(2012, 9, 1), facecolor='0.5', alpha=0.1)
    ax.axvspan(datetime.datetime(2012, 12, 1), datetime.datetime(2012, 12, 31), facecolor='0.5', alpha=0.1)
    ax.grid()
    ax.set_ylabel("Cumulative Count")
    return ax

aug_obs = 0
may_obs = 0
feb_obs = 0

aug_ctrl = 0
may_ctrl = 0
feb_ctrl = 0

for subplot, subset in enumerate(['mcs', 'qlcs', 'non_qlcs']):
    ctrl_ = None
    obs_ = None
    maxyear = 0
    for dset_num, dataset in enumerate(['OBS', 'CTRL', 'PGW']):
        
        ax = plt.subplot(3, 1, subplot+1)
        
        #Get run and dataset from Midwest subset
        df_ = mw_data[subset][dataset].copy()
        #Shift 18Z to 00Z (18Z-18Z convective days) and drop the incomplete first/last years
        df_.index = df_.index + datetime.timedelta(hours=6)
        df_ = df_[~((df_.index.year == 2000) | (df_.index.year == 2013))]
    
        years = df_.groupby(df_.index.year)

        dset = []

        for yid, year in years:

            #Daily slice counts for this year, reindexed to the full calendar year
            a = year.resample('D').count()
            rng = pd.DataFrame(index=pd.date_range(start=str(yid) + '-01-01', end=str(yid) + '-12-31', freq='1D'))
            rng = rng.join(a[['storm_num']])
            rng = rng.fillna(0)

            #Convert to a binary "event day" indicator and accumulate it through the year
            rng['storm_num'] = 1*(rng['storm_num'].values>0)

            dset.append(rng.cumsum())

        dset = pd.concat(dset)
        dset['doy'] = dset.index.dayofyear

        p25 = []
        p75 = []
        mean = []
        for did, doy in dset.groupby('doy'):
            p25.append(np.percentile(doy.storm_num, 25, interpolation='linear'))
            p75.append(np.percentile(doy.storm_num, 75, interpolation='linear'))
            mean.append(np.mean(doy.storm_num))

        #Day of year 366 exists only in leap years, so reuse day 365's statistics
        #to avoid a spurious value at the end of each curve
        mean[-1] = mean[-2]
        p25[-1] = p25[-2]
        p75[-1] = p75[-2]
        
        ax.fill_between(rng.index.to_pydatetime(), p25, p75, color=colors[dset_num], alpha=0.2)
        ax.plot(rng.index.to_pydatetime(), mean, '-', color=colors[dset_num], linewidth=4, label=dataset)
        draw_cumu_labels(ax)
        
        if dataset == 'OBS':
            aug_obs = dset[(dset.index.month==8) & (dset.index.day==31)]['storm_num'].mean()
            
            feb_obs = []
            for year in range(2001, 2013):

                if year not in [2004, 2008, 2012]:
                    feb_obs.append(dset[(dset.index.year==year) & 
                                        (dset.index.month==2) & 
                                        (dset.index.day==28)]['storm_num'].values[0])
                else:
                    feb_obs.append(dset[(dset.index.year==year) & 
                                        (dset.index.month==2) & 
                                        (dset.index.day==29)]['storm_num'].values[0])
                
            feb_obs = np.mean(feb_obs)
                
            may_obs = dset[(dset.index.month==5) & (dset.index.day==31)]['storm_num'].mean()
            
        elif dataset == 'CTRL':
            aug_ctrl = dset[(dset.index.month==8) & (dset.index.day==31)]['storm_num'].mean()
            
            feb_ctrl = []
            for year in range(2001, 2013):

                if year not in [2004, 2008, 2012]:
                    feb_ctrl.append(dset[(dset.index.year==year) & 
                                        (dset.index.month==2) & 
                                        (dset.index.day==28)]['storm_num'].values[0])
                else:
                    feb_ctrl.append(dset[(dset.index.year==year) & 
                                        (dset.index.month==2) & 
                                        (dset.index.day==29)]['storm_num'].values[0])
                
            feb_ctrl = np.mean(feb_ctrl)
                
            may_ctrl = dset[(dset.index.month==5) & (dset.index.day==31)]['storm_num'].mean()

    jan_sep_dif_obs = aug_obs
    mar_may_dif_obs = may_obs - feb_obs
    aug_jun_dif_obs = aug_obs - may_obs
    
    jan_sep_dif_ctrl = aug_ctrl
    mar_may_dif_ctrl = may_ctrl - feb_ctrl
    aug_jun_dif_ctrl = aug_ctrl - may_ctrl
    
    print("JJA OBS: ", subset, aug_jun_dif_obs)
    print("JJA CTRL: ", subset, aug_jun_dif_ctrl)
    
    print("Jan-Sept OBS - CTRL Diff for:", subset, jan_sep_dif_obs - jan_sep_dif_ctrl)
    print("MAM OBS - CTRL Diff for:", subset, mar_may_dif_obs - mar_may_dif_ctrl)
    print("JJA OBS - CTRL Diff for:", subset, aug_jun_dif_obs - aug_jun_dif_ctrl)
        
    ax.set_xlim(rng.index[0], rng.index[-1])
    ax.set_ylabel("Cumulative " + subset.upper() + " Days", fontsize=20)
    locator = mdates.MonthLocator()
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
    
plt.savefig("Figure1.tif", dpi=400, bbox_inches='tight')


3.5
JJA OBS:  mcs 69.33333333333334
JJA CTRL:  mcs 61.49999999999999
Jan-Sept OBS - CTRL Diff for: mcs 3.333333333333343
MAM OBS - CTRL Diff for: mcs -4.416666666666664
JJA OBS - CTRL Diff for: mcs 7.83333333333335
1.5
JJA OBS:  qlcs 49.08333333333333
JJA CTRL:  qlcs 25.25
Jan-Sept OBS - CTRL Diff for: qlcs 29.25
MAM OBS - CTRL Diff for: qlcs 4.416666666666668
JJA OBS - CTRL Diff for: qlcs 23.83333333333333
2.25
JJA OBS:  non_qlcs 47.16666666666667
JJA CTRL:  non_qlcs 54.33333333333333
Jan-Sept OBS - CTRL Diff for: non_qlcs -16.916666666666657
MAM OBS - CTRL Diff for: non_qlcs -8.916666666666668
JJA OBS - CTRL Diff for: non_qlcs -7.166666666666657

Calculate hourly percentages (JJA)


In [16]:
col_names = ['key'] + [str(x) for x in range(0, 24)]

hours = {x:[] for x in col_names}

for sub_key in ['mcs', 'qlcs', 'non_qlcs']:
    for dset_key in ['OBS', 'CTRL', 'PGW']:

        dset_val = mw_data[sub_key][dset_key].copy()
        dset_val = dset_val[dset_val.index.month.isin([6,7,8])]
        total_count = 1196  #possible JJA days in the record (13 summers x 92 days)

        for hr in list(range(0, 24)):
            
            dset_ = dset_val[dset_val.index.hour==hr]
            
            hours[str(hr)].append(100*(calc_day_count(dset_) / total_count))
            
        hours['key'].append(sub_key + "_" + dset_key)

df = pd.DataFrame.from_dict(hours)
df = df.set_index('key')

for gid, group in df.groupby(df.index):
    
    max_idx = group.idxmax(axis=1).values[0]
    min_idx = group.idxmin(axis=1).values[0]
    
    max_val = group.max(axis=1).values[0]
    min_val = group.min(axis=1).values[0]
    
    print(gid, "\nMax:", max_idx.zfill(2), "z - Max %", max_val, 
               "\nMin:", min_idx.zfill(2), "z - Min %", min_val, '\nMax % Diff', max_val - min_val, '\n')
df


mcs_CTRL 
Max: 03 z - Max % 39.046822742474916 
Min: 17 z - Min % 16.806020066889634 
Max % Diff 22.240802675585282 

mcs_OBS 
Max: 07 z - Max % 47.82608695652174 
Min: 19 z - Min % 18.645484949832774 
Max % Diff 29.180602006688968 

mcs_PGW 
Max: 03 z - Max % 31.605351170568564 
Min: 17 z - Min % 17.224080267558527 
Max % Diff 14.381270903010037 

non_qlcs_CTRL 
Max: 03 z - Max % 27.675585284280935 
Min: 17 z - Min % 11.2876254180602 
Max % Diff 16.387959866220733 

non_qlcs_OBS 
Max: 08 z - Max % 19.39799331103679 
Min: 19 z - Min % 7.441471571906355 
Max % Diff 11.956521739130434 

non_qlcs_PGW 
Max: 03 z - Max % 23.662207357859533 
Min: 17 z - Min % 11.538461538461538 
Max % Diff 12.123745819397994 

qlcs_CTRL 
Max: 03 z - Max % 16.722408026755854 
Min: 16 z - Min % 6.270903010033445 
Max % Diff 10.45150501672241 

qlcs_OBS 
Max: 07 z - Max % 33.11036789297659 
Min: 17 z - Min % 12.290969899665551 
Max % Diff 20.81939799331104 

qlcs_PGW 
Max: 05 z - Max % 13.461538461538462 
Min: 17 z - Min % 6.438127090301003 
Max % Diff 7.023411371237459 

Out[16]:
0 1 10 11 12 13 14 15 16 17 ... 21 22 23 3 4 5 6 7 8 9
key
mcs_OBS 34.448161 38.377926 43.311037 40.301003 37.625418 33.779264 30.267559 25.836120 22.324415 20.066890 ... 24.749164 28.177258 31.354515 41.889632 44.565217 45.484950 46.822742 47.826087 46.822742 44.732441
mcs_CTRL 35.953177 38.628763 26.086957 25.334448 22.826087 20.903010 18.979933 17.391304 17.307692 16.806020 ... 26.421405 28.344482 33.528428 39.046823 37.374582 35.033445 34.531773 32.525084 30.769231 27.926421
mcs_PGW 28.929766 30.016722 26.923077 24.581940 24.163880 23.244147 21.655518 19.481605 18.645485 17.224080 ... 20.986622 24.665552 26.337793 31.605351 31.270903 30.685619 29.347826 28.177258 27.257525 27.424749
qlcs_OBS 24.080268 27.675585 28.511706 25.167224 22.575251 20.234114 17.558528 14.966555 12.876254 12.290970 ... 15.301003 18.561873 21.571906 30.183946 32.023411 32.274247 33.026756 33.110368 32.274247 30.183946
qlcs_CTRL 14.464883 15.886288 10.869565 9.782609 8.779264 7.608696 6.772575 6.354515 6.270903 6.438127 ... 9.113712 10.284281 12.959866 16.722408 16.220736 15.635452 14.548495 14.130435 13.043478 11.872910
qlcs_PGW 10.702341 11.287625 11.454849 10.200669 9.197324 8.612040 8.444816 7.190635 7.107023 6.438127 ... 7.107023 8.026756 10.200669 12.374582 12.709030 13.461538 12.959866 13.127090 13.377926 12.541806
non_qlcs_OBS 12.959866 14.966555 18.060201 18.561873 17.642140 15.384615 14.632107 12.374582 10.200669 8.612040 ... 11.204013 11.956522 12.207358 16.137124 17.558528 17.725753 18.561873 19.230769 19.397993 18.394649
non_qlcs_CTRL 25.752508 27.508361 17.391304 17.140468 15.468227 14.464883 13.043478 11.956522 11.705686 11.287625 ... 19.397993 21.153846 23.996656 27.675585 26.421405 24.247492 23.745819 21.488294 20.150502 18.227425
non_qlcs_PGW 21.070234 22.240803 18.143813 17.056856 17.391304 16.471572 14.632107 13.545151 12.290970 11.538462 ... 15.301003 18.311037 18.394649 23.662207 22.742475 21.237458 19.648829 17.809365 17.391304 18.143813

9 rows × 24 columns

Calculate hourly min/max differences in days per year (JJA)


In [198]:
shrs = [str(x) for x in range(0, 24)]
col_names = ['subset', 'dset'] + shrs

hours = {x:[] for x in col_names}

for sub_key in ['mcs', 'qlcs', 'non_qlcs']:
    for dset_key in ['OBS', 'CTRL', 'PGW']:

        dset_val = mw_data[sub_key][dset_key].copy()
        dset_val = dset_val[dset_val.index.month.isin([6,7,8])].copy()

        for hr in list(range(0, 24)):
            
            dset_ = dset_val[dset_val.index.hour==hr].copy()
            
            hours[str(hr)].append(calc_day_count(dset_))
            
        hours['subset'].append(sub_key)
        hours['dset'].append(dset_key)

df = pd.DataFrame.from_dict(hours)

for sset in ['mcs', 'qlcs', 'non_qlcs']:
    
    obs = df[(df.dset=='OBS') & (df.subset==sset)]
    ctrl = df[(df.dset=='CTRL') & (df.subset==sset)]
    
    #Absolute OBS - CTRL difference in JJA day counts at each hour (shape 1 x 24,
    #so the flattened argmax/argmin index is the hour of day)
    diff = np.abs(obs[shrs].values - ctrl[shrs].values)
    
    max_idx = np.argmax(diff)
    min_idx = np.argmin(diff)
    
    #Divide the 13-summer totals by 13 to express the differences as days per year
    max_val = np.max(diff) / 13
    min_val = np.min(diff) / 13
    
    print(sset, "\nMax:", max_idx, "z - Max ", max_val, 
               "\nMin:", min_idx, "z - Min ", min_val)

print("\nTotals")
print("\n", df[col_names[:10]])
print("\n", df[col_names[:2] + col_names[10:18]])
print("\n", df[col_names[:2] + col_names[18:]])


mcs 
Max: 10 z - Max  15.846153846153847 
Min: 22 z - Min  0.15384615384615385
qlcs 
Max: 8 z - Max  17.692307692307693 
Min: 19 z - Min  5.230769230769231
non_qlcs 
Max: 0 z - Max  11.76923076923077 
Min: 9 z - Min  0.15384615384615385

Totals

      subset  dset    0    1    2    3    4    5    6    7
0       mcs   OBS  412  459  488  501  533  544  560  572
1       mcs  CTRL  430  462  466  467  447  419  413  389
2       mcs   PGW  346  359  367  378  374  367  351  337
3      qlcs   OBS  288  331  343  361  383  386  395  396
4      qlcs  CTRL  173  190  190  200  194  187  174  169
5      qlcs   PGW  128  135  144  148  152  161  155  157
6  non_qlcs   OBS  155  179  197  193  210  212  222  230
7  non_qlcs  CTRL  308  329  326  331  316  290  284  257
8  non_qlcs   PGW  252  266  271  283  272  254  235  213

      subset  dset    8    9   10   11   12   13   14   15
0       mcs   OBS  560  535  518  482  450  404  362  309
1       mcs  CTRL  368  334  312  303  273  250  227  208
2       mcs   PGW  326  328  322  294  289  278  259  233
3      qlcs   OBS  386  361  341  301  270  242  210  179
4      qlcs  CTRL  156  142  130  117  105   91   81   76
5      qlcs   PGW  160  150  137  122  110  103  101   86
6  non_qlcs   OBS  232  220  216  222  211  184  175  148
7  non_qlcs  CTRL  241  218  208  205  185  173  156  143
8  non_qlcs   PGW  208  217  217  204  208  197  175  162

      subset  dset   16   17   18   19   20   21   22   23
0       mcs   OBS  267  240  238  223  251  296  337  375
1       mcs  CTRL  207  201  218  246  273  316  339  401
2       mcs   PGW  223  206  206  212  224  251  295  315
3      qlcs   OBS  154  147  147  149  161  183  222  258
4      qlcs  CTRL   75   77   75   81   85  109  123  155
5      qlcs   PGW   85   77   80   77   82   85   96  122
6  non_qlcs   OBS  122  103  103   89  109  134  143  146
7  non_qlcs  CTRL  140  135  155  182  208  232  253  287
8  non_qlcs   PGW  147  138  140  149  159  183  219  220

Draw hourly counts of events


In [20]:
plt.rcParams['figure.figsize'] = 15, 30
plt.rcParams['xtick.labelsize'] = 20
plt.rcParams['ytick.labelsize'] = 20
plt.rcParams['axes.labelsize'] = 20

def draw_hour_labels(ax, lab):
    ax.set_xlim(0, 23)
    ax.legend(prop={'size': 20})

    ax.set_ylabel(lab + " Days Per Hour")
    ax.set_xlabel("Hour of Day (UTC)")
    ax.set_xticks(list(range(0, 24)))
    #ax.set_title(lab + " Swath Count by Hour of Day", fontsize=20)
    ax.grid()
    return ax

colors = [plt.cm.plasma(x/3) for x in range(0, 3)]

for subplot, subset in enumerate(['mcs', 'qlcs', 'non_qlcs']):
    for dset_num, dataset in enumerate(['OBS', 'CTRL', 'PGW']):
        
        ax = plt.subplot(3, 1, subplot+1)
        
        df_ = mw_data[subset][dataset].copy()
        df_ = df_[df_.index.month.isin([6,7,8])].copy()

        years = df_.groupby(df_.index.year)
        #Rows: years 2001-2013; columns: hours 0-23 (JJA day counts per hour)
        year_data = np.zeros((13, 24), dtype=int)
        
        for yid, year in years:
            hour_data = []
            for hid in range(0, 24):

                hour = year[year.index.hour==hid]
                hour_data.append(calc_day_count(hour))
                
            year_data[yid-2001, :] = np.array(hour_data)

        p25 = np.percentile(year_data, 25, axis=0)
        p75 = np.percentile(year_data, 75, axis=0)
        plt.fill_between(list(range(24)), p25, p75, color=colors[dset_num], alpha=0.2)
        plt.plot(np.mean(year_data, axis=0), '-', color=colors[dset_num], linewidth=4, label=dataset)
        
        ax = draw_hour_labels(ax, subset.upper())
        
plt.savefig("Figure2.tif", dpi=400, bbox_inches='tight')