Download swath performance data

Download data.tar.gz from https://tiny.cc/ followed by the full manuscript ID for part 1 (case sensitive). Extract the archive (untar and ungzip) into the directory "MCS/mcs", and make sure the resulting folder is named "data" and contains a folder named "track_data". If you get an error, examine the code to see the expected location (i.e., "../data/track_data/").


In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import pickle
from scipy.stats import ks_2samp

# Global plot configuration for every figure in this notebook.
# The bundled seaborn style sheets were renamed to 'seaborn-v0_8-*' in
# matplotlib 3.6 and the old names were removed in a later release, so try the
# new name first and fall back to the legacy one on older installations.
try:
    plt.style.use('seaborn-v0_8-poster')
except OSError:
    plt.style.use('seaborn-poster')

# Default figure size (width, height) in inches.
plt.rcParams['figure.figsize'] = 12, 15

def draw_boxes(ax, data1, data2):
    """Draw horizontal box plots of ``data1`` with ``data2`` overlaid as dots.

    Parameters
    ----------
    ax : matplotlib Axes (or compatible object)
        Axes to draw on.
    data1 : sequence of array-like
        One distribution per box; passed unchanged to ``ax.boxplot``.
    data2 : sequence of numbers
        One reference value per box, drawn as a grey dot at
        ``(data2[k], k + 1)``.

    Returns
    -------
    The same ``ax`` that was passed in, with its y ticks and tick labels
    cleared.
    """
    mean_marker = dict(marker='.', markeredgecolor='black',
                       markerfacecolor='black')

    bplot = ax.boxplot(data1, showmeans=True, meanprops=mean_marker, vert=False)

    # Row positions are derived from the data length (1, 2, ..., len(data2))
    # instead of being hard-coded to 48 rows.
    ax.plot(data2, range(1, len(data2) + 1), '.', color='grey', markersize=8)

    # Colour the median lines and mean markers black.
    for part in ('medians', 'means'):
        for artist in bplot[part]:
            artist.set_color('black')

    ax.set_yticks([])
    ax.set_yticklabels([])

    return ax

def draw_labels(ax, ssr_x, crsr_x, xlab, df):
    """Decorate ``ax`` with band shading, separators and CRSR/SSR/P_mcs labels.

    All drawing goes through ``ax`` itself, so the function works even when
    ``ax`` is not pyplot's current axes.

    Parameters
    ----------
    ax : matplotlib Axes (or compatible object)
        Axes to decorate.
    ssr_x : float
        x data coordinate at which the "SSR: ..." annotations are placed.
    crsr_x : float
        x data coordinate at which the "CRSR: ..." annotations are placed.
    xlab : str
        Label for the x axis.
    df : DataFrame-like
        Must provide ``df['CRSR'].values``; one CRSR annotation is written per
        entry, in order.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    # Shade every other group of four rows.
    for band_top in range(0, 51, 8):
        ax.axhspan((band_top - 4) + .5, band_top + .5, color='k', alpha=0.1)

    # Solid separators between the blocks of twelve rows.
    for y_sep in (12.5, 24.5, 36.5):
        ax.plot([-150, 150], [y_sep, y_sep], color='k', linewidth=2)

    for row, crsr in enumerate(df['CRSR'].values):
        ax.annotate("CRSR: " + str(crsr).zfill(2) + " km",
                    xy=(crsr_x, row + .75), fontsize=12)

    # NOTE(review): the SSR labels are hard-coded rather than read from df;
    # presumably they mirror the descending sort applied in load_data -- confirm.
    ssr_labels = [192, 96, 48] * 4
    for group, ssr in enumerate(ssr_labels):
        ax.annotate("SSR: " + str(ssr) + " km",
                    xy=(ssr_x, (group * 4) + 2.5), fontsize=15)

    locs = [6, 18, 30, 42]
    labs = ["P$_{mcs}$=\n" + l for l in ['0.95', '0.90', '0.50', '0.00']]

    ax.set_xlabel(xlab)
    ax.set_yticks(locs)
    ax.set_yticklabels(labs)
    ax.grid(which='major', linestyle='-', linewidth=0.15, color='black', zorder=0)
    # Keep only the vertical grid lines.
    ax.yaxis.grid(False)
    return ax

def load_data(year, var_name, data_dir="../data/track_data/sum_stats"):
    """Load the unmatched and rematched summary tables for one year/variable.

    Reads ``<data_dir>/<year>/<year>_<var_name>_unmatched_master.pkl`` and the
    corresponding ``..._rematched_master.pkl`` file, then sorts each table by
    ``MCS_proba``, ``SSR`` and ``CRSR`` in descending order.

    Parameters
    ----------
    year : int or str
        Year used both as the sub-directory and as the file-name prefix.
    var_name : str
        Variable name embedded in the file names (e.g. ``'mean_dur'``).
    data_dir : str, optional
        Directory that holds the per-year sub-directories. Defaults to the
        location used throughout this notebook.

    Returns
    -------
    (df_un, df_re)
        The sorted unmatched and rematched tables, in that order.

    Raises
    ------
    OSError
        If either pickle file cannot be opened.
    """
    prefix = data_dir.rstrip("/") + "/" + str(year) + "/" + str(year) + "_"

    loaded = []
    for suffix in ("_unmatched_master.pkl", "_rematched_master.pkl"):
        # SECURITY: unpickling can execute arbitrary code; only load files
        # obtained from a trusted source.
        # The context manager closes the handle even if unpickling fails.
        with open(prefix + var_name + suffix, 'rb') as handle:
            loaded.append(pickle.load(handle))

    sort_keys = ['MCS_proba', 'SSR', 'CRSR']
    df_un, df_re = [frame.sort_values(by=sort_keys, ascending=False)
                    for frame in loaded]

    return df_un, df_re

def sig_diffs(unmatched, rematched, alpha=0.001):
    """Run a two-sample KS test on each pair of unmatched/rematched samples.

    Rows are paired by position: entry ``k`` of
    ``unmatched['Distribution']`` is tested against entry ``k`` of
    ``rematched['Distribution']`` with ``scipy.stats.ks_2samp``.

    Parameters
    ----------
    unmatched : DataFrame-like
        Must provide a ``'Distribution'`` column of samples.
    rematched : DataFrame-like
        Must provide ``'Distribution'``, ``'CRSR'``, ``'SSR'`` and
        ``'MCS_proba'`` columns; the last three are copied into the result.
    alpha : float, optional
        A pair is flagged ``"YES"`` when its p-value is strictly below this
        threshold. Defaults to 0.001.

    Returns
    -------
    dict
        Lists of equal length under the keys ``'CRSR'``, ``'SSR'``,
        ``'MCS_proba'``, ``'ks_stat'``, ``'p_val'`` and ``'Significant'``.
    """
    data1 = unmatched['Distribution'].values
    data2 = rematched['Distribution'].values

    crsr = rematched['CRSR'].values
    ssr = rematched['SSR'].values
    prob = rematched['MCS_proba'].values

    d = {'CRSR': [], 'SSR': [], 'MCS_proba': [], 'ks_stat': [], 'p_val': [], 'Significant': []}

    for idx, (x1, x2) in enumerate(zip(data1, data2)):

        ks_s, p = ks_2samp(x1, x2)

        d['CRSR'].append(crsr[idx])
        d['SSR'].append(ssr[idx])
        d['MCS_proba'].append(prob[idx])
        d['ks_stat'].append(ks_s)
        d['p_val'].append(p)

        # A single if/else guarantees exactly one flag per row; a NaN p-value
        # falls into the "not significant" branch instead of being skipped,
        # which would leave the columns with unequal lengths.
        d['Significant'].append("YES" if p < alpha else "--")

    return d

# Swath duration, 2015: box plots of the rematched distributions with the
# unmatched means overlaid, followed by the extreme mean differences and the
# per-configuration KS significance table.
df_un, df_re = load_data(2015, 'mean_dur')

fig, ax = plt.subplots(1)
ax = draw_boxes(ax, df_re['Distribution'].values, df_un['mean'].values)
ax = draw_labels(ax, 13.5, 16.6, "Swath Duration (hrs)", df_re)
ax.set_xlim(0, 19)
ax.set_ylim(.5, 48.5)

# Rematched minus unmatched mean, row by row (both frames share a sort order).
mean_diff = df_re['mean'].values - df_un['mean'].values
max_diff, min_diff = np.max(mean_diff), np.min(mean_diff)
max_loc, min_loc = np.argmax(mean_diff), np.argmin(mean_diff)

for label, value, loc in (('Max Diff (hrs):', max_diff, max_loc),
                          ('Min Diff (hrs):', min_diff, min_loc)):
    print(label, value,
          'CRSR:', df_re['CRSR'].values[loc],
          'SSR:', df_re['SSR'].values[loc],
          'MCS_Proba:', df_re['MCS_proba'].values[loc])

sig_test = (pd.DataFrame.from_dict(sig_diffs(df_un, df_re))
            .sort_values(by=['MCS_proba', 'SSR', 'CRSR'], ascending=True))

print(sig_test[['CRSR', 'SSR', 'MCS_proba', 'ks_stat', 'p_val', 'Significant']])


Max Diff (hrs): 1.69168050653 CRSR: 48 SSR: 96 MCS_Proba: 0.9
Min Diff (hrs): 0.909192022141 CRSR: 24 SSR: 48 MCS_Proba: 0.0
    CRSR  SSR  MCS_proba   ks_stat         p_val Significant
47     6   48       0.00  0.184357  1.404588e-50         YES
46    12   48       0.00  0.178077  7.337071e-59         YES
45    24   48       0.00  0.165432  1.536446e-64         YES
44    48   48       0.00  0.171573  2.216584e-83         YES
43     6   96       0.00  0.191056  1.387887e-49         YES
42    12   96       0.00  0.180184  5.382281e-56         YES
41    24   96       0.00  0.168985  3.883257e-63         YES
40    48   96       0.00  0.173515  6.577654e-80         YES
39     6  192       0.00  0.181681  1.285631e-42         YES
38    12  192       0.00  0.178444  1.242194e-52         YES
37    24  192       0.00  0.166647  1.816972e-59         YES
36    48  192       0.00  0.173979  1.591326e-78         YES
35     6   48       0.50  0.202529  1.110140e-25         YES
34    12   48       0.50  0.187510  3.495510e-24         YES
33    24   48       0.50  0.176113  4.860107e-23         YES
32    48   48       0.50  0.171137  1.168207e-23         YES
31     6   96       0.50  0.207889  8.205037e-31         YES
30    12   96       0.50  0.204213  3.989274e-33         YES
29    24   96       0.50  0.198537  1.515126e-33         YES
28    48   96       0.50  0.191994  6.952020e-33         YES
27     6  192       0.50  0.217996  8.468890e-35         YES
26    12  192       0.50  0.205102  3.154260e-34         YES
25    24  192       0.50  0.211821  2.696460e-39         YES
24    48  192       0.50  0.208077  5.244080e-40         YES
23     6   48       0.90  0.220275  8.571308e-19         YES
22    12   48       0.90  0.197519  7.676827e-17         YES
21    24   48       0.90  0.189353  1.670935e-17         YES
20    48   48       0.90  0.194950  1.147902e-21         YES
19     6   96       0.90  0.199439  9.824298e-21         YES
18    12   96       0.90  0.199976  1.614758e-22         YES
17    24   96       0.90  0.193024  7.879763e-23         YES
16    48   96       0.90  0.202954  2.692422e-27         YES
15     6  192       0.90  0.217246  9.071029e-27         YES
14    12  192       0.90  0.211766  1.602499e-27         YES
13    24  192       0.90  0.194448  7.540039e-25         YES
12    48  192       0.90  0.218659  1.088288e-32         YES
11     6   48       0.95  0.217233  1.803758e-16         YES
10    12   48       0.95  0.214106  7.197833e-18         YES
9     24   48       0.95  0.193131  1.782377e-16         YES
8     48   48       0.95  0.197345  1.318024e-19         YES
7      6   96       0.95  0.212822  5.938335e-21         YES
6     12   96       0.95  0.197933  9.576489e-20         YES
5     24   96       0.95  0.206136  1.237032e-22         YES
4     48   96       0.95  0.216133  2.177330e-26         YES
3      6  192       0.95  0.230515  5.066272e-27         YES
2     12  192       0.95  0.205395  9.645885e-23         YES
1     24  192       0.95  0.192684  5.358054e-21         YES
0     48  192       0.95  0.221570  9.043592e-28         YES

In [2]:
# Swath duration, 2016: box plots of the rematched distributions with the
# unmatched means overlaid, followed by the extreme mean differences and the
# per-configuration KS significance table.
df_un, df_re = load_data(2016, 'mean_dur')

fig, ax = plt.subplots(1)
ax = draw_boxes(ax, df_re['Distribution'].values, df_un['mean'].values)
ax = draw_labels(ax, 13.5, 16.6, "Swath Duration (hrs)", df_re)
ax.set_xlim(0, 19)
ax.set_ylim(.5, 48.5)

# Rematched minus unmatched mean, row by row (both frames share a sort order).
mean_diff = df_re['mean'].values - df_un['mean'].values
max_diff, min_diff = np.max(mean_diff), np.min(mean_diff)
max_loc, min_loc = np.argmax(mean_diff), np.argmin(mean_diff)

for label, value, loc in (('Max Diff (hrs):', max_diff, max_loc),
                          ('Min Diff (hrs):', min_diff, min_loc)):
    print(label, value,
          'CRSR:', df_re['CRSR'].values[loc],
          'SSR:', df_re['SSR'].values[loc],
          'MCS_Proba:', df_re['MCS_proba'].values[loc])

sig_test = (pd.DataFrame.from_dict(sig_diffs(df_un, df_re))
            .sort_values(by=['MCS_proba', 'SSR', 'CRSR'], ascending=True))

print(sig_test[['CRSR', 'SSR', 'MCS_proba', 'ks_stat', 'p_val', 'Significant']])


Max Diff (hrs): 1.65915461796 CRSR: 24 SSR: 48 MCS_Proba: 0.95
Min Diff (hrs): 0.932125219162 CRSR: 6 SSR: 48 MCS_Proba: 0.0
    CRSR  SSR  MCS_proba   ks_stat         p_val Significant
47     6   48       0.00  0.181804  1.334652e-51         YES
46    12   48       0.00  0.183245  3.141849e-64         YES
45    24   48       0.00  0.174367  1.153082e-73         YES
44    48   48       0.00  0.177928  9.439442e-93         YES
43     6   96       0.00  0.189969  3.799047e-51         YES
42    12   96       0.00  0.184938  3.173125e-60         YES
41    24   96       0.00  0.175102  1.312222e-69         YES
40    48   96       0.00  0.182671  4.305834e-92         YES
39     6  192       0.00  0.194029  1.553600e-50         YES
38    12  192       0.00  0.184702  7.445389e-58         YES
37    24  192       0.00  0.175102  4.429525e-68         YES
36    48  192       0.00  0.186287  5.012759e-94         YES
35     6   48       0.50  0.198692  4.666628e-25         YES
34    12   48       0.50  0.191837  1.410733e-25         YES
33    24   48       0.50  0.174464  3.302213e-23         YES
32    48   48       0.50  0.191457  3.549876e-30         YES
31     6   96       0.50  0.214450  9.629288e-34         YES
30    12   96       0.50  0.199414  1.706956e-31         YES
29    24   96       0.50  0.187039  3.251712e-30         YES
28    48   96       0.50  0.186602  1.726313e-32         YES
27     6  192       0.50  0.223628  1.560506e-37         YES
26    12  192       0.50  0.200535  6.555126e-33         YES
25    24  192       0.50  0.194015  1.054737e-33         YES
24    48  192       0.50  0.196973  4.238785e-37         YES
23     6   48       0.90  0.241176  7.290478e-23         YES
22    12   48       0.90  0.209727  2.007106e-19         YES
21    24   48       0.90  0.205286  5.459452e-21         YES
20    48   48       0.90  0.203796  6.148597e-24         YES
19     6   96       0.90  0.216565  1.031797e-25         YES
18    12   96       0.90  0.185336  7.399805e-20         YES
17    24   96       0.90  0.183018  5.384429e-21         YES
16    48   96       0.90  0.184305  4.352244e-24         YES
15     6  192       0.90  0.217791  3.633058e-28         YES
14    12  192       0.90  0.197481  7.730099e-25         YES
13    24  192       0.90  0.209177  5.860458e-30         YES
12    48  192       0.90  0.181187  1.247842e-24         YES
11     6   48       0.95  0.245344  8.193690e-21         YES
10    12   48       0.95  0.216470  1.545333e-18         YES
9     24   48       0.95  0.212735  5.877123e-20         YES
8     48   48       0.95  0.190121  4.181361e-18         YES
7      6   96       0.95  0.219958  1.695144e-23         YES
6     12   96       0.95  0.198056  4.739712e-20         YES
5     24   96       0.95  0.181754  4.356215e-18         YES
4     48   96       0.95  0.176311  3.391494e-18         YES
3      6  192       0.95  0.223111  1.145892e-25         YES
2     12  192       0.95  0.187171  1.662168e-19         YES
1     24  192       0.95  0.182604  1.471105e-19         YES
0     48  192       0.95  0.190483  8.347632e-22         YES

In [3]:
# Intensity error, 2015: box plots of the rematched distributions with the
# unmatched means overlaid, followed by the extreme mean differences and the
# per-configuration KS significance table.
df_un, df_re = load_data(2015, 'std_refl')

fig, ax = plt.subplots(1)

ax = draw_boxes(ax, df_re['Distribution'].values, df_un['mean'].values)

ax = draw_labels(ax, 12.1, 13.75, "Intensity Error (dBZ)", df_re)

ax.set_xlim(5, 15)
ax.set_ylim(.5, 48.5)

# Rematched minus unmatched mean, computed once and reused below.
mean_diff = df_re['mean'].values - df_un['mean'].values
max_diff = np.max(mean_diff)
min_diff = np.min(mean_diff)
max_loc = np.argmax(mean_diff)
min_loc = np.argmin(mean_diff)

# The differences carry the unit of the plotted quantity (dBZ), not hours.
print('Max Diff (dBZ):', max_diff,
      'CRSR:', df_re['CRSR'].values[max_loc],
      'SSR:', df_re['SSR'].values[max_loc],
      'MCS_Proba:', df_re['MCS_proba'].values[max_loc])

print('Min Diff (dBZ):', min_diff,
      'CRSR:', df_re['CRSR'].values[min_loc],
      'SSR:', df_re['SSR'].values[min_loc],
      'MCS_Proba:', df_re['MCS_proba'].values[min_loc])

sig_test = pd.DataFrame.from_dict(sig_diffs(df_un, df_re))
sig_test = sig_test.sort_values(by=['MCS_proba', 'SSR', 'CRSR'], ascending=True)

print(sig_test[['CRSR', 'SSR', 'MCS_proba', 'ks_stat', 'p_val', 'Significant']])


Max Diff (hrs): 0.0636792535986 CRSR: 6 SSR: 192 MCS_Proba: 0.0
Min Diff (hrs): -0.037824220682 CRSR: 48 SSR: 192 MCS_Proba: 0.9
    CRSR  SSR  MCS_proba   ks_stat     p_val Significant
47     6   48       0.00  0.016246  0.933750          --
46    12   48       0.00  0.015691  0.889930          --
45    24   48       0.00  0.020127  0.462299          --
44    48   48       0.00  0.015089  0.687856          --
43     6   96       0.00  0.025909  0.508443          --
42    12   96       0.00  0.025499  0.383010          --
41    24   96       0.00  0.021205  0.437675          --
40    48   96       0.00  0.019202  0.421551          --
39     6  192       0.00  0.048182  0.024215          --
38    12  192       0.00  0.041056  0.033313          --
37    24  192       0.00  0.027224  0.181040          --
36    48  192       0.00  0.023814  0.194852          --
35     6   48       0.50  0.021782  0.972686          --
34    12   48       0.50  0.012882  0.999990          --
33    24   48       0.50  0.023225  0.898414          --
32    48   48       0.50  0.018880  0.968952          --
31     6   96       0.50  0.024750  0.871834          --
30    12   96       0.50  0.025494  0.806324          --
29    24   96       0.50  0.021125  0.913439          --
28    48   96       0.50  0.030920  0.464598          --
27     6  192       0.50  0.028906  0.712701          --
26    12  192       0.50  0.025392  0.790061          --
25    24  192       0.50  0.020791  0.910686          --
24    48  192       0.50  0.023525  0.773405          --
23     6   48       0.90  0.040516  0.675376          --
22    12   48       0.90  0.041524  0.566655          --
21    24   48       0.90  0.026287  0.937814          --
20    48   48       0.90  0.023901  0.941864          --
19     6   96       0.90  0.027273  0.906085          --
18    12   96       0.90  0.019472  0.993891          --
17    24   96       0.90  0.017359  0.997286          --
16    48   96       0.90  0.015686  0.998698          --
15     6  192       0.90  0.028056  0.849039          --
14    12  192       0.90  0.023467  0.938146          --
13    24  192       0.90  0.020322  0.974726          --
12    48  192       0.90  0.027918  0.735746          --
11     6   48       0.95  0.038424  0.797588          --
10    12   48       0.95  0.033394  0.867509          --
9     24   48       0.95  0.031630  0.850867          --
8     48   48       0.95  0.030830  0.810525          --
7      6   96       0.95  0.029199  0.892116          --
6     12   96       0.95  0.019844  0.996182          --
5     24   96       0.95  0.024331  0.944344          --
4     48   96       0.95  0.022741  0.959842          --
3      6  192       0.95  0.040789  0.479694          --
2     12  192       0.95  0.029885  0.815630          --
1     24  192       0.95  0.027537  0.855919          --
0     48  192       0.95  0.035559  0.563277          --

In [4]:
# Intensity error, 2016: box plots of the rematched distributions with the
# unmatched means overlaid, followed by the extreme mean differences and the
# per-configuration KS significance table.
df_un, df_re = load_data(2016, 'std_refl')

fig, ax = plt.subplots(1)

ax = draw_boxes(ax, df_re['Distribution'].values, df_un['mean'].values)

ax = draw_labels(ax, 12.1, 13.75, "Intensity Error (dBZ)", df_re)

ax.set_xlim(5, 15)
ax.set_ylim(.5, 48.5)

# Rematched minus unmatched mean, computed once and reused below.
mean_diff = df_re['mean'].values - df_un['mean'].values
max_diff = np.max(mean_diff)
min_diff = np.min(mean_diff)
max_loc = np.argmax(mean_diff)
min_loc = np.argmin(mean_diff)

# The differences carry the unit of the plotted quantity (dBZ), not hours.
print('Max Diff (dBZ):', max_diff,
      'CRSR:', df_re['CRSR'].values[max_loc],
      'SSR:', df_re['SSR'].values[max_loc],
      'MCS_Proba:', df_re['MCS_proba'].values[max_loc])

print('Min Diff (dBZ):', min_diff,
      'CRSR:', df_re['CRSR'].values[min_loc],
      'SSR:', df_re['SSR'].values[min_loc],
      'MCS_Proba:', df_re['MCS_proba'].values[min_loc])

sig_test = pd.DataFrame.from_dict(sig_diffs(df_un, df_re))
sig_test = sig_test.sort_values(by=['MCS_proba', 'SSR', 'CRSR'], ascending=True)

print(sig_test[['CRSR', 'SSR', 'MCS_proba', 'ks_stat', 'p_val', 'Significant']])


Max Diff (hrs): 0.0473031947824 CRSR: 6 SSR: 192 MCS_Proba: 0.0
Min Diff (hrs): -0.0352306507284 CRSR: 48 SSR: 192 MCS_Proba: 0.95
    CRSR  SSR  MCS_proba   ks_stat     p_val Significant
47     6   48       0.00  0.024633  0.508427          --
46    12   48       0.00  0.022939  0.451361          --
45    24   48       0.00  0.016453  0.696379          --
44    48   48       0.00  0.013417  0.793448          --
43     6   96       0.00  0.035045  0.168885          --
42    12   96       0.00  0.032237  0.133171          --
41    24   96       0.00  0.016387  0.739784          --
40    48   96       0.00  0.013800  0.797808          --
39     6  192       0.00  0.041231  0.076438          --
38    12  192       0.00  0.033746  0.115835          --
37    24  192       0.00  0.020672  0.462569          --
36    48  192       0.00  0.017042  0.559153          --
35     6   48       0.50  0.027927  0.821178          --
34    12   48       0.50  0.023050  0.928292          --
33    24   48       0.50  0.028942  0.676658          --
32    48   48       0.50  0.024297  0.816211          --
31     6   96       0.50  0.017000  0.995715          --
30    12   96       0.50  0.019885  0.958074          --
29    24   96       0.50  0.013802  0.999139          --
28    48   96       0.50  0.015231  0.993001          --
27     6  192       0.50  0.014381  0.999630          --
26    12  192       0.50  0.023214  0.864952          --
25    24  192       0.50  0.015233  0.994782          --
24    48  192       0.50  0.016287  0.981606          --
23     6   48       0.90  0.048874  0.438874          --
22    12   48       0.90  0.034208  0.789003          --
21    24   48       0.90  0.026894  0.923826          --
20    48   48       0.90  0.024707  0.930207          --
19     6   96       0.90  0.024058  0.957749          --
18    12   96       0.90  0.022236  0.969483          --
17    24   96       0.90  0.023996  0.910470          --
16    48   96       0.90  0.019346  0.977855          --
15     6  192       0.90  0.022426  0.967897          --
14    12  192       0.90  0.028055  0.802268          --
13    24  192       0.90  0.013487  0.999933          --
12    48  192       0.90  0.017714  0.989002          --
11     6   48       0.95  0.055096  0.348033          --
10    12   48       0.95  0.043101  0.580192          --
9     24   48       0.95  0.037121  0.681412          --
8     48   48       0.95  0.032036  0.785670          --
7      6   96       0.95  0.029965  0.855073          --
6     12   96       0.95  0.020523  0.992484          --
5     24   96       0.95  0.023140  0.960500          --
4     48   96       0.95  0.018907  0.994832          --
3      6  192       0.95  0.032480  0.753193          --
2     12  192       0.95  0.031642  0.730939          --
1     24  192       0.95  0.025112  0.910248          --
0     48  192       0.95  0.026409  0.874043          --

In [5]:
# Linearity error, 2015: box plots of the rematched distributions with the
# unmatched means overlaid, followed by the extreme mean differences and the
# per-configuration KS significance table.
df_un, df_re = load_data(2015, 'lin_err')

fig, ax = plt.subplots(1)

ax = draw_boxes(ax, df_re['Distribution'].values, df_un['mean'].values)

ax = draw_labels(ax, 95, 115, "Linearity Error (km)", df_re)

ax.set_xlim(0, 131)
ax.set_ylim(.5, 48.5)

# Rematched minus unmatched mean, computed once and reused below.
mean_diff = df_re['mean'].values - df_un['mean'].values
max_diff = np.max(mean_diff)
min_diff = np.min(mean_diff)
max_loc = np.argmax(mean_diff)
min_loc = np.argmin(mean_diff)

# The differences carry the unit of the plotted quantity (km), not hours.
print('Max Diff (km):', max_diff,
      'CRSR:', df_re['CRSR'].values[max_loc],
      'SSR:', df_re['SSR'].values[max_loc],
      'MCS_Proba:', df_re['MCS_proba'].values[max_loc])

print('Min Diff (km):', min_diff,
      'CRSR:', df_re['CRSR'].values[min_loc],
      'SSR:', df_re['SSR'].values[min_loc],
      'MCS_Proba:', df_re['MCS_proba'].values[min_loc])

sig_test = pd.DataFrame.from_dict(sig_diffs(df_un, df_re))
sig_test = sig_test.sort_values(by=['MCS_proba', 'SSR', 'CRSR'], ascending=True)

print(sig_test[['CRSR', 'SSR', 'MCS_proba', 'ks_stat', 'p_val', 'Significant']])


Max Diff (hrs): 6.84185265466 CRSR: 48 SSR: 192 MCS_Proba: 0.95
Min Diff (hrs): 2.99178234102 CRSR: 48 SSR: 48 MCS_Proba: 0.0
    CRSR  SSR  MCS_proba   ks_stat     p_val Significant
47     6   48       0.00  0.068710  0.000062         YES
46    12   48       0.00  0.070969  0.000002         YES
45    24   48       0.00  0.063366  0.000001         YES
44    48   48       0.00  0.051501  0.000014         YES
43     6   96       0.00  0.070098  0.000101         YES
42    12   96       0.00  0.070087  0.000008         YES
41    24   96       0.00  0.060557  0.000009         YES
40    48   96       0.00  0.054699  0.000007         YES
39     6  192       0.00  0.060492  0.001903          --
38    12  192       0.00  0.070827  0.000010         YES
37    24  192       0.00  0.060680  0.000013         YES
36    48  192       0.00  0.054976  0.000008         YES
35     6   48       0.50  0.094060  0.000309         YES
34    12   48       0.50  0.081044  0.001556          --
33    24   48       0.50  0.073494  0.002817          --
32    48   48       0.50  0.070097  0.002543          --
31     6   96       0.50  0.084377  0.000545         YES
30    12   96       0.50  0.082663  0.000357         YES
29    24   96       0.50  0.081924  0.000165         YES
28    48   96       0.50  0.066034  0.002726          --
27     6  192       0.50  0.085331  0.000399         YES
26    12  192       0.50  0.083479  0.000209         YES
25    24  192       0.50  0.089768  0.000016         YES
24    48  192       0.50  0.078463  0.000117         YES
23     6   48       0.90  0.087456  0.015661          --
22    12   48       0.90  0.064552  0.100768          --
21    24   48       0.90  0.071163  0.030529          --
20    48   48       0.90  0.086126  0.001375          --
19     6   96       0.90  0.068827  0.033899          --
18    12   96       0.90  0.083989  0.002522          --
17    24   96       0.90  0.085053  0.000956         YES
16    48   96       0.90  0.094280  0.000058         YES
15     6  192       0.90  0.078817  0.005494          --
14    12  192       0.90  0.098768  0.000083         YES
13    24  192       0.90  0.100697  0.000023         YES
12    48  192       0.90  0.097507  0.000021         YES
11     6   48       0.95  0.083534  0.038529          --
10    12   48       0.95  0.060510  0.191568          --
9     24   48       0.95  0.090095  0.004779          --
8     48   48       0.95  0.076406  0.013501          --
7      6   96       0.95  0.063326  0.086404          --
6     12   96       0.95  0.079182  0.009763          --
5     24   96       0.95  0.095157  0.000414         YES
4     48   96       0.95  0.112296  0.000007         YES
3      6  192       0.95  0.093732  0.001148          --
2     12  192       0.95  0.100959  0.000204         YES
1     24  192       0.95  0.097475  0.000200         YES
0     48  192       0.95  0.119610  0.000002         YES

In [6]:
# Linearity error, 2016: box plots of the rematched distributions with the
# unmatched means overlaid, followed by the extreme mean differences and the
# per-configuration KS significance table.
df_un, df_re = load_data(2016, 'lin_err')

fig, ax = plt.subplots(1)

ax = draw_boxes(ax, df_re['Distribution'].values, df_un['mean'].values)

ax = draw_labels(ax, 95, 115, "Linearity Error (km)", df_re)

ax.set_xlim(0, 131)
ax.set_ylim(.5, 48.5)

# Rematched minus unmatched mean, computed once and reused below.
mean_diff = df_re['mean'].values - df_un['mean'].values
max_diff = np.max(mean_diff)
min_diff = np.min(mean_diff)
max_loc = np.argmax(mean_diff)
min_loc = np.argmin(mean_diff)

# The differences carry the unit of the plotted quantity (km), not hours.
print('Max Diff (km):', max_diff,
      'CRSR:', df_re['CRSR'].values[max_loc],
      'SSR:', df_re['SSR'].values[max_loc],
      'MCS_Proba:', df_re['MCS_proba'].values[max_loc])

print('Min Diff (km):', min_diff,
      'CRSR:', df_re['CRSR'].values[min_loc],
      'SSR:', df_re['SSR'].values[min_loc],
      'MCS_Proba:', df_re['MCS_proba'].values[min_loc])

sig_test = pd.DataFrame.from_dict(sig_diffs(df_un, df_re))
sig_test = sig_test.sort_values(by=['MCS_proba', 'SSR', 'CRSR'], ascending=True)

print(sig_test[['CRSR', 'SSR', 'MCS_proba', 'ks_stat', 'p_val', 'Significant']])


Max Diff (hrs): 6.17534253831 CRSR: 48 SSR: 48 MCS_Proba: 0.9
Min Diff (hrs): 2.44075534771 CRSR: 48 SSR: 96 MCS_Proba: 0.0
    CRSR  SSR  MCS_proba   ks_stat         p_val Significant
47     6   48       0.00  0.065254  1.513812e-04         YES
46    12   48       0.00  0.073534  5.138043e-07         YES
45    24   48       0.00  0.064649  3.641846e-07         YES
44    48   48       0.00  0.051597  7.747679e-06         YES
43     6   96       0.00  0.066308  2.877902e-04         YES
42    12   96       0.00  0.066633  1.881806e-05         YES
41    24   96       0.00  0.066016  5.377164e-07         YES
40    48   96       0.00  0.047282  1.102714e-04         YES
39     6  192       0.00  0.072504  8.261662e-05         YES
38    12  192       0.00  0.071454  5.682505e-06         YES
37    24  192       0.00  0.072207  4.087830e-08         YES
36    48  192       0.00  0.050973  2.762061e-05         YES
35     6   48       0.50  0.080880  2.528464e-03          --
34    12   48       0.50  0.062415  2.590232e-02          --
33    24   48       0.50  0.061036  1.971735e-02          --
32    48   48       0.50  0.056977  2.401826e-02          --
31     6   96       0.50  0.066538  1.093922e-02          --
30    12   96       0.50  0.063499  1.020271e-02          --
29    24   96       0.50  0.061449  8.565864e-03          --
28    48   96       0.50  0.052407  2.604133e-02          --
27     6  192       0.50  0.063531  1.513856e-02          --
26    12  192       0.50  0.063915  8.604088e-03          --
25    24  192       0.50  0.079489  1.445055e-04         YES
24    48  192       0.50  0.065846  1.646143e-03          --
23     6   48       0.90  0.092837  8.740132e-03          --
22    12   48       0.90  0.084835  1.072715e-02          --
21    24   48       0.90  0.082220  7.148136e-03          --
20    48   48       0.90  0.095552  3.010266e-04         YES
19     6   96       0.90  0.079336  7.109604e-03          --
18    12   96       0.90  0.070472  1.580774e-02          --
17    24   96       0.90  0.073043  5.764294e-03          --
16    48   96       0.90  0.073628  2.920896e-03          --
15     6  192       0.90  0.084317  2.039493e-03          --
14    12  192       0.90  0.082221  1.634259e-03          --
13    24  192       0.90  0.092197  1.001484e-04         YES
12    48  192       0.90  0.085396  2.040118e-04         YES
11     6   48       0.95  0.103271  4.377130e-03          --
10    12   48       0.95  0.103795  1.784202e-03          --
9     24   48       0.95  0.096972  1.765644e-03          --
8     48   48       0.95  0.105012  2.033175e-04         YES
7      6   96       0.95  0.084179  5.978280e-03          --
6     12   96       0.95  0.075691  1.287928e-02          --
5     24   96       0.95  0.079143  5.098789e-03          --
4     48   96       0.95  0.091085  5.976709e-04         YES
3      6  192       0.95  0.090884  1.611574e-03          --
2     12  192       0.95  0.080484  4.367133e-03          --
1     24  192       0.95  0.091468  4.583721e-04         YES
0     48  192       0.95  0.085083  1.367094e-03          --

In [7]:
def get_normalized(df1, df2, df3):
    """Combine three ``'mean'`` columns into one max-normalised score.

    Each frame's ``'mean'`` values are divided by their own maximum; the
    scaled values of ``df1`` and ``df2`` are added and those of ``df3`` are
    subtracted, element by element.
    """
    scaled = []
    for frame in (df1, df2, df3):
        means = frame['mean'].values
        scaled.append(means / np.max(means))

    first, second, third = scaled
    return (first + second) - third

# Combined normalised error for 2015, using only the rematched tables (the
# second item returned by load_data). Indexing the result instead of unpacking
# into `_` avoids overwriting IPython's last-output variable.
df_dur = load_data(2015, 'mean_dur')[1]
df_std = load_data(2015, 'std_refl')[1]
df_lin = load_data(2015, 'lin_err')[1]

vals = get_normalized(df_std, df_lin, df_dur)
n_rows = len(vals)  # one plotted row per configuration

fig, ax = plt.subplots(1)

# Plot each configuration's departure from the mean normalised error.
ax.plot(vals - np.mean(vals), list(range(1, n_rows + 1)), 'k.')

ax = draw_labels(ax, .11, .17, "Normalized Error (Difference From Mean)", df_dur)

# Dashed zero reference line, drawn on the explicit axes rather than through
# the pyplot state machine.
ax.plot([0, 0], [0, n_rows + 2], 'k--', linewidth=2)
ax.set_xlim(-.2, .22)
ax.set_ylim(.5, n_rows + .5)


Out[7]:
(0.5, 48.5)

In [8]:
# Combined normalised error for 2016, using only the rematched tables (the
# second item returned by load_data). Indexing the result instead of unpacking
# into `_` avoids overwriting IPython's last-output variable.
df_dur = load_data(2016, 'mean_dur')[1]
df_std = load_data(2016, 'std_refl')[1]
df_lin = load_data(2016, 'lin_err')[1]

vals = get_normalized(df_std, df_lin, df_dur)
n_rows = len(vals)  # one plotted row per configuration

fig, ax = plt.subplots(1)

# Plot each configuration's departure from the mean normalised error.
ax.plot(vals - np.mean(vals), list(range(1, n_rows + 1)), 'k.')

ax = draw_labels(ax, .11, .17, "Normalized Error (Difference From Mean)", df_dur)

# Dashed zero reference line, drawn on the explicit axes rather than through
# the pyplot state machine.
ax.plot([0, 0], [0, n_rows + 2], 'k--', linewidth=2)
ax.set_xlim(-.2, .22)
ax.set_ylim(.5, n_rows + .5)


Out[8]:
(0.5, 48.5)