Analysis of RMSZ values


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%pylab inline
pd.__version__ # need 0.14.0 for multiindex slicing


Populating the interactive namespace from numpy and matplotlib
Out[1]:
'0.14.1'

Read files


In [2]:
# Original-data RMSZ table: load it, then replace each ENSEMBLE label with the
# integer found in its second dot-separated field.
o_raw = pd.read_table("rmsz_orig_4var.txt")
o_raw["ENSEMBLE"] = o_raw["ENSEMBLE"].map(lambda label: int(label.split(".")[1]))

# Reconstructed-data RMSZ table: same treatment, but the integer is taken from
# the first dot-separated field.
# NOTE(review): field 0 here vs. field 1 above -- presumably the two files use
# different label layouts; confirm against the input files.
r_raw = pd.read_table("rmsz_recon_4var.txt")
r_raw["ENSEMBLE"] = r_raw["ENSEMBLE"].map(lambda label: int(label.split(".")[0]))

In [3]:
# Index both tables by (VARIABLE, ENSEMBLE) and sort the rows by that index so
# the per-variable MultiIndex slices taken later operate on a sorted index.
# `axis` is passed by keyword: recent pandas releases no longer accept it as a
# positional argument to sort_index, while the keyword form works everywhere.
o = o_raw.set_index(["VARIABLE", "ENSEMBLE"]).sort_index(axis=0)
r = r_raw.set_index(["VARIABLE", "ENSEMBLE"]).sort_index(axis=0)

RMSZ-RMSZ plots


In [17]:
def plot_var(var):
    """Plot reconstructed vs. original RMSZ for one variable with a linear fit.

    Reads the "RMSZ" column of the module-level frames ``o`` (x axis) and
    ``r`` (y axis) for every ENSEMBLE entry under ``var``, fits a first-degree
    polynomial to the pairs, and draws the fitted line in red together with a
    scatter of the points on the current matplotlib axes. The fitted slope
    and intercept are shown in the title.

    Parameters
    ----------
    var : label
        First-level (VARIABLE) index label selecting the rows to plot.
    """
    every_ensemble = slice(None)
    x_orig = o.loc[(var, every_ensemble), "RMSZ"]
    y_recon = r.loc[(var, every_ensemble), "RMSZ"]

    # Degree-1 least-squares fit; coefficients are ordered highest power first.
    coeffs = np.polyfit(x_orig, y_recon, 1)
    slope, intercept = coeffs

    plt.plot(x_orig, np.polyval(coeffs, x_orig), "-r")
    plt.scatter(x_orig, y_recon)
    plt.grid(True)
    # Equal axis scaling so a slope of 1 appears as a 45-degree line.
    plt.axis("equal")
    plt.xlabel("RMSZ original")
    plt.ylabel("RMSZ reconstructed")
    heading = "RMSZ for variable {} (slope={:.3}, intercept={:.3})".format(var, slope, intercept)
    plt.title(heading)

In [18]:
# Scatter and linear fit of reconstructed vs. original RMSZ for variable "U".
plot_var("U")



In [6]:
# Scatter and linear fit of reconstructed vs. original RMSZ for variable "FSDSC".
plot_var("FSDSC")



In [7]:
# Scatter and linear fit of reconstructed vs. original RMSZ for variable "Z3".
plot_var("Z3")



In [8]:
# Scatter and linear fit of reconstructed vs. original RMSZ for variable "CCN3".
plot_var("CCN3")



In [24]:
def conf_int(var, n_sigma=2):
    """Interval estimates for the linear fit of reconstructed on original RMSZ.

    Fits a first-degree polynomial to the "RMSZ" values of the module-level
    frames ``o`` (x) and ``r`` (y) for every ENSEMBLE entry under ``var``,
    then brackets each coefficient by ``n_sigma`` times the square root of
    the matching diagonal entry of the covariance matrix returned by
    ``np.polyfit``.

    Parameters
    ----------
    var : label
        First-level (VARIABLE) index label selecting the rows to fit.
    n_sigma : float, optional
        Half-width of each interval in units of the coefficient's estimated
        standard deviation. Defaults to 2, the value previously hard-coded.

    Returns
    -------
    tuple
        ``((slope_lower, slope_upper), (intercept_lower, intercept_upper))``.
    """
    o_var = o.loc[(var, slice(None)), "RMSZ"]
    r_var = r.loc[(var, slice(None)), "RMSZ"]

    # With cov=True polyfit returns (coefficients, covariance matrix);
    # coefficients are ordered highest power first: [slope, intercept].
    coeffs, cov = np.polyfit(o_var, r_var, 1, cov=True)

    # Compute the half-width once and reuse it for both bounds.
    half_width = n_sigma * np.sqrt(np.diagonal(cov))
    lower = coeffs - half_width
    upper = coeffs + half_width
    return ((lower[0], upper[0]), (lower[1], upper[1]))
# One (slope interval, intercept interval) pair per line: U, FSDSC, Z3, CCN3.
print(*[conf_int(name) for name in ("U", "FSDSC", "Z3", "CCN3")], sep="\n")


((0.99380658069931349, 1.0001326044569279), (0.0003468844778542631, 0.0068002392893087692))
((0.86691783469425632, 0.91901624851025709), (0.082871008321665551, 0.13604556496657366))
((0.74884045531968568, 0.83715504502706883), (0.16878957236967526, 0.2588805776429467))
((0.95982233504953751, 0.99293501818793162), (0.008132626976276508, 0.041971935512091108))

In [31]:
# Largest absolute gap between original and reconstructed RMSZ, per variable.
(
    (o["RMSZ"] - r["RMSZ"])
    .abs()
    .unstack("VARIABLE")
    .max(axis=0)
)


Out[31]:
VARIABLE
CCN3        0.035275
FSDSC       0.033504
U           0.005838
Z3          0.118035
dtype: float64