In [1]:
import os
import sys
# check the scripts directory is present
if not os.path.exists("../scripts/"):
    print "The scripts directory is missing"
    sys.exit()
# since the scripts directory is there, try importing the modules
sys.path.append('../scripts')
import dateutil
import qcio
import qcutils
import qcts
import qcplot
import statsmodels.api as sm

In [2]:
def gfACCESS_getdateindices(ldt_tower,ldt_access,access_info,si_match,ei_match):
    startdate = access_info["startdate"]
    enddate = access_info["enddate"]
    ts = access_info["time_step"]
    # get the indices of the start and end datetimes in the tower and the ACCESS data.
    si_tower = qcutils.GetDateIndex(ldt_tower,startdate,ts=ts,match=si_match)
    ei_tower = qcutils.GetDateIndex(ldt_tower,enddate,ts=ts,match=ei_match)
    si_access = qcutils.GetDateIndex(ldt_access,startdate,ts=ts,match=si_match)
    ei_access = qcutils.GetDateIndex(ldt_access,enddate,ts=ts,match=ei_match)
    # now make sure the start and end datetime match
    sdt = max([ldt_tower[si_tower],ldt_access[si_access]])
    edt = min([ldt_tower[ei_tower],ldt_access[ei_access]])
    # and get the start and end indices again in case they didn't
    si_tower = qcutils.GetDateIndex(ldt_tower,str(sdt),ts=ts,match=si_match)
    ei_tower = qcutils.GetDateIndex(ldt_tower,str(edt),ts=ts,match=ei_match)
    si_access = qcutils.GetDateIndex(ldt_access,str(sdt),ts=ts,match=si_match)
    ei_access = qcutils.GetDateIndex(ldt_access,str(edt),ts=ts,match=ei_match)
    return {"si":si_tower,"ei":ei_tower},{"si":si_access,"ei":ei_access}

In [3]:
def gfACCESS_getACCESSvarlist(ds_access,label):
    access_var_list = [item for item in ds_access.series.keys() if label in item]
    # remove any extraneous Fn labels (ACCESS has Fn_lw and Fn_sw)
    if label=="Fn":
        access_var_list = [item for item in access_var_list if "lw" not in item]
        access_var_list = [item for item in access_var_list if "sw" not in item]
    # check the series in the ACCESS data
    if len(access_var_list)==0:
        print "gfACCESS_getACCESSvarlist: series "+label+" not in ACCESS data file"
    return access_var_list

In [14]:
def gfACCESS_getACCESSvaratmaxr(access_var_list,data_tower,ds_access,access_info):
    """ Get the name of the ACCESS variable that has the highest correlation with the tower data."""
    # local pointers to the start and end indices
    si = access_info["access"]["exact"]["si"]
    ei = access_info["access"]["exact"]["ei"]
    # creat an array for the correlations
    r = numpy.ones(len(access_var_list))*float(-9999)
    # check that the number of tower data points is greater than the minimum
    if numpy.ma.count(data_tower)>access_info["min_points"]:
        # loop over the variables in the ACCESS file
        for idx,var in enumerate(access_var_list):
            # get the ACCESS data
            data_access,flag,attr = qcutils.GetSeriesasMA(ds_access,var,si=si,ei=ei)
            # check the lengths of the tower and ACCESS data are the same
            if len(data_access)!=len(data_tower):
                raise ValueError('gfACCESS_getACCESSvaratmaxr: data_tower and data_access lengths differ')
            # put the correlation into the r array
            r[idx] = numpy.ma.corrcoef(data_tower,data_access)[0,1]
    # get the index of the maximum r value
    maxidx = numpy.ma.argmax(r)
    # save the correlation array for later plotting
    access_info["r"] = r
    # return the name of the ACCESS variable that has the highest correlation with the tower data
    return access_var_list[maxidx]

In [16]:
def gfACCESS_getlagcorrecteddata(ds_tower,ds_access,label_tower,label_access,access_info):
    # local pointers to the start and end indices
    si_tower = access_info["tower"]["exact"]["si"]
    ei_tower = access_info["tower"]["exact"]["ei"]
    si_access = access_info["access"]["exact"]["si"]
    ei_access = access_info["access"]["exact"]["ei"]
    # get the data
    data_tower,f,a = qcutils.GetSeriesasMA(ds_tower,label_tower,si=si_tower,ei=ei_tower)
    data_access,f,a = qcutils.GetSeriesasMA(ds_access,label_access,si=si_access,ei=ei_access)
    lags,corr = qcts.get_laggedcorrelation(data_tower,data_access,maxlags=access_info["nperday"])
    nLags = numpy.argmax(corr)-maxlags
    si_access_lagcorr = si_access - nLags
    ei_access_lagcorr = ei_access - nLags
    data_access_lagcorr,f,a = qcutils.GetSeriesasMA(ds_access,label_access,si=si_access_lagcorr,ei=ei_access_lagcorr,mode="pad")
    return data_access_lagcorr,f,a

In [25]:
def gfACCESS_getolscorrecteddata(x_in,y_in,results,thru0=False):
    """
    Calculate the ordinary least squares fit between 2 1D arrays.
    """
    if numpy.ma.isMA(x_in)!=numpy.ma.isMA(y_in):
        raise ValueError('qcts.getolscorrecteddata: one of x or y is a masked array, the other is not')
    if numpy.ma.isMA(x_in) and numpy.ma.isMA(y_in):
        mask = numpy.ma.mask_or(x_in.mask,y_in.mask)
        x = numpy.ma.array(x_in,mask=mask)
        y = numpy.ma.array(y_in,mask=mask)
        if numpy.ma.count(x)==0:
            raise ValueError('qcts.getolscorrecteddata: x or y all masked')
        x = numpy.ma.compressed(x)
        y = numpy.ma.compressed(y)
    nx = len(x)
    if nx!=len(y):
        raise ValueError('qcts.getolscorrecteddata: x and y must be equal length')
    if thru0:
        resols = sm.OLS(x,y).fit()
        y_out = resols.params[0]*x_in
    else:
        resols = sm.OLS(x,sm.add_constant(y,prepend=False)).fit()
        y_out = resols.params[0]*x_in+resols.params[1]
    results["ols"] = resols
    return y_out

In [26]:
def gfACCESS_getwholedaysdataas2D(ldt,odt,data,si,ei,nperday):
    si_wholedays = odt.index(ldt[si])
    ei_wholedays = odt.index(ldt[ei])
    data_wholedays = data[si_wholedays:ei_wholedays+1]
    ndays = len(data_wholedays)/nperday
    data_2d = numpy.ma.reshape(data_wholedays,[ndays,nperday])
    return data_2d

In [8]:
ds_tower = qcio.nc_read_series(qcio.get_filename_dialog())
ds_access = qcio.nc_read_series(qcio.get_filename_dialog())
ts_tower = int(ds_tower.globalattributes["time_step"])
ts_access = int(ds_access.globalattributes["time_step"])
if ts_tower!=ts_access: raise ValueError('gfACCESS: time stpes differ for tower and ACCESS data')
ldt_tower = ds_tower.series["DateTime"]["Data"]
ldt_access = ds_access.series["DateTime"]["Data"]
overlap_startdate = max([ldt_tower[0],ldt_access[0]])
overlap_enddate = min([ldt_tower[-1],ldt_access[-1]])

In [12]:
access_info = {"overlap_startdate":overlap_startdate.strftime("%Y-%m-%d %H:%M"),
                    "overlap_enddate":overlap_enddate.strftime("%Y-%m-%d %H:%M")}
access_info["peropt"] = 1
access_info["overwrite"] = True
access_info["min_points"] = 100
access_info["startdate"] = "2012-12-01 01:00"
access_info["enddate"] = "2012-12-31 23:00"
access_info["time_step"] = int(ds_tower.globalattributes["time_step"])
access_info["nperhr"] = int(float(60)/access_info["time_step"]+0.5)
access_info["nperday"] = int(float(24)*access_info["nperhr"]+0.5)
access_info["tower"] = {}
access_info["access"] = {}

In [28]:
label_tower = "Fsd"
gfACCESS_getdateindices(ldt_tower,ldt_access,access_info,"exact","exact")
data_tower,f,a = qcutils.GetSeriesasMA(ds_tower,"Fsd",si=access_info["tower"]["exact"]["si"],ei=access_info["tower"]["exact"]["ei"])
odt_tower = ldt_tower[access_info["tower"]["exact"]["si"]:access_info["tower"]["exact"]["ei"]+1]
odt_access = ldt_access[access_info["access"]["exact"]["si"]:access_info["access"]["exact"]["ei"]+1]
access_var_list = gfACCESS_getACCESSvarlist(ds_access,"Fsd")
label_access = gfACCESS_getACCESSvaratmaxr(access_var_list,data_tower,ds_access,access_info)
data_access,f,a = qcutils.GetSeriesasMA(ds_access,label_access,si=access_info["access"]["exact"]["si"],ei=access_info["access"]["exact"]["ei"])
maxlags = 24
data_access_lagcorr,f,a = gfACCESS_getlagcorrecteddata(ds_tower,ds_access,label_tower,label_access,access_info)
data_access_lagolscorr = gfACCESS_getolscorrecteddata(data_access_lagcorr,data_tower,access_info,thru0=True)
access_info["tower"]["whole_days"],access_info["access"]["whole_days"] = gfACCESS_getdateindices(ldt_tower,ldt_access,access_info,"startnextday","endpreviousday")
si_tower = access_info["tower"]["whole_days"]["si"]
ei_tower = access_info["tower"]["whole_days"]["ei"]
si_access = access_info["access"]["whole_days"]["si"]
ei_access = access_info["access"]["whole_days"]["ei"]
nperday = access_info["nperday"]
data_tower_2D = gfACCESS_getwholedaysdataas2D(ldt_tower,odt_tower,data_tower,si_tower,ei_tower,nperday)
data_access_2D = gfACCESS_getwholedaysdataas2D(ldt_access,odt_access,data_access,si_access,ei_access,nperday)
data_access_lagolscorr_2D = gfACCESS_getwholedaysdataas2D(ldt_access,odt_access,data_access_lagolscorr,si_access,ei_access,nperday)


2

In [19]:
fig=figure()
plot(odt_tower,data_tower,'ro')
plot(odt_access,data_access,'b-')
plot(odt_access,data_access_lagolscorr,'g-')


Out[19]:
[<matplotlib.lines.Line2D at 0x3d2f710>]

In [ ]: