In [1]:
import os
import sys
# check the scripts directory is present
if not os.path.exists("../scripts/"):
print "The scripts directory is missing"
sys.exit()
# since the scripts directory is there, try importing the modules
sys.path.append('../scripts')
import dateutil
import qcio
import qcutils
import qcts
import qcplot
import statsmodels.api as sm
In [2]:
def gfACCESS_getdateindices(ldt_tower,ldt_access,access_info,si_match,ei_match):
startdate = access_info["startdate"]
enddate = access_info["enddate"]
ts = access_info["time_step"]
# get the indices of the start and end datetimes in the tower and the ACCESS data.
si_tower = qcutils.GetDateIndex(ldt_tower,startdate,ts=ts,match=si_match)
ei_tower = qcutils.GetDateIndex(ldt_tower,enddate,ts=ts,match=ei_match)
si_access = qcutils.GetDateIndex(ldt_access,startdate,ts=ts,match=si_match)
ei_access = qcutils.GetDateIndex(ldt_access,enddate,ts=ts,match=ei_match)
# now make sure the start and end datetime match
sdt = max([ldt_tower[si_tower],ldt_access[si_access]])
edt = min([ldt_tower[ei_tower],ldt_access[ei_access]])
# and get the start and end indices again in case they didn't
si_tower = qcutils.GetDateIndex(ldt_tower,str(sdt),ts=ts,match=si_match)
ei_tower = qcutils.GetDateIndex(ldt_tower,str(edt),ts=ts,match=ei_match)
si_access = qcutils.GetDateIndex(ldt_access,str(sdt),ts=ts,match=si_match)
ei_access = qcutils.GetDateIndex(ldt_access,str(edt),ts=ts,match=ei_match)
return {"si":si_tower,"ei":ei_tower},{"si":si_access,"ei":ei_access}
In [3]:
def gfACCESS_getACCESSvarlist(ds_access,label):
access_var_list = [item for item in ds_access.series.keys() if label in item]
# remove any extraneous Fn labels (ACCESS has Fn_lw and Fn_sw)
if label=="Fn":
access_var_list = [item for item in access_var_list if "lw" not in item]
access_var_list = [item for item in access_var_list if "sw" not in item]
# check the series in the ACCESS data
if len(access_var_list)==0:
print "gfACCESS_getACCESSvarlist: series "+label+" not in ACCESS data file"
return access_var_list
In [14]:
def gfACCESS_getACCESSvaratmaxr(access_var_list,data_tower,ds_access,access_info):
""" Get the name of the ACCESS variable that has the highest correlation with the tower data."""
# local pointers to the start and end indices
si = access_info["access"]["exact"]["si"]
ei = access_info["access"]["exact"]["ei"]
# creat an array for the correlations
r = numpy.ones(len(access_var_list))*float(-9999)
# check that the number of tower data points is greater than the minimum
if numpy.ma.count(data_tower)>access_info["min_points"]:
# loop over the variables in the ACCESS file
for idx,var in enumerate(access_var_list):
# get the ACCESS data
data_access,flag,attr = qcutils.GetSeriesasMA(ds_access,var,si=si,ei=ei)
# check the lengths of the tower and ACCESS data are the same
if len(data_access)!=len(data_tower):
raise ValueError('gfACCESS_getACCESSvaratmaxr: data_tower and data_access lengths differ')
# put the correlation into the r array
r[idx] = numpy.ma.corrcoef(data_tower,data_access)[0,1]
# get the index of the maximum r value
maxidx = numpy.ma.argmax(r)
# save the correlation array for later plotting
access_info["r"] = r
# return the name of the ACCESS variable that has the highest correlation with the tower data
return access_var_list[maxidx]
In [16]:
def gfACCESS_getlagcorrecteddata(ds_tower,ds_access,label_tower,label_access,access_info):
# local pointers to the start and end indices
si_tower = access_info["tower"]["exact"]["si"]
ei_tower = access_info["tower"]["exact"]["ei"]
si_access = access_info["access"]["exact"]["si"]
ei_access = access_info["access"]["exact"]["ei"]
# get the data
data_tower,f,a = qcutils.GetSeriesasMA(ds_tower,label_tower,si=si_tower,ei=ei_tower)
data_access,f,a = qcutils.GetSeriesasMA(ds_access,label_access,si=si_access,ei=ei_access)
lags,corr = qcts.get_laggedcorrelation(data_tower,data_access,maxlags=access_info["nperday"])
nLags = numpy.argmax(corr)-maxlags
si_access_lagcorr = si_access - nLags
ei_access_lagcorr = ei_access - nLags
data_access_lagcorr,f,a = qcutils.GetSeriesasMA(ds_access,label_access,si=si_access_lagcorr,ei=ei_access_lagcorr,mode="pad")
return data_access_lagcorr,f,a
In [25]:
def gfACCESS_getolscorrecteddata(x_in,y_in,results,thru0=False):
"""
Calculate the ordinary least squares fit between 2 1D arrays.
"""
if numpy.ma.isMA(x_in)!=numpy.ma.isMA(y_in):
raise ValueError('qcts.getolscorrecteddata: one of x or y is a masked array, the other is not')
if numpy.ma.isMA(x_in) and numpy.ma.isMA(y_in):
mask = numpy.ma.mask_or(x_in.mask,y_in.mask)
x = numpy.ma.array(x_in,mask=mask)
y = numpy.ma.array(y_in,mask=mask)
if numpy.ma.count(x)==0:
raise ValueError('qcts.getolscorrecteddata: x or y all masked')
x = numpy.ma.compressed(x)
y = numpy.ma.compressed(y)
nx = len(x)
if nx!=len(y):
raise ValueError('qcts.getolscorrecteddata: x and y must be equal length')
if thru0:
resols = sm.OLS(x,y).fit()
y_out = resols.params[0]*x_in
else:
resols = sm.OLS(x,sm.add_constant(y,prepend=False)).fit()
y_out = resols.params[0]*x_in+resols.params[1]
results["ols"] = resols
return y_out
In [26]:
def gfACCESS_getwholedaysdataas2D(ldt,odt,data,si,ei,nperday):
si_wholedays = odt.index(ldt[si])
ei_wholedays = odt.index(ldt[ei])
data_wholedays = data[si_wholedays:ei_wholedays+1]
ndays = len(data_wholedays)/nperday
data_2d = numpy.ma.reshape(data_wholedays,[ndays,nperday])
return data_2d
In [8]:
ds_tower = qcio.nc_read_series(qcio.get_filename_dialog())
ds_access = qcio.nc_read_series(qcio.get_filename_dialog())
ts_tower = int(ds_tower.globalattributes["time_step"])
ts_access = int(ds_access.globalattributes["time_step"])
if ts_tower!=ts_access: raise ValueError('gfACCESS: time stpes differ for tower and ACCESS data')
ldt_tower = ds_tower.series["DateTime"]["Data"]
ldt_access = ds_access.series["DateTime"]["Data"]
overlap_startdate = max([ldt_tower[0],ldt_access[0]])
overlap_enddate = min([ldt_tower[-1],ldt_access[-1]])
In [12]:
access_info = {"overlap_startdate":overlap_startdate.strftime("%Y-%m-%d %H:%M"),
"overlap_enddate":overlap_enddate.strftime("%Y-%m-%d %H:%M")}
access_info["peropt"] = 1
access_info["overwrite"] = True
access_info["min_points"] = 100
access_info["startdate"] = "2012-12-01 01:00"
access_info["enddate"] = "2012-12-31 23:00"
access_info["time_step"] = int(ds_tower.globalattributes["time_step"])
access_info["nperhr"] = int(float(60)/access_info["time_step"]+0.5)
access_info["nperday"] = int(float(24)*access_info["nperhr"]+0.5)
access_info["tower"] = {}
access_info["access"] = {}
In [28]:
label_tower = "Fsd"
gfACCESS_getdateindices(ldt_tower,ldt_access,access_info,"exact","exact")
data_tower,f,a = qcutils.GetSeriesasMA(ds_tower,"Fsd",si=access_info["tower"]["exact"]["si"],ei=access_info["tower"]["exact"]["ei"])
odt_tower = ldt_tower[access_info["tower"]["exact"]["si"]:access_info["tower"]["exact"]["ei"]+1]
odt_access = ldt_access[access_info["access"]["exact"]["si"]:access_info["access"]["exact"]["ei"]+1]
access_var_list = gfACCESS_getACCESSvarlist(ds_access,"Fsd")
label_access = gfACCESS_getACCESSvaratmaxr(access_var_list,data_tower,ds_access,access_info)
data_access,f,a = qcutils.GetSeriesasMA(ds_access,label_access,si=access_info["access"]["exact"]["si"],ei=access_info["access"]["exact"]["ei"])
maxlags = 24
data_access_lagcorr,f,a = gfACCESS_getlagcorrecteddata(ds_tower,ds_access,label_tower,label_access,access_info)
data_access_lagolscorr = gfACCESS_getolscorrecteddata(data_access_lagcorr,data_tower,access_info,thru0=True)
access_info["tower"]["whole_days"],access_info["access"]["whole_days"] = gfACCESS_getdateindices(ldt_tower,ldt_access,access_info,"startnextday","endpreviousday")
si_tower = access_info["tower"]["whole_days"]["si"]
ei_tower = access_info["tower"]["whole_days"]["ei"]
si_access = access_info["access"]["whole_days"]["si"]
ei_access = access_info["access"]["whole_days"]["ei"]
nperday = access_info["nperday"]
data_tower_2D = gfACCESS_getwholedaysdataas2D(ldt_tower,odt_tower,data_tower,si_tower,ei_tower,nperday)
data_access_2D = gfACCESS_getwholedaysdataas2D(ldt_access,odt_access,data_access,si_access,ei_access,nperday)
data_access_lagolscorr_2D = gfACCESS_getwholedaysdataas2D(ldt_access,odt_access,data_access_lagolscorr,si_access,ei_access,nperday)
In [19]:
fig=figure()
plot(odt_tower,data_tower,'ro')
plot(odt_access,data_access,'b-')
plot(odt_access,data_access_lagolscorr,'g-')
Out[19]:
In [ ]: