In [1]:
# Run the local "basics" script inside this kernel's namespace.
# NOTE(review): the cells below use csv, numpy, os, time, datetime, dateutil,
# plt, qcio, qcfunc and qcutils without importing them, so they are presumably
# all brought in by this script - confirm against basics.py.
%run basics
# Turn on matplotlib's interactive support using the default GUI backend
# (the backend chosen is reported in the cell output).
%matplotlib


Using matplotlib backend: Qt4Agg

In [11]:
# load the control file; it drives everything that follows
cf = qcio.load_controlfile(path="controlfiles")
# name of the CSV file to be read, as given by the control file
csv_filename = qcio.get_infilenamefromcf(cf)
# row numbers of the header line and of the first line of data
files_section = cf["Files"]
header_row = int(files_section["in_headerrow"])
first_data_row = int(files_section["in_firstdatarow"])
# the units row is optional, -1 means "no units row in this file"
units_row = -1
if "in_unitsrow" in files_section:
    units_row = int(files_section["in_unitsrow"])

In [12]:
# sniff the file to try and find out what we are dealing with and
# get the header and units lines
# start from a clean slate so that a value left over from an earlier run
# of this cell can never be mistaken for the header of the current file
header = None
units = None
# the "with" block closes the file even when sniffing or reading fails
# ('rb' is the mode the Python 2 csv module expects)
with open(csv_filename,'rb') as csv_file:
    # only the first line is used to guess the delimiter
    dialect = csv.Sniffer().sniff(csv_file.readline(), [' ',',','\t'])
    csv_file.seek(0)
    csv_reader = csv.reader(csv_file,dialect)
    # read the rows before the data; row numbers count from 1
    for i in range(1,first_data_row):
        line = next(csv_reader)
        if i==header_row: header = line
        if units_row!=-1 and i==units_row: units = line
    # read the first line of data
    data_line = next(csv_reader)
# fail here, with a clear message, rather than later with a confusing one
if header is None:
    msg = "Header row "+str(header_row)+" not found before first data row "+str(first_data_row)
    raise Exception(msg)

In [13]:
# Work out which control file variables can be read from the CSV file.
# var_list collects the control file labels and csv_list the matching CSV
# column names; a variable whose column is not in the header is dropped
# silently, one with neither a csv nor an xl section is reported.
var_list = []
csv_list = []
for item in cf["Variables"].keys():
    section = cf["Variables"][item]
    # a csv section takes precedence over an xl section
    if "csv" in section.keys():
        source = "csv"
    elif "xl" in section.keys():
        source = "xl"
    else:
        print(" No csv or xl section in control file for "+item+", skipping ...")
        continue
    csv_varname = section[source]["name"]
    if csv_varname in header:
        csv_list.append(csv_varname)
        var_list.append(item)


 No csv or xl section in control file for Ah_IRGA_Av, skipping ...
 No csv or xl section in control file for Ah_HC2S3_3m, skipping ...

In [14]:
# Read the requested columns of the CSV file with numpy's genfromtxt.
print("Reading from "+csv_filename)
# every row before the first data row is skipped
skip = first_data_row-1
data = numpy.genfromtxt(
    csv_filename,
    delimiter=dialect.delimiter,
    skip_header=skip,
    # column names come from the header line read earlier
    names=header,
    # only the columns asked for in the control file are read
    usecols=csv_list,
    # missing entries are filled with -9999
    filling_values=float(-9999),
    # dtype=None lets genfromtxt choose a type for each column
    dtype=None,
)


Reading from ../Sites/GatumPasture/Data/Processed/2015/GatumPasture_2015_L1_ED1.csv

In [15]:
# create the data structure that will hold the series and their metadata
ds = qcio.DataStructure()
# the number of records is the length of the first column named in the header
nRecs = len(data[header[0]])
# global attributes are all stored as strings
ds.globalattributes["nc_nrecs"] = str(nRecs)
ds.globalattributes["featureType"] = "timeseries"
ds.globalattributes["csv_filename"] = csv_filename
ds.globalattributes["xl_datemode"] = "0"
# record when the CSV file was last modified (local time, whole seconds)
s = os.stat(csv_filename)
t = time.localtime(s.st_mtime)
# the first six fields of t are year, month, day, hour, minute and second
ds.globalattributes["csv_moddatetime"] = str(datetime.datetime(*t[:6]))

In [16]:
# Get the variables and put them into the data structure.
# DateTime and xlDateTime are dealt with separately, so leave them out here.
var_list = [var for var in cf["Variables"].keys() if var not in ("xlDateTime","DateTime")]
for var in var_list:
    var_cfg = cf["Variables"][var]
    # a csv section takes precedence over an xl section
    if "csv" in var_cfg:
        source = "csv"
    elif "xl" in var_cfg:
        source = "xl"
    else:
        # reported with print for now, not through a logger
        print(" No csv or xl section found in control file for "+var+", skipping ...")
        continue
    # NOTE(review): the "." is presumably removed to match the field names
    # produced by genfromtxt - confirm against the numpy version in use
    csv_var_name = var_cfg[source]["name"].replace(".","")
    if csv_var_name in data.dtype.names:
        # every record starts with a QC flag of 0
        ds.series[var] = {"Data": data[csv_var_name],
                          "Flag": numpy.zeros(nRecs,dtype=numpy.int32)}
    else:
        print("Requested variable "+csv_var_name+" not found in CSV file, skipping ...")


 No csv or xl section found in control file for Ah_IRGA_Av, skipping ...
 No csv or xl section found in control file for Ah_HC2S3_3m, skipping ...

In [17]:
# Now deal with the datetime.
# The control file must have a [[DateTime]] section.  If that section has a
# Function entry, the named qcfunc function is called to build the series,
# otherwise the first column of the CSV file is parsed as datetime strings.
if "DateTime" not in cf["Variables"]:
    msg = "No [[DateTime]] section in control file ..."
    raise Exception(msg)
ds.series["DateTime"] = {}
if "Function" in cf["Variables"]["DateTime"]:
    # the entry looks like "name(arg1,arg2,...)"; the arguments are passed
    # on as strings after the data structure itself
    function_string = cf["Variables"]["DateTime"]["Function"]["func"]
    function_name = function_string.split("(")[0].strip()
    # strip the blanks so that "f(a, b)" and "f(a,b)" give the same arguments
    function_args = [arg.strip() for arg in function_string.split("(")[1].replace(")","").split(",")]
    result = getattr(qcfunc,function_name)(ds,*function_args)
else:
    # only the parsing is guarded, so a failure elsewhere is not mislabelled;
    # "except Exception" lets KeyboardInterrupt and SystemExit through
    try:
        dt = [dateutil.parser.parse(x) for x in data[header[0]]]
    except Exception as err:
        msg = "Unable to parse the first column in CSV file as a datetime string"
        # keep the underlying reason so the offending value can be tracked down
        raise Exception(msg+" ("+str(err)+")")
    ds.series["DateTime"]["Data"] = dt
    ds.series["DateTime"]["Flag"] = numpy.zeros(nRecs,dtype=numpy.int32)
    ds.series["DateTime"]["Attr"] = {}
    ds.series["DateTime"]["Attr"]["long_name"] = "Datetime in local timezone"
    ds.series["DateTime"]["Attr"]["units"] = "None"

In [18]:
# list the names of the series now held in the data structure
series_names = ds.series.keys()
print(series_names)


['Fc_CR3', 'UyT', 'Ux_Sd', 'Fe_CR3', 'Signal_H2O', 'CO2_IRGA_Av', 'Tv_CSAT_Sd', 'UyC', 'Precip', 'UyH', 'ustar_CR3', 'ps', 'RH_HC2S3_3m', 'Diag_CPEC', 'UyUz', 'CellTemp_EC155', 'Ta_HC2S3_3m', 'Fg_10cmb', 'Diag_CSAT', 'Diag_IRGA', 'H2O_IRGA_Av', 'DiffPress_EC155', 'UxUy', 'UxUz', 'UxT', 'UzH', 'Ux', 'Uz', 'Uz_Sd', 'UzC', 'CellPress_EC155', 'Fh_CR3', 'UxC', 'Ws_CSAT', 'UzT', 'DateTime', 'Wd_CSAT', 'UxH', 'H2O_IRGA_Sd', 'Fg_10cma', 'Signal_CO2', 'CO2_IRGA_Sd', 'Uy', 'Tpanel', 'Fn_NR', 'Tv_CSAT', 'Uy_Sd', 'Ts_10cma', 'Vbat']

In [10]:
# Plot NEE against time as blue dots.
# NOTE(review): this cell has an older execution count than the cells around
# it and "NEE" is not among the series names printed above, so it looks like
# a leftover from an earlier session - consider deleting it.
series_label = "NEE"
if series_label in ds.series.keys():
    ldt=ds.series["DateTime"]["Data"]
    # GetSeriesasMA returns three values; only the first is used here
    NEE,f,a=qcutils.GetSeriesasMA(ds,series_label)
    fig=plt.figure()
    plt.plot(ldt,NEE,'b.')
    plt.show()
else:
    # do not ask for a series that was never read from the CSV file
    print(" Series "+series_label+" not found in data structure, skipping plot ...")

In [19]:
# Plot Fc_CR3 against time as blue dots.
ldt = ds.series["DateTime"]["Data"]
# the variable is called NEE but it holds the Fc_CR3 series;
# GetSeriesasMA returns three values and only the first is used here
NEE, f, a = qcutils.GetSeriesasMA(ds, "Fc_CR3")
fig = plt.figure()
# draw on the figure's current axes (created on first use)
ax = fig.gca()
ax.plot(ldt, NEE, "b.")
plt.show()

In [ ]: