In [1]:
%run basics
%matplotlib
In [11]:
# Load the control file, then pull out the name of the CSV file to read and
# the row numbers that locate the header, units and first data lines.
cf = qcio.load_controlfile(path="controlfiles")
csv_filename = qcio.get_infilenamefromcf(cf)
files_section = cf["Files"]
header_row = int(files_section["in_headerrow"])
first_data_row = int(files_section["in_firstdatarow"])
# the units row is optional, -1 flags that the control file does not give one
units_row = int(files_section["in_unitsrow"]) if "in_unitsrow" in files_section else -1
In [12]:
# Sniff the CSV dialect from the first line of the file, then read the
# header line, the optional units line and the first line of data.
# header and units start as None so that a value left over from an earlier
# run of this cell can never be picked up by mistake.
header = None
units = None
# "rb" is kept because the Python 2 csv module asks for binary mode; the
# with block closes the file even if sniffing or reading raises
with open(csv_filename, 'rb') as csv_file:
    dialect = csv.Sniffer().sniff(csv_file.readline(), [' ', ',', '\t'])
    # rewind so the reader starts again from the first line
    csv_file.seek(0)
    csv_reader = csv.reader(csv_file, dialect)
    # rows are counted from 1; step through every row above the first data row
    for i in range(1, first_data_row):
        line = next(csv_reader)
        if i == header_row:
            header = line
        if units_row != -1 and i == units_row:
            units = line
    # read the first line of data
    data_line = next(csv_reader)
# the following cells all index into header, so fail here with a clear message
if header is None:
    msg = "Header row "+str(header_row)+" not found above first data row "+str(first_data_row)
    raise Exception(msg)
In [13]:
# Work out which control file variables can be read from the CSV file.
# var_list collects the control file variable names and csv_list the
# matching CSV column names; variables whose column is not in the header
# are left out of both lists.
var_list = []
csv_list = []
for item in cf["Variables"].keys():
    var_section = cf["Variables"][item]
    # a csv sub-section takes precedence over an xl sub-section
    if "csv" in var_section:
        source_key = "csv"
    elif "xl" in var_section:
        source_key = "xl"
    else:
        print(" No csv or xl section in control file for "+item+", skipping ...")
        continue
    csv_varname = var_section[source_key]["name"]
    if csv_varname in header:
        csv_list.append(csv_varname)
        var_list.append(item)
In [14]:
# Read the requested columns of the CSV file with numpy's genfromtxt.
print("Reading from "+csv_filename)
# every row above the first data row is skipped as header material
skip = first_data_row-1
data = numpy.genfromtxt(
    csv_filename,
    delimiter=dialect.delimiter,
    skip_header=skip,
    names=header,
    usecols=csv_list,
    # missing values are filled with -9999
    filling_values=float(-9999),
    # dtype=None lets genfromtxt choose a type for each column
    dtype=None,
)
In [15]:
# Create a new data structure and record global attributes describing the
# CSV file that has just been read.
ds = qcio.DataStructure()
# the number of records is the length of the column named by the first header entry
nRecs = len(data[header[0]])
ds.globalattributes["nc_nrecs"] = str(nRecs)
ds.globalattributes["featureType"] = "timeseries"
ds.globalattributes["csv_filename"] = csv_filename
ds.globalattributes["xl_datemode"] = "0"
# modification time of the CSV file as a local time, to the nearest second
csv_stat = os.stat(csv_filename)
mod_time = time.localtime(csv_stat.st_mtime)
# the first six struct_time fields are year, month, day, hour, minute, second
ds.globalattributes["csv_moddatetime"] = str(datetime.datetime(*mod_time[:6]))
In [16]:
# Copy each requested variable from the CSV data into the data structure.
# xlDateTime and DateTime are left out here because they are dealt with separately.
var_list = [name for name in cf["Variables"].keys() if name not in ("xlDateTime", "DateTime")]
for var in var_list:
    var_section = cf["Variables"][var]
    # the CSV column name comes from the csv sub-section or, failing that, the xl one
    if "csv" in var_section:
        csv_var_name = var_section["csv"]["name"]
    elif "xl" in var_section:
        csv_var_name = var_section["xl"]["name"]
    else:
        print(" No csv or xl section found in control file for "+var+", skipping ...")
        continue
    # NOTE(review): full stops are removed before the lookup, presumably because
    # genfromtxt strips them from the field names it creates - confirm
    csv_var_name = csv_var_name.replace(".", "")
    if csv_var_name not in data.dtype.names:
        print("Requested variable "+csv_var_name+" not found in CSV file, skipping ...")
        continue
    # every record starts with a flag of zero
    ds.series[var] = {
        "Data": data[csv_var_name],
        "Flag": numpy.zeros(nRecs, dtype=numpy.int32),
    }
In [17]:
# Build the DateTime series, either by calling the function named in the
# control file or by parsing the first CSV column as datetime strings.
# Raises Exception if the control file has no DateTime section or if the
# first column can not be parsed.
if "DateTime" not in cf["Variables"]:
    msg = "No [[DateTime]] section in control file ..."
    raise Exception(msg)
ds.series["DateTime"] = {}
if "Function" in cf["Variables"]["DateTime"]:
    # the control file entry has the form "name(arg1,arg2,...)"
    function_string = cf["Variables"]["DateTime"]["Function"]["func"]
    function_name = function_string.split("(")[0]
    arg_text = function_string.split("(")[1].replace(")", "")
    # strip the white space around each argument and drop empty ones so that
    # "name(a, b)" passes "a" and "b" and "name()" passes no arguments
    function_args = [arg.strip() for arg in arg_text.split(",") if arg.strip()]
    # NOTE(review): the qcfunc function is presumably expected to fill
    # ds.series["DateTime"] itself; result is not used in this cell - confirm
    result = getattr(qcfunc, function_name)(ds, *function_args)
else:
    # only the parsing is guarded, so an error anywhere else is not
    # mis-reported as a datetime parsing problem
    try:
        dt = [dateutil.parser.parse(x) for x in data[header[0]]]
    except Exception as err:
        # the underlying error is appended so the offending value can be found
        msg = "Unable to parse the first column in CSV file as a datetime string"
        raise Exception(msg+" ("+str(err)+")")
    ds.series["DateTime"]["Data"] = dt
    ds.series["DateTime"]["Flag"] = numpy.zeros(nRecs, dtype=numpy.int32)
    ds.series["DateTime"]["Attr"] = {}
    ds.series["DateTime"]["Attr"]["long_name"] = "Datetime in local timezone"
    ds.series["DateTime"]["Attr"]["units"] = "None"
In [18]:
# list the names of the series now held in the data structure
print(ds.series.keys())
In [10]:
# Plot the NEE series against the datetime as blue points with no joining line.
ldt = ds.series["DateTime"]["Data"]
# GetSeriesasMA returns three values; only the first is plotted
# (f and a are presumably the flag and the attributes - confirm)
NEE, f, a = qcutils.GetSeriesasMA(ds, "NEE")
fig = plt.figure()
plt.plot(ldt, NEE, color='b', marker='.', linestyle='None')
plt.show()
In [19]:
# Plot the Fc_CR3 series against the datetime as blue points with no joining line.
ldt = ds.series["DateTime"]["Data"]
# NOTE: the result is still bound to the name NEE although it holds the Fc_CR3 series
# (f and a are presumably the flag and the attributes - confirm)
NEE, f, a = qcutils.GetSeriesasMA(ds, "Fc_CR3")
fig = plt.figure()
plt.plot(ldt, NEE, color='b', marker='.', linestyle='None')
plt.show()
In [ ]: