In [1]:
import os
import pandas
from scripts import wqt_timestamp_match
from datetime import datetime
from scripts import wq_gain
In [2]:
# Working directory used as the root for all test-file paths.
# NOTE: "__file__" was a string literal in the original expression, so it always
# reduced to the current working directory. That is now stated explicitly; start
# the notebook from the Arcproject-wq-processing folder for the paths to resolve.
wd = os.path.abspath(os.getcwd())
# example water quality file for the gain workflow; every path component is passed
# separately so the correct separator is used on any OS (no hard-coded backslashes)
wq_file = os.path.join(
    wd, "scripts", "tests", "testfiles",
    "Arc_040413", "Arc_040413_WQ", "Arc_040413_wqp_cc1.csv",
)
print(wq_file)
In [3]:
# Read the example water quality file through the project loader and keep the
# result as wq_gain_df; show only the leading rows as a sanity check.
wq_gain_df = wqt_timestamp_match.wq_from_file(wq_file)
preview = wq_gain_df.head()
print(preview)
In [4]:
# Convert water quality columns that were read as strings (dtype object) to numbers.
# Series.convert_objects() was removed from pandas, so pandas.to_numeric() is used
# instead; values that cannot be parsed become NaN.
for column in list(wq_gain_df.columns.values):
    if wq_gain_df[column].dtype == object:
        converted = pandas.to_numeric(wq_gain_df[column], errors="coerce")
        # Leave the column untouched when nothing in it is numeric (e.g. free text),
        # which is what convert_objects(convert_numeric=True) did.
        if not converted.isnull().all():
            wq_gain_df[column] = converted
In [5]:
# Boolean mask over the DEP25 column: True where the value is strictly
# between 0 and 1 (both bounds excluded)
depth = wq_gain_df['DEP25']
depth1m = (depth > 0) & (depth < 1)
# Keep only the rows flagged by the mask
wq_gain_1m = wq_gain_df.loc[depth1m]
print(wq_gain_1m)
In [6]:
# Column-wise mean of the rows selected by the depth filter.
# numeric_only=True is passed explicitly: recent pandas versions no longer drop
# non-numeric columns silently and raise a TypeError instead.
avg_1m = wq_gain_1m.mean(numeric_only=True)
print(avg_1m)
# convert the Series of means to a single-row dataframe (one column per variable)
avg_1m_df = avg_1m.to_frame().transpose()
print(avg_1m_df)
In [7]:
# Timestamp of the first row of the dataframe. Positional indexing (.iloc) is used
# so this does not depend on how the index happens to be labelled: the label lookup
# [1] only hits the first row when the index starts at 1.
date = wq_gain_df["Date_Time"].iloc[0]
print(date)
length = len(wq_gain_df.index)
# Timestamp of the last row; the label lookup [length] raises KeyError on a
# 0-based index, .iloc[-1] is always the final row.
end = wq_gain_df["Date_Time"].iloc[-1]
print(end)
In [8]:
# Attach the time window plus the site and gain labels to the averages frame.
# Columns are added one by one, in this order, each filled with a single value.
extra_columns = [
    ('Start_Time', date),
    ('End_Time', end),
    ('Site', "CC1"),
    ('Gain', "g0"),
]
for column_name, value in extra_columns:
    avg_1m_df[column_name] = value
avg_1m_df
Out[8]:
In [9]:
# example gain shapefile; path components are passed separately to os.path.join
# so the path resolves on any OS instead of relying on Windows backslashes
gain_shp = os.path.join(
    wd, "scripts", "tests", "testfiles",
    "Arc_040413", "Arc_040413_GPS", "040413_ZoopChlW.shp",
)
# load the shapefile through the project helper (presumably returns a pandas
# dataframe of the sampling points - confirm against wqt_timestamp_match)
gain_df = wqt_timestamp_match.wqtshp2pd(gain_shp)
print(gain_df)
In [10]:
# Work on a copy of the averages so the in-place edits made to ex_avg in later
# cells do not silently modify avg_1m_df as well (plain assignment only aliases it).
ex_avg = avg_1m_df.copy()
# Fresh load of the gain shapefile, independent of gain_df
t1 = wqt_timestamp_match.wqtshp2pd(gain_shp)
print(t1)
In [11]:
# Upper-case the Site column of both frames so the join key matches regardless of case
for frame in (t1, ex_avg):
    frame['Site'] = frame['Site'].str.upper()
# Inner join on Site: only sites present in both frames are kept
joined = t1.merge(ex_avg, how="inner", on="Site")
print(joined)
In [12]:
# Midpoint of the averaging window: start plus half of the elapsed time.
# (The previous expression used Start - End, which is negative and therefore
# produced a time half a duration *before* the start instead of the midpoint.)
start_time = ex_avg['Start_Time'].iloc[0]
end_time = ex_avg['End_Time'].iloc[0]
mid_time = start_time + (end_time - start_time) / 2
print(mid_time)
# add column with difference between mid time and time associated with the sampling point dataframe
# NOTE: td is the same object as t1, so the TimeDelta column is added to t1 too
td = t1
td["TimeDelta"] = abs(td["Date_Time"] - mid_time)  # absolute diff of time difference
print(td)
In [13]:
# join - using concat - the closest match with the water quality average df
# Pick the sampling point with the smallest TimeDelta first; previously the frame
# was only re-indexed, so the inner concat kept whichever row happened to be first.
td_closest = td.sort_values("TimeDelta").head(1).reset_index(drop=True)
# reset index so both single-row frames line up on index 0
ex_avg = ex_avg.reset_index(drop=True)
result = pandas.concat([td_closest, ex_avg], axis=1, join='inner')
print(result)  # there might be duplicate columns (e.g. Site exists in both frames)
In [15]:
# Same steps as above, but through the packaged helpers in wq_gain:
# first the time differences, then the join against the averages frame.
diff = wq_gain.gain_gps_timediff(avg_1m_df, gain_df)
closest_joined = wq_gain.gain_gps_join_closest_timestamp(avg_1m_df, diff)
# bare last expression so the notebook displays the returned value
closest_joined
In [19]:
# Call wq_gain.main on the example files with the third and fourth arguments
# ("CC1", "G1") passed positionally - presumably site and gain; confirm against wq_gain.main
e1 = wq_gain.main(wq_file, gain_shp, "CC1", "G1")
print(e1)
# Same call with only the gain keyword supplied; the site argument is left to
# whatever wq_gain.main does by default (not visible here - TODO confirm)
e2 = wq_gain.main(wq_file, gain_shp, gain="g1")
print(e2)