In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import urllib2
from pyproj import Proj, transform
import xmltodict
import mechanize
import sys
import platform
import datetime
import statsmodels.api as sm
from pylab import rcParams
rcParams['figure.figsize'] = 15, 10
%matplotlib inline
In [2]:
print("Operating System " + platform.system() + " " + platform.release())
print("Python Version " + str(sys.version))
print("Pandas Version " + str(pd.__version__))
print("Numpy Version " + str(np.__version__))
print("Matplotlib Version " + str(matplotlib.__version__))
In [3]:
import wellapplication
In [1]:
import wellapplication.chem as wc
In [ ]:
wellapplication.__version__
In [6]:
#rootname = "/media/p/5F5B-8FCB/PROJECTS/UMAR/Data/chem/" #thumb on ubuntu
rootname = "E:\\PROJECTS\\UMAR\\Data\\chem\\" #thumb on windows
In [7]:
WQPResultsFile = rootname + "result.csv"
WQPStationFile = rootname + "station.csv"
SDWISFile = rootname + "SDWIS_Cache.txt"
AGStationsFile = rootname + "AG_Stations_Cache.csv"
AGResultsFile = rootname + "AG_byparam.csv"
UGSFile = rootname + "UGS_Cache.txt"
STORLegStatFile = rootname + "UT_Cache_sta_001.txt"
STORLegResFile = rootname + "UT_Cache_res_001.txt"
STORParamFile = rootname + "parameter.txt"
In [8]:
fielddata = rootname + "FieldData.xlsx"
statelabresults0 = rootname + "utgs1402.txt"
statelabresults1 = rootname + "utgs1403.txt"
statelabresults2 = rootname + "utgs1501.txt"
statelabstations = rootname + "UtahStateLabStations.xlsx"
In [10]:
df = wc.WQP.WQPimportRes(WQPResultsFile)
df = wc.WQP.WQPmassageResults(df)
df
The following html addresses are REST-based queries to download WQP data from the WQP portal. If you click on them, they will produce zipped csv files that can be opened and processed with the code below. Originally, the code directly applied these links, but the files are large and take a lot of time to download.
Station data address:
Result data address:
Define data type of each field in the WQP database. This allows for easy import of data.
In [ ]:
Rdtypes = {"OrganizationIdentifier":np.str_, "OrganizationFormalName":np.str_, "ActivityIdentifier":np.str_,
"ActivityStartTime/Time":np.str_,
"ActivityTypeCode":np.str_, "ActivityMediaName":np.str_, "ActivityMediaSubdivisionName":np.str_,
"ActivityStartDate":np.str_, "ActivityStartTime/Time":np.str_, "ActivityStartTime/TimeZoneCode":np.str_,
"ActivityEndDate":np.str_, "ActivityEndTime/Time":np.str_, "ActivityEndTime/TimeZoneCode":np.str_,
"ActivityDepthHeightMeasure/MeasureValue":np.float16, "ActivityDepthHeightMeasure/MeasureUnitCode":np.str_,
"ActivityDepthAltitudeReferencePointText":np.str_, "ActivityTopDepthHeightMeasure/MeasureValue":np.float16,
"ActivityTopDepthHeightMeasure/MeasureUnitCode":np.str_,
"ActivityBottomDepthHeightMeasure/MeasureValue":np.float16,
"ActivityBottomDepthHeightMeasure/MeasureUnitCode":np.str_,
"ProjectIdentifier":np.str_, "ActivityConductingOrganizationText":np.str_,
"MonitoringLocationIdentifier":np.str_, "ActivityCommentText":np.str_,
"SampleAquifer":np.str_, "HydrologicCondition":np.str_, "HydrologicEvent":np.str_,
"SampleCollectionMethod/MethodIdentifier":np.str_, "SampleCollectionMethod/MethodIdentifierContext":np.str_,
"SampleCollectionMethod/MethodName":np.str_, "SampleCollectionEquipmentName":np.str_,
"ResultDetectionConditionText":np.str_, "CharacteristicName":np.str_, "ResultSampleFractionText":np.str_,
"ResultMeasureValue":np.str_, "ResultMeasure/MeasureUnitCode":np.str_, "MeasureQualifierCode":np.str_,
"ResultStatusIdentifier":np.str_, "StatisticalBaseCode":np.str_, "ResultValueTypeName":np.str_,
"ResultWeightBasisText":np.str_, "ResultTimeBasisText":np.str_, "ResultTemperatureBasisText":np.str_,
"ResultParticleSizeBasisText":np.str_, "PrecisionValue":np.str_, "ResultCommentText":np.str_,
"USGSPCode":np.str_, "ResultDepthHeightMeasure/MeasureValue":np.float16,
"ResultDepthHeightMeasure/MeasureUnitCode":np.str_, "ResultDepthAltitudeReferencePointText":np.str_,
"SubjectTaxonomicName":np.str_, "SampleTissueAnatomyName":np.str_,
"ResultAnalyticalMethod/MethodIdentifier":np.str_, "ResultAnalyticalMethod/MethodIdentifierContext":np.str_,
"ResultAnalyticalMethod/MethodName":np.str_, "MethodDescriptionText":np.str_, "LaboratoryName":np.str_,
"AnalysisStartDate":np.str_, "ResultLaboratoryCommentText":np.str_,
"DetectionQuantitationLimitTypeName":np.str_, "DetectionQuantitationLimitMeasure/MeasureValue":np.str_,
"DetectionQuantitationLimitMeasure/MeasureUnitCode":np.str_, "PreparationStartDate":np.str_,
"ProviderName":np.str_}
dt = [6,56,61]
Read csv data into python.
In [ ]:
WQP = pd.read_csv(WQPResultsFile, dtype=Rdtypes, parse_dates=dt)
Rename columns to match with other data later.
In [ ]:
ResFieldDict = {"AnalysisStartDate":"AnalysisDate", "ResultAnalyticalMethod/MethodIdentifier":"AnalytMeth",
"ResultAnalyticalMethod/MethodName":"AnalytMethId", "ResultDetectionConditionText":"DetectCond",
"ResultLaboratoryCommentText":"LabComments", "LaboratoryName":"LabName",
"DetectionQuantitationLimitTypeName":"LimitType", "DetectionQuantitationLimitMeasure/MeasureValue":"MDL",
"DetectionQuantitationLimitMeasure/MeasureUnitCode":"MDLUnit", "MethodDescriptionText":"MethodDescript",
"OrganizationIdentifier":"OrgId", "OrganizationFormalName":"OrgName", "CharacteristicName":"Param",
"ProjectIdentifier":"ProjectId", "MeasureQualifierCode":"QualCode", "ResultCommentText":"ResultComment",
"ResultStatusIdentifier":"ResultStatus", "ResultMeasureValue":"ResultValue",
"ActivityCommentText":"SampComment", "ActivityDepthHeightMeasure/MeasureValue":"SampDepth",
"ActivityDepthAltitudeReferencePointText":"SampDepthRef",
"ActivityDepthHeightMeasure/MeasureUnitCode":"SampDepthU", "SampleCollectionEquipmentName":"SampEquip",
"ResultSampleFractionText":"SampFrac", "ActivityStartDate":"SampleDate", "ActivityIdentifier":"SampleId",
"ActivityStartTime/Time":"SampleTime", "ActivityMediaSubdivisionName":"SampMedia",
"SampleCollectionMethod/MethodIdentifier":"SampMeth", "SampleCollectionMethod/MethodName":"SampMethName",
"ActivityTypeCode":"SampType", "MonitoringLocationIdentifier":"StationId",
"ResultMeasure/MeasureUnitCode":"Unit", "USGSPCode":"USGSPCode",
"ActivityStartDate":"StartDate","ActivityStartTime/Time":"StartTime"}
In [ ]:
WQP.rename(columns=ResFieldDict,inplace=True)
Define unneeded columns that will be dropped to save memory.
In [ ]:
resdroplist = ["ActivityBottomDepthHeightMeasure/MeasureUnitCode", "ActivityBottomDepthHeightMeasure/MeasureValue",
"ActivityConductingOrganizationText", "ActivityEndDate", "ActivityEndTime/Time",
"ActivityEndTime/TimeZoneCode", "ActivityMediaName", "ActivityStartTime/TimeZoneCode",
"ActivityTopDepthHeightMeasure/MeasureUnitCode", "ActivityTopDepthHeightMeasure/MeasureValue",
"HydrologicCondition", "HydrologicEvent", "PrecisionValue", "PreparationStartDate", "ProviderName",
"ResultAnalyticalMethod/MethodIdentifierContext", "ResultDepthAltitudeReferencePointText",
"ResultDepthHeightMeasure/MeasureUnitCode", "ResultDepthHeightMeasure/MeasureValue",
"ResultParticleSizeBasisText", "ResultTemperatureBasisText",
"ResultTimeBasisText", "ResultValueTypeName", "ResultWeightBasisText", "SampleAquifer",
"SampleCollectionMethod/MethodIdentifierContext", "SampleTissueAnatomyName", "StatisticalBaseCode",
"SubjectTaxonomicName","StartTime","StartDate","StartTime","StartDate"]
Define a function to fix funky dates found in the WQP database. This includes impossible dates or dates with too many numbers.
In [ ]:
def datetimefix(x,format):
'''
This script cleans date-time errors
input
x = date-time string
format = format of date-time string
output
formatted datetime type
'''
d = str(x[0]).lstrip().rstrip()[0:10]
t = str(x[1]).lstrip().rstrip()[0:5].zfill(5)
try:
int(d[0:2])
except(ValueError,TypeError,NameError):
return np.nan
try:
int(t[0:2])
int(t[3:5])
except(ValueError,TypeError,NameError):
t = "00:00"
if int(t[0:2])>23:
t = "00:00"
elif int(t[3:5])>59:
t = "00:00"
else:
t = t[0:2].zfill(2) + ":" + t[3:5]
return datetime.datetime.strptime(d + " " + t, format)
In [ ]:
WQP["SampleDate"] = WQP[["StartDate","StartTime"]].apply(lambda x: datetimefix(x,"%Y-%m-%d %H:%M"),1)
Drop unwanted fields
In [ ]:
WQP.drop(resdroplist,inplace=True,axis=1)
Convert result values and the MDL values to numeric fields from string fields.
In [ ]:
WQP['ResultValue'] = WQP['ResultValue'].convert_objects(convert_numeric=True)
WQP['MDL'] = WQP['MDL'].convert_objects(convert_numeric=True)
Remove station duplicates by removing the preceding 'WQX' found in the station id field.
In [ ]:
WQP['StationId'] = WQP['StationId'].str.replace('_WQX-','-')
Standardize all ug/l data to mg/l by changing out the unit descriptor and dividing by 1000.
In [ ]:
#standardize all ug/l data to mg/l
def unitfix(x):
z = str(x).lower()
if z == "ug/l":
return "mg/l"
elif z == "mg/l":
return "mg/l"
else:
return x
WQP.Unit = WQP.Unit.apply(lambda x: str(x).rstrip(), 1)
WQP.ResultValue = WQP[["ResultValue","Unit"]].apply(lambda x: x[0]/1000 if str(x[1]).lower()=="ug/l" else x[0], 1)
WQP.Unit = WQP.Unit.apply(lambda x: unitfix(x),1)
Normalize common nutrients so that they are all in the same type of units. For example, sometimes nitrate is reported "as nitrogen" and sometimes it is reported "as nitrate". The difference between the two types of reporting is a factor of 4.427!
In [ ]:
def parnorm(x):
p = str(x[0]).rstrip().lstrip().lower()
u = str(x[2]).rstrip().lstrip().lower()
if p == 'nitrate' and u == 'mg/l as n':
return 'Nitrate', x[1]*4.427, 'mg/l'
elif p == 'nitrite' and u == 'mg/l as n':
return 'Nitrite', x[1]*3.285, 'mg/l'
elif p == 'ammonia-nitrogen' or p == 'ammonia-nitrogen as n' or p == 'ammonia and ammonium':
return 'Ammonium', x[1]*1.288, 'mg/l'
elif p == 'ammonium' and u == 'mg/l as n':
return 'Ammonium', x[1]*1.288, 'mg/l'
elif p == 'sulfate as s':
return 'Sulfate', x[1]*2.996, 'mg/l'
elif p in ('phosphate-phosphorus', 'phosphate-phosphorus as p','orthophosphate as p'):
return 'Phosphate', x[1]*3.066, 'mg/l'
elif (p == 'phosphate' or p == 'orthophosphate') and u == 'mg/l as p':
return 'Phosphate', x[1]*3.066, 'mg/l'
elif u == 'ug/l':
return x[0], x[1]/1000, 'mg/l'
else:
return x[0], x[1], str(x[2]).rstrip()
WQP['Param'], WQP['ResultValue'], WQP['Unit'] = zip(*WQP[['Param','ResultValue','Unit']].apply(lambda x: parnorm(x),1))
Read in WQP station data.
In [ ]:
WQPStat = pd.read_csv(WQPStationFile)
Rename and simplify station data column names for later compilation.
In [ ]:
StatFieldDict = {"MonitoringLocationIdentifier":"StationId", "AquiferName":"Aquifer", "AquiferTypeName":"AquiferType",
"ConstructionDateText":"ConstDate", "CountyCode":"CountyCode", "WellDepthMeasure/MeasureValue":"Depth",
"WellDepthMeasure/MeasureUnitCode":"DepthUnit", "VerticalMeasure/MeasureValue":"Elev",
"VerticalAccuracyMeasure/MeasureValue":"ElevAcc", "VerticalAccuracyMeasure/MeasureUnitCode":"ElevAccUnit",
"VerticalCollectionMethodName":"ElevMeth", "VerticalCoordinateReferenceSystemDatumName":"ElevRef",
"VerticalMeasure/MeasureUnitCode":"ElevUnit", "FormationTypeText":"FmType",
"WellHoleDepthMeasure/MeasureValue":"HoleDepth", "WellHoleDepthMeasure/MeasureUnitCode":"HoleDUnit",
"HorizontalAccuracyMeasure/MeasureValue":"HorAcc", "HorizontalAccuracyMeasure/MeasureUnitCode":"HorAccUnit",
"HorizontalCollectionMethodName":"HorCollMeth", "HorizontalCoordinateReferenceSystemDatumName":"HorRef",
"HUCEightDigitCode":"HUC8", "LatitudeMeasure":"Lat_Y", "LongitudeMeasure":"Lon_X",
"OrganizationIdentifier":"OrgId", "OrganizationFormalName":"OrgName", "StateCode":"StateCode",
"MonitoringLocationDescriptionText":"StationComment", "MonitoringLocationName":"StationName",
"MonitoringLocationTypeName":"StationType"}
In [ ]:
WQPStat.rename(columns=StatFieldDict,inplace=True)
Define the fields to drop to save memory.
In [ ]:
statdroplist = ["ContributingDrainageAreaMeasure/MeasureUnitCode", "ContributingDrainageAreaMeasure/MeasureValue",
"DrainageAreaMeasure/MeasureUnitCode", "DrainageAreaMeasure/MeasureValue", "CountryCode", "ProviderName",
"SourceMapScaleNumeric"]
WQPStat.drop(statdroplist,inplace=True,axis=1)
Make station types in the StationType field consistent for easier summary and compilation later on.
In [ ]:
TypeDict = {"Stream: Canal":"Stream", "River/Stream":"Stream",
"Stream: Canal":"Stream", "Well: Test hole not completed as a well":"Well"}
WQPStat.StationType = WQPStat["StationType"].apply(lambda x: TypeDict.get(x,x),1)
WQPStat.Elev = WQPStat.Elev.apply(lambda x: np.nan if x==0.0 else round(x,1), 1)
Remove preceding WQX from StationId field to remove duplicate station data created by legacy database.
In [ ]:
WQPStat['StationId'] = WQPStat['StationId'].str.replace('_WQX-','-')
WQPStat.drop_duplicates(subset=['StationId'],inplace=True)
SDWIS data were extracted from the Utah SDWIS database into ArcGIS 10.3.2 using the following SQL query. NED 10m elevation and UTM coordinates were appended using ArcGIS.
SELECT UTV80.TINWSF.EXTERNAL_SYS_NUM AS "FED_NM", UTV80.TINWSF.ST_ASGN_IDENT_CD AS "ST_ID", UTV80.TINWSF.TYPE_CODE, UTV80.TINWSYS.NAME AS "SYS_NM", UTV80.TINWSYS.D_PRIN_CNTY_SVD_NM AS "COUNTY", UTV80.TINWSF.NAME AS "FAC_NM", UTV80.TINWSF.TINWSYS_IS_NUMBER AS "SY_NBR", UTV80.TINLOC.LATITUDE_MEASURE AS "Y", UTV80.TINLOC.LONGITUDE_MEASURE AS "X", UTV80.TINLOC.VERTICAL_MEASURE AS "Z", UTV80.TSASAMPL.COLLLECTION_END_DT AS "DTE", UTV80.TSAANLYT.NAME AS "ANLY_NM", UTV80.TSASAR.CONCENTRATION_MSR AS "CONC_MSR", UTV80.TSASAR.TSASAR_IS_NUMBER AS "ID_NUM", UTV80.TSASAR.UOM_CODE, UTV80.TSASAR.DETECTN_LIMIT_NUM AS "DET_LIM", UTV80.TSASAR.DETECTN_LIM_UOM_CD AS "DET_UOM" FROM UTV80.TINWSF INNER JOIN UTV80.TINWSYS ON UTV80.TINWSF.TINWSYS_IS_NUMBER = UTV80.TINWSYS.TINWSYS_IS_NUMBER INNER JOIN UTV80.TINLOC ON UTV80.TINWSF.TINWSF_IS_NUMBER = UTV80.TINLOC.TINWSF_IS_NUMBER INNER JOIN UTV80.TSASMPPT ON UTV80.TINWSF.TINWSF_IS_NUMBER = UTV80.TSASMPPT.TINWSF0IS_NUMBER INNER JOIN UTV80.TSASAMPL ON UTV80.TSASMPPT.TSASMPPT_IS_NUMBER = UTV80.TSASAMPL.TSASMPPT_IS_NUMBER INNER JOIN UTV80.TSASAR ON UTV80.TSASAMPL.TSASAMPL_IS_NUMBER = UTV80.TSASAR.TSASAMPL_IS_NUMBER INNER JOIN UTV80.TSAANLYT ON UTV80.TSASAR.TSAANLYT_IS_NUMBER = UTV80.TSAANLYT.TSAANLYT_IS_NUMBER WHERE (UTV80.TINWSYS.D_PRIN_CNTY_SVD_NM LIKE '%CACHE COUNTY%') AND (UTV80.TSAANLYT.NAME LIKE '%NITRATE%' OR UTV80.TSAANLYT.NAME LIKE '%NITRITE%' OR UTV80.TSAANLYT.NAME LIKE '%AMMONI%' OR UTV80.TSAANLYT.NAME LIKE '%SULFATE%' OR UTV80.TSAANLYT.NAME LIKE '%TDS%' OR UTV80.TSAANLYT.NAME LIKE '%SODIUM%' OR UTV80.TSAANLYT.NAME LIKE '%FLUORIDE%' OR UTV80.TSAANLYT.NAME LIKE '%MAGNESIUM%' OR UTV80.TSAANLYT.NAME LIKE '%SELENIUM%' OR UTV80.TSAANLYT.NAME LIKE '%CALCIUM%' OR UTV80.TSAANLYT.NAME LIKE '%CHLORIDE%' OR UTV80.TSAANLYT.NAME LIKE '%POTASSIUM%' OR UTV80.TSAANLYT.NAME LIKE '%SELENIUM%' OR UTV80.TSAANLYT.NAME LIKE '%SILICA%' OR UTV80.TSAANLYT.NAME LIKE '%IRON %' OR UTV80.TSAANLYT.NAME LIKE '%ALKA %' OR UTV80.TSAANLYT.NAME LIKE '%CONDUCTIVITY%' OR UTV80.TSAANLYT.NAME LIKE '%PH %' OR UTV80.TSAANLYT.NAME LIKE '%TEMP%' OR UTV80.TSAANLYT.NAME LIKE '%ARSENIC%' OR UTV80.TSAANLYT.NAME LIKE '%CARBON%' OR UTV80.TSAANLYT.NAME LIKE '%TRITIUM%' OR UTV80.TSAANLYT.NAME LIKE '%COPPER%' OR UTV80.TSAANLYT.NAME LIKE '%LEAD%' OR UTV80.TSAANLYT.NAME LIKE '%NITROGEN%' OR UTV80.TSAANLYT.NAME LIKE '%PHOSPHATE%' OR UTV80.TSAANLYT.NAME LIKE '%TDS%' OR UTV80.TSAANLYT.NAME LIKE '%ZINC%' OR UTV80.TSAANLYT.NAME LIKE '%IRON%' OR UTV80.TSAANLYT.NAME LIKE '%CHROMIUM%' ) ORDER BY UTV80.TINWSF.ST_ASGN_IDENT_CD
SELECT UTV80.TINWSF.EXTERNAL_SYS_NUM AS "FED_NM", UTV80.TINWSF.ST_ASGN_IDENT_CD AS "ST_ID", UTV80.TINWSF.TYPE_CODE, UTV80.TINWSYS.NAME AS "SYS_NM", UTV80.TINWSYS.D_PRIN_CNTY_SVD_NM AS "COUNTY", UTV80.TINWSF.NAME AS "FAC_NM", UTV80.TINWSF.TINWSYS_IS_NUMBER AS "SY_NBR", UTV80.TINLOC.LATITUDE_MEASURE AS "Y", UTV80.TINLOC.LONGITUDE_MEASURE AS "X", UTV80.TINLOC.VERTICAL_MEASURE AS "Z", UTV80.TSASAMPL.COLLLECTION_END_DT AS "DTE", UTV80.TSAANLYT.NAME AS "ANLY_NM", UTV80.TSASAR.CONCENTRATION_MSR AS "CONC_MSR", UTV80.TSASAR.TSASAR_IS_NUMBER AS "ID_NUM", UTV80.TSASAR.UOM_CODE, UTV80.TSASAR.DETECTN_LIMIT_NUM AS "DET_LIM", UTV80.TSASAR.DETECTN_LIM_UOM_CD AS "DET_UOM" FROM UTV80.TINWSF INNER JOIN UTV80.TINWSYS ON UTV80.TINWSF.TINWSYS_IS_NUMBER = UTV80.TINWSYS.TINWSYS_IS_NUMBER INNER JOIN UTV80.TINLOC ON UTV80.TINWSF.TINWSF_IS_NUMBER = UTV80.TINLOC.TINWSF_IS_NUMBER INNER JOIN UTV80.TSASMPPT ON UTV80.TINWSF.TINWSF_IS_NUMBER = UTV80.TSASMPPT.TINWSF0IS_NUMBER INNER JOIN UTV80.TSASAMPL ON UTV80.TSASMPPT.TSASMPPT_IS_NUMBER = UTV80.TSASAMPL.TSASMPPT_IS_NUMBER INNER JOIN UTV80.TSASAR ON UTV80.TSASAMPL.TSASAMPL_IS_NUMBER = UTV80.TSASAR.TSASAMPL_IS_NUMBER INNER JOIN UTV80.TSAANLYT ON UTV80.TSASAR.TSAANLYT_IS_NUMBER = UTV80.TSAANLYT.TSAANLYT_IS_NUMBER WHERE UTV80.TINWSYS.D_PRIN_CNTY_SVD_NM LIKE '%CACHE COUNTY%' AND (UTV80.TINWSYS.NAME IN('%PROVID%','%MILL%','%LOG%','%NIB%', ORDER BY UTV80.TINWSF.ST_ASGN_IDENT_CD
Read in the queried SDWIS data and make a StationId and StationName field. Make field names consistent with those applied to WQP data above so that compilation is easier later.
In [ ]:
SDWIS = pd.read_csv(SDWISFile)
def sampid(x):
return "SDWIS" + str(x[0]) + str(x[1]) + str(x[2])[:-7]
def statid(x):
return "SDWIS" + str(x[0]) + str(x[1])
def statnm(x):
return str(str(x[0]) + " " + str(x[1])).title()
SDWIS["StationId"] = SDWIS[["FED_NM","ST_ID"]].apply(lambda x: statid(x),1)
SDWIS["StationName"] = SDWIS[["SYS_NM","FAC_NM"]].apply(lambda x: statnm(x),1)
SDWIS["SampleId"] = SDWIS[["FED_NM","ST_ID","DTE"]].apply(lambda x: sampid(x),1)
SDWIS["OrgId"] = "UDDW"
SDWIS["OrgName"] = "Utah Division of Drinking Water"
SDWIS["Elev"] = SDWIS["Z"].apply(lambda x: round(x*3.2808,1),1)
SDWIS["Unit"] = SDWIS["UOM_CODE"].apply(lambda x: str(x).lower(),1)
SDWIS["MDLUnit"] = SDWIS["DET_UOM"].apply(lambda x: str(x).lower(),1)
SDWIS["Param"] = SDWIS["ANLY_NM"].apply(lambda x: str(x).title().rstrip(),1)
In [ ]:
SDWISFields ={"DTE":"SampleDate", "TYPE_CODE":"StationType",
"CONC_MSR":"ResultValue", "DET_LIM":"MDL",
"Y":"Lat_Y", "X":"Lon_X"}
SDWIS.rename(columns=SDWISFields,inplace=True)
In [ ]:
def datetimefixSDWIS(x,format):
d = str(x).lstrip().rstrip()
try:
return datetime.datetime.strptime(d, "%m/%d/%Y %H:%M:%S")
except(ValueError):
return datetime.datetime.strptime(d, "%Y-%m-%d %H:%M:%S")
SDWIS["SampleDate"] = SDWIS["SampleDate"].apply(lambda x: datetimefixSDWIS(x,"%m/%d/%Y %H:%M:%S"),1)
Normalize units and nutrient data so that they are consistent with the WQP data. This includes standardizing ug/l to mg/l
In [ ]:
print sorted(list(SDWIS.Param.unique()))
def parnormSDWIS(x):
p = str(x[0]).rstrip().lstrip().lower()
u = str(x[2]).rstrip().lstrip().lower()
if p == 'nitrate':
return 'Nitrate', x[1]*4.427, 'mg/l'
elif p == 'nitrite':
return 'Nitrite', x[1]*3.285, 'mg/l'
elif p == 'nitrogen-ammonia as (n)':
return 'Ammonium', x[1]*1.288, 'mg/l'
elif u == 'ug/l':
return x[0], x[1]/1000, 'mg/l'
else:
return x[0], x[1], str(x[2]).rstrip()
SDWIS['Param'], SDWIS['ResultValue'], SDWIS['Unit'] = zip(*SDWIS[['Param','ResultValue','Unit']].apply(lambda x: parnormSDWIS(x),1))
Drop unneeded SDWIS fields to save memory and reduce confusion.
In [ ]:
SDWIS.drop(["FED_NM", "DET_UOM", "UOM_CODE","ANLY_NM", "FAC_NM", "ST_ID",
"SYS_NM", "COUNTY", "SY_NBR", "Z", "ID_NUM"],inplace=True, axis=1)
Rename chemical parameters in the SDWIS Param field to match those of the WQP data.
In [ ]:
SDWISPmatch = {"Ph":"pH","Tds":"TDS","Nitrogen-Ammonia As (N)":"Nitrogen-Ammonia as (N)",
"Hydroxide As Calcium Carbonate":"Hydroxide as Calcium Carbonate",
"Bicarbonate As Hco3":"Bicarbonate as HCO3"}
SDWIS["Param"] = SDWIS["Param"].apply(lambda x: SDWISPmatch.get(x,x))
SDWIS["StationName"] = SDWIS["StationName"].apply(lambda x: x.replace("Wtp","WTP"))
SDWIS["ResultValue"] = SDWIS[["ResultValue","Unit"]].apply(lambda x: x[0]/1000 if x[1]=="ug/L" else x[0], 1)
Make station types consistent with the WQP data.
In [ ]:
SDWISType = {"SP":"Spring","WL":"Well","TP":"Facility Other","IN":"Stream","CC":"Connection","WH":"Well"}
SDWIS.StationType = SDWIS.StationType.apply(lambda x: SDWISType.get(x,x),1)
SDWIS facility type code (FacTypeCode): CC, Consecutive_connection; CH, Common_headers; CS, Cistern; CW, Clear_well; DS, Distribution_system_zone; IG, Infiltration_gallery; IN, Intake; NP, Non-piped; OT, Other; PC, Pressure_control; PF, Pump_facility; RC, Roof_catchment; RS, Reservoir; SI, Surface_impoundment; SP, Spring; SS, Sampling_station; ST, Storage; TM, Transmission_main; TP, Treatment_plant; WH, Well_head; WL, Well.
Create a SDWIS stations file from the SDWIS data. Drop unneeded fields from the station file. Remove duplication stations.
In [ ]:
SDWISSta = SDWIS.drop([u'SampleDate', u'ResultValue', u'MDL', u'SampleId', u'Unit', u'MDLUnit', u'Param'], axis=1)
SDWISSta.drop_duplicates(inplace=True)
Create SDWIS results file from the SDWIS data. Drop unneeded fields from the results file. These are fields that are in the station field and apply to stations.
In [ ]:
SDWISRes = SDWIS.drop([u'StationType', u'Lat_Y', u'Lon_X', u'StationName', u'Elev'], axis=1)
Create a sample media field and populate it with the value Groundwater.
In [ ]:
SDWISRes["SampMedia"] = "Groundwater"
Import Utah Department of Food and Agriculture data from the data file. These data were compiled from reports available of the UDAF website. Once the data are imported, rename the fields to match the above SDWIS and WQP data.
In [ ]:
AGStat = pd.read_csv(AGStationsFile)
AGStat["StationType"] = "Well"
AGStatFields = {"SITEID":"StationId","FINISHEDDE":"Depth","POINT_Y":"Lat_Y",
"POINT_X":"Lon_X","ELEV_FT":"Elev","ACCURACY":"HorAcc"}
AGStat.rename(columns=AGStatFields,inplace=True)
Drop unneeded fields to save memory.
In [ ]:
AGStat.drop(["OBJECTID_1", "OBJECTID", "PUB_YR", "SAMPLENO", "WLDATE", "WLDEPTH"], inplace=True, axis=1)
Add UDAF prefix to the station identification field (StationId) to make station ids unique.
In [ ]:
AGStat.StationId = AGStat.StationId.apply(lambda x: "UDAF-"+str(int(x)).zfill(5),1)
Import data Utah Department of Food and Agriculture data from the data file. These data were compiled from reports available of the UDAF website. Once the data are imported, rename the fields to match the above SDWIS and WQP data.
In [ ]:
names = ["SampleId","ResultValue", "ParAbb", "Unit", "Param", "MDL","BelowLim","TestNo",
"StationId","SampleDate","SampYear"]
AGRes = pd.read_csv(AGResultsFile, names=names, index_col=10)
Create a detection condition field and populate it based on values in the imported data.
In [ ]:
AGRes["DetectCond"] = AGRes["BelowLim"].apply(lambda x: 'Not Detected' if x=='Y' else np.nan,1)
Fill null result values with zeros when data are reported as below detection limit.
In [ ]:
AGRes.ResultValue = AGRes[["BelowLim","ResultValue"]].apply(lambda x: np.nan if x[0]=="Y" or x[1] == 0.0 else x[1], 1)
Make data consistent by cleaning up parameter descriptions.
In [ ]:
def parnormAG(x):
p = str(x[0]).rstrip().lstrip().lower()
u = str(x[2]).rstrip().lstrip().lower()
if p == 'nitrate-n':
return 'Nitrate', x[1]*4.427, 'mg/l'
elif u == 'ug/l':
return x[0], x[1]/1000, 'mg/l'
else:
return x[0], x[1], str(x[2]).rstrip()
AGRes['Param'], AGRes['ResultValue'], AGRes['Unit'] = zip(*AGRes[['Param','ResultValue','Unit']].apply(lambda x: parnormAG(x),1))
In [ ]:
AGRes.Unit.unique()
In [ ]:
AGRes.dropna(subset=["StationId","ResultValue"], how="any", inplace=True)
In [ ]:
AGRes.StationId = AGRes.StationId.apply(lambda x: "UDAF-"+str(int(x)).zfill(5),1)
In [ ]:
AGStAv = list(AGStat.StationId.values)
AGRes = AGRes[AGRes.StationId.isin(AGStAv)]
AGRes["SampMedia"] = "Groundwater"
In [ ]:
AGStat['OrgId']='UDAF'
Legacy EPA data are kept in the STORET Legacy Database.
In [ ]:
STORLegSta = pd.read_table(STORLegStatFile, skiprows=[1])
STORLegRes = pd.read_table(STORLegResFile, skiprows=[1])
STORParam = pd.read_table(STORParamFile)
Parse choppy text data from the STORET Legacy database.
In [ ]:
rescol = list(STORLegRes.columns)
j = []
for i in rescol:
j.append(i.rstrip("\t").rstrip().lstrip().replace(" ",""))
resdict = dict(zip(rescol,j))
STORLegRes.rename(columns=resdict,inplace=True)
statcol = list(STORLegSta.columns)
k = []
for i in statcol:
k.append(i.rstrip("\t").rstrip().lstrip().replace(" ",""))
statdict = dict(zip(statcol,k))
STORLegSta.rename(columns=statdict,inplace=True)
In [ ]:
STORLegRes["SampleDate"] = STORLegRes[["StartDate","StartTime"]].apply(lambda x: datetimefix(x,"%Y-%m-%d %H:%M"),1)
STORLegRes = STORLegRes[STORLegRes.SecondaryActivityCategory.isin(['Water',np.nan])]
STORParamDict = dict(zip(STORParam['Parameter No.'].values, STORParam['Full Name'].values))
STORLegRes.Param = STORLegRes.Param.apply(lambda x: STORParamDict.get(x),1)
In [ ]:
STORResField = {"Agency":"OrgId","AgencyName":"OrgName","Station":"StationId","SampleDepth":"SampDepth"}
STORLegRes.rename(columns=STORResField,inplace=True)
In [ ]:
STORLegRes.drop(["StateName", "CountyName", "HUC", "EndDate", "UMK", "CS", "ReplicateNumber",
"COMPOSITE_GRAB_NUMBER","CM","PrimaryActivityCategory","PrimaryActivityCategory",
"SecondaryActivityCategory",
"EndTime", "StartDate", "StartTime", "Latitude", "Longitude"],inplace=True,axis=1)
In [ ]:
STORLegRes["SampleId"] = STORLegRes[["StationId","SampleDate"]].apply(lambda x: str(x[0]) + "-" + str(x[1]),1 )
STORLegRes["StationId"] = STORLegRes["StationId"].apply(lambda x: "EPALeg-" + x, 1)
In [ ]:
STORLegRes.Param = STORLegRes.Param.apply(lambda x: str(x).title(),1)
In [ ]:
STORLegRes.columns
In [ ]:
def parnormSTOR(x):
p = str(x[0]).rstrip().lstrip().lower()
if p == 'nitrate nitrogen, total (mg/L as n)' or p== 'nitrate nitrogen, total':
return 'Nitrate', x[1]*4.427, 'mg/l'
elif p == 'nitrite nitrogen, total (mg/l as n)':
return 'Nitrite', x[1]*3.285, 'mg/l'
elif p == 'nitrogen, ammonia, total (mg/l as n)':
return 'Ammonium', x[1]*1.288, 'mg/l'
elif p == 'sulfate (as s) whole water, mg/L':
return 'Sulfate', x[1]*2.996, 'mg/l'
elif p in ('phosphorus, dissolved orthophosphate (mg/l as p)'):
return 'Phosphate', x[1]*3.066, 'mg/l'
else:
return x[0], x[1], np.nan
STORLegRes['Param'], STORLegRes['ResultValue'], STORLegRes['Unit'] = zip(*STORLegRes[['Param','ResultValue']].apply(lambda x: parnormSTOR(x),1))
In [ ]:
STORKeepers = ['Temperature, Water (Degrees Centigrade)',
'Temperature, Water (Degrees Fahrenheit)',
'Specific Conductance,Field (Umhos/Cm @ 25C)',
'Specific Conductance (Umhos/Cm @ 25C)',
'Sulfate (As S) Whole Water, Mg/L',
'Oxygen, Dissolved Mg/L',
'Oxygen, Dissolved, Percent Of Saturation %',
'Bod, 5 Day, 20 Deg C Mg/L',
'Ph (Standard Units)', 'Ph, Lab, Standard Units Su',
'Carbon Dioxide (Mg/L As Co2)', 'Alkalinity,Total,Low Level Gran Analysis Ueq/L',
'Alkalinity, Total (Mg/L As Caco3)', 'Bicarbonate Ion (Mg/L As Hco3)', 'Carbonate Ion (Mg/L As Co3)',
'Nitrogen, Ammonia, Total (Mg/L As N)', 'Ammonia, Unionzed (Mg/L As N)',
'Nitrite Nitrogen, Total (Mg/L As N)', 'Ammonia, Unionized (Calc Fr Temp-Ph-Nh4) (Mg/L)',
'Nitrate Nitrogen, Total (Mg/L As N)', 'Nitrogen, Kjeldahl, Total, (Mg/L As N)',
'Nitrite Plus Nitrate, Total 1 Det. (Mg/L As N)', 'Phosphorus (P), Water, Total Recoverable Ug/L',
'Phosphorus, Total (Mg/L As P)', 'Phosphorus, Dissolved Orthophosphate (Mg/L As P)',
'Carbon, Dissolved Organic (Mg/L As C)',
'Carbon, Dissolved Inorganic (Mg/L As C)',
'Hardness, Total (Mg/L As Caco3)', 'Calcium (Mg/L As Caco3)',
'Calcium, Dissolved (Mg/L As Ca)',
'Magnesium, Dissolved (Mg/L As Mg)',
'Sodium, Dissolved (Mg/L As Na)',
'Potassium, Dissolved (Mg/L As K)',
'Chloride, Dissolved In Water Mg/L',
'Sulfate, Dissolved (Mg/L As So4)',
'Fluoride, Dissolved (Mg/L As F)',
'Silica, Dissolved (Mg/L As Si02)',
'Arsenic, Dissolved (Ug/L As As)', 'Arsenic, Total (Ug/L As As)',
'Barium, Dissolved (Ug/L As Ba)', 'Barium, Total (Ug/L As Ba)',
'Beryllium, Total (Ug/L As Be)', 'Boron, Dissolved (Ug/L As B)',
'Boron, Total (Ug/L As B)', 'Cadmium, Dissolved (Ug/L As Cd)',
'Cadmium, Total (Ug/L As Cd)', 'Chromium, Dissolved (Ug/L As Cr)',
'Chromium, Hexavalent (Ug/L As Cr)', 'Chromium, Total (Ug/L As Cr)',
'Copper, Dissolved (Ug/L As Cu)', 'Copper, Total (Ug/L As Cu)',
'Iron, Dissolved (Ug/L As Fe)', 'Lead, Dissolved (Ug/L As Pb)',
'Lead, Total (Ug/L As Pb)', 'Manganese, Total (Ug/L As Mn)',
'Manganese, Dissolved (Ug/L As Mn)', 'Thallium, Total (Ug/L As Tl)',
'Nickel, Dissolved (Ug/L As Ni)', 'Nickel, Total (Ug/L As Ni)',
'Silver, Dissolved (Ug/L As Ag)', 'Silver, Total (Ug/L As Ag)',
'Zinc, Dissolved (Ug/L As Zn)', 'Zinc, Total (Ug/L As Zn)',
'Antimony, Total (Ug/L As Sb)', 'Aluminum, Total (Ug/L As Al)',
'Selenium, Dissolved (Ug/L As Se)', 'Selenium, Total (Ug/L As Se)',
'Tritium (1H3),Total (Picocuries/Liter)',
'Hardness, Ca Mg Calculated (Mg/L As Caco3)',
'Chlorine, Total Residual (Mg/L)',
'Residue,Total Filtrable (Dried At 180C),Mg/L',
'Nitrate Nitrogen, Dissolved (Mg/L As No3)', 'Iron (Ug/L As Fe)',
'Phosphorus, Total, As Po4 - Mg/L', 'Mercury, Total (Ug/L As Hg)']
STORLegRes = STORLegRes[STORLegRes.Param.isin(STORKeepers)]
In [ ]:
def parsplit(x,p):
x = str(x).rstrip().lstrip()
if p == "Un":
z = -1
x = str(x).replace("Mg/L", "mg/l")
x = str(x).replace("Ug/L", "ug/l")
x = str(x).replace("o", "O")
x = str(x).replace("c", "C")
x = str(x).replace("TOtal ReCOverable","Total Recoverable")
x = str(x).replace("UmhOs", "umhos")
x = str(x).replace("TOtal","Total")
elif p== "Par":
z = 0
x = str(x).replace(", Standard Units","")
x = str(x).replace(", Unionized","")
x = str(x).replace(", Unionzed","")
x = str(x).replace(",Low Level Gran Analysis","")
x = str(x).replace(" Ion","")
x = str(x).replace(",Total",", Total")
if x == "Ph" or x == "Ph, Lab":
x = str(x).replace("Ph","pH")
if "(" in x:
x = str(x).replace(" As ", " as ")
return str(x).split(" (")[z].rstrip(")").rstrip().lstrip()
else:
return str(x).split(" ")[z].rstrip().lstrip()
def splitmore(x):
if "NO3" in x:
return x
elif " as " in x:
return x.split(" as ")[0]
elif x == "As S) WhOle Water, mg/l" or x == "Dried At 180C),mg/l" or x=="PhOsphOrus, Total, As PO4 - mg/l":
return "mg/l"
elif x == "P), Water, Total Recoverable ug/l":
return "ug/l"
else:
return x
def unitconv(x):
if x[1]=="ug/l":
return x[0]/1000
elif x[1]=="Degrees Fahrenheit":
return (float(x[0])-32.0)*(5.0/9.0)
else:
return x[0]
In [ ]:
STORLegRes["Unit"] = STORLegRes["Param"].apply(lambda x: parsplit(x,"Un"), 1)
STORLegRes["Param"] = STORLegRes["Param"].apply(lambda x: parsplit(x,"Par"), 1)
STORLegRes["Unit"] = STORLegRes["Unit"].apply(lambda x: splitmore(x), 1)
STORLegRes["ResultValue"] = STORLegRes[["ResultValue","Unit"]].apply(lambda x: unitconv(x), 1)
STORLegRes["Unit"] = STORLegRes["Unit"].apply(lambda x: "mg/l" if x=="ug/l" else x, 1)
STORLegRes["Unit"] = STORLegRes["Unit"].apply(lambda x: "Degrees Centigrade" if x=="Degrees Fahrenheit" else x, 1)
In [ ]:
STORStaField = {"Agency":"OrgId","AgencyName":"OrgName","Station":"StationId", "DepthUnits":"DepthUnit",
"Latitude":"Lat_Y", "Longitude":"Lon_X", "HUC":"HUC8", "StationDepth":"Depth"}
STORLegSta.rename(columns=STORStaField,inplace=True)
In [ ]:
STORLegSta.columns
In [ ]:
STORLegSta.drop(["RchmileSegment", "MilesUpReach", "Rchonoff", "Description", "G", "S", "StationAlias",
"Rchname", "StateName", "CountyName"], inplace=True, axis=1)
In [ ]:
STORLegSta.StationType = STORLegSta.StationType.apply(lambda x: str(x).rstrip(" ").strip("/SUPPLY").split("/")[-1].title(),1)
In [ ]:
LegTypeDict = {"We":"Well"}
STORLegSta.StationType = STORLegSta.StationType.apply(lambda x: LegTypeDict.get(x,x),1)
In [ ]:
STORLegSta.StationId = STORLegSta["StationId"].apply(lambda x: "EPALeg-" + x, 1)
In [ ]:
UGSfield = pd.read_excel(fielddata,"FieldChem") #Field data
UGSNO3 = pd.read_excel(fielddata,"Nitrate") #Nitrate data provided by Millville City
In [ ]:
UGS = pd.read_csv(UGSFile, engine="python")
UGS["StationId"] = UGS["SITE"].apply(lambda x:"UGS-"+str(x).zfill(4),1)
In [ ]:
UGSSta = UGS.drop([u'OBJECTID_1',u'SITE', u'TDS', u'Temp', u'Cond', u'CO2', u'HCO3',
u'CO3',u'Na', u'pH', u'Ca', u'SO4', u'NO3', u'As_', u'Cl', u'K',
u'Mg', u'Hard', u'NH4'], axis=1)
In [ ]:
UGSRe = UGS.drop([u'OBJECTID_1',u'SITE',u'StationType', u'Geology', u'Elev', u'Lat_Y', u'Lon_X', u'StationName',
u'OrgId', u'WRNUM', u'SITE', u'UTM_X', u'UTM_Y', u'Depth_ft'], axis=1)
In [ ]:
UGSRe["SampleId"] = UGSRe.index
In [ ]:
UGSRe.reset_index(inplace=True)
UGSRe.set_index(["StationId","SampleId"], inplace=True)
In [ ]:
UGSRe.drop(UGSRe.columns[0],inplace=True,axis=1)
In [ ]:
UGSStack = UGSRe.stack().to_frame()
UGSStack.columns = ["ResultValue"]
In [ ]:
UGSStack.reset_index(inplace=True)
In [ ]:
UGSStack.columns=["StationId","SampleId","Param","ResultValue"]
In [ ]:
def unitcon(x):
if x=="pH":
return ""
elif x=="Temp":
return "C"
elif x=="Cond":
return "uS/cm"
else:
return "mg/l"
UGSStack["Unit"] = UGSStack["Param"].apply(lambda x: unitcon(x),1)
UGSStack["ParAbb"] = UGSStack["Param"]
UGSStack["OrgId"] = "UGS"
UGSStack["OrgName"] = "Utah Geological Survey"
UGSStack["ResultValue"] = UGSStack[['Param','ResultValue']].apply(lambda x: x[1]*1.288 if x[0]=='Ammonia as N' else x[1],1)
UGSStack["Param"] = UGSStack['Param'].apply(lambda x: 'Ammonia' if x=='Ammonia as N' else x, 1)
UGSStack["ResultValue"] = UGSStack[['Param','ResultValue']].apply(lambda x: x[1]*3.066 if x[0]=='Phosphate, Tot. Dig. (as P)' else x[1],1)
UGSStack["Param"] = UGSStack['Param'].apply(lambda x: 'Phosphate' if x=='Phosphate, Tot. Dig. (as P)' else x, 1)
These are raw data results sent to the UGS via Tab-delimited tables from the Utah State Health Laboratory. They make up the bulk of results of data collected for this study. They are supplimented with field data translated to spreadsheets.
In [ ]:
SLSampMatch = pd.read_excel(fielddata,"StateLabMatch")
SLStat = pd.read_excel(fielddata,"Stations")
#SLStat = pd.merge(SLSampMatch, SLStations, on='StationId', how='outer')
#SLStat.reset_index(inplace=True)
In [ ]:
SLStat
In [ ]:
SL0 = pd.read_table(statelabresults0, sep="\t", lineterminator="\n", error_bad_lines=False)
SL0 = SL0[SL0['Collector']=='PI']
SL1 = pd.read_table(statelabresults1, sep="\t", lineterminator="\n", error_bad_lines=False)
SL1 = SL1[SL1['Collector']=='PI']
SL2 = pd.read_table(statelabresults2, sep="\t", lineterminator="\n", error_bad_lines=False)
SL2 = SL2[SL2['Collector']=='PI']
In [ ]:
SL = pd.concat([SL0,SL1,SL2])
In [ ]:
SL["OrgId"] = "UGS"
SL["OrgName"] = "Utah Geological Survey"
SL['DetectCond'] = SL['Problem#Identifier'].apply(lambda x: 'Not Detected' if str(x).rstrip()=='<' else np.nan,1)
SL['SampleDate'] = SL[['Sample#Date','Sample#Time']].apply(lambda x: datetimefix(x,"%m/%d/%y %H:%M"),1)
SLHead = {'Sample#Number':'SampleId', 'Param#Description':'Param', 'Result#Value':'ResultValue','Units':'Unit',
'Lower#Report#Limit':'MDL','Method#ID':'SampMeth','Analysis#Date':'AnalysisDate'}
SL.rename(columns=SLHead,inplace=True)
In [ ]:
SL['Sample#Description'].unique()
In [ ]:
SL.drop([u'Lab#Code', u'Station#ID', u'Source#Code', u'Sample#Date',
u'Sample#Time', u'Sample#Type', u'Cost#Code', u'Billing#Code',
u'Agency#Bill#Code', u'Trip#ID', u'Sample#Description', u'Collector',
u'Sample#Recieved#Date', u'Chain#of#Custody#Ind.', u'Replicate#Number',
u'Sample#Comment', u'Method#Number', u'Method#Agency',
u'Method#Description', u'Param#Number', u'CAS#Number',
u'Matrix#Number', u'Matrix#Description', u'Preparation#Date',
u'Problem#Identifier', u'Result#Code',
u'Upper#Quant#Limit', u'Method#Detect#Limit',
u'Confidence#Limit', u'%#Confidence#Limit',u'Dilution#Factor',
u'Batch#Number',u'Comment#Number', u'Comment#Text'], inplace=True, axis=1)
In [ ]:
SL.columns
In [ ]:
SLRes = pd.merge(SL, SLSampMatch, on='SampleId', how='left')
In [ ]:
SLStat.drop_duplicates(inplace=True)
In [ ]:
def SLparnorm(x):
p = str(x[0]).rstrip().lstrip().lower()
u = str(x[2]).rstrip().lstrip().lower()
if p == 'nitrate nitrogen, total (mg/l as n)':
return 'Nitrate', x[1]*4.427, 'mg/l'
elif p == 'nitrite nitrogen, total (mg/l as n)':
return 'Nitrite', x[1]*3.285, 'mg/l'
elif p == 'ammonia as n':
return 'Ammonium', x[1]*1.288, 'mg/l'
elif p == 'sulfate (as s) whole water, mg/L':
return 'Sulfate', x[1]*2.996, 'mg/l'
elif p in ('phosphate, tot. dig. (as p)', 'phosphate-phosphorus as p','orthophosphate as p'):
return 'Phosphate', x[1]*3.066, 'mg/l'
elif u == 'ug/l':
return x[0], x[1]/1000, 'mg/l'
else:
return x[0], x[1], str(x[2]).rstrip()
def MDLfix(x):
u = str(x[1]).rstrip().lstrip().lower()
if np.isfinite(x[2]):
return x[0]
elif u=='ug/l':
return x[0]/1000
else:
return x[0]
SLRes['MDL'] = SLRes[['MDL','Unit','ResultValue']].apply(lambda x: MDLfix(x),1)
SLRes['Param'], SLRes['ResultValue'], SLRes['Unit'] = zip(*SLRes[['Param','ResultValue','Unit']].apply(lambda x: parnorm(x),1))
In [ ]:
SLRes.StationId.unique()
In [ ]:
Res = pd.concat([STORLegRes,AGRes,SDWISRes,WQP,UGSStack,SLRes,UGSfield,UGSNO3])
In [ ]:
Res = Res[~Res["Unit"].isin(['ueq/L','Ueq/L','ueq/l','tons/ac ft','tons/day','meq/L'])]
Res = Res[~Res["Param"].isin(["Heptachlorobiphenyl", "Hydrocarbons", "Hydroxide", "Ionic strength",
"Floating debris, severity", "Carbon Tetrachloride", "Trichlorobiphenyl",
"Vinyl Chloride", "True color", "Color", "Trash, Debris, Floatables",
"Total volatile solids", "Temperature, air", "Residue, Total Filtrable",
"Pentachlorobiphenyl", "Odor threshold number", "Odor, atmospheric",
"Instream features, est. stream width", "Hydroxide",
"Light, transmissivity","Algae, floating mats (severity)"])]
len(Res)
In [ ]:
Res[["Param","Unit","USGSPCode"]].drop_duplicates(subset=["Param","Unit"]).sort_values(by=["Param"]).to_clipboard()
In [ ]:
Stat = pd.concat([STORLegSta, AGStat, SDWISSta, WQPStat, SLStat, UGSSta])
In [ ]:
parmatch = pd.read_excel(rootname + "Aquachem.xlsx")
In [ ]:
parmatchdict = dict(zip(parmatch.Param.values, parmatch.ParrAbb.values))
Res["ParAbb"] = Res[["ParAbb","Param"]].apply(lambda x: parmatchdict.get(x[1],x[0]),1)
In [ ]:
results = Res.dropna(subset=["StationId","Param","SampleId"], how="any")
In [ ]:
Stat.loc[:,"StationName"] = Stat["StationName"].apply(lambda x: str(x).strip().lstrip().rstrip(),1)
Stat.loc[:,"StationId"] = Stat["StationId"].apply(lambda x: str(x).strip().lstrip().rstrip(),1)
Res.loc[:,"StationId"] = Res["StationId"].apply(lambda x: str(x).strip().lstrip().rstrip(),1)
In [ ]:
results.loc[:,"Unit"] = results[["ParAbb","Unit"]].apply(lambda x: "C" if x[0]=="Temp" else x[1],1)
results.loc[:,"Unit"] = results[["ParAbb","Unit"]].apply(lambda x: "umhos/cm" if x[0]=="Cond" else x[1],1)
results.loc[:,"Unit"] = results[["ParAbb","Unit"]].apply(lambda x: "" if x[0]=="pH" else x[1],1)
In [ ]:
results.drop(["AnalysisDate","AnalytMeth","SampType","AnalytMethId", "BelowLim", "StationName",
"MethodDescript", "LabComments", "LabName", "LimitType", "ProjectId", "QualCode",
"OrgName","R", "ResultComment","ResultStatus","SampComment", "SampEquip",
"SampDepthRef", "SampDepthU","SampDepth","SampType", "USGSPCode",
"SampMeth", "SampMethName","SampYear","TestNo"],inplace=True,axis=1)
In [ ]:
NDs = {'Not Detected':'<', 'Present Above Quantification Limit':'<', 'ND ':'<', '*Present >QL ':'>',
'Present Below Quantification Limit':'<', '*Non-detect ':'<', 'Detected Not Quantified':'<',
'Systematic Contamination':'<'}
results.DetectCond = results.DetectCond.apply(lambda x: NDs.get(x,np.nan),1)
In [ ]:
def is_nan(x):
'''
this function identifies nan values
Source: http://stackoverflow.com/questions/944700/how-to-check-for-nan-in-python
'''
try:
return math.isnan(x)
except:
return False
def detected(x):
'''
Finds nondetects and fixes units and values
'''
if x[1]=='<' and np.isfinite(x[0]):
return x[1]+str(x[0])
elif x[1]=='<' and np.isfinite(x[2]):
if str(x[3]).rstrip().lower() == 'ug/l':
return x[1]+str(x[2]/1000)
else:
return x[1]+str(x[2])
else:
return x[0]
results.ResultValue = results[['ResultValue','DetectCond','MDL','MDLUnit']].apply(lambda x: detected(x),1)
In [ ]:
def MDLfill(x):
if x[0] <= 0 and x[1]>0:
return 0
elif x[2] == '<':
return 0
elif x[0] < x[1]:
return 0
else:
return 1
results.loc[:,'ResValue'] = pd.to_numeric(results['ResultValue'], errors='coerce')
results.loc[:,'Censored'] = results[['ResValue','MDL','DetectCond']].apply(lambda x: MDLfill(x),1)
In [ ]:
matchDict = {'414143111495501':'USGS-414143111495501','414115111490301':'USGS-414115111490301',
'SDWIS3117.0WS004':'USGS-414115111490301',
'EPALeg-0301203':'414029111483501','SDWIS3116.0WS003':'414029111483501',
'EPALeg-0301201':'USGS-414024111481101','SDWIS5435.0WS001':'USGS-414024111481101',
'414024111481101':'USGS-414024111481101','EPALeg-0300101':'SDWIS5411.0WS001',
'EPALeg-0300102':'SDWIS5412.0WS002', 'EPALeg-0300103':'SDWIS5413.0WS003',
'UGS-107.5':'SDWIS3143.0WS001','UDAF-01492':'UGS-0412', 'UDAF-03165':'UGS-106.5',
'SDWIS3126.0WS002':'USGS-414216111485201', 'EPALeg-0301702':'USGS-414216111485201',
'EPALeg-0301901':'USGS-414328111493001', 'SDWIS3131.0WS001':'USGS-414328111493001',
'EPALeg-0301005':'SDWIS3112.0WS005', 'EPALeg-0301002':'USGS-414417111484301',
'SDWIS3109.0WS002':'USGS-414417111484301', 'SDWIS3113.0WS006':'USGS-414459111493601',
'SDWIS3127.0WS003':'414213111493101', 'SDWIS3159.0WS003':'SDWIS3157.0WS001','UDAF-01500':'UGS-63.5',
'SDWIS3111.0WS004':'USGS-414441111490701', 'EPALeg-0301904':'SDWIS3133.0WS004',
'EPALeg-0301004':'USGS-414441111490701','EPALeg-0301502':'SDWIS3118.0WS002',
'UDAF-01589':'UDAF-01568','UDAF-01586':'UDAF-01566','UGS-0050':'UDAF-01566',
'EPALeg-0300104':'SDWIS3088.0WS004', 'UDAF-01585':'UGS-0032', 'UDAF-01565':'UGS-0032',
'EPALeg-0300201':'SDWIS3091.0WS001', 'EPALeg-0300204':'SDWIS3094.0WS004',
'EPALeg-0301803':'SDWIS3129.0WS003','EPALeg-0300405':'SDWIS5418.0WS005',
'EPALeg-0300404':'SDWIS5417.0WS004', 'EPALeg-0300403':'SDWIS5416.0WS003',
'SDWIS5439.0WS003':'SDWIS5416.0WS003', 'SDWIS5460.0WS003':'SDWIS5416.0WS003',
'SDWIS5414.0WS001':'SDWIS5458.0WS001', 'SDWIS5437.0WS001':'SDWIS5458.0WS001',
'EPALeg-0308601':'USGS-415828111460001', 'SDWIS5487.0WS001':'USGS-415828111460001',
'SDWIS5430.0WS002':'USGS-415828111460001', 'SDWIS5423.0WS003':'USGS-415828111460001',
'SDWIS5421.0WS001':'USGS-415836111464701', 'EPALeg-0304901':'SDWIS5479.0WS001',
'EPALeg-0303201':'SDWIS5470.0WS001', 'SDWIS5432.0WS001':'USGS-414535111423001',
'EPALeg-0303001':'SDWIS5469.0WS001','EPALeg-0307701':'SDWIS5485.0WS001',
'EPALeg-0308301':'SDWIS5486.0WS001', 'EPALeg-0301501':'SDWIS5445.0WS001',
'EPALeg-0300701':'USGS-415120111440001', 'SDWIS5424.0WS001':'USGS-415120111440001',
'EPALeg-0302001':'SDWIS5455.0WS001', 'EPALeg-0301101':'SDWIS5433.0WS001',
'EPALeg-0301102':'SDWIS5434.0WS002'}
Stat.loc[:,'StationId'] = Stat['StationId'].apply(lambda x: matchDict.get(x,x),1)
results.loc[:,'StationId'] = results['StationId'].apply(lambda x: matchDict.get(x,x),1)
results.loc[:,'SampleDate'] = pd.to_datetime(results.SampleDate)
In [ ]:
def depthFill(x):
if x > 0:
return x
def depthUnitFill(x):
if x > 0:
return 'ft'
Stat.Depth = Stat['Depth_ft'].apply(lambda x: depthFill(x),1)
Stat.DepthUnit = Stat['Depth_ft'].apply(lambda x: depthUnitFill(x),1)
In [ ]:
WINdict = {'SDWIS3180.0WS001':435116, 'UGS-47.5':32700, 'UDAF-01566':30211, 'UGS-46.5':12420,
'USGS-414525111503705':427268, 'UDAF-01569':28327, 'SDWIS3112.0WS005':2694,
'UDAF-03162':434818, 'USGS-414328111493001':2823, 'USGS-414332111491001':2836,
'SDWIS3133.0WS004':2848, 'UGS-91.5':28647, 'UGS-95.5':35814, 'SDWIS3128.0WS004':18590,
'UGS-0102':426853, 'USGS-414115111490301':2722, 'UT4140521114843201':32975, '414029111483501':2721,
'SDWIS3088.0WS004':2741,'UGS-63.5':7126, 'UGS-0084':9639, 'USGS-414134111544701':434098, 'UGS-0070':35061,
'UGS-0029':32851,'UGS-0030':26663,'UGS-0034':29110, 'UGS-0055':3728, 'UGS-61.5':9280,
'SDWIS3129.0WS003':2816, 'UGS-0043':29329, 'UGS-0889':24493, 'UGS-44.5':28333}
Stat.WIN = Stat['StationId'].apply(lambda x: WINdict.get(x,x),1)
In [ ]:
results.SampleID = results.SampleId.apply(lambda x: str(x).replace(' ',''),1)
results.StationId = results.StationId.apply(lambda x: str(x).replace(' ',''),1)
Stat.StationId = Stat.StationId.apply(lambda x: str(x).replace(' ',''),1)
In [ ]:
results.drop_duplicates(subset = ['SampleId','ParAbb'],inplace=True)
Stat.drop_duplicates(subset = ['StationId'],inplace=True)
In [ ]:
resultsNoND = results[(~results['DetectCond'].isin(['<','>']))]
In [ ]:
datap = resultsNoND.pivot(index='SampleId', columns='ParAbb', values='ResValue')
In [ ]:
datap.dropna(subset=['SO4','Cond','Temp','TDS','pH_field'],how='all',inplace=True)
datap.drop(datap.columns[[0]], axis=1, inplace=True)
In [ ]:
results.columns
In [ ]:
resdrop = [ 'DetectCond', u'Comment#Number.1', u'Comment#Text.1', 'ResultValue', 'ResValue',
'MDL', 'MDLUnit', 'OrgId', 'Param', 'ResultValue', 'SampFrac',
'SampMedia', 'Unit', 'ParAbb']
resPivot = results.drop(resdrop, axis=1)
In [ ]:
datapiv = pd.merge(datap, resPivot, left_index=True, right_on='SampleId',how='left')
datapiv.drop_duplicates(subset=['SampleId'],inplace=True)
In [ ]:
def projy(x):
inProj = Proj(init='epsg:4326') #WGS84
outProj = Proj(init='epsg:2152') #NAD83(CSRS98) / UTM zone 12N
x2,y2 = transform(inProj,outProj,x[0],x[1])
return y2
def projx(x):
inProj = Proj(init='epsg:4326') #WGS84
outProj = Proj(init='epsg:2152') #NAD83(CSRS98) / UTM zone 12N
x2,y2 = transform(inProj,outProj,x[0],x[1])
return x2
def getelev(x):
elev = "http://ned.usgs.gov/epqs/pqs.php?x="+str(x[0])+"&y="+str(x[1])+"&units=Meters&output=xml"
response = urllib2.urlopen(elev)
html = response.read()
d = xmltodict.parse(html)
return float(d['USGS_Elevation_Point_Query_Service']['Elevation_Query']['Elevation'])
Stat.loc[:,'UTM_X'] = Stat[['Lon_X','Lat_Y']].apply(lambda x: projx(x),1)
Stat.loc[:,'UTM_Y'] = Stat[['Lon_X','Lat_Y']].apply(lambda x: projy(x),1)
Stat.loc[:,'Elev'] = Stat[['Lon_X','Lat_Y']].apply(lambda x: getelev(x),1)
In [ ]:
pivStats = Stat.drop(['Aquifer', 'ConstDate', 'Depth', 'DepthUnit','AquiferType', 'HorCollMeth', 'Geology',
'HoleDUnit', 'HoleDepth', 'HUC8', 'HorAccUnit', 'HoleDUnit', 'SCREENDEPT',
'ElevUnit', 'ElevRef', 'ElevAcc', 'ElevMeth','CountyCode', 'ElevAccUnit',
'HorAcc', 'StateCode', 'HorRef',
'OrgId', 'StationComment'], axis=1)
In [ ]:
pivStats.reset_index(inplace=True)
pivStats.set_index("StationId",inplace=True)
In [ ]:
pivdata = pd.merge(datapiv, pivStats, left_on="StationId", right_index=True, how='left')
In [ ]:
pivdata.drop_duplicates(subset=['SampleId'],inplace=True)
In [ ]:
alkmatch = pivdata[(pivdata['Meas_Alk']>0)&(pivdata['HCO3']>0)]
x = [np.float64(i) for i in alkmatch['Meas_Alk'].values]
y = [np.float64(i) for i in alkmatch['HCO3'].values]
X = sm.add_constant(x)
res = sm.RLM(y,X).fit()
b = res.params[0]
m = res.params[1]
print m
print b
plt.figure()
plt.scatter(x,y)
plt.plot(x, res.fittedvalues, color='red')
In [ ]:
def HCO3fix(x):
if x[0]>0:
return x[0]
elif x[1]>0:
return x[1]*m+b
else:
pass
pivdata['HCO3'] = pivdata[['HCO3','Meas_Alk']].apply(lambda x: HCO3fix(x),1)
In [ ]:
parlist = ['Ca','Mg','Na','K','Cl','HCO3','CO3','SO4','NO3','NO2','CO2','TDS','Si','Zn_tot','As_tot']
def removeInf(x):
if x <= 0:
return np.nan
else:
return np.log(x)
for i in parlist:
if i in pivdata.columns:
pivdata[i+'Ln'] = pivdata[i].apply(lambda x: removeInf(x),1)
d = {'Ca':0.04990269, 'Mg':0.082287595, 'Na':0.043497608, 'K':0.02557656, 'Cl':0.028206596, 'HCO3':0.016388838, 'CO3':0.033328223, 'SO4':0.020833333, 'NO2':0.021736513, 'NO3':0.016129032}
chemlist = ['Ca','Mg','Na','K','Cl','HCO3','CO3','SO4','NO3','NO2']
for i in chemlist:
if i in pivdata.columns:
pivdata[i+'Meq'] = pivdata.loc[:,i] * d[i]
pivdata.drop_duplicates(subset = ['StationId','SampleDate'], inplace=True)
In [ ]:
def sumIons(x):
b = 0
for i in x:
if i>0:
b = i + b
else:
b = b
return b
pivdata['Anions'] = pivdata[['ClMeq','HCO3Meq','SO4Meq','CO3Meq']].apply(lambda x: sumIons(x),1)
pivdata['Cations'] = pivdata[['KMeq','MgMeq','NaMeq','CaMeq']].apply(lambda x: sumIons(x),1)
pivdata['EC'] = pivdata['Anions'] - pivdata['Cations']
pivdata['CBE'] = ((pivdata['Cations']-np.abs(pivdata['Anions']))/(pivdata['Cations']+np.abs(pivdata['Anions'])))*100
In [ ]:
#piperdata = pivdata.dropna(subset = ['Ca','Na','Cl','Mg','SO4','HCO3'], how='any')
#piperdata.drop_duplicates(subset=['SampleId'], inplace=True)
In [ ]:
print(len(pivdata))
In [ ]:
pivgrps = pivdata.groupby(['StationId']).median()
pivGoodData = pivdata[abs(pivdata.CBE)<=5]
pipergrps = pivGoodData.groupby(['StationId']).median()
pipergrps['sampCount'] = pivGoodData.groupby(['StationId'])['CBE'].agg({'cnt':(lambda x: np.count_nonzero(~np.isnan(x)))}).reset_index#squeeze=True
In [ ]:
pivgrp = pd.merge(pivgrps, pivStats, left_index=True, right_index=True, how='left')
pipergrp = pd.merge(pipergrps, pivStats, left_index=True, right_index=True, how='left')
pipergrp.drop_duplicates(inplace=True)
pivgrp = pivgrp.reset_index().drop_duplicates(subset=['StationId']).set_index('StationId')
In [ ]:
princpiv = pivGoodData[(pivGoodData.SampleDate < datetime.datetime(2014,3,10))&(pivGoodData.UTM_X < 435000) & (pivGoodData.UTM_X > 422000) \
& (pivGoodData.UTM_Y > 4608000) & (pivGoodData.UTM_Y < 4634000) & (pivGoodData.StationType=='Well')]
princpiv.drop_duplicates(subset = ['SampleId'],inplace=True)
In [ ]:
ResOldPrinc = resultsNoND[(resultsNoND.SampleId.isin(princpiv.SampleId))]
In [ ]:
GWStat = Stat[Stat.StationType.isin(['Well','Spring'])]
GWRes = results[results.StationId.isin(list(GWStat.StationId))]
In [ ]:
Nitrate = GWRes[GWRes['ParAbb'].isin(['N','NO2','NO3','NH4'])]
NitrateStat = GWStat[GWStat.StationId.isin(list(Nitrate.StationId))]
In [ ]:
ParrAbbSummary = ResOldPrinc.groupby('ParAbb')['ResValue'].agg({'min':np.min, 'mean':np.mean,
'qrt5':(lambda x: np.percentile(x,q=5)),
'qrt95':(lambda x: np.percentile(x,q=95)),
'range':(lambda x: np.max(x)-np.min(x)),
'lqrt':(lambda x: np.percentile(x,q=25)),
'median':np.median,
'uqrt':(lambda x: np.percentile(x,q=75)),
'max':np.max, 'std':np.std,
'cnt':(lambda x: np.count_nonzero(~np.isnan(x)))}).reset_index()
In [ ]:
ParrAbbSummary
In [ ]:
manyPars = list(ParrAbbSummary[ParrAbbSummary['cnt'] >= 30]['ParAbb'])
In [ ]:
ResOldPrinc = ResOldPrinc[ResOldPrinc['ParAbb'].isin(manyPars)]
summaryStats = ParrAbbSummary[ParrAbbSummary['ParAbb'].isin(manyPars)]
In [ ]:
summaryStats
In [ ]:
from pylab import rcParams
rcParams['figure.figsize'] = 15, 10
parLabCounts = ParrAbbSummary.reset_index()
parLabCounts = parLabCounts.set_index(['ParAbb'])
plt.figure()
boxres= ResOldPrinc[ResOldPrinc['ParAbb'].isin(['pH_lab','pH_field'])]
boxres.boxplot(column='ResValue', by='ParAbb',vert=False)
plt.title('Boxplot of Principal Aquifer pH')
plt.yticks([1,2],['Field pH (n = %s)'%(parLabCounts.loc['pH_field','cnt']),'Lab pH (n = %s)'%(parLabCounts.loc['pH_lab','cnt'])])
plt.xlim(6,9)
plt.xticks(np.arange(6,9.25,0.25))
plt.xlabel('pH')
plt.savefig(rootname+"pHBoxplot.svg")
plt.savefig(rootname+"pHBoxplot.pdf")
In [ ]:
plt.figure()
boxres= ResOldPrinc[ResOldPrinc['ParAbb'].isin(['Temp'])]
boxres.boxplot(column='ResValue', by='ParAbb',vert=False)
plt.title('Boxplot of Principal Aquifer Temperature')
plt.yticks([1],['Temperature (deg. C) (n = %s)'%(parLabCounts.loc['Temp','cnt'])])
plt.xticks(np.arange(5,30,1))
plt.xlabel('Temp. (deg. C)')
plt.savefig(rootname+"pHBoxplot.pdf")
In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="whitegrid")
rcParams['figure.figsize'] = 15, 20
parLabCounts = ParrAbbSummary.reset_index()
parLabCounts = parLabCounts.set_index(['ParAbb'])
parlist = ['Mg','Ca','Na','Cl','SO4','HCO3','Si','K','NO3','TDS','N']
boxres = ResOldPrinc[ResOldPrinc['ParAbb'].isin(parlist)]
plt.figure()
sns.violinplot(x="ResValue", y='ParAbb', data=boxres, palette="Set3", scale='width', cut=0)
plt.xlabel('mg/L')
plt.xlim(0,1200)
plt.ylabel('Chemical Constituent')
plt.savefig(rootname+'violinMajor.pdf')
In [ ]:
parLabCounts = ParrAbbSummary.reset_index()
parLabCounts = parLabCounts.set_index(['ParAbb'])
def parboxplot(parlist):
plt.figure()
boxres= ResOldPrinc[ResOldPrinc['ParAbb'].isin(parlist)]
boxres.boxplot(column='ResValue', by='ParAbb',vert=False)
#labs = [str(parlist[i]) + " (n= %s)"%(parLabCounts.loc[parlist[i],'cnt']) for i in range(len(parlist))]
#tickloc = [b+1 for b in range(len(parlist))]
#plt.yticks(tickloc,labs)
parlist = ['pH_lab','pH_field']
parboxplot(parlist)
plt.xlabel('pH')
plt.savefig(rootname+'pHBoxplot')
parlist = ['Mg','Ca','Na','Cl','SO4','HCO3','Si','K','NO3','TDS','N']
parboxplot(parlist)
plt.title('Major Ions')
plt.xlabel('mg/L')
plt.grid(which='both',axis='both')
plt.xscale('log')
plt.xlim(0.1,1000)
plt.savefig(rootname+'MajorIonsBoxplot.pdf')
#plt.xlim(0.00001,1000)
#plt.xscale('log')
In [ ]:
pipergrps.to_csv(rootname+'avgpiper.csv',index_label='StationId')
pivdata.to_csv(rootname+'pivotdata.csv',index_label='OBJECTID')
princpiv.to_csv(rootname+'PrincAquiferData.csv',index_label='OBJECTID')
pivgrp.to_csv(rootname+'pivgrps.csv',index_label='StationId')
In [ ]:
Nitrate.to_csv(rootname+'NitrateResults.csv')
NitrateStat.to_csv(rootname+'NitrateStations.csv')
In [ ]:
summaryStats.to_csv(rootname+'PrincAquifStats.csv')
In [ ]:
summaryStats.to_clipboard()
In [ ]:
GWStat.to_csv(rootname+'GWStations.csv',index_label='ObjectID')
GWRes.to_csv(rootname+'GWResults.csv',index_label='ObjectID')
In [ ]:
writer = pd.ExcelWriter(rootname + "combined_out.xlsx", engine="xlsxwriter")
Stat.to_excel(writer, "stations", index=False)
results.to_excel(writer, "results", index=False)
GWStat.to_excel(writer, 'GWStations',index=False)
GWRes.to_excel(writer, 'GWResults',index=False)
pipergrps.to_excel(writer,'avgpiper')
pivdata.to_excel(writer,'pivotdata')
summaryStats.to_excel(writer,'princaquifstats')
writer.save()
In [ ]: