In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
In [2]:
BASE_IDEM_URL = 'http://www.in.gov/idem/cleanwater/files/gw_monitoring_network_data_'
YEARS = ['2008', '2009', '2010', '2011', '2012', '2013', '2014']
urls = []
for year in YEARS:
urls.append(BASE_IDEM_URL+'{0}{1}'.format(year, '.xls'))
In [3]:
idem_08_10 = pd.DataFrame()
for url in urls[0:3]:
data = pd.read_excel(url, sheetname = "Sheet1")
idem_08_10 = idem_08_10.append(data)
In [4]:
idem_11 = pd.read_excel(urls[3])
idem_11 = idem_11.rename(columns = {'Sample ID':'SAMPLEID'})
In [5]:
idem_12 = pd.read_excel(urls[4], sheetname = 'Detections')
idem_12 = idem_12.rename(columns = {'SiteID':'GWMN Site ID', \
'Physical_System_City': 'City', \
'COUNTY':'County', 'DEPTH': 'Well Depth (ft)', \
'TOPO':'Topographic Map', \
'TWP': 'Township', \
'TWPD': 'Township Direction', \
'RNG' : 'Range', \
'RNGD': 'Range Direction', \
'PARCEL_ID': 'Section', \
'SampleID':'SAMPLEID', \
'SECTION UTM_X':'Section UTM X', \
'SECTION UTM_Y':'Section UTM Y', \
'DateSampled': 'Date Sampled', \
'METHOD': 'Method', \
'ANALYTE': 'Analyte', \
'DetectionLimit': 'Detection Limit'})
In [24]:
idem_13_14 = pd.DataFrame()
for url in urls[4:7]:
data = pd.read_excel(url, sheetname='Detections')
idem_13_14 = idem_13_14.append(data)
In [30]:
idem_13_14.info()
In [26]:
frames = [idem_08_10, idem_11, idem_12]
data = pd.concat(frames)
In [31]:
data.info()
In [34]:
data['SAMPLEID'].unique().len()
In [ ]: