In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
BASE_IDEM_URL = 'http://www.in.gov/idem/cleanwater/files/gw_monitoring_network_data_'
YEARS = ['2008', '2009', '2010', '2011', '2012', '2013', '2014']

urls = []
for year in YEARS:
    urls.append(BASE_IDEM_URL+'{0}{1}'.format(year, '.xls'))

In [3]:
idem_08_10 = pd.DataFrame()
for url in urls[0:3]:
    data = pd.read_excel(url, sheetname = "Sheet1")
    idem_08_10 = idem_08_10.append(data)

In [4]:
idem_11 = pd.read_excel(urls[3])
idem_11 = idem_11.rename(columns = {'Sample ID':'SAMPLEID'})

In [5]:
idem_12 = pd.read_excel(urls[4], sheetname = 'Detections')
idem_12 = idem_12.rename(columns = {'SiteID':'GWMN Site ID', \
                                    'Physical_System_City': 'City', \
                                    'COUNTY':'County', 'DEPTH': 'Well Depth (ft)', \
                                    'TOPO':'Topographic Map', \
                                    'TWP': 'Township', \
                                    'TWPD': 'Township Direction', \
                                    'RNG' : 'Range', \
                                    'RNGD': 'Range Direction', \
                                    'PARCEL_ID': 'Section', \
                                    'SampleID':'SAMPLEID', \
                                    'SECTION UTM_X':'Section UTM X', \
                                    'SECTION UTM_Y':'Section UTM Y', \
                                    'DateSampled': 'Date Sampled', \
                                    'METHOD': 'Method', \
                                    'ANALYTE': 'Analyte', \
                                    'DetectionLimit': 'Detection Limit'})

In [24]:
idem_13_14 = pd.DataFrame()
for url in urls[4:7]:
    data = pd.read_excel(url, sheetname='Detections')
    idem_13_14 = idem_13_14.append(data)

In [30]:
idem_13_14.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 14351 entries, 0 to 4432
Data columns (total 23 columns):
ANALYTE                 14351 non-null object
Bottle List             4923 non-null object
COUNTY                  14351 non-null object
DEPTH                   14338 non-null float64
DateSampled             14351 non-null datetime64[ns]
DetectionLimit          13993 non-null float64
METHOD                  14174 non-null object
PARCEL_ID               14351 non-null int64
Physical_System_City    14313 non-null object
RESULT                  9428 non-null float64
RNG                     14351 non-null int64
RNGD                    14257 non-null object
Result                  4923 non-null float64
SECTION UTM_X           4923 non-null float64
SECTION UTM_Y           4923 non-null float64
SECTION_UTM_X           9428 non-null float64
SECTION_UTM_Y           9428 non-null float64
SampleID                14351 non-null object
SiteID                  14351 non-null object
TOPO                    14351 non-null object
TWP                     14351 non-null int64
TWPD                    14257 non-null object
Unit                    14351 non-null object
dtypes: datetime64[ns](1), float64(8), int64(3), object(11)
memory usage: 2.6+ MB

In [26]:
frames = [idem_08_10, idem_11, idem_12]
data = pd.concat(frames)

In [31]:
data.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 218468 entries, 0 to 4922
Data columns (total 20 columns):
Analyte               218468 non-null object
Bottle List           4923 non-null object
City                  218008 non-null object
County                218468 non-null object
Date Sampled          218468 non-null datetime64[ns]
Detection Limit       216876 non-null float64
GWMN Site ID          218468 non-null object
Method                218468 non-null object
Range                 218468 non-null int64
Range Direction       216739 non-null object
Result                217972 non-null object
SAMPLEID              218468 non-null object
Section               218468 non-null int64
Section UTM X         218468 non-null float64
Section UTM Y         218468 non-null float64
Topographic Map       218468 non-null object
Township              218468 non-null int64
Township Direction    216739 non-null object
Unit                  218468 non-null object
Well Depth (ft)       218468 non-null float64
dtypes: datetime64[ns](1), float64(4), int64(3), object(12)
memory usage: 35.0+ MB

In [34]:
data['SAMPLEID'].unique().len()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-34-0e64c7cf9bfe> in <module>()
----> 1 data['SAMPLEID'].unique().len()

AttributeError: 'numpy.ndarray' object has no attribute 'len'

In [ ]: