Network Data Access - USGS NWIS Service-based Data Access

Karl Benedict
Director, Earth Data Analysis Center
Associate Professor, University Libraries
University of New Mexico
kbene@unm.edu

An Analysis

This analysis demonstrates searching for datasets that meet a set of specified conditions, accessing via advertised services, processing and plotting the data from the service.

Service Documentation: http://waterservices.usgs.gov/rest/IV-Service.html


In [2]:
! pip install StringIO


Collecting StringIO
  Could not find a version that satisfies the requirement StringIO (from versions: )
No matching distribution found for StringIO

Enable the needed python libraries


In [1]:
import urllib
import zipfile
import StringIO
import string
import pandas
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import HTML
import json


---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-1-ec31cc87e507> in <module>
      1 import urllib
      2 import zipfile
----> 3 import StringIO
      4 import string
      5 import pandas

ModuleNotFoundError: No module named 'StringIO'

Set some initial variables


In [ ]:
county_name = ""
start_date = "20140101"
end_date = "20150101"
diag = False

Options


In [ ]:
## Retrieve the bounding box of the specified county - if no county is specified, the bounding boxes for all NM counties will be requested
countyBBOXlink = "http://gstore.unm.edu/apps/epscor/search/nm_counties.json?limit=100&query=" + county_name  ## define the request URL
print countyBBOXlink ## print the request URL for verification
print
bboxFile = urllib.urlopen(countyBBOXlink) ## request the bounding box information from the server
bboxData = json.load(bboxFile)
# print bboxData

# Get data for BBOX defined by specified county(ies)
myCounties = []
for countyBBOX in bboxData["results"]:
    minx,miny,maxx,maxy = countyBBOX[u'box']
    myDownloadLink = "http://waterservices.usgs.gov/nwis/iv/?bBox=%f,%f,%f,%f&format=json&period=P7D&parameterCd=00060" % (minx,miny,maxx,maxy) # retrieve data for the specified BBOX for the last 7 days as JSON
    print myDownloadLink
    myCounty = {u'name':countyBBOX[u'text'],u'minx':minx,u'miny':miny,u'maxx':maxx,u'maxy':maxy,u'downloadLink':myDownloadLink}
    myCounties.append(myCounty)

In [ ]:
#countySubset = [myCounties[0]]
#print countySubset

valueList = []

for county in myCounties:
    print "processing: %s" % county["downloadLink"]
    try:
        datafile = urllib.urlopen(county["downloadLink"])
        data = json.load(datafile)
        values = data["value"]["timeSeries"][0]["values"]
        for item in values:
            for valueItem in item["value"]:
                #print json.dumps(item["value"], sort_keys=True, indent=4)
                myValue = {"dateTime":valueItem["dateTime"].replace("T"," ").replace(".000-06:00",""),"value":valueItem["value"], "county":county["name"]}
                #print myValue
                valueList.append(myValue)
        #print valueList
    except:
        print "\tfailed for this one ..."
     
    #print json.dumps(values, sort_keys=True, indent=4)

In [ ]:
df = pandas.DataFrame(valueList)

In [ ]:
df['dateTime'] = pandas.to_datetime(df["dateTime"])
df['value'] = df['value'].astype(float).fillna(-1)

print df.shape
print df.dtypes
print "column names"
print "------------"
for colName in df.columns:
    print colName
print
print df.head()

In [ ]:
fig,ax = plt.subplots(figsize=(10,8))
ax.width = 1
ax.height = .5
plt.xkcd()
#plt.ylim(-25,30)
ax.plot_date(df['dateTime'], df['value'], '.', label="Discharge (cf/sec)", color="0.2")
fig.autofmt_xdate()
plt.legend(loc=2, bbox_to_anchor=(1.0,1))
plt.title("15-minute Discharge - cubic feet per second")
plt.ylabel("Discharge")
plt.xlabel("Date")

In [ ]: