This analysis demonstrates how to search for datasets that meet a set of specified conditions, access them through their advertised web services, and then process and plot the data those services return.
Service Documentation: http://waterservices.usgs.gov/rest/IV-Service.html
Import the required Python libraries
In [2]:
import urllib
import zipfile
import StringIO
import string
import pandas
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import HTML
import json
Set some initial variables
In [3]:
# County whose bounding box is looked up below. An empty string leaves the
# search query blank, which requests the bounding boxes of all NM counties.
county_name = ""

# Date range as YYYYMMDD strings. NOTE(review): the cells shown in this
# notebook do not reference these; the USGS request uses a fixed 7-day period.
start_date, end_date = "20140101", "20150101"

# Diagnostic switch. NOTE(review): not referenced by the cells shown here.
diag = False
In [21]:
## Retrieve the bounding box of the specified county - if no county is specified,
## the bounding boxes for all NM counties will be requested.
# The county name is percent-encoded so that names containing spaces or other
# reserved characters still produce a valid query string.
countyBBOXlink = "http://gstore.unm.edu/apps/epscor/search/nm_counties.json?limit=100&query=" + urllib.quote(county_name)  ## define the request URL
print(countyBBOXlink)  ## print the request URL for verification
print("")
bboxFile = urllib.urlopen(countyBBOXlink)  ## request the bounding box information from the server
try:
    bboxData = json.load(bboxFile)
finally:
    bboxFile.close()  # release the HTTP connection even if the JSON is malformed

# Build one USGS Instantaneous Values request per county bounding box.
# Each request asks for parameter code 00060 over the trailing 7 days
# (period=P7D), returned as JSON. The resulting records are collected in
# myCounties as dicts with keys: name, minx, miny, maxx, maxy, downloadLink.
myCounties = []
for countyBBOX in bboxData["results"]:
    minx, miny, maxx, maxy = countyBBOX[u'box']
    # "&parameterCd" must be spelled out in full: the previous text had been
    # mangled into a pilcrow character, which dropped the parameter filter.
    myDownloadLink = "http://waterservices.usgs.gov/nwis/iv/?bBox=%f,%f,%f,%f&format=json&period=P7D&parameterCd=00060" % (minx, miny, maxx, maxy)
    print(myDownloadLink)
    myCounty = {
        u'name': countyBBOX[u'text'],
        u'minx': minx,
        u'miny': miny,
        u'maxx': maxx,
        u'maxy': maxy,
        u'downloadLink': myDownloadLink,
    }
    myCounties.append(myCounty)
In [70]:
# Download the time series for every county and flatten the readings into
# valueList, a list of {"dateTime", "value", "county"} records.
# A failure for one county is reported and skipped so the remaining counties
# are still processed (best-effort behaviour is intentional).
valueList = []
for county in myCounties:
    print("processing: %s" % county["downloadLink"])
    try:
        datafile = urllib.urlopen(county["downloadLink"])
        try:
            data = json.load(datafile)
        finally:
            datafile.close()  # release the HTTP connection in every case

        # NOTE(review): only the first time series returned for the bounding
        # box is used; any additional series in the response are ignored.
        values = data["value"]["timeSeries"][0]["values"]
        for item in values:
            for valueItem in item["value"]:
                myValue = {
                    # Keep the leading "YYYY-MM-DDTHH:MM:SS" part and drop the
                    # fractional seconds and UTC offset, whatever the offset
                    # is (the old code only stripped ".000-06:00").
                    "dateTime": valueItem["dateTime"][:19].replace("T", " "),
                    "value": valueItem["value"],
                    "county": county["name"],
                }
                valueList.append(myValue)
    except Exception as err:
        # Catch Exception rather than everything so Ctrl-C still interrupts
        # the loop, and show why the county was skipped.
        print("\tfailed for this one ... (%s: %s)" % (type(err).__name__, err))
In [71]:
# Assemble the per-reading records into a DataFrame. The columns are named
# explicitly (in alphabetical order) so the frame keeps its schema even when
# every download failed and valueList is empty; otherwise the later
# df["dateTime"] / df["value"] lookups would raise KeyError.
df = pandas.DataFrame(valueList, columns=["county", "dateTime", "value"])
In [90]:
# Coerce the string columns to proper dtypes: timestamps become datetimes and
# readings become floats, with missing readings replaced by -1.
df["dateTime"] = pandas.to_datetime(df["dateTime"])
df["value"] = df["value"].astype(float).fillna(-1)

# Print a short structural summary: shape, dtypes, column names, first rows.
print(df.shape)
print(df.dtypes)
print("column names")
print("------------")
for column_label in df.columns:
    print(column_label)
print("")
print(df.head())
In [92]:
# Plot every reading as a point against its timestamp.
# plt.xkcd() is used as a context manager wrapped around figure creation so
# the sketch style applies to the whole figure and the global rcParams are
# restored afterwards, instead of leaking into every later plot in the kernel.
# The former ax.width / ax.height assignments were removed: they only set
# arbitrary attributes on the Axes object and had no effect on the figure.
with plt.xkcd():
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot_date(df['dateTime'], df['value'], '.', label="Discharge (cf/sec)", color="0.2")
    fig.autofmt_xdate()  # slant the date tick labels so they do not overlap
    # loc=2 anchors the legend's upper-left corner at bbox_to_anchor, i.e. at
    # the top-right corner of the axes, placing the legend outside the plot.
    ax.legend(loc=2, bbox_to_anchor=(1.0, 1))
    ax.set_title("15-minute Discharge - cubic feet per second")
    ax.set_ylabel("Discharge")
    ax.set_xlabel("Date")
Out[92]:
In [ ]: