In [1]:
%matplotlib inline
import requests
import json
import os
import pandas as pd
import datetime as dt
import seaborn as sns
import matplotlib.pyplot as plt
import credentials
Note on start and end dates: This code is meant to cover a single calendar month, from its first to its last day. It should work for arbitrary periods, but if your interval includes the end of February, you may have to handle leap-year offsets yourself and fix any leap-year-related bugs.
In [2]:
# Web service root (ncdc.noaa.gov "cdo-web" API, version 2) and the endpoint queried below.
baseurl = "https://www.ncdc.noaa.gov/cdo-web/api/v2/"
endpoint = "data"

# The access token comes from the local ``credentials`` module and is sent as a
# request header, so it never appears in the request URL.
custom_headers = dict(token=credentials.token)

# Data type identifiers requested from the service for every station.
datatypes = ["TAVG", "TMIN", "TMAX", "MDTX", "MDTN", "PRCP", "SNOW"]

# Reporting period (inclusive), formatted as YYYY-MM-DD.
startdate, enddate = "2017-07-01", "2017-07-31"

# Query parameters shared by all requests; station and dataset ids are added
# per request further down.
params = dict(
    datatypeid=datatypes,
    startdate=startdate,
    enddate=enddate,
    limit=1000,
    units="standard",
)
In [3]:
# JSON file holding the list of stations; each entry is used below through its
# "id" and "shortname" keys.
STATIONFN = "AK_NCDC_FirstOrderStations.json"
# JSON is UTF-8 by specification, so do not rely on the platform's default
# text encoding when reading it.
with open(STATIONFN, "r", encoding="utf-8") as source:
    stationdata = json.load(source)
We start out querying the Global Summary of the Month dataset.
In [4]:
# Dataset id sent as the "datasetid" query parameter for the monthly queries below.
dataset = "GSOM"
In [5]:
# Full request URL: the API root joined with the endpoint name.
fullbase = requests.compat.urljoin(baseurl, endpoint)
In [6]:
# Build one record per station with the observed monthly values and the
# corresponding monthly normals. Result: {station shortname: {column: value}}.
monthlysummary = {}
for station in stationdata:
    print(station['shortname'])

    # Per-request copy of the shared parameters, so the module-level ``params``
    # dict is not modified as a side effect of running this cell.
    query = dict(params, stationid=station["id"], datasetid=dataset)
    r = requests.get(
        fullbase,
        headers=custom_headers,
        params=query,
    )
    # Fail loudly on HTTP errors (bad token, rate limit, ...) instead of trying
    # to parse an error page as data.
    r.raise_for_status()
    results = r.json()

    # Regroup the flat list of records as {date: {datatype: value}}.
    # A response without a "results" key is treated as "no data".
    newdic = {}
    for result in results.get('results', []):
        newdic.setdefault(result["date"][:10], {})[result["datatype"]] = result["value"]
    if not newdic:
        print("  no data returned for this station, skipping")
        continue

    dataforstation = pd.DataFrame.from_dict(newdic, orient="index")
    # ``pd.DataFrame.from_csv`` was removed from pandas; this is the documented
    # equivalent (first column as a parsed date index).
    normalsforstation = pd.read_csv(
        "monthlynormals_{}.dat".format(station["shortname"]),
        sep='\t',
        index_col=0,
        parse_dates=True,
    )

    # Index both frames by month number so observations and normals can be
    # looked up with the same key.
    normalsforstation.index = normalsforstation.index.month
    dataforstation.index = pd.to_datetime(dataforstation.index).month
    monthnum = dataforstation.index[0]

    stationmonthly = {
        "T_avg_F": dataforstation["TAVG"].loc[monthnum],
        "T_normal_F": normalsforstation["MLY-TAVG-NORMAL"].loc[monthnum],
        "Precip_in": dataforstation["PRCP"].loc[monthnum],
        "Precip_normal_in": normalsforstation["MLY-PRCP-NORMAL"].loc[monthnum],
    }
    # Snow is optional: record the observation only if the station reported it,
    # and the normal only if an observation exists and the normals file has it.
    if "SNOW" in dataforstation.columns:
        stationmonthly["Snow"] = dataforstation["SNOW"].loc[monthnum]
        if "MLY-SNOW-NORMAL" in normalsforstation.columns:
            stationmonthly["Snow_normal"] = normalsforstation["MLY-SNOW-NORMAL"].loc[monthnum]

    monthlysummary[station["shortname"]] = stationmonthly
In [19]:
# Debugging aid: show the fully encoded URL of the most recent request.
r.request.url
Out[19]:
In [9]:
# One row per station, one column per value collected in ``monthlysummary``.
summary = pd.DataFrame.from_dict(
    monthlysummary,
    orient="index")

# Snow values are only recorded for stations where they are available. If no
# station supplied them, the column would be missing entirely and the
# arithmetic below would raise KeyError, so add it as all-NaN instead.
for optional_col in ("Snow", "Snow_normal"):
    if optional_col not in summary.columns:
        summary[optional_col] = float("nan")

# Departures from normal: absolute and relative for precipitation, absolute
# for temperature and snow.
summary['Delta_precip'] = summary['Precip_in'] - summary['Precip_normal_in']
summary['Delta_precip_perc'] = summary['Delta_precip']/summary['Precip_normal_in'] * 100
summary['Precip_perc'] = summary['Delta_precip_perc'] + 100
summary['Delta_T'] = summary['T_avg_F'] - summary['T_normal_F']
summary['Delta_Snow'] = summary['Snow'] - summary['Snow_normal']
In [10]:
# Column order for the displayed table: temperature, then precipitation, then snow.
temperature_cols = ["T_avg_F", "T_normal_F", "Delta_T"]
precipitation_cols = ["Precip_in", "Precip_normal_in", "Delta_precip", "Delta_precip_perc", "Precip_perc"]
snow_cols = ["Snow", "Snow_normal", "Delta_Snow"]
colorder = temperature_cols + precipitation_cols + snow_cols

# Last expression of the cell: render the reordered table.
summary.loc[:, colorder]
Out[10]:
Do not run the next cell unless you want to overwrite the output file.
In [157]:
# Write the ordered summary table to CSV, formatting floats with two decimals.
# NOTE(review): the "201707" prefix is hard-coded and does not follow
# ``startdate``; rename the file when running for another month.
summary[colorder].to_csv("201707_Statewide_monthly.csv", float_format='%.2f')
Second, daily summaries.
In [11]:
# Switch the "datasetid" used by the following queries to the daily dataset.
dataset = "GHCND"
# Same URL as computed earlier; recomputed so this section can be run on its own.
fullbase = requests.compat.urljoin(baseurl, endpoint)
Let's manipulate the start and end dates a little so that we can pick out the normals corresponding to the daily observations. Normals are formally provided with the year set to 2010.
In [13]:
def _to_normals_date(day, normals_year=2010):
    """Return ``day`` as a ``YYYY-MM-DD`` string with its year set to ``normals_year``.

    29 February is mapped to 28 February first: the default normals year 2010
    is not a leap year, so building that date directly raises ``ValueError``.
    """
    if (day.month, day.day) == (2, 29):
        day = day.replace(day=28)
    return day.replace(year=normals_year).strftime("%Y-%m-%d")


# Parsed start/end of the reporting period ...
st = dt.datetime.strptime(startdate, "%Y-%m-%d")
ed = dt.datetime.strptime(enddate, "%Y-%m-%d")
# ... and the same calendar days expressed in the year used by the normals files.
normalsst = _to_normals_date(st)
normalsed = _to_normals_date(ed)
The data plotted in the Statewide Summaries are mean daily precipitation and temperature, averaged over the available stations. For per-station, per-day plotting, the max and min daily temperature are of interest as well.
In [28]:
# Per-station daily departures from normal, keyed by station shortname and then
# by observation date string (YYYY-MM-DD).
precip_daily_mean_perc_dep = {}
temp_daily_mean_dep = {}
temp_daily_max_dep = {}
temp_daily_min_dep = {}
for station in stationdata:
    print(station['shortname'])

    # Per-request copy of the shared parameters, so the module-level ``params``
    # dict is not modified as a side effect of running this cell.
    query = dict(params, stationid=station["id"], datasetid=dataset)
    r = requests.get(
        fullbase,
        headers=custom_headers,
        params=query,
    )
    # Fail loudly on HTTP errors instead of parsing an error body as data.
    r.raise_for_status()
    results = r.json()

    # Regroup the flat list of records as {date: {datatype: value}}.
    # A response without a "results" key is treated as "no data".
    newdic = {}
    for result in results.get('results', []):
        newdic.setdefault(result["date"][:10], {})[result["datatype"]] = result["value"]
    if not newdic:
        print("  no data returned for this station, skipping")
        continue

    # ISO date strings sort chronologically, so this puts the days in order.
    dataforstation = pd.DataFrame.from_dict(newdic, orient="index").sort_index()

    # ``pd.DataFrame.from_csv`` was removed from pandas; this is the documented
    # equivalent (first column as a parsed date index).
    normalsforstation = pd.read_csv(
        "dailynormals_{}.dat".format(station["shortname"]),
        sep='\t',
        index_col=0,
        parse_dates=True,
    )

    # Align normals to observations by calendar day ("MM-DD") rather than by
    # position: a station with missing days no longer breaks or shifts the
    # alignment, and days without a normal simply yield NaN departures.
    # Assumes the normals file holds at most one row per calendar day - TODO confirm.
    normalsforstation.index = normalsforstation.index.strftime("%m-%d")
    normalsforstation = normalsforstation.reindex(dataforstation.index.str[5:10])
    normalsforstation.index = dataforstation.index

    dataforstation['TAVG_dep'] = dataforstation['TAVG'] - normalsforstation['DLY-TAVG-NORMAL']
    dataforstation['TMIN_dep'] = dataforstation['TMIN'] - normalsforstation['DLY-TMIN-NORMAL']
    dataforstation['TMAX_dep'] = dataforstation['TMAX'] - normalsforstation['DLY-TMAX-NORMAL']
    # Percent departure relative to the DLY-PRCP-50PCTL column.
    # NOTE(review): a zero in that column gives inf/NaN here - confirm whether
    # that can occur in the normals files.
    dataforstation['PRCP_dep'] = (
        (dataforstation['PRCP'] - normalsforstation['DLY-PRCP-50PCTL'])
        / normalsforstation['DLY-PRCP-50PCTL'] * 100
    )

    shortname = station['shortname']
    temp_daily_mean_dep[shortname] = dataforstation['TAVG_dep'].to_dict()
    # The max/min containers were declared but never filled; populate them so
    # per-station, per-day max/min departures are available as well.
    temp_daily_max_dep[shortname] = dataforstation['TMAX_dep'].to_dict()
    temp_daily_min_dep[shortname] = dataforstation['TMIN_dep'].to_dict()
    precip_daily_mean_perc_dep[shortname] = dataforstation['PRCP_dep'].to_dict()
In [30]:
# Inspect the daily frame left over from the last station processed by the loop.
dataforstation
Out[30]:
In [82]:
# Global seaborn look for all following figures: "white" style, "poster" context.
sns.set_style("white")
sns.set_context("poster")
In [98]:
# Average the daily temperature departures across stations (one column per
# station, row-wise mean), then attach a running 1..N label and a sign flag.
avg_temp_dep = (
    pd.DataFrame.from_dict(temp_daily_mean_dep)
    .mean(axis=1)
    .to_frame(name='temp_dep')
)
xlabels = pd.Series(range(1, len(avg_temp_dep) + 1), index=avg_temp_dep.index)
avg_temp_dep['xlabel'] = xlabels
avg_temp_dep['positive'] = avg_temp_dep['temp_dep'].gt(0)
In [101]:
# Bar chart of the station-averaged daily temperature departure.
fig, ax1 = plt.subplots(figsize=(15, 12))
avg_temp_dep.plot.bar(x='xlabel', y='temp_dep', ax=ax1, legend=False)
ticklabels = ax1.set_xticklabels(avg_temp_dep['xlabel'], rotation=0)

# Credit lines placed at fixed data coordinates near the top of the axes.
for xpos, credit in (
    (0, "Alaska Climate Research Center"),
    (25, "Geophysical Institute UAF"),
):
    ax1.text(xpos, 7.5, credit, fontsize=11, color="grey")

ax1.set_ylabel("Mean Statewide Temperature Deviation (° F)")
ax1.set_xlabel("")
ax1.set_title("July 2017", y=0.95)
ax1.grid(True, axis='y')
In [78]:
# Average the daily precipitation departures (percent) across stations, then
# attach a running 1..N label and a sign flag.
avg_precip_dep = (
    pd.DataFrame.from_dict(precip_daily_mean_perc_dep)
    .mean(axis=1)
    .to_frame(name='precip_dep')
)
xlabels = pd.Series(range(1, len(avg_precip_dep) + 1), index=avg_precip_dep.index)
avg_precip_dep['xlabel'] = xlabels
avg_precip_dep['positive'] = avg_precip_dep['precip_dep'].gt(0)
In [97]:
# Bar chart of the station-averaged daily precipitation departure.
fig, ax1 = plt.subplots(figsize=(15, 12))
avg_precip_dep.plot.bar(x='xlabel', y='precip_dep', ax=ax1, legend=False)
ticklabels = ax1.set_xticklabels(avg_precip_dep['xlabel'], rotation=0)

# Credit lines placed at fixed data coordinates near the top of the axes.
for xpos, credit in (
    (0, "Alaska Climate Research Center"),
    (25, "Geophysical Institute UAF"),
):
    ax1.text(xpos, 140, credit, fontsize=11, color="grey")

ax1.set_ylabel("Mean Statewide Precipitation Deviation (%)")
ax1.set_xlabel("")
ax1.set_title("July 2017", y=0.95)
ax1.grid(True, axis='y')
In [91]:
# Quick look at the temperature departure of the last station processed.
fig, ax = plt.subplots(figsize=(15, 12))
dataforstation.plot.bar(y='TAVG_dep', ax=ax)
Out[91]:
In [66]:
# Scratch cell: give the normals frame the same index as the observations so the
# two can be subtracted label-by-label (requires both frames to have equal length).
normalsforstation.index = dataforstation.index
In [68]:
# Scratch cell: daily average-temperature departure for the last station processed.
(dataforstation['TAVG'] - normalsforstation['DLY-TAVG-NORMAL'])
Out[68]:
In [ ]: