The purpose of this notebook is to create a bokeh representation of the latest data (incl. QC) from socib mooring stations.
In [1]:
import numpy as np
import pandas as pd
from urllib2 import Request, urlopen, URLError
from lxml import html
import time
from netCDF4 import Dataset
import datetime
import calendar
from collections import OrderedDict
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.models import LinearAxis, Range1d, CustomJS
from bokeh.models.widgets import Panel, Tabs
from bokeh.io import output_notebook, show, output_file, vplot, hplot
import bokeh
In case, the output wants to be seen within the jupyter notebook, this line must be un-commented. However, since the generated HTML file will be opened in a new window, this is not really necessary.
In [2]:
#output_notebook()
We define some basic data handling functions here. These will just enable us to e.g. access the nefCDF variable data as numpy array (significantly faster) or convert times to a joint base.
In [3]:
def get_data_array(data_array):
if type(data_array.__array__()) is np.ma.masked_array:
return data_array.__array__().data
else:
return data_array.__array__()
def get_qc_variable_name(variable):
try:
qc_variable_name = variable.ancillary_variables
except AttributeError:
# print "No QC variable found for " + variable.name
qc_variable_name = None
return qc_variable_name
def get_pandas_timestamp_series(datetime_array):
out = pd.Series(np.zeros(len(datetime_array)))
counter = 0
for i in datetime_array:
out[counter] = pd.tslib.Timestamp(i)
counter += 1
return out
def days_to_seconds(days):
return int(days) * 24 * 60 * 60
def get_str_time(x): return str(x)
def totimestamp(dt, epoch=datetime.datetime(1970,1,1)):
td = dt - epoch
# return td.total_seconds()
return (td.microseconds + (td.seconds + td.days * 86400) * 10**6) / 10**6
Note that these scripts differ from the socib mooring station report generation tool. Here, we use a simple web - scraping from the socib thredds server.
In [4]:
def get_mooring_stations(url):
name_list = []
end_URLBuilder = []
req = Request(url)
try:
response = urlopen(req)
except URLError as e:
if hasattr(e, 'reason'):
print 'We failed to reach a server.'
print 'Reason: ', e.reason
elif hasattr(e, 'code'):
print 'The server couldn\'t fulfill the request.'
print 'Error code: ', e.code
else:
URLBuilder = []
tree = html.fromstring(response.read())
link_path = tree.xpath('//a')
for x in range(1, len(link_path)):
URLBuilder.append(link_path[x].values())
URLLister = []
for n in range(0, len(URLBuilder) - 4):
string = str(URLBuilder[n])
idx = string.find("/")
url = "http://thredds.socib.es/thredds/catalog/mooring/weather_station/" + URLBuilder[n][0][0:idx - 1] + "/L1/catalog.html"
name = URLBuilder[n][0][0:idx - 2]
req = Request(url)
try:
response = urlopen(req)
except URLError as e:
if hasattr(e, 'reason'):
print 'We failed to reach a server.'
print 'Reason: ', e.reason
elif hasattr(e, 'code'):
print 'The server couldn\'t fulfill the request.'
print 'Error code: ', e.code
else:
URLLister.append(url)
name_list.append(name)
for m in URLLister:
req = Request(m)
try:
response = urlopen(req)
except URLError as e:
if hasattr(e, 'reason'):
print 'We failed to reach a server.'
print 'Reason: ', e.reason
elif hasattr(e, 'code'):
print 'The server couldn\'t fulfill the request.'
print 'Error code: ', e.code
else:
tree = html.fromstring(response.read())
link_path = tree.xpath('//a')
for x in range(1, len(link_path)):
string = str(link_path[x].values())
idx = string.find("=")
end_URLBuilder.append("http://thredds.socib.es/thredds/dodsC/" + str(
link_path[x].values()[0][idx - 1:len(string)]))
break
return name_list, end_URLBuilder
Here, we define the bokeh plotting parameters. Also, we create a javascript callback to automatically adjust the y-axis according to the current zoom-extend.
In [5]:
def draw_data(links, desired_start_time, station_names):
global VARIABLES_OF_INTEREST
counter = 0
output_stations = []
for station in links:
root = Dataset(station)
time = get_data_array(root.variables["time"])
idx = time >= desired_start_time
if not np.any(idx):
counter += 1
continue
variables = root.get_variables_by_attributes(standard_name=lambda n: n in VARIABLES_OF_INTEREST)
time = time[idx]
subplot = []
variable_names = []
for v in variables:
try:
qc_data = get_data_array(root.variables[get_qc_variable_name(v)])
qc_data = qc_data[idx]
bad_idx = get_data_array(qc_data) != 1
except KeyError:
print "No QC found for " + v.name
v_name = v.name
variable_names.append(v_name)
v = get_data_array(v)
v = v[idx]
conv_time = get_pandas_timestamp_series([datetime.datetime.fromtimestamp(ts) for ts in time])
subplot.append(get_bokeh_grid_figure(v, qc_data, conv_time, station_names[counter]))
sub_counter = 0
my_tabs = []
for sp in subplot:
my_tabs.append(Panel(child=sp, title=variable_names[sub_counter]))
sub_counter += 1
p = Tabs(tabs=my_tabs)
output_stations.append(p)
counter += 1
amount_stations = len(output_stations)
rest = amount_stations % 2
verticals = []
if amount_stations >= 2:
verticals.append(hplot(output_stations[0], output_stations[1]))
elif amount_stations == 1:
verticals.append(hplot(output_stations[0]))
else:
print("No stations to plot (PerformQC.draw_bokeh()).")
return 1
for i in range(1, int(amount_stations/2)):
verticals.append(hplot(output_stations[i*2], output_stations[i*2+1]))
if rest > 0:
verticals.append(output_stations[-1])
show(vplot(*verticals))
def get_bokeh_grid_figure(data, qc, converted_time, variable_name):
time_strings = map(get_str_time, converted_time)
hover = HoverTool(names=["data"])
fig = figure(width=800, plot_height=300, title=variable_name, tools=["pan, box_zoom, xwheel_zoom, save, reset, resize", hover], x_axis_type="datetime")
source = ColumnDataSource(
data=dict(
time=time_strings,
data=data,
qc=qc
)
)
# data line
fig.line(converted_time, data, color="navy", alpha=0.5, name="data", source=source)
# data points
fig.square(converted_time, data, color="navy", alpha=0.5)
fig.extra_y_ranges = {"foo": Range1d(start=0, end=10)}
fig.add_layout(LinearAxis(y_range_name="foo"), 'right')
fig.line(converted_time, qc, color="green", alpha=0.5, y_range_name="foo")
jscode = """
range.set('start', parseInt(%s));
range.set('end', parseInt(%s));
"""
fig.extra_y_ranges['foo'].callback = CustomJS(
args=dict(range=fig.extra_y_ranges['foo']),
code=jscode % (fig.extra_y_ranges['foo'].start,
fig.extra_y_ranges['foo'].end)
)
pan_tool = fig.select(dict(type=bokeh.models.PanTool))
pan_tool.dimensions = ["width"]
hover = fig.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([
('time', '@time'),
('value', '@data{0.0}'),
('qc', '@qc')
])
# check for ranges, if they are nan
if (np.isnan(np.nanmin(data)) & np.isnan(np.nanmax(data))) or (np.nanmin(data) == np.nanmax(data)):
bottom_y_range = 0
top_y_range = 10
else:
# add a 10% buffer to the max ranges
temp_min = np.nanmin(data)
temp_max = np.nanmax(data)
temp_diff = abs(temp_max-temp_min)
temp_thresh = round(temp_diff*0.1, 3)
bottom_y_range = temp_min - temp_thresh
top_y_range = temp_max + temp_thresh
fig.y_range = Range1d(bottom_y_range, top_y_range)
translate_time = converted_time.apply(lambda x: x.to_pydatetime())
converted_time_backward = map(totimestamp, translate_time)
source = ColumnDataSource({'x': converted_time_backward, 'y': data})
jscode = """
function isNumeric(n) {
return !isNaN(parseFloat(n)) && isFinite(n);
}
var data = source.get('data');
var start = yrange.get('start');
var end = yrange.get('end');
var time_start = xrange.get('start')/1000;
var time_end = xrange.get('end')/1000;
var pre_max_old = end;
var pre_min_old = start;
var time = data['x'];
var pre = data['y'];
t_idx_start = time.filter(function(st){return st>=time_start})[0];
t_idx_start = time.indexOf(t_idx_start);
t_idx_end = time.filter(function(st){return st>=time_end})[0];
t_idx_end = time.indexOf(t_idx_end);
var pre_interval = pre.slice(t_idx_start, t_idx_end);
pre_interval = pre_interval.filter(function(st){return !isNaN(st)});
var pre_max = Math.max.apply(null, pre_interval);
var pre_min = Math.min.apply(null, pre_interval);
var ten_percent = (pre_max-pre_min)*0.1;
pre_max = pre_max + ten_percent;
pre_min = pre_min - ten_percent;
if((!isNumeric(pre_max)) || (!isNumeric(pre_min))) {
pre_max = pre_max_old;
pre_min = pre_min_old;
}
yrange.set('start', pre_min);
yrange.set('end', pre_max);
console.log(yrange.get('end'))
source.trigger('change');
"""
fig.y_range.callback = CustomJS(
args=dict(source=source, yrange=fig.y_range, xrange=fig.x_range), code=jscode)
fig.x_range.callback = CustomJS(
args=dict(source=source, yrange=fig.y_range, xrange=fig.x_range), code=jscode)
return fig
Also, we have to define the variables we want to plot. In this case, we just used the "List of important parameters" from the socib DataDiscovery service and added the relative humidity to it (since we will plot weather stations here).
In [6]:
VARIABLES_OF_INTEREST = [
"sea_water_temperature",
"air_temperature",
"sea_surface_wave_from_direction",
"sea_surface_wave_significant_height",
"wind_speed",
"wind_from_direction",
"wind_speed_of_gust",
"water_surface_height_above_reference_datum",
"air_pressure",
"sea_water_speed",
"direction_of_sea_water_velocity",
"sea_water_salinity",
"relative_humidity"]
Here, we will call our defined methods. Also, we will define the output filename and the desired timespan of the plotting.
In [7]:
station_names, station_links = get_mooring_stations('http://thredds.socib.es/thredds/catalog/mooring/weather_station/catalog.html')
# get latest x days
days = 2
html_file = 'bokeh_latest_data.html'
seconds = days_to_seconds(days)
dt = datetime.datetime.now()
desired_start_time = calendar.timegm(dt.utctimetuple()) - seconds
output_file(html_file)
draw_data(station_links, desired_start_time, station_names)