Plot bokeh graphs

The purpose of this notebook is to create a bokeh representation of the latest data (incl. QC) from socib mooring stations.

Define Imports


In [1]:
import numpy as np
import pandas as pd
from urllib2 import Request, urlopen, URLError
from lxml import html
import time
from netCDF4 import Dataset
import datetime
import calendar
from collections import OrderedDict
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.models import LinearAxis, Range1d, CustomJS
from bokeh.models.widgets import Panel, Tabs
from bokeh.io import output_notebook, show, output_file, vplot, hplot
import bokeh

In case, the output wants to be seen within the jupyter notebook, this line must be un-commented. However, since the generated HTML file will be opened in a new window, this is not really necessary.


In [2]:
#output_notebook()

Define data sources

We define some basic data handling functions here. These will just enable us to e.g. access the nefCDF variable data as numpy array (significantly faster) or convert times to a joint base.


In [3]:
def get_data_array(data_array):
    if type(data_array.__array__()) is np.ma.masked_array:
        return data_array.__array__().data
    else:
        return data_array.__array__()

def get_qc_variable_name(variable):
    try:
        qc_variable_name = variable.ancillary_variables
    except AttributeError:
        # print "No QC variable found for " + variable.name
        qc_variable_name = None
    return qc_variable_name

def get_pandas_timestamp_series(datetime_array):
    out = pd.Series(np.zeros(len(datetime_array)))
    counter = 0
    for i in datetime_array:
        out[counter] = pd.tslib.Timestamp(i)
        counter += 1
    return out
    
def days_to_seconds(days):
    return int(days) * 24 * 60 * 60

def get_str_time(x): return str(x)

def totimestamp(dt, epoch=datetime.datetime(1970,1,1)):
    td = dt - epoch
    # return td.total_seconds()
    return (td.microseconds + (td.seconds + td.days * 86400) * 10**6) / 10**6

Note that these scripts differ from the socib mooring station report generation tool. Here, we use a simple web - scraping from the socib thredds server.


In [4]:
def get_mooring_stations(url):
        name_list = []
        end_URLBuilder = []
        req = Request(url)
        try:
            response = urlopen(req)
        except URLError as e:
            if hasattr(e, 'reason'):
                print 'We failed to reach a server.'
                print 'Reason: ', e.reason
            elif hasattr(e, 'code'):
                print 'The server couldn\'t fulfill the request.'
                print 'Error code: ', e.code
        else:
            URLBuilder = []
            tree = html.fromstring(response.read())
            link_path = tree.xpath('//a')
            for x in range(1, len(link_path)):
                URLBuilder.append(link_path[x].values())
            URLLister = []
            for n in range(0, len(URLBuilder) - 4):
                string = str(URLBuilder[n])
                idx = string.find("/")
                url = "http://thredds.socib.es/thredds/catalog/mooring/weather_station/" + URLBuilder[n][0][0:idx - 1] + "/L1/catalog.html"
                name = URLBuilder[n][0][0:idx - 2]
                req = Request(url)
                try:
                    response = urlopen(req)
                except URLError as e:
                    if hasattr(e, 'reason'):
                        print 'We failed to reach a server.'
                        print 'Reason: ', e.reason
                    elif hasattr(e, 'code'):
                        print 'The server couldn\'t fulfill the request.'
                        print 'Error code: ', e.code
                else:
                    URLLister.append(url)
                    name_list.append(name)

            for m in URLLister:
                req = Request(m)
                try:
                    response = urlopen(req)
                except URLError as e:
                    if hasattr(e, 'reason'):
                        print 'We failed to reach a server.'
                        print 'Reason: ', e.reason
                    elif hasattr(e, 'code'):
                        print 'The server couldn\'t fulfill the request.'
                        print 'Error code: ', e.code
                else:
                    tree = html.fromstring(response.read())
                    link_path = tree.xpath('//a')
                    for x in range(1, len(link_path)):
                        string = str(link_path[x].values())
                        idx = string.find("=")
                        end_URLBuilder.append("http://thredds.socib.es/thredds/dodsC/" + str(
                            link_path[x].values()[0][idx - 1:len(string)]))
                        break
            return name_list, end_URLBuilder

Here, we define the bokeh plotting parameters. Also, we create a javascript callback to automatically adjust the y-axis according to the current zoom-extend.


In [5]:
def draw_data(links, desired_start_time, station_names):
    global VARIABLES_OF_INTEREST
    counter = 0
    output_stations = []
    for station in links:
        root = Dataset(station)
        time = get_data_array(root.variables["time"])
        idx = time >= desired_start_time
        if not np.any(idx):
            counter += 1
            continue
        variables = root.get_variables_by_attributes(standard_name=lambda n: n in VARIABLES_OF_INTEREST)
        time = time[idx]
        subplot =  []
        variable_names = []
        for v in variables:
            try:
                qc_data = get_data_array(root.variables[get_qc_variable_name(v)])
                qc_data = qc_data[idx]
                bad_idx = get_data_array(qc_data) != 1
            except KeyError:
                print "No QC found for " + v.name
            v_name = v.name
            variable_names.append(v_name)
            v = get_data_array(v)
            v = v[idx]
            conv_time = get_pandas_timestamp_series([datetime.datetime.fromtimestamp(ts) for ts in time])
            subplot.append(get_bokeh_grid_figure(v, qc_data, conv_time, station_names[counter]))
            
        sub_counter = 0
        my_tabs = []
        for sp in subplot:
            my_tabs.append(Panel(child=sp, title=variable_names[sub_counter]))
            sub_counter += 1
        p = Tabs(tabs=my_tabs)
        output_stations.append(p)
        counter += 1
    amount_stations = len(output_stations)
    rest = amount_stations % 2
    verticals = []
    if amount_stations >= 2:
        verticals.append(hplot(output_stations[0], output_stations[1]))
    elif amount_stations == 1:
        verticals.append(hplot(output_stations[0]))
    else:
        print("No stations to plot (PerformQC.draw_bokeh()).")
        return 1
    for i in range(1, int(amount_stations/2)):
        verticals.append(hplot(output_stations[i*2], output_stations[i*2+1]))
    if rest > 0:
        verticals.append(output_stations[-1])
    show(vplot(*verticals))

def get_bokeh_grid_figure(data, qc, converted_time, variable_name):
        time_strings = map(get_str_time, converted_time)
        hover = HoverTool(names=["data"])
        fig = figure(width=800, plot_height=300, title=variable_name, tools=["pan, box_zoom, xwheel_zoom, save, reset, resize", hover], x_axis_type="datetime")

        source = ColumnDataSource(
            data=dict(
                time=time_strings,
                data=data,
                qc=qc
            )
        )
        # data line
        fig.line(converted_time, data, color="navy", alpha=0.5, name="data", source=source)
        # data points
        fig.square(converted_time, data, color="navy", alpha=0.5)
        fig.extra_y_ranges = {"foo": Range1d(start=0, end=10)}
        fig.add_layout(LinearAxis(y_range_name="foo"), 'right')
        fig.line(converted_time, qc, color="green", alpha=0.5, y_range_name="foo")
        jscode = """
                range.set('start', parseInt(%s));
                range.set('end', parseInt(%s));
                """
        fig.extra_y_ranges['foo'].callback = CustomJS(
            args=dict(range=fig.extra_y_ranges['foo']),
            code=jscode % (fig.extra_y_ranges['foo'].start,
                           fig.extra_y_ranges['foo'].end)
        )
        pan_tool = fig.select(dict(type=bokeh.models.PanTool))
        pan_tool.dimensions = ["width"]

        hover = fig.select(dict(type=HoverTool))
        hover.tooltips = OrderedDict([
            ('time', '@time'),
            ('value', '@data{0.0}'),
            ('qc', '@qc')
        ])

        # check for ranges, if they are nan
        if (np.isnan(np.nanmin(data)) & np.isnan(np.nanmax(data))) or (np.nanmin(data) == np.nanmax(data)):
            bottom_y_range = 0
            top_y_range = 10
        else:
            # add a 10% buffer to the max ranges
            temp_min = np.nanmin(data)
            temp_max = np.nanmax(data)
            temp_diff = abs(temp_max-temp_min)
            temp_thresh = round(temp_diff*0.1, 3)

            bottom_y_range = temp_min - temp_thresh
            top_y_range = temp_max + temp_thresh

        fig.y_range = Range1d(bottom_y_range, top_y_range)
        translate_time = converted_time.apply(lambda x: x.to_pydatetime())
        converted_time_backward = map(totimestamp, translate_time)
        source = ColumnDataSource({'x': converted_time_backward, 'y': data})

        jscode = """
        function isNumeric(n) {
          return !isNaN(parseFloat(n)) && isFinite(n);
        }
        var data = source.get('data');
        var start = yrange.get('start');
        var end = yrange.get('end');

        var time_start = xrange.get('start')/1000;
        var time_end = xrange.get('end')/1000;

        var pre_max_old = end;
        var pre_min_old = start;

        var time = data['x'];
        var pre = data['y'];
        t_idx_start = time.filter(function(st){return st>=time_start})[0];
        t_idx_start = time.indexOf(t_idx_start);

        t_idx_end = time.filter(function(st){return st>=time_end})[0];
        t_idx_end = time.indexOf(t_idx_end);

        var pre_interval = pre.slice(t_idx_start, t_idx_end);
        pre_interval = pre_interval.filter(function(st){return !isNaN(st)});
        var pre_max = Math.max.apply(null, pre_interval);
        var pre_min = Math.min.apply(null, pre_interval);
        var ten_percent = (pre_max-pre_min)*0.1;

        pre_max = pre_max + ten_percent;
        pre_min = pre_min - ten_percent;

        if((!isNumeric(pre_max)) || (!isNumeric(pre_min))) {
            pre_max = pre_max_old;
            pre_min = pre_min_old;
        }

        yrange.set('start', pre_min);
        yrange.set('end', pre_max);
        console.log(yrange.get('end'))

        source.trigger('change');
        """

        fig.y_range.callback = CustomJS(
            args=dict(source=source, yrange=fig.y_range, xrange=fig.x_range), code=jscode)
        fig.x_range.callback = CustomJS(
            args=dict(source=source, yrange=fig.y_range, xrange=fig.x_range), code=jscode)
        return fig

Also, we have to define the variables we want to plot. In this case, we just used the "List of important parameters" from the socib DataDiscovery service and added the relative humidity to it (since we will plot weather stations here).


In [6]:
VARIABLES_OF_INTEREST = [
          "sea_water_temperature",
          "air_temperature",
          "sea_surface_wave_from_direction",
          "sea_surface_wave_significant_height",
          "wind_speed",
          "wind_from_direction",
          "wind_speed_of_gust",
          "water_surface_height_above_reference_datum",
          "air_pressure",
          "sea_water_speed",
          "direction_of_sea_water_velocity",
          "sea_water_salinity",
          "relative_humidity"]

Get latest data

Here, we will call our defined methods. Also, we will define the output filename and the desired timespan of the plotting.


In [7]:
station_names, station_links = get_mooring_stations('http://thredds.socib.es/thredds/catalog/mooring/weather_station/catalog.html')
# get latest x days

days = 2
html_file = 'bokeh_latest_data.html'

seconds = days_to_seconds(days)
dt = datetime.datetime.now()
desired_start_time = calendar.timegm(dt.utctimetuple()) - seconds
output_file(html_file)
draw_data(station_links, desired_start_time, station_names)


No QC found for WIN_DIR_GUS
No QC found for WIN_DIR_GUS
No QC found for WIN_DIR_GUS
No QC found for WIN_DIR_GUS
No QC found for WDIR_MAX