create_hdf5.py


In [5]:
from app.create_hdf5 import generate_data

In [6]:
import h5py
import numpy as np
import scipy.stats as ss

In [7]:
from os.path import join

In [8]:
# Display name -> the scipy.stats attribute name ('name') and the keyword
# arguments ('options') later unpacked into that distribution.
distributions = {
    'Gaussian': {
        'options': {'loc': 0, 'scale': 0.1},
        'name': 'norm',
    },
    'Exponential': {
        'options': {'loc': -0.5, 'scale': 1},
        'name': 'expon',
    },
    'Chi Square': {
        'options': {'loc': 0.5, 'df': 1},
        'name': 'chi2',
    },
    'Alpha': {
        'options': {'a': 3, 'loc': -0.5},
        'name': 'alpha',
    },
    'Beta': {
        'options': {'a': 3, 'b': 2, 'loc': -0.5},
        'name': 'beta',
    },
}

In [11]:
print(type(distributions))
# Python 3 iterates key/value pairs with dict.items() (Python 2 used iteritems()):
# cf. https://stackoverflow.com/questions/10458437/what-is-the-difference-between-dict-items-and-dict-iteritems
for key, value in distributions.items():
    print(key, value)


<class 'dict'>
Gaussian {'options': {'loc': 0, 'scale': 0.1}, 'name': 'norm'}
Exponential {'options': {'loc': -0.5, 'scale': 1}, 'name': 'expon'}
Chi Square {'options': {'loc': 0.5, 'df': 1}, 'name': 'chi2'}
Alpha {'options': {'a': 3, 'loc': -0.5}, 'name': 'alpha'}
Beta {'options': {'a': 3, 'b': 2, 'loc': -0.5}, 'name': 'beta'}

In [ ]:
# isinstance(object, classinfo)
#    Return True if the object argument is an instance of the classinfo argument,
#    or of a (direct, indirect or virtual) subclass thereof.
# cf. https://docs.python.org/3/library/functions.html#isinstance

In [16]:
def print_dict(dictionary, ident='', braces=1):
    """Print a nested dictionary as an indented, bracketed outline.

    An entry whose value is itself a dict is printed as a header: the key
    wrapped in ``braces`` pairs of square brackets. Its contents are then
    printed recursively, indented by two more spaces and with one more
    pair of brackets. Every other entry is printed as ``key = value``.

    Adapted from
    http://code.activestate.com/recipes/578094-recursively-print-nested-dictionaries/

    Args:
        dictionary: Mapping to print; traversed with ``.items()``.
        ident: Prefix prepended to every line printed at this level.
        braces: Number of bracket pairs around nested-dict headers at
            this level.
    """
    for label, entry in dictionary.items():
        if not isinstance(entry, dict):
            # Leaf entry: plain "key = value" line at the current indentation.
            print(ident + str(label) + ' = ' + str(entry))
            continue
        # Nested dict: emit its header, then descend one level deeper.
        header = ''.join((str(ident), braces * '[', str(label), braces * ']'))
        print(header)
        print_dict(entry, ident + '  ', braces + 1)

In [17]:
print_dict(distributions)


[Gaussian]
  [[options]]
    loc = 0
    scale = 0.1
  name = norm
[Exponential]
  [[options]]
    loc = -0.5
    scale = 1
  name = expon
[Chi Square]
  [[options]]
    loc = 0.5
    df = 1
  name = chi2
[Alpha]
  [[options]]
    a = 3
    loc = -0.5
  name = alpha
[Beta]
  [[options]]
    a = 3
    b = 2
    loc = -0.5
  name = beta

In [21]:
# Evaluation grid: 1000 evenly spaced sample points from -1 to 1.
x = np.linspace(start=-1, stop=1, num=1000)
print(x.shape)
print(type(x))
print(x[:10])


(1000,)
<class 'numpy.ndarray'>
[-1.         -0.997998   -0.995996   -0.99399399 -0.99199199 -0.98998999
 -0.98798799 -0.98598599 -0.98398398 -0.98198198]

In [22]:
print(join("some path", 'demo_data.hdf5'))


some path/demo_data.hdf5

In [25]:
#  getattr(object, name[, default])
#    Return the value of the named attribute of object. name must be a string.
# cf. https://docs.python.org/3/library/functions.html#getattr
#
# Fetch the scipy.stats attribute whose name is stored under the 'Gaussian' entry.
gaussian_name = distributions["Gaussian"]['name']
print(gaussian_name)
Gaussian_attribute = getattr(ss, gaussian_name)
print(type(Gaussian_attribute))


norm
<class 'scipy.stats._continuous_distns.norm_gen'>

In [29]:
# Keyword arguments stored for the Gaussian entry, and their container type.
gaussian_options = distributions["Gaussian"]["options"]
print(gaussian_options)
print(type(gaussian_options))


{'loc': 0, 'scale': 0.1}
<class 'dict'>

In [32]:
distributions["Gaussian"]["options"].keys()


Out[32]:
dict_keys(['loc', 'scale'])

In [36]:
# Asterisks for unpacking into a function call
#
# When calling a function, the * operator unpacks an iterable into the positional
# arguments of the call.
#
# The ** operator does something similar, but with keyword arguments: it takes a
# dictionary of key-value pairs and unpacks it into keyword arguments in the call.
# cf. https://treyhunner.com/2018/10/asterisks-in-python-what-they-are-and-how-to-use-them/
#
# Here the stored options dict is unpacked into the scipy.stats distribution looked up
# earlier. The object is built once and the stored object itself is inspected, instead
# of constructing a second, throwaway instance just to print its type.
Gaussian_distribution = Gaussian_attribute(**distributions['Gaussian']['options'])
print(type(Gaussian_distribution))


<class 'scipy.stats._distn_infrastructure.rv_frozen'>

In [42]:
# List what the frozen distribution object exposes.
print(dir(Gaussian_distribution))
# Evaluate the density over the grid once and inspect that single result,
# rather than recomputing pdf(x) for each print.
gaussian_pdf = Gaussian_distribution.pdf(x)
print(type(gaussian_pdf))
print(gaussian_pdf.shape)


['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'a', 'args', 'b', 'cdf', 'dist', 'entropy', 'expect', 'interval', 'isf', 'kwds', 'logcdf', 'logpdf', 'logpmf', 'logsf', 'mean', 'median', 'moment', 'pdf', 'pmf', 'ppf', 'random_state', 'rvs', 'sf', 'stats', 'std', 'var']
<class 'numpy.ndarray'>
(1000,)

In [44]:
import os

In [45]:
# os.path.dirname(path)
#    Return the directory name of pathname path. 
# This is the first element of the pair returned by passing path to the function split().
os.path.dirname(join("some path", 'demo_data.hdf5'))


Out[45]:
'some path'

In [46]:
# @url https://bokeh.pydata.org/en/latest/docs/reference/io.html
# curdoc()
#    Return the document for the current default state.
#    Returns: the current default document object.
#    Return type: Document
#
#  class Document(**kwargs)
#    The basic unit of serialization for Bokeh.
#    Document instances collect Bokeh models (e.g. plots, layouts, widgets, etc.)
#    so that they may be reflected into the BokehJS client runtime. Because models may refer to other models
#    (e.g., a plot has a list of renderers), it is not generally useful or meaningful to convert individual models to JSON.
#    Accordingly, the Document is thus the smallest unit of serialization for Bokeh.
#
#    Background: in computer science, in the context of data storage, serialization (or serialisation) is
#    the process of translating data structures or object state into a format that can be stored
#    (for example, in a file or memory buffer) or transmitted (for example, across a network connection link) and
#    reconstructed later (possibly in a different computer environment).
#
# @url https://bokeh.pydata.org/en/latest/docs/reference/document.html#bokeh.document.document.Document
#
#  add_root(model, setter=None)
#    Add a model as a root of this Document.
#    Any changes to this model (including to other models referred to by it) will
#    trigger on_change callbacks registered on this document.
#    Parameters:
#
#        model (Model) – The model to add as a root of this document.
#        setter (ClientSession or ServerSession or None, optional) –
#
#  title
#    A title for this document.
#
from bokeh.io import curdoc

In [48]:
# bokeh.layouts
# cf. https://bokeh.pydata.org/en/latest/docs/reference/layouts.html
# Functions for arranging bokeh Layout objects.
#  row(*args, **kwargs)
#    Create a row of Bokeh Layout objects.
#    Forces all objects to have the same sizing_mode, which is required for complex layouts to work.
from bokeh.layouts import row

In [49]:
# @url https://bokeh.pydata.org/en/latest/docs/reference/models/sources.html
#  class ColumnDataSource(*args, **kw)
#    Bases: bokeh.models.sources.ColumnarDataSource
#    Maps names of columns to sequences or arrays.
#    The ColumnDataSource is a fundamental data structure of Bokeh.
#    Most plots, data tables, etc. will be driven by a ColumnDataSource.
#    If the ColumnDataSource initializer is called with a single argument, that argument can be any of the following:
#        - A Python dict that maps string names to sequences of values, e.g. lists, arrays, etc.
#        - A Pandas DataFrame object
#        - A Pandas GroupBy object
from bokeh.models import ColumnDataSource

In [50]:
# class Select(**kwargs)
#    Bases: bokeh.models.widgets.inputs.InputWidget
#    Single-select widget.
# @url https://bokeh.pydata.org/en/latest/docs/reference/models/widgets.inputs.html
from bokeh.models.widgets import Select

In [51]:
# @url https://bokeh.pydata.org/en/latest/docs/reference/plotting.html
#  figure(**kwargs)
#    Create a new Figure for plotting.
#    A subclass of Plot that simplifies plot creation with default axes, grids, tools, etc.
#    Figure objects have many glyph methods that can be used to draw vectorized graphical glyphs.
from bokeh.plotting import figure

In [53]:
# @url https://stackoverflow.com/questions/9271464/what-does-the-file-variable-mean-do/9271617
# When a module is loaded in Python, __file__ is set to its name. 
# You can then use that with other functions to find the directory that the file is located in.
# NOTE: this notebook session does not define __file__, so evaluating it here raises
# NameError (see the traceback recorded below this cell).
__file__


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-53-4fd5c34ee0c2> in <module>
      2 # When a module is loaded in Python, __file__ is set to its name.
      3 # You can then use that with other functions to find the directory that the file is located in.
----> 4 __file__

NameError: name '__file__' is not defined

In [57]:
# Choices offered in the distribution selector; they mirror the keys of `distributions`.
options = [
    'Gaussian',
    'Exponential',
    'Chi Square',
    'Alpha',
    'Beta',
]
print(options)


['Gaussian', 'Exponential', 'Chi Square', 'Alpha', 'Beta']

In [58]:
# Single-select widget for choosing a distribution; the first option is the initial value.
data_select = Select(title="Distribution:", value=options[0], options=options)
# Inspect the widget that was actually created instead of building a second,
# throwaway Select just to print its type.
print(type(data_select))


<class 'bokeh.models.widgets.inputs.Select'>

In [60]:
# @url https://bokeh.pydata.org/en/latest/docs/reference/models/widgets.inputs.html
# options

#    property type: Either ( List ( Either ( String , Tuple ( Either ( Int , String ), String ) ) ), 
# Dict ( String , List ( Either ( String , Tuple ( Either ( Int , String ), String ) ) ) ) )
#    Available selection options. Options may be provided either as a list of possible string values,
#    or as a list of tuples, each of the form (value, label).
#    In the latter case, the visible widget text for each value will be the corresponding given label.
#    Option groupings can be provided by supplying a dictionary object whose values are in the aforementioned list format.
#
# value
#    property type: String
#
#    Initial or selected value.

In [61]:
# @url https://bokeh.pydata.org/en/latest/docs/reference/models/sources.html
# data
#    property type: ColumnData ( String , Seq ( Any ) )
#    Mapping of column names to sequences of data. The data can be, e.g., Python lists or tuples, NumPy arrays, etc.
# ColumnData(keys_type, values_type, default={}, help=None)
#
#    Accept a Python dictionary suitable as the data attribute of a ColumnDataSource.
#
#    This class is a specialization of Dict that handles efficiently encoding columns that are NumPy arrays.
#

In [ ]:
# @url https://bokeh.pydata.org/en/latest/docs/user_guide/interaction/widgets.html
# All widgets have an .on_change method that takes an attribute name and one or more event handlers as parameters. 
# These handlers are expected to have the function signature, (attr, old, new), 
# where attr refers to the changed attribute’s name, and old and new refer to the previous and updated values of the 
# attribute. .on_change must be used when you need the previous value of an attribute.