In [22]:
import numpy as np
import tensorflow as tf
import edward as ed
import matplotlib.pyplot as plt
%matplotlib inline
import sys
import gzip
import pickle
try:
    from keras.utils import to_categorical
except ImportError:
    # Keras may not be installed; fall back to a minimal NumPy one-hot encoder
    # (a sketch with the same interface, assuming integer label arrays).
    def to_categorical(y, num_classes=None):
        y = np.asarray(y, dtype='int')
        if num_classes is None:
            num_classes = y.max() + 1
        return np.eye(num_classes)[y]
In [15]:
from edward.models import Normal
In [21]:
# DATA
def load_mnist():
    """
    Loads the MNIST handwritten digits dataset from a pickled archive.
    :return: Three tuples containing training data, validation data and test data
    """
    f = gzip.open(r'C:\Code\neural-nets\data\mnist.pkl.gz')
    training_data, validation_data, test_data = pickle.load(f, encoding='latin1')
    f.close()
    return training_data, validation_data, test_data

training_data, validation_data, test_data = load_mnist()

def show_grid_of_digits(data, nrows, ncols):
    for i in range(nrows * ncols):
        plt.subplot(nrows, ncols, i + 1)
        im = data[0][i].reshape(28, 28)
        plt.imshow(im, cmap='gray')
        plt.gca().axes.get_xaxis().set_visible(False)
        plt.gca().axes.get_yaxis().set_visible(False)

show_grid_of_digits(training_data, 3, 5)
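Each returned tuple is a pair (images, labels); for the standard mnist.pkl.gz layout this means, for example, training images of shape (50000, 784) with integer labels in 0-9. A quick sanity check, assuming that layout:
In [ ]:
print(training_data[0].shape, training_data[1].shape)  # expected: (50000, 784) (50000,)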
In [45]:
def multilayer_perceptron(x, weights, biases):
    """
    Takes in the input placeholder, weights and biases and returns the output
    tensor of a network with two hidden ReLU layers and a linear output layer.
    :param tf.placeholder x: Placeholder for input
    :param dict weights: Dictionary containing Variables describing weights of each layer
    :param dict biases: Dictionary containing Variables describing biases of each layer
    :return: The activations of the output layer
    """
    # Hidden layer with ReLU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with ReLU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer
n_input = 28**2
n_hidden_1 = 10
n_hidden_2 = 10
n_classes = 10
batch_size = 20
# Priors on layer weights & biases
weights = {
    'h1': Normal(loc=tf.zeros([n_input, n_hidden_1]), scale=tf.ones([n_input, n_hidden_1])),
    'h2': Normal(loc=tf.zeros([n_hidden_1, n_hidden_2]), scale=tf.ones([n_hidden_1, n_hidden_2])),
    'out': Normal(loc=tf.zeros([n_hidden_2, n_classes]), scale=tf.ones([n_hidden_2, n_classes]))
}
biases = {
    'b1': Normal(loc=tf.zeros(n_hidden_1), scale=tf.ones(n_hidden_1)),
    'b2': Normal(loc=tf.zeros(n_hidden_2), scale=tf.ones(n_hidden_2)),
    'out': Normal(loc=tf.zeros(n_classes), scale=tf.ones(n_classes))
}
x = tf.placeholder(tf.float32, [batch_size, n_input])
y = Normal(loc=multilayer_perceptron(x, weights, biases), scale=tf.ones(n_classes) * 0.1)  # constant observation noise
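Note the modelling choice here: the one-hot targets are treated as continuous observations under a fixed-scale Gaussian likelihood. A Categorical likelihood over the integer labels would be the more conventional choice for classification; a commented sketch (not run in this notebook):
In [ ]:
# from edward.models import Categorical
# y_cat = Categorical(logits=multilayer_perceptron(x, weights, biases))
# ...and later: ed.KLqp(latent_vars, data={x: batch_x, y_cat: training_data[1][:batch_size]})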
In [46]:
# BACKWARD MODEL
q_weights = {
    'h1': Normal(loc=tf.Variable(tf.random_normal([n_input, n_hidden_1])),
                 scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_input, n_hidden_1])))),
    'h2': Normal(loc=tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
                 scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])))),
    'out': Normal(loc=tf.Variable(tf.random_normal([n_hidden_2, n_classes])),
                  scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_hidden_2, n_classes]))))
}
q_biases = {
    'b1': Normal(loc=tf.Variable(tf.random_normal([n_hidden_1])),
                 scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_hidden_1])))),
    'b2': Normal(loc=tf.Variable(tf.random_normal([n_hidden_2])),
                 scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_hidden_2])))),
    'out': Normal(loc=tf.Variable(tf.random_normal([n_classes])),
                  scale=tf.nn.softplus(tf.Variable(tf.random_normal([n_classes]))))
}
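The softplus transform, softplus(z) = log(1 + exp(z)), keeps every variational scale parameter strictly positive while the underlying tf.Variable being optimized stays unconstrained; this is why each scale above is wrapped in tf.nn.softplus rather than used raw.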
In [53]:
batch_x = training_data[0][:20]
batch_y = to_categorical(training_data[1][:20])
In [54]:
# INFERENCE
# this will take a couple of minutes
latent_vars = {weights['h1']: q_weights['h1'], biases['b1']: q_biases['b1'],
               weights['h2']: q_weights['h2'], biases['b2']: q_biases['b2'],
               weights['out']: q_weights['out'], biases['out']: q_biases['out']}
inference = ed.KLqp(latent_vars=latent_vars,
                    data={x: batch_x, y: batch_y})
inference.run(n_samples=10, n_iter=25000)
25000/25000 [100%] ██████████████████████████████ Elapsed: 442s | Loss: 5028.379
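The notebook stops after fitting, so nothing above evaluates the posterior. A minimal posterior-predictive check could look like the sketch below (assumptions: same graph and session as above; a single draw from each variational factor; accuracy measured on the 20-image training batch):
In [ ]:
# Posterior-predictive sketch: sample one set of weights/biases from the
# variational posterior, score the batch, and compare argmax predictions
# against the one-hot labels.
sampled_w = {k: v.sample() for k, v in q_weights.items()}
sampled_b = {k: v.sample() for k, v in q_biases.items()}
logits = multilayer_perceptron(x, sampled_w, sampled_b)
sess = ed.get_session()
preds = sess.run(logits, feed_dict={x: batch_x}).argmax(axis=1)
print((preds == batch_y.argmax(axis=1)).mean())  # accuracy on the training batch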
In [56]:
batch_y
Out[56]:
array([[ 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
[ 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
[ 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
[ 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
[ 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
[ 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
[ 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
[ 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
[ 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
[ 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])
In [50]:
batch_y
Out[50]:
array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9], dtype=int64)
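This earlier output is the same batch of labels before one-hot encoding: to_categorical maps each integer label k to the length-10 indicator vector with a 1 in position k, as shown in Out[56] above.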
In [9]:
from edward.models import Categorical, Multinomial, Normal
In [2]:
help(Multinomial)
Help on class Multinomial in module abc:
class Multinomial(edward.models.random_variable.RandomVariable, tensorflow.contrib.distributions.python.ops.multinomial.Multinomial)
| Multinomial distribution.
|
| This Multinomial distribution is parameterized by `probs`, a (batch of)
| length-`k` `prob` (probability) vectors (`k > 1`) such that
| `tf.reduce_sum(probs, -1) = 1`, and a `total_count` number of trials, i.e.,
| the number of trials per draw from the Multinomial. It is defined over a
| (batch of) length-`k` vector `counts` such that
| `tf.reduce_sum(counts, -1) = total_count`. The Multinomial is identically the
| Binomial distribution when `k = 2`.
|
| #### Mathematical Details
|
| The Multinomial is a distribution over `k`-class counts, i.e., a length-`k`
| vector of non-negative integer `counts = n = [n_0, ..., n_{k-1}]`.
|
| The probability mass function (pmf) is,
|
| ```none
| pmf(n; pi, N) = prod_j (pi_j)**n_j / Z
| Z = (prod_j n_j!) / N!
| ```
|
| where:
| * `probs = pi = [pi_0, ..., pi_{k-1}]`, `pi_j > 0`, `sum_j pi_j = 1`,
| * `total_count = N`, `N` a positive integer,
| * `Z` is the normalization constant, and,
| * `N!` denotes `N` factorial.
|
| Distribution parameters are automatically broadcast in all functions; see
| examples for details.
|
| #### Examples
|
| Create a 3-class distribution, with the 3rd class is most likely to be drawn,
| using logits.
|
| ```python
| logits = [-50., -43, 0]
| dist = Multinomial(total_count=4., logits=logits)
| ```
|
| Create a 3-class distribution, with the 3rd class is most likely to be drawn.
|
| ```python
| p = [.2, .3, .5]
| dist = Multinomial(total_count=4., probs=p)
| ```
|
| The distribution functions can be evaluated on counts.
|
| ```python
| # counts same shape as p.
| counts = [1., 0, 3]
| dist.prob(counts) # Shape []
|
| # p will be broadcast to [[.2, .3, .5], [.2, .3, .5]] to match counts.
| counts = [[1., 2, 1], [2, 2, 0]]
| dist.prob(counts) # Shape [2]
|
| # p will be broadcast to shape [5, 7, 3] to match counts.
| counts = [[...]] # Shape [5, 7, 3]
| dist.prob(counts) # Shape [5, 7]
| ```
|
| Create a 2-batch of 3-class distributions.
|
| ```python
| p = [[.1, .2, .7], [.3, .3, .4]] # Shape [2, 3]
| dist = Multinomial(total_count=[4., 5], probs=p)
|
| counts = [[2., 1, 1], [3, 1, 1]]
| dist.prob(counts) # Shape [2]
| ```
|
| Method resolution order:
| Multinomial
| edward.models.random_variable.RandomVariable
| tensorflow.contrib.distributions.python.ops.multinomial.Multinomial
| tensorflow.contrib.distributions.python.ops.distribution.Distribution
| tensorflow.contrib.distributions.python.ops.distribution._BaseDistribution
| builtins.object
|
| Methods defined here:
|
| conjugate_log_prob = wrapped(self, val=None)
|
| ----------------------------------------------------------------------
| Data and other attributes defined here:
|
| __abstractmethods__ = frozenset()
|
| support = 'onehot'
|
| ----------------------------------------------------------------------
| Methods inherited from edward.models.random_variable.RandomVariable:
|
| __abs__(self)
|
| __add__(self, other)
|
| __and__(self, other)
|
| __bool__(self)
|
| __div__(self, other)
|
| __eq__(self, other)
| Return self==value.
|
| __floordiv__(self, other)
|
| __ge__(self, other)
| Return self>=value.
|
| __getitem__(self, key)
| Subset the tensor associated to the random variable, not the
| random variable itself.
|
| __gt__(self, other)
| Return self>value.
|
| __hash__(self)
| Return hash(self).
|
| __init__(self, *args, **kwargs)
| Parameters
| ----------
| sample_shape : tf.TensorShape, optional
| Shape of samples to draw from the random variable.
| value : tf.Tensor, optional
| Fixed tensor to associate with random variable. Must have shape
| ``sample_shape + batch_shape + event_shape``.
| collections : list, optional
| Optional list of graph collections keys. The random variable is
| added to these collections. Defaults to ["random_variables"].
| *args, **kwargs
| Passed into parent ``__init__``.
|
| __invert__(self)
|
| __iter__(self)
|
| __le__(self, other)
| Return self<=value.
|
| __lt__(self, other)
| Return self<value.
|
| __mod__(self, other)
|
| __mul__(self, other)
|
| __neg__(self)
|
| __nonzero__(self)
|
| __or__(self, other)
|
| __pow__(self, other)
|
| __radd__(self, other)
|
| __rand__(self, other)
|
| __rdiv__(self, other)
|
| __repr__(self)
| Return repr(self).
|
| __rfloordiv__(self, other)
|
| __rmod__(self, other)
|
| __rmul__(self, other)
|
| __ror__(self, other)
|
| __rpow__(self, other)
|
| __rsub__(self, other)
|
| __rtruediv__ = __rdiv__(self, other)
|
| __rxor__(self, other)
|
| __str__(self)
| Return str(self).
|
| __sub__(self, other)
|
| __truediv__ = __div__(self, other)
|
| __xor__(self, other)
|
| eval(self, session=None, feed_dict=None)
| In a session, computes and returns the value of this random variable.
|
| This is not a graph construction method, it does not add ops to the graph.
|
| This convenience method requires a session where the graph
| containing this variable has been launched. If no session is
| passed, the default session is used.
|
| Parameters
| ----------
| session : tf.BaseSession, optional
| The ``tf.Session`` to use to evaluate this random variable. If
| none, the default session is used.
| feed_dict : dict, optional
| A dictionary that maps ``tf.Tensor`` objects to feed values. See
| ``tf.Session.run()`` for a description of the valid feed values.
|
| Examples
| --------
| >>> x = Normal(0.0, 1.0)
| >>> with tf.Session() as sess:
| >>> # Usage passing the session explicitly.
| >>> print(x.eval(sess))
| >>> # Usage with the default session. The 'with' block
| >>> # above makes 'sess' the default session.
| >>> print(x.eval())
|
| get_ancestors(self, collection=None)
| Get ancestor random variables.
|
| get_blanket(self, collection=None)
| Get the random variable's Markov blanket.
|
| get_children(self, collection=None)
| Get child random variables.
|
| get_descendants(self, collection=None)
| Get descendant random variables.
|
| get_parents(self, collection=None)
| Get parent random variables.
|
| get_shape(self)
| Get shape of random variable.
|
| get_siblings(self, collection=None)
| Get sibling random variables.
|
| get_variables(self, collection=None)
| Get TensorFlow variables that the random variable depends on.
|
| value(self)
| Get tensor that the random variable corresponds to.
|
| ----------------------------------------------------------------------
| Data descriptors inherited from edward.models.random_variable.RandomVariable:
|
| __dict__
| dictionary for instance variables (if defined)
|
| __weakref__
| list of weak references to the object (if defined)
|
| sample_shape
| Sample shape of random variable.
|
| shape
| Shape of random variable.
|
| unique_name
| Name of random variable with its unique scoping name. Use
| ``name`` to just get the name of the random variable.
|
| ----------------------------------------------------------------------
| Methods inherited from tensorflow.contrib.distributions.python.ops.multinomial.Multinomial:
|
| log_prob(self, value, name='log_prob')
| Log probability density/mass function.
|
|
| Additional documentation from `Multinomial`:
|
| For each batch of counts, `value = [n_0, ...
| ,n_{k-1}]`, `P[value]` is the probability that after sampling `self.total_count`
| draws from this Multinomial distribution, the number of draws falling in class
| `j` is `n_j`. Since this definition is [exchangeable](
| https://en.wikipedia.org/wiki/Exchangeable_random_variables); different
| sequences have the same counts so the probability includes a combinatorial
| coefficient.
|
| Note: `value` must be a non-negative tensor with dtype `self.dtype`, have no
| fractional components, and such that
| `tf.reduce_sum(value, -1) = self.total_count`. Its shape must be broadcastable
| with `self.probs` and `self.total_count`.
|
| Args:
| value: `float` or `double` `Tensor`.
| name: The name to give this op.
|
| Returns:
| log_prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
| values of type `self.dtype`.
|
| prob(self, value, name='prob')
| Probability density/mass function.
|
|
| Additional documentation from `Multinomial`:
|
| For each batch of counts, `value = [n_0, ...
| ,n_{k-1}]`, `P[value]` is the probability that after sampling `self.total_count`
| draws from this Multinomial distribution, the number of draws falling in class
| `j` is `n_j`. Since this definition is [exchangeable](
| https://en.wikipedia.org/wiki/Exchangeable_random_variables); different
| sequences have the same counts so the probability includes a combinatorial
| coefficient.
|
| Note: `value` must be a non-negative tensor with dtype `self.dtype`, have no
| fractional components, and such that
| `tf.reduce_sum(value, -1) = self.total_count`. Its shape must be broadcastable
| with `self.probs` and `self.total_count`.
|
| Args:
| value: `float` or `double` `Tensor`.
| name: The name to give this op.
|
| Returns:
| prob: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
| values of type `self.dtype`.
|
| ----------------------------------------------------------------------
| Data descriptors inherited from tensorflow.contrib.distributions.python.ops.multinomial.Multinomial:
|
| logits
| Vector of coordinatewise logits.
|
| probs
| Probability of of drawing a `1` in that coordinate.
|
| total_count
| Number of trials used to construct a sample.
|
| ----------------------------------------------------------------------
| Methods inherited from tensorflow.contrib.distributions.python.ops.distribution.Distribution:
|
| batch_shape_tensor(self, name='batch_shape_tensor')
| Shape of a single sample from a single event index as a 1-D `Tensor`.
|
| The batch dimensions are indexes into independent, non-identical
| parameterizations of this distribution.
|
| Args:
| name: name to give to the op
|
| Returns:
| batch_shape: `Tensor`.
|
| cdf(self, value, name='cdf')
| Cumulative distribution function.
|
| Given random variable `X`, the cumulative distribution function `cdf` is:
|
| ```none
| cdf(x) := P[X <= x]
| ```
|
| Args:
| value: `float` or `double` `Tensor`.
| name: The name to give this op.
|
| Returns:
| cdf: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
| values of type `self.dtype`.
|
| copy(self, **override_parameters_kwargs)
| Creates a deep copy of the distribution.
|
| Note: the copy distribution may continue to depend on the original
| initialization arguments.
|
| Args:
| **override_parameters_kwargs: String/value dictionary of initialization
| arguments to override with new values.
|
| Returns:
| distribution: A new instance of `type(self)` initialized from the union
| of self.parameters and override_parameters_kwargs, i.e.,
| `dict(self.parameters, **override_parameters_kwargs)`.
|
| covariance(self, name='covariance')
| Covariance.
|
| Covariance is (possibly) defined only for non-scalar-event distributions.
|
| For example, for a length-`k`, vector-valued distribution, it is calculated
| as,
|
| ```none
| Cov[i, j] = Covariance(X_i, X_j) = E[(X_i - E[X_i]) (X_j - E[X_j])]
| ```
|
| where `Cov` is a (batch of) `k x k` matrix, `0 <= (i, j) < k`, and `E`
| denotes expectation.
|
| Alternatively, for non-vector, multivariate distributions (e.g.,
| matrix-valued, Wishart), `Covariance` shall return a (batch of) matrices
| under some vectorization of the events, i.e.,
|
| ```none
| Cov[i, j] = Covariance(Vec(X)_i, Vec(X)_j) = [as above]
| ```
|
| where `Cov` is a (batch of) `k' x k'` matrices,
| `0 <= (i, j) < k' = reduce_prod(event_shape)`, and `Vec` is some function
| mapping indices of this distribution's event dimensions to indices of a
| length-`k'` vector.
|
| Args:
| name: The name to give this op.
|
| Returns:
| covariance: Floating-point `Tensor` with shape `[B1, ..., Bn, k', k']`
| where the first `n` dimensions are batch coordinates and
| `k' = reduce_prod(self.event_shape)`.
|
| entropy(self, name='entropy')
| Shannon entropy in nats.
|
| event_shape_tensor(self, name='event_shape_tensor')
| Shape of a single sample from a single batch as a 1-D int32 `Tensor`.
|
| Args:
| name: name to give to the op
|
| Returns:
| event_shape: `Tensor`.
|
| is_scalar_batch(self, name='is_scalar_batch')
| Indicates that `batch_shape == []`.
|
| Args:
| name: The name to give this op.
|
| Returns:
| is_scalar_batch: `bool` scalar `Tensor`.
|
| is_scalar_event(self, name='is_scalar_event')
| Indicates that `event_shape == []`.
|
| Args:
| name: The name to give this op.
|
| Returns:
| is_scalar_event: `bool` scalar `Tensor`.
|
| log_cdf(self, value, name='log_cdf')
| Log cumulative distribution function.
|
| Given random variable `X`, the cumulative distribution function `cdf` is:
|
| ```none
| log_cdf(x) := Log[ P[X <= x] ]
| ```
|
| Often, a numerical approximation can be used for `log_cdf(x)` that yields
| a more accurate answer than simply taking the logarithm of the `cdf` when
| `x << -1`.
|
| Args:
| value: `float` or `double` `Tensor`.
| name: The name to give this op.
|
| Returns:
| logcdf: a `Tensor` of shape `sample_shape(x) + self.batch_shape` with
| values of type `self.dtype`.
|
| log_survival_function(self, value, name='log_survival_function')
| Log survival function.
|
| Given random variable `X`, the survival function is defined:
|
| ```none
| log_survival_function(x) = Log[ P[X > x] ]
| = Log[ 1 - P[X <= x] ]
| = Log[ 1 - cdf(x) ]
| ```
|
| Typically, different numerical approximations can be used for the log
| survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`.
|
| Args:
| value: `float` or `double` `Tensor`.
| name: The name to give this op.
|
| Returns:
| `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
| `self.dtype`.
|
| mean(self, name='mean')
| Mean.
|
| mode(self, name='mode')
| Mode.
|
| sample(self, sample_shape=(), seed=None, name='sample')
| Generate samples of the specified shape.
|
| Note that a call to `sample()` without arguments will generate a single
| sample.
|
| Args:
| sample_shape: 0D or 1D `int32` `Tensor`. Shape of the generated samples.
| seed: Python integer seed for RNG
| name: name to give to the op.
|
| Returns:
| samples: a `Tensor` with prepended dimensions `sample_shape`.
|
| stddev(self, name='stddev')
| Standard deviation.
|
| Standard deviation is defined as,
|
| ```none
| stddev = E[(X - E[X])**2]**0.5
| ```
|
| where `X` is the random variable associated with this distribution, `E`
| denotes expectation, and `stddev.shape = batch_shape + event_shape`.
|
| Args:
| name: The name to give this op.
|
| Returns:
| stddev: Floating-point `Tensor` with shape identical to
| `batch_shape + event_shape`, i.e., the same shape as `self.mean()`.
|
| survival_function(self, value, name='survival_function')
| Survival function.
|
| Given random variable `X`, the survival function is defined:
|
| ```none
| survival_function(x) = P[X > x]
| = 1 - P[X <= x]
| = 1 - cdf(x).
| ```
|
| Args:
| value: `float` or `double` `Tensor`.
| name: The name to give this op.
|
| Returns:
| `Tensor` of shape `sample_shape(x) + self.batch_shape` with values of type
| `self.dtype`.
|
| variance(self, name='variance')
| Variance.
|
| Variance is defined as,
|
| ```none
| Var = E[(X - E[X])**2]
| ```
|
| where `X` is the random variable associated with this distribution, `E`
| denotes expectation, and `Var.shape = batch_shape + event_shape`.
|
| Args:
| name: The name to give this op.
|
| Returns:
| variance: Floating-point `Tensor` with shape identical to
| `batch_shape + event_shape`, i.e., the same shape as `self.mean()`.
|
| ----------------------------------------------------------------------
| Class methods inherited from tensorflow.contrib.distributions.python.ops.distribution.Distribution:
|
| param_shapes(sample_shape, name='DistributionParamShapes') from tensorflow.contrib.distributions.python.ops.distribution._DistributionMeta
| Shapes of parameters given the desired shape of a call to `sample()`.
|
| This is a class method that describes what key/value arguments are required
| to instantiate the given `Distribution` so that a particular shape is
| returned for that instance's call to `sample()`.
|
| Subclasses should override class method `_param_shapes`.
|
| Args:
| sample_shape: `Tensor` or python list/tuple. Desired shape of a call to
| `sample()`.
| name: name to prepend ops with.
|
| Returns:
| `dict` of parameter name to `Tensor` shapes.
|
| param_static_shapes(sample_shape) from tensorflow.contrib.distributions.python.ops.distribution._DistributionMeta
| param_shapes with static (i.e. `TensorShape`) shapes.
|
| This is a class method that describes what key/value arguments are required
| to instantiate the given `Distribution` so that a particular shape is
| returned for that instance's call to `sample()`. Assumes that the sample's
| shape is known statically.
|
| Subclasses should override class method `_param_shapes` to return
| constant-valued tensors when constant values are fed.
|
| Args:
| sample_shape: `TensorShape` or python list/tuple. Desired shape of a call
| to `sample()`.
|
| Returns:
| `dict` of parameter name to `TensorShape`.
|
| Raises:
| ValueError: if `sample_shape` is a `TensorShape` and is not fully defined.
|
| ----------------------------------------------------------------------
| Data descriptors inherited from tensorflow.contrib.distributions.python.ops.distribution.Distribution:
|
| allow_nan_stats
| Python `bool` describing behavior when a stat is undefined.
|
| Stats return +/- infinity when it makes sense. E.g., the variance of a
| Cauchy distribution is infinity. However, sometimes the statistic is
| undefined, e.g., if a distribution's pdf does not achieve a maximum within
| the support of the distribution, the mode is undefined. If the mean is
| undefined, then by definition the variance is undefined. E.g. the mean for
| Student's T for df = 1 is undefined (no clear way to say it is either + or -
| infinity), so the variance = E[(X - mean)**2] is also undefined.
|
| Returns:
| allow_nan_stats: Python `bool`.
|
| batch_shape
| Shape of a single sample from a single event index as a `TensorShape`.
|
| May be partially defined or unknown.
|
| The batch dimensions are indexes into independent, non-identical
| parameterizations of this distribution.
|
| Returns:
| batch_shape: `TensorShape`, possibly unknown.
|
| dtype
| The `DType` of `Tensor`s handled by this `Distribution`.
|
| event_shape
| Shape of a single sample from a single batch as a `TensorShape`.
|
| May be partially defined or unknown.
|
| Returns:
| event_shape: `TensorShape`, possibly unknown.
|
| name
| Name prepended to all ops created by this `Distribution`.
|
| parameters
| Dictionary of parameters used to instantiate this `Distribution`.
|
| reparameterization_type
| Describes how samples from the distribution are reparameterized.
|
| Currently this is one of the static instances
| `distributions.FULLY_REPARAMETERIZED`
| or `distributions.NOT_REPARAMETERIZED`.
|
| Returns:
| An instance of `ReparameterizationType`.
|
| validate_args
| Python `bool` indicating possibly expensive checks are enabled.
In [3]:
logits = [-50., -43, 0]
dist = Multinomial(total_count=4., logits=logits)
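With these logits virtually all probability mass falls on the third class, so a draw of total_count=4 trials lands, almost surely, on the count vector [0., 0., 4.].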
In [16]:
H = 20     # number of hidden units
K = 10     # number of classes
D = 28**2  # input dimensionality, defined up front so the next cell runs in order
In [17]:
W_0 = Normal(loc=tf.zeros([D, H]), scale=tf.ones([D, H]))
W_1 = Normal(loc=tf.zeros([H, K]), scale=tf.ones([H, K]))
b_0 = Normal(loc=tf.zeros(H), scale=tf.ones(H))
b_1 = Normal(loc=tf.zeros(K), scale=tf.ones(K))
def neural_network(x):
    h = tf.nn.tanh(tf.matmul(x, W_0) + b_0)
    h = tf.matmul(h, W_1) + b_1
    return h
In [18]:
import tensorflow as tf
M = 1
D = 28**2
x = tf.placeholder(tf.float32, [M, D])
y = Multinomial(total_count=4., logits=neural_network(x))
In [19]:
y
Out[19]:
<ed.RandomVariable 'Multinomial_8/' shape=(1, 10) dtype=float32>
In [20]:
y = Multinomial(logits=neural_network(x))
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-20-1a09f569111d> in <module>()
----> 1 y = Multinomial(logits=neural_network(x))
c:\programdata\miniconda3\envs\tensorflow\lib\site-packages\edward\models\random_variable.py in __init__(self, *args, **kwargs)
101 self._kwargs['collections'] = collections
102
--> 103 super(RandomVariable, self).__init__(*args, **kwargs)
104
105 self._sample_shape = tf.TensorShape(sample_shape)
TypeError: __init__() missing 1 required positional argument: 'total_count'
In [21]:
help(Multinomial)
(output identical to the help(Multinomial) listing shown earlier; omitted)