In [20]:
from astropy.stats import bayesian_blocks
from astropy.stats import histogram
from astropy.visualization import hist
import healpix_util as hu
import astropy as ap
import numpy as np
from astropy.io import fits
from astropy.table import Table
import astropy.io.ascii as ascii
from astropy.constants import c
import matplotlib.pyplot as plt
import math
import scipy.special as sp
from scipy import integrate
%matplotlib inline

In [2]:
t = np.random.normal(size=1000)
t[80:] = t[:20]
edges = bayesian_blocks(t, fitness='events', p0=0.01)


WARNING: p0 does not seem to accurately represent the false positive rate for event data. It is highly recommended that you run random trials on signal-free noise to calibrate ncp_prior to achieve a desired false positive rate. [astropy.stats.bayesian_blocks]

In [3]:
plt.hist(edges)


Out[3]:
(array([ 1.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.,  1.]),
 array([-2.31543303, -1.84630021, -1.37716739, -0.90803457, -0.43890175,
         0.03023106,  0.49936388,  0.9684967 ,  1.43762952,  1.90676234,
         2.37589516]),
 <a list of 10 Patch objects>)

In [4]:
edges


Out[4]:
array([-2.31543303, -0.71504841,  0.71304904,  2.37589516])

In [5]:
t


Out[5]:
array([-0.36900026, -0.13407204,  2.37589516,  1.55550514, -0.70465455,
        1.27748591, -0.31035756, -0.19080958, -1.30987381, -0.63578801,
        0.27085577,  0.44936278, -0.24964019, -2.17967096,  0.66209108,
       -0.4097776 , -1.69210694,  0.22849402, -0.26311481, -0.216975  ,
       -0.0145258 , -1.30502953,  0.49128205,  1.74756964, -0.12063942,
        0.04320932, -1.63401441,  0.42509747,  0.88335911, -0.72544227,
       -1.13804694,  1.78247158, -1.00219071, -0.27423841,  0.52074478,
        0.06040856, -0.68350824,  0.54519608, -0.5188705 , -2.31543303,
        0.41435898,  0.36384442, -0.07775946, -0.66613716,  1.00939913,
        0.39877216,  0.47023857, -1.03387214,  1.16043863,  1.17992033,
        0.31031614, -0.80941523, -1.62611866, -1.62873771, -0.4375572 ,
       -0.24452085,  1.86994442, -0.60477912, -0.55331025, -0.43583892,
       -0.65116038, -0.90040116,  0.55276931,  0.76400701,  0.62062027,
        0.52777174, -1.09609309, -0.65079643, -0.67771938, -0.81370947,
       -0.96102394, -0.21826609,  0.20723307,  0.03146648, -1.40289577,
       -2.20423738, -0.08092451,  0.46863327,  1.61866805,  1.20940738,
       -0.36900026, -0.13407204,  2.37589516,  1.55550514, -0.70465455,
        1.27748591, -0.31035756, -0.19080958, -1.30987381, -0.63578801,
        0.27085577,  0.44936278, -0.24964019, -2.17967096,  0.66209108,
       -0.4097776 , -1.69210694,  0.22849402, -0.26311481, -0.216975  ])

In [7]:
np.sort(t)


Out[7]:
array([-2.31543303, -2.20423738, -2.17967096, -2.17967096, -1.69210694,
       -1.69210694, -1.63401441, -1.62873771, -1.62611866, -1.40289577,
       -1.30987381, -1.30987381, -1.30502953, -1.13804694, -1.09609309,
       -1.03387214, -1.00219071, -0.96102394, -0.90040116, -0.81370947,
       -0.80941523, -0.72544227, -0.70465455, -0.70465455, -0.68350824,
       -0.67771938, -0.66613716, -0.65116038, -0.65079643, -0.63578801,
       -0.63578801, -0.60477912, -0.55331025, -0.5188705 , -0.4375572 ,
       -0.43583892, -0.4097776 , -0.4097776 , -0.36900026, -0.36900026,
       -0.31035756, -0.31035756, -0.27423841, -0.26311481, -0.26311481,
       -0.24964019, -0.24964019, -0.24452085, -0.21826609, -0.216975  ,
       -0.216975  , -0.19080958, -0.19080958, -0.13407204, -0.13407204,
       -0.12063942, -0.08092451, -0.07775946, -0.0145258 ,  0.03146648,
        0.04320932,  0.06040856,  0.20723307,  0.22849402,  0.22849402,
        0.27085577,  0.27085577,  0.31031614,  0.36384442,  0.39877216,
        0.41435898,  0.42509747,  0.44936278,  0.44936278,  0.46863327,
        0.47023857,  0.49128205,  0.52074478,  0.52777174,  0.54519608,
        0.55276931,  0.62062027,  0.66209108,  0.66209108,  0.76400701,
        0.88335911,  1.00939913,  1.16043863,  1.17992033,  1.20940738,
        1.27748591,  1.27748591,  1.55550514,  1.55550514,  1.61866805,
        1.74756964,  1.78247158,  1.86994442,  2.37589516,  2.37589516])

In [9]:
histogram(t,'blocks')


Out[9]:
(array([22, 62, 16]),
 array([-2.31543303, -0.71504841,  0.71304904,  2.37589516]))

In [11]:
plt.hist(histogram(t,'blocks'))


Out[11]:
([array([ 0.,  0.,  1.,  1.,  0.,  0.,  0.,  0.,  0.,  1.]),
  array([ 4.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])],
 array([ -2.31543303,   4.11611027,  10.54765358,  16.97919688,
         23.41074018,  29.84228349,  36.27382679,  42.70537009,
         49.13691339,  55.5684567 ,  62.        ]),
 <a list of 2 Lists of Patches objects>)

In [14]:
plt.hist(t,bins=100)


Out[14]:
(array([ 1.,  0.,  3.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         2.,  3.,  0.,  0.,  0.,  0.,  1.,  0.,  3.,  0.,  0.,  0.,  2.,
         0.,  2.,  1.,  0.,  1.,  0.,  2.,  1.,  4.,  5.,  1.,  1.,  1.,
         0.,  4.,  2.,  2.,  3.,  6.,  2.,  3.,  2.,  0.,  1.,  3.,  0.,
         0.,  1.,  2.,  3.,  0.,  2.,  4.,  3.,  3.,  1.,  1.,  2.,  0.,
         1.,  0.,  0.,  1.,  0.,  1.,  0.,  0.,  0.,  2.,  1.,  2.,  0.,
         0.,  0.,  0.,  0.,  2.,  1.,  0.,  0.,  1.,  1.,  0.,  1.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  2.]),
 array([-2.31543303, -2.26851975, -2.22160647, -2.17469318, -2.1277799 ,
        -2.08086662, -2.03395334, -1.98704006, -1.94012677, -1.89321349,
        -1.84630021, -1.79938693, -1.75247365, -1.70556036, -1.65864708,
        -1.6117338 , -1.56482052, -1.51790724, -1.47099396, -1.42408067,
        -1.37716739, -1.33025411, -1.28334083, -1.23642755, -1.18951426,
        -1.14260098, -1.0956877 , -1.04877442, -1.00186114, -0.95494786,
        -0.90803457, -0.86112129, -0.81420801, -0.76729473, -0.72038145,
        -0.67346816, -0.62655488, -0.5796416 , -0.53272832, -0.48581504,
        -0.43890175, -0.39198847, -0.34507519, -0.29816191, -0.25124863,
        -0.20433535, -0.15742206, -0.11050878, -0.0635955 , -0.01668222,
         0.03023106,  0.07714435,  0.12405763,  0.17097091,  0.21788419,
         0.26479747,  0.31171075,  0.35862404,  0.40553732,  0.4524506 ,
         0.49936388,  0.54627716,  0.59319045,  0.64010373,  0.68701701,
         0.73393029,  0.78084357,  0.82775686,  0.87467014,  0.92158342,
         0.9684967 ,  1.01540998,  1.06232326,  1.10923655,  1.15614983,
         1.20306311,  1.24997639,  1.29688967,  1.34380296,  1.39071624,
         1.43762952,  1.4845428 ,  1.53145608,  1.57836937,  1.62528265,
         1.67219593,  1.71910921,  1.76602249,  1.81293577,  1.85984906,
         1.90676234,  1.95367562,  2.0005889 ,  2.04750218,  2.09441547,
         2.14132875,  2.18824203,  2.23515531,  2.28206859,  2.32898187,
         2.37589516]),
 <a list of 100 Patch objects>)

In [22]:
t = np.random.normal(size=1000)
plt.figure()
plt.hist(t,bins=100)
hist(t,'blocks')


Out[22]:
(array([   5.,   31.,   93.,  138.,  438.,  184.,   71.,   40.]),
 array([-3.81328585, -2.4112474 , -1.82123886, -1.11508241, -0.62637691,
         0.45952958,  1.21640712,  1.78780077,  2.73960227]),
 <a list of 8 Patch objects>)

In [23]:
plt.hist(t,bins=100)


Out[23]:
(array([  1.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          0.,   0.,   1.,   0.,   0.,   1.,   0.,   0.,   1.,   0.,   3.,
          3.,   2.,   3.,   2.,   6.,   4.,   5.,   2.,  10.,   9.,  10.,
          5.,   6.,   6.,  12.,   7.,   7.,  11.,  10.,  17.,  24.,  18.,
         26.,  13.,  15.,  19.,  18.,  27.,  28.,  20.,  24.,  28.,  22.,
         28.,  31.,  31.,  13.,  32.,  24.,  26.,  29.,  27.,  32.,  15.,
         19.,  15.,  17.,  18.,  15.,  21.,  15.,  12.,  19.,  13.,  13.,
          9.,   6.,   7.,  11.,  11.,   9.,   7.,   5.,   6.,   5.,   2.,
          4.,   4.,   2.,   4.,   7.,   1.,   3.,   1.,   2.,   0.,   1.,
          2.]),
 array([-3.81328585, -3.74775696, -3.68222808, -3.6166992 , -3.55117032,
        -3.48564144, -3.42011256, -3.35458368, -3.2890548 , -3.22352592,
        -3.15799703, -3.09246815, -3.02693927, -2.96141039, -2.89588151,
        -2.83035263, -2.76482375, -2.69929487, -2.63376598, -2.5682371 ,
        -2.50270822, -2.43717934, -2.37165046, -2.30612158, -2.2405927 ,
        -2.17506382, -2.10953494, -2.04400605, -1.97847717, -1.91294829,
        -1.84741941, -1.78189053, -1.71636165, -1.65083277, -1.58530389,
        -1.519775  , -1.45424612, -1.38871724, -1.32318836, -1.25765948,
        -1.1921306 , -1.12660172, -1.06107284, -0.99554396, -0.93001507,
        -0.86448619, -0.79895731, -0.73342843, -0.66789955, -0.60237067,
        -0.53684179, -0.47131291, -0.40578402, -0.34025514, -0.27472626,
        -0.20919738, -0.1436685 , -0.07813962, -0.01261074,  0.05291814,
         0.11844702,  0.18397591,  0.24950479,  0.31503367,  0.38056255,
         0.44609143,  0.51162031,  0.57714919,  0.64267807,  0.70820696,
         0.77373584,  0.83926472,  0.9047936 ,  0.97032248,  1.03585136,
         1.10138024,  1.16690912,  1.232438  ,  1.29796689,  1.36349577,
         1.42902465,  1.49455353,  1.56008241,  1.62561129,  1.69114017,
         1.75666905,  1.82219794,  1.88772682,  1.9532557 ,  2.01878458,
         2.08431346,  2.14984234,  2.21537122,  2.2809001 ,  2.34642898,
         2.41195787,  2.47748675,  2.54301563,  2.60854451,  2.67407339,
         2.73960227]),
 <a list of 100 Patch objects>)

In [24]:
hmqdat=ascii.read("./hmqdata_sorted_full.csv")
hmqdat


Out[24]:
<Table length=510355>
ZRADEC
float64float64float64
-0.002178.95169161.598303
-0.001143.5277939.44227
-0.001217.26922112.069508
0.02.2663936.472577
0.07.306389-28.213611
0.010.10375-23.666889
0.017.522778-31.037222
0.025.4575-31.127222
0.044.53027820.500556
0.044.99-29.338889
.........
6.2512.52777834.756389
6.29177.0137087.035639
6.305157.6130565.415278
6.43352.284722-3.033056
6.43832.555-4.939167
6.44177.06936352.863975
6.60446.3205-31.848889
6.74517.471375-30.790639
6.886357.138917-30.902778
7.085170.0061676.690083

In [28]:
plt.hist(hmqdat['Z'])


Out[28]:
(array([  9.60460000e+04,   1.07042000e+05,   1.05669000e+05,
          1.51404000e+05,   4.12070000e+04,   7.77500000e+03,
          1.00800000e+03,   1.26000000e+02,   7.10000000e+01,
          7.00000000e+00]),
 array([ -2.00000000e-03,   7.06700000e-01,   1.41540000e+00,
          2.12410000e+00,   2.83280000e+00,   3.54150000e+00,
          4.25020000e+00,   4.95890000e+00,   5.66760000e+00,
          6.37630000e+00,   7.08500000e+00]),
 <a list of 10 Patch objects>)

In [26]:
help(plt.hist)


Help on function hist in module matplotlib.pyplot:

hist(x, bins=None, range=None, normed=False, weights=None, cumulative=False, bottom=None, histtype=u'bar', align=u'mid', orientation=u'vertical', rwidth=None, log=False, color=None, label=None, stacked=False, hold=None, data=None, **kwargs)
    Plot a histogram.
    
    Compute and draw the histogram of *x*. The return value is a
    tuple (*n*, *bins*, *patches*) or ([*n0*, *n1*, ...], *bins*,
    [*patches0*, *patches1*,...]) if the input contains multiple
    data.
    
    Multiple data can be provided via *x* as a list of datasets
    of potentially different length ([*x0*, *x1*, ...]), or as
    a 2-D ndarray in which each column is a dataset.  Note that
    the ndarray form is transposed relative to the list form.
    
    Masked arrays are not supported at present.
    
    Parameters
    ----------
    x : (n,) array or sequence of (n,) arrays
        Input values, this takes either a single array or a sequency of
        arrays which are not required to be of the same length
    
    bins : integer or array_like or 'auto', optional
        If an integer is given, `bins + 1` bin edges are returned,
        consistently with :func:`numpy.histogram` for numpy version >=
        1.3.
    
        Unequally spaced bins are supported if `bins` is a sequence.
    
        If Numpy 1.11 is installed, may also be ``'auto'``.
    
        Default is taken from the rcParam ``hist.bins``.
    
    range : tuple or None, optional
        The lower and upper range of the bins. Lower and upper outliers
        are ignored. If not provided, `range` is (x.min(), x.max()). Range
        has no effect if `bins` is a sequence.
    
        If `bins` is a sequence or `range` is specified, autoscaling
        is based on the specified bin range instead of the
        range of x.
    
        Default is ``None``
    
    normed : boolean, optional
        If `True`, the first element of the return tuple will
        be the counts normalized to form a probability density, i.e.,
        ``n/(len(x)`dbin)``, i.e., the integral of the histogram will sum
        to 1. If *stacked* is also *True*, the sum of the histograms is
        normalized to 1.
    
        Default is ``False``
    
    weights : (n, ) array_like or None, optional
        An array of weights, of the same shape as `x`.  Each value in `x`
        only contributes its associated weight towards the bin count
        (instead of 1).  If `normed` is True, the weights are normalized,
        so that the integral of the density over the range remains 1.
    
        Default is ``None``
    
    cumulative : boolean, optional
        If `True`, then a histogram is computed where each bin gives the
        counts in that bin plus all bins for smaller values. The last bin
        gives the total number of datapoints.  If `normed` is also `True`
        then the histogram is normalized such that the last bin equals 1.
        If `cumulative` evaluates to less than 0 (e.g., -1), the direction
        of accumulation is reversed.  In this case, if `normed` is also
        `True`, then the histogram is normalized such that the first bin
        equals 1.
    
        Default is ``False``
    
    bottom : array_like, scalar, or None
        Location of the bottom baseline of each bin.  If a scalar,
        the base line for each bin is shifted by the same amount.
        If an array, each bin is shifted independently and the length
        of bottom must match the number of bins.  If None, defaults to 0.
    
        Default is ``None``
    
    histtype : {'bar', 'barstacked', 'step',  'stepfilled'}, optional
        The type of histogram to draw.
    
        - 'bar' is a traditional bar-type histogram.  If multiple data
          are given the bars are aranged side by side.
    
        - 'barstacked' is a bar-type histogram where multiple
          data are stacked on top of each other.
    
        - 'step' generates a lineplot that is by default
          unfilled.
    
        - 'stepfilled' generates a lineplot that is by default
          filled.
    
        Default is 'bar'
    
    align : {'left', 'mid', 'right'}, optional
        Controls how the histogram is plotted.
    
            - 'left': bars are centered on the left bin edges.
    
            - 'mid': bars are centered between the bin edges.
    
            - 'right': bars are centered on the right bin edges.
    
        Default is 'mid'
    
    orientation : {'horizontal', 'vertical'}, optional
        If 'horizontal', `~matplotlib.pyplot.barh` will be used for
        bar-type histograms and the *bottom* kwarg will be the left edges.
    
    rwidth : scalar or None, optional
        The relative width of the bars as a fraction of the bin width.  If
        `None`, automatically compute the width.
    
        Ignored if `histtype` is 'step' or 'stepfilled'.
    
        Default is ``None``
    
    log : boolean, optional
        If `True`, the histogram axis will be set to a log scale. If `log`
        is `True` and `x` is a 1D array, empty bins will be filtered out
        and only the non-empty (`n`, `bins`, `patches`) will be returned.
    
        Default is ``False``
    
    color : color or array_like of colors or None, optional
        Color spec or sequence of color specs, one per dataset.  Default
        (`None`) uses the standard line color sequence.
    
        Default is ``None``
    
    label : string or None, optional
        String, or sequence of strings to match multiple datasets.  Bar
        charts yield multiple patches per dataset, but only the first gets
        the label, so that the legend command will work as expected.
    
        default is ``None``
    
    stacked : boolean, optional
        If `True`, multiple data are stacked on top of each other If
        `False` multiple data are aranged side by side if histtype is
        'bar' or on top of each other if histtype is 'step'
    
        Default is ``False``
    
    Returns
    -------
    n : array or list of arrays
        The values of the histogram bins. See **normed** and **weights**
        for a description of the possible semantics. If input **x** is an
        array, then this is an array of length **nbins**. If input is a
        sequence arrays ``[data1, data2,..]``, then this is a list of
        arrays with the values of the histograms for each of the arrays
        in the same order.
    
    bins : array
        The edges of the bins. Length nbins + 1 (nbins left edges and right
        edge of last bin).  Always a single array even when multiple data
        sets are passed in.
    
    patches : list or list of lists
        Silent list of individual patches used to create the histogram
        or list of such list if multiple input datasets.
    
    Other Parameters
    ----------------
    kwargs : `~matplotlib.patches.Patch` properties
    
    See also
    --------
    hist2d : 2D histograms
    
    Notes
    -----
    Until numpy release 1.5, the underlying numpy histogram function was
    incorrect with `normed`=`True` if bin sizes were unequal.  MPL
    inherited that error.  It is now corrected within MPL when using
    earlier numpy versions.
    
    Examples
    --------
    .. plot:: mpl_examples/statistics/histogram_demo_features.py
    
    .. note::
        In addition to the above described arguments, this function can take a
        **data** keyword argument. If such a **data** argument is given, the
        following arguments are replaced by **data[<arg>]**:
    
        * All arguments with the following names: 'weights', 'x'.


In [30]:
hist(hmqdat['Z'],'knuth')


Out[30]:
(array([  3.,  64.,  20., ...,   0.,   0.,   1.]),
 array([ -2.00000000e-03,  -6.61567517e-04,   6.76864967e-04, ...,
          7.08232314e+00,   7.08366157e+00,   7.08500000e+00]),
 <a list of 5295 Patch objects>)

In [36]:
hist(hmqdat['Z'],bins='blocks')
plt.figure()


Out[36]:
<matplotlib.figure.Figure at 0x138fa9f50>
<matplotlib.figure.Figure at 0x138fa9f50>

In [32]:
help(hist)


Help on function hist in module astropy.visualization.hist:

hist(x, bins=10, ax=None, **kwargs)
    Enhanced histogram function
    
    This is a histogram function that enables the use of more sophisticated
    algorithms for determining bins.  Aside from the ``bins`` argument allowing
    a string specified how bins are computed, the parameters are the same
    as pylab.hist().
    
    This function was ported from astroML: http://astroML.org/
    
    Parameters
    ----------
    x : array_like
        array of data to be histogrammed
    
    bins : int or list or str (optional)
        If bins is a string, then it must be one of:
    
        - 'blocks' : use bayesian blocks for dynamic bin widths
    
        - 'knuth' : use Knuth's rule to determine bins
    
        - 'scott' : use Scott's rule to determine bins
    
        - 'freedman' : use the Freedman-diaconis rule to determine bins
    
    ax : Axes instance (optional)
        specify the Axes on which to draw the histogram.  If not specified,
        then the current active axes will be used.
    
    **kwargs :
        other keyword arguments are described in ``plt.hist()``.
    
    Notes
    -----
    Return values are the same as for ``plt.hist()``
    
    See Also
    --------
    astropy.stats.histogram


In [ ]: