In [1]:
# Necessary import evil
%matplotlib inline
from physt import histogram, binnings
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# Synthetic samples for the examples; the fixed seed makes every
# fresh run of the notebook draw exactly the same values.
np.random.seed(42)
sample_size = 100000
heights1 = np.random.normal(loc=169, scale=10, size=sample_size)
heights2 = np.random.normal(loc=180, scale=6, size=sample_size)
numbers = np.random.rand(sample_size)  # uniform on [0, 1)
In [3]:
# Compare the bin count proposed by each method as the sample size grows.
X = [int(size) for size in np.logspace(0, 4, 50)]  # log-spaced sizes from 1 to 10**4
algos = binnings.bincount_methods

# Draw one exponential sample per size (same order as the sizes in X),
# then ask every method for its bin count on that same sample.
samples = [np.random.exponential(1, size) for size in X]
Ys = {
    algo: [binnings.ideal_bin_count(sample, algo) for sample in samples]
    for algo in algos
}

figure, axis = plt.subplots(figsize=(8, 8))
for algo in algos:
    # "default" is drawn dotted with point markers so it stands out
    # from the solid lines used for the other methods.
    line_style = ":." if algo == "default" else "-"
    axis.plot(X, Ys[algo], line_style, label=algo, alpha=0.5, lw=2)
axis.set(xscale="log", yscale="log", xlabel="Sample size", ylabel="Bin count")
axis.legend(loc=2);
In [4]:
figure, axis = plt.subplots(1, 2, figsize=(10, 4))
ax_abs, ax_log = axis

# "exponential" binning of the uniform sample over the range (0.0001, 1).
hist1 = histogram(numbers, "exponential", 10, range=(0.0001, 1))

# Left panel: plain plot on the default x axis.
hist1.plot(color="green", ax=ax_abs)
ax_abs.set_title("Absolute scale")

# Right panel: density with errors, x axis switched to log scale.
hist1.plot(density=True, errors=True, ax=ax_log)
ax_log.set_title("Log scale")
ax_log.set_xscale("log");
In [5]:
# Sum of two dice (should be triangle, right?)
# floor(rand * 6) yields 0..5 for each die; the trailing "+ 2" shifts
# the sum of the two dice into the usual 2..12 range.
first_die = np.floor(np.random.rand(10000) * 6)
second_die = np.floor(np.random.rand(10000) * 6)
dice = first_die + second_die + 2
histogram(dice, "integer").plot(ticks="center", density=True);
In [6]:
figure, axis = plt.subplots(1, 2, figsize=(10, 4))
# Quantile-based binning of heights1, with 40 as the bin argument.
hist2 = histogram(heights1, "quantile", 40)
# The same histogram is drawn twice: as-is on the left, as density on the right.
for ax, title, plot_kwargs in [
    (axis[0], "Frequencies", {}),
    (axis[1], "Density", {"density": True}),
]:
    hist2.plot(ax=ax, **plot_kwargs)
    ax.set_title(title)
In [7]:
# Overlay two binnings of the same data on one set of axes.
figure, axis = plt.subplots()
shared_style = dict(alpha=0.3, density=True, ax=axis)
histogram(heights1, "quantile", 10).plot(label="Quantile based", **shared_style)
histogram(heights1, 10).plot(color="green", label="Equal spaced", **shared_style)
axis.legend(loc=2);
In [8]:
# "fixed_width" binning; the value is handed positionally to the method
# (the bin width, per physt's fixed-width binning — confirm against its docs).
bin_width = 3
hist_fixed = histogram(heights1, "fixed_width", bin_width)
hist_fixed.plot()
# Bare last expression so the notebook displays the histogram object itself.
hist_fixed
Out[8]:
In [9]:
# "human" binning; the value is handed positionally to the method
# (presumably the requested bin count — confirm against physt docs).
requested_bins = 15
human = histogram(heights1, "human", requested_bins)
human.plot()
# Bare last expression so the notebook displays the histogram object itself.
human
Out[9]:
Astropy includes its own histogramming tools. If this package is available, we reuse its binning methods. These include `blocks`, `scott`, `knuth` and `freedman`, all of which are tried in the next cell.
See http://docs.astropy.org/en/stable/visualization/histogram.html for more details.
In [10]:
# Run each listed binning method on a smaller normal sample.
middle_sized = np.random.normal(180, 6, 5000)
method_names = ("blocks", "scott", "knuth", "freedman")
for algo in method_names:
    # The method name is also used as the histogram's name.
    hist = histogram(middle_sized, algo, name=algo)
    hist.plot(density=True)