In [1]:
# Necessary import evil
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import physt
import seaborn as sns
from physt import h1, h2, histogramdd
# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Seed NumPy's global random generator so the random samples drawn below are reproducible
np.random.seed(42)
In [2]:
# Some data: two normal samples of 1000 points each
# (x: mean 100, std 1; y: mean 10, std 10)
x = np.random.normal(loc=100, scale=1, size=1000)
y = np.random.normal(loc=10, scale=10, size=1000)
In [3]:
# Create a simple histogram
# bins=[8, 4] gives one bin specification per axis (x, y)
histogram = h2(
    x,
    y,
    [8, 4],
    name="Some histogram",
    axis_names=["x", "y"],
)
# Last expression of the cell -> shown as the cell output
histogram
Out[3]:
In [4]:
# Frequencies are a 2D-array
# (last expression of the cell, so it is displayed as the cell output)
histogram.frequencies
Out[4]:
In most cases, the binning methods available for 1D histograms can also be used in higher dimensions. In such cases, each parameter can be either a scalar (applied to all dimensions) or a list/tuple with an independent value for each dimension. The same holds for `range`, which has to be a list/tuple of tuples.
In [5]:
# "fixed_width" binning; the tuple (2, 10) supplies one value per axis (x, y)
histogram = h2(
    x,
    y,
    "fixed_width",
    (2, 10),
    name="Fixed-width bins",
    axis_names=["x", "y"],
)
histogram.plot();
# Show the resulting bins via `numpy_bins` (last expression -> cell output)
histogram.numpy_bins
Out[5]:
In [6]:
# "quantile" binning; the tuple (3, 4) supplies one value per axis (x, y)
histogram = h2(
    x,
    y,
    "quantile",
    (3, 4),
    name="Quantile bins",
    axis_names=["x", "y"],
)
# cmap_min=0 pins the lower end of the colormap at zero
histogram.plot(cmap_min=0);
# Show the resulting bins via `numpy_bins` (last expression -> cell output)
histogram.numpy_bins
Out[6]:
In [7]:
# "human" binning; the scalar 5 is applied to both axes
histogram = h2(
    x,
    y,
    "human",
    5,
    name="Human-friendly bins",
    axis_names=["x", "y"],
)
histogram.plot();
# Show the resulting bins via `numpy_bins` (last expression -> cell output)
histogram.numpy_bins
Out[7]:
In [8]:
# Default is workable
# (plot with all default options; the return value of plot() is kept in `ax`)
ax = histogram.plot()
In [9]:
# Custom colormap, no colorbar
# `cm` (matplotlib.cm) is imported in the import cell at the top of the notebook,
# so later cells that use `cm` do not depend on this cell having been run.
fig, ax = plt.subplots()
ax = histogram.plot(
    ax=ax,
    cmap=cm.copper,
    show_colorbar=False,
    grid_color=cm.copper(0.5),  # colour taken from the middle of the same colormap
)
ax.set_title("Custom colormap");
In [10]:
# Use a named colormap + limit it to a range of values
# (the colormap is referenced by its name, so this cell needs no colormap import)
fig, ax = plt.subplots()
ax = histogram.plot(
    ax=ax,
    cmap="Oranges",
    show_colorbar=True,
    cmap_min=20,   # lower limit of the colormap range
    cmap_max=100,  # upper limit of the colormap range
    show_values=True,
)
ax.set_title("Clipped colormap");
In [11]:
# Show labels (and hide zero bins), no grid(lw=0)
ax = histogram.plot(
    show_values=True,
    show_zero=False,
    cmap=cm.RdBu,
    format_value=float,
    lw=0,
)
Plotting histograms in this way gets problematic with more than roughly 50x50 bins. There is an alternative, though, partially inspired by the datashader project:
plot the histogram as a bitmap, which works very fast even for very large histograms.
Note: This method does not work for histograms with irregular bins.
In [12]:
# Redraw x and y with one million points each (same means and standard deviations as before)
x = np.random.normal(loc=100, scale=1, size=1000000)
y = np.random.normal(loc=10, scale=10, size=1000000)
In [13]:
# Three panels side by side: "map" at 20 bins, "image" at 20 bins, "image" at 500 bins
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
h2(x, y, 20, name="20 bins - map").plot(
    "map", cmap="rainbow", lw=0, alpha=1, ax=axes[0], show_colorbar=False
)
h2(x, y, 20, name="20 bins - image").plot(
    "image", cmap="rainbow", alpha=1, ax=axes[1]
)
h2(x, y, 500, name="500 bins - image").plot(
    "image", cmap="rainbow", alpha=1, ax=axes[2]
);
Note that the image output is equivalent to the map plot drawn without grid lines.
In [14]:
# Same three panels as above, this time with cmap_normalize="log"
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
h2(x, y, 20, name="20 bins - map").plot(
    "map",
    alpha=1,
    lw=0,
    show_zero=False,
    cmap="rainbow",
    ax=axes[0],
    show_colorbar=False,
    cmap_normalize="log",
)
h2(x, y, 20, name="20 bins - image").plot(
    "image", alpha=1, ax=axes[1], cmap="rainbow", cmap_normalize="log"
)
h2(x, y, 500, name="500 bins - image").plot(
    "image", alpha=1, ax=axes[2], cmap="rainbow", cmap_normalize="log"
);
In [15]:
# Composition - show histogram overlaid with "points"
fig, ax = plt.subplots(figsize=(8, 7))
h_2 = h2(x, y, 30)
h_2.plot("map", lw=0, alpha=0.9, cmap="Blues", ax=ax, cmap_normalize="log", show_zero=False)
# TODO: the "points" overlay below is disabled because it is not working currently:
# h2(x, y, 300).plot("image", alpha=1, cmap="Greys", ax=ax, transform=lambda x: x > 0);
Out[15]:
In [16]:
# "bar3d" plot type, coloured via the "rainbow" colormap
histogram.plot("bar3d", cmap="rainbow");
In [17]:
# "bar3d" plot type again, this time passing a single color instead of a colormap
histogram.plot("bar3d", color="red");
In [18]:
# Project the 2D histogram onto its "x" axis, plot it with errors=True,
# and display the projected histogram as the cell output
proj1 = histogram.projection("x", name="Projection to X")
proj1.plot(errors=True)
proj1
Out[18]:
In [19]:
# Project the 2D histogram onto its "y" axis, plot it with errors=True,
# and display the projected histogram as the cell output
proj2 = histogram.projection("y", name="Projection to Y")
proj2.plot(errors=True)
proj2
Out[19]:
In [20]:
# Create and add two histograms with adaptive binning
# Two (height, weight) samples centred on different means
height1 = np.random.normal(loc=180, scale=5, size=1000)
weight1 = np.random.normal(loc=80, scale=2, size=1000)
height2 = np.random.normal(loc=160, scale=5, size=1000)
weight2 = np.random.normal(loc=70, scale=2, size=1000)

# One adaptive "fixed_width" histogram per sample
ad1 = h2(height1, weight1, "fixed_width", 1, adaptive=True)
ad2 = h2(height2, weight2, "fixed_width", 1, adaptive=True)

# Plot each histogram on its own, then their sum
ad1.plot(show_zero=False)
ad2.plot(show_zero=False)
(ad1 + ad2).plot(show_zero=False);
In [21]:
# Create a 4D histogram
# Input: four uniform random columns of 1000 values each, stacked into shape (1000, 4)
data = np.column_stack([np.random.rand(1000) for _ in range(4)])
# Build the 4D histogram with 3, 2, 2 and 3 bins requested for the four columns.
# axis_names="abcd": presumably one name per character (later cells project on "a" and "d").
h4 = histogramdd(data, [3, 2, 2, 3], axis_names="abcd")
# Last expression of the cell -> shown as the cell output
h4
Out[21]:
In [22]:
# Display the frequencies of the 4D histogram as the cell output
h4.frequencies
Out[22]:
In [23]:
# Project the 4D histogram onto axes "a" and "d" and plot the result with per-bin values
h4.projection("a", "d", name="4D -> 2D").plot(
    show_values=True,
    format_value=int,
    cmap_min="min",
);
In [24]:
# Project the 4D histogram onto axis "d" and draw it as a "scatter" plot with errors=True
h4.projection("d", name="4D -> 1D").plot("scatter", errors=True);
In [25]:
# Load notorious example data set
# (the "iris" dataset via seaborn; the following cells read its sepal_length / sepal_width columns)
iris = sns.load_dataset('iris')
In [26]:
# 2D histogram of sepal length vs. sepal width using "human" binning.
# `iris` is loaded once in the previous cell, so it is not loaded a second time here,
# and `h2` is called through the direct import like everywhere else in the notebook.
iris_hist = h2(
    iris["sepal_length"],
    iris["sepal_width"],
    "human",
    (12, 7),
    name="Iris",
)
# Hide zero bins and print each bin's value formatted with int
iris_hist.plot(show_zero=False, cmap=cm.gray_r, show_values=True, format_value=int);
In [27]:
# Project onto the "sepal_length" axis and plot the result.
# NOTE(review): no axis_names were passed when iris_hist was built, so this relies on the
# axis name being picked up from the pandas column name — confirm.
iris_hist.projection("sepal_length").plot();