In [1]:
%matplotlib inline
from scipy import stats
import matplotlib.pyplot as plt

We generate some random variates from a non-normal distribution and make a probability plot of them, to show that the sample is non-normal in the tails:


In [2]:
# Generate data
# Draw 500 log-gamma variates (shape parameter 5) and shift them by +5.
# A fixed seed makes the sample -- and every figure derived from it --
# reproducible when the notebook is re-run from a fresh kernel.
SEED = 12345
x = stats.loggamma.rvs(5, size=500, random_state=SEED) + 5

In [3]:
# Plot it
# Two stacked panels: normal probability plot on top, histogram below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 9))

# probplot draws the sample against the normal distribution on ax1;
# whatever it returns is kept in `prob` for later inspection.
prob = stats.probplot(x, dist=stats.norm, plot=ax1)
ax1.set_title('Probplot against normal distribution')

# Plot a histogram, labelled so the panel can be read on its own
ax2.hist(x)
ax2.set_xlabel('x')
ax2.set_ylabel('Count')
ax2.set_title('Histogram')


Out[3]:
<matplotlib.text.Text at 0x7fb79b1246d0>

We now use `boxcox` to transform the data so that it is as close to normally distributed as possible:


In [4]:
# Box-Cox transform the sample. The second return value is kept under a real
# name rather than `_`: in IPython `_` holds the last cell output, and
# assigning to it stops IPython from updating `_`, `__` and `___`.
# NOTE(review): per the SciPy docs the second value is the fitted lambda -- confirm.
xt, boxcox_lambda = stats.boxcox(x)

In [5]:
# Plot the results
# Same two-panel layout as before: probability plot on top, histogram below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(6, 9))

# probplot draws the transformed sample against the normal distribution on
# ax1; whatever it returns is kept in `prob` for later inspection.
prob = stats.probplot(xt, dist=stats.norm, plot=ax1)
ax1.set_title('Probplot after Box-Cox transformation')

# Plot a histogram, labelled so the panel can be read on its own
ax2.hist(xt)
ax2.set_xlabel('Box-Cox transformed x')
ax2.set_ylabel('Count')
ax2.set_title('Histogram')


Out[5]:
<matplotlib.text.Text at 0x7fb79b045810>

In [ ]: