In [1]:
%pylab inline
In [2]:
from scipy.stats import norm
In [3]:
# Fake some data: 150 draws from a normal distribution with mean 0 and
# standard deviation 1.
# Seed NumPy's global random state first (norm.rvs draws from it when no
# random_state is given) so that Restart & Run All reproduces the same
# sample, fit and figures.
np.random.seed(42)
data = norm.rvs(loc=0.0, scale=1.0, size=150)

# Quick look at the raw sample (default binning)
plt.hist(data, rwidth=0.85, facecolor='black');
plt.ylabel('Number of events');
plt.xlabel('Value');
Fitting a normal distribution to the sample simply yields the mean and the standard deviation of the sample data.
In [4]:
# Fit a normal distribution to the sample and unpack the two fitted
# parameters (location and scale) as mean and standard deviation
fitted = norm.fit(data)
mean, stdev = fitted
print('Mean =%f, Stdev=%f' % (mean, stdev))
To overlay the normalized PDF of the normal distribution on the histogram, we simply multiply every PDF value by the area of the histogram (bin width × number of samples).
In [5]:
# Draw the histogram with 10 bins and keep whatever plt.hist returns
histdata = plt.hist(data, bins=10, rwidth=0.85, color='black')
In [8]:
# Same 10-bin histogram computed numerically (no plot): per-bin counts
# and the bin edges
hist_result = np.histogram(data, bins=10)
counts, binedge = hist_result
print(binedge)
In [9]:
# Get bin centers from bin edges: the midpoint of every pair of adjacent edges.
# Pairing the edges with zip avoids xrange, which does not exist in Python 3.
bincenter = [0.5 * (left + right) for left, right in zip(binedge[:-1], binedge[1:])]
In [10]:
# Show the list of bin centers as this cell's output
bincenter
Out[10]:
In [11]:
# Width of a single histogram bin.
# np.histogram with an integer `bins` yields equal-width bins, so the distance
# between the first two edges is the bin width. Dividing the span of the bin
# centers by len(bincenter) would underestimate it: N centers are separated by
# only N-1 bin widths.
binwidth = binedge[1] - binedge[0]
print(binwidth)
In [12]:
# Normal distribution frozen at the fitted mean and standard deviation
mynorm = norm(loc=mean, scale=stdev)
# 100 evenly spaced evaluation points on [-4, 4] for drawing the curve
x = np.linspace(-4, 4, 100)
In [13]:
# Rescale the normal PDF to the area of the histogram,
# i.e. (bin width) * (number of samples)
n_samples = len(data)
density = mynorm.pdf(x)
myfit = density * binwidth * n_samples
In [14]:
# Plot everything together: the 10-bin histogram of the data in white and,
# on top of it, the scaled normal curve (myfit) filled in translucent red
plt.hist(data, histtype='stepfilled', facecolor='white', bins=10);
plt.fill(x, myfit, 'r', alpha=0.5);
plt.xlabel('Value');
plt.ylabel('Number of observations');