In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [41]:
import sys
sys.path.append('../scripts/')
import bicorr as bicorr
import bicorr_plot as bicorr_plot
import bicorr_math as bicorr_math
In [3]:
%load_ext autoreload
%autoreload 2
In [7]:
import inspect
print(inspect.getsource(bicorr_plot.step_plot))
In [8]:
x_samples = np.random.normal(0,5,150)
In [9]:
plt.figure(figsize=(4,2))
plt.hist(x_samples,np.arange(-10,10.1,1))
plt.xlabel('x')
plt.ylabel('Number of counts')
plt.title('Histogram of randomly generated x data')
plt.show()
Store histogram data
In [10]:
counts, bin_edges = np.histogram(x_samples,np.arange(-10,10.1,1))
In [11]:
print(bin_edges.shape)
print(counts.shape)
In [12]:
bin_edges
Out[12]:
In [13]:
counts
Out[13]:
Look at it using my step plot function.
In [16]:
plt.figure(figsize=(4,2))
bicorr_plot.step_plot(bin_edges,counts)
plt.xlabel('x')
plt.ylabel('Number of counts')
plt.ylim([-1,1.1*np.max(counts)])
plt.title('Histogram of randomly generated x data')
plt.show()
Calculate the average x value for a distribution where we have $C$ counts for each $x$ value. Assume there is no error in $x_i$.
$x_{ave} = \frac{\Sigma(x_i \cdot C_i)}{\Sigma C_i}$
I am going to split this up into the numerator and the denomenator in order to propagate the error.
$num = \Sigma(x_i \cdot C_i)$
$denom = \Sigma C_i$
Think of this as the sum of many products:
$num = x_1 c_1 + x_2 c_2 + ... = a + b + ...$
The error in each quantity is:
$\sigma_{x_i c_i} = x_i \sqrt{c_i}$
The error in the sum of these quantities is:
$\sigma_{num} = \sigma_{a+b+...} = \sqrt{\sigma_a^2+\sigma_b^2+...} = \sqrt{x_1^2 c_1 + x_2^2 c_2 + ...}$
$x_{ave} = \frac{num}{denom} = \frac{\Sigma(x_i \cdot C_i)}{\Sigma C_i}$
$\sigma_{x_{ave}} = x_{ave} \sqrt{(\frac{\sigma_{num}}{num})^2+(\frac{\sigma_{denom}}{denom})^2}$
In [34]:
def calc_mean(bin_edges, counts, print_flag = False):
"""
Calculate mean of a count rate distribution, counts vs. x.
Errors are calculated under the assumption that you are working
with counting statistics. (C_err = sqrt(C) in each bin)
Parameters
----------
bin_edges : ndarray
Bin edges for x
counts : ndarray
Bin counts
print_flag : bool
Option to print intermediate values
Returns
-------
x_mean : float
x_mean_err : float
"""
bin_centers = bicorr.centers(bin_edges)
num = np.sum(np.multiply(bin_centers,counts))
num_err = np.sqrt(np.sum(np.multiply(bin_centers**2,counts)))
denom = np.sum(counts)
denom_err = np.sqrt(denom)
if print_flag:
print('num: ',num)
print('num_err: ',num_err)
print('denom: ',denom)
print('denom_err: ',denom_err)
x_mean = num/denom
x_mean_err = x_mean * np.sqrt((num_err/num)**2+(denom_err/denom)**2)
if print_flag:
print('x_mean: ',x_mean)
print('x_mean_err:',x_mean_err)
return x_mean, x_mean_err
In [35]:
x_mean, x_mean_err = calc_mean(bin_edges,counts,True)
Plot this and look at the position of the mean.
In [42]:
plt.figure(figsize=(4,2))
bicorr.step_plot(bin_edges,counts)
plt.axvline(x_mean,color='r',linewidth=1)
plt.axvline(x_mean+x_mean_err,linestyle='--',color='r',linewidth=.5)
plt.axvline(x_mean-x_mean_err,linestyle='--',color='r',linewidth=.5)
plt.xlabel('x')
plt.ylabel('Number of counts')
plt.ylim([-1,1.1*np.max(counts)])
plt.title('Histogram of randomly generated x data')
plt.show()
Look more closely to make sure it is evenly spaced.
In [44]:
plt.figure(figsize=(4,2))
bicorr.step_plot(bin_edges,counts)
plt.axvline(x_mean,color='r',linewidth=1)
plt.axvline(x_mean+x_mean_err,linestyle='--',color='r',linewidth=.5)
plt.axvline(x_mean-x_mean_err,linestyle='--',color='r',linewidth=.5)
plt.xlabel('x')
plt.xlim([-1,2])
plt.ylabel('Number of counts')
plt.ylim([-1,1.1*np.max(counts)])
plt.title('Histogram of randomly generated x data')
plt.show()
Only 10 samples... Expect mean to be incorrect and large error bars.
In [62]:
real_mean = 25
real_std = 5
num_samples = 10
x_samples = np.random.normal(real_mean,real_std,num_samples)
counts, bin_edges = np.histogram(x_samples,np.arange(15,35.1,1))
x_mean, x_mean_err = calc_mean(bin_edges,counts,True)
plt.figure(figsize=(4,2))
plt.axvline(real_mean,color='b',linewidth=2)
plt.axvline(x_mean,color='r',linewidth=1)
plt.axvline(x_mean+x_mean_err,linestyle='--',color='r',linewidth=.5)
plt.axvline(x_mean-x_mean_err,linestyle='--',color='r',linewidth=.5)
bicorr.step_plot(bin_edges,counts)
plt.legend(['Real mean','Calculated mean'], loc='lower right')
plt.xlabel('x')
plt.ylabel('Number of counts')
plt.ylim([-1,1.1*np.max(counts)])
plt.title('Number of samples = {}'.format(num_samples))
plt.show()
Increase the number of samples. Expect mean to be closer, and errors to be smaller.
In [65]:
real_mean = 25
real_std = 5
num_samples = 30
x_samples = np.random.normal(real_mean,real_std,num_samples)
counts, bin_edges = np.histogram(x_samples,np.arange(15,35.1,1))
x_mean, x_mean_err = calc_mean(bin_edges,counts,True)
plt.figure(figsize=(4,2))
plt.axvline(real_mean,color='b',linewidth=2)
plt.axvline(x_mean,color='r',linewidth=1)
plt.axvline(x_mean+x_mean_err,linestyle='--',color='r',linewidth=.5)
plt.axvline(x_mean-x_mean_err,linestyle='--',color='r',linewidth=.5)
bicorr.step_plot(bin_edges,counts)
plt.legend(['Real mean','Calculated mean'], loc='lower right')
plt.xlabel('x')
plt.ylabel('Number of counts')
plt.ylim([-1,1.1*np.max(counts)])
plt.title('Number of samples = {}'.format(num_samples))
plt.show()
In [66]:
real_mean = 25
real_std = 5
num_samples = 100
x_samples = np.random.normal(real_mean,real_std,num_samples)
counts, bin_edges = np.histogram(x_samples,np.arange(15,35.1,1))
x_mean, x_mean_err = calc_mean(bin_edges,counts,True)
plt.figure(figsize=(4,2))
plt.axvline(real_mean,color='b',linewidth=2)
plt.axvline(x_mean,color='r',linewidth=1)
plt.axvline(x_mean+x_mean_err,linestyle='--',color='r',linewidth=.5)
plt.axvline(x_mean-x_mean_err,linestyle='--',color='r',linewidth=.5)
bicorr.step_plot(bin_edges,counts)
plt.legend(['Real mean','Calculated mean'], loc='lower right')
plt.xlabel('x')
plt.ylabel('Number of counts')
plt.ylim([-1,1.1*np.max(counts)])
plt.title('Number of samples = {}'.format(num_samples))
plt.show()
In [67]:
real_mean = 25
real_std = 5
num_samples = 1000
x_samples = np.random.normal(real_mean,real_std,num_samples)
counts, bin_edges = np.histogram(x_samples,np.arange(15,35.1,1))
x_mean, x_mean_err = calc_mean(bin_edges,counts,True)
plt.figure(figsize=(4,2))
plt.axvline(real_mean,color='b',linewidth=2)
plt.axvline(x_mean,color='r',linewidth=1)
plt.axvline(x_mean+x_mean_err,linestyle='--',color='r',linewidth=.5)
plt.axvline(x_mean-x_mean_err,linestyle='--',color='r',linewidth=.5)
bicorr.step_plot(bin_edges,counts)
plt.legend(['Real mean','Calculated mean'], loc='lower right')
plt.xlabel('x')
plt.ylabel('Number of counts')
plt.ylim([-1,1.1*np.max(counts)])
plt.title('Number of samples = {}'.format(num_samples))
plt.show()
In [68]:
real_mean = 25
real_std = 5
num_samples = 10000
x_samples = np.random.normal(real_mean,real_std,num_samples)
counts, bin_edges = np.histogram(x_samples,np.arange(15,35.1,1))
x_mean, x_mean_err = calc_mean(bin_edges,counts,True)
plt.figure(figsize=(4,2))
plt.axvline(real_mean,color='b',linewidth=2)
plt.axvline(x_mean,color='r',linewidth=1)
plt.axvline(x_mean+x_mean_err,linestyle='--',color='r',linewidth=.5)
plt.axvline(x_mean-x_mean_err,linestyle='--',color='r',linewidth=.5)
bicorr.step_plot(bin_edges,counts)
plt.legend(['Real mean','Calculated mean'], loc='lower right')
plt.xlabel('x')
plt.ylabel('Number of counts')
plt.ylim([-1,1.1*np.max(counts)])
plt.title('Number of samples = {}'.format(num_samples))
plt.show()
I think this looks reasonable. Call it a day.
In [ ]:
Calculate the average x value for a distribution where we have $C$ counts for each $x$ value. Assume there is no error in $x_i$ or $y_i$.
$(x+y)_{ave} = \frac{\Sigma((x_i+y_i) \cdot C_i)}{\Sigma C_i}$
I am going to split this up into the numerator and the denomenator in order to propagate the error.
$num = \Sigma((x_i+y_i) \cdot C_i)$
$denom = \Sigma C_i$
Think of this as the sum of many products:
$num = x_1 y_1 c_{11} + x_2 y_1 c_{21} + ... = a + b + ...$
The error in each quantity is:
$\sigma_{(x_i+y_i) c_i} = (x_i+y_i) \sqrt{c_i}$
The error in the sum of these quantities is:
$\sigma_{num} = \sigma_{a+b+...} = \sqrt{\sigma_a^2+\sigma_b^2+...} = \sqrt{(x_1+y_1)^2 c_{11} + (x_2+y_1)^2 c_{21} + ...}$
$(x+y)_{ave} = \frac{num}{denom} = \frac{\Sigma((x_i+y_i) \cdot C_i)}{\Sigma C_i}$
$\sigma_{x_{ave}} = x_{ave} \sqrt{(\frac{\sigma_{num}}{num})^2+(\frac{\sigma_{denom}}{denom})^2}$
In [26]:
x_samples = np.random.normal(150,20,150);
y_samples = np.random.normal(75,10,150);
sum_samples = x_samples + y_samples
In [27]:
sns.jointplot(x_samples,y_samples)
plt.show()
In [90]:
np.mean(x_samples+y_samples)
Out[90]:
In [23]:
bicorr_plot.histogram_metrics(sum_samples,'sum','counts')
Create a 2d-histogram.
In [47]:
x_edges = np.arange(0,201,10)
y_edges = np.arange(0,111,10)
H, x_edges, y_edges = np.histogram2d(x_samples,y_samples, bins=[x_edges,y_edges])
H = H.T
In [48]:
fig = plt.figure(figsize=(4,3))
ax = plt.gca()
mesh = ax.pcolormesh(x_edges, y_edges, H)
plt.xlabel('x'); plt.ylabel('y')
plt.show()
In [49]:
H.shape
Out[49]:
In [36]:
x_edges.shape
Out[36]:
In [37]:
y_edges.shape
Out[37]:
In [50]:
X, Y = np.meshgrid(bicorr_math.calc_centers(x_edges),bicorr_math.calc_centers(y_edges))
In [53]:
X.shape
Out[53]:
In [54]:
Y.shape
Out[54]:
In [52]:
H.shape
Out[52]:
In [57]:
X = X.reshape(X.shape[0]*X.shape[1])
Y = Y.reshape(Y.shape[0]*Y.shape[1])
H = H.reshape(H.shape[0]*H.shape[1])
In [82]:
plt.figure(figsize=(4,3))
plt.scatter(X, Y, c=H, marker='s', s=100)
plt.xlabel('X')
plt.ylabel('Y')
plt.show()
In [87]:
plt.figure(figsize=(4,3))
plt.scatter(X+Y, H, marker='s', s=100)
plt.xlabel('X+Y')
plt.ylabel('Counts')
plt.show()
Now try calculating the average.
In [92]:
bicorr_math.calc_histogram_mean(X+Y,H,True,True)
Out[92]:
This can be used for bhp
and bhp_e
.
In [ ]: