Comparison with pseudo-random error defined as a percent of I(0)

Though it oversimplifies the error, using a percentage of I(0) to define the error estimates provides a quick and simple definition method. This was first performed by Pinfield and Scott in their publication "Anomalous Small Angle X-ray Scattering Simulations: Proof of Concept for Distance Measurements for Nanoparticle-Labelled Biomacromolecules in Solution". \cite{pinfield_anomalous_2014}

The purpose of this notebook is to examine the change in $\chi^2$ as a function of the number of grid points used to represent experimental small-angle scattering (SAS) data. I will use the scattering of a uniform sphere of radius $r_1$ to create a mock experimental data set. This will be supplemented with random noise added to the scattering, and Gaussian error estimates similar to experimental values. The theoretical data set will be the scattering of a uniform sphere of radius $r_2$.

I will compare $\chi^2$ from the original data set (500 data points), with the $\chi^2$ of data sets with fewer $Q$-values. To transform the original experimental data to have a lower number of data points, I will use the following methods:

  1. sub-sample the data, e.g., take every other point, or every tenth point
  2. interpolate the data
  3. rebin the data assuming the number of pixels
  4. rebin the data without knowing the number of pixels

In [1]:
import numpy as np
np.set_printoptions(suppress=True)

In [2]:
import sas_modeling


cluster_ensemble not available, depends on sasmol: https://github.com/madscatt/sasmol
calc_pr not available, depends on sasmol: https://github.com/madscatt/sasmol

In [3]:
from bokeh.io import output_notebook, show
from bokeh.resources import INLINE
output_notebook(resources=INLINE)


Loading BokehJS ...

In [4]:
from bokeh.plotting import figure
from bokeh.models import Legend, Range1d
from bokeh.layouts import gridplot
try:
    from sas_modeling.make_figures import solarized as palette
    print('using solarized palette')
except ImportError:
    from bokeh.palettes import Colorblind7 as palette
    print('using colorblind palette')


using solarized palette

In [5]:
# load the sassie interpolation function
import data_interpolation

In [6]:
def errorbar(fig, x, y, xerr=None, yerr=None, color='red',
             point_kwargs=None, error_kwargs=None):
    """Draw points with optional symmetric x/y error bars on a bokeh figure.

    Parameters
    ----------
    fig : bokeh figure
        Figure to draw on.
    x, y : sequence of float
        Point coordinates.
    xerr, yerr : sequence of float, optional
        Half-lengths of the error bars for each point.
    color : str
        Color used for both the points and the error bars.
    point_kwargs, error_kwargs : dict, optional
        Extra keyword arguments forwarded to ``fig.circle`` and
        ``fig.multi_line`` respectively.
    """
    # bug fix: the defaults were mutable dicts (`point_kwargs={}`), which
    # are shared across every call; use None sentinels instead
    if point_kwargs is None:
        point_kwargs = {}
    if error_kwargs is None:
        error_kwargs = {}

    # fig.background_fill_color = "#ffffd7"

    fig.circle(x, y, color=color, **point_kwargs)

    if xerr is not None:
        # horizontal segments centered on each point
        x_err_x = []
        x_err_y = []
        for px, py, err in zip(x, y, xerr):
            x_err_x.append((px - err, px + err))
            x_err_y.append((py, py))
        fig.multi_line(x_err_x, x_err_y, color=color, **error_kwargs)

    if yerr is not None:
        # vertical segments centered on each point
        y_err_x = []
        y_err_y = []
        for px, py, err in zip(x, y, yerr):
            y_err_x.append((px, px))
            y_err_y.append((py - err, py + err))
        fig.multi_line(y_err_x, y_err_y, color=color, **error_kwargs)

In [7]:
def plot_res(exper, model, pq_sim, components, label=''):
    """Plot a reduced data set against the model and the full-resolution
    simulated data, plus each point's percentage contribution to chi^2.

    Layout is a 2x2 grid: top row shows I(Q) on linear and log-Q axes;
    bottom row shows bar charts of the per-point share of chi^2 with the
    matching x-axes.

    Parameters
    ----------
    exper : ndarray, shape (n, 3)
        Reduced data set being evaluated; columns Q, I(Q), dI(Q).
        Assumed to have uniform Q spacing (dq taken from the first pair).
    model : ndarray
        Theoretical curve evaluated on the same Q-grid; columns Q, I(Q).
    pq_sim : ndarray, shape (N, 3)
        Full-resolution simulated data, drawn for reference.
    components : ndarray, shape (n,)
        Per-point chi^2 contributions (from compare.get_x2_components).
    label : str
        Legend label for `exper` in the log-axis plot.

    NOTE(review): reads the module-level globals `q_min`, `q_max`, and
    `palette`; uses the `legend` keyword, which belongs to an older bokeh
    API -- confirm against the installed bokeh version.
    """
    # histogram edges for the chi^2 bar charts: bins centered on each Q
    # (first edge anchored to pq_sim's first point; same value as
    # exper[0, 0] for all calls in this notebook)
    edges = np.empty(len(exper) + 1)
    dq = exper[1, 0] - exper[0, 0]
    edges[0] = pq_sim[0, 0] - dq / 2
    edges[1:] = exper[:, 0] + dq / 2

    p1 = figure(# x_axis_label='Q (1/A)', 
                y_axis_label='I(Q)', 
                width=400, height=300,
                title='dQ={:0.4f}'.format(dq),
                )
    p2 = figure(# x_axis_label='Q (1/A)', 
                y_axis_label='log(I(Q))', 
                width=400, height=300,
                x_axis_type='log')
    p3 = figure(x_axis_label='Q (1/A)', y_axis_label='% of x^2',
                width=400, height=100,
                )
    p4 = figure(x_axis_label='Q (1/A)', y_axis_label='% of x^2',
                width=400, height=100,
                x_axis_type='log') #, y_axis_type='log')

    # linear-axis overlay: full simulation, reduced data, and model
    errorbar(p1, pq_sim[:, 0], pq_sim[:, 1], yerr=pq_sim[:, 2], color=palette[1])
    errorbar(p1, exper[:, 0], exper[:, 1], yerr=exper[:, 2], color=palette[3])
    p1.line(model[:, 0], model[:, 1], color=palette[0])

    # log plot: error of log(I) computed as the relative error dI/I
    errorbar(p2, pq_sim[:, 0], np.log(pq_sim[:, 1]), yerr=pq_sim[:, 2]/pq_sim[:, 1], 
             color=palette[1], point_kwargs={'legend': 'exp'})
    errorbar(p2, exper[:, 0], np.log(exper[:, 1]), yerr=exper[:, 2]/exper[:, 1], 
             color=palette[3], point_kwargs={'legend': label})
    p2.line(model[:, 0], np.log(model[:, 1]), color=palette[0], legend='model')

    # shared axis ranges, padded 10% beyond the data (globals q_min/q_max)
    x_lin_range = np.array([q_min * 0.9, q_max * 1.1])
    y_lin_range = np.array([pq_sim[:, 1].min(), pq_sim[:, 1].max() * 1.1])
    p1.x_range = Range1d(*x_lin_range)
    p1.y_range = Range1d(*y_lin_range)
    p2.x_range = Range1d(*x_lin_range)
    p2.y_range = Range1d(*np.log(y_lin_range))
    p2.legend.location = 'bottom_left'
    
    # per-point share of the total chi^2, as a percentage
    per_x2 = components/components.sum() * 100
    p3.quad(top=per_x2, bottom=0, left=edges[:-1], right=edges[1:],
            fill_color=palette[3], line_color=palette[3])
    p3.x_range = p1.x_range
    
    p4.quad(top=per_x2, bottom=0, left=edges[:-1], right=edges[1:],
            fill_color=palette[3], line_color=palette[3])
    p4.x_range = p2.x_range

    # for p in [p1, p2, p3, p4]:
    # p1.background_fill_color = "#ffffd7"
    # p2.background_fill_color = "#ffffd7"
    # p3.background_fill_color = "#ffffd7"
    # p4.background_fill_color = "#ffffd7"
    
    plots = gridplot([[p1, p2], [p3, p4]])
    show(plots)

In [8]:
# create scattering for sphere
# mock "experimental" Q-grid: 500 evenly spaced points
# NOTE: q_min and q_max are also read by plot_res for its axis ranges
q_min = 1e-3
q_max = 0.2
n_points = 500
q = np.linspace(q_min, q_max, n_points)

scale = 1.0
bkg = 1e-7     # flat background added to I(Q)
p_scat = 2e-6  # scattering length density of the sphere -- units per sas_modeling, confirm
p_sol = 1e-6   # scattering length density of the solvent
r = 50.0       # sphere radius (presumably Angstrom -- confirm)

pq = sas_modeling.geometric.sphere(r, q, p_scat, p_sol, scale=scale, bkg=bkg)

In [9]:
i0, rg, i0_err, rg_err = sas_modeling.calc_i0.guinier_fit(pq[:, 0], pq[:, 1], np.ones_like(pq[:, 0]), q_max=0.02) #, refine=True, view_fit=True)

In [10]:
i0


Out[10]:
62.359999999999999

In [11]:
q_test = np.linspace(0.000001, 0.001, 100)
pq_test = sas_modeling.geometric.sphere(r, q_test, p_scat, p_sol, scale=scale, bkg=bkg)

In [12]:
i0, rg, i0_err, rg_err = sas_modeling.calc_i0.guinier_fit(pq_test[:, 0], pq_test[:, 1], np.ones_like(pq_test[:, 0]), q_max=0.01) #, refine=True, view_fit=True)
print(i0)


62.36

In [13]:
pq_sim = np.empty([n_points, 3])  # columns: Q, I(Q)+noise, dI(Q)
pq_sim[:, 0] = pq[:, 0]

# add pseudo-random error or noise based on I(0)
# c = 0.0001  # 0.01%
# c = 0.001  # 0.1%
c = 0.01  # 1%
prng = np.random.RandomState(123)  # get a consistent random result
# Gaussian noise with standard deviation c * I(0)
noise = c * prng.normal(0, i0, len(pq))
pq_sim[:, 1] = pq[:, 1] + noise

# add pseudo-random error estimates also based on I(0)
# err = c * np.random.normal(0, i0, len(pq))
# error estimate for each point = magnitude of the noise actually added,
# so chi^2 against the noise-free curve is exactly 1 by construction
err = np.abs(noise)
pq_sim[:, 2] = err

In [14]:
i0_sim, rg_sim, i0_sim_err, rg_sim_err = sas_modeling.calc_i0.guinier_fit(pq_sim[:, 0], pq_sim[:, 1], pq_sim[:, 2], q_max=0.02) #, refine=True, view_fit=True)
i0_sim, i0_sim_err


Out[14]:
(62.356000000000002, 0.017000000000000001)

In [15]:
p1 = figure(x_axis_label='Q^2 (1/A^2)', y_axis_label='log(P(Q))', 
               width=400, height=300,
               )

p2 = figure(x_axis_label='log(Q (1/A))', y_axis_label='log(P(Q))', 
               width=400, height=300,
               )

p3 = figure(x_axis_label='Q (1/A)', y_axis_label='noise', 
               width=400, height=300,
               )

p4 = figure(x_axis_label='Q (1/A)', y_axis_label='error estimate', 
               width=400, height=300,
           )# x_axis_type='log') #, y_axis_type='log')

p1.circle(pq_sim[:20, 0]**2, np.log(pq_sim[:20, 1]), color=palette[1])
p1.line(pq[:20, 0]**2, np.log(pq[:20, 1]), color=palette[0])

p2.line(np.log(pq[:, 0]), np.log(pq[:, 1]), color=palette[0], legend='raw')
errorbar(p2, np.log(pq_sim[:, 0]), np.log(pq_sim[:, 1]), yerr=pq_sim[:, 2]/pq_sim[:, 1], color=palette[1], point_kwargs={'legend': 'sim'})
# p2.circle(np.log(q), np.log(pq_noise), color=palette[2], legend='noise: sqrt(pq/q)')

x_lin_range = np.array([q_min * 0.9, q_max * 1.1])
y_lin_range = np.array([pq_sim[:, 1].min(), pq_sim[:, 1].max() * 1.1])
p2.x_range = Range1d(*np.log(x_lin_range))
p2.y_range = Range1d(*np.log(y_lin_range))
p2.legend.location = 'bottom_left'

p3.circle(pq_sim[:, 0], noise, color=palette[1])

p4.circle(np.log(pq_sim[:, 0]), pq_sim[:, 2], color=palette[1])
p4.x_range = p2.x_range

plots = gridplot([[p1, p2], [p3, p4]])
show(plots)



In [16]:
pq_sim[:, 1].min()


Out[16]:
8.4076494408781901

In [17]:
x2, components = sas_modeling.compare.get_x2_components(pq_sim, pq)
print('x2 = {}'.format(x2))


x2 = 1.0000000000000069

In [18]:
plot_res(pq_sim, pq, pq_sim, components, label='exp')



In [19]:
100/.2


Out[19]:
500.0

Subsample the data, selecting every $m^\text{th}$ point to reduce to $n_\text{sub}$ points

Only keep every twentieth point: $n_\text{sub}=25$, $m=20$


In [20]:
n_sub = 25
m = int(n_points/n_sub)
pq_25 = pq_sim[::m]
pq_25m = sas_modeling.geometric.sphere(r, pq_25[:, 0], p_scat, p_sol, scale=scale, bkg=bkg)
# pq_25

In [21]:
x2_25, components_25 = sas_modeling.compare.get_x2_components(pq_25, pq_25m)
# bug fix: previously printed the stale full-resolution `x2` from In [17]
# instead of the 25-point result just computed
print('x2 = {}'.format(x2_25))


x2 = 1.0000000000000069

In [22]:
plot_res(pq_25, pq_25m, pq_sim, components_25, label='n = 25 samples')


Only keep every fiftieth point: $n_\text{sub}=10$, $m=50$


In [23]:
n_sub = 10
m = int(n_points/n_sub)
pq_10 = pq_sim[::m]
pq_10m = sas_modeling.geometric.sphere(r, pq_10[:, 0], p_scat, p_sol, scale=scale, bkg=bkg)
# pq_10

In [24]:
x2_10, components_10 = sas_modeling.compare.get_x2_components(pq_10, pq_10m)
# bug fix: previously printed the stale full-resolution `x2` from In [17]
# instead of the 10-point result just computed
print('x2 = {}'.format(x2_10))


x2 = 1.0000000000000069

In [25]:
plot_res(pq_10, pq_10m, pq_sim, components_10, label='n = 10 sample')


Interpolate the data to reduce to $n_\text{sub}$ points, keeping the first and last point

Interpolate to 30 data points: $n_\text{sub}=30$ ($m=16.67$, not an even integer).


In [26]:
np.savetxt('sphere.iq', pq_sim, fmt='%0.6f')

In [27]:
n_sub = 30
pq_30 = np.empty([n_sub, 3])
pq_30[:, 0] = np.linspace(pq_sim[0, 0], pq_sim[-1, 0], n_sub)

pq_30m = sas_modeling.geometric.sphere(r, pq_30[:, 0], p_scat, p_sol, scale=scale, bkg=bkg)
# pq_30m

In [28]:
yp1 = 1.0
ypn = 1.0
nval = n_points

x = pq_sim[:, 0]
y = pq_sim[:, 1]
y_spline = data_interpolation.spline(x, y, nval, yp1, ypn)

z = pq_sim[:, 2]
z_spline = data_interpolation.spline(x, z, nval, yp1, ypn)

In [29]:
for i in range(n_sub):
    x_i = pq_30[i, 0]
    pq_30[i, 1] = data_interpolation.splint(x, y, y_spline, nval, x_i)  # I(Q_i)
    pq_30[i, 2] = data_interpolation.splint(x, z, z_spline, nval, x_i)  # dI(Q_i)

In [30]:
from scipy import interpolate

In [31]:
pq_30_v2 = np.empty([n_sub, 3])
pq_30_v2[:, 0] = np.linspace(pq_sim[0, 0], pq_sim[-1, 0], n_sub)
interp_iq = interpolate.splrep(pq_sim[:, 0], pq_sim[:, 1])
interp_diq = interpolate.splrep(pq_sim[:, 0], pq_sim[:, 2])  
pq_30_v2[:, 1] = interpolate.splev(pq_30_v2[:, 0], interp_iq)         
pq_30_v2[:, 2] = interpolate.splev(pq_30_v2[:, 0], interp_diq)

In [32]:
pq_30_v3 = np.empty([n_sub, 3])
pq_30_v3[:, 0] = np.linspace(pq_sim[0, 0], pq_sim[-1, 0], n_sub)
# NOTE(review): when weights `w` are passed to splrep, the default
# smoothing factor `s` becomes nonzero, so this is a smoothing spline
# rather than an interpolating one -- presumably why this result differs
# from pq_30/pq_30_v2; confirm against the scipy.interpolate.splrep docs.
interp_iq = interpolate.splrep(pq_sim[:, 0], pq_sim[:, 1], w=1.0/pq_sim[:, 2])
interp_diq = interpolate.splrep(pq_sim[:, 0], pq_sim[:, 2])  
pq_30_v3[:, 1] = interpolate.splev(pq_30_v3[:, 0], interp_iq)         
pq_30_v3[:, 2] = interpolate.splev(pq_30_v3[:, 0], interp_diq)

In [33]:
print(np.allclose(pq_30, pq_30_v2), np.allclose(pq_30, pq_30_v3))


True False

In [34]:
p = figure()

errorbar(p, pq_sim[:, 0], pq_sim[:, 1], yerr=pq_sim[:, 2], color=palette[4], 
         point_kwargs={'legend': 'original'})
errorbar(p, pq_30[:, 0], pq_30[:, 1], yerr=pq_30[:, 2], color=palette[3], 
         point_kwargs={'legend': 'sassie', 'size': 10}, error_kwargs={'line_width': 5})
errorbar(p, pq_30_v2[:, 0], pq_30_v2[:, 1], yerr=pq_30_v2[:, 2], color=palette[1], 
         point_kwargs={'legend': 'scipy'})
errorbar(p, pq_30_v3[:, 0], pq_30_v3[:, 1], yerr=pq_30_v3[:, 2], color=palette[5], 
         point_kwargs={'legend': 'weighted scipy'})
p.line(pq_30m[:, 0], pq_30m[:, 1], color=palette[0], legend='model')


show(p)



In [35]:
x2_30, components_30 = sas_modeling.compare.get_x2_components(pq_30, pq_30m)
print('x2 = {}'.format(x2_30))


x2 = 0.813487001667323

Interpolation decreases the chi-square.


In [36]:
plot_res(pq_30, pq_30m, pq_sim, components_30, label='n = 30 interp')



In [37]:
x2_30_v3, components_30_v3 = sas_modeling.compare.get_x2_components(pq_30_v3, pq_30m)
print('x2 = {}'.format(x2_30_v3))


x2 = 0.008316312390862087

So for this ideal case, performing a weighted interpolation leads to error estimates that are too large, making the model fit overly well.


In [38]:
plot_res(pq_30_v3, pq_30m, pq_sim, components_30_v3, label='n = 30 weighted interp')



In [39]:
n_sub = 12
pq_12 = np.empty([n_sub, 3])
pq_12[:, 0] = np.linspace(pq_sim[0, 0], pq_sim[-1, 0], n_sub)

pq_12m = sas_modeling.geometric.sphere(r, pq_12[:, 0], p_scat, p_sol, scale=scale, bkg=bkg)
# pq_30m

In [40]:
yp1 = 1.0
ypn = 1.0
nval = n_points

x = pq_sim[:, 0]
y = pq_sim[:, 1]
y_spline = data_interpolation.spline(x, y, nval, yp1, ypn)

z = pq_sim[:, 2]
z_spline = data_interpolation.spline(x, z, nval, yp1, ypn)

In [41]:
for i in range(n_sub):
    x_i = pq_12[i, 0]
    pq_12[i, 1] = data_interpolation.splint(x, y, y_spline, nval, x_i)  # I(Q_i)
    pq_12[i, 2] = data_interpolation.splint(x, z, z_spline, nval, x_i)  # dI(Q_i)

In [42]:
pq_12_v2 = np.empty([n_sub, 3])
pq_12_v2[:, 0] = np.linspace(pq_sim[0, 0], pq_sim[-1, 0], n_sub)
interp_iq = interpolate.splrep(pq_sim[:, 0], pq_sim[:, 1])
interp_diq = interpolate.splrep(pq_sim[:, 0], pq_sim[:, 2])  
pq_12_v2[:, 1] = interpolate.splev(pq_12_v2[:, 0], interp_iq)         
pq_12_v2[:, 2] = interpolate.splev(pq_12_v2[:, 0], interp_diq)

In [43]:
pq_12_v3 = np.empty([n_sub, 3])
pq_12_v3[:, 0] = np.linspace(pq_sim[0, 0], pq_sim[-1, 0], n_sub)
interp_iq = interpolate.splrep(pq_sim[:, 0], pq_sim[:, 1], w=1.0/pq_sim[:, 2])
interp_diq = interpolate.splrep(pq_sim[:, 0], pq_sim[:, 2])  
pq_12_v3[:, 1] = interpolate.splev(pq_12_v3[:, 0], interp_iq)         
pq_12_v3[:, 2] = interpolate.splev(pq_12_v3[:, 0], interp_diq)

In [44]:
print(np.allclose(pq_12, pq_12_v2), np.allclose(pq_12, pq_12_v3))


True False

In [45]:
p = figure()

errorbar(p, pq_sim[:, 0], pq_sim[:, 1], yerr=pq_sim[:, 2], color=palette[4], 
         point_kwargs={'legend': 'original'})
errorbar(p, pq_12[:, 0], pq_12[:, 1], yerr=pq_12[:, 2], color=palette[3], 
         point_kwargs={'legend': 'sassie', 'size': 10}, error_kwargs={'line_width': 5})
errorbar(p, pq_12_v2[:, 0], pq_12_v2[:, 1], yerr=pq_12_v2[:, 2], color=palette[1], 
         point_kwargs={'legend': 'scipy'})
errorbar(p, pq_12_v3[:, 0], pq_12_v3[:, 1], yerr=pq_12_v3[:, 2], color=palette[5], 
         point_kwargs={'legend': 'weighted scipy'})
p.line(pq_12m[:, 0], pq_12m[:, 1], color=palette[0], legend='model')


show(p)



In [46]:
x2_12, components_12 = sas_modeling.compare.get_x2_components(pq_12, pq_12m)
print('x2 = {}'.format(x2_12))


x2 = 1.1652366816084232

With only 12 points, interpolating increases the chi-square.


In [47]:
plot_res(pq_12, pq_12m, pq_sim, components_12, label='n = 12 interp')



In [48]:
x2_12_v3, components_12 = sas_modeling.compare.get_x2_components(pq_12_v3, pq_12m)
print('x2 = {}'.format(x2_12_v3))


x2 = 0.007695419873997829

With only 12 points, weighted interpolation still decreases the chi-square.


In [49]:
plot_res(pq_12_v3, pq_12m, pq_sim, components_12, label='n = 12 weighted interp')


Rebinning

Assuming equal number of pixels in each original $Q$-bin.

Following Ken Tatebe's paper titled "Combining Multiple Data Points and Their Errors", if the number of counts in each bin is unknown, we can take the risk of assuming the number of points in each bin is equal. In this case, following equation (10), the combined value is simply an average of the combined points. To get the combined error, we must use equation (50), which describes how to combine the errors of two points $$\varepsilon_S \approx \sqrt{\frac{\varepsilon_a^2 + \varepsilon_b^2}{4}}\,.$$ This is based on the assumption that the number of counts in each original bin is much larger than 1.

Extending this to more than just 2 points, to combine $m$ points, this becomes $$\varepsilon_S \approx \sqrt{\frac{\sum_{i=1}^m\varepsilon_i^2}{m^2}}\,.$$


In [50]:
(pq_sim[1:, 0] - pq_sim[:-1, 0]).mean()


Out[50]:
0.00039879759519038076

In [51]:
def combine_equal_n(data, n_points):
    """Rebin (Q, I, dI) data into `n_points` equal-width Q-bins, assuming
    every original point carries the same statistical weight.

    The combined intensity is the plain mean of the points in each bin;
    the combined error follows Tatebe's equal-count approximation
    (eq. 50 extended to m points): err = sqrt(sum(err_i^2) / m^2).

    Parameters
    ----------
    data : ndarray, shape (N, 3)
        Columns are Q, I(Q), dI(Q).
    n_points : int
        Number of output bins.

    Returns
    -------
    ndarray, shape (n_points, 3)
        Rebinned Q (bin centers), I(Q), and dI(Q).  A bin that receives no
        input points yields NaN (mean of an empty slice).
    """
    # mean point spacing; assumes a roughly uniform Q-grid
    dq = (data[1:, 0] - data[:-1, 0]).mean()
    edges = np.linspace(data[0, 0] - dq / 2, data[-1, 0] + dq / 2,
                        n_points + 1)

    rebinned = np.empty([n_points, 3])
    rebinned[:, 0] = (edges[1:] + edges[:-1]) / 2  # bin centers

    inds = np.digitize(data[:, 0], edges)
    for i in range(n_points):
        # hoisted: the original recomputed this boolean mask three times
        in_bin = (inds - 1 == i)
        m = in_bin.sum()

        rebinned[i, 1] = data[in_bin, 1].mean()
        rebinned[i, 2] = np.sqrt((data[in_bin, 2] ** 2).sum() / m ** 2)

    return rebinned

In [52]:
pq_12r_equal = combine_equal_n(pq_sim, 12)

In [53]:
pq_12r_equal_m = sas_modeling.geometric.sphere(r, pq_12r_equal[:, 0], p_scat, p_sol, scale=scale, bkg=bkg)

In [54]:
x2, components_12r_equal = sas_modeling.compare.get_x2_components(pq_12r_equal, pq_12r_equal_m)
print('x2 = {}'.format(x2))


x2 = 5.4507764876213125

Rebinning to 12 points increased the chi-square statistic.


In [55]:
plot_res(pq_12r_equal, pq_12r_equal_m, pq_sim, components_12r_equal, label='n = 12 equal bins')



In [56]:
pq_30r_equal = combine_equal_n(pq_sim, 30)

In [57]:
pq_30r_equal_m = sas_modeling.geometric.sphere(r, pq_30r_equal[:, 0], p_scat, p_sol, scale=scale, bkg=bkg)

In [58]:
x2, components_30r_equal = sas_modeling.compare.get_x2_components(pq_30r_equal, pq_30r_equal_m)
print('x2 = {}'.format(x2))


x2 = 1.1240076370521517

Rebinning to 30 points increased the chi-square statistic only slightly.


In [59]:
plot_res(pq_30r_equal, pq_30r_equal_m, pq_sim, components_30r_equal, label='n = 30 equal bins')


Recognize that the number of pixels in each $Q$-bin is proportional to $Q$.

The volume of each annulus is $$ V_j = \pi r_j^2 - \pi r_{j-1}^2\,,$$ or $$ V_j = \pi (r_j^2 - r_{j-1}^2)\,.$$

We can relate $Q$ to $r$ as follows. We know $$ Q = 4\pi \frac{\sin\vartheta}{\lambda}$$ and using geometry we have $$\sin 2\vartheta = 2\sin\vartheta\cos\vartheta = \frac{r}{h}\,,$$ where $h$ is the hypotenuse. Using the small angle approximation ($\cos\vartheta \approx 1$ and $h \approx l$), this simplifies to $$ 2\sin\vartheta \approx \frac{r}{l}\,,$$ which becomes $$ \sin\vartheta \approx \frac{r}{2l}\,,$$ where $l$ is the sample-to-detector distance.

Replacing the $\sin \vartheta$ in the expression for $Q$, we get $$Q \approx \frac{2\pi r}{\lambda l}\,,$$ or $$r = \frac{\lambda l}{2 \pi} Q\,.$$

Using this, we can rewrite $V_j$ as $$V_j = \frac{\lambda^2 l^2}{4\pi}(Q_j^2 - Q_{j-1}^2)\,.$$

Typically, the pixel side length is used as the annular spacing, so each $Q$-bins corresponds to one pixel width, which means the pixel volume would be $$V_\text{pixel} = \left(r_j - r_{j-1}\right)^2\,.$$ Converting $r$ to $Q$, we get this volume to be $$V_\text{pixel} = \frac{\lambda^2 l^2}{4 \pi^2}\left(Q_j - Q_{j-1}\right)^2\,$$ and the number of pixels in each annulus would be $$n_j = \frac{V_j}{V_\text{pixel}} = \frac{\lambda^2 l^2}{4\pi} \left(Q_j^2 - Q_{j-1}^2\right) \frac{4 \pi^2}{\lambda^2 l^2} \frac{1}{\left(Q_j - Q_{j-1}\right)^2}\,,$$

or more simply, $$n_j = \pi \frac{Q_j^2 - Q_{j-1}^2}{\left(Q_j - Q_{j-1}\right)^2}\,.$$

This provides the $n_j$ required for Tatebe's combining of data points, equation (10), and combining of errors, equation (48). Note that in the situation of even spacing, $Q_j - Q_{j-1}$ is independent of $j$.

To combine several points, we can iteratively combine one more point (rather than solve the formulae to combine them all at once).


In [60]:
def combine_weight_by_q(iq_data, n_points):
    """Rebin (Q, I, dI) data into `n_points` equal-width Q-bins, weighting
    each point by the estimated number of detector pixels in its annulus,
    n_i = pi * (Q_i^2 - (Q_i - dQ)^2) / dQ^2.

    Points within a bin are folded in one at a time using Tatebe's
    equation (10) (count-weighted mean) and equation (48) (combined
    error), as derived in the markdown above.

    Parameters
    ----------
    iq_data : ndarray, shape (N, 3)
        Columns are Q, I(Q), dI(Q).
    n_points : int
        Number of output bins.

    Returns
    -------
    ndarray, shape (n_points, 3)
        Rebinned Q (bin centers), I(Q), and dI(Q).  Empty bins are set to
        NaN (bug fix: the original reused the running values left over
        from the previous bin, or raised NameError if the first bin was
        empty).
    """
    # mean point spacing; assumes a roughly uniform Q-grid
    dq = (iq_data[1:, 0] - iq_data[:-1, 0]).mean()
    dq2 = dq ** 2
    edges = np.linspace(iq_data[0, 0] - dq / 2, iq_data[-1, 0] + dq / 2,
                        n_points + 1)

    rebinned = np.empty([n_points, 3])
    rebinned[:, 0] = (edges[1:] + edges[:-1]) / 2  # bin centers

    inds = np.digitize(iq_data[:, 0], edges)
    for j in range(n_points):
        this_data = iq_data[inds - 1 == j]

        if len(this_data) == 0:
            # no input points fell into this bin -- mark it invalid
            rebinned[j, 1:] = np.nan
            continue

        av_old = 0  # running weighted average
        er_old = 0  # running combined error
        n_old = 0   # running pixel count
        N_old = n_old ** 2 - n_old

        for i in range(len(this_data)):
            av_i = this_data[i, 1]  # I(Q_i)
            er_i = this_data[i, 2]  # dI(Q_i)
            q_i = this_data[i, 0]   # Q_i
            # pixels in the annulus spanning [q_i - dq, q_i]
            n_i = np.pi * (q_i ** 2 - (q_i - dq) ** 2) / dq2
            N_i = n_i ** 2 - n_i

            n_new = n_old + n_i
            N_new = n_new ** 2 - n_new

            # Tatebe eq. (10): pixel-count weighted mean
            av_new = (n_old * av_old + n_i * av_i) / n_new

            # Tatebe eq. (48): combine the accumulated and new errors
            term_1 = N_old * er_old ** 2
            term_2 = N_i * er_i ** 2
            term_3 = n_old * n_i * (av_old - av_i) ** 2 / n_new

            er_new = np.sqrt((term_1 + term_2 + term_3) / N_new)

            av_old = av_new
            er_old = er_new
            n_old = n_new
            N_old = N_new

        rebinned[j, 1] = av_new
        rebinned[j, 2] = er_new

    return rebinned

In [61]:
# FIXME: rebins to 10 points, but the variable name `pq_12r` and the
# downstream plot label 'n = 12 weighted rebin' (In [67]) both say 12 --
# confirm which was intended.
pq_12r = combine_weight_by_q(pq_sim, 10)

In [62]:
pq_12r_m = sas_modeling.geometric.sphere(r, pq_12r[:, 0], p_scat, p_sol, scale=scale, bkg=bkg)

In [63]:
x2_12r, components_12r = sas_modeling.compare.get_x2_components(pq_12r, pq_12r_m)
print('x2 = {}'.format(x2_12r))


x2 = 49.60204010061659

In [67]:
plot_res(pq_12r, pq_12r_m, pq_sim, components_12r, label='n = 12 weighted rebin')



In [68]:
pq_30r = combine_weight_by_q(pq_sim, 30)

pq_30r_m = sas_modeling.geometric.sphere(r, pq_30r[:, 0], p_scat, p_sol, scale=scale, bkg=bkg)

x2_30r, components_30r = sas_modeling.compare.get_x2_components(pq_30r, pq_30r_m)
print('x2 = {}'.format(x2_30r))


x2 = 1.238141258786778

In [69]:
plot_res(pq_30r, pq_30r_m, pq_sim, components_30r, label='n = 30 weighted rebin')



In [ ]:


In [ ]: