In [15]:
import numpy

import matplotlib # TODO bug in ipython? (sometime can't call the module directly)
from matplotlib import pyplot

import scipy
from scipy import stats
from scipy import optimize

# %matplotlib tk
# %matplotlib notebook ???
%matplotlib inline



# define unnormalized gaussian
def generalGaussian(x, mu, sigma, area):
    return area * scipy.stats.norm.pdf(x, mu, sigma)

# conversion between x ticks and bin delimiters and viceversa
def binDelimitersFromX(x):
    binSize = x[1] - x[0]
    binDelimiters = numpy.append(x, x[-1]+binSize) - 0.5*binSize
    return binDelimiters
def xFromBinDelimiters(binDelimiters):
    binSize = binDelimiters[1] - binDelimiters[0]
    x = numpy.delete(binDelimiters, -1) + 0.5*binSize
    return x

# define a function to fit histogram's results
def fitHistogram(f, histogramResults, dictAdditionalArguments={}):
    """
    f
        function to fit the histogram
    dictAdditionalArguments
        dictionary of additional arguments to pass to scipy.optimize.curve_fit(...)
        example: p0=startingValuesList --> {"p0": startingValues}
    histogramResults
        the result of matplotlib.pyplot.hist(...)
        or results of numpy.histogram(...)
    """
    # pay attention shift of half binSize
    binDelimiters = histogramResults[1]
    x = xFromBinDelimiters(binDelimiters)
    y = histogramResults[0]
    values, covariance = scipy.optimize.curve_fit(f, x, y, **dictAdditionalArguments)
    # unpack: (TODO just like pointers?)
    # *list
    # *touple
    # **dictionary
    return values, covariance





# function to use in the fit
f = generalGaussian

# prior parameters
muPrior = 0
sigmaPrior = 1

# montecarlo sample generation
sampleLenght = 1000
dataSample = numpy.random.normal(muPrior,sigmaPrior,sampleLenght)

# binning
bins=100
# TODO how many bins?
# vedere come l'accuratezza del fit
# varia con la grandezza della canalizzazione
# cercare canalizzazione ottimale:
# non troppo poco altrimenti si perdono dettagli sulla forma
# non troppo perché  si va in bassa statistica e si introduce rumore
# vedere optimal sampling con analogo discreto
# della frequenza di Nyquist sui canali
# vedere numero di canali in funzione del numero di dati che si hanno
binDelimiters = numpy.linspace(min(dataSample), max(dataSample), bins)
binSize = binDelimiters[1] - binDelimiters[0]

area = sampleLenght*binSize
priorParameters = muPrior, sigmaPrior, area




# initialize the figure
matplotlib.pyplot.figure(figsize=(20,12))

# create prior curve
x = xFromBinDelimiters(binDelimiters)
yPrior = f(x, *priorParameters)
matplotlib.pyplot.plot(x, yPrior, label="prior")

# make the histogram of the generated sample
histogramResults = matplotlib.pyplot.hist(dataSample, bins=binDelimiters, histtype="step", label="generated", align="mid")
# matplotlib.pyplot.axvline(x=min(dataSample))
# matplotlib.pyplot.axvline(x=max(dataSample))

# generalized fit
values, covariance = fitHistogram(f, histogramResults, {"p0": [muPrior, sigmaPrior, area]})
# TODO there's a problem in the gaussian fit documentation:
# muFit, sigmaFit = scipy.stats.norm.fit(dataSample) # area is missing

# plot fit results
print values
print covariance

# create reconstructed curve
yFit =  f(x, *values)
matplotlib.pyplot.plot(x, yFit, label="reconstructed")

# set the legend of the figure
matplotlib.pyplot.legend(loc='best', frameon=False)
# matplotlib.pyplot.show()

# matplotlib.pyplot.xscale('log')
# matplotlib.pyplot.yscale('log')
# la gaussiana (con media lontano da 0) è una parabola nel grafico log-log


[  3.43088371e-02   1.00090572e+00   6.19313091e+01]
[[  9.93435422e-04  -1.58753765e-07  -5.45419402e-06]
 [ -1.58753765e-07   9.95789901e-04   3.08150680e-02]
 [ -5.45419402e-06   3.08150680e-02   2.85473744e+00]]
Out[15]:
<matplotlib.legend.Legend at 0x7f4bac242690>

In [ ]: