In [1]:
import smps
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import random
sns.set("notebook", style='ticks', font_scale=1.5, palette='dark')
smps.set()
%matplotlib inline
print ("smps v{}".format(smps.__version__))
print ("seaborn v{}".format(sns.__version__))
In [2]:
s = smps.io.load_sample("boston")
# plot the histogram
ax = smps.plots.histplot(s.dndlogdp, s.bins, plot_kws={'linewidth': .1}, fig_kws={'figsize': (12, 6)})
ax.set_title("Wintertime in Cambridge, MA", y=1.02)
ax.set_ylabel("$dN/dlogD_p \; [cm^{-3}]$")
# remove the spines of the plot
sns.despine()
In [3]:
# Grab the LogNormal class from the library
from smps.fit import LogNormal
# Initiate an instance of the class
model = LogNormal()
# Gather our X and Y data
X = s.midpoints
Y = s.dndlogdp.mean()
# Go ahead and fit
results = model.fit(X, Y, modes=1)
# print the results
print (results.summary())
Above, we see the results for the three fit parameters: Number of particles per cubic centimeter, the geometric mean diameter, and the geometric standard deviation. All three have error estimates (standard deviation) as shown in parentheses next to each value.
Now that we successfully fit our data, let's go ahead and plot it over the histogram:
In [4]:
# plot the histogram
ax = smps.plots.histplot(s.dndlogdp, s.bins, plot_kws={'linewidth': 0, 'alpha': .6, 'edgecolor': None},
fig_kws={'figsize': (12, 6)})
# Plot the fit values
ax.plot(X, results.fittedvalues, lw=6, label="Fit Data")
ax.set_ylabel("$dN/dlogD_p \; [cm^{-3}]$")
ax.set_title("Wintertime in Cambridge, MA with Fit Data")
# remove the spines of the plot
sns.despine()
What else is stored in the fit results? Glad you asked!
We can go ahead and retrieve our fit parameters at results['params']
. They are stored as an array with order [N, GM, GSD].
In [5]:
print (results.params)
We can also go ahead and look at the error associated with those values at results['error']
. They are in the same order as above.
In [6]:
print (results.errors)
Upon fitting, an instance of the LogNormalFitResults
class is returned and has available a couple of useful methods. The first is .summary()
which we saw above. It simply prints out a very basic overview of the fit statistics in table format.
We can also make new predictions using the LogNormalFitResults.predict
method which takes two arguments:
Ex.
In [7]:
results.predict(1)
Out[7]:
In [8]:
newX = np.logspace(np.log10(.01), np.log10(1), 1000)
# plot the histogram
ax = smps.plots.histplot(s.dndlogdp, s.bins, plot_kws={'linewidth': 0., 'alpha': .5},
fig_kws={'figsize': (12, 6)})
# Plot the fit values
ax.plot(newX, results.predict(newX), lw=6, label="Fit Data")
ax.set_title("Wintertime in Cambridge, MA with Fit Data")
ax.set_ylabel("$dN/dlogD_p \; [cm^{-3}]$")
# remove the spines of the plot
sns.despine()
In [9]:
dp = np.logspace(np.log10(0.0001), np.log10(1), 500)
# Sample data pulled from S+P pg371
N = np.array([9.93e4, 3.64e4])
GM = np.array([1.3e-3, 20e-3])
GSD = np.array([10**.245, 10**0.336])
total = 0
for j in range(len(N)):
total += smps.fit.dndlogdp(dp, N[j], GM[j], GSD[j])
# Let's confuzzle our data
twisted = total* [random.uniform(0.95, 1.05) for i in range(len(dp))]
with sns.axes_style('ticks'):
fig, ax = plt.subplots(1, figsize=(12, 6))
ax.plot(dp, twisted, 'o', label="Twisted Data")
ax.set_xlabel("$D_p \; [\mu m]$")
ax.set_ylabel("$dN/dlogD_p$")
ax.semilogx()
ax.xaxis.set_major_formatter(mtick.FormatStrFormatter("%.4g"))
ax.legend()
sns.despine()
plt.show()
Now that we have some fake data, let's go ahead and fit it! We're also going to go ahead and throw some initial guesses at it. There needs to be 3n guesses where n is the number of modes you are fitting. They should be in the order [Ni, GMi, GSDi] for i=1 to i=number of modes.
In [10]:
model = LogNormal()
X = dp
Y = twisted
# Let's set some initial guesses
p0 = [1e5, 1e-3, 2, 3e4, 20e-3, 2]
results = model.fit(X, Y, modes=2, p0=p0)
print (results.summary())
Now that we have our results, let's go ahead and plot them!
In [11]:
with sns.axes_style('ticks'):
fig, ax = plt.subplots(1, figsize=(12, 6))
ax.plot(dp, twisted, 'o', label="Twisted Data")
ax.plot(dp, results.fittedvalues, lw=6, label="Fitted Values")
ax.set_xlabel("$D_p \; [\mu m]$")
ax.set_ylabel("$dN/dlogD_p$")
ax.semilogx()
ax.xaxis.set_major_formatter(mtick.FormatStrFormatter("%.4g"))
ax.legend()
sns.despine()
plt.show()
In [12]:
model = LogNormal()
X = dp
Y = twisted
results = model.fit(X, Y, modes=1, xmax=8.5, xmin=0)
print (results.summary())
with sns.axes_style('ticks'):
fig, ax = plt.subplots(1, figsize=(12, 6))
ax.plot(dp, twisted, 'o', label="Twisted Data")
ax.plot(X[X <= 8.5], results.fittedvalues, lw=6, label="Fitted Values")
ax.set_xlabel("$D_p \; [\mu m]$")
ax.set_ylabel("$dN/dlogD_p$")
ax.semilogx()
ax.xaxis.set_major_formatter(mtick.FormatStrFormatter("%.4g"))
ax.legend()
sns.despine()
plt.show()
In [ ]: