This notebook displays the results of the regression models for percent biophony and sound exposure level (SEL).
In [1]:
from pymc3 import Model, sample, Normal, Uniform, stats
from pymc3.backends import SQLite
from pymc3.backends.sqlite import load
from os import path
In [2]:
import pandas
import numpy
import seaborn
from figutils import style
from matplotlib import pyplot
%matplotlib inline
In [3]:
# Apply the project's figure font settings (figutils helper).
style.set_font()
In [7]:
# Switch matplotlib to seaborn's default theme for all subsequent figures.
seaborn.set()
In [4]:
# Directory containing the saved model-1 MCMC traces for percent biophony.
# TODO(review): hardcoded absolute local path — consider a configurable data directory.
trace_input_filepath = "/Users/jake/OneDrive/Documents/alpine soundscapes/data/model traces/biophony"
In [5]:
# NOTE(review): this cell silently overwrote trace_input_filepath (set to the
# biophony trace directory in the previous cell) with the SEL directory, so the
# SEL traces were loaded even though every downstream cell models and labels
# percent biophony (observed=data_centered.biophony, "percent biophony" axis
# labels). Keep the biophony path active; uncomment the line below to run the
# SEL analysis instead.
#trace_input_filepath = "/Users/jake/OneDrive/Documents/alpine soundscapes/data/model traces/sel"
In [6]:
# CSV of per-recording measurements for all sites (one row per site visit).
# TODO(review): hardcoded absolute local path — consider a configurable data directory.
data_filepath = "/Users/jake/OneDrive/Documents/alpine soundscapes/data/dataset.csv"
In [9]:
# Read the full dataset, then restrict it to the 30 numbered study sites.
data = pandas.read_csv(data_filepath)
data = data[data.site <= 30]
In [10]:
# Fix the row order (by site, then recording) so positional indexing downstream is stable.
data_sorted = data.sort_values(by=['site', 'sound']).reset_index(drop=True)
In [11]:
# Measured variables to mean-center before modeling (intercepts then sit at the
# overall average of each predictor).
column_list = ['sel', 'sel_anthrophony', 'sel_biophony', 'biophony', 'week',
               'building_50m', 'pavement_50m', 'forest_50m', 'field_50m',
               'building_100m', 'pavement_100m', 'forest_100m', 'field_100m',
               'building_200m', 'pavement_200m', 'forest_200m', 'field_200m',
               'building_500m', 'pavement_500m', 'forest_500m', 'field_500m',
               'd2n_50m', 'd2n_100m', 'd2n_200m', 'd2n_500m',
               'temperature', 'wind_speed', 'pressure', 'bus_stop',
               'construction', 'crossing', 'cycleway', 'elevator', 'escape', 'footway',
               'living_street', 'motorway', 'motorway_link', 'path', 'pedestrian',
               'platform', 'primary_road', 'primary_link', 'proposed', 'residential',
               'rest_area', 'secondary', 'secondary_link', 'service', 'services',
               'steps', 'tertiary', 'tertiary_link', 'track', 'unclassified', 'combo']
# Copy the sorted frame and replace each listed column with its centered version.
data_centered = data_sorted.copy()
for col in column_list:
    data_centered[col] = data_sorted[col].sub(data_sorted[col].mean())
In [12]:
# Zero-based site index per observation, used to pick each row's intercept/slope.
# (The subtraction already allocates a fresh array, so no explicit copy is needed.)
sites = data_sorted['site'].values - 1
In [13]:
# Candidate site-level predictors: four land-cover fractions at each buffer
# radius, followed by distance-to-nature (d2n) at the same radii.
site_predictors = ['{0}_{1}m'.format(cover, radius)
                   for radius in (50, 100, 200, 500)
                   for cover in ('building', 'pavement', 'forest', 'field')]
site_predictors += ['d2n_{0}m'.format(radius) for radius in (50, 100, 200, 500)]
In [14]:
# Registry of model-1 variants, keyed by site-level predictor name.
model1_models = {}
In [15]:
def define_model1(predictor: str):
    """Build a varying-intercept, varying-slope hierarchical pymc3 model.

    Each site's intercept ``a`` and weekly slope ``b`` are drawn from a
    group-level regression on the site mean of ``predictor``; the likelihood
    regresses (centered) percent biophony on (centered) week within each site.

    Parameters
    ----------
    predictor : str
        Column name in ``data_centered`` used as the site-level predictor.

    Returns
    -------
    The pymc3 ``Model`` object (defined only — not yet sampled).
    """
    with Model() as model1:
        # intercept: a_j ~ Normal(g_a + g_as * mean(predictor)_j, sigma_a)
        g_a = Normal('g_a', mu=0, tau=0.001)    # group-level intercept (vague prior)
        g_as = Normal('g_as', mu=0, tau=0.001)  # effect of the site predictor on intercepts
        sigma_a = Uniform('sigma_a', lower=0, upper=100)
        tau_a = sigma_a**-2  # Normal here is parameterized by precision (tau)
        mu_a = g_a + (g_as * data_centered.groupby('site')[predictor].mean())
        a = Normal('a', mu=mu_a, tau=tau_a, shape=len(set(sites)))
        # slope: same group-level structure for each site's weekly slope
        g_b = Normal('g_b', mu=0, tau=0.001)
        g_bs = Normal('g_bs', mu=0, tau=0.001)
        sigma_b = Uniform('sigma_b', lower=0, upper=100)
        tau_b = sigma_b**-2
        mu_b = g_b + (g_bs * data_centered.groupby('site')[predictor].mean())
        b = Normal('b', mu=mu_b, tau=tau_b, shape=len(set(sites)))
        # model error (data-level)
        sigma_y = Uniform('sigma_y', lower=0, upper=100)
        tau_y = sigma_y**-2
        # expected values: each observation uses its own site's intercept and slope
        y_hat = a[sites] + (b[sites] * data_centered.week)
        # likelihood: centered percent biophony
        y_like = Normal('y_like', mu=y_hat, tau=tau_y, observed=data_centered.biophony)
    return model1
In [16]:
# Instantiate one copy of model 1 per candidate site predictor.
# NOTE: constructing 20 pymc3 models can take a noticeable amount of time.
for predictor in site_predictors:
    model1_models[predictor] = define_model1(predictor)
In [17]:
def load_model1(predictor):
    """Load the stored MCMC trace (SQLite backend) for the model-1 variant
    that uses `predictor` as its site-level predictor."""
    trace_file = "model1_{0}.sqlite".format(predictor)
    with model1_models[predictor]:
        return load(path.join(trace_input_filepath, trace_file))
In [18]:
# Load the stored sampling trace for every candidate predictor,
# keyed by predictor name.
model1_samples = {predictor: load_model1(predictor)
                  for predictor in site_predictors}
In [24]:
# Figure: posterior site-level intercepts (top panel) and weekly slopes (bottom
# panel) from model 1, plotted against the chosen site predictor (d2n, 500 m).
predictor = "d2n_500m"
fig, ax = pyplot.subplots(2, 1, sharex=True, sharey=False)
fig.set_figwidth(3.30)
fig.set_figheight(3.30 * 1.8)
pyplot.subplots_adjust(left=0.19, bottom=0.10, right=0.96, top = 0.98, hspace=0.1, wspace=0)
# organize results
samples = model1_samples[predictor]
# one row per site, indexed by site number
model1_data = pandas.DataFrame({'site': data_centered.site.unique(),
                                'site_name': data_centered.site_name.unique()}).set_index('site')
# site-level (mean-centered) value of the predictor
model1_data[predictor] = data_centered.groupby('site')[predictor].mean()
# posterior quantiles (keys 2.5, 25, 50, 75, 97.5) of each site's intercept (a)
# and slope (b), computed from the first 5000 draws of the trace
model1_data['quantiles_a'] = [stats.quantiles(samples.a[:5000, i]) for i in range(len(set(sites)))]
model1_data['quantiles_b'] = [stats.quantiles(samples.b[:5000, i]) for i in range(len(set(sites)))]
# spaghetti plots: 100 random draws of the group-level regression line.
# NOTE(review): draws come from indices 5000-9999 while the quantiles/means
# elsewhere use draws [:5000] — confirm which half of the trace is post-burn-in.
xd = numpy.array([-2, 2])
for d in numpy.random.randint(5000, 9999, 100):
    yd = samples.g_a[d] + (xd * samples.g_as[d])
    ax[0].plot(xd, yd, color='forestgreen', alpha=0.1, linewidth=0.25)
for d in numpy.random.randint(5000, 9999, 100):
    xd = numpy.array([-1, 1])
    yd = samples.g_b[d] + (xd * samples.g_bs[d])
    ax[1].plot(xd, yd, color='forestgreen', alpha=0.1, linewidth=0.25)
# plot quantiles
x = model1_data[predictor]
y = [r[50] for r in model1_data['quantiles_a']]  # posterior medians of intercepts
ax[0].scatter(x, y, color='forestgreen', marker='.', zorder=1000, s=30)
# 50% (thick) and 95% (thin) posterior intervals for each site's intercept
ax[0].plot([x, x], [[r[25] for r in model1_data['quantiles_a']], [r[75] for r in model1_data['quantiles_a']]], color='forestgreen', linewidth=0.75)
ax[0].plot([x, x], [[r[2.5] for r in model1_data['quantiles_a']], [r[97.5] for r in model1_data['quantiles_a']]], color='forestgreen', linewidth=0.25)
# posterior-mean group-level line for the intercepts.
# NOTE(review): xd is [-1, 1] here (rebound inside the slope loop above), not
# the [-2, 2] used for the intercept spaghetti lines — confirm this is intended
# (the x-limits below clip to [-0.2, 0.6], so the rendered figure may be unaffected).
ax[0].plot(xd, samples.g_a[:5000].mean() + (xd * samples.g_as[:5000].mean()), color='forestgreen', linestyle='--', linewidth=1)
x = model1_data[predictor]
y = [r[50] for r in model1_data['quantiles_b']]  # posterior medians of slopes
ax[1].scatter(x, y, color='forestgreen', marker='.', zorder=1000, s=30)
# 50% (thick) and 95% (thin) posterior intervals for each site's slope
ax[1].plot([x, x], [[r[25] for r in model1_data['quantiles_b']], [r[75] for r in model1_data['quantiles_b']]], color='forestgreen', linewidth=0.75)
ax[1].plot([x, x], [[r[2.5] for r in model1_data['quantiles_b']], [r[97.5] for r in model1_data['quantiles_b']]], color='forestgreen', linewidth=0.25)
# posterior-mean group-level line for the slopes
ax[1].plot(xd, samples.g_b[:5000].mean() + (xd * samples.g_bs[:5000].mean()), color='forestgreen', linestyle='--', linewidth=1)
# format plot
#zero = 0 - data_sorted[predictor].mean()
#xticks = numpy.arange(zero, zero + 2, 0.1)
#xticklabels = [str(n) for n in range(-1, 2, 1)]
#l1 = ax[1].set_xlim([-10, 110])
#lx0 = ax[0].set_xlim([zero, zero + 1])
#lx1 = ax[1].set_xlim([zero, zero + 1])
#ax[0].set_xticks(xticks)
#ax[0].set_xticklabels(xticklabels)
#ax[1].set_xticks(xticks)
#ax[1].set_xticklabels(xticklabels)
#ly0 = ax[0].set_ylim([0, 6])
lx0 = ax[0].set_xlim([-0.2, 0.6])
ly1 = ax[1].set_ylim([-1, 1])
yl0 = ax[0].set_ylabel("starting percent biophony")
xl1 = ax[1].set_xlabel("d2n in 500-meter radius")
yl1 = ax[1].set_ylabel("weekly change of percent biophony")
#ax[0].set_yticklabels = ['-4.0', '-2.0', '0.0', '2.0', '4.0']
# shift the top panel's y-axis so tick labels read in the original
# (uncentered) percent-biophony units
zero_y = 0 - data_sorted['biophony'].mean()
ly0 = ax[0].set_ylim([zero_y, zero_y + 20])
yticks = numpy.arange(zero_y, zero_y + 21, 1)
yticklabels = [" {0}".format(n) for n in range(0, 21, 1)]
ly0 = ax[0].set_yticks(yticks)
ly0 = ax[0].set_yticklabels(yticklabels)
lx1 = ax[1].set_xlim([-0.2, 0.6])
yticks = [-1, 0, 1]
yticklabels = [str(n) for n in yticks]
ly1 = ax[1].set_yticks(yticks)
ly1 = ax[1].set_yticklabels(yticks)
#xticks = [-1, 0, 1]
#xticklabels = [str(n) for n in yticks]
#lx1 = ax[1].set_xticks(xticks)
#lx1 = ax[1].set_xticklabels(xticks)
In [21]:
#fig.savefig("/home/ubuntu/download/figure6.tiff", dpi=150)