In [40]:
from __future__ import print_function
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import time
import tarfile
from IPython.display import display, Image
from scipy import ndimage
from sklearn.linear_model import LogisticRegression
from six.moves.urllib.request import urlretrieve
from six.moves import cPickle as pickle
import tensorflow as tf
import scipy
In [131]:
network_out = np.loadtxt("../code/test_track_out.dat")
true_out = np.loadtxt("../data/ann_dataset_10points_combined.out")
print(network_out.shape)
predicted_vs_actual = np.hstack((network_out, true_out[:,8:]))  # columns 0-9: predictions, 10-19: observed targets
print(predicted_vs_actual[:2,:])
In [128]:
plt.plot(network_out[:2*193,0])  # first output over the first two tracks (193 samples per track)
plt.show()
In [132]:
i = 5  # output column used to locate the worst-case sample
k = np.argmax(np.abs(predicted_vs_actual[:,i]-predicted_vs_actual[:,i+10]))
print(k)
# absolute prediction errors for all 10 outputs at that worst-case sample
max_error_case = [np.abs(predicted_vs_actual[k,j]-predicted_vs_actual[k,j+10]) for j in range(10)]
print(max_error_case)
start = 0       # alternatively (k // 193)*193 to start at the worst-case track
stop = 193*20   # plot the first 20 tracks (193 samples each)
print(predicted_vs_actual.shape)
fig = plt.figure(figsize=(10, 6), dpi=80)
for i in range(10):
    sp = fig.add_subplot(10,1,i+1)
    if i <= 4:
        sp.set_ylim([-0.5, 3.0])
    else:
        sp.set_ylim([-0.5, 3.5])
    sp.plot(predicted_vs_actual[start:stop,i],color="blue", linewidth=1.5, linestyle="-", label="prediction")
    sp.plot(predicted_vs_actual[start:stop,i+10],color="red", linewidth=1.5, linestyle="-", label="observation")
#plt.legend(loc='upper right')
plt.show()
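To complement the visual comparison above, a short numeric summary of the per-output errors (a minimal sketch; it only assumes predicted_vs_actual holds predictions in columns 0-9 and observations in columns 10-19, as used in the plot):
In [ ]:
# per-output mean absolute error, RMSE, and worst-case absolute error
errors = predicted_vs_actual[:, :10] - predicted_vs_actual[:, 10:]
for j in range(10):
    print("output %d: MAE = %.4f, RMSE = %.4f, max |err| = %.4f"
          % (j, np.mean(np.abs(errors[:, j])),
             np.sqrt(np.mean(errors[:, j]**2)),
             np.max(np.abs(errors[:, j]))))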
In [134]:
from sklearn.metrics import explained_variance_score
import scipy.stats as stats
import pylab
diff = (network_out-true_out[:,8:])
diff = diff[:,5]
print(diff.shape)
#stats.probplot(diff, dist="norm", plot=pylab)
stats.probplot(diff, dist="t", sparams=(2,), plot=pylab)  # Q-Q plot against a t distribution with 2 DOF
pylab.show()
num_bins = 100
# the histogram of the errors
n, bins, patches = plt.hist(diff, num_bins, density=True, facecolor='blue', alpha=0.5)
params = stats.t.fit(diff)  # fitted (df, loc, scale)
print(params)
dist = stats.t(params[0], params[1], params[2])
x = np.linspace(-2, 2, num_bins)
plt.plot(x, dist.pdf(x), 'r-', lw = 3, alpha=0.5, label='t pdf')
plt.show()
print(params)
Test distribution of errors: the best fit (R^2 = 0.855) is a Student's t with 1.067 DOF, location 0.003919 (the network slightly overestimates), and scale 0.01165. However, the t distribution overestimates the probability of large errors.
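A quick check of that heavy-tail claim (a sketch; the 0.5 m cutoff for a "large" error is an arbitrary assumption): compare the tail mass of the fitted t with the empirical fraction of errors beyond the cutoff.
In [ ]:
thresh = 0.5  # assumed threshold for a "large" error, in meters
t_tail = dist.cdf(-thresh) + dist.sf(thresh)   # P(|err| > thresh) under the fitted t
emp_tail = np.mean(np.abs(diff) > thresh)      # empirical fraction of such errors
print("fitted t tail mass: %.4f, empirical: %.4f" % (t_tail, emp_tail))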
In [135]:
import scipy
import scipy.stats
diff = (network_out-true_out[:,8:])
#print(diff.shape)
y = diff[:,5]
print(y.shape)
#y = np.square(y)
x = np.arange(-3,3,0.01)
size = diff.shape[0]
h = plt.hist(y, bins=100, color='w')
plt.xlim(-3,3)
plt.ylim(0,1000)
dist_names = ['t']
for dist_name in dist_names:
    dist = getattr(scipy.stats, dist_name)
    param = dist.fit(y)
    #param = (1.5, param[1], param[2])
    # scale the fitted pdf so it is visually comparable to the unnormalized histogram counts
    pdf_fitted = dist.pdf(x, *param[:-2], loc=param[-2], scale=param[-1])*2000
    plt.plot(x, pdf_fitted, label=dist_name)
plt.legend(loc='upper right')
plt.show()
In [136]:
import scipy.stats as stats
stats.probplot(y, dist="t", sparams=(2,), plot=pylab)  # Q-Q plot against a t distribution with 2 DOF
pylab.show()
print(param)           # full fit result: (df, loc, scale)
print(*param[:-2])     # shape parameter (degrees of freedom)
print(param[-2])       # location
print(param[-1])       # scale
Attempts to fit nonparametric distributions
In [143]:
# Gaussian kernel density estimation
from scipy import stats
import matplotlib.pyplot as plt
y = diff[:,4]
print(y.shape)
kde1 = stats.gaussian_kde(y)
#kde2 = stats.gaussian_kde(y, bw_method='silverman')
fig = plt.figure()
ax = fig.add_subplot(111)
#ax.plot(y, np.zeros(y.shape), 'b+', ms=20) # rug plot
x_eval = np.linspace(-2, 2, num=200)
#ax.plot(x_eval, kde1(x_eval), 'k-', label="Scott's Rule")
#ax.plot(x_eval, kde2(x_eval), 'r-', label="Silverman's Rule")
#plt.legend(loc='upper right')
#plt.show()
err_min, err_max = -0.1,0.1
print("Probability of the error between %.2f and %.2f meters: %.4f"%(err_min, err_max,kde1.integrate_box_1d(err_min, err_max)))
In [189]:
### Gaussian kernel density estimation for "active" portions of the data only
from scipy import stats
import matplotlib.pyplot as plt
active_start = 130
active_stop = 160
y = diff[:,0]
# keep only samples 130-159 of each 193-sample track (the "active" window)
active_y = y.reshape(-1,193).transpose()[active_start:active_stop,:].reshape(-1)
print("Min: %.4fm, Max: %.4fm" %(np.amin(active_y),np.amax(active_y)))
kde1 = stats.gaussian_kde(active_y)
kde2 = stats.gaussian_kde(y, bw_method='silverman')
#fig = plt.figure()
#ax = fig.add_subplot(111)
#ax.plot(active_y, np.zeros(active_y.shape), 'b+', ms=20) # rug plot
x_eval = np.linspace(-2, 2, num=200)
#ax.plot(x_eval, kde1(x_eval), 'k-', label="Scott's Rule")
#ax.plot(x_eval, kde2(x_eval), 'r-', label="Silverman's Rule")
#plt.legend(loc='upper right')
#plt.show()
err_min, err_max = -0.13,0.13
print("Probability of the error between %.2f and %.2f meters: %.4f"%(err_min, err_max,kde1.integrate_box_1d(err_min, err_max)))