In [1]:
# Render matplotlib figures inline, beneath the cell that produces them.
%matplotlib inline
In [2]:
import numpy as np
import matplotlib.pylab as plt
from PyAstronomy import pyasl
# Convert the data given at:
# http://www.itl.nist.gov/div898/handbook/eda/section3/eda35h3.htm
# into a 1-D float array.
#
# The whitespace-separated numbers are parsed with float().  The map
# result is materialised with list() so NumPy receives a real sequence on
# both Python 2 and Python 3; a bare Python 3 map object would be wrapped
# as a single 0-d object array instead of an array of numbers.
x = np.array(list(map(float, (
    "-0.25 0.68 0.94 1.15 1.20 1.26 1.26 1.34 1.38 1.43 1.49 1.49 "
    "1.55 1.56 1.58 1.65 1.69 1.70 1.76 1.77 1.81 1.91 1.94 1.96 "
    "1.99 2.06 2.09 2.10 2.14 2.15 2.23 2.24 2.26 2.35 2.37 2.40 "
    "2.47 2.54 2.62 2.64 2.90 2.92 2.92 2.93 3.21 3.26 3.30 3.59 "
    "3.68 4.30 4.64 5.34 5.42 6.01"
).split())))
# Apply the generalized ESD test to the sample, passing 10 and 0.05 as the
# second and third positional arguments.  With fullOutput=True the result
# is indexed below as: r[0] the number of outliers, r[1] their indices,
# r[2] the values printed under "R" and r[3] those printed under "Lambda".
r = pyasl.generalizedESD(x, 10, 0.05, fullOutput=True)
num_outliers, outlier_indices = r[0], r[1]

# Single-argument print(...) calls emit the same text on Python 2 and
# Python 3.  The doubled space reproduces the separator that the former
# comma-style print statement inserted between its two items.
print("Number of outliers:  %s" % (num_outliers,))
print("Indices of outliers:  %s" % (outlier_indices,))
print(" R Lambda")
for rank, (statistic, critical) in enumerate(zip(r[2], r[3]), 1):
    print("%2d %8.5f %8.5f" % (rank, statistic, critical))

# Plot the "data"
plt.plot(x, 'b.')
# and mark the outliers.
for idx in outlier_indices[:num_outliers]:
    plt.plot(idx, x[idx], 'rp')
plt.show()
In [4]:
import pandas as pd
# Load the tab-separated table that the interactive cells below explore.
POTATOES_TSV = 'potatoes.tsv'
MPI_MATRIX = pd.read_csv(POTATOES_TSV, sep='\t')
In [8]:
from IPython.html import widgets # Widget definitions
from IPython.display import display # Used to display widgets in the notebook
from IPython.html.widgets.interaction import interact
# Offer every column of MPI_MATRIX as a choice in a dropdown widget, with
# one specific column pre-selected.
DEFAULT_COLUMN = '12D39077-7A6C-4BA8-9EBB-CFFF87CD9770'
column_names = list(MPI_MATRIX.columns)
columns_dropdown = widgets.DropdownWidget(
    values=column_names,
    value=DEFAULT_COLUMN,
)
@interact(column_name=columns_dropdown)
def plot_column(column_name):
    """Draw the values of one MPI_MATRIX column as blue dots.

    column_name -- label of the column to plot; supplied by the dropdown
    bound through ``interact``.
    """
    selected = MPI_MATRIX[column_name]
    plt.plot(selected, 'b.')
In [9]:
def apply_generalizedESD(column_name, max_num_outliers=10, significance=0.05):
    """Run the generalized ESD test on one MPI_MATRIX column, plot and report.

    Parameters
    ----------
    column_name
        Label of the MPI_MATRIX column to analyse.
    max_num_outliers
        Forwarded to ``pyasl.generalizedESD`` as its second positional
        argument (default 10).
    significance
        Forwarded to ``pyasl.generalizedESD`` as its third positional
        argument (default 0.05).

    Returns
    -------
    None.  The column is plotted as blue dots with the reported outliers
    over-plotted in red, and a text summary is printed.
    """
    # Work on the bare values rather than the pandas column: the indices
    # reported by generalizedESD are used as positions into its input,
    # whereas indexing the column itself resolves integers as index labels
    # when the index is integer-typed, which only matches for the default
    # 0..n-1 index.
    values = MPI_MATRIX[column_name].values
    r = pyasl.generalizedESD(values, max_num_outliers, significance,
                             fullOutput=True)
    num_outliers, outlier_indices = r[0], r[1]

    # Plot the "data"
    plt.plot(values, 'b.')
    # and mark the outliers.
    for idx in outlier_indices[:num_outliers]:
        plt.plot(idx, values[idx], 'rp')
    plt.show()

    # Single-argument print(...) calls emit the same text on Python 2 and
    # Python 3.  The doubled space reproduces the separator that the former
    # comma-style print statement inserted between its two items.
    print("Number of outliers:  %s" % (num_outliers,))
    print("Indices of outliers:  %s" % (outlier_indices,))
    print(" R Lambda")
    for rank, (statistic, critical) in enumerate(zip(r[2], r[3]), 1):
        print("%2d %8.5f %8.5f" % (rank, statistic, critical))
In [10]:
# Bind apply_generalizedESD to the column dropdown so the test is run for
# the selected column.
# NOTE(review): max_num_outliers and significance are not bound here;
# interact may derive its own controls from their defaults -- confirm the
# resulting ranges are sensible.
interact(apply_generalizedESD, column_name=columns_dropdown)
Out[10]: