In [1]:
%matplotlib inline
In [2]:
import tad
In [4]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from pandas.tools.plotting import scatter_matrix
from sklearn import datasets
from sklearn.decomposition import PCA
from tad.TADClassifier import tad_classify
# Load the iris measurements into a DataFrame and run the classifier once with
# no method/p/r overrides. Elsewhere in this notebook the result is indexed as
# res['classed']['anomalies'] and res['g'].
iris = datasets.load_iris()
df = pd.DataFrame(iris.data)
res = tad_classify(df)
# Disabled: static (default-parameter) version of the plots that
# classifier_demo and anomalies_graph produce interactively.
# df['anomaly']=0
# df.anomaly.ix[res['classed']['anomalies']] = 1
# scatter_matrix(df.ix[:,:4], c=df.anomaly, s=(25 + 50*df.anomaly), alpha=.8)
# plt.show()
# print 'Anomalies:', res['classed']['anomalies']
# g = res['g']
# X_pca = PCA().fit_transform(df)
# pos = dict((i,(X_pca[i,0], X_pca[i,1])) for i in range(X_pca.shape[0]))
# colors = [node[1]['color'] for node in g.nodes(data=True)]
# labels = {}
# for node in g.nodes():
#     if node in res['classed']['anomalies']:
#         labels[node] = node
#     else:
#         labels[node] = ''
# nx.draw(g, pos=pos, node_color = colors, labels=labels)
# plt.show()
In [5]:
from IPython.html import widgets # Widget definitions
from IPython.display import display # Used to display widgets in the notebook
from IPython.html.widgets.interaction import interact
# Names of the distance metrics available in scipy.spatial.distance.pdist;
# these are the choices offered by the metric dropdowns in this notebook.
distance_metrics = [
    'braycurtis',
    'canberra',
    'chebyshev',
    'cityblock',
    'correlation',
    'cosine',
    'dice',
    'euclidean',
    'hamming',
    'jaccard',
    'kulsinski',
    'mahalanobis',
    'matching',
    'minkowski',
    'rogerstanimoto',
    'russellrao',
    'seuclidean',
    'sokalmichener',
    'sokalsneath',
    'sqeuclidean',
    'yule',
]
# Dropdown over the metric names, pre-selected to 'euclidean'; bound to the
# `method` argument of classifier_demo via @interact.
metrics_dropdown = widgets.DropdownWidget(
    values=distance_metrics,
    value='euclidean',
)
In [6]:
def anomalies_graph(method, p, r):
    """Classify the iris data and draw the resulting graph.

    The iris measurements are reloaded on every call, passed to
    ``tad_classify`` and the graph stored under ``res['g']`` is drawn with
    networkx. Node positions are the first two PCA components of the data,
    node colours come from each node's ``'color'`` attribute, and only the
    nodes listed in ``res['classed']['anomalies']`` get a visible label.

    Parameters
    ----------
    method : str
        Distance metric name, forwarded to ``tad_classify`` as ``method``.
    p, r :
        Forwarded unchanged to ``tad_classify`` as ``p`` and ``r``.

    Returns
    -------
    None
        The anomaly list is printed and the figure is shown as side effects.
    """
    iris = datasets.load_iris()
    df = pd.DataFrame(iris.data)
    res = tad_classify(df, method=method, p=p, r=r)

    anomalies = res['classed']['anomalies']
    # One pre-formatted argument prints the same text under the Python 2 print
    # statement and the Python 3 print function.
    print('Anomalies: %s' % (anomalies,))

    g = res['g']

    # Lay the nodes out on the first two principal components.
    # NOTE(review): assumes graph nodes are keyed by row position - confirm.
    X_pca = PCA().fit_transform(df)
    pos = {i: (row[0], row[1]) for i, row in enumerate(X_pca)}

    colors = [attrs['color'] for _, attrs in g.nodes(data=True)]

    # Blank label for ordinary nodes so only anomalies are annotated.
    labels = {node: (node if node in anomalies else '') for node in g.nodes()}

    nx.draw(g, pos=pos, node_color=colors, labels=labels)
    plt.show()
In [7]:
@interact(method=metrics_dropdown, p=[0.0, 1.0], r=[0.0, 1.0])
def classifier_demo(method, p, r):
    """Show a scatter matrix of the iris features with anomalies highlighted.

    The iris measurements are reloaded on every call and passed to
    ``tad_classify``. Rows listed in ``res['classed']['anomalies']`` are
    flagged in an ``anomaly`` column, which drives both the colour and the
    marker size (25 for normal rows, 75 for anomalies) of the scatter matrix.

    Parameters
    ----------
    method : str
        Distance metric name, forwarded to ``tad_classify`` as ``method``.
    p, r :
        Forwarded unchanged to ``tad_classify`` as ``p`` and ``r``.

    Returns
    -------
    None
        The figure is shown as a side effect.
    """
    iris = datasets.load_iris()
    df = pd.DataFrame(iris.data)
    res = tad_classify(df, method=method, p=p, r=r)

    # Flag column: 0 everywhere, then 1 for the rows reported as anomalies.
    df['anomaly'] = 0
    # A single .loc assignment writes to df itself; the chained form
    # (df.anomaly.ix[...] = 1) may write to a temporary copy instead.
    df.loc[res['classed']['anomalies'], 'anomaly'] = 1

    # Select the four feature columns by position; 'anomaly' is the fifth.
    features = df.iloc[:, :4]
    scatter_matrix(features, c=df.anomaly, s=(25 + 50 * df.anomaly), alpha=.8)
    plt.show()
# TODO: test w/ graph
# TODO: mpld3?
# TODO: can this be used w/ R code?
# http://www.youtube.com/watch?v=ucJ2-5a2CAA
# Combining the powerful worlds of Python and R
# [Rserve](https://rforge.net/Rserve/ "Rserve"), and [pyRserve](https://pypi.python.org/pypi/pyRserve/ "pyRserve")
In [8]:
# Separate dropdown instance for the graph demo, pre-selected to 'euclidean'.
metrics = widgets.DropdownWidget(
    values=distance_metrics,
    value='euclidean',
)
# Wire anomalies_graph to the dropdown and to the p / r controls.
interact(
    anomalies_graph,
    method=metrics,
    p=[0.0, 1.0],
    r=[0.0, 1.0],
)
Out[8]: