In [1]:
%matplotlib inline

In [2]:
import tad

In [4]:
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from pandas.tools.plotting import scatter_matrix
from sklearn import datasets
from sklearn.decomposition import PCA
from tad.TADClassifier import tad_classify

# Baseline run: load the iris feature matrix into a DataFrame and classify it
# with tad_classify using its default settings (no method/p/r passed here).
iris = datasets.load_iris()
df = pd.DataFrame(iris.data)
# The result is used elsewhere in this notebook as a mapping with
# res['classed']['anomalies'] (flagged rows) and res['g'] (a graph).
res = tad_classify(df)

# df['anomaly']=0
# df.anomaly.ix[res['classed']['anomalies']] = 1
# scatter_matrix(df.ix[:,:4], c=df.anomaly, s=(25 + 50*df.anomaly), alpha=.8)
# plt.show()

# print 'Anomalies:', res['classed']['anomalies']
# g = res['g']
# X_pca = PCA().fit_transform(df)
# pos = dict((i,(X_pca[i,0], X_pca[i,1])) for i in range(X_pca.shape[0]))
# colors = [node[1]['color'] for node in g.nodes(data=True)]
# labels = {}
# for node in g.nodes():
#     if node in res['classed']['anomalies']:
#         labels[node] = node
#     else:
#         labels[node] = ''
# nx.draw(g, pos=pos, node_color = colors, labels=labels)
# plt.show()


q: 1117
r: 0.59160797831
min_pts_bgnd: 15.0

In [5]:
from IPython.html import widgets # Widget definitions
from IPython.display import display # Used to display widgets in the notebook

from IPython.html.widgets.interaction import interact

# Names of the distance metrics offered in the dropdowns below; each one is a
# metric string understood by scipy.spatial.distance.pdist.
distance_metrics = [
    'braycurtis',
    'canberra',
    'chebyshev',
    'cityblock',
    'correlation',
    'cosine',
    'dice',
    'euclidean',
    'hamming',
    'jaccard',
    'kulsinski',
    'mahalanobis',
    'matching',
    'minkowski',
    'rogerstanimoto',
    'russellrao',
    'seuclidean',
    'sokalmichener',
    'sokalsneath',
    'sqeuclidean',
    'yule',
]

# Dropdown over distance_metrics with 'euclidean' preselected; passed as the
# `method` control to the @interact-decorated classifier_demo further down.
metrics_dropdown = widgets.DropdownWidget(values=distance_metrics, value='euclidean')

In [6]:
def anomalies_graph(method, p, r):
    """Classify the iris data with TAD and draw the resulting graph.

    The iris feature matrix is reloaded on every call, handed to
    ``tad_classify`` with the given settings, and the graph stored under
    ``res['g']`` is drawn with node ``i`` placed at the first two PCA
    components of row ``i``. Only nodes reported as anomalies get a visible
    label; all other nodes are drawn with an empty label.

    Parameters
    ----------
    method : str
        Forwarded to ``tad_classify`` as ``method``; this notebook passes one
        of the names in ``distance_metrics``.
    p, r : float
        Forwarded unchanged to ``tad_classify``; their meaning is defined
        there.

    Returns
    -------
    None
        The anomaly list is printed and the figure is shown as side effects.
    """
    iris = datasets.load_iris()
    df = pd.DataFrame(iris.data)
    res = tad_classify(df, method=method, p=p, r=r)
    anomalies = res['classed']['anomalies']
    # A single pre-formatted string prints the same text whether `print` is
    # the Python 2 statement or the Python 3 function.
    print('Anomalies: %s' % (anomalies,))
    g = res['g']
    X_pca = PCA().fit_transform(df)
    # Layout keyed by row position -- assumes graph nodes are the row
    # positions of df (TODO confirm against tad_classify).
    pos = dict((i, (X_pca[i, 0], X_pca[i, 1])) for i in range(X_pca.shape[0]))
    colors = [attrs['color'] for _, attrs in g.nodes(data=True)]
    # Build the membership set once instead of scanning the list per node.
    anomaly_set = set(anomalies)
    labels = dict(
        (node, node if node in anomaly_set else '') for node in g.nodes()
    )
    nx.draw(g, pos=pos, node_color=colors, labels=labels)
    plt.show()

In [7]:
@interact(method=metrics_dropdown, p=[0.0, 1.0], r=[0.0, 1.0])
def classifier_demo(method, p, r):
    """Classify the iris data with TAD and show a scatter matrix of the result.

    The iris feature matrix is reloaded on every call and classified with
    ``tad_classify``. Rows reported under ``res['classed']['anomalies']`` are
    flagged in an ``anomaly`` column (1 for anomalies, 0 otherwise), which
    then drives both the colour and the marker size of the scatter matrix
    drawn over the first four columns.

    Parameters
    ----------
    method : str
        Forwarded to ``tad_classify`` as ``method``; supplied by
        ``metrics_dropdown``.
    p, r : float
        Forwarded unchanged to ``tad_classify``; their meaning is defined
        there.
    """
    iris = datasets.load_iris()
    df = pd.DataFrame(iris.data)
    res = tad_classify(df, method=method, p=p, r=r)
    df['anomaly'] = 0
    # One .loc call selects rows and column together, so the write always
    # lands in df itself rather than in a temporary produced by chained
    # indexing (df.anomaly.ix[...] = 1).
    df.loc[res['classed']['anomalies'], 'anomaly'] = 1
    # Positional slice: the four feature columns, excluding 'anomaly'.
    features = df.iloc[:, :4]
    scatter_matrix(features, c=df.anomaly, s=(25 + 50*df.anomaly), alpha=.8)
    plt.show()

# TODO: test w/ graph
# TODO: mpld3?

# TODO: can this be used w/ R code?
# http://www.youtube.com/watch?v=ucJ2-5a2CAA
# Combining the powerful worlds of Python and R
# [Rserve](https://rforge.net/Rserve/ "Rserve"), and [pyRserve](https://pypi.python.org/pypi/pyRserve/ "pyRserve")


r: 0.5
min_pts_bgnd: 75.0

In [8]:
# A separate dropdown instance for this demo (same metric list, 'euclidean'
# preselected), distinct from metrics_dropdown used by classifier_demo.
metrics = widgets.DropdownWidget(values=distance_metrics, value='euclidean')
# Wire anomalies_graph to the dropdown plus controls for p and r.
# NOTE(review): how a two-float list abbreviation is rendered (slider vs.
# dropdown) depends on the IPython/ipywidgets version -- confirm on upgrade.
interact(anomalies_graph, method=metrics, p=[0.0, 1.0], r=[0.0, 1.0])


r: 0.5
min_pts_bgnd: 75.0
Anomalies: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 57, 98, 60, 93, 105, 122, 118, 68, 87, 131, 117]
Out[8]:
<function __main__.anomalies_graph>