In [1]:
import pandas as pd
from pylab import rcParams
import seaborn as sb
import matplotlib.pyplot as plt

from sklearn.cluster import DBSCAN
from collections import Counter

In [2]:
import warnings

# Ignore every DeprecationWarning raised from here on.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

In [3]:
%matplotlib inline
rcParams['figure.figsize']=7,4
sb.set_style('whitegrid')

In [29]:
from sklearn import datasets

# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

# Keep only columns 1 and 2 of the feature matrix so the data is two-dimensional;
# y holds the class targets.
X = iris.data[:, [1, 2]]
y = iris.target

# Display names for the two retained columns.  The full four-column list would be
# ['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'].
columns = ['Sepal Width', 'Petal Length']
df = pd.DataFrame(X, columns=columns)
df.head()


Out[29]:
Sepal Width Petal Length
0 3.5 1.4
1 3.0 1.4
2 3.2 1.3
3 3.1 1.5
4 3.6 1.4

In [71]:
# Cluster the two selected features with DBSCAN.  eps is the neighbourhood radius;
# min_samples is how many points must lie within that radius for a core point.
model = DBSCAN(min_samples=19, eps=0.5)
model.fit(X)  # fit() returns the estimator itself, so the cell still displays the model


Out[71]:
DBSCAN(algorithm='auto', eps=0.5, leaf_size=30, metric='euclidean',
    min_samples=19, n_jobs=1, p=None)

In [72]:
# NOTE: despite its name this frame holds every row of df; the noise rows are
# only selected in the last statement of this cell.
outliers_df = pd.DataFrame(data=df)

# Number of samples per cluster label (DBSCAN uses -1 for noise points).
print(Counter(model.labels_))
# First four rows that DBSCAN labelled as noise.
outliers_df.loc[model.labels_ == -1].head(4)


Counter({1: 91, 0: 48, -1: 11})
Out[72]:
Sepal Width Petal Length
15 4.4 1.5
41 2.3 1.3
57 2.4 3.3
60 2.0 3.5

In [75]:
# Cluster label of every sample; translated into colour names before plotting.
colors = model.labels_

# New figure with a single axes; the rect is [left, bottom, width, height].
fig = plt.figure()
ax = fig.add_axes([0.1, 0.1, 1, 1])

def labels_to_colors(x):
    """Return the plot colour for a cluster label.

    -1 -> 'red', 0 -> 'blue', 1 -> 'black'; any other value -> 'grey'.
    """
    # Compared in order with ==, so any value equal to a known label matches it.
    for label, colour in ((-1, 'red'), (0, 'blue'), (1, 'black')):
        if x == label:
            return colour
    return 'grey'

# Translate every cluster label into its display colour.
colors = [labels_to_colors(label) for label in colors]

# Plot the second column of X (columns[1]) on the horizontal axis against the
# first column (columns[0]).  The slices are passed straight to scatter()
# instead of being bound to x / y: rebinding y here would overwrite the
# class-target vector assigned when the data was loaded.
ax.scatter(X[:, 1], X[:, 0], c=colors, s=120, marker='o')

ax.set_xlabel(columns[1])
ax.set_ylabel(columns[0]);  # trailing ';' keeps the Text repr out of the cell output


Out[75]:
<matplotlib.text.Text at 0x11e836b38>

In [ ]:


In [ ]:


In [ ]: