In [1]:
import pandas as pd
from pylab import rcParams
import seaborn as sb
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from collections import Counter
In [2]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
In [3]:
%matplotlib inline
rcParams['figure.figsize']=7,4
sb.set_style('whitegrid')
In [29]:
from sklearn import datasets
# import some data to play with
iris= datasets.load_iris()
X = iris.data[:, [1,2]] # only take two features
y = iris.target
#columns=['Sepal Length','Sepal Width','Petal Length','Petal Width']
columns=['Sepal Width','Petal Length']
df = pd.DataFrame(data=X,columns=columns)
df.head()
Out[29]:
In [71]:
model = DBSCAN(eps=0.5, min_samples=19).fit(X)
model
Out[71]:
In [72]:
outliers_df = pd.DataFrame(df)
print(Counter(model.labels_))
outliers_df[model.labels_==-1].head(4)
Out[72]:
In [75]:
fig = plt.figure()
ax = fig.add_axes([.1,.1,1,1])
colors = model.labels_
#colors = ['r','b','orange']
def labels_to_colors(x):
if x == -1:
return 'red'
elif x == 0 :
return 'blue'
elif x == 1:
return 'black'
else:
return 'grey'
colors = list(colors)
colors = list(map(lambda x: labels_to_colors(x), colors))
x = X[:,1]
y = X[:,0]
ax.scatter(x,y,c=colors,s=120,marker='o')
ax.set_xlabel(columns[1])
ax.set_ylabel(columns[0])
Out[75]:
In [ ]:
In [ ]:
In [ ]: