Here we generate a and load the data and set the relevant parameters



In [8]:

    
import numpy as np
import matplotlib.pyplot as plt 
from sklearn.cluster import KMeans
# Active in line mode 
%pylab inline  
# This is for making plots appeared in the document and not as an external window









    



Populating the interactive namespace from numpy and matplotlib






    



WARNING: pylab import has clobbered these variables: ['mean', 'cov']
`%matplotlib` prevents importing * from pylab and numpy



In [9]:

    
#Quantityt of data 
N_total = 1000 
# Percentage of data is that is anomalous 
p_anomaly = 0.10
N_normal = (1 - p_anomaly) * N_total
N_anomal = p_anomaly * N_total
# Make the number integeres
N_normal = int(N_normal)
N_anomal = int(N_anomal)



In [27]:

    
# Generate anomaly data
mean = np.array([5, 5])
cov = np.array([[1, 0], [0, 1]])
data_anomal = np.random.multivariate_normal(mean, cov, N_anomal)



In [28]:

    
# Generate normal data 
mean = np.array([0, 0])
cov = np.array([[1, 0], [0, 1]])
data_normal = np.random.multivariate_normal(mean, cov, N_normal)



In [29]:

    
# Gather the data in one vector
data = np.concatenate([data_normal, data_anomal])



In [30]:

    
print data_normal.shape
print data_anomal.shape
print data.shape









    



(900, 2)
(100, 2)
(1000, 2)



In [31]:

    
plt.plot(data_normal[:,0], data_normal[:,1], '*r')
plt.plot(data_anomal[:,0], data_anomal[:, 1], '*b')
plt.show()

This is an exmaple of header

Now we use the K-means algorithm to cluster the data



In [51]:

    
clusters = 2
anomaly_distance = 4
k_means = KMeans(n_clusters=clusters, init='k-means++', n_init=10, max_iter=300, tol=0.0001, precompute_distances=True)



In [52]:

    
k_means.fit(data)









    Out[52]:





KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=2, n_init=10,
    n_jobs=1, precompute_distances=True, random_state=None, tol=0.0001,
    verbose=0)



In [53]:

    
cluster_centers = k_means.cluster_centers_



In [54]:

    
data_labels = k_means.labels_



In [57]:

    
circle_1 = plt.Circle(cluster_centers[0], anomaly_distance, color='b', fill=False)
circle_2 = plt.Circle(cluster_centers[1], anomaly_distance, color='b', fill=False)
plt.plot(data[:,0], data[:, 1], '*')
fig = plt.gcf()
fig.gca().add_artist(circle_1)
fig.gca().add_artist(circle_2)
plt.show()



In [ ]: