# In [ ]:

    
'''
This program runs different clustering methods on a variety of data sets.
Author: Mohamadjavad Bahmani <mohamadjavad.bahmani@gmail.com>.
'''
# Path of the data file to cluster. Point DATA at your own file; by default
# the data set stored next to this script is used.
DATA = "iris.data"
# import some library
#================================================
from pandas import read_table
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import seaborn
#================================================
def GetData():
    """Load the data file named by ``DATA`` and drop its fifth column.

    The file is parsed as comma-separated text without a header row, and
    ``?`` entries are read as missing values.

    Returns:
        The parsed ``DataFrame`` with the column at position 4 removed.
    """
    read_options = dict(
        encoding='latin-1',
        sep=',',
        skipinitialspace=True,
        na_values=['?'],
        index_col=None,
        header=None,
    )
    table = read_table(DATA, **read_options)
    # Discard the column at position 4 in place (presumably the class
    # label of the data set -- confirm for files other than iris.data).
    table.pop(table.columns[4])
    return table
#================================================
def GetFeatures(frame):
    """Turn a data frame into an imputed, standardized float matrix.

    Args:
        frame: Table of numeric values; missing entries must be NaN.

    Returns:
        A NumPy float array in which missing values have been replaced by
        the mean of their column and every column has then been
        standardized with ``sklearn.preprocessing.scale``.

    Raises:
        ValueError: If a value in ``frame`` cannot be converted to float.
    """
    # ``np.float`` was only an alias of the builtin ``float`` and was
    # removed in NumPy 1.24, so the builtin is used directly.
    arr = np.array(frame, dtype=float)

    # ``sklearn.preprocessing.Imputer`` was removed in scikit-learn 0.22;
    # ``SimpleImputer`` is its replacement. Fall back to the legacy class
    # so that very old installations keep working.
    try:
        from sklearn.impute import SimpleImputer as MeanImputer
    except ImportError:
        from sklearn.preprocessing import Imputer as MeanImputer

    # Fill missing values with the column mean.
    arr = MeanImputer(strategy='mean').fit_transform(arr)

    # Normalize the entire data set.
    from sklearn.preprocessing import scale
    return scale(arr)
#=================================================
def ReduceDimension(X):
    """Yield a labelled two-component FastICA projection of ``X``.

    Args:
        X: Feature matrix accepted by ``FastICA.fit_transform``.

    Yields:
        A tuple ``('ICA', first, second)`` holding the label of the method
        and the two columns of the transformed data.
    """
    from sklearn.decomposition import FastICA

    projected = FastICA(n_components=2).fit_transform(X)
    first_axis, second_axis = projected[:, 0], projected[:, 1]
    yield 'ICA', first_axis, second_axis
 
#=================================================
def Learning(X):
    """Yield labelled cluster assignments for ``X``.

    Args:
        X: Feature matrix accepted by
            ``AgglomerativeClustering.fit_predict``.

    Yields:
        A tuple ``(label, assignments)`` where ``label`` names the
        clustering method and ``assignments`` is the result of
        ``fit_predict`` with three clusters.
    """
    from sklearn.cluster import AgglomerativeClustering

    assignments = AgglomerativeClustering(n_clusters=3).fit_predict(X)
    yield 'Agglomerative clusters(n=3)', assignments

    
#=================================================

def Plot(Xs, predictions):
    """Show a grid of scatter plots, one per (projection, clustering) pair.

    Rows of the grid correspond to the entries of ``Xs`` and columns to the
    entries of ``predictions``. The figure is displayed with ``plt.show()``
    and closed afterwards.

    Args:
        Xs: Sized sequence of ``(row_label, x_values, y_values)`` tuples.
        predictions: Sized sequence of ``(col_label, y_pred)`` tuples, where
            ``y_pred`` is a NumPy array of cluster indices.
    """
    # We will use subplots to display the results in a grid.
    nrows = len(Xs)
    ncols = len(predictions)

    fig = plt.figure(figsize=(16, 8))
    # ``FigureCanvas.set_window_title`` was removed in Matplotlib 3.6; the
    # window title is set through the figure manager instead. A canvas
    # without a manager has no window to title, so it is skipped.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Clustering data from ' + DATA)

    for row, (row_label, X_x, X_y) in enumerate(Xs):
        for col, (col_label, y_pred) in enumerate(predictions):
            # Subplot indices are 1-based and run row by row.
            ax = plt.subplot(nrows, ncols, row * ncols + col + 1)
            if row == 0:
                plt.title(col_label)
            if col == 0:
                plt.ylabel(row_label)

            # Plot the decomposed input data and use the predicted
            # cluster index as the value in a color map. The builtin
            # ``float`` replaces ``np.float``, removed in NumPy 1.24.
            plt.scatter(X_x, X_y, c=y_pred.astype(float), cmap='prism',
                        alpha=0.5)

            # Set the axis tick locator to reduce the number of ticks.
            ax.xaxis.set_major_locator(MaxNLocator(nbins=4))
            ax.yaxis.set_major_locator(MaxNLocator(nbins=4))

    # Let matplotlib handle the subplot layout.
    plt.tight_layout()

    plt.show()

    plt.close()
    
    
    
    
    
    
    
if __name__ == '__main__':
    # Step 1: load the table from the file named by DATA.
    frame = GetData()
    print('1-Downloading data from {} is done'.format(DATA))

    # Step 2: build the feature matrix; the frame's shape supplies the
    # (sample count, attribute count) pair for the progress message.
    X = GetFeatures(frame)
    print("2-Processing {} samples with {} attributes is done".format(*frame.shape))

    # Step 3: collect every 2-D projection so it can be measured with len().
    Xs = list(ReduceDimension(X))
    print("3-Reducing dimensionality is done")

    # Step 4: collect the cluster assignments of every learner.
    predictions = list(Learning(X))
    print("4-Evaluating clustering learners is done")

    # Step 5: draw the grid of results.
    Plot(Xs, predictions)
    print("5-ploting results is done")