notebook.community

Edit and run



In [1]:

    
import numpy as np
from sklearn import neighbors, preprocessing
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib import style

%matplotlib notebook

# Importing Datasets.
# Load the CSV, swap the '?' placeholder for a numeric sentinel (-99999),
# and drop the 'id' column, which does not help in clustering.
# The steps are chained without `inplace=True` so the cell can be re-run
# safely, and `columns=` is used because DataFrame.drop no longer accepts
# the axis as a positional argument (removed in pandas 2.0).
df = (
    pd.read_csv('breastdata.txt')
    .replace('?', -99999)
    .drop(columns=['id'])
)

# Establishing Objective. Clustering done based on the label.
label = 'class'



In [2]:

    
# Fitting.
# X holds every column except the label; y holds only the label column.
# `columns=` replaces the positional axis argument, which DataFrame.drop
# no longer accepts (removed in pandas 2.0).
# The explicit float cast turns any values that were read as strings into
# numbers before plotting/fitting (assumes every remaining column is
# numeric once placeholders are replaced -- TODO confirm against the data file).
X = np.array(df.drop(columns=[label])).astype(float)
y = np.array(df[label])
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA','#00AAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00','#00AAFF'])

# Overlay several feature pairs on one set of axes. Each entry is
# (x column index, y column index, marker size); larger markers are drawn
# first so the smaller ones plotted later stay visible on top.
FEATURE_PAIRS = [(7, 8, 200), (6, 7, 150), (3, 4, 100), (1, 2, 50), (0, 1, 10)]
fig, ax = plt.subplots()
for x_col, y_col, marker_size in FEATURE_PAIRS:
    ax.scatter(X[:, x_col], X[:, y_col], s=marker_size, c=y, cmap=cmap_bold)
ax.set_xlim(0, 20)
ax.set_ylim(0, 20)
ax.set_xlabel("First feature of each pair")
ax.set_ylabel("Second feature of each pair")
ax.set_title("Data Points(2 features)")

# Train Test Split
# A fixed seed makes the split -- and therefore the reported accuracy --
# reproducible across Restart & Run All.
RANDOM_STATE = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Actual K Nearest Neighbor Classifier Training.
clf = neighbors.KNeighborsClassifier()
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)
print(accuracy)

plt.show()









    














    











    



0.978571428571



In [3]:

    
# Scoring and Predicting
# Each row is one sample with nine feature values, matching the nine-column
# rows the classifier is asked to predict on.
example_measures = np.array([[9,2,2,3,2,2,5,4,2], [4,2,1,1,1,2,3,2,1], [3,2,2,5,2,2,5,4,2]])
prediction = clf.predict(example_measures)

# Printing
# NOTE(review): `color` is not used in this cell; kept in case another cell reads it.
color = 'r'

# Map the class codes to readable names. A lookup with a fallback guarantees
# that every sample gets a complete, newline-terminated line even if the
# classifier returns a code other than 2 or 4.
CLASS_NAMES = {4: "Malignant", 2: "Benign"}
for measures, predicted in zip(example_measures, prediction):
    diagnosis = CLASS_NAMES.get(int(predicted), "Unknown class {}".format(predicted))
    print(measures, diagnosis, sep=', ')









    



[9 2 2 3 2 2 5 4 2], Malignant
[4 2 1 1 1 2 3 2 1], Benign
[3 2 2 5 2 2 5 4 2], Benign



In [ ]: