In [1]:
import pandas as pd
import numpy as np
In [ ]:
iris = pd.read_csv("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
                    header=None)
print(iris.shape)
print(iris.columns)
print(iris.head())
print(iris.describe())
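If the UCI URL is unavailable, the same dataset ships with scikit-learn; the sketch below is an alternative load path, not what the notebook above uses. Note the species labels differ slightly from the UCI strings (e.g. "setosa" vs "Iris-setosa"), so it is kept in a separate iris_alt frame.
In [ ]:
# alternative: build the same table from scikit-learn's bundled copy
from sklearn.datasets import load_iris
bunch = load_iris()
iris_alt = pd.DataFrame(bunch.data, columns=bunch.feature_names)
iris_alt['Species'] = pd.Series(bunch.target).map(dict(enumerate(bunch.target_names)))
print(iris_alt.head())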
In [17]:
# renaming columns
iris.columns = ["Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Species"]
In [49]:
# normalization function
def normalize(x):
    num = x - np.min(x)
    denom = np.max(x) - np.min(x)
    return num / denom
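As a quick sanity check, applying normalize to a small array should map the minimum to 0 and the maximum to 1 (the array values below are illustrative, not from the dataset).
In [ ]:
# min-max scaling check: (x - min) / (max - min) maps the range onto [0, 1]
sample = np.array([2.0, 4.0, 6.0, 10.0])
print(normalize(sample))   # expected: [0.   0.25 0.5  1.  ]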
In [52]:
# normalization
iris_subset = iris.iloc[:, 0:4]   # all four feature columns; Species stays out
iris_norm = normalize(iris_subset)
print(iris_subset.head())
print(iris_norm.head())
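The same min-max scaling is available in scikit-learn as MinMaxScaler; the cell below is an equivalent alternative to the manual formula, not what the rest of the notebook uses.
In [ ]:
# equivalent column-wise min-max scaling with scikit-learn
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
iris_norm_sk = pd.DataFrame(scaler.fit_transform(iris_subset),
                            columns=iris_subset.columns)
print(iris_norm_sk.head())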
In [57]:
# Train test split
from sklearn.model_selection import train_test_split
iris_features = iris_norm
iris_labels = iris['Species']
features_train, features_test, labels_train, labels_test = train_test_split(
    iris_features, iris_labels, test_size=0.33, random_state=77)
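It is worth confirming that the split has the expected sizes and that all three species appear in both sets; this quick check is not part of the original notebook.
In [ ]:
# split sizes and class balance
print(features_train.shape, features_test.shape)
print(labels_train.value_counts())
print(labels_test.value_counts())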
In [66]:
# Machine learning
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(features_train, labels_train)
predictions = knn.predict(features_test)
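The choice of k = 3 is arbitrary; a small sweep over candidate values of n_neighbors on the same train/test split (a sketch, not part of the original notebook) shows how the test accuracy varies.
In [ ]:
# try a few values of k and compare test accuracy
from sklearn.metrics import accuracy_score
for k in [1, 3, 5, 7, 9]:
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(features_train, labels_train)
    acc = accuracy_score(labels_test, model.predict(features_test))
    print(k, acc)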
In [69]:
# Model evaluation
from sklearn.metrics import accuracy_score
accuracy_score(labels_test, predictions)
Out[69]:
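Accuracy alone hides which species get confused with each other; scikit-learn's confusion_matrix and classification_report give a per-class breakdown (an additional check, not part of the original notebook).
In [ ]:
# per-class evaluation: confusion matrix and precision/recall/F1
from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(labels_test, predictions))
print(classification_report(labels_test, predictions))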