In [ ]:
import numpy as np
import matplotlib as mp
from sklearn import datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
In [ ]:
# Load the Iris sample data set from scikit-learn's datasets module.
# The result is kept in `dataset` and reused by the cells that follow.
dataset = datasets.load_iris()
In [ ]:
# Display the whole loaded data set object. This is the full sample data;
# no separate test split has been made.
dataset
In [ ]:
# The Iris species names that the numeric class labels stand for
dataset.target_names
In [ ]:
# The features: names of the measurements recorded for each Iris sample
dataset.feature_names
In [ ]:
# Feature values of the first 10 Iris samples
dataset.data[:10]
In [ ]:
# The target: the class label assigned to each of those first 10 samples
dataset.target[:10]
Here 0 corresponds to setosa, the first entry in the 'target_names' array.
In [ ]:
# Create the model. max_iter is raised above scikit-learn's default of 100
# because the default lbfgs solver can reach that limit on the unscaled Iris
# measurements and stop with a ConvergenceWarning before it has converged.
model = LogisticRegression(max_iter=1000)
# Train it on the feature matrix and the known species labels. The whole
# data set is used as training data here; nothing is held out for testing.
model.fit(dataset.data, dataset.target)
In [ ]:
# Predict a species for every sample and keep the known labels alongside.
# NOTE: these are the same samples the model was fitted on, so the score
# below is training accuracy, not accuracy on unseen data.
predicted = model.predict(dataset.data)
expected = dataset.target
# Fraction of samples classified correctly. normalize=True and
# sample_weight=None are accuracy_score's defaults, so they are left out.
metrics.accuracy_score(expected, predicted)
In [ ]:
# A small hand-made example: the true labels and one set of predictions for
# seven samples drawn from three classes (ant, bird, cat).
y_true = ["cat", "ant", "cat", "cat", "ant", "bird", "bird"]
y_pred = ["ant", "ant", "cat", "cat", "ant", "cat", "bird"]
In [ ]:
# Fraction of the toy predictions that match the true labels
# (accuracy_score's defaults already normalise and use no sample weights).
metrics.accuracy_score(y_true, y_pred)
5 correct predictions out of 7 values. 71% accuracy
In [ ]:
# Per-class precision, recall and F1 for the toy example. target_names are
# display names matched by position to the sorted labels, hence this order.
print(
    metrics.classification_report(
        y_true,
        y_pred,
        target_names=["ant", "bird", "cat"],
    )
)
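Here we can see how the predictions fared for each class: ant has precision 0.67 (2 of the 3 'ant' predictions are correct) and recall 1.00 (both real ants were found); bird has precision 1.00 but recall 0.50 (only 1 of the 2 birds was found); cat has precision and recall of 0.67 (2 of 3 in each case).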
In [ ]:
# Confusion matrix for the toy example. Passing `labels` states the row and
# column order explicitly; it is the same sorted order scikit-learn would
# pick by default, so the resulting matrix is unchanged.
metrics.confusion_matrix(y_true, y_pred, labels=["ant", "bird", "cat"])
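In the confusion matrix each row is a true label and each column a predicted label, both in sorted label order (ant, bird, cat) unless a `labels` argument specifies a different order.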
In [ ]:
# Per-species precision, recall and F1 for the Iris predictions
print(
    metrics.classification_report(
        expected,
        predicted,
        target_names=dataset.target_names,
    )
)
In [ ]:
# Confusion matrix for the Iris predictions against the known species labels
print(metrics.confusion_matrix(y_true=expected, y_pred=predicted))
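In this confusion matrix each row is a true species and each column a predicted species, ordered by the numeric class labels 0, 1, 2, which correspond to the entries of 'target_names' in order.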