In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
In [2]:
# Load the Iris measurements table and preview the first rows.
# NOTE(review): hardcoded absolute path — confirm /data/iris.csv exists
# in the target environment (consider a configurable DATA_DIR).
iris_csv_path = "/data/iris.csv"
df = pd.read_csv(iris_csv_path)
df.head()
Out[2]:
In [3]:
# The two measurement columns used as model inputs for every
# classifier below (2-D so decision regions can be plotted).
features = [
    "SepalLengthCm",
    "PetalLengthCm",
]
In [4]:
# Class balance: one row count per species (Iris is 50/50/50).
df["Species"].value_counts()
Out[4]:
In [5]:
# One scatter layer per species on a shared axis, colour-coded by class.
fig, ax = plt.subplots()
colors = ["red", "green", "blue"]
for color, species in zip(colors, df.Species.unique()):
    df[df.Species == species].plot.scatter(
        x=features[0], y=features[1], label=species, ax=ax, color=color)
In [6]:
# Explicit submodule imports instead of `from sklearn import *`:
# a wildcard import hides where names come from and pollutes the
# namespace. These are all the sklearn submodules used below.
from sklearn import (ensemble, linear_model, metrics, model_selection,
                     neighbors, pipeline, preprocessing, tree)
from mlxtend.plotting import plot_decision_regions
In [7]:
# Binary task: Iris-setosa (1) vs. the rest (0) on the two features above.
y = np.where(df.Species == "Iris-setosa", 1, 0)
X = df[features].values
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=1)
# Scale, then fit a plain logistic regression (no polynomial expansion).
pipe = pipeline.Pipeline([
    ("scaler", preprocessing.StandardScaler()),
    ("est", linear_model.LogisticRegression(random_state=1, solver="lbfgs")),
])
pipe.fit(X_train, y_train)
y_test_pred = pipe.predict(X_test)
# Held-out metrics; positive class (1) is Iris-setosa.
print("accuracy:", metrics.accuracy_score(y_test, y_test_pred))
print("precision:", metrics.precision_score(y_test, y_test_pred))
print("recall:", metrics.recall_score(y_test, y_test_pred))
print("f1_score:", metrics.f1_score(y_test, y_test_pred))
plt.figure(figsize=(8, 8))
# Test points are highlighted (circled) on the decision-region plot.
plot_decision_regions(X, y, pipe, X_highlight=X_test)
Out[7]:
In [8]:
# Binary task: Iris-virginica (1) vs. the rest (0).
y = np.where(df.Species == "Iris-virginica", 1, 0)
X = df[features].values
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=1)
# Same linear pipeline as the setosa cell: scale + logistic regression.
pipe = pipeline.Pipeline([
    ("scaler", preprocessing.StandardScaler()),
    ("est", linear_model.LogisticRegression(random_state=1, solver="lbfgs")),
])
pipe.fit(X_train, y_train)
y_test_pred = pipe.predict(X_test)
# Held-out metrics; positive class (1) is Iris-virginica.
print("accuracy:", metrics.accuracy_score(y_test, y_test_pred))
print("precision:", metrics.precision_score(y_test, y_test_pred))
print("recall:", metrics.recall_score(y_test, y_test_pred))
print("f1_score:", metrics.f1_score(y_test, y_test_pred))
plt.figure(figsize=(8, 8))
plot_decision_regions(X, y, pipe, X_highlight=X_test)
Out[8]:
In [9]:
# Binary task: Iris-versicolor (1) vs. the rest (0). Versicolor is not
# linearly separable from the other two on these features, so a plain
# linear model is expected to do worse than in the previous cells.
y = np.where(df.Species == "Iris-versicolor", 1, 0)
X = df[features].values
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=1)
pipe = pipeline.Pipeline([
    ("scaler", preprocessing.StandardScaler()),
    ("est", linear_model.LogisticRegression(random_state=1, solver="lbfgs")),
])
pipe.fit(X_train, y_train)
y_test_pred = pipe.predict(X_test)
# Held-out metrics; positive class (1) is Iris-versicolor.
print("accuracy:", metrics.accuracy_score(y_test, y_test_pred))
print("precision:", metrics.precision_score(y_test, y_test_pred))
print("recall:", metrics.recall_score(y_test, y_test_pred))
print("f1_score:", metrics.f1_score(y_test, y_test_pred))
plt.figure(figsize=(8, 8))
plot_decision_regions(X, y, pipe, X_highlight=X_test)
Out[9]:
In [10]:
# Same versicolor-vs-rest task, now with degree-4 polynomial features so
# the logistic regression can learn a non-linear decision boundary.
y = np.where(df.Species == "Iris-versicolor", 1, 0)
X = df[features].values
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=1)
pipe = pipeline.Pipeline([
    # include_bias=False: LogisticRegression already fits an intercept.
    ("poly", preprocessing.PolynomialFeatures(degree=4, include_bias=False)),
    ("scaler", preprocessing.StandardScaler()),
    ("est", linear_model.LogisticRegression(random_state=1, solver="lbfgs")),
])
pipe.fit(X_train, y_train)
y_test_pred = pipe.predict(X_test)
# Held-out metrics; positive class (1) is Iris-versicolor.
print("accuracy:", metrics.accuracy_score(y_test, y_test_pred))
print("precision:", metrics.precision_score(y_test, y_test_pred))
print("recall:", metrics.recall_score(y_test, y_test_pred))
print("f1_score:", metrics.f1_score(y_test, y_test_pred))
plt.figure(figsize=(8, 8))
plot_decision_regions(X, y, pipe, X_highlight=X_test)
Out[10]:
In [11]:
# Versicolor-vs-rest with a depth-limited decision tree. Trees are
# scale-invariant, so no scaler step is needed here.
y = np.where(df.Species == "Iris-versicolor", 1, 0)
X = df[features].values
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=1)
pipe = pipeline.Pipeline([
    ("est", tree.DecisionTreeClassifier(random_state=1, max_depth=3)),
])
pipe.fit(X_train, y_train)
y_test_pred = pipe.predict(X_test)
# Held-out metrics; positive class (1) is Iris-versicolor.
print("accuracy:", metrics.accuracy_score(y_test, y_test_pred))
print("precision:", metrics.precision_score(y_test, y_test_pred))
print("recall:", metrics.recall_score(y_test, y_test_pred))
print("f1_score:", metrics.f1_score(y_test, y_test_pred))
plt.figure(figsize=(8, 8))
# Expect axis-aligned rectangular decision regions from the tree splits.
plot_decision_regions(X, y, pipe, X_highlight=X_test)
Out[11]:
In [16]:
# Versicolor-vs-rest with a small random forest (20 depth-3 trees).
# Like single trees, forests need no feature scaling.
y = np.where(df.Species == "Iris-versicolor", 1, 0)
X = df[features].values
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=1)
pipe = pipeline.Pipeline([
    ("est", ensemble.RandomForestClassifier(
        random_state=1, max_depth=3, n_estimators=20)),
])
pipe.fit(X_train, y_train)
y_test_pred = pipe.predict(X_test)
# Held-out metrics; positive class (1) is Iris-versicolor.
print("accuracy:", metrics.accuracy_score(y_test, y_test_pred))
print("precision:", metrics.precision_score(y_test, y_test_pred))
print("recall:", metrics.recall_score(y_test, y_test_pred))
print("f1_score:", metrics.f1_score(y_test, y_test_pred))
plt.figure(figsize=(8, 8))
plot_decision_regions(X, y, pipe, X_highlight=X_test)
Out[16]:
In [13]:
# Versicolor-vs-rest with 5-nearest-neighbours.
# NOTE(review): KNN is distance-based, so scaling the features would
# normally matter; the scaler is deliberately omitted here — confirm.
y = np.where(df.Species == "Iris-versicolor", 1, 0)
X = df[features].values
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=1)
pipe = pipeline.Pipeline([
    ("est", neighbors.KNeighborsClassifier(n_neighbors=5)),
])
pipe.fit(X_train, y_train)
y_test_pred = pipe.predict(X_test)
# Held-out metrics; positive class (1) is Iris-versicolor.
print("accuracy:", metrics.accuracy_score(y_test, y_test_pred))
print("precision:", metrics.precision_score(y_test, y_test_pred))
print("recall:", metrics.recall_score(y_test, y_test_pred))
print("f1_score:", metrics.f1_score(y_test, y_test_pred))
plt.figure(figsize=(8, 8))
plot_decision_regions(X, y, pipe, X_highlight=X_test)
Out[13]:
In [14]:
# Multiclass task: encode the three species as integer labels 0..2 and
# classify all of them at once with 5-nearest-neighbours.
y = preprocessing.LabelEncoder().fit_transform(df.Species)
X = df[features].values
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=1)
pipe = pipeline.Pipeline([
    ("est", neighbors.KNeighborsClassifier(n_neighbors=5)),
])
pipe.fit(X_train, y_train)
y_test_pred = pipe.predict(X_test)
print("accuracy:", metrics.accuracy_score(y_test, y_test_pred))
# Multiclass precision/recall/F1 need an averaging strategy (they raise
# without one); macro-average weights all three classes equally.
print("precision:", metrics.precision_score(y_test, y_test_pred, average="macro"))
print("recall:", metrics.recall_score(y_test, y_test_pred, average="macro"))
print("f1_score:", metrics.f1_score(y_test, y_test_pred, average="macro"))
plt.figure(figsize=(8, 8))
plot_decision_regions(X, y, pipe, X_highlight=X_test)
Out[14]:
In [15]:
# Multiclass logistic regression with degree-3 polynomial features,
# trained one-vs-rest on the three integer-encoded species.
y = preprocessing.LabelEncoder().fit_transform(df.Species)
X = df[features].values
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.3, random_state=1)
pipe = pipeline.Pipeline([
    ("poly", preprocessing.PolynomialFeatures(degree=3, include_bias=False)),
    ("scaler", preprocessing.StandardScaler()),
    # NOTE(review): `multi_class` is deprecated in scikit-learn >= 1.5;
    # on newer versions wrap the estimator in
    # sklearn.multiclass.OneVsRestClassifier instead.
    ("est", linear_model.LogisticRegression(random_state=1,
                                            multi_class="ovr",
                                            solver="liblinear")),
])
pipe.fit(X_train, y_train)
y_test_pred = pipe.predict(X_test)
print("accuracy:", metrics.accuracy_score(y_test, y_test_pred))
# Multiclass precision/recall/F1 need an averaging strategy (they raise
# without one); macro-average weights all three classes equally.
print("precision:", metrics.precision_score(y_test, y_test_pred, average="macro"))
print("recall:", metrics.recall_score(y_test, y_test_pred, average="macro"))
print("f1_score:", metrics.f1_score(y_test, y_test_pred, average="macro"))
plt.figure(figsize=(8, 8))
plot_decision_regions(X, y, pipe, X_highlight=X_test)
Out[15]:
In [ ]: