In [61]:
# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('bmh')
%matplotlib inline
In [62]:
# Download the Iris CSV published in the Rdatasets repository and parse it
# into a DataFrame.
data_url = 'https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/datasets/iris.csv'
df = pd.read_csv(filepath_or_buffer=data_url)
In [63]:
# Preview the first five rows of the freshly loaded frame
df.head(n=5)
Out[63]:
In [64]:
# Remove the first column (Unnamed: 0), which is not a measurement.
# iloc selects by integer position, and Python positions are zero-based:
#   [:, 1:]  ->  every row, every column from position 1 (the second) onwards.
#
# The slice is guarded so that re-running this cell is a no-op. Without the
# guard, a second run would silently discard the sepal-length feature, because
# `df` is rebuilt from itself.
# NOTE(review): assumes the raw header spells the first feature `Sepal.Length`
# (the rename step below implies dotted names) — confirm against the CSV.
if df.columns[0] not in ("Sepal.Length", "Sepal_Length"):
    df = df.iloc[:, 1:]
df.head()
Out[64]:
In [65]:
# Swap the dotted column headers for underscore-separated ones.
new_columns = [
    "Sepal_Length",
    "Sepal_Width",
    "Petal_Length",
    "Petal_Width",
    "Species",
]
# Assignment is positional: the list must have one entry per existing column.
df.columns = new_columns
In [66]:
# Check the renamed headers on the first five rows
df.head(n=5)
Out[66]:
In [67]:
# List the distinct species labels (the values themselves, not a count)
pd.unique(df['Species'])
Out[67]:
In [68]:
# Encode every species name as an integer label stored in a new `Class` column.
species_to_class = {'setosa': 0, 'versicolor': 1, 'virginica': 2}
df['Class'] = df['Species'].map(species_to_class)
df.head()
Out[68]:
In [69]:
# Keep only the rows whose label is not 2, leaving classes 0 and 1
# for a binary prediction task
df_two = df.loc[df['Class'].ne(2)]
In [70]:
# Get basic summary statistics for the two-class subset
# (the result is displayed as this cell's output)
df_two.describe()
Out[70]:
In [71]:
# import Logistic regression classifier
from sklearn.linear_model import LogisticRegression
In [72]:
# Assign the features to X and the numeric label (Class) to y.
# Columns are picked by name rather than by position, so the selection does
# not depend on the exact column order or on how many helper columns exist.
feature_columns = ["Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width"]
# X: NumPy array with the four measurement columns for every row
X = df_two[feature_columns].values
# y: the `Class` column, kept as a pandas Series
y = df_two["Class"]
In [73]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable from run to run
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)
In [74]:
# Initialize the model with its default settings
logistic = LogisticRegression()
# Train the model on the training split
logistic.fit(X_train, y_train)
# Get the accuracy score on the testing data
score = logistic.score(X_test, y_test)
# Let the format spec do the rounding instead of calling `score.round(4)`:
#  - depending on the scikit-learn version, `score` may be a plain Python
#    float, which has no `.round()` method (AttributeError);
#  - multiplying an already-rounded value by 100 can expose floating-point
#    noise in the printed number (e.g. 56.99999999999999).
print("Accuracy is: {:.2f}%".format(score * 100))
In [75]:
# Ask the fitted model for probability estimates rather than hard labels,
# then show only the first five test rows
test_probabilities = logistic.predict_proba(X_test)
test_probabilities[:5]
Out[75]:
In [76]:
# Read the CSV again from the same URL and preview the first five rows.
# NOTE(review): this rebinds `df` to the raw, unprocessed data, replacing the
# cleaned frame built in the earlier cells. A separate name (e.g. `df_raw`)
# would avoid clobbering it — confirm nothing later relies on `df` being raw.
df = pd.read_csv(filepath_or_buffer=data_url)
df.head(n=5)
Out[76]:
In [ ]: