In [1]:
%matplotlib inline
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from numpy import *
import sys
import csv
In [34]:
# Load the comma-separated iris file; it has no header row, so columns are
# addressed by position.
input_filename = "iris.data.txt"
att = pd.read_csv(input_filename, header=None)
# Design matrix: a leading column of ones (intercept term) followed by every
# column except the last one.
H = np.c_[np.ones(len(att)), att.iloc[:, :-1]]
# Binary target taken from the last column: 1.0 for "Iris-setosa" rows,
# 0.0 for every other row.
y = (att.iloc[:, -1] == "Iris-setosa").values.astype(float)
# Class names indexed by target value: label[0] -> 0.0, label[1] -> 1.0.
label = ["Iris-virginica/versicolor", "Iris-setosa"]
In [3]:
def compute_norma(vector):
    """Return the Euclidean (L2) norm of ``vector``.

    Parameters
    ----------
    vector : array_like
        Numeric values. Plain Python sequences (lists, tuples) are accepted
        as well as NumPy arrays. For multi-dimensional input every element
        contributes to the sum, i.e. the result is the Frobenius norm.

    Returns
    -------
    numpy scalar
        Square root of the sum of the squared elements.
    """
    # Convert first: ``some_list ** 2`` raises TypeError, whereas an ndarray
    # squares element-wise. Arrays pass through np.asarray without a copy.
    values = np.asarray(vector)
    norma = np.sqrt(np.sum(values ** 2))
    return norma
In [16]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated element-wise.

    Parameters
    ----------
    x : scalar or array_like
        Input value(s).

    Returns
    -------
    numpy scalar or ndarray
        Values in ``[0, 1]`` with the same shape as ``x``.

    Notes
    -----
    The direct formula overflows in ``exp(-x)`` for large negative ``x``
    (emitting a RuntimeWarning). Using the identity
    ``sigmoid(x) = exp(-log(1 + exp(-x)))`` with ``np.logaddexp`` keeps every
    intermediate finite. Results may differ from the direct formula in the
    last few bits.
    """
    z = np.asarray(x)
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)),
    # computed without ever forming exp(-z) on its own.
    sig = np.exp(-np.logaddexp(0.0, -z))
    return sig
In [17]:
def step_gradient(H, w_current, y, learning_rate):
    """Take one gradient-ascent step for logistic regression.

    Parameters
    ----------
    H : ndarray
        Design matrix; assumed 2-D with one row per sample and one column
        per weight (TODO confirm against callers).
    w_current : ndarray
        Current weight vector, one entry per column of ``H``.
    y : ndarray
        Target values, one per row of ``H`` (assumed 1-D).
    learning_rate : float
        Step size applied to the gradient.

    Returns
    -------
    list
        ``[w, norma]``: the updated weights and the norm of the gradient
        that was used for the update.
    """
    # Residual between the targets and the predicted probabilities.
    diff = y - sigmoid(np.dot(H, w_current))
    # Gradient H^T (y - p) as a single matrix-vector product. This is the
    # same vector as broadcasting ``diff`` across H and summing over the
    # samples, but without materialising an (n_samples, n_weights) temporary.
    partial = np.dot(H.transpose(), diff)
    norma = compute_norma(partial)
    # Ascent: move *along* the gradient to increase the objective.
    w = w_current + (learning_rate * partial)
    return [w, norma]
In [21]:
def gradient_ascendent(H, y, learning_rate, epsilon, max_iterations=None):
    """Run gradient ascent until the gradient norm is no longer above ``epsilon``.

    Parameters
    ----------
    H : ndarray
        Design matrix; ``H.shape[1]`` determines the number of weights.
    y : ndarray
        Target values passed through to ``step_gradient``.
    learning_rate : float
        Step size passed through to ``step_gradient``.
    epsilon : float
        Convergence threshold on the gradient norm.
    max_iterations : int or None, optional
        Upper bound on the number of update steps. ``None`` (the default)
        means no bound, so the loop then only ends once the gradient norm
        is no longer above ``epsilon``.

    Returns
    -------
    list
        ``[w, num_iterations, gradient]``: the final weights, the number of
        update steps performed, and the last gradient norm. ``gradient`` is
        ``inf`` if no step was performed (``max_iterations=0``).
    """
    w = np.zeros(H.shape[1])  # one weight per column of H, starting at zero
    num_iterations = 0
    # Start from +inf rather than an arbitrary finite value so that at least
    # one real gradient is computed for any finite epsilon; the returned norm
    # is then always a measured value and never the placeholder.
    gradient = np.inf
    while gradient > epsilon:
        # Optional safety net: without it a diverging or oscillating run
        # (e.g. learning rate too large) would never terminate.
        if max_iterations is not None and num_iterations >= max_iterations:
            break
        w, gradient = step_gradient(H, w, y, learning_rate)
        num_iterations += 1
    return [w, num_iterations, gradient]
In [22]:
# Settings for the hand-written optimiser.
learning_rate = 0.0053  # step size of each update
epsilon = 0.001         # iterate while the gradient norm is above this

w, num_iterations, norm_gradient = gradient_ascendent(
    H, y, learning_rate, epsilon
)
print(
    "Norma: {0}\nw: {1}\nnum_iterations: {2}\n\n".format(
        norm_gradient, w, num_iterations
    )
)
In [31]:
# Reference fit with scikit-learn.
# C is the inverse of the regularization strength, so a very large C
# effectively turns regularization off.
# The first column of H (the ones) is dropped; the intercept is read from
# the fitted estimator instead.
reg = LogisticRegression(C=1e15).fit(H[:, 1:], y)
print("\nCoef with scikit-learn: {0}".format(reg.coef_))
print("\nIntercept with scikit-learn: {0}".format(reg.intercept_))
In [104]:
#Return the flower name and probability
def predict(w, x, label):
    """Classify the sample(s) in ``x`` using the weight vector ``w``.

    Parameters
    ----------
    w : ndarray
        Weight vector.
    x : ndarray
        One sample, or a 2-D array with one sample per row, laid out like
        the rows of the training matrix.
    label : sequence
        Two class names: ``label[0]`` is used when the sigmoid output rounds
        to zero, ``label[1]`` otherwise.

    Returns
    -------
    list
        ``[class_name, probability]`` where ``probability`` is the
        percentage (rounded to 3 decimals) assigned to the chosen class.
    """
    scores = sigmoid(np.dot(w, x.T))
    # Rounding the score and testing for non-zero is the decision rule.
    picks_second = np.round(scores).astype(bool)
    class_name = np.where(picks_second, label[1], label[0])
    # Probability of the chosen class: the larger of p and 1 - p.
    confidence = np.maximum(scores, 1 - scores)
    return [class_name, np.around(confidence * 100, 3)]
In [105]:
# One hand-written sample per species; the leading 1 in each row lines up
# with the column of ones in the training matrix.
x = np.array([
    [1, 7.2, 3.2, 6.0, 1.8],  # Iris-virginica
    [1, 5.0, 2.3, 3.3, 1.0],  # Iris-versicolor
    [1, 5.1, 3.8, 1.5, 0.3],  # Iris-setosa
])
class_name, prob = predict(w, x, label)
for name, p in zip(class_name, prob):
    print("Class: {0}, probability: {1}%.".format(name, p))