In [32]:
from numpy import *
def loadSimpData():
    """Return a tiny hard-coded two-feature data set with its class labels.

    Returns:
        A ``(dataMat, classLabels)`` pair: a 5x2 NumPy matrix holding one
        sample per row, and a list of five labels (1.0 or -1.0) in the
        same row order.
    """
    samples = [
        [1.0, 2.1],
        [2.0, 1.1],
        [1.3, 1.0],
        [1.0, 1.0],
        [2.0, 1.0],
    ]
    labels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return matrix(samples), labels
In [35]:
%matplotlib inline
import matplotlib
from matplotlib import pyplot as plt
def plotData(dataMat, classLabels):
    """Scatter-plot the first two feature columns, one marker style per class.

    Samples whose label equals 1.0 are drawn with the default marker; every
    other sample is drawn with a square marker. The figure is shown
    immediately via ``plt.show()``.

    Args:
        dataMat: Samples-by-features container (NumPy matrix, array or
            nested list). Columns 0 and 1 are used as x and y.
        classLabels: One label per sample, in row order.
    """
    labels = asarray(classLabels).ravel()
    # asarray (rather than matrix.getA) also accepts plain arrays and lists.
    # Only the rows that have a label are considered, as in a per-label loop.
    points = asarray(dataMat)[:labels.size]
    isPositive = labels == 1.0

    # Boolean-mask selection keeps a 2-D (0, n) shape when a class has no
    # samples, so the column slices below stay valid instead of raising.
    positives = points[isPositive]
    others = points[~isPositive]

    plt.scatter(positives[:, 0], positives[:, 1])
    plt.scatter(others[:, 0], others[:, 1], marker='s')
    plt.show()
In [36]:
# Load the toy data set and draw it; both names stay bound at module level.
dataMat, classLabels = loadSimpData()
plotData(dataMat=dataMat, classLabels=classLabels)
In [38]:
def stumpClassify(dataMatrix, dimen, threshVal, threshIneq):
    """Label every sample by comparing one feature against a threshold.

    Args:
        dataMatrix: Samples-by-features container (NumPy matrix, array or
            nested list).
        dimen: Column index of the feature to test.
        threshVal: Threshold the feature value is compared against.
        threshIneq: ``'lt'`` marks samples with ``feature <= threshVal`` as
            -1.0; any other value marks samples with ``feature > threshVal``
            as -1.0.

    Returns:
        An ``(m, 1)`` float array holding 1.0 for every sample except the
        ones selected by the inequality, which hold -1.0.
    """
    retArray = ones((shape(dataMatrix)[0], 1))
    # A flat 1-D view of the column gives a row mask that works the same
    # for matrices, arrays and nested lists.
    featureColumn = asarray(dataMatrix)[:, dimen]
    if threshIneq == 'lt':
        retArray[featureColumn <= threshVal, 0] = -1.0
    else:
        # This branch must assign -1.0: writing 1.0 into an all-ones array
        # is a no-op and would predict +1 for every sample.
        retArray[featureColumn > threshVal, 0] = -1.0
    return retArray
def buildStump(dataArr, classLabels, D):
    """Search for the single-feature threshold rule with the lowest weighted error.

    For each feature column, thresholds are stepped from one step below the
    column minimum up to the column maximum (``numSteps`` equal steps), and
    both inequality directions are tried through ``stumpClassify``. Each
    candidate is printed together with its weighted error.

    Args:
        dataArr: Samples-by-features data; converted with ``asmatrix``.
        classLabels: One label per sample; converted to a column matrix.
        D: Per-sample weights; ``D.T * errArr`` must yield a single value,
            so an ``(m, 1)`` column matrix is expected.

    Returns:
        A ``(bestStump, minError, bestClassEst)`` tuple: a dict with keys
        ``'dim'``, ``'thresh'`` and ``'ineq'`` describing the best rule, the
        weighted error of that rule (a 1x1 matrix once any candidate has
        been accepted), and a copy of that rule's predictions.
    """
    # asmatrix is used instead of the `mat` alias, which NumPy 2.0 removed.
    dataMatrix = asmatrix(dataArr)
    labelMat = asmatrix(classLabels).T
    m, n = shape(dataMatrix)
    numSteps = 10.0
    bestStump = {}
    bestClassEst = asmatrix(zeros((m, 1)))
    minError = inf
    for i in range(n):
        rangeMin = dataMatrix[:, i].min()
        rangeMax = dataMatrix[:, i].max()
        stepSize = (rangeMax - rangeMin) / numSteps
        for j in range(-1, int(numSteps) + 1):
            # The threshold depends only on j, so compute it once per step.
            threshVal = rangeMin + float(j) * stepSize
            for inequal in ['lt', 'gt']:
                predictedVals = stumpClassify(dataMatrix, i, threshVal, inequal)
                # Start with every sample marked wrong, then clear the hits.
                errArr = asmatrix(ones((m, 1)))
                errArr[predictedVals == labelMat] = 0
                weightedError = D.T * errArr
                # Format the scalar via .item(); implicit conversion of a
                # 1x1 matrix in %-formatting is deprecated in recent NumPy.
                print("split: dim %d, thresh %.2f, thresh inequal: %s, the weighted error is %.3f"
                      % (i, threshVal, inequal, weightedError.item()))
                if weightedError < minError:
                    minError = weightedError
                    bestClassEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump, minError, bestClassEst
In [ ]: