notebook.community

Edit and run



In [32]:

    
from numpy import *
def loadSimpData():
    """Return the five-sample toy dataset used throughout this notebook.

    Returns:
        tuple: ``(dataMat, classLabels)`` where ``dataMat`` is a 5x2
        ``numpy.matrix`` with one sample per row, and ``classLabels`` is a
        list of five floats, each ``1.0`` or ``-1.0``, aligned with the rows.
    """
    samples = [
        [1., 2.1],
        [2., 1.1],
        [1.3, 1.],
        [1., 1.],
        [2., 1.],
    ]
    labels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return matrix(samples), labels



In [35]:

    
%matplotlib inline
import matplotlib 
from matplotlib import pyplot as plt
def plotData(dataMat, classLabels):
    """Scatter-plot a 2-feature dataset with one marker style per class.

    Samples whose label equals ``1.0`` are drawn with the default marker;
    every other sample is drawn with square markers. The figure is shown
    immediately via ``plt.show()``.

    Args:
        dataMat: 2-D collection of samples, one row per sample; columns 0 and
            1 are used as the x and y coordinates. A ``numpy.matrix`` or a
            plain ndarray is accepted.
        classLabels: flat sequence of labels aligned with the rows of
            ``dataMat``.

    Returns:
        None.
    """
    labels = asarray(classLabels).ravel()
    # Only the first len(classLabels) rows are plotted, as the labels drive
    # the iteration.
    points = asarray(dataMat)[:labels.size]
    is_positive = labels == 1.0
    # Boolean masks keep the arrays 2-D even when a class has no members, so
    # an empty class produces an empty scatter instead of an IndexError.
    plt.scatter(points[is_positive, 0], points[is_positive, 1])
    plt.scatter(points[~is_positive, 0], points[~is_positive, 1], marker='s')
    plt.show()



In [36]:

    
# Load the five-sample toy dataset and draw it, one marker style per class.
dataMat, classLabels=loadSimpData()
plotData(dataMat, classLabels)



In [38]:

    
def stumpClassify(dataMatrix,dimen,threshVal,threshIneq):
    """Classify every sample with a single-feature threshold (decision stump).

    All predictions start as ``+1.0``; samples on the selected side of the
    threshold are then flipped to ``-1.0``.

    Args:
        dataMatrix: 2-D data, one sample per row.
        dimen: index of the feature (column) to compare.
        threshVal: threshold the feature value is compared against.
        threshIneq: ``'lt'`` labels samples with ``value <= threshVal`` as
            ``-1.0``; any other value (conventionally ``'gt'``) labels
            samples with ``value > threshVal`` as ``-1.0``.

    Returns:
        ndarray of shape ``(m, 1)`` holding ``1.0`` / ``-1.0`` predictions,
        where ``m`` is the number of rows in ``dataMatrix``.
    """
    retArray = ones((shape(dataMatrix)[0], 1))
    if threshIneq == 'lt':
        retArray[dataMatrix[:, dimen] <= threshVal] = -1.0
    else:
        # Must assign -1.0: the array is pre-filled with ones, so writing 1.0
        # here would leave every 'gt' prediction at +1.
        retArray[dataMatrix[:, dimen] > threshVal] = -1.0
    return retArray
def buildStump(dataArr,classLabels,D,verbose=True):
    """Find the decision stump with the lowest weighted error.

    Every feature is scanned with evenly spaced thresholds (from one step
    below the feature minimum up to the feature maximum) and both inequality
    directions; the combination with the smallest weighted error wins.

    Args:
        dataArr: 2-D data, one sample per row (converted with ``asmatrix``).
        classLabels: flat sequence of labels, one per sample; compared
            element-wise with the ``1.0`` / ``-1.0`` stump predictions.
        D: column of per-sample weights of shape ``(m, 1)``; the weighted
            error is computed as ``D.T * errArr``.
        verbose: when true (the default) print one line per evaluated
            candidate split.

    Returns:
        tuple: ``(bestStump, minError, bestClassEst)`` where ``bestStump`` is
        a dict with keys ``'dim'``, ``'thresh'`` and ``'ineq'``, ``minError``
        is the best weighted error as produced by ``D.T * errArr`` (``inf``
        if no candidate was evaluated), and ``bestClassEst`` is the ``(m, 1)``
        array of predictions made by the best stump.
    """
    # asmatrix is what the removed numpy alias `mat` used to point to.
    dataMatrix = asmatrix(dataArr)
    labelMat = asmatrix(classLabels).T
    m,n = shape(dataMatrix)
    numSteps = 10.0
    bestStump = {}
    bestClassEst = asmatrix(zeros((m,1)))
    minError = inf
    for i in range(n):
        rangeMin = dataMatrix[:,i].min()
        rangeMax = dataMatrix[:,i].max()
        stepSize = (rangeMax - rangeMin) / numSteps
        for j in range(-1, int(numSteps)+1):
            # The threshold does not depend on the inequality direction.
            threshVal = (rangeMin + float(j) * stepSize)
            for inequal in ['lt', 'gt']:
                predictedVals = stumpClassify(dataMatrix,i,threshVal,inequal)
                # 1 marks a misclassified sample, 0 a correct one.
                errArr = asmatrix(ones((m,1)))
                errArr[predictedVals == labelMat] = 0
                weightedError = D.T*errArr
                if verbose:
                    # .item() extracts the scalar explicitly instead of
                    # relying on implicit array-to-float conversion.
                    print("split: dim %d, thresh %.2f, thresh inequal: %s, the weighted error is %.3f" 
                          %(i,threshVal,inequal,weightedError.item()))
                if weightedError < minError:
                    minError = weightedError
                    bestClassEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    # Return only after every feature has been scanned; returning from inside
    # the outer loop would stop the search after the first feature.
    return bestStump,minError,bestClassEst



In [ ]: