In [4]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy import linalg
from scipy import io
In [5]:
from sklearn import linear_model as lmd
In [6]:
InFile1 = 'LinSepC1.mat'
InFile2 = 'LinSepC2.mat'
C1Dict = io.loadmat(InFile1)
C2Dict = io.loadmat(InFile2)
C1 = C1Dict['LinSepC1']
C2 = C2Dict['LinSepC2']
NSampsClass = 200
NSamps = 2*NSampsClass
In [7]:
### Set Target Outputs: +1 for Class 1, -1 for Class 2 ###
TargetOutputs = np.ones((NSamps,1))
TargetOutputs[NSampsClass:NSamps] = -TargetOutputs[NSampsClass:NSamps]
In [8]:
AllSamps = np.concatenate((C1,C2),axis=0)
In [9]:
AllSamps.shape
Out[9]:
In [10]:
#import sklearn
#sklearn.__version__
In [11]:
# Inspect the docstring for the fit method
lmd.LinearRegression.fit?
In [12]:
M = lmd.LinearRegression()
In [13]:
print(M)
In [15]:
LinMod = M.fit(AllSamps, TargetOutputs)   # fit returns the fitted model itself
In [16]:
R = LinMod.score(AllSamps, TargetOutputs)   # R^2, the coefficient of determination
In [17]:
print(R)
In [18]:
LinMod
Out[18]:
In [19]:
w = LinMod.coef_
w
Out[19]:
In [20]:
w0 = LinMod.intercept_
w0
Out[20]:
In [21]:
### Question: How would we compute the outputs of the regression model?
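One possibility (a sketch): use the fitted model's predict method, or apply the learned weights directly.
In [ ]:
# Sketch: two equivalent ways to compute the regression outputs
RegOutputs = LinMod.predict(AllSamps)             # scikit-learn's predict method
RegOutputsByHand = AllSamps.dot(w.T) + w0         # X w^T + w0, computed directly
print(np.allclose(RegOutputs, RegOutputsByHand))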
Learn About Kernels
Do some SVM Classification
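The kernel is chosen in the SVC constructor; the cells below keep the default RBF kernel. A sketch of the common options:
In [ ]:
# Sketch: common kernel choices for SVC (the default is the RBF kernel)
from sklearn.svm import SVC
LinearSVM = SVC(kernel='linear')         # linear kernel
PolySVM = SVC(kernel='poly', degree=3)   # polynomial kernel of degree 3
RbfSVM = SVC(kernel='rbf')               # radial basis function (Gaussian) kernel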
In [22]:
from sklearn.svm import SVC
In [23]:
### SVC wants a 1d array, not a column vector
Targets = np.ravel(TargetOutputs)
In [24]:
InitSVM = SVC()
InitSVM
Out[24]:
In [25]:
TrainedSVM = InitSVM.fit(AllSamps, Targets)
In [26]:
y = TrainedSVM.predict(AllSamps)
In [27]:
plt.figure(1)
plt.plot(y)
plt.show()
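A quick sanity check (a sketch): the fraction of training samples the SVM labels correctly.
In [ ]:
# Sketch: training-set accuracy of the fitted SVM
print(np.mean(y == Targets))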
In [28]:
d = TrainedSVM.decision_function(AllSamps)
In [29]:
plt.figure(1)
plt.plot(d)
plt.show()
Can try it with Outliers if we have time
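A sketch of what that could look like (the outliers here are made up by perturbing a few class-1 samples):
In [ ]:
# Sketch: push a few samples far from their class and re-fit the SVM
rng = np.random.RandomState(0)
NoisySamps = AllSamps.copy()
NoisySamps[:5] += 10.0*rng.randn(5, AllSamps.shape[1])   # five artificial outliers
OutlierSVM = SVC().fit(NoisySamps, Targets)
plt.plot(OutlierSVM.decision_function(NoisySamps))
plt.show()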
Let's look at some spectra
In [30]:
### Look at some Pine and Oak spectra from
### NEON Site D03 Ordway-Swisher Biological Station
### at UF
### Pinus palustris
### Quercus virginiana
InFile1 = 'Pines.mat'
InFile2 = 'Oaks.mat'
C1Dict = io.loadmat(InFile1)
C2Dict = io.loadmat(InFile2)
Pines = C1Dict['Pines']
Oaks = C2Dict['Oaks']
In [31]:
WvFile = 'NEONWvsNBB.mat'
WvDict = io.loadmat(WvFile)
Wv = WvDict['NEONWvsNBB']
In [32]:
Pines.shape
Out[32]:
In [33]:
Oaks.shape
Out[33]:
In [34]:
NBands = Wv.shape[0]
print(NBands)
Notice that these training sets are unbalanced
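If we wanted to train on the full, unequal-sized Pines and Oaks sets, scikit-learn can reweight the classes for us. A sketch, assuming the same -1 = pine, +1 = oak labeling used below:
In [ ]:
# Sketch: fit on all spectra, letting SVC weight classes inversely to their frequencies
AllSpectra = np.concatenate((Pines, Oaks), axis=0)
AllLabels = np.concatenate((-np.ones(Pines.shape[0]), np.ones(Oaks.shape[0])))
BalancedSVM = SVC(class_weight='balanced').fit(AllSpectra, AllLabels)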
In [35]:
NTrainSampsClass = 600
NTestSampsClass = 200
Targets = np.ones((2*NTrainSampsClass, 1))
Targets[:NTrainSampsClass] = -Targets[:NTrainSampsClass]   # Pines are labeled -1, Oaks +1
Targets = np.ravel(Targets)
print(Targets.shape)
In [36]:
plt.figure(111)
plt.plot(Targets)
plt.show()
In [37]:
TrainPines = Pines[0:600,:]
TrainOaks = Oaks[0:600,:]
#TrainSet = np.concatenate?
In [38]:
TrainSet = np.concatenate((TrainPines, TrainOaks), axis=0)
print(TrainSet.shape)
In [39]:
plt.figure(3)
### Plot Pine Training Spectra ###
plt.subplot(121)
plt.plot(Wv, TrainPines.T)
plt.ylim((0.0,0.8))
plt.xlim((Wv[0], Wv[NBands-1]))
### Plot Oak Training Spectra ###
plt.subplot(122)
plt.plot(Wv, TrainOaks.T)
plt.ylim((0.0,0.8))
plt.xlim((Wv[0], Wv[NBands-1]))
plt.show()
In [40]:
InitSVM = SVC()
In [41]:
TrainedSVM = InitSVM.fit(TrainSet, Targets)
In [42]:
d = TrainedSVM.decision_function(TrainSet)
plt.figure(4)
plt.plot(d)
plt.show()
Does this seem to be too good to be true?
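One quick check (a sketch): score the model on the data it was trained on; a perfect score there says nothing about generalization.
In [ ]:
# Sketch: accuracy on the training spectra themselves
print(TrainedSVM.score(TrainSet, Targets))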
In [43]:
TestPines = Pines[NTrainSampsClass:NTrainSampsClass+NTestSampsClass, :]
TestOaks = Oaks[NTrainSampsClass:NTrainSampsClass+NTestSampsClass, :]
In [44]:
TestSet = np.concatenate((TestPines, TestOaks), axis=0)
print(TestSet.shape)
In [45]:
dtest = TrainedSVM.decision_function(TestSet)
In [46]:
plt.figure(5)
plt.plot(dtest)
plt.show()
Yeah, too good to be true... What can we do?
Error analysis: identify the characteristics of the errors, try different magic numbers (hyperparameters) using cross-validation, and so on.
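For instance, a cross-validated grid search over the SVC hyperparameters might look like this (a sketch; the parameter ranges are illustrative, and the test labels are built with the same -1 = pine, +1 = oak convention):
In [ ]:
# Sketch: cross-validated search over C and gamma (ranges are illustrative, not tuned)
from sklearn.model_selection import GridSearchCV
ParamGrid = {'C': [0.1, 1.0, 10.0, 100.0], 'gamma': [1e-4, 1e-3, 1e-2, 1e-1]}
Search = GridSearchCV(SVC(kernel='rbf'), ParamGrid, cv=5)
Search.fit(TrainSet, Targets)
print(Search.best_params_, Search.best_score_)
# Evaluate on the held-out spectra
TestTargets = np.ravel(np.concatenate((-np.ones((NTestSampsClass,1)), np.ones((NTestSampsClass,1)))))
print(Search.score(TestSet, TestTargets))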
In [ ]: