Python-CGLS:



In [17]:

    
import numpy as np
from sklearn.linear_model import Ridge



In [18]:

    
def line_search(weights, weights_bar, lmbda, outputs, outputs_bar, y, c, d, num_of_examples):

    diff = y * (outputs_bar - outputs)
    #p = np.where(diff != 0).shape[0] # number of breakpoints(?)
    neg_idxs = np.where(y * outputs < 1 & diff < 0)[0]
    pos_idxs = np.where(y * outputs < 1 & diff > 0)[0]
    
    # compute L, R from  sorted_idxs
    s = np.zeros_like(y)
    s[neg_idxs] = -1
    s[pos_idxs] = 1
    
    omegaL = np.sum(weights * (weights_bar - weights) * lmbda)
    omegaL = np.sum(weights_bar * (weights_bar - weights) * lmbda)
    
    all_idxs = np.where(y * outputs < 1)
    L = np.sum((o[all_idxs] - y[all_idxs]) * 
               (C[all_idxs]*(outputs_bar[all_idxs] - outputs[all_idxs]))) 
    L += omegaL
    R = np.sum((o_bar[all_idxs] - y[all_idxs]) * 
               (C[all_idxs]*(outputs_bar[all_idxs] - outputs[all_idxs]))) 
    R += omegaR
    
    delta_prime = 0.0
    
    np_idxs = np.union1d(neg_idxs, pos_idxs) # negative AND positives indices
    deltas = (1 - y[np_idxs] * outputs[np_idxs])/diff[np_idxs]
    sorted_delta_ids = np.argsort(deltas)

    for sid in sorted_delta_ids:
        ii = np_idxs[sid]  # instance id
        
        delta_prime = L + deltas[sid] * (R - L)
        if delta_prime >= 0:
            break
        d = s[ii] * c[ii] * (outputs_bar[ii] - outputs[ii])
        L += d * (outputs[ii] - y[ii])
        R += d * (outputs_bar[ii] - y[ii])

    return -L/(R - L)



In [54]:

    
def cgls(x, y, c):
    alpha = 1.0
    x_p = np.diag(1.0/np.sqrt(c))*x
    r = Ridge(alpha=alpha, fit_intercept=False, solver='sparse_cg')
    r.fit(x_p, y)
    return r

Run CGLS:



In [20]:

    
from sklearn.datasets import load_svmlight_file



In [21]:

    
%time X, y = sklearn.datasets.load_svmlight_file('qn-s3vm-2014-paper/svmlight.testL.90')









    



CPU times: user 16.3 ms, sys: 717 µs, total: 17.1 ms
Wall time: 16.5 ms



In [53]:

    
c = np.ones(X.shape[0])
res = cgls(X, y, c)









    



(90, 20883) (90,)



In [55]:

    
res.coef_.shape









    Out[55]:





(20883,)



In [62]:

    
y_prediction = res.predict(X)



In [66]:

    
y_predicted_labels = np.array(map(lambda k: -1.0 if k < 0 else 1.0, y_prediction))



In [67]:

    
np.mean(np.abs(y_predicted_labels - y))









    Out[67]:





0.0



In [81]:

    
import pandas as pd









    



/usr/local/Cellar/python/2.7.6/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module argparse was already imported from /usr/local/Cellar/python/2.7.6/Frameworks/Python.framework/Versions/2.7/lib/python2.7/argparse.pyc, but /usr/local/lib/python2.7/site-packages is being added to sys.path
  from pkg_resources import resource_stream



In [114]:

    
df = pd.read_csv('qn-s3vm-2014-paper/svmlin.testL.examples.90.outputs', header=None, names=['blah'])
df2 = pd.Series(y_predicted_labels)



In [115]:

    
df = df.blah.map(lambda k: -1.0 if k < 0 else 1.0)



In [116]:

    
df.shape, df2.shape









    Out[116]:





((90,), (90,))



In [118]:

    
(df - df2).mean()









    Out[118]:





0.0



In [73]:

    
import matplotlib.pyplot as plt



In [74]:

    
plt.hist(y_prediction)









    Out[74]:





(array([ 26.,  35.,   1.,   0.,   0.,   0.,   0.,   0.,   4.,  24.]),
 array([-0.74527391, -0.60895801, -0.47264212, -0.33632622, -0.20001032,
        -0.06369443,  0.07262147,  0.20893737,  0.34525326,  0.48156916,
         0.61788506]),
 <a list of 10 Patch objects>)