Python-CGLS:


In [17]:
import numpy as np
from sklearn.linear_model import Ridge

In [18]:
def line_search(weights, weights_bar, lmbda, outputs, outputs_bar, y, c, d, num_of_examples):

    diff = y * (outputs_bar - outputs)
    #p = np.where(diff != 0).shape[0] # number of breakpoints(?)
    neg_idxs = np.where(y * outputs < 1 & diff < 0)[0]
    pos_idxs = np.where(y * outputs < 1 & diff > 0)[0]
    
    # compute L, R from  sorted_idxs
    s = np.zeros_like(y)
    s[neg_idxs] = -1
    s[pos_idxs] = 1
    
    omegaL = np.sum(weights * (weights_bar - weights) * lmbda)
    omegaL = np.sum(weights_bar * (weights_bar - weights) * lmbda)
    
    all_idxs = np.where(y * outputs < 1)
    L = np.sum((o[all_idxs] - y[all_idxs]) * 
               (C[all_idxs]*(outputs_bar[all_idxs] - outputs[all_idxs]))) 
    L += omegaL
    R = np.sum((o_bar[all_idxs] - y[all_idxs]) * 
               (C[all_idxs]*(outputs_bar[all_idxs] - outputs[all_idxs]))) 
    R += omegaR
    
    delta_prime = 0.0
    
    np_idxs = np.union1d(neg_idxs, pos_idxs) # negative AND positives indices
    deltas = (1 - y[np_idxs] * outputs[np_idxs])/diff[np_idxs]
    sorted_delta_ids = np.argsort(deltas)

    for sid in sorted_delta_ids:
        ii = np_idxs[sid]  # instance id
        
        delta_prime = L + deltas[sid] * (R - L)
        if delta_prime >= 0:
            break
        d = s[ii] * c[ii] * (outputs_bar[ii] - outputs[ii])
        L += d * (outputs[ii] - y[ii])
        R += d * (outputs_bar[ii] - y[ii])

    return -L/(R - L)

In [54]:
def cgls(x, y, c):
    alpha = 1.0
    x_p = np.diag(1.0/np.sqrt(c))*x
    r = Ridge(alpha=alpha, fit_intercept=False, solver='sparse_cg')
    r.fit(x_p, y)
    return r

Run CGLS:


In [20]:
from sklearn.datasets import load_svmlight_file

In [21]:
%time X, y = sklearn.datasets.load_svmlight_file('qn-s3vm-2014-paper/svmlight.testL.90')


CPU times: user 16.3 ms, sys: 717 µs, total: 17.1 ms
Wall time: 16.5 ms

In [53]:
c = np.ones(X.shape[0])
res = cgls(X, y, c)


(90, 20883) (90,)

In [55]:
res.coef_.shape


Out[55]:
(20883,)

In [62]:
y_prediction = res.predict(X)

In [66]:
y_predicted_labels = np.array(map(lambda k: -1.0 if k < 0 else 1.0, y_prediction))

In [67]:
np.mean(np.abs(y_predicted_labels - y))


Out[67]:
0.0

In [81]:
import pandas as pd


/usr/local/Cellar/python/2.7.6/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module argparse was already imported from /usr/local/Cellar/python/2.7.6/Frameworks/Python.framework/Versions/2.7/lib/python2.7/argparse.pyc, but /usr/local/lib/python2.7/site-packages is being added to sys.path
  from pkg_resources import resource_stream

In [114]:
df = pd.read_csv('qn-s3vm-2014-paper/svmlin.testL.examples.90.outputs', header=None, names=['blah'])
df2 = pd.Series(y_predicted_labels)

In [115]:
df = df.blah.map(lambda k: -1.0 if k < 0 else 1.0)

In [116]:
df.shape, df2.shape


Out[116]:
((90,), (90,))

In [118]:
(df - df2).mean()


Out[118]:
0.0

In [73]:
import matplotlib.pyplot as plt

In [74]:
plt.hist(y_prediction)


Out[74]:
(array([ 26.,  35.,   1.,   0.,   0.,   0.,   0.,   0.,   4.,  24.]),
 array([-0.74527391, -0.60895801, -0.47264212, -0.33632622, -0.20001032,
        -0.06369443,  0.07262147,  0.20893737,  0.34525326,  0.48156916,
         0.61788506]),
 <a list of 10 Patch objects>)