In [2]:
import os
import subprocess
import tempfile
import matplotlib.pyplot as plt
import numpy as np
import sklearn.datasets
import sklearn.metrics
import pandas as pd
In [3]:
# Define a convenience function for running shell commands and capturing their output.
def run_shell_cmd(cmd, echo=True):
    """
    Run a command in a sub-shell, capturing stdout and stderr
    to temporary files that are then read back in.
    """
    _, stdout_f = tempfile.mkstemp()
    _, stderr_f = tempfile.mkstemp()
    print("Running command")
    print(cmd)
    p = subprocess.Popen(
        '{} >{} 2>{}'.format(cmd, stdout_f, stderr_f), shell=True)
    p.wait()
    with open(stdout_f) as f:
        stdout = f.read()
    os.remove(stdout_f)
    with open(stderr_f) as f:
        stderr = f.read()
    os.remove(stderr_f)
    if echo:
        print("stdout:")
        print(stdout)
        print("stderr:")
        print(stderr)
    return stdout, stderr
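As an aside, on Python 3.7+ the same capture can be had without temporary files via the standard-library subprocess.run; a minimal sketch (the helper name run_shell_cmd_simple is hypothetical, not from the original notebook):

def run_shell_cmd_simple(cmd, echo=True):
    # capture_output/text require Python 3.7+; shell=True mirrors the helper above.
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    if echo:
        print("stdout:")
        print(result.stdout)
        print("stderr:")
        print(result.stderr)
    return result.stdout, result.stderr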
In [4]:
def write_feats_for_vw(X, y, dirname='.'):
    """
    Write vw-formatted features to the canonical filename.
    Also write a numpy array of the (permuted) labels.
    """
    # Permute the data so each pass over the file sees shuffled examples.
    ind = np.random.permutation(X.shape[0])
    X = X[ind]
    y = y[ind]
    # Write one "label |a index:value ..." line per example, skipping zero features.
    filename = '{}/data.txt'.format(dirname)
    with open(filename, 'w') as f:
        for row, label in zip(X, y):
            vals = ' '.join('{}:{}'.format(i, v) for i, v in enumerate(row) if v != 0)
            f.write('{:f} |a {}\n'.format(label, vals))
    np.save('{}/labels.npy'.format(dirname), y)
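To make the format concrete: `|a` opens a single namespace named a, feature indices are just column positions, and zero-valued features are dropped. For a toy input, the emitted lines would look like this (row order depends on the random permutation):

X_toy = np.array([[0.5, 0.0, -1.2],
                  [2.0, 3.0, 0.0]])
y_toy = np.array([1.0, -1.0])
write_feats_for_vw(X_toy, y_toy)
# data.txt now contains (in some order):
#   1.000000 |a 0:0.5 2:-1.2
#   -1.000000 |a 0:2.0 1:3.0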
In [15]:
def get_madelon():
    dirname = 'data/vw_tests/madelon'
    try:
        os.makedirs(dirname)
    except OSError:
        pass
    # make_classification is sklearn's Madelon-style synthetic generator.
    X, y = sklearn.datasets.make_classification(n_samples=100, n_classes=2)
    assert len(np.unique(y)) == 2
    y[y == 0] = -1.0  # vw expects binary labels in {-1, 1}
    ax = plt.scatter(X[:, 0], X[:, 1], c=y, alpha=0.1)
    write_feats_for_vw(X, y, dirname)
    return dirname

dirname = get_madelon()
In [15]:
def get_circles():
    dirname = 'data/vw_tests/circles'
    try:
        os.makedirs(dirname)
    except OSError:
        pass
    X, y = sklearn.datasets.make_circles(n_samples=10000)
    assert len(np.unique(y)) == 2
    y[y == 0] = -1  # vw expects binary labels in {-1, 1}
    ax = plt.scatter(X[:, 0], X[:, 1], c=y, alpha=0.1)
    write_feats_for_vw(X, y, dirname)
    return dirname

dirname = get_circles()
In [5]:
def get_iris():
    dirname = 'data/vw_tests/iris'
    try:
        os.makedirs(dirname)
    except OSError:
        pass
    data = sklearn.datasets.load_iris()
    X, y = data['data'], data['target']
    y += 1  # vw expects multiclass labels starting from 1
    write_feats_for_vw(X, y, dirname)
    return dirname

dirname = get_iris()
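The two label conventions above ({-1, 1} for binary, 1..k for multiclass) recur in every loader, so they could be centralized; a sketch, where labels_for_vw is a hypothetical helper not in the original notebook:

def labels_for_vw(y):
    classes = np.unique(y)
    if len(classes) == 2:
        # Binary losses (hinge, logistic) expect labels in {-1, 1}.
        return np.where(y == classes[0], -1.0, 1.0)
    # Multiclass (--oaa) expects integer labels 1..k.
    return 1 + np.searchsorted(classes, y)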
In [6]:
def run_vw(dirname, model_name, num_passes=10, train_options='', pred_options='', echo=False):
    # Run training and output predictions.
    data_filename = '{}/data.txt'.format(dirname)
    labels_filename = '{}/labels.npy'.format(dirname)
    model_filename = '{}/{}_model.vw'.format(dirname, model_name)
    cache_filename = '{}/{}_cache.vw'.format(dirname, model_name)
    pred_filename = '{}/{}_pred.txt'.format(dirname, model_name)
    raw_pred_filename = '{}/{}_raw.txt'.format(dirname, model_name)
    # Build the cache once up front (--noop parses without learning).
    cache_cmd = 'vw -k --cache_file {} -d {} --noop {}'.format(
        cache_filename, data_filename, train_options)
    stdout, stderr = run_shell_cmd(cache_cmd, False)
    # Train from the cache for the requested number of passes.
    train_cmd = 'vw --cache_file {} -f {} --passes {} {}'.format(
        cache_filename, model_filename, num_passes, train_options)
    stdout, stderr = run_shell_cmd(train_cmd, echo)
    # Predict on the same data in test-only mode (-t), writing plain and raw predictions.
    pred_cmd = 'vw -t --cache_file {} -i {} -p {} -r {} {}'.format(
        cache_filename, model_filename, pred_filename, raw_pred_filename, pred_options)
    stdout, stderr = run_shell_cmd(pred_cmd, echo)
    # Load predictions and threshold them into {-1, 1}.
    y_pred = pd.read_csv(pred_filename, sep=' ', header=None)[0].values
    print(y_pred[:5])
    thresh = 0
    y_pred[y_pred <= thresh] = -1
    y_pred[y_pred > thresh] = 1
    print(y_pred[:5])
    # Compute the training accuracy against the saved labels.
    y_true = np.load(labels_filename)
    print(y_true[:5])
    score = sklearn.metrics.accuracy_score(y_true, y_pred)
    return score
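One caveat worth flagging: the sign-thresholding step only makes sense for binary {-1, 1} labels. With --oaa, vw writes predicted class labels (1..k) to the -p file, so thresholding maps every prediction to 1; that is why the iris run below scores 0.333. A sketch of a scoring path that handles both cases (score_predictions and its multiclass flag are assumptions, not part of the original):

def score_predictions(pred_filename, labels_filename, multiclass=False):
    y_pred = pd.read_csv(pred_filename, sep=' ', header=None)[0].values
    y_true = np.load(labels_filename)
    if multiclass:
        # --oaa predictions are already class labels; just cast to int.
        y_pred = y_pred.astype(int)
    else:
        # Binary: threshold raw margins at 0 into {-1, 1}.
        y_pred = np.where(y_pred > 0, 1, -1)
    return sklearn.metrics.accuracy_score(y_true, y_pred)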
In [6]:
dirname = 'data/vw_tests/flickr'
In [10]:
score = run_vw(dirname, 'hinge', 500, '--loss_function=hinge --holdout_off --oaa=4', '', echo=True)
print(score)
Running command
vw -k --cache_file data/vw_tests/iris/hinge_cache.vw -d data/vw_tests/iris/data.txt --noop --loss_function=hinge --holdout_off --oaa=4
Running command
vw --cache_file data/vw_tests/iris/hinge_cache.vw -f data/vw_tests/iris/hinge_model.vw --passes 500 --loss_function=hinge --holdout_off --oaa=4
stdout:
stderr:
final_regressor = data/vw_tests/iris/hinge_model.vw
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
decay_learning_rate = 1
using cache_file = data/vw_tests/iris/hinge_cache.vw
ignoring text input in favor of cache input
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.333333 0.333333 3 3.0 1 1 5
0.333333 0.333333 6 6.0 1 1 5
0.545455 0.800000 11 11.0 3 1 5
0.545455 0.545455 22 22.0 3 3 5
0.545455 0.545455 44 44.0 1 1 5
0.448276 0.348837 87 87.0 1 1 5
0.379310 0.310345 174 174.0 3 3 5
0.359195 0.339080 348 348.0 1 1 5
0.346264 0.333333 696 696.0 2 3 5
0.340517 0.334770 1392 1392.0 1 1 5
0.336925 0.333333 2784 2784.0 2 3 5
0.320761 0.304598 5568 5568.0 2 2 5
0.279479 0.238189 11135 11135.0 3 3 5
0.224168 0.168852 22269 22269.0 1 1 5
0.175292 0.126415 44537 44537.0 1 1 5
finished run
number of examples per pass = 150
passes used = 500
weighted example sum = 75000
weighted label sum = 0
average loss = 0.14356
best constant = 0
total feature number = 375000
Running command
vw -t --cache_file data/vw_tests/iris/hinge_cache.vw -i data/vw_tests/iris/hinge_model.vw -p data/vw_tests/iris/hinge_pred.txt -r data/vw_tests/iris/hinge_raw.txt
stdout:
stderr:
only testing
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
predictions = data/vw_tests/iris/hinge_pred.txt
raw predictions = data/vw_tests/iris/hinge_raw.txt
using cache_file = data/vw_tests/iris/hinge_cache.vw
ignoring text input in favor of cache input
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 3 3.0 1 1 5
0.166667 0.333333 6 6.0 1 1 5
0.181818 0.200000 11 11.0 3 3 5
0.090909 0.000000 22 22.0 3 3 5
0.113636 0.136364 44 44.0 1 1 5
0.080460 0.046512 87 87.0 1 1 5
finished run
number of examples per pass = 150
passes used = 1
weighted example sum = 150
weighted label sum = 0
average loss = 0.0733333
best constant = -0.00671141
total feature number = 750
[ 1. 2. 1. 3. 1.]
[ 1. 1. 1. 1. 1.]
[1 2 1 2 1]
0.333333333333
In [19]:
score = run_vw(dirname, 'hinge', 1000, '--loss_function=hinge --holdout_period=5', '', echo=True)
print(score)
Running command
vw -k --cache_file data/vw_tests/flickr/hinge_cache.vw -d data/vw_tests/flickr/data.txt --noop --loss_function=hinge --holdout_period=5
Running command
vw --cache_file data/vw_tests/flickr/hinge_cache.vw -f data/vw_tests/flickr/hinge_model.vw --passes 1000 --loss_function=hinge --holdout_period=5
stdout:
stderr:
final_regressor = data/vw_tests/flickr/hinge_model.vw
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
decay_learning_rate = 1
using cache_file = data/vw_tests/flickr/hinge_cache.vw
ignoring text input in favor of cache input
num sources = 1
average since example example current current current
loss last counter weight label predict features
1.052461 1.052461 3 3.0 1.0000 -0.8188 418
0.805416 0.558372 6 6.0 -1.0000 -0.3911 61
0.594434 0.341255 11 11.0 -1.0000 -0.7236 111
0.400629 0.206823 22 22.0 -1.0000 -0.5533 96
0.389175 0.377721 44 44.0 -1.0000 -0.9203 354
0.253984 0.115650 87 87.0 -1.0000 -1.0000 152
0.335743 0.417502 174 174.0 1.0000 1.0000 485
0.566557 0.566557 348 348.0 -1.0000 0.4377 168 h
0.581723 0.596888 696 696.0 -1.0000 -1.0000 269 h
finished run
number of examples = 1276
weighted example sum = 1276
weighted label sum = 4
average loss = 0.396731
best constant = 0.0031348
total feature number = 354644
Running command
vw -t --cache_file data/vw_tests/flickr/hinge_cache.vw -i data/vw_tests/flickr/hinge_model.vw -p data/vw_tests/flickr/hinge_pred.txt -r data/vw_tests/flickr/hinge_raw.txt
stdout:
stderr:
only testing
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
predictions = data/vw_tests/flickr/hinge_pred.txt
raw predictions = data/vw_tests/flickr/hinge_raw.txt
using cache_file = data/vw_tests/flickr/hinge_cache.vw
ignoring text input in favor of cache input
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.355880 0.355880 3 3.0 1.0000 1.0000 418
1.199245 2.042610 6 6.0 -1.0000 -0.2253 11
1.042707 0.854861 11 11.0 -1.0000 -0.2910 11
1.389209 1.735711 22 22.0 -1.0000 -0.2710 52
1.362571 1.335933 44 44.0 1.0000 1.0000 442
1.818542 2.285117 87 87.0 -1.0000 1.0000 151
1.765157 1.711771 174 174.0 1.0000 1.0000 319
1.062736 0.360315 348 348.0 1.0000 0.9747 256
finished run
number of examples = 398
weighted example sum = 398
weighted label sum = -2
average loss = 1.00599
best constant = -0.00755668
total feature number = 110459
[-0.258559 -0.280344 1. 0.371763 0.909438]
[-1. -1. 1. 1. 1.]
[-1. -1. 1. -1. -1.]
0.668341708543
In [20]:
score = run_vw(dirname, 'logistic', 100, '--loss_function=logistic --holdout_off', '', echo=True)
print(score)
Running command
vw -k --cache_file data/vw_tests/flickr/logistic_cache.vw -d data/vw_tests/flickr/data.txt --noop --loss_function=logistic --holdout_off
Running command
vw --cache_file data/vw_tests/flickr/logistic_cache.vw -f data/vw_tests/flickr/logistic_model.vw --passes 100 --loss_function=logistic --holdout_off
stdout:
stderr:
final_regressor = data/vw_tests/flickr/logistic_model.vw
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
decay_learning_rate = 1
using cache_file = data/vw_tests/flickr/logistic_cache.vw
ignoring text input in favor of cache input
num sources = 1
average since example example current current current
loss last counter weight label predict features
1.246528 1.246528 3 3.0 1.0000 -2.5911 418
0.842738 0.438949 6 6.0 -1.0000 -0.9312 11
0.631988 0.379088 11 11.0 -1.0000 -0.6482 11
0.455387 0.278785 22 22.0 -1.0000 -0.9857 52
0.577333 0.699278 44 44.0 1.0000 -1.3761 442
0.388873 0.196031 87 87.0 -1.0000 -2.4313 151
0.404501 0.420128 174 174.0 1.0000 -1.1616 319
0.459146 0.513791 348 348.0 1.0000 2.4428 256
0.484589 0.510032 696 696.0 1.0000 1.0270 246
0.458527 0.432464 1392 1392.0 1.0000 -1.1966 16
0.413595 0.368664 2784 2784.0 1.0000 1.1651 379
0.363571 0.313547 5568 5568.0 1.0000 1.5018 293
0.312551 0.261522 11135 11135.0 1.0000 6.0470 365
0.264592 0.216629 22269 22269.0 -1.0000 -0.3852 255
finished run
number of examples = 39800
weighted example sum = 39800
weighted label sum = -200
average loss = 0.22835
best constant = -0.00502513
total feature number = 11045900
Running command
vw -t --cache_file data/vw_tests/flickr/logistic_cache.vw -i data/vw_tests/flickr/logistic_model.vw -p data/vw_tests/flickr/logistic_pred.txt -r data/vw_tests/flickr/logistic_raw.txt
stdout:
stderr:
only testing
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
predictions = data/vw_tests/flickr/logistic_pred.txt
raw predictions = data/vw_tests/flickr/logistic_raw.txt
using cache_file = data/vw_tests/flickr/logistic_cache.vw
ignoring text input in favor of cache input
num sources = 1
average since example example current current current
loss last counter weight label predict features
45.176078 45.176078 3 3.0 1.0000 12.2020 418
23.421518 1.666958 6 6.0 -1.0000 -2.7346 11
21.151163 18.426736 11 11.0 -1.0000 -2.8035 11
12.208123 3.265083 22 22.0 -1.0000 -3.2797 52
13.213549 14.218975 44 44.0 1.0000 4.0784 442
9.893287 6.495810 87 87.0 -1.0000 -0.9675 151
7.959947 6.026607 174 174.0 1.0000 2.7225 319
12.084162 16.208377 348 348.0 1.0000 1.3980 256
finished run
number of examples = 398
weighted example sum = 398
weighted label sum = -2
average loss = 13.1603
best constant = -0.00755668
total feature number = 110459
[ -2.518554 -3.781759 12.201966 -1.94178 0.051162]
[-1. -1. 1. -1. 1.]
[-1. -1. 1. -1. -1.]
0.947236180905
In [48]:
score = run_vw(dirname, 'logistic', 100, '--loss_function=logistic --holdout_off', '', echo=True)
print(score)
Running command
vw -k --cache_file data/vw_tests/flickr/logistic_cache.vw -d data/vw_tests/flickr/data.txt --noop --loss_function=logistic --holdout_off
Running command
vw --cache_file data/vw_tests/flickr/logistic_cache.vw -f data/vw_tests/flickr/logistic_model.vw --passes 100 --loss_function=logistic --holdout_off
stdout:
stderr:
final_regressor = data/vw_tests/flickr/logistic_model.vw
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
decay_learning_rate = 1
using cache_file = data/vw_tests/flickr/logistic_cache.vw
ignoring text input in favor of cache input
num sources = 1
average since example example current current current
loss last counter weight label predict features
1.246528 1.246528 3 3.0 1.0000 -2.5911 418
0.842738 0.438949 6 6.0 -1.0000 -0.9312 11
0.631988 0.379088 11 11.0 -1.0000 -0.6482 11
0.455387 0.278785 22 22.0 -1.0000 -0.9857 52
0.577333 0.699278 44 44.0 1.0000 -1.3761 442
0.388873 0.196031 87 87.0 -1.0000 -2.4313 151
0.404501 0.420128 174 174.0 1.0000 -1.1616 319
0.459146 0.513791 348 348.0 1.0000 2.4428 256
0.484589 0.510032 696 696.0 1.0000 1.0270 246
0.458527 0.432464 1392 1392.0 1.0000 -1.1966 16
0.413595 0.368664 2784 2784.0 1.0000 1.1651 379
0.363571 0.313547 5568 5568.0 1.0000 1.5018 293
0.312551 0.261522 11135 11135.0 1.0000 6.0470 365
0.264592 0.216629 22269 22269.0 -1.0000 -0.3852 255
finished run
number of examples = 39800
weighted example sum = 39800
weighted label sum = -200
average loss = 0.22835
best constant = -0.00502513
total feature number = 11045900
Running command
vw -t --cache_file data/vw_tests/flickr/logistic_cache.vw -i data/vw_tests/flickr/logistic_model.vw -p data/vw_tests/flickr/logistic_pred.txt -r data/vw_tests/flickr/logistic_raw.txt
stdout:
stderr:
only testing
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
predictions = data/vw_tests/flickr/logistic_pred.txt
raw predictions = data/vw_tests/flickr/logistic_raw.txt
using cache_file = data/vw_tests/flickr/logistic_cache.vw
ignoring text input in favor of cache input
num sources = 1
average since example example current current current
loss last counter weight label predict features
45.176078 45.176078 3 3.0 1.0000 12.2020 418
23.421518 1.666958 6 6.0 -1.0000 -2.7346 11
21.151163 18.426736 11 11.0 -1.0000 -2.8035 11
12.208123 3.265083 22 22.0 -1.0000 -3.2797 52
13.213549 14.218975 44 44.0 1.0000 4.0784 442
9.893287 6.495810 87 87.0 -1.0000 -0.9675 151
7.959947 6.026607 174 174.0 1.0000 2.7225 319
12.084162 16.208377 348 348.0 1.0000 1.3980 256
finished run
number of examples = 398
weighted example sum = 398
weighted label sum = -2
average loss = 13.1603
best constant = -0.00755668
total feature number = 110459
[ -2.51855400e+00 -3.78175900e+00 1.22019660e+01 -1.94178000e+00
5.11620000e-02 -2.73464200e+00 -4.01475000e+00 -9.51287400e+00
7.34605000e-01 -3.07713600e+00 -2.80349700e+00 -1.48164000e+00
-4.86660800e+00 -2.63144300e+00 -1.28053900e+00 5.95411000e-01
-5.87086000e-01 -2.80374100e+00 -1.49373500e+00 -2.26862400e+00
-3.23016000e+00 -3.27966500e+00 -4.01208000e+00 -3.70278700e+00
-5.09172000e-01 -9.85837000e-01 -1.33993300e+01 -3.33871900e+00
-3.05157700e+00 2.72435300e+00 -2.54370600e+00 -4.02353500e+00
-2.94173700e+00 -5.72616100e+00 -2.93107200e+00 4.22650000e-02
-5.63145700e+00 -3.14603100e+00 4.20807700e+00 -3.09277000e+00
-5.81248900e+00 -3.14173200e+00 -4.06678100e+00 4.07842500e+00
-3.70101000e-01 -1.80410500e+00 -2.78201800e+00 -2.41530900e+00
-1.52486200e+00 -5.19325800e+00 -1.87506600e+00 -2.79377900e+00
-2.29983000e+00 -4.75431000e+00 -3.51300000e+00 -2.34522700e+00
-8.02377000e-01 -1.96015900e+00 -5.51950000e-01 -4.52902000e-01
-3.41421400e+00 -3.20288000e-01 -1.48269600e+00 -7.20505000e-01
-7.42886000e-01 -7.34885000e-01 -5.52003700e+00 -8.14064000e-01
-2.58527600e+00 -2.96427800e+00 -1.68971000e+00 -6.65260300e+00
-1.50511300e+00 -2.11229700e+00 -1.25212800e+00 -2.19097500e+00
-1.33558000e-01 -6.63473000e+00 -1.08971720e+01 3.04870000e-02
-2.92379900e+00 -5.08395800e+00 -1.85110400e+00 -9.70782000e-01
7.51860000e-02 -2.76909800e+00 -9.67501000e-01 -2.51104200e+00
-3.20790700e+00 -3.46788200e+00 -1.78427100e+00 -3.29940800e+00
-7.83475900e+00 -4.01439000e+00 -7.48307000e-01 -2.15584000e+00
-3.75770900e+00 3.31350000e-01 -2.25294600e+00 4.02147200e+00
-7.55309000e-01 -3.80091700e+00 -1.53459300e+00 -4.36664200e+00
-1.65789200e+00 -4.47096200e+00 -5.85218400e+00 -1.70041400e+00
-4.96328400e+00 -2.73492200e+00 -2.22184500e+00 -6.26201000e-01
-1.75456000e+00 -4.91502200e+00 -2.97952700e+00 -3.20739600e+00
-2.59144900e+00 -3.52786100e+00 -2.84882000e-01 -3.24134800e+00
-1.33460100e+00 -4.14200500e+00 -3.65651100e+00 -4.60784900e+00
-5.15369400e+00 -2.48885400e+00 -7.87555000e-01 -4.40442900e+00
-5.92307300e+00 -4.01025800e+00 -2.37525900e+00 -2.12723700e+00
-2.33747100e+00 -2.12111800e+00 -3.48600000e-02 -2.80616900e+00
-2.08105800e+00 -5.17066100e+00 -3.63314000e+00 -3.05989000e+00
-3.44344200e+00 -2.81323100e+00 6.62864000e-01 -4.06475700e+00
-3.16520500e+00 -1.59631600e+00 -3.43463500e+00 -3.18372900e+00
6.52572000e-01 -1.09355400e+00 1.04224800e+00 4.86275100e+00
-1.63479400e+00 -1.13394900e+00 -2.66917900e+00 -1.94087700e+00
-5.55880300e+00 -5.24750000e-01 -2.72240000e+00 2.11988200e+00
-3.35861600e+00 -2.26086600e+00 2.98038500e+00 1.75873400e+00
-4.71143800e+00 -4.02199500e+00 6.20959700e+00 3.07147700e+00
-4.44815000e-01 1.07831800e+00 4.51899100e+00 2.28111200e+00
-4.21171500e+00 2.72247100e+00 7.19737700e+00 8.34130000e-01
6.94841400e+00 2.84644800e+00 2.13323100e+00 1.08290000e-02
6.93709000e-01 6.55907400e+00 2.93778200e+00 1.16054500e+00
3.90002200e+00 -4.45221000e-01 1.04230300e+00 5.20497600e+00
2.02499600e+00 -5.13956300e+00 2.35738800e+00 1.98525000e+00
-7.04792100e+00 6.99128200e+00 7.22422200e+00 3.32094500e+00
4.78728300e+00 -5.93751000e-01 -3.30827200e+00 1.96473200e+00
2.47834000e+00 3.96069600e+00 8.77545800e+00 5.18985000e+00
-3.37205900e+00 3.91791900e+00 -2.93877800e+00 5.59420000e+00
2.69771100e+00 5.72021400e+00 1.59471500e+00 5.16434600e+00
3.52453800e+00 2.02000800e+00 5.85173000e-01 7.25039100e+00
9.43516200e+00 3.34336900e+00 -4.41733200e+00 4.58973800e+00
3.58495100e+00 1.13165000e+00 1.43578440e+01 8.30888000e-01
2.34199800e+00 8.38978000e-01 3.50114400e+00 7.78279600e+00
7.29769000e-01 7.92534000e-01 1.76874300e+00 -1.29284100e+00
6.32009600e+00 3.87559300e+00 8.91497100e+00 5.37897900e+00
5.95401700e+00 3.77212700e+00 9.08034600e+00 1.23041780e+01
4.66646200e+00 1.18696900e+00 3.77212700e+00 2.89285100e+00
1.13951500e+00 -1.10840000e+00 1.47154500e+00 -5.94287800e+00
1.63842300e+00 3.95430600e+00 -2.29571100e+00 2.74175300e+00
1.89557800e+00 2.43638100e+00 -1.32911640e+01 7.45052000e-01
-4.87852100e+00 1.30599800e+00 7.57747000e-01 1.37183800e+00
4.04641200e+00 8.27168100e+00 5.73508400e+00 -4.94552000e-01
4.09761800e+00 -9.02349800e+00 1.86386800e+01 3.65104500e+00
-4.88993300e+00 -5.62873000e-01 2.44083000e-01 5.35716400e+00
-9.85840000e-01 2.68785000e+00 2.54069100e+00 5.43691300e+00
2.03051400e+00 1.91208900e+00 6.69488000e-01 -2.51085200e+00
3.74900700e+00 1.05771060e+01 -3.18274000e+00 2.68384900e+00
-1.01775100e+00 3.16224100e+00 9.38036300e+00 -1.23539100e+00
1.03488730e+01 1.40353200e+00 1.60381300e+00 1.66132400e+00
-1.08616900e+00 3.33514500e+00 -3.22334000e+00 4.88928100e+00
3.92546900e+00 1.72730000e-02 -1.59701800e+00 3.18640900e+00
3.84126900e+00 1.04096330e+01 4.40502700e+00 -4.84881800e+00
7.18222600e+00 2.34088400e+00 7.66785500e+00 2.89566000e+00
8.64717200e+00 4.13705400e+00 -1.20637300e+00 6.00842600e+00
1.29597800e+00 1.23332400e+00 7.51951600e+00 2.62488600e+00
3.10529300e+00 -4.88648100e+00 -3.04787000e+00 2.03608700e+00
4.08098900e+00 8.90782000e-01 1.97531500e+00 -4.54009100e+00
-9.70353000e-01 1.75525000e+00 5.36482500e+00 5.40152500e+00
2.53384200e+00 4.24520000e+00 3.91373600e+00 3.59697000e-01
3.00091000e+00 2.77524900e+00 4.04981100e+00 1.19451900e+00
2.06104900e+00 1.00956800e+00 1.23335200e+00 3.30077800e+00
2.25026800e+00 2.88744100e+00 2.11684800e+00 1.90917700e+00
2.43557200e+00 3.07218200e+00 -3.78649200e+00 1.39797000e+00
-1.19839100e+00 5.13623500e+00 4.46973600e+00 2.17082600e+00
4.24876000e-01 8.84846000e-01 -2.90572600e+00 -1.03536000e-01
3.04788700e+00 7.21542300e+00 2.47457600e+00 -7.16525700e+00
1.27191140e+01 4.86878100e+00 2.28708230e+01 5.09937000e+00
-3.99557000e-01 2.58844500e+00 -3.40101100e+00 4.05445100e+00
3.76541300e+00 1.90860000e-01 2.15038000e-01 4.64901500e+00
-9.99218000e-01 -2.50321000e-01 2.27594800e+00 2.37514400e+00
5.00468800e+00 -1.90256100e+00 -8.96394000e-01 4.08240100e+00
2.11195900e+00 3.98310500e+00 1.20526400e+00 3.71525500e+00
3.19207000e+00 1.34354900e+00 1.02131270e+01 -1.43907000e-01
8.08001800e+00 1.77489700e+00 2.87471700e+00 -4.41419500e+00
-1.89663900e+00 1.66882500e+00 -1.85477200e+00 1.36128100e+00
2.80758600e+00 -3.42796600e+00]
[-1. -1. 1. -1. 1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1. 1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. 1.
-1. -1. 1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. 1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. 1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. 1. -1.
-1. -1. -1. -1. 1. -1. 1. 1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1.
1. 1. -1. -1. 1. 1. -1. 1. 1. 1. -1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. -1. 1. 1. 1. -1. 1. 1. -1. 1. 1. 1. 1. -1.
-1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. -1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1. -1. 1.
1. 1. -1. 1. -1. 1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1. -1. -1.
1. 1. -1. 1. 1. 1. 1. 1. 1. -1. 1. 1. -1. 1. -1. 1. 1. -1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. -1. 1. 1. 1. 1. -1. 1. 1.
1. 1. 1. 1. -1. 1. 1. 1. 1. 1. 1. -1. -1. 1. 1. 1. 1. -1.
-1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. -1. -1. 1. 1. 1. -1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. -1. -1. 1. 1. 1. -1.
-1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. 1. 1. -1. -1. 1. -1. 1.
1. -1.]
[-1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1.
-1. -1. 1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. 1. 1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1.
1. 1. -1. -1. 1. 1. -1. 1. 1. 1. -1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. -1. 1. 1. 1. -1. 1. 1. -1. 1. 1. 1. 1. 1.
-1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. -1. 1. 1. -1. 1. 1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. 1. -1. 1.
1. 1. -1. 1. -1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. 1. -1. 1.
-1. 1. -1. 1. 1. 1. 1. 1. 1. -1. 1. 1. -1. 1. -1. 1. 1. -1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. -1. 1. 1. 1. 1. -1. 1. 1.
1. 1. 1. 1. -1. 1. 1. 1. 1. 1. 1. -1. 1. 1. 1. 1. 1. -1.
-1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. -1. -1. 1. 1. 1. -1.
1. 1. 1. 1. 1. 1. -1. 1. 1. 1. 1. 1. -1. 1. 1. 1. 1. 1.
-1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. -1. -1. 1. -1. 1.
1. -1.]
0.947236180905
In [49]:
score = run_vw(dirname, 'logistic_binary', 100, '--loss_function=logistic --binary --holdout_off', '', echo=True)
print(score)
Running command
vw -k --cache_file data/vw_tests/flickr/logistic_binary_cache.vw -d data/vw_tests/flickr/data.txt --noop --loss_function=logistic --binary --holdout_off
Running command
vw --cache_file data/vw_tests/flickr/logistic_binary_cache.vw -f data/vw_tests/flickr/logistic_binary_model.vw --passes 100 --loss_function=logistic --binary --holdout_off
stdout:
stderr:
final_regressor = data/vw_tests/flickr/logistic_binary_model.vw
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
decay_learning_rate = 1
using cache_file = data/vw_tests/flickr/logistic_binary_cache.vw
ignoring text input in favor of cache input
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.333333 0.333333 3 3.0 1 -1 418
0.166667 0.000000 6 6.0 -1 -1 11
0.090909 0.000000 11 11.0 -1 -1 11
0.045455 0.000000 22 22.0 -1 -1 52
0.113636 0.181818 44 44.0 1 -1 442
0.068966 0.023256 87 87.0 -1 -1 151
0.103448 0.137931 174 174.0 1 -1 319
0.183908 0.264368 348 348.0 1 1 256
0.222701 0.261494 696 696.0 1 1 246
0.204741 0.186782 1392 1392.0 1 -1 16
0.179598 0.154454 2784 2784.0 1 1 379
0.148707 0.117816 5568 5568.0 1 1 293
0.119084 0.089456 11135 11135.0 1 1 365
0.093403 0.067720 22269 22269.0 -1 -1 255
finished run
number of examples = 39800
weighted example sum = 39800
weighted label sum = 0
average loss = 0.0765327
best constant = 0
total feature number = 11045900
Running command
vw -t --cache_file data/vw_tests/flickr/logistic_binary_cache.vw -i data/vw_tests/flickr/logistic_binary_model.vw -p data/vw_tests/flickr/logistic_binary_pred.txt -r data/vw_tests/flickr/logistic_binary_raw.txt
stdout:
stderr:
only testing
Num weight bits = 18
learning rate = 10
initial_t = 1
power_t = 0.5
predictions = data/vw_tests/flickr/logistic_binary_pred.txt
raw predictions = data/vw_tests/flickr/logistic_binary_raw.txt
using cache_file = data/vw_tests/flickr/logistic_binary_cache.vw
ignoring text input in favor of cache input
num sources = 1
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 3 3.0 1 1 418
0.166667 0.333333 6 6.0 -1 -1 11
0.181818 0.200000 11 11.0 -1 -1 11
0.136364 0.090909 22 22.0 -1 -1 52
0.090909 0.045455 44 44.0 1 1 442
0.068966 0.046512 87 87.0 -1 -1 151
0.051724 0.034483 174 174.0 1 1 319
0.048851 0.045977 348 348.0 1 1 256
finished run
number of examples = 398
weighted example sum = 398
weighted label sum = 0
average loss = 0.0527638
best constant = -0.00251889
total feature number = 110459
[-1. -1. 1. -1. 1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1. 1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. 1.
-1. -1. 1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. 1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. 1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. 1. -1.
-1. -1. -1. -1. 1. -1. 1. 1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1.
1. 1. -1. -1. 1. 1. -1. 1. 1. 1. -1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. -1. 1. 1. 1. -1. 1. 1. -1. 1. 1. 1. 1. -1.
-1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. -1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1. -1. 1.
1. 1. -1. 1. -1. 1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1. -1. -1.
1. 1. -1. 1. 1. 1. 1. 1. 1. -1. 1. 1. -1. 1. -1. 1. 1. -1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. -1. 1. 1. 1. 1. -1. 1. 1.
1. 1. 1. 1. -1. 1. 1. 1. 1. 1. 1. -1. -1. 1. 1. 1. 1. -1.
-1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. -1. -1. 1. 1. 1. -1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. -1. -1. 1. 1. 1. -1.
-1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. 1. 1. -1. -1. 1. -1. 1.
1. -1.]
[-1. -1. 1. -1. 1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1. 1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. 1.
-1. -1. 1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. 1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. 1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. 1. -1.
-1. -1. -1. -1. 1. -1. 1. 1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1.
1. 1. -1. -1. 1. 1. -1. 1. 1. 1. -1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. -1. 1. 1. 1. -1. 1. 1. -1. 1. 1. 1. 1. -1.
-1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. -1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1. -1. 1.
1. 1. -1. 1. -1. 1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1. -1. -1.
1. 1. -1. 1. 1. 1. 1. 1. 1. -1. 1. 1. -1. 1. -1. 1. 1. -1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. -1. 1. 1. 1. 1. -1. 1. 1.
1. 1. 1. 1. -1. 1. 1. 1. 1. 1. 1. -1. -1. 1. 1. 1. 1. -1.
-1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. -1. -1. 1. 1. 1. -1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. -1. -1. 1. 1. 1. -1.
-1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. 1. 1. -1. -1. 1. -1. 1.
1. -1.]
[-1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1.
-1. -1. 1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
-1. -1. -1. -1. -1. -1. 1. 1. -1. -1. -1. -1. -1. -1. -1. 1. -1. -1.
1. 1. -1. -1. 1. 1. -1. 1. 1. 1. -1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. 1. -1. 1. 1. 1. -1. 1. 1. -1. 1. 1. 1. 1. 1.
-1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. -1. 1. 1. -1. 1. 1. 1. 1. 1. 1. 1. -1. 1. -1. 1. 1.
1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. 1. -1. 1.
1. 1. -1. 1. -1. 1. 1. 1. 1. 1. 1. 1. 1. -1. 1. 1. -1. 1.
-1. 1. -1. 1. 1. 1. 1. 1. 1. -1. 1. 1. -1. 1. -1. 1. 1. -1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. -1. 1. 1. 1. 1. -1. 1. 1.
1. 1. 1. 1. -1. 1. 1. 1. 1. 1. 1. -1. 1. 1. 1. 1. 1. -1.
-1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
1. 1. 1. 1. -1. 1. -1. 1. 1. 1. 1. 1. -1. -1. 1. 1. 1. -1.
1. 1. 1. 1. 1. 1. -1. 1. 1. 1. 1. 1. -1. 1. 1. 1. 1. 1.
-1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. -1. -1. 1. -1. 1.
1. -1.]
0.947236180905