In [32]:
import sklearn.metrics

# Trivial baseline on the validation split: score every example 0.0 and see
# what the metrics look like. `dataset`, `np` and `vislab.results` are not
# created in this cell, so they must already exist in the kernel.
df = dataset['val_df']
yt = (df['label'] > 0).astype(int)  # 1 where label > 0, else 0
yt_pred = np.zeros_like(yt).astype(float)  # constant all-zero scores
# Alternative baseline kept for reference (uniform random scores):
#   yt_pred = np.random.rand(len(yt))

# NOTE(review): presumably draws the precision-recall curve -- confirm in vislab.results.
vislab.results.get_pr_curve(yt, yt_pred)
print(sklearn.metrics.average_precision_score(yt, yt_pred))
print(sklearn.metrics.accuracy_score(yt, yt_pred))
In [33]:
import sklearn.metrics

# Trivial baseline on the test split: score every example 0.0 and see what
# the metrics look like. `dataset`, `np` and `vislab.results` are not created
# in this cell, so they must already exist in the kernel.
df = dataset['test_df']
yt = (df['label'] > 0).astype(int)  # 1 where label > 0, else 0
yt_pred = np.zeros_like(yt).astype(float)  # constant all-zero scores
# Alternative baseline kept for reference (uniform random scores):
#   yt_pred = np.random.rand(len(yt))

# NOTE(review): presumably draws the precision-recall curve -- confirm in vislab.results.
vislab.results.get_pr_curve(yt, yt_pred)
print(sklearn.metrics.average_precision_score(yt, yt_pred))
print(sklearn.metrics.accuracy_score(yt, yt_pred))
In [1]:
%load_ext autoreload
%autoreload 2
import vislab.vw3
import vislab.datasets
import vislab.predict
# Feature directory and the feature names handed to fit_and_predict below.
feat_dirname = '/Users/sergeyk/work/aphrodite/data/feats/ava_style'
feat_names = ['lab_hist']

# Every column of the style label frame whose name does not start with '_'
# is used as a style label.
label_df = vislab.datasets.ava.get_style_df()
all_styles = [x for x in label_df.columns if not x.startswith('_')]

# NOTE(review): the trailing positional arguments (.2, False, 42) look like a
# held-out fraction, a boolean option and a random seed -- confirm against
# vislab.predict.get_multiclass_dataset.
dataset = vislab.predict.get_multiclass_dataset(
    label_df, 'ava_style', 'all_styles', all_styles,
    .2, False, 42)

vw = vislab.vw3.VW(
    '/Users/sergeyk/work/vislab/_temp' + '/vw_ava_style_all_styles')
pred_df, test_score, val_score, train_score = vw.fit_and_predict(
    dataset, feat_names, feat_dirname, force=True)

# Space-separated scores on one line, in the order test, val, train.
print('%s %s %s' % (test_score, val_score, train_score))
In [7]:
import vislab.results
# Multiclass metrics computed on the rows of pred_df whose 'split' is 'test'.
# NOTE(review): 'pred' is presumably the name/prefix of the prediction
# columns -- confirm against vislab.results.multiclass_metrics.
metrics = vislab.results.multiclass_metrics(pred_df[pred_df['split'] == 'test'], 'pred')
# Bar chart of the 'ap' column (presumably average precision -- confirm) of
# the 'binary_metrics_df' table; as the last expression, its return value is
# the cell output.
metrics['binary_metrics_df']['ap'].plot(kind='bar')
Out[7]:
In [100]:
# Test-split rows of the prediction frame, joined (on index) with the test
# frame of the current `dataset`.
# NOTE(review): relies on whichever `dataset` is currently bound in the
# kernel; it is assigned by more than one cell.
pdf = pred_df[pred_df['split'] == 'test'].join(dataset['test_df'])

# The result is assigned to `_` so the cell shows no output value; the call
# itself is asked to print and plot.
_ = vislab.results.binary_metrics(
    pdf,
    balanced=False,
    with_print=True,
    with_plot=True)
In [83]:
import pandas as pd
# Load the stored feature table from the 'df' key of the lab_hist HDF5 file.
feat_df = pd.read_hdf(
'/Users/sergeyk/work/aphrodite/data/feats/ava_style/lab_hist.h5',
'df')
# Bare last expression: the frame itself is displayed as the cell output.
feat_df
Out[83]:
In [14]:
# Collect the first-column entry of every row of feat_df into one array.
# NOTE(review): assumes each entry is an equal-length 1-D feature vector, so
# that X comes out 2-D -- confirm.
X = np.array([row[0] for row in feat_df.values])
# Zero out every value below 1e-4 (in place on X).
X[X < 1e-4] = 0
# Rebuild the frame with one row of X per record in a single column, reusing
# the original index. This rebinds feat_df, so the unthresholded frame is no
# longer reachable under that name after this cell runs.
feat_df = pd.DataFrame([(row,) for row in X], feat_df.index)
In [23]:
# Write the current feat_df under key 'df' to the lab_hist HDF5 file -- the
# same path the features are read from elsewhere in this notebook.
# mode='w' starts a fresh file, so whatever was stored at this path before
# is replaced.
feat_df.to_hdf(
'/Users/sergeyk/work/aphrodite/data/feats/ava_style/lab_hist.h5',
'df', mode='w')
In [84]:
# Spot-check one stored feature vector: the first-column entry of the row at
# position 80.
feat = feat_df.values[80, 0]

# The ten smallest distinct values (np.unique returns them sorted).
print(np.unique(feat)[:10])

# View the vector as a 49x16 grid (so it must hold 784 values) and show the
# transpose. `matshow` is used unqualified, so it has to be in the kernel
# namespace already (e.g. pylab mode). As the last expression, its return
# value is the cell output.
matshow(feat.reshape(49, 16).T)
Out[84]:
In [85]:
import vislab.datasets
import vislab.predict
# Build the dataset for the single column 'style_Complementary_Colors'.
# This rebinds `dataset`, which the multiclass cell also assigns.
label_df = vislab.datasets.ava.get_style_df()

# NOTE(review): the trailing positional arguments (.2, -1, 42) look like a
# held-out fraction, a count/limit sentinel and a random seed -- confirm
# against vislab.predict.get_binary_or_regression_dataset.
dataset = vislab.predict.get_binary_or_regression_dataset(
    label_df, 'ava_style', 'style_Complementary_Colors',
    .2, -1, 42)

# Quick sanity check: size of the training frame and its first three labels.
print(dataset['train_df'].shape)
print(dataset['train_df']['label'].iloc[:3])
In [86]:
# Pick the feature rows for the train and test splits by looking up each
# split's index in feat_df, then drop rows containing missing values.
# NOTE(review): presumably ids without a stored feature come back as NaN rows
# and are removed by dropna() -- confirm for the pandas version in use.
# NOTE(review): `.ix` is deprecated/removed in newer pandas releases; revisit
# if the kernel is upgraded.
feat_df_train = feat_df.ix[dataset['train_df'].index].dropna()
feat_df_test = feat_df.ix[dataset['test_df'].index].dropna()
In [87]:
# Training data: collect the per-row feature entries into one array, zero out
# values below 1e-4, and take "label > 0" as the boolean target. Labels are
# looked up by the index of the feature rows that were kept.
X = np.array([row[0] for row in feat_df_train.values])
X[X < 1e-4] = 0
y = dataset['train_df']['label'].ix[feat_df_train.index].values > 0

# Test data: same recipe (rows are stacked with np.vstack here).
Xt = np.vstack([row[0] for row in feat_df_test.values])
Xt[Xt < 1e-4] = 0
yt = dataset['test_df']['label'].ix[feat_df_test.index].values > 0

# Sanity check: print the shape of each array.
for arr in (X, y, Xt, yt):
    print(arr.shape)
In [89]:
import sklearn.linear_model
import sklearn.grid_search
# SGD-trained linear classifier with logistic loss, 20 passes over the data,
# reshuffling between passes.
clf = sklearn.linear_model.SGDClassifier(loss='log', n_iter=20, shuffle=True)

# Grid search over the regularisation strength alpha, one decade per step
# from 1e-1 down to 1e-8, run in a single process.
grid_clf = sklearn.grid_search.GridSearchCV(
    estimator=clf,
    param_grid={'alpha': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8]},
    n_jobs=1)
grid_clf.fit(X, y)

# Show the winning parameter setting.
print(grid_clf.best_params_)
In [90]:
import vislab.results
import sklearn.metrics
# Class-probability predictions of the grid-searched classifier on the test
# matrix. Unlike the other cells that assign yt_pred, this one leaves it as a
# 2-D array, and only column 1 is used below.
# NOTE(review): column 1 is presumably the positive (True) class -- confirm
# via grid_clf.best_estimator_.classes_.
yt_pred = grid_clf.predict_proba(Xt)

vislab.results.get_pr_curve(yt, yt_pred[:, 1])
print(sklearn.metrics.average_precision_score(yt, yt_pred[:, 1]))
In [47]:
# Fit a VW model on the training matrix and targets.
# NOTE(review): `VW` is used unqualified but is not defined or imported in
# any visible cell (only the module `vislab.vw3` is imported, and its VW is
# driven through fit_and_predict elsewhere). Confirm where this class comes
# from before relying on a fresh-kernel run.
vw = VW('temp_vw_ava_style')
# Last expression: the return value of fit() is the cell output.
vw.fit(X, y)
Out[47]:
In [44]:
# Score the test matrix with the fitted VW model. The result is passed
# straight to the metric functions, with no column selection.
yt_pred = vw.predict_proba(Xt)

vislab.results.get_pr_curve(yt, yt_pred)
print(sklearn.metrics.average_precision_score(yt, yt_pred))
In [45]:
# Histogram of the current prediction scores in 50 bins; the return value of
# hist() is the cell output. `plt` is not imported in any visible cell, so it
# must already be in the kernel namespace.
plt.hist(yt_pred, bins=50)
Out[45]: