In [1]:
%matplotlib inline
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from scipy import stats
from matplotlib.legend_handler import HandlerPathCollection
from matplotlib.collections import PathCollection
from matplotlib import rc_context
from bubbly.util import roc_curve
import matplotlib
matplotlib.rcParams['font.size'] = 10
def white_bg():
return rc_context({'axes.facecolor': 'white'})
figures = {}
In [2]:
votes = pd.DataFrame(json.load(open('../data/expert_votes.json')))
votes = votes.join(pd.get_dummies(votes.vote, prefix='v'))
def mode(x):
return stats.mode(x.values)[0][0]
def plurality(x):
return stats.mode(x.values)[1][0] / x.size
def zscore(x):
return (x - x.mean()) / x.std()
meanvote = votes.groupby(['image_id']).vote.agg({'mean_vote':np.mean,
"std_vote": np.std,
'mode': mode,
'plurality': plurality})
fractions = votes.groupby(['image_id'])['v_1', 'v_2', 'v_3'].mean()
meanvote = pd.merge(meanvote, fractions, left_index=True, right_index=True)
meanvote = pd.merge(meanvote, votes[['hitrate', 'lon', 'lat', 'mlconf', 'image_id']],
right_on='image_id', left_index=True, how='left')
meanvote = meanvote.drop_duplicates(cols='image_id')
meanvote['joint'] = zscore(meanvote.mlconf) + zscore(meanvote.hitrate)
brut = meanvote[meanvote.hitrate.isnull()]
mwp = meanvote[~meanvote.hitrate.isnull()]
In [3]:
print mwp.hitrate.mean(), mwp.hitrate.std()
print mwp.mlconf.mean(), mwp.mlconf.std()
In [4]:
def rank(x):
ind = np.argsort(x)
result = np.empty(x.size)
result[ind] = np.arange(x.size)
return result
def colorplot(df, x, y, hide_right=False, **kwargs):
colors = {1: '#E31A1C', 2: '#FF7F00', 3: '#1F78B4'}
labels = {1: "Non-bubble", 2: "Ambiguous/\nIrregular", 3:"Bubble"}
for i in [3, 2, 1]:
sub = df[df.mode == i]
kwargs.setdefault('s', 70)
plt.scatter(sub[x], sub[y], facecolors='none', lw=2, edgecolors=colors[i], label=labels[i], **kwargs)
clf = LogisticRegression(C=1e12)
x = df[x].values.reshape(-1, 1)
y = (df.mode == 3).values
clf.fit(x, y)
prob = clf.predict_log_proba(x)
like = prob[y==0, 0].sum() + prob[y==1, 1].sum()
xlo, xhi = x.min(), x.max()
x = np.linspace(x.min() - .5 * x.ptp(), x.max() + .5 * x.ptp(), 100).reshape(-1, 1)
y = clf.predict_proba(x)[:, 1]
#plot
ax1 = plt.gca()
ax2 = ax1.twinx()
plt.sca(ax2)
plt.yticks([0, .25, .5, .75, 1])
if hide_right:
ax2.set_yticklabels([' ', ' ', ' ', ' ', ' '])
else:
ax2.set_ylabel("P(Bubble | Score)")
plt.ylim(-.05, 1.05)
plt.grid(False)
plt.plot(x, y, color='#333333', lw=.8, label="P(Bubble | Score)")
plt.title("Log-Likelihood: %0.1f" % like)
plt.sca(ax1)
plt.xlim(xlo - .1 * (xhi - xlo), xhi + .1 * (xhi - xlo))
return clf
def bubble_legend(**kwargs):
h, l = [], []
for ax in plt.gcf().axes:
for hl in zip(*ax.get_legend_handles_labels()):
if len(hl) == 2 and hl[1] not in l:
h.append(hl[0])
l.append(hl[1])
kwargs.setdefault('bbox_to_anchor', (1, 0, 1, 1))
hmap = {PathCollection: HandlerPathCollection(numpoints=1)}
with white_bg():
return plt.legend(h, l, handler_map=hmap, loc='upper left', **kwargs)
In [5]:
def stepplot(binx, bini, values, **kwargs):
assert binx.size == values.size + 1
ind = np.argsort(bini)
x = np.hstack((binx[bini[ind]], binx.max()))
y = np.hstack((values[ind], values[ind][-1]))
plt.plot(x, y, drawstyle='steps-post', **kwargs)
In [6]:
fig = plt.figure(tight_layout=True, figsize=(7, 4.5))
axs = plt.subplot(231), plt.subplot(232), plt.subplot(234), plt.subplot(235)
plt.sca(axs[0])
colorplot(mwp, 'hitrate', 'plurality', hide_right=True, s=30)
plt.xlabel("Hit Rate")
plt.ylabel("Expert Consensus")
plt.annotate('a', (.5, .4), fontsize=20)
plt.yticks([.4, .7, 1])
plt.sca(axs[1])
colorplot(mwp, 'mlconf', 'plurality', s=30)
plt.xlabel("Brut Score")
plt.yticks([.4, .7, 1])
labels = ['' for _ in axs[1].get_yticklabels()]
axs[1].set_yticklabels(labels)
plt.annotate('b', (.8, .4), fontsize=20)
plt.sca(axs[2])
clf = colorplot(mwp, 'joint', 'plurality', hide_right=True, s=30)
plt.xlabel("Joint")
plt.ylabel("Expert Consensus")
plt.yticks([.4, .7, 1])
plt.annotate('c', (4, .4), fontsize=20)
print "Joint regression coef: %f\t intercept: %f" % (clf.coef_, clf.intercept_)
plt.sca(axs[3])
dummy = mwp.copy()
dummy.mode = np.random.randint(0, 2, dummy.shape[0]) * 2 + 1
dummy['test'] = ((dummy.mode == 3).astype(np.int) - 0.5) + np.random.uniform(-.3, .3, dummy.shape[0])
dummy['plurality'] = 1
colorplot(dummy, 'test', 'plurality', s=30)
plt.ylim(.3, 1.1)
plt.yticks([.4, .7, 1])
labels = ['' for _ in axs[1].get_yticklabels()]
axs[3].set_yticklabels(labels)
plt.xlabel("Ideal")
plt.annotate('d', (.6, .4), fontsize=20)
#plt.sca(axs[3])
l = bubble_legend(title="MWP Sample",
bbox_to_anchor=(1.4, 0, 1, 1), fontsize=11, frameon=False)
figures['mwp_score'] = fig
Note that the classifications are somewhat independent, so that adding both metrics improves the ROC curve by 5-10%
In [7]:
is_bubble = mwp.mode == 3
fig = plt.figure(tight_layout=True)
weight = mwp.mlconf.std() / mwp.hitrate.std()
roc_curve(is_bubble, mwp.hitrate, lw=4, label='Hit Rate')
roc_curve(is_bubble, mwp.mlconf, lw=4, label='Brut Score')
roc_curve(is_bubble, mwp.joint, lw=4, label='Joint')
plt.ylim(.4, 1.05)
with white_bg():
plt.legend(title="MWP Sample ROC", loc='lower right')
figures['roc'] = fig
In [8]:
fig = plt.figure(figsize=(7, 4))
plt.subplot(121)
colorplot(brut, 'mlconf', 'plurality')
plt.ylabel("Expert Consensus")
plt.yticks([.4, .7, 1])
plt.xlabel("Brut Score")
plt.ylim(.3, 1.05)
bubble_legend(title='Uniform Sample', bbox_to_anchor=(1.25, 0, .75, 1), frameon=False)
figures['uniform_score'] = fig
In [9]:
from sklearn.cross_validation import train_test_split
f = plt.figure()
def calibration_plot(df):
clf = LogisticRegression(C=1e12)
x = (df.mlconf).values.reshape(-1, 1)
y = (df.mode == 3).values.astype(np.int)
xr, xs, yr, ys = train_test_split(x, y)
clf.fit(xr, yr)
def simulate(clf, x):
yp = clf.predict_proba(x)[:, 1]
return (np.random.uniform(size=yp.size) < yp).astype(np.int)
scores = [clf.score(xs, simulate(clf, xs)) for _ in range(3000)]
plt.hist(scores, bins=np.linspace(.3, 1, 10), label="Realizations of \nLogistic Model")
plt.axvline(clf.score(xs, ys), label="Data", color='k')
with white_bg():
plt.legend(loc='upper left')
calibration_plot(mwp)
plt.title("Milky Way Sample")
plt.xlabel("Classification Score")
plt.show()
calibration_plot(brut)
plt.title("Uniform Sample")
plt.xlabel("Classification Score")
plt.show()
In [11]:
from matplotlib import rcParams as rc
rc['axes.color_cycle']
Out[11]:
In [12]:
minority = dict(red='#FB9A99', orange='#FDBF6F', blue='#A6CEE3')
majority = dict(red='#E31A1C', orange='#FF7F00', blue='#1F78B4')
def vote_plot(df, loc='lower right', figsize=(3,6), labels=False):
fig = plt.figure(figsize=figsize, dpi=100)
df = df.sort(columns=['v_1', 'v_2'], ascending=True)
x = np.arange(df.shape[0])
rows = range(df.shape[0])
c = [majority['blue'] for i in rows]
plt.barh(x, width=df.v_3, left=df.v_1 + df.v_2, color=c,
edgecolor='none', label='Bubble')
c = [majority['orange'] for i in rows]
plt.barh(x, df.v_2, left=df.v_1, color=c, ec='none', label="Ambiguous")
c = [majority['red'] for i in rows]
plt.barh(x, df.v_1, color=c, ec='none', label="Non-bubble")
plt.ylabel("Object")
plt.xlabel("Fraction of votes")
#make a legend. Manually set colors
b = plt.Rectangle((0,0), 1, 1, facecolor=majority['blue'], edgecolor='none')
i = plt.Rectangle((0,0), 1, 1, facecolor=majority['orange'], edgecolor='none')
n = plt.Rectangle((0,0), 1, 1, facecolor=majority['red'], edgecolor='none')
leg = plt.legend([b, i, n], ['Bubble', 'Irregular/\nAmbiguous', 'Non-Bubble'], loc=loc,
fancybox=True)
leg.get_frame().set_edgecolor('none')
#for c, t in zip([majority['blue'], majority['orange'], majority['red']],
# leg.get_texts()):
# t.set_color(c)
plt.grid(False)
if labels:
plt.yticks(np.arange(df.shape[0]), df.image_id.values)
else:
plt.yticks([])
plt.xticks([.25, .5, .75])
plt.ylim(0, df.shape[0])
plt.xlim(0, 1)
ax = plt.gca()
for spine in ['top', 'bottom', 'left', 'right']:
ax.spines[spine].set_visible(False)
ax.yaxis.set_ticks_position('none')
ax.xaxis.set_ticks_position('none')
ax.xaxis.tick_bottom()
#plt.grid(axis='x', color='white')
In [13]:
12. / 45.
Out[13]:
In [14]:
(mwp.mode != 3).sum(), (mwp.mode != 3).mean(), mwp.mode.size
Out[14]:
In [15]:
plt.figure()
vote_plot(mwp, loc='lower right')
plt.title("MWP Sample")
figures['mwp_votes'] = plt.gcf()
In [16]:
vote_plot(brut, loc='upper left')
plt.title("Uniform Sample")
figures['uniform_votes'] = plt.gcf()
In [17]:
from IPython.display import Image, display
def show(df, labels):
f = plt.Figure()
with white_bg():
vote_plot(df.set_index(df.image_id).ix[labels], labels=True, figsize=(3, 2))
plt.title("MWP Sample")
plt.show()
for lbl in labels:
print lbl
display(Image(url='http://wino.herokuapp.com/static/%s.png' % lbl, width=400))
show(mwp, ['m_344426_42_29_-42',
'm_330289_657_77_-42',
'm_44282_206_43_-64',
'm_333689_-444_28_-2',
'm_336268_286_46_-41'])
In [18]:
labels = ['b_44169_91_27_-5',
'b_37886_-353_66_35']
show(brut, labels)
In [19]:
def summarize_consensus(df):
print "Fraction of consensus > .5 : %0.2f" % (df.plurality > 0.5).mean()
print "Fraction of consensus > .75: %0.2f" % (df.plurality > .75).mean()
print "Average consensus: %0.2f" % (df.plurality.mean())
print "Fraction of mode !=3: %0.2f" % (df.mode != 3).mean()
print "MWP Summary"
summarize_consensus(mwp)
print
print "Brut Summary"
summarize_consensus(brut)