In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy
from scipy import stats

In [2]:
import comment_orderings

In [3]:
# Shared simulation object; its scorer is reassigned before each batch of runs.
simulation = comment_orderings.Simulation(
    visitors_per_h=10,
    prob_comment=0.1,
    downvote_prob_leave=0.5,
    novote_prob_leave=0.15,
    scorer=comment_orderings.scores.Random(),
)

In [4]:
# Switch to the Perfect scorer and keep the value returned by each of 1000 runs.
simulation.scorer = comment_orderings.scores.Perfect()
perfect_vals = [simulation.run() for _trial in range(1000)]

In [5]:
# Mean of the per-run values collected with the Perfect scorer.
print(numpy.mean(perfect_vals))


0.9685833333333332

In [6]:
# Fit a gamma distribution to the Perfect-scorer results and evaluate its
# density on 100 evenly spaced points from 0 to the largest observed value.
params = stats.gamma.fit(perfect_vals)
x = numpy.linspace(0, max(perfect_vals), 100)
pdf_fitted = stats.gamma.pdf(x, *params)

# Normalised histogram with the fitted density drawn over it in red.
plt.hist(perfect_vals, bins=30, density=True)
plt.plot(x, pdf_fitted, color="r");



In [36]:
# Fitted gamma parameters as returned by stats.gamma.fit: (a, loc, scale).
# In Wikipedia's shape/scale notation, a = k and scale = theta.
# The fit result is stored as `params`; the previous `param` is not defined
# anywhere in this notebook and raises NameError on a fresh kernel.
params


Out[36]:
(4.248022409777739, 0.2001932330273756, 0.18111690594076196)

In [4]:
# Switch to the Ratio scorer and keep the value returned by each of 1000 runs.
simulation.scorer = comment_orderings.scores.Ratio()
ratio_vals = [simulation.run() for _trial in range(1000)]

In [5]:
# Report the mean, then show the normalised histogram restricted to [0, 3].
print(numpy.mean(ratio_vals))
plt.hist(ratio_vals, bins=30, range=(0, 3), density=True);


0.8126416666666668

In [7]:
# Switch to the Difference scorer and keep the value returned by each of 1000 runs.
simulation.scorer = comment_orderings.scores.Difference()
diff_vals = [simulation.run() for _trial in range(1000)]

In [8]:
# Report the mean, then show the normalised histogram of the Difference results.
print(numpy.mean(diff_vals))
plt.hist(diff_vals, bins=30, density=True);


0.856875

In [9]:
# BayesAvg scorer: 1000 runs, then the mean and a normalised histogram.
simulation.scorer = comment_orderings.scores.BayesAvg()
bayes_vals = [simulation.run() for _trial in range(1000)]

print(numpy.mean(bayes_vals))
plt.hist(bayes_vals, bins=30, density=True);


0.8667083333333333

In [10]:
# Reddit scorer: 1000 runs, then the mean and a normalised histogram.
simulation.scorer = comment_orderings.scores.Reddit()
reddit_vals = [simulation.run() for _trial in range(1000)]

print(numpy.mean(reddit_vals))
plt.hist(reddit_vals, bins=30, density=True);


0.8463208333333333

In [11]:
# YouTube scorer: 1000 runs, then the mean and a normalised histogram.
simulation.scorer = comment_orderings.scores.YouTube()
yt_vals = [simulation.run() for _trial in range(1000)]

print(numpy.mean(yt_vals))
plt.hist(yt_vals, bins=30, density=True);


0.6507083333333332

In [12]:
# HN scorer: 1000 runs, then the mean and a normalised histogram over [0, 3].
simulation.scorer = comment_orderings.scores.HN()
hn_vals = [simulation.run() for _trial in range(1000)]

print(numpy.mean(hn_vals))
plt.hist(hn_vals, bins=30, range=(0, 3), density=True);


0.8978375

In [25]:
# Single HN-scored run, then list its comments from highest to lowest score.
simulation.scorer = comment_orderings.scores.HN()
simulation.run()
simulation.comments.sort(reverse=True, key=lambda comment: comment.score)
simulation.comments


Out[25]:
[Comment(created_at=16.067, upvote_prob=0.671, downvote_prob=0.324, num_up=47, num_down=5, score=0.6577170085782645),
 Comment(created_at=9.741, upvote_prob=0.671, downvote_prob=0.076, num_up=82, num_down=3, score=0.5153777993908693),
 Comment(created_at=2.008, upvote_prob=0.496, downvote_prob=0.14, num_up=110, num_down=10, score=0.32472489096363205),
 Comment(created_at=0.603, upvote_prob=0.434, downvote_prob=0.051, num_up=72, num_down=12, score=0.17468019075199578),
 Comment(created_at=15.063, upvote_prob=0.162, downvote_prob=0.03, num_up=8, num_down=0, score=0.09442375303200196),
 Comment(created_at=9.841, upvote_prob=0.112, downvote_prob=0.054, num_up=12, num_down=3, score=0.0601307254899444),
 Comment(created_at=13.054, upvote_prob=0.184, downvote_prob=0.058, num_up=6, num_down=0, score=0.059745770398012044),
 Comment(created_at=18.879, upvote_prob=0.151, downvote_prob=0.008, num_up=2, num_down=0, score=0.05840583285740951),
 Comment(created_at=11.046, upvote_prob=0.226, downvote_prob=0.049, num_up=6, num_down=0, score=0.046088011758903355),
 Comment(created_at=8.937, upvote_prob=0.114, downvote_prob=0.061, num_up=10, num_down=6, score=0.024230383503174892),
 Comment(created_at=16.669, upvote_prob=0.021, downvote_prob=0.009, num_up=1, num_down=0, score=0.017952678727477512),
 Comment(created_at=10.544, upvote_prob=0.071, downvote_prob=0.008, num_up=1, num_down=1, score=0.0),
 Comment(created_at=18.778, upvote_prob=0.489, downvote_prob=0.038, num_up=0, num_down=0, score=0.0),
 Comment(created_at=20.385, upvote_prob=0.151, downvote_prob=0.041, num_up=1, num_down=0, score=0.0),
 Comment(created_at=22.996, upvote_prob=0.579, downvote_prob=0.087, num_up=0, num_down=0, score=0.0),
 Comment(created_at=23.297, upvote_prob=0.047, downvote_prob=0.024, num_up=0, num_down=0, score=0.0),
 Comment(created_at=2.209, upvote_prob=0.158, downvote_prob=0.222, num_up=19, num_down=20, score=-0.003330099004936879),
 Comment(created_at=3.213, upvote_prob=0.048, downvote_prob=0.017, num_up=1, num_down=3, score=-0.007197689727160675),
 Comment(created_at=13.556, upvote_prob=0.055, downvote_prob=0.044, num_up=1, num_down=2, score=-0.010692321175843098),
 Comment(created_at=12.653, upvote_prob=0.041, downvote_prob=0.027, num_up=0, num_down=2, score=-0.018851217612895994),
 Comment(created_at=4.418, upvote_prob=0.104, downvote_prob=0.166, num_up=5, num_down=10, score=-0.019842891860277323),
 Comment(created_at=18.577, upvote_prob=0.045, downvote_prob=0.604, num_up=1, num_down=3, score=-0.054198462500746385)]

In [4]:
# ModifiedBayes scorer: 1000 runs, then the mean and a normalised histogram.
simulation.scorer = comment_orderings.scores.ModifiedBayes()
mb_vals = [simulation.run() for _trial in range(1000)]

print(numpy.mean(mb_vals))
plt.hist(mb_vals, bins=30, density=True);


0.9195958333333333

In [5]:
# Random scorer: 1000 runs, then the mean and a normalised histogram.
simulation.scorer = comment_orderings.scores.Random()
rand_vals = [simulation.run() for _trial in range(1000)]

print(numpy.mean(rand_vals))
plt.hist(rand_vals, bins=30, density=True);


0.5910541666666665