In [8]:
from collections import defaultdict
import numpy as np
class DAO(object):
def __init__(self, target, type, da=10):
"""
Parameters
----------
target: str
A target object to improve
type: str, ['doi', 'url']
A type of target
da: int
The discretization of the authorship histogram
"""
self.target = target
self.type = type
self._reputation = {}
self._authorship = {}
self.rating = {}
self._author_log = [] # author transactions
self._rating_log = [] # rating transactions
self._da = da
@property
def url(self):
if self.type == 'doi':
return 'http://doi.org/{}'.format(self.target)
if self.type == 'url':
return self.target
def reputation(self, user_id):
if user_id not in self._reputation:
# Add the user and create a prior reputation
self.add_new_user(user_id)
return self._reputation[user_id]
def _normalize(self, dic):
sc = sum(dic.values())
for i in dic.keys():
dic[i] /= sc
def add_new_user(self, user_id):
# TODO: ask if other existing users know this user
# -> Use that information to calculate the reputation of the new user
# else: Algorithm for generating prior reputation
# Should we have an algorithm to incentivize first comers in terms of reputation?
for i in self._reputation:
self._reputation[i] -= 1.0 / len(self._reputation)
# TODO: formalize the function to favour first comers
self._reputation[user_id] = -0.1 / (len(self._reputation)+4)
for i in self._reputation:
self._reputation[i] += 1.0 / len(self._reputation)
self._normalize(self._reputation)
def authorship(self, author_id):
if author_id not in self._authorship:
self.add_new_author(author_id)
return self._authorship[author_id]
def add_new_author(self, author_id):
self._authorship[author_id] = np.ones([self._da]) / self._da
def add_author(self, user_id, author_id, percentage):
# register the transaction
self._author_log.append({
'user_id': user_id,
'author_id': author_id,
'percentage': percentage})
# get the user reputation
rep = self.reputation(user_id)
# calculate the new authorship percentage
# -> Bayes update using the existing likelihood function of the authorship
# percentage multiplied with the reputation of the user and with the
# prior authorship percentage
assert 0.0 <= percentage <= 1.0, "percentage value should be between [0.0, 1.0], instead percentage={}".format(percentage)
ind = int(np.floor(percentage*self._da))
if percentage == 1.0:
# we put it in the last box
ind = self._da - 1
prior = np.mean(self.authorship(author_id) * np.arange(self._da)/self._da)
for i, hyp in enumerate(np.arange(self._da)/self._da):
likelihood = self.authorship(author_id)[i] * self.reputation(user_id) * (hyp-percentage)
print(likelihood)
For each author and user there is a joint probability distribution.
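The Bayes update that `add_author` sketches above (it currently only prints the likelihoods) could be applied as in the following minimal sketch, assuming a Gaussian likelihood centred on the reported percentage whose width shrinks with the reporting user's reputation, as in the experiments further down; the bin count and the numbers used here are illustrative only.
In [ ]:
# Sketch of the discretized Bayes update DAO.add_author could perform.
# Assumption: Gaussian likelihood with variance (1 - reputation + 1e-4),
# mirroring the likelihood used in the experiments below.
import numpy as np
da = 10                                  # number of histogram bins
hyp = np.arange(da) / da                 # hypothesised authorship percentages
prior = np.ones(da) / da                 # uniform prior histogram
reported = 0.2                           # percentage reported by the user
reputation = 0.8                         # reputation of the reporting user
likelihood = np.exp(-(hyp - reported)**2 / (2*(1.0 - reputation + 1.E-4)))
posterior = prior * likelihood
posterior /= posterior.sum()             # re-normalize the histogram
print(posterior)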
In [9]:
dao = DAO('10.5194/wes-2015-5-RC2', type='doi')
dao.url
Out[9]:
In [10]:
dao = DAO('http://doi.org/10.5194/wes-2015-5-RC2', type='url')
dao.url
Out[10]:
In [11]:
from numpy.random import random
import numpy as np
In [12]:
for i in range(10):
dao.add_author('user_%d'%(i), 'author%d'%(np.floor(random()*10)), 0.2)
In [6]:
dao._reputation
Out[6]:
In [9]:
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode()
In [13]:
order = np.argsort(list(dao._reputation.values()))[::-1]
strip = lambda x: np.array(list(x))[order]
iplot({'data':[{'x':strip(dao._reputation.keys()),
'y':strip(dao._reputation.values())}],
'layout':{'xaxis':{'title':'User names'},
'yaxis':{'title':'Reputation [-]'}}})
In [4]:
import numpy as np
from scipy.optimize import fmin_cobyla
import pandas as pd
In [8]:
class LS_LMSRMarket(object):
def __init__(self, possible_outcomes, vig=0.1, init=1.0, market='LS_LMSR', b=None):
"""
Parameters
----------
possible_outcomes: list
List of strings describing the possible outcomes of the task
vig: float
Parameter of the `alpha` variable used to calculate the `b` variable.
Corresponds to the market "vig" value - typically between 5 and 30 percent in real-world markets.
init: float
The initial subsidy of the market, spread equally in this algorithm over all the outcomes.
market: str, 'LS_LMSR' | 'LMSR'
The market type. If 'LMSR' is selected, then a `b` value should be given.
"""
self.market = market
if self.market == 'LMSR':
if b is None:
raise Exception('b value is needed for LMSR markets')
self._b = b
self.possible_outcomes = possible_outcomes
self.init = init
self.vig = vig
self._x = [np.ones([self.n])*init/self.n]
self._book = []
self.market_value = init
self._history = []
@property
def alpha(self):
return self.vig*self.n/np.log(self.n)
@property
def n(self):
return len(self.possible_outcomes)
@property
def b(self):
if self.market == 'LMSR':
return self._b
elif self.market == 'LS_LMSR':
return self._b_func(self.x)
else:
raise Exception('market must be set to either "LMSR" or "LS_LMSR"')
def _b_func(self, x):
"""Calculate the `b` equation: b=\alpha \Sigma x"""
return self.alpha * x.sum()
@property
def book(self):
return pd.DataFrame(self._book)
@property
def x(self):
return self._x[-1].copy()
def cost(self, x):
return self.b*np.log(np.exp(x/self.b).sum())
def _new_x(self, shares, outcome):
new_x = self.x
new_x[outcome] += shares
return new_x
def price(self, shares, outcome):
return self._price(self._new_x(shares, outcome))
def _price(self, x):
return self.cost(x)-self.cost(self.x)
def register_x(self, x):
self._x.append(x)
def calculate_shares(self, paid, outcome):
obj_func = lambda s: np.abs(self.price(s, outcome) - paid)
return fmin_cobyla(obj_func, paid/self.p[outcome], [])
def buy_shares(self, name, paid, outcome):
shares = self.calculate_shares(paid, outcome)
self.register_x(self._new_x(shares, outcome))
self._book.append({'name':name,
'shares':shares,
'outcome':outcome,
'paid':paid})
self._history.append(self.p)
self.market_value += paid
print("%s paid %2.2f EUR, for %2.2f shares of outcome %d, which will give him %2.2f EUR if he wins"%(
name, paid, shares, outcome, shares/self.x[outcome]*self.market_value))
return shares
def sell_shares(self, name, shares, outcome):
price = self.price(-shares, outcome)
self._book.append({'name':name,
'shares':-shares,
'outcome':outcome,
'paid':-price})
self.market_value -= price
self._history.append(self.p)
return price
def outcome_probability(self):
K = np.exp(self.x/self.b)
return K/K.sum()
def add_outcome(self, new_outcome):
new_x = self.x - np.ones([self.n])*self.init/self.n
new_x = np.array(new_x.tolist() + [0.])
new_x += np.ones([self.n + 1]) * self.init/(self.n + 1)
self.register_x(new_x)
self.possible_outcomes.append(new_outcome)
@property
def p(self):
return self.outcome_probability()
def history(self):
return np.array(self._history)
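As a quick sanity check before using the class: in both market types the price of a trade is just the difference of the cost function $C(x)=b\,\log\sum_i e^{x_i/b}$ evaluated after and before the trade, with $b=\alpha\sum_i x_i$ in the LS_LMSR case. A minimal sketch (the outcome names and the 2.0-share trade below are illustrative only):
In [ ]:
# Sanity check: price(s, outcome) should equal C(x + s*e_outcome) - C(x),
# with b evaluated at the current state (illustrative outcome names).
market = LS_LMSRMarket(['outcome_A', 'outcome_B'], init=1.0)
s = 2.0
x_old = market.x
x_new = market._new_x(s, 0)
b = market.b
manual_price = b*np.log(np.exp(x_new/b).sum()) - b*np.log(np.exp(x_old/b).sum())
print(manual_price, market.price(s, 0))  # the two values should agree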
In [9]:
ls = LS_LMSRMarket(['author1'])
In [10]:
ls.p
Out[10]:
In [ ]:
ls.buy_shares('bob', 10, 0)
In [ ]:
ls.p
In [107]:
ls.add_outcome('author2', )
In [108]:
ls.init
Out[108]:
In [109]:
ls.p
Out[109]:
In [110]:
ls.x
Out[110]:
Scenario: A reviewer $r_j$ with a reputation $R(r_j)$ tells the DAO that an author $a_i$ has an authorship percentage $A(a_i|r_j)$.
$A$ is a function $\mathbb{N} \times \mathbb{N}\rightarrow[0,1]$: given an author address and a reviewer address it returns a percentage of authorship, e.g. $A(a_x|r_y) = 0.3$.
Furthermore $A$ has the following properties that should be enforced:
The belief of authorship $B_A$ is a function $\mathbb{N} \rightarrow [0,1] \rightarrow \mathbb{R}$. For a given author it gives the current PDF of the belief in its authorship percentage.
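As a sketch of the update the cells below experiment with: given a report $A(a_i|r_j)=p^\ast$ from a reviewer with reputation $R(r_j)$, the belief is multiplied by a Gaussian likelihood whose width shrinks as the reputation grows, $B_A^{new}(p) \propto B_A(p)\,\exp\!\left(-\frac{(p-p^\ast)^2}{2\,(1-R(r_j))}\right)$, and then re-normalized (the code below adds a small $10^{-4}$ regularizer to keep the variance positive).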
In [1]:
import numpy as np
In [113]:
# discretization of the authorship percentage interval
n = 1000
# The different values of the PMF
id_a = np.linspace(0,1,n)
# Uniform prior PMF
authorship = np.ones([n])/n
# Gaussian prior PMF centred at 0.4
authorship = np.exp(-(0.4-id_a)**2/(2*0.05))
fig = pl.figure(figsize=(15,5))
ax = pl.subplot(1,2,1)
m = 100
vals = np.random.random(m)
maxli = np.zeros([m])
freq = np.zeros([m])
for i, a in enumerate(vals):
for b in range(n):
if a < 0 or a > 1:
likelihood = 0.0
else:
sig = 0.25
likelihood = np.exp(-abs(id_a[b] - a)**2/(2*sig))
if likelihood < 0:
likelihood = 0.0
if likelihood > 1.0:
likelihood = 1.0
authorship[b] *= likelihood
# normalize
authorship /= authorship.sum()
maxli[i] = id_a[np.argmax(authorship)]
freq[i] = vals[:i+1].mean()  # running frequentist estimate
pl.plot(id_a, authorship)
pl.plot(a, authorship[int(np.floor(a*n))],'o')
ax = pl.subplot(1,2,2)
pl.plot(range(m), maxli, label='Bayesian')
pl.plot(range(m), freq, label='Frequentist')
pl.legend()
print('Frequentist:', vals.mean())
print('Bayesian:', id_a[np.argmax(authorship)])
Now the reputation of the reviewer is added to the likelihood function.
In [138]:
# discretization of the authorship percentage interval
n = 1000
# The different values of the PMF
id_a = np.linspace(0,1,n)
# Uniform prior PMF
#authorship = np.ones([n])/n
# Broad Gaussian prior PMF centred at 0.5
authorship = np.exp(-(0.5-id_a)**2/(2*0.5))
fig = pl.figure(figsize=(15,5))
ax = pl.subplot(1,2,1)
m = 40
vals = np.random.random(m)
maxli = np.zeros([m])
freq = np.zeros([m])
for i, a in enumerate(vals):
reviewer_reputation = np.clip(0.25 + np.random.random()/2.0, 0.0, 1.0)
for b in range(n):
if a < 0 or a > 1:
likelihood = 0.0
else:
# The higher the reviewer's reputation, the smaller the sigma of the
# normal likelihood function (i.e. the more weight the report carries)
likelihood = np.exp(-abs(id_a[b] - a)**2/(2*(1-reviewer_reputation+1.E-4)))
if likelihood < 0:
likelihood = 0.0
if likelihood > 1.0:
likelihood = 1.0
authorship[b] *= likelihood
# normalize
authorship /= authorship.sum()
maxli[i] = id_a[np.argmax(authorship)]
if i > 1:
freq[i] = vals[:i].mean()
pl.plot(id_a, authorship)
pl.plot(a, authorship[int(np.floor(a*n))],'o')
ax = pl.subplot(1,2,2)
pl.plot(range(m), maxli, label='Bayesian')
pl.plot(range(m), freq, label='Frequentist')
pl.legend()
print('Frequentist:', vals.mean())
print('Bayesian:', id_a[np.argmax(authorship)])
In [136]:
vals
Out[136]:
In [96]:
import matplotlib.pylab as pl
%matplotlib inline
In [97]:
pl.plot(id_a, authorship)
Out[97]:
In [52]:
id_a[np.argmax(authorship)]
Out[52]: