Knows What It Knows (KWIK)

A framework for self-aware learning

Combines elements of Probably Approximately Correct (PAC) and mistake-bound models
Useful for active learning

Motivation

Polynomial sample complexity guarantee algorithms
- Rmax algorithm that estimates transition probabilities for each state-action-next action of MDP
- Accuracy bound using Hoeffding bounds
KWIK
1. Only makes accurate predictions
2. Can opt-out of prediction by saying "i don't know", which is polynomially bound

Example 1



In [1]:

    
from collections import Counter


class Kwik:
    def __init__(self, number_of_patrons):
        # Init
        self.current_i_do_not_knows = 0
        self.number_of_patrons = number_of_patrons
        self.max_i_do_not_knows = self.number_of_patrons * (self.number_of_patrons - 1)
        self.instigator = None
        self.peacemaker = None
        self.candidates = {candidate_type: set(range(self.number_of_patrons))
                           for candidate_type in ['instigator', 'peacemaker']}
        self.peacemaker_candidates = set(range(self.number_of_patrons))
        self.solved = False
        self.memory = {}

    def _remove_candidate(self, patron_index, candidate_type):
        if not self.solved and not (candidate_type == 'instigator' and self.instigator is not None) \
                and not (candidate_type == 'peacemaker' and self.peacemaker is not None):
            candidates_for_type = self.candidates[candidate_type]
            candidates_for_type.discard(patron_index)

            if len(candidates_for_type) == 1:
                remaining = candidates_for_type.pop()
                if candidate_type == 'instigator':
                    self.instigator = remaining
                    if self.peacemaker is not None:
                        self.solved = True
                    else:
                        self._remove_candidate(remaining, 'peacemaker')
                else:
                    self.peacemaker = remaining
                    if self.instigator is not None:
                        self.solved = True
                    else:
                        self._remove_candidate(remaining, 'instigator')

    def _learn(self, at_establishment, fight_occurred, counts):
        if counts[True] == 1 and fight_occurred:
            # If only one person is there and a fight breaks out -> he's the instigator
            instigator = at_establishment.index(True)
            self.instigator = instigator
            self.candidates['instigator'] = set()
            self._remove_candidate(instigator, 'peacemaker')
        elif counts[True] == 1 and not fight_occurred:
            # If only one person is there and no fight breaks out -> he's NOT the instigator
            # remove him from the list of instigators
            index = at_establishment.index(True)
            self._remove_candidate(index, 'instigator')
        else:
            # Some people are present, eliminate candidates
            for patron_index, patron_present in enumerate(at_establishment):
                # If the patron was present
                if patron_present:
                    if fight_occurred:
                        # The patron is not a peacemaker
                        self._remove_candidate(patron_index, 'peacemaker')
                    else:
                        # The patron is not an instigator
                        # TODO: this is not correct
                        self._remove_candidate(patron_index, 'instigator')

    def _all_known(self, at_establishment):
        if at_establishment[self.instigator]:
            if at_establishment[self.peacemaker]:
                return 0
            else:
                return 1
        else:
            return 0

    def _determine_and_learn(self, at_establishment, fight_occurred):
        counts = Counter(at_establishment)

        if len(counts) == 1:
            # Everyone is present so no fight and nothing to learn
            return 0
        else:
            self._learn(at_establishment, fight_occurred, counts)
            if self.current_i_do_not_knows == self.max_i_do_not_knows:
                raise ValueError("Exhausted ⟂")
            else:
                self.current_i_do_not_knows += 1
                return -1

    def run_instance(self, at_establishment, fight_occurred):
        # Make it hashable
        at_establishment = tuple(at_establishment)

        if at_establishment in self.memory:
            # We've seen this before, return from memory
            return int(self.memory[at_establishment])
        else:
            self.memory[at_establishment] = fight_occurred
            if self.solved:
                # Instigator and peacemaker are already known
                return self._all_known(at_establishment)
            else:
                # Another case
                return self._determine_and_learn(at_establishment, fight_occurred)

Example 1

2 patrons



In [33]:

    
learner = Kwik(number_of_patrons=2)
# both patrons 0 and 1 are candidates of being both I and P
learner.candidates









    Out[33]:





{'instigator': {0, 1}, 'peacemaker': {0, 1}}



In [34]:

    
# We haven't memorized anything
learner.memory









    Out[34]:





{}



In [35]:

    
# P and I present and no fight
# Since we know that there's at least one I and one P
# if everyone is present or absent we haven't learned anything
# and we know that there's no fight
learner.run_instance([True, True], False)









    Out[35]:





0



In [36]:

    
# Memorize this instance
learner.memory









    Out[36]:





{(True, True): False}



In [37]:

    
# Nothing was learnt from this case
learner.candidates









    Out[37]:





{'instigator': {0, 1}, 'peacemaker': {0, 1}}



In [38]:

    
# Patron 1 present and patron 0 absent
# We still don't know who is who so return -1 (don't know)
learner.run_instance([True, False], True)









    Out[38]:





-1



In [39]:

    
# Memorize
learner.memory









    Out[39]:





{(True, False): True, (True, True): False}



In [40]:

    
# Since a fight broke out we know 0 was the I
# and we can deduce that 1 is P
learner.candidates









    Out[40]:





{'instigator': set(), 'peacemaker': set()}



In [41]:

    
learner.instigator









    Out[41]:





0



In [42]:

    
learner.peacemaker









    Out[42]:





1

Example 2

3 patrons



In [2]:

    
learner = Kwik(3)
learner.candidates









    Out[2]:





{'instigator': {0, 1, 2}, 'peacemaker': {0, 1, 2}}



In [3]:

    
learner.run_instance([True, True, True], False)









    Out[3]:





0



In [4]:

    
learner.run_instance([False, False, True], False)









    Out[4]:





-1



In [5]:

    
learner.candidates









    Out[5]:





{'instigator': {0, 1}, 'peacemaker': {0, 1, 2}}



In [6]:

    
learner.run_instance([True, True, False], True)









    Out[6]:





-1



In [7]:

    
learner.candidates









    Out[7]:





{'instigator': {0, 1}, 'peacemaker': set()}



In [8]:

    
learner.peacemaker









    Out[8]:





2



In [9]:

    
learner.run_instance([False, True, True], False)









    Out[9]:





-1



In [10]:

    
# Is this correct?
# We eliminate patron 1 as an instigator and deduce 0 is I
learner.candidates









    Out[10]:





{'instigator': set(), 'peacemaker': set()}



In [11]:

    
learner.instigator









    Out[11]:





0



In [12]:

    
learner.run_instance([True, False, True], False)









    Out[12]:





0



In [13]:

    
learner.run_instance([True, False, False], True)









    Out[13]:





1



In [14]:

    
learner.run_instance([True, False, False], True)









    Out[14]:





1

Another example

This time the composition is (Normal patron, I, P)



In [21]:

    
learner = Kwik(3)
learner.candidates









    Out[21]:





{'instigator': {0, 1, 2}, 'peacemaker': {0, 1, 2}}



In [22]:

    
learner.run_instance([False, False, True], False)









    Out[22]:





-1



In [23]:

    
learner.candidates









    Out[23]:





{'instigator': {0, 1}, 'peacemaker': {0, 1, 2}}



In [24]:

    
learner.run_instance([False, True, True], False)









    Out[24]:





-1



In [25]:

    
learner.candidates









    Out[25]:





{'instigator': set(), 'peacemaker': {1, 2}}



In [26]:

    
# This is not correct
learner.instigator









    Out[26]:





0



In [27]:

    
learner.run_instance([True, False, True], False)









    Out[27]:





-1



In [28]:

    
learner.candidates









    Out[28]:





{'instigator': set(), 'peacemaker': {1, 2}}



In [29]:

    
learner.run_instance([True, True, False], True)









    Out[29]:





-1



In [30]:

    
learner.candidates









    Out[30]:





{'instigator': set(), 'peacemaker': set()}



In [31]:

    
learner.peacemaker









    Out[31]:





2



In [32]:

    
learner.instigator









    Out[32]:





0



In [33]:

    
# Incorrect
learner.run_instance([False, True, False], True)









    Out[33]:





0

How do we fix this?

References

http://icml2008.cs.helsinki.fi/papers/627.pdf