Knows What It Knows (KWIK)

A framework for self-aware learning

  • Combines elements of Probably Approximately Correct (PAC) and mistake-bound models
  • Useful for active learning

Motivation

  • Polynomial sample complexity guarantee algorithms
    • Rmax algorithm that estimates transition probabilities for each state-action-next action of MDP
    • Accuracy bound using Hoeffding bounds
  • KWIK
    1. Only makes accurate predictions
    2. Can opt-out of prediction by saying "i don't know", which is polynomially bound

Example 1


In [1]:
from collections import Counter


class Kwik:
    def __init__(self, number_of_patrons):
        # Init
        self.current_i_do_not_knows = 0
        self.number_of_patrons = number_of_patrons
        self.max_i_do_not_knows = self.number_of_patrons * (self.number_of_patrons - 1)
        self.instigator = None
        self.peacemaker = None
        self.candidates = {candidate_type: set(range(self.number_of_patrons))
                           for candidate_type in ['instigator', 'peacemaker']}
        self.peacemaker_candidates = set(range(self.number_of_patrons))
        self.solved = False
        self.memory = {}

    def _remove_candidate(self, patron_index, candidate_type):
        if not self.solved and not (candidate_type == 'instigator' and self.instigator is not None) \
                and not (candidate_type == 'peacemaker' and self.peacemaker is not None):
            candidates_for_type = self.candidates[candidate_type]
            candidates_for_type.discard(patron_index)

            if len(candidates_for_type) == 1:
                remaining = candidates_for_type.pop()
                if candidate_type == 'instigator':
                    self.instigator = remaining
                    if self.peacemaker is not None:
                        self.solved = True
                    else:
                        self._remove_candidate(remaining, 'peacemaker')
                else:
                    self.peacemaker = remaining
                    if self.instigator is not None:
                        self.solved = True
                    else:
                        self._remove_candidate(remaining, 'instigator')

    def _learn(self, at_establishment, fight_occurred, counts):
        if counts[True] == 1 and fight_occurred:
            # If only one person is there and a fight breaks out -> he's the instigator
            instigator = at_establishment.index(True)
            self.instigator = instigator
            self.candidates['instigator'] = set()
            self._remove_candidate(instigator, 'peacemaker')
        elif counts[True] == 1 and not fight_occurred:
            # If only one person is there and no fight breaks out -> he's NOT the instigator
            # remove him from the list of instigators
            index = at_establishment.index(True)
            self._remove_candidate(index, 'instigator')
        else:
            # Some people are present, eliminate candidates
            for patron_index, patron_present in enumerate(at_establishment):
                # If the patron was present
                if patron_present:
                    if fight_occurred:
                        # The patron is not a peacemaker
                        self._remove_candidate(patron_index, 'peacemaker')
                    else:
                        # The patron is not an instigator
                        # TODO: this is not correct
                        self._remove_candidate(patron_index, 'instigator')

    def _all_known(self, at_establishment):
        if at_establishment[self.instigator]:
            if at_establishment[self.peacemaker]:
                return 0
            else:
                return 1
        else:
            return 0

    def _determine_and_learn(self, at_establishment, fight_occurred):
        counts = Counter(at_establishment)

        if len(counts) == 1:
            # Everyone is present so no fight and nothing to learn
            return 0
        else:
            self._learn(at_establishment, fight_occurred, counts)
            if self.current_i_do_not_knows == self.max_i_do_not_knows:
                raise ValueError("Exhausted ⟂")
            else:
                self.current_i_do_not_knows += 1
                return -1

    def run_instance(self, at_establishment, fight_occurred):
        # Make it hashable
        at_establishment = tuple(at_establishment)

        if at_establishment in self.memory:
            # We've seen this before, return from memory
            return int(self.memory[at_establishment])
        else:
            self.memory[at_establishment] = fight_occurred
            if self.solved:
                # Instigator and peacemaker are already known
                return self._all_known(at_establishment)
            else:
                # Another case
                return self._determine_and_learn(at_establishment, fight_occurred)

Example 1

2 patrons


In [33]:
learner = Kwik(number_of_patrons=2)
# both patrons 0 and 1 are candidates of being both I and P
learner.candidates


Out[33]:
{'instigator': {0, 1}, 'peacemaker': {0, 1}}

In [34]:
# We haven't memorized anything
learner.memory


Out[34]:
{}

In [35]:
# P and I present and no fight
# Since we know that there's at least one I and one P
# if everyone is present or absent we haven't learned anything
# and we know that there's no fight
learner.run_instance([True, True], False)


Out[35]:
0

In [36]:
# Memorize this instance
learner.memory


Out[36]:
{(True, True): False}

In [37]:
# Nothing was learnt from this case
learner.candidates


Out[37]:
{'instigator': {0, 1}, 'peacemaker': {0, 1}}

In [38]:
# Patron 1 present and patron 0 absent
# We still don't know who is who so return -1 (don't know)
learner.run_instance([True, False], True)


Out[38]:
-1

In [39]:
# Memorize
learner.memory


Out[39]:
{(True, False): True, (True, True): False}

In [40]:
# Since a fight broke out we know 0 was the I
# and we can deduce that 1 is P
learner.candidates


Out[40]:
{'instigator': set(), 'peacemaker': set()}

In [41]:
learner.instigator


Out[41]:
0

In [42]:
learner.peacemaker


Out[42]:
1

Example 2

3 patrons


In [2]:
learner = Kwik(3)
learner.candidates


Out[2]:
{'instigator': {0, 1, 2}, 'peacemaker': {0, 1, 2}}

In [3]:
learner.run_instance([True, True, True], False)


Out[3]:
0

In [4]:
learner.run_instance([False, False, True], False)


Out[4]:
-1

In [5]:
learner.candidates


Out[5]:
{'instigator': {0, 1}, 'peacemaker': {0, 1, 2}}

In [6]:
learner.run_instance([True, True, False], True)


Out[6]:
-1

In [7]:
learner.candidates


Out[7]:
{'instigator': {0, 1}, 'peacemaker': set()}

In [8]:
learner.peacemaker


Out[8]:
2

In [9]:
learner.run_instance([False, True, True], False)


Out[9]:
-1

In [10]:
# Is this correct?
# We eliminate patron 1 as an instigator and deduce 0 is I
learner.candidates


Out[10]:
{'instigator': set(), 'peacemaker': set()}

In [11]:
learner.instigator


Out[11]:
0

In [12]:
learner.run_instance([True, False, True], False)


Out[12]:
0

In [13]:
learner.run_instance([True, False, False], True)


Out[13]:
1

In [14]:
learner.run_instance([True, False, False], True)


Out[14]:
1

Another example

This time the composition is (Normal patron, I, P)


In [21]:
learner = Kwik(3)
learner.candidates


Out[21]:
{'instigator': {0, 1, 2}, 'peacemaker': {0, 1, 2}}

In [22]:
learner.run_instance([False, False, True], False)


Out[22]:
-1

In [23]:
learner.candidates


Out[23]:
{'instigator': {0, 1}, 'peacemaker': {0, 1, 2}}

In [24]:
learner.run_instance([False, True, True], False)


Out[24]:
-1

In [25]:
learner.candidates


Out[25]:
{'instigator': set(), 'peacemaker': {1, 2}}

In [26]:
# This is not correct
learner.instigator


Out[26]:
0

In [27]:
learner.run_instance([True, False, True], False)


Out[27]:
-1

In [28]:
learner.candidates


Out[28]:
{'instigator': set(), 'peacemaker': {1, 2}}

In [29]:
learner.run_instance([True, True, False], True)


Out[29]:
-1

In [30]:
learner.candidates


Out[30]:
{'instigator': set(), 'peacemaker': set()}

In [31]:
learner.peacemaker


Out[31]:
2

In [32]:
learner.instigator


Out[32]:
0

In [33]:
# Incorrect
learner.run_instance([False, True, False], True)


Out[33]:
0

How do we fix this?