DP Tensor Experiment - v2

Purpose:

to evaluate whether it's possible to build an automatic sensitivity calculation mechanism which could reliably produce sensitivity scores using only a forward pass through a dynamic graph. However, we want to extend the v1 experiment in several directions:

  • we want to be able to track the per-user sensitivity for each datapoint (meaning multiple sensitivity metrics per datapoint, one per input user)
  • we want to be able to have an accountant which keeps track of the epsilon budget for each user, letting us ask the question "how much noise should I add to satisfy the most constraining privacy budget?"

Conclusions:

  • looks like it can be done - but it requires a lot of sparse operations (could be tough to vectorize in an SMPC compatible way)
  • haven't implemented the accountant yet - will try in the next experiment. However, it seems that keeping things in the sensitivity space is a sort of accountant in and of itself.
  • BUG: unfortunately I used Counter(), which automatically treats every value that hasn't been set as 0. However, there are edge cases where this causes incorrect behavior (when the true range as defined by others doesn't overlap with 0).

In [30]:
from collections import Counter
import numpy as np
class PrivateNumber():
    """A number that carries per-entity upper and lower bounds.

    ``value`` is the concrete number.  ``max_val`` and ``min_val`` are
    ``Counter`` objects keyed by entity (e.g. a user name) that hold the upper
    and lower bound recorded for that entity.  Arithmetic and comparison
    operators return a new ``PrivateNumber`` whose bounds are derived from the
    operands' bounds, so ``sensitivity`` can be read after a purely forward
    computation.

    The set of entities is always taken from the keys of ``max_val``.

    Known limitation: the bounds are ``Counter`` objects, so an entity that is
    missing from an operand reads as 0.  That gives incorrect bounds whenever
    the true range for that entity does not contain 0.
    """

    def __init__(self, value, max_val, min_val):
        """Store the value and its per-entity bound Counters (not copied)."""
        self.value = value
        self.max_val = max_val
        self.min_val = min_val

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _apply_scalar(self, op, swap_bounds=False):
        """Apply ``op`` to the value and to every entity's bounds.

        Args:
            op: one-argument callable applied to each bound and to the value.
            swap_bounds: set when ``op`` reverses ordering (e.g. multiplying
                by a negative number) so the transformed upper bounds become
                the lower bounds and vice versa.
        """
        entities = self.entities
        new_max_val = Counter({e: op(self.max_val[e]) for e in entities})
        new_min_val = Counter({e: op(self.min_val[e]) for e in entities})
        if swap_bounds:
            new_max_val, new_min_val = new_min_val, new_max_val
        return PrivateNumber(op(self.value), new_max_val, new_min_val)

    @staticmethod
    def _product_bounds(lo, hi, factor):
        """Return (min, max) of the range [lo, hi] times factor's overall range.

        All four corner products are considered; checking only two of them
        under-estimates the upper bound when the ranges lie on opposite sides
        of zero (e.g. [1, 2] * [-3, -1]).
        """
        corners = (lo * factor.xmin, lo * factor.xmax,
                   hi * factor.xmin, hi * factor.xmax)
        return min(corners), max(corners)

    @staticmethod
    def _overlaps(lo, hi, other):
        """True when the range [lo, hi] intersects other's [xmin, xmax]."""
        return not (lo > other.xmax or hi < other.xmin)

    def _indicator(self, other, outcome):
        """Build the 0/1 result shared by ``__gt__`` and ``__lt__``.

        ``outcome`` is the already-computed 0/1 value of the comparison.  An
        entity gets bounds [0, 1] when its range overlaps what it is compared
        against, and [0, 0] otherwise.
        """
        new_max_val = Counter()
        new_min_val = Counter()

        if isinstance(other, PrivateNumber):
            for entity in self.entities.union(other.entities):
                self_side = self._overlaps(self.min_val[entity],
                                           self.max_val[entity], other)
                other_side = self._overlaps(other.min_val[entity],
                                            other.max_val[entity], self)
                new_max_val[entity] = int(self_side or other_side)
                new_min_val[entity] = 0
            return PrivateNumber(outcome, new_max_val, new_min_val)

        # public threshold: only entities whose range straddles it matter
        for entity in self.entities:
            in_range = self.min_val[entity] <= other <= self.max_val[entity]
            new_max_val[entity] = int(in_range)
            new_min_val[entity] = 0
        return PrivateNumber(outcome, new_max_val, new_min_val)

    # ------------------------------------------------------------------
    # arithmetic
    # ------------------------------------------------------------------

    def __add__(self, other):
        """Add a PrivateNumber (bounds add entity-wise) or a public number."""
        if isinstance(other, PrivateNumber):
            new_max_val = Counter()
            new_min_val = Counter()
            for entity in self.entities.union(other.entities):
                new_max_val[entity] = self.max_val[entity] + other.max_val[entity]
                new_min_val[entity] = self.min_val[entity] + other.min_val[entity]
            return PrivateNumber(self.value + other.value,
                                 new_max_val,
                                 new_min_val)

        # a public number shifts the value and every bound by the same amount
        return self._apply_scalar(lambda bound: bound + other)

    def __sub__(self, other):
        """Subtract by adding the negation of ``other``."""
        return self + (-other)

    def __mul__(self, other):
        """Multiply by a PrivateNumber or by a public number.

        For two private numbers each entity's range (in either operand) is
        multiplied by the other operand's overall [xmin, xmax] range, and the
        widest resulting bounds are kept.  For a public number every bound is
        scaled, and the bounds are swapped unless the factor is positive.
        """
        if isinstance(other, PrivateNumber):
            new_max_val = Counter()
            new_min_val = Counter()
            for entity in self.entities.union(other.entities):
                self_lo, self_hi = self._product_bounds(
                    self.min_val[entity], self.max_val[entity], other)
                other_lo, other_hi = self._product_bounds(
                    other.min_val[entity], other.max_val[entity], self)
                new_max_val[entity] = max(self_hi, other_hi)
                new_min_val[entity] = min(self_lo, other_lo)
            return PrivateNumber(self.value * other.value,
                                 new_max_val,
                                 new_min_val)

        return self._apply_scalar(lambda bound: bound * other,
                                  swap_bounds=not other > 0)

    def __truediv__(self, other):
        """Divide by a public number.

        Raises:
            Exception: if ``other`` is a PrivateNumber (not supported).
        """
        if isinstance(other, PrivateNumber):
            raise Exception("probably best not to do this - it's gonna be inf a lot")

        # dividing by a negative number reverses ordering, so swap the bounds
        return self._apply_scalar(lambda bound: bound / other,
                                  swap_bounds=other < 0)

    def __neg__(self):
        """Negate by multiplying with -1 (which swaps the bounds)."""
        return self * -1

    # ------------------------------------------------------------------
    # comparisons
    # ------------------------------------------------------------------

    def __gt__(self, other):
        """Return a 0/1 PrivateNumber for ``self > other``.

        BUG!: Counter() defaults to 0, so entities missing from one operand
        are compared as if their range were [0, 0].
        """
        other_value = other.value if isinstance(other, PrivateNumber) else other
        return self._indicator(other, int(self.value > other_value))

    def __lt__(self, other):
        """Return a 0/1 PrivateNumber for ``self < other``.

        BUG!: Counter() defaults to 0, so entities missing from one operand
        are compared as if their range were [0, 0].
        """
        other_value = other.value if isinstance(other, PrivateNumber) else other
        return self._indicator(other, int(self.value < other_value))

    # ------------------------------------------------------------------
    # clamping
    # ------------------------------------------------------------------

    def max(self, other):
        """Clamp from below: element-wise maximum with a public number.

        Both bounds are clamped, so the upper bound can never end up below the
        lower bound when ``other`` exceeds the original upper bound.

        Raises:
            Exception: if ``other`` is a PrivateNumber (not implemented).
        """
        if isinstance(other, PrivateNumber):
            raise Exception("Not implemented yet")

        # inside the lambda ``max`` is the builtin, not this method
        return self._apply_scalar(lambda bound: max(bound, other))

    def min(self, other):
        """Clamp from above: element-wise minimum with a public number.

        Both bounds are clamped, so the lower bound can never end up above the
        upper bound when ``other`` is below the original lower bound.

        Raises:
            Exception: if ``other`` is a PrivateNumber (not implemented).
        """
        if isinstance(other, PrivateNumber):
            raise Exception("Not implemented yet")

        # inside the lambda ``min`` is the builtin, not this method
        return self._apply_scalar(lambda bound: min(bound, other))

    def hard_sigmoid(self):
        """Clamp the number into [0, 1]."""
        return self.min(1).max(0)

    def hard_sigmoid_deriv(self):
        """Indicator of 0 < self < 1, plus 0.01 below 0 and minus 0.01 above 1."""
        return ((self < 1) * (self > 0)) + (self < 0) * 0.01 - (self > 1) * 0.01

    # ------------------------------------------------------------------
    # inspection
    # ------------------------------------------------------------------

    def __repr__(self):
        """Value followed by the max and min Counters, space separated."""
        return " ".join((str(self.value), str(self.max_val), str(self.min_val)))

    @property
    def xmin(self):
        """Smallest lower bound over all entries of ``min_val``.

        Raises ValueError when ``min_val`` is empty.
        """
        return min(self.min_val.values())

    @property
    def xmax(self):
        """Largest upper bound over all entries of ``max_val``.

        Raises ValueError when ``max_val`` is empty.
        """
        return max(self.max_val.values())

    @property
    def entities(self):
        """Set of entities tracked by this number (the keys of ``max_val``)."""
        return set(self.max_val)

    @property
    def sensitivity(self):
        """Largest (max - min) spread over all entities; 0 with no entities."""
        return max((upper - self.min_val[entity]
                    for entity, upper in self.max_val.items()),
                   default=0)
    
# Sample inputs for the cells below: each number lists, per entity, the upper
# bound (max_val) and lower bound (min_val) attached to it.
x = PrivateNumber(
    value=0.5,
    max_val=Counter({"bob": 4, "amos": 3}),
    min_val=Counter({"bob": 3, "amos": 2}),
)
y = PrivateNumber(
    value=1,
    max_val=Counter({"bob": 1}),
    min_val=Counter({"bob": -1}),
)
z = PrivateNumber(
    value=-0.5,
    max_val=Counter({"sue": 2}),
    min_val=Counter({"sue": -1}),
)

In [31]:
# Compare two private numbers; the result is itself a PrivateNumber whose value is 0 or 1.
a = x > y

In [34]:
# Inspect the sensitivity (largest per-entity max - min spread) of the comparison result.
a.sensitivity


Out[34]:
0

In [ ]:


In [2]:
# Add two private numbers; per-entity bounds are summed over the union of their entities.
a = x + y

In [4]:
# Multiply the sum by a third private number that is tracked for a different entity ("sue").
b = a * z

In [6]:
# Display b: its value followed by the max and min Counters (see PrivateNumber.__repr__).
b


Out[6]:
-1.0 Counter({'sue': 8, 'bob': 8, 'amos': 6}) Counter({'amos': -3, 'sue': -6, 'bob': -6})

In [ ]:


In [2]:
# class PrivacyAccountant():
    
#     def __init__(self, default_budget = 0.1):
        
#         self.entity2epsilon = {}
#         self.entity2id = {}
#         self.default_budget = default_budget
        
#     def add_entity(self, entity_id, budget=None):
#         """Add another entity to the system to be tracked.
        
#         Args:
#             entity_id: a string or other unique identifier of the entity
#             budget: the epsilon level defining this user's privacy budget
#         """
        
#         if(budget is None):
#             budget = self.default_budget
        
#         self.entity2id[entity_id] = len(self.entity2id)
#         self.entity2epsilon[self.entity2id[entity_id]] = budget
        
        
# accountant = PrivacyAccountant()

# class DPTensor():
    
#     def __init__(self, data, entities, max_values=None, min_values=None):
        
#         assert data.shape == entities.shape#[0:-1]

#         self.data = data
#         self.entities = entities
        
#         if max_values is None:
#             max_values = np.inf + np.zeros_like(self.data)
            
#         assert max_values.shape == data.shape
#         self.max_values = max_values    
        
#         if min_values is None:
#             min_values = -np.inf + np.zeros_like(self.data)            
            
#         assert min_values.shape == data.shape            
#         self.min_values = min_values

#     def sum(self, dim=0):
        
#         _new_data = self.data.sum(dim)
        
#         return _new_data
    
#     @property
#     def sensitivity(self):
#         return self.max_values - self.min_values

In [163]:
# results, tags = grid.search("diabetes","#data", verbose=False)
# dataset = results['alice'][0][0:5][:,0:4]
# n_ent = dataset.shape[0]
# n_classes = dataset.shape[1]

# for i in range(n_ent):
#     accountant.add_entity("Diabetes Patient #" + str(i))
    
# d2 = dataset.clone().get()
# entities = th.arange(0,n_ent).view(-1,1).expand(n_ent,n_classes)#.unsqueeze(2)
# db = DPTensor(data=d2, 
#               entities=entities, 
#               max_values=d2.max(0)[0].expand(n_ent,n_classes), 
#               min_values=d2.min(0)[0].expand(n_ent,n_classes))

In [ ]:


In [ ]: