DP Tensor Experiment - v1

Purpose: to evaluate whether it's possible to build an automatic sensitivity-calculation mechanism that can reliably produce sensitivity scores using only a forward pass through a dynamic graph.

Conclusions:

  • it can be done!
  • the current model makes a very loose assumption - namely, that the inputs to every operation always come from entirely distinct individuals. This is not true in practice and can lead to sensitivity estimates that are higher than they need to be
  • related to the last point - this DPTensor only keeps track of one sensitivity measure per datapoint. However, each datapoint is often sensitive to multiple different individuals (at different levels of sensitivity). It would be far more appropriate to keep track of the list of individuals who have contributed to each datapoint, along with the corresponding level of sensitivity for each.
  • If we can do the last point, then we can be much more sophisticated about detecting when downstream operations cancel out sensitivity created by upstream operations - which at present we cannot do (as exemplified by the subtraction example at the end of the notebook; see the sketch just below this list)
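
A rough sketch of what that per-individual tracking could look like (this is not part of the experiment itself; the name EntityLinearTensor and its coeffs / entity_ranges fields are hypothetical, it only covers negation, addition and subtraction, and it assumes every datapoint is a linear combination of individual values):

import numpy as np


class EntityLinearTensor:
    """Each datapoint is a linear combination of per-individual values, so the
    same individual's contribution can cancel out under subtraction."""

    def __init__(self, data, coeffs, entity_ranges):
        self.data = data                    # np.ndarray of values
        self.coeffs = coeffs                # one dict per datapoint: entity_id -> coefficient
        self.entity_ranges = entity_ranges  # entity_id -> (min, max) of that individual's raw value

    def __neg__(self):
        flipped = [{e: -c for e, c in row.items()} for row in self.coeffs]
        return EntityLinearTensor(-self.data, flipped, self.entity_ranges)

    def __add__(self, other):
        merged_rows = []
        for a, b in zip(self.coeffs, other.coeffs):
            row = dict(a)
            for e, c in b.items():
                # the same individual appearing in both operands gets their
                # coefficients added - this is what lets x - x cancel
                row[e] = row.get(e, 0.0) + c
            merged_rows.append(row)
        ranges = {**self.entity_ranges, **other.entity_ranges}
        return EntityLinearTensor(self.data + other.data, merged_rows, ranges)

    def __sub__(self, other):
        return self + (-other)

    @property
    def sensitivity(self):
        # worst-case change to each datapoint if any one individual's value
        # moves anywhere within that individual's allowed range
        out = []
        for row in self.coeffs:
            widths = [abs(c) * (self.entity_ranges[e][1] - self.entity_ranges[e][0])
                      for e, c in row.items()]
            out.append(max(widths) if widths else 0.0)
        return np.array(out)


# three datapoints, one individual each, every raw value bounded to [0, 1]
x = EntityLinearTensor(np.array([0., 1., 1.]),
                       [{0: 1.0}, {1: 1.0}, {2: 1.0}],
                       {0: (0., 1.), 1: (0., 1.), 2: (0., 1.)})

print((x - x).sensitivity)   # [0. 0. 0.] - the same individual's contribution cancels
print((x + x).sensitivity)   # [2. 2. 2.] - the same individual is counted twice

Because x - x sums each individual's coefficients to zero, its sensitivity is recognised as zero - exactly the cancellation the interval-style bounds in the DPTensor below cannot express.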

In [1]:
import numpy as np

In [2]:
class DPTensor():
    
    def __init__(self, data, max_values, min_values):
        
        self.data = data
        self.max_values = max_values
        self.min_values = min_values        
    
    def minimum(self, other):

        # if other is a scalar, broadcast it into a constant tensor
        if isinstance(other, (float, int)):
            _data = np.zeros_like(self.data) + other
            other = DPTensor(data=_data, max_values=_data, min_values=_data)

        _new_data = np.minimum(self.data, other.data)

        # the elementwise min can never exceed either operand's upper bound...
        _new_max_values = np.minimum(self.max_values, other.max_values)
        # ...but it can be as low as the smaller of the two lower bounds
        _new_min_values = np.minimum(self.min_values, other.min_values)

        return DPTensor(data=_new_data,
                        max_values=_new_max_values,
                        min_values=_new_min_values)

    def maximum(self, other):

        # if other is a scalar, broadcast it into a constant tensor
        if isinstance(other, (float, int)):
            _data = np.zeros_like(self.data) + other
            other = DPTensor(data=_data, max_values=_data, min_values=_data)

        _new_data = np.maximum(self.data, other.data)

        # the elementwise max can never fall below either operand's lower bound...
        _new_min_values = np.maximum(self.min_values, other.min_values)
        # ...but it can be as high as the larger of the two upper bounds
        _new_max_values = np.maximum(self.max_values, other.max_values)

        return DPTensor(data=_new_data,
                        max_values=_new_max_values,
                        min_values=_new_min_values)
    
    def __add__(self, other):
        # NOTE: This assumes that all entities in self.data and other.data are DIFFERENT
        
        _data = self.data + other.data
        
        # remember, it's not about the maximum value that .data could take on
        # (which would be self.max_values + other.max_values); it's about the
        # maximum amount that .data could CHANGE if one entity is removed.
        _max_values = np.maximum(self.max_values, other.max_values)
        _min_values = np.minimum(self.min_values, other.min_values)
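        # e.g. if both operands are bounded to [0, 1] and owned by different
        # people, removing any one person changes the sum by at most 1, so the
        # combined bounds stay [0, 1] rather than widening to [0, 2]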
        
        return DPTensor(data=_data, 
                        max_values=_max_values,
                        min_values=_min_values)
    
    def __neg__(self):
        # negation flips the bounds: the largest the result can be is the
        # negation of the smallest the input could be, and vice versa

        _data = -self.data

        return DPTensor(data=_data, 
                        max_values=-self.min_values,
                        min_values=-self.max_values)
    
    def __sub__(self, other):
        # NOTE: This assumes that all entities in self.data and other.data are DIFFERENT
        return (-other) + self
    
    @property
    def sensitivity(self):
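        # how much this value could change, in the worst case, if one
        # individual's contribution moved anywhere within its bounds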
        return self.max_values - self.min_values


class DatasetTensor(DPTensor):
    
    def __init__(self, data, entities, epsilon, max_values=None, min_values=None):

        self.data = data
        self.epsilon = epsilon    # per-datapoint epsilon (stored but not used in this experiment)
        self.entities = entities  # which individual each datapoint comes from (stored but not used yet)

        # until the data is clipped, each value's possible range is unbounded
        if max_values is None:
            max_values = np.inf + np.zeros_like(self.data)
        self.max_values = max_values

        if min_values is None:
            min_values = -np.inf + np.zeros_like(self.data)
        self.min_values = min_values

In [3]:
db = DatasetTensor(data=np.array([0.,1,1,0,1,1,0]), epsilon=np.zeros(7) + 0.1, entities=np.array(range(0,7)))

In [4]:
db2 = db.minimum(1).maximum(0)

In [5]:
db2.max_values


Out[5]:
array([1., 1., 1., 1., 1., 1., 1.])

In [6]:
db2.min_values


Out[6]:
array([0., 0., 0., 0., 0., 0., 0.])

In [7]:
db2.sensitivity


Out[7]:
array([1., 1., 1., 1., 1., 1., 1.])

In [8]:
db3 = -db2

In [9]:
db3.max_values


Out[9]:
array([-0., -0., -0., -0., -0., -0., -0.])

In [10]:
db3.min_values


Out[10]:
array([-1., -1., -1., -1., -1., -1., -1.])

In [11]:
db3.sensitivity


Out[11]:
array([1., 1., 1., 1., 1., 1., 1.])

In [12]:
db4 = db3 + db2

In [13]:
db4.sensitivity


Out[13]:
array([2., 2., 2., 2., 2., 2., 2.])

In [163]:
db3 = (db2 - db2)

In [165]:
db3.data


Out[165]:
array([0., 0., 0., 0., 0., 0., 0.])

In [166]:
db3.max_values


Out[166]:
array([1., 1., 1., 1., 1., 1., 1.])

In [167]:
db3.min_values


Out[167]:
array([-1., -1., -1., -1., -1., -1., -1.])
