In [1]:

    
from sympy import *
init_printing()
from IPython.display import display

%matplotlib inline

import matplotlib.pyplot as plt

Uniqueness and Exact Matches



In [2]:

    
#def overlap(x, y):
#    return (x & y).sum()

#def match(x, y, t):
#    return overlap(x, y) >= t

n = Symbol("n", positive=True)
w = Symbol("w", positive=True)

nCw = factorial(n) / (factorial(w) * factorial(n - w))
display(Eq(binomial(n, w), nCw))

display("Valid for n=1024, w=2?", Eq(binomial(n, w), nCw).subs(n, 1024).subs(w, 2))









    





$${\binom{n}{w}} = \frac{n!}{w! \left(n - w\right)!}$$






    





'Valid for n=1024, w=2?'






    





$$\mathrm{True}$$



In [3]:

    
# Probability of two SDRs being the same
probMatch = 1 / binomial(n, w)
display("Probability of match", probMatch)

def displayProbs(nVal, wVal):
    subbed = probMatch.subs(n, nVal).subs(w, wVal)
    computedProb = subbed.evalf()
    display("Prob with n=%i and w=%i is:" % (nVal, wVal), computedProb)
    
displayProbs(1024, 2)
displayProbs(1024, 5)
displayProbs(1024, 20)









    





'Probability of match'






    





$$\frac{1}{{\binom{n}{w}}}$$






    





'Prob with n=1024 and w=2 is:'






    





$$1.90921309872923 \cdot 10^{-6}$$






    





'Prob with n=1024 and w=5 is:'






    





$$1.07628886213801 \cdot 10^{-13}$$






    





'Prob with n=1024 and w=20 is:'






    





$$1.82483624417666 \cdot 10^{-42}$$

Overlap Sets



In [4]:

    
wx = Symbol("w_x")
omega = Symbol("Omega_x")
b = Symbol("b")

omegaF = binomial(wx, b) * binomial(n - wx, w - b)
omegaEquality = Eq(Abs(omega), omegaF)
display(omegaEquality)









    





$$\left\lvert{\Omega_{x}}\right\rvert = {\binom{w_{x}}{b}} {\binom{n - w_{x}}{- b + w}}$$

Inexact Matching



In [5]:

    
theta = Symbol("theta")
fpF = Sum(omegaF, (b, theta, w)) / binomial(n, w)
fp = Symbol("fp_w^n")
display(Eq(fp, fpF))

display("n=1024, w=4, b=2", fpF.subs(n, 1024).subs(wx, w).subs(w, 4).subs(theta, 2).evalf())









    





$$fp^{n}_{w} = \frac{1}{{\binom{n}{w}}} \sum_{b=\theta}^{w} {\binom{w_{x}}{b}} {\binom{n - w_{x}}{- b + w}}$$






    





'n=1024, w=4, b=2'






    





$$6.85523984236413 \cdot 10^{-5}$$



In [6]:

    
approxFpF = omegaF.subs(b, theta) / binomial(n, w)
display("Exact:", fpF.subs(wx, w).subs(w, 7).subs(theta, 4).subs(n, 1024).evalf())
display("Approx:", approxFpF.subs(wx, w).subs(w, 7).subs(theta, 4).subs(n, 1024).evalf())









    





'Exact:'






    





$$2.67069117512708 \cdot 10^{-8}$$






    





'Approx:'






    





$$2.66596026115235 \cdot 10^{-8}$$



In [7]:

    
def plotLines(nVal, wVals, maxTheta, extra):
    thetas = xrange(1, maxTheta+1)
    
    tmp1 = fpF.subs(wx, w).subs(n, nVal)
    for wVal in wVals:
        tmp2 = tmp1.subs(w, wVal)
        errors = [tmp2.subs(theta, i).evalf() for i in thetas]
        label = "n=%i, w=%i" % (nVal, wVal)
        plt.plot(thetas, errors, extra, label=label)

fig = plt.gcf()
fig.set_size_inches(16, 8)

plotLines(1000, (20, 40, 60, 80, 100), 20, '')
plotLines(5000, (20, 40, 60, 80, 100), 20, '')
plotLines(10000, (20, 40, 60, 80, 100), 20, '')

plt.title("Calculated False Match Curves")
plt.xlabel("Theta")
plt.ylabel("False positive rate")
plt.legend()
plt.show()

Subsampling



In [8]:

    
wxp = Symbol("w_x'")
wy = Symbol("w_y")
oxp = Symbol("Omega_x'")

subsampledOmega = binomial(wxp, b) * binomial(n - wxp, wy - b)
display(Eq(oxp, subsampledOmega))









    





$$\Omega_{x'} = {\binom{w_{x'}}{b}} {\binom{n - w_{x'}}{- b + w_{y}}}$$



In [9]:

    
subsampledFpF = Sum(subsampledOmega, (b, theta, w)) / binomial(n, wy)
display(subsampledFpF)

display("n=1024, w=8, w_x'=4, omega=2", subsampledFpF.subs(wy, w).subs(n, 1024).subs(w, 8).subs(wxp, 4).subs(theta, 2).evalf())









    





$$\frac{1}{{\binom{n}{w_{y}}}} \sum_{b=\theta}^{w} {\binom{w_{x'}}{b}} {\binom{n - w_{x'}}{- b + w_{y}}}$$






    





"n=1024, w=8, w_x'=4, omega=2"






    





$$0.000318241665415146$$

Classifying a Set of Vectors



In [10]:

    
M = Symbol("M")
fpX = Symbol("fp_X")
fpXF = M * fpF
display(Eq(fpX, M * fp))
display(Eq(fpX, fpXF))









    





$$fp_{X} = M fp^{n}_{w}$$






    





$$fp_{X} = \frac{M}{{\binom{n}{w}}} \sum_{b=\theta}^{w} {\binom{w_{x}}{b}} {\binom{n - w_{x}}{- b + w}}$$



In [11]:

    
display("n=64, w=3, theta=2, M=10", fpXF.subs(wx, w).subs(n, 64).subs(w, 3).subs(theta, 2).subs(M, 10).evalf())
display("n=64, w=12, theta=8, M=10", fpXF.subs(wx, w).subs(n, 64).subs(w, 12).subs(theta, 8).subs(M, 10).evalf())
display("n=1024, w=21, theta=14, M=10", fpXF.subs(wx, w).subs(n, 1024).subs(w, 21).subs(theta, 14).subs(M, 10).evalf())









    





'n=64, w=3, theta=2, M=10'






    





$$0.0441628264208909$$






    





'n=64, w=12, theta=8, M=10'






    





$$0.000423111850363184$$






    





'n=1024, w=21, theta=14, M=10'






    





$$8.8349018063014 \cdot 10^{-21}$$

Union Property



In [12]:

    
s = Symbol("s")
p0 = Symbol("p_0")
p0F = (1 - s) ** M
display(Eq(p0, p0F))









    





$$p_{0} = \left(- s + 1\right)^{M}$$



In [13]:

    
mVals = list(xrange(160))
p0F02 = p0F.subs(s, 0.02)
percentOnBits = [1 - p0F02.subs(M, mVal).evalf() for mVal in mVals]
plt.plot(mVals, percentOnBits)
plt.xlabel("M")
plt.ylabel("Percentage of ON bits")
plt.show()



In [14]:

    
pFp = Symbol("p_fp")
pFpF = (1 - (1 - s) ** M) ** w
display(Eq(pFp, pFpF))









    





$$p_{fp} = \left(- \left(- s + 1\right)^{M} + 1\right)^{w}$$



In [15]:

    
display("n=1024, w=2, M=20, s=0.02", pFpF.subs(s, w / n).subs(n, 1024).subs(w, 2).subs(M, 20).evalf())
display("n=1024, w=20, M=20, s=0.02", pFpF.subs(s, w / n).subs(n, 1024).subs(w, 20).subs(M, 20).evalf())
display("n=1024, w=20, M=40, s=0.02", pFpF.subs(s, w / n).subs(n, 1024).subs(w, 20).subs(M, 40).evalf())









    





'n=1024, w=2, M=20, s=0.02'






    





$$0.00147042577155698$$






    





'n=1024, w=20, M=20, s=0.02'






    





$$1.83536456233615 \cdot 10^{-10}$$






    





'n=1024, w=20, M=40, s=0.02'






    





$$5.4821893505826 \cdot 10^{-6}$$

Inexact Matches with Unions



In [16]:

    
epsilon = Symbol("epsilon")
unionInexactF = Sum(omegaF, (b, theta, w)) / binomial(n, w)
display(Eq(epsilon, unionInexactF))









    





$$\epsilon = \frac{1}{{\binom{n}{w}}} \sum_{b=\theta}^{w} {\binom{w_{x}}{b}} {\binom{n - w_{x}}{- b + w}}$$



In [17]:

    
display("n=1024, w=20, M=20, theta=20", unionInexactF.subs(wx, n*(1-p0F)).subs(s, w / n).subs(n, 1024).subs(w, 20).subs(M, 20).subs(theta, 20).evalf())
display("n=1024, w=20, M=20, theta=19", unionInexactF.subs(wx, n*(1-p0F)).subs(s, w / n).subs(n, 1024).subs(w, 20).subs(M, 20).subs(theta, 19).evalf())
display("n=1024, w=20, M=20, theta=18", unionInexactF.subs(wx, n*(1-p0F)).subs(s, w / n).subs(n, 1024).subs(w, 20).subs(M, 20).subs(theta, 18).evalf())
display("n=2048, w=20, M=20, theta=18", unionInexactF.subs(wx, n*(1-p0F)).subs(s, w / n).subs(n, 2048).subs(w, 20).subs(M, 20).subs(theta, 18).evalf())









    





'n=1024, w=20, M=20, theta=20'






    





$$1.23782572305677 \cdot 10^{-10}$$






    





'n=1024, w=20, M=20, theta=19'






    





$$5.55166122237973 \cdot 10^{-9}$$






    





'n=1024, w=20, M=20, theta=18'






    





$$1.18086573478834 \cdot 10^{-7}$$






    





'n=2048, w=20, M=20, theta=18'






    





$$3.08795140545437 \cdot 10^{-12}$$



In [24]:

    
def plotUnionFalsePositives(nVal):
    thetas = list(xrange(1, 81))
    for wVal in (20, 40, 60, 80, 100):
        tmpEq = unionInexactF.subs(wx, n*(1-p0F)).subs(s, w / n).subs(n, nVal).subs(w, wVal).subs(M, 20)
        probs = []

        for t in thetas:
            probs.append(tmpEq.subs(theta, t).evalf())
        label = "n=%i, w=%i, M=20" % (nVal, wVal)
        plt.plot(thetas, probs, label=label)

fig = plt.gcf()
fig.set_size_inches(16, 8)
plotUnionFalsePositives(1000)
plotUnionFalsePositives(5000)
plotUnionFalsePositives(10000)
plt.xlabel("Theta")
plt.ylabel("False match probability")
plt.legend()
plt.show()



In [18]: