In [1]:
from sympy import *
init_printing()
from IPython.display import display

%matplotlib inline

import matplotlib.pyplot as plt

Uniqueness and Exact Matches


In [2]:
#def overlap(x, y):
#    return (x & y).sum()

#def match(x, y, t):
#    return overlap(x, y) >= t

n = Symbol("n", positive=True)
w = Symbol("w", positive=True)

nCw = factorial(n) / (factorial(w) * factorial(n - w))
display(Eq(binomial(n, w), nCw))

display("Valid for n=1024, w=2?", Eq(binomial(n, w), nCw).subs(n, 1024).subs(w, 2))


$${\binom{n}{w}} = \frac{n!}{w! \left(n - w\right)!}$$
'Valid for n=1024, w=2?'
$$\mathrm{True}$$

In [3]:
# Probability of two SDRs being the same
probMatch = 1 / binomial(n, w)
display("Probability of match", probMatch)

def displayProbs(nVal, wVal):
    subbed = probMatch.subs(n, nVal).subs(w, wVal)
    computedProb = subbed.evalf()
    display("Prob with n=%i and w=%i is:" % (nVal, wVal), computedProb)
    
displayProbs(1024, 2)
displayProbs(1024, 5)
displayProbs(1024, 20)


'Probability of match'
$$\frac{1}{{\binom{n}{w}}}$$
'Prob with n=1024 and w=2 is:'
$$1.90921309872923 \cdot 10^{-6}$$
'Prob with n=1024 and w=5 is:'
$$1.07628886213801 \cdot 10^{-13}$$
'Prob with n=1024 and w=20 is:'
$$1.82483624417666 \cdot 10^{-42}$$

Overlap Sets


In [4]:
wx = Symbol("w_x")
omega = Symbol("Omega_x")
b = Symbol("b")

omegaF = binomial(wx, b) * binomial(n - wx, w - b)
omegaEquality = Eq(Abs(omega), omegaF)
display(omegaEquality)


$$\left\lvert{\Omega_{x}}\right\rvert = {\binom{w_{x}}{b}} {\binom{n - w_{x}}{- b + w}}$$

Inexact Matching


In [5]:
theta = Symbol("theta")
fpF = Sum(omegaF, (b, theta, w)) / binomial(n, w)
fp = Symbol("fp_w^n")
display(Eq(fp, fpF))

display("n=1024, w=4, b=2", fpF.subs(n, 1024).subs(wx, w).subs(w, 4).subs(theta, 2).evalf())


$$fp^{n}_{w} = \frac{1}{{\binom{n}{w}}} \sum_{b=\theta}^{w} {\binom{w_{x}}{b}} {\binom{n - w_{x}}{- b + w}}$$
'n=1024, w=4, b=2'
$$6.85523984236413 \cdot 10^{-5}$$

In [6]:
approxFpF = omegaF.subs(b, theta) / binomial(n, w)
display("Exact:", fpF.subs(wx, w).subs(w, 7).subs(theta, 4).subs(n, 1024).evalf())
display("Approx:", approxFpF.subs(wx, w).subs(w, 7).subs(theta, 4).subs(n, 1024).evalf())


'Exact:'
$$2.67069117512708 \cdot 10^{-8}$$
'Approx:'
$$2.66596026115235 \cdot 10^{-8}$$

In [7]:
def plotLines(nVal, wVals, maxTheta, extra):
    thetas = xrange(1, maxTheta+1)
    
    tmp1 = fpF.subs(wx, w).subs(n, nVal)
    for wVal in wVals:
        tmp2 = tmp1.subs(w, wVal)
        errors = [tmp2.subs(theta, i).evalf() for i in thetas]
        label = "n=%i, w=%i" % (nVal, wVal)
        plt.plot(thetas, errors, extra, label=label)

fig = plt.gcf()
fig.set_size_inches(16, 8)

plotLines(1000, (20, 40, 60, 80, 100), 20, '')
plotLines(5000, (20, 40, 60, 80, 100), 20, '')
plotLines(10000, (20, 40, 60, 80, 100), 20, '')

plt.title("Calculated False Match Curves")
plt.xlabel("Theta")
plt.ylabel("False positive rate")
plt.legend()
plt.show()


Subsampling


In [8]:
wxp = Symbol("w_x'")
wy = Symbol("w_y")
oxp = Symbol("Omega_x'")

subsampledOmega = binomial(wxp, b) * binomial(n - wxp, wy - b)
display(Eq(oxp, subsampledOmega))


$$\Omega_{x'} = {\binom{w_{x'}}{b}} {\binom{n - w_{x'}}{- b + w_{y}}}$$

In [9]:
subsampledFpF = Sum(subsampledOmega, (b, theta, w)) / binomial(n, wy)
display(subsampledFpF)

display("n=1024, w=8, w_x'=4, omega=2", subsampledFpF.subs(wy, w).subs(n, 1024).subs(w, 8).subs(wxp, 4).subs(theta, 2).evalf())


$$\frac{1}{{\binom{n}{w_{y}}}} \sum_{b=\theta}^{w} {\binom{w_{x'}}{b}} {\binom{n - w_{x'}}{- b + w_{y}}}$$
"n=1024, w=8, w_x'=4, omega=2"
$$0.000318241665415146$$

Classifying a Set of Vectors


In [10]:
M = Symbol("M")
fpX = Symbol("fp_X")
fpXF = M * fpF
display(Eq(fpX, M * fp))
display(Eq(fpX, fpXF))


$$fp_{X} = M fp^{n}_{w}$$
$$fp_{X} = \frac{M}{{\binom{n}{w}}} \sum_{b=\theta}^{w} {\binom{w_{x}}{b}} {\binom{n - w_{x}}{- b + w}}$$

In [11]:
display("n=64, w=3, theta=2, M=10", fpXF.subs(wx, w).subs(n, 64).subs(w, 3).subs(theta, 2).subs(M, 10).evalf())
display("n=64, w=12, theta=8, M=10", fpXF.subs(wx, w).subs(n, 64).subs(w, 12).subs(theta, 8).subs(M, 10).evalf())
display("n=1024, w=21, theta=14, M=10", fpXF.subs(wx, w).subs(n, 1024).subs(w, 21).subs(theta, 14).subs(M, 10).evalf())


'n=64, w=3, theta=2, M=10'
$$0.0441628264208909$$
'n=64, w=12, theta=8, M=10'
$$0.000423111850363184$$
'n=1024, w=21, theta=14, M=10'
$$8.8349018063014 \cdot 10^{-21}$$

Union Property


In [12]:
s = Symbol("s")
p0 = Symbol("p_0")
p0F = (1 - s) ** M
display(Eq(p0, p0F))


$$p_{0} = \left(- s + 1\right)^{M}$$

In [13]:
mVals = list(xrange(160))
p0F02 = p0F.subs(s, 0.02)
percentOnBits = [1 - p0F02.subs(M, mVal).evalf() for mVal in mVals]
plt.plot(mVals, percentOnBits)
plt.xlabel("M")
plt.ylabel("Percentage of ON bits")
plt.show()



In [14]:
pFp = Symbol("p_fp")
pFpF = (1 - (1 - s) ** M) ** w
display(Eq(pFp, pFpF))


$$p_{fp} = \left(- \left(- s + 1\right)^{M} + 1\right)^{w}$$

In [15]:
display("n=1024, w=2, M=20, s=0.02", pFpF.subs(s, w / n).subs(n, 1024).subs(w, 2).subs(M, 20).evalf())
display("n=1024, w=20, M=20, s=0.02", pFpF.subs(s, w / n).subs(n, 1024).subs(w, 20).subs(M, 20).evalf())
display("n=1024, w=20, M=40, s=0.02", pFpF.subs(s, w / n).subs(n, 1024).subs(w, 20).subs(M, 40).evalf())


'n=1024, w=2, M=20, s=0.02'
$$0.00147042577155698$$
'n=1024, w=20, M=20, s=0.02'
$$1.83536456233615 \cdot 10^{-10}$$
'n=1024, w=20, M=40, s=0.02'
$$5.4821893505826 \cdot 10^{-6}$$

Inexact Matches with Unions


In [16]:
epsilon = Symbol("epsilon")
unionInexactF = Sum(omegaF, (b, theta, w)) / binomial(n, w)
display(Eq(epsilon, unionInexactF))


$$\epsilon = \frac{1}{{\binom{n}{w}}} \sum_{b=\theta}^{w} {\binom{w_{x}}{b}} {\binom{n - w_{x}}{- b + w}}$$

In [17]:
display("n=1024, w=20, M=20, theta=20", unionInexactF.subs(wx, n*(1-p0F)).subs(s, w / n).subs(n, 1024).subs(w, 20).subs(M, 20).subs(theta, 20).evalf())
display("n=1024, w=20, M=20, theta=19", unionInexactF.subs(wx, n*(1-p0F)).subs(s, w / n).subs(n, 1024).subs(w, 20).subs(M, 20).subs(theta, 19).evalf())
display("n=1024, w=20, M=20, theta=18", unionInexactF.subs(wx, n*(1-p0F)).subs(s, w / n).subs(n, 1024).subs(w, 20).subs(M, 20).subs(theta, 18).evalf())
display("n=2048, w=20, M=20, theta=18", unionInexactF.subs(wx, n*(1-p0F)).subs(s, w / n).subs(n, 2048).subs(w, 20).subs(M, 20).subs(theta, 18).evalf())


'n=1024, w=20, M=20, theta=20'
$$1.23782572305677 \cdot 10^{-10}$$
'n=1024, w=20, M=20, theta=19'
$$5.55166122237973 \cdot 10^{-9}$$
'n=1024, w=20, M=20, theta=18'
$$1.18086573478834 \cdot 10^{-7}$$
'n=2048, w=20, M=20, theta=18'
$$3.08795140545437 \cdot 10^{-12}$$

In [24]:
def plotUnionFalsePositives(nVal):
    thetas = list(xrange(1, 81))
    for wVal in (20, 40, 60, 80, 100):
        tmpEq = unionInexactF.subs(wx, n*(1-p0F)).subs(s, w / n).subs(n, nVal).subs(w, wVal).subs(M, 20)
        probs = []

        for t in thetas:
            probs.append(tmpEq.subs(theta, t).evalf())
        label = "n=%i, w=%i, M=20" % (nVal, wVal)
        plt.plot(thetas, probs, label=label)

fig = plt.gcf()
fig.set_size_inches(16, 8)
plotUnionFalsePositives(1000)
plotUnionFalsePositives(5000)
plotUnionFalsePositives(10000)
plt.xlabel("Theta")
plt.ylabel("False match probability")
plt.legend()
plt.show()



In [18]: