In [1]:

from sympy import *
from sympy.plotting import plot
init_printing()

from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
init_notebook_mode(connected=True)




requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window.Plotly) {{require(['plotly'],function(plotly) {window.Plotly=plotly;});}}



# Compute False Match Probability

Give a grid cell representation (one cell active per module), calculate the probability that a random other representation will have at least theta overlap (false match).



In [2]:

# n = number of cells per module
# m = number of modules
# theta = number of matching modules needed to call two representations equal
# t = exact match threshold used as an intermediate variable
# U = number of representations in union
# s = number of subsampled bits, or the number of synapses that a segment receives from a subset of the active bits
# omega = threshold for subsampling cells that results in overall match
# o = intermediate variable for omega that represents exact number of subsampling cells that match
# phi = total number of subsampling cells
n, m, theta, t, U, E, s, omega, o, phi = symbols('n, m, theta, t, U, E, s, omega, o, phi')

collision = Sum(binomial(m, t)*((n-1) ** (m-t)), (t, theta, m)) / (n ** m)
collision




Out[2]:

$$n^{- m} \sum_{t=\theta}^{m} \left(n - 1\right)^{m - t} {\binom{m}{t}}$$



# Plot False Match Probabilities



In [3]:

x=range(5, 30)
plots = [
Scatter(x=x, y=[float(collision.subs(theta, m/2).subs(m, numModules).subs(n, numCells).evalf()) for numCells in x], name="Modules: {}".format(numModules))
for numModules in (20, 40, 60, 80, 100)
]

layout = Layout(
title= 'Error Probability (threshold=50% of modules)',
xaxis= dict(
title= '# of Cells Per Module',
),
yaxis=dict(
title= 'Probability of False Match',
),
)
fig = Figure(data=plots, layout=layout)
iplot(fig)




require(["plotly"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";Plotly.newPlot("b0a133fe-25da-4219-8c85-faecbe8a25ed", [{"y": [0.0025948274006740175, 0.0005985039366041009, 0.00016416824067363322, 5.18349726471249e-05, 1.8359621843660388e-05, 7.15090402108378e-06, 3.0158789673608126e-06, 1.3607321253522466e-06, 6.505302284200064e-07, 3.2699534541741657e-07, 1.717345283771905e-07, 9.374695307689745e-08, 5.296106079810804e-08, 3.0851051529519175e-08, 1.847350762224536e-08, 1.1340718060930294e-08, 7.121108233467906e-09, 4.564627545592271e-09, 2.9816573132291816e-09, 1.9817102190779227e-09, 1.3383368025328808e-09, 9.173017577485468e-10, 6.374052796175021e-10, 4.4860012077276774e-10, 3.194987392081684e-10], "x": [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], "type": "scatter", "name": "Modules: 20"}, {"y": [2.1691893126332917e-05, 1.2089974874808825e-06, 9.379931452865704e-08, 9.55677274884794e-09, 1.2185166309687686e-09, 1.8718579354173986e-10, 3.362936811005744e-11, 6.902002064682187e-12, 1.5882013692107826e-12, 4.035860482233449e-13, 1.1186433057538359e-13, 3.34755459580669e-14, 1.0723375313370181e-14, 3.6506662757920594e-15, 1.3127628586694559e-15, 4.960102241653535e-16, 1.9602589882262421e-16, 8.071250605937334e-17, 3.450435160875041e-17, 1.526843041011106e-17, 6.974886822164969e-18, 3.2814607784099266e-18, 1.586577234064697e-18, 7.86850143062357e-19, 3.9959112753925796e-19], "x": [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], "type": "scatter", "name": "Modules: 40"}, {"y": [2.061216887210954e-07, 2.7847710798625953e-09, 6.122331733956415e-11, 2.0152415077634873e-12, 9.257429640104364e-14, 5.612335869789724e-15, 4.2972144999494897e-16, 4.0133078156651625e-17, 4.4463025386609724e-18, 5.713386132865956e-19, 8.359466820392284e-20, 1.3716006358382463e-20, 2.4917340323050275e-21, 4.958232223561453e-22, 1.070843344585057e-22, 2.4905071921150958e-23, 6.195350760378986e-24, 1.638695512436018e-24, 4.585047341008378e-25, 1.3509251983032364e-25, 4.174628376925292e-26, 1.348202417257475e-26, 4.535881129140811e-27, 1.5852574265609915e-27, 5.740587520969946e-28], "x": [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], "type": "scatter", "name": "Modules: 60"}, {"y": [2.0682110254832403e-09, 6.779916425685248e-12, 4.226166664058823e-14, 4.495827618986563e-16, 7.442558382008985e-18, 1.781002663536882e-19, 5.81251561608107e-21, 2.470498313184775e-22, 1.3179048987585307e-23, 8.56392622988142e-25, 6.614739217126373e-26, 5.9510871100909796e-27, 6.131388783352107e-28, 7.131559572920061e-29, 9.250867508927386e-30, 1.3243846111098818e-30, 2.0737596790954958e-31, 3.5237537438167303e-32, 6.453159853833513e-33, 1.266003213714848e-33, 2.6465024430752076e-34, 5.867098797206469e-35, 1.373562952426905e-35, 3.382981150178912e-36, 8.735625098190398e-37], "x": [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], "type": "scatter", "name": "Modules: 80"}, {"y": [2.139250662619578e-11, 1.7023294367034588e-14, 3.009305631436628e-17, 1.0347815236449967e-19, 6.173849654538396e-22, 5.8320387857343456e-24, 8.11336566415714e-26, 1.5694443196251826e-27, 4.031475086602668e-29, 1.3248291234337941e-30, 5.402131090704992e-32, 2.664969085250927e-33, 1.5572235729464916e-34, 1.0587264908390598e-35, 8.248717428782942e-37, 7.269307456113735e-38, 7.164866505047195e-39, 7.821212149795715e-40, 9.374884376052286e-41, 1.2246346019834913e-41, 1.7318010576676173e-42, 2.635516080628719e-43, 4.2935048360275396e-44, 7.452091090097457e-45, 1.3721846587181514e-45], "x": [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], "type": "scatter", "name": "Modules: 100"}], {"title": "Error Probability (threshold=50% of modules)", "xaxis": {"title": "# of Cells Per Module"}, "yaxis": {"title": "Probability of False Match"}}, {"linkText": "Export to plot.ly", "showLink": true})});



# Compute False Match with Unions

Given a union of grid cell representations, compute the probability that a random new representation will have at least theta overlapping bits with the union.



In [4]:

expectedUnionSize = U - (U + (n*(((n-1)/n) ** U)) - n)

numUnionMatches = Sum(binomial(m, t)*(E ** t)*((n-E) ** (m-t)), (t, theta, m))
numUnionMatches = numUnionMatches.subs(E, expectedUnionSize)
numUnionTotal = n ** m
unionProb = numUnionMatches / numUnionTotal
unionProb




Out[4]:

$$n^{- m} \sum_{t=\theta}^{m} \left(n \left(\frac{1}{n} \left(n - 1\right)\right)^{U}\right)^{m - t} \left(- n \left(\frac{1}{n} \left(n - 1\right)\right)^{U} + n\right)^{t} {\binom{m}{t}}$$




In [5]:

# Debugging, ignore
print expectedUnionSize.subs(U, 60).subs(n, 20).evalf()




19.0786040202610




In [6]:

print collision.subs(m, 80).subs(n, 20).subs(theta, 20).evalf()
# Should be identical when U is 1
print unionProb.subs(m, 80).subs(n, 20).subs(theta, 20).subs(U, 1).evalf()




1.82479860292780e-9
1.82479860292780e-9



# Plot the False Match Probability for Unions



In [7]:

x = range(51)
unionPlots = [
Scatter(x=x, y=[float(unionProb.subs(n, 20).subs(theta, threshold).subs(m, 80).subs(U, i).evalf()) for i in x], name="Threshold={}".format(threshold))
for threshold in (20, 40, 60, 80)]

layout = Layout(
title= 'Error Probability with Unions (80 modules, 20 cells per module)',
xaxis= dict(
title= '# of Representations in Union',
),
yaxis=dict(
title= 'Probability of False Match',
),
)
fig = Figure(data=unionPlots, layout=layout)

iplot(fig)




require(["plotly"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";Plotly.newPlot("927260a9-4456-47f6-b6d1-f7cce7bc7602", [{"y": [0.0, 1.8247986029278026e-09, 6.469787257599686e-05, 0.007679934235088286, 0.09322151799805686, 0.34625644017296614, 0.6598355536157001, 0.8715896841353494, 0.9633239517826396, 0.9917047052283867, 0.998454557276352, 0.9997551371434554, 0.999966152547535, 0.9999958339269625, 0.9999995358381054, 0.999999952557489, 0.9999999955021831, 0.9999999996008447, 0.999999999966587, 0.9999999999973446, 0.9999999999997985, 0.9999999999999853, 0.999999999999999, 0.9999999999999999, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50], "type": "scatter", "name": "Threshold=20"}, {"y": [0.0, 1.3243846111098818e-30, 7.204359894123987e-20, 4.00167983722314e-14, 2.0317878767909814e-10, 7.944109708217283e-08, 6.187022804655979e-06, 0.00015955099826536406, 0.0018479297651353985, 0.01172210301587541, 0.04673451063883227, 0.12952983232896748, 0.26971614433272595, 0.4491678348090523, 0.6303911413560204, 0.779747511208504, 0.8829946786242259, 0.9442197656995985, 0.9759492194767924, 0.9905457860965938, 0.9965855610721114, 0.9988588572829297, 0.9996447298424744, 0.9998963461121735, 0.9999715041907862, 0.9999925822037213, 0.9999981635112228, 0.9999995658353766, 0.9999999016353296, 0.9999999785731161, 0.9999999954991141, 0.9999999990858405, 0.9999999998200335, 0.9999999999655829, 0.999999999993593, 0.9999999999988368, 0.9999999999997937, 0.9999999999999643, 0.9999999999999939, 0.999999999999999, 0.9999999999999998, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50], "type": "scatter", "name": "Threshold=40"}, {"y": [0.0, 1.1185402382398384e-60, 1.0310351322458232e-43, 3.0750198892595395e-34, 7.9358565524268545e-28, 4.325030680383421e-23, 2.0657924498876027e-19, 1.8475924146951335e-16, 4.868474897293366e-14, 5.063480338933382e-12, 2.538198665752308e-10, 7.071889586778293e-09, 1.2168403053488907e-07, 1.400913818968222e-06, 1.1485838763363427e-05, 7.047280214327524e-05, 0.0003368300691824411, 0.0012961443293391082, 0.004127579331721338, 0.01113365104640918, 0.025949659383822397, 0.053172033977734984, 0.09724930724302452, 0.16091912100125047, 0.24385394208638983, 0.34218871120381295, 0.4492033728293886, 0.556882992502393, 0.6577186201453634, 0.7461116629458132, 0.8190266719273739, 0.8758928113132349, 0.9180015748200078, 0.9477211297135042, 0.9677833160764757, 0.9807778042883836, 0.9888770188352665, 0.9937475859355788, 0.996580431380712, 0.9981775621671911, 0.999052187997364, 0.9995182979751003, 0.9997604542280839, 0.9998832953745987, 0.9999442323202028, 0.9999738333391487, 0.9999879320443371, 0.9999945240331961, 0.9999975530412453, 0.9999989222830656, 0.9999995317839744], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50], "type": "scatter", "name": "Threshold=60"}, {"y": [0.0, 8.271806125530276e-105, 1.3193780538690285e-81, 2.165970783076165e-68, 2.926459318798534e-59, 2.3032223151269357e-52, 7.040771514394954e-47, 2.300009112840315e-42, 1.4689938152598942e-38, 2.708382997168852e-35, 1.880764955250699e-32, 5.946928803954335e-30, 9.84960131567412e-28, 9.503934034384788e-26, 5.802903044795568e-24, 2.3938412071507636e-22, 7.033678061531675e-21, 1.5369310954168535e-19, 2.588416795809538e-18, 3.4619307354756764e-17, 3.771336541061379e-16, 3.4193004496355436e-15, 2.628459172599938e-14, 1.7408549848532738e-13, 1.0073936701451315e-12, 5.156296862579873e-12, 2.3597834315965246e-11, 9.74894329326739e-11, 3.666823582497922e-10, 1.2652320674112745e-09, 4.03233792844051e-09, 1.1942961207763372e-08, 3.3055057259533895e-08, 8.592300942123856e-08, 2.1071777412318934e-07, 4.895648048830927e-07, 1.0816242496912472e-06, 2.2803400169617004e-06, 4.602081615269192e-06, 8.91667339305309e-06, 1.6630552360426202e-05, 2.9932036205431964e-05, 5.210506528279363e-05, 8.791269476744188e-05, 0.00014404476309309112, 0.0002296160977229882, 0.0003566931805683165, 0.000540819409003046, 0.0008015030131467822, 0.001162628252253066, 0.001652750360366511], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50], "type": "scatter", "name": "Threshold=80"}], {"title": "Error Probability with Unions (80 modules, 20 cells per module)", "xaxis": {"title": "# of Representations in Union"}, "yaxis": {"title": "Probability of False Match"}}, {"linkText": "Export to plot.ly", "showLink": true})});



# Compute False Match with Unions and Subsampling



In [8]:

numSubsamplingMatches = binomial(m, s) * numUnionMatches.subs(m, s)
numSubsamplingTotal = binomial(m, s) * numUnionTotal.subs(m, s)
probSubsampling = numSubsamplingMatches / numSubsamplingTotal
probSubsampling




Out[8]:

$$n^{- s} \sum_{t=\theta}^{s} \left(n \left(\frac{1}{n} \left(n - 1\right)\right)^{U}\right)^{s - t} \left(- n \left(\frac{1}{n} \left(n - 1\right)\right)^{U} + n\right)^{t} {\binom{s}{t}}$$




In [9]:

x = range(31)
subsamplingPlots = [
Scatter(x=x, y=[float(probSubsampling.subs(n, 20).subs(theta, threshold).subs(m, 80).subs(U, i).subs(s, sampling).evalf()) for i in x], name="Threshold={}, Sampling={}".format(threshold, sampling))
for sampling, threshold in ((20, 8), (20, 10), (20, 12), (20, 14))]

layout = Layout(
title= 'Error Probability with Unions and Sampling (80 modules, 20 cells per module)',
xaxis= dict(
title= '# of Representations in Union',
),
yaxis=dict(
title= 'Probability of False Match',
),
)
fig = Figure(data=subsamplingPlots, layout=layout)

iplot(fig)




require(["plotly"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";Plotly.newPlot("613b2225-c5e4-407c-9c0e-4980bbdf916b", [{"y": [0.0, 2.856885382818746e-06, 0.00034933829042682005, 0.004319889255199723, 0.021053387251264138, 0.06198617330188904, 0.13345901097970858, 0.23282636673978033, 0.350112050473545, 0.4725249723923775, 0.5886393965678055, 0.6906854742183532, 0.7749550807110472, 0.8410406131793051, 0.8906591553466691, 0.9265570760125847, 0.9517113704303223, 0.9688542952853207, 0.9802562764632995, 0.9876786376349591, 0.9924190623894324, 0.9953954905645589, 0.9972360020497993, 0.9983585481940814, 0.9990347303075494, 0.9994374624213483, 0.9996748733915217, 0.9998135186064011, 0.999893792178723, 0.9999399038130766, 0.9999662000146693], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=8, Sampling=20"}, {"y": [0.0, 1.1340718060930294e-08, 5.690598288873146e-06, 0.0001619066357082006, 0.0014290267273632043, 0.00666751929969381, 0.020859420401325837, 0.04969229922243941, 0.09728320418558796, 0.16445344330775283, 0.24837529613481674, 0.3434873181179816, 0.4430342286545002, 0.5405567790198483, 0.6309182690758436, 0.7107580001752644, 0.7784693663131694, 0.8338845293326499, 0.8778417477504095, 0.91176196543986, 0.9373039673920476, 0.9561213345884841, 0.969715326785307, 0.9793638844208885, 0.9861035370105452, 0.9907434809593251, 0.9938959511212281, 0.9960121164558217, 0.9974170312914695, 0.9983403373344378, 0.9989414955169171], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=10, Sampling=20"}, {"y": [0.0, 2.1081685071582624e-11, 4.3790390598284294e-08, 2.8965685165078294e-06, 4.6876773027940714e-05, 0.0003517213600988552, 0.0016267827604545945, 0.005400279979793398, 0.014091604510741883, 0.030655293368336427, 0.057883502725995774, 0.09764589272279063, 0.15035761838727807, 0.21483529325372466, 0.2885330431248301, 0.36803140165436415, 0.44961121861532566, 0.5297689672164801, 0.6055865404132119, 0.6749281113418276, 0.7364811714352547, 0.7896829275011829, 0.8345794259955004, 0.8716591490756669, 0.9016916080131975, 0.9255893263302326, 0.9443012977824154, 0.9587386238928541, 0.9697285948225335, 0.9779914263954705, 0.9841334779254945], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=12, Sampling=20"}, {"y": [0.0, 1.7762442397422792e-14, 1.5350671722255623e-10, 2.3745859616597298e-08, 7.094772146125953e-07, 8.629050568820554e-06, 5.9554635477536755e-05, 0.00027846791691614947, 0.0009806622594697961, 0.0027850436844575627, 0.006683544940430512, 0.014013442145843338, 0.026313474153924026, 0.045090220802441724, 0.07155450289551872, 0.10639560111180828, 0.1496451855938925, 0.2006537001659533, 0.2581721127924222, 0.3205104296568489, 0.3857347333363125, 0.4518657045063133, 0.5170500238511262, 0.5796875693510712, 0.6385085279345108, 0.6926033423798293, 0.7414140054492497, 0.7846977281317467, 0.8224741133323371, 0.8549655051930367, 0.8825379446877918], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=14, Sampling=20"}], {"title": "Error Probability with Unions and Sampling (80 modules, 20 cells per module)", "xaxis": {"title": "# of Representations in Union"}, "yaxis": {"title": "Probability of False Match"}}, {"linkText": "Export to plot.ly", "showLink": true})});




In [10]:

x = range(31)
subsamplingPlots = [
Scatter(x=x, y=[float(probSubsampling.subs(n, 20).subs(theta, threshold).subs(m, 80).subs(U, i).subs(s, sampling).evalf()) for i in x], name="Threshold={}, Sampling={}".format(threshold, sampling))
for sampling, threshold in ((20, 10), (30, 15), (40, 20), (50, 25))]

layout = Layout(
title= 'Error Probability with Unions and Sampling (80 modules, 20 cells per module)',
xaxis= dict(
title= '# of Representations in Union',
),
yaxis=dict(
title= 'Probability of False Match',
),
)
fig = Figure(data=subsamplingPlots, layout=layout)

iplot(fig)




require(["plotly"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";Plotly.newPlot("54b75a18-b1fe-4ebe-826a-2cb647b9d66d", [{"y": [0.0, 1.1340718060930294e-08, 5.690598288873146e-06, 0.0001619066357082006, 0.0014290267273632043, 0.00666751929969381, 0.020859420401325837, 0.04969229922243941, 0.09728320418558796, 0.16445344330775283, 0.24837529613481674, 0.3434873181179816, 0.4430342286545002, 0.5405567790198483, 0.6309182690758436, 0.7107580001752644, 0.7784693663131694, 0.8338845293326499, 0.8778417477504095, 0.91176196543986, 0.9373039673920476, 0.9561213345884841, 0.969715326785307, 0.9793638844208885, 0.9861035370105452, 0.9907434809593251, 0.9938959511212281, 0.9960121164558217, 0.9974170312914695, 0.9983403373344378, 0.9989414955169171], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=10, Sampling=20"}, {"y": [0.0, 2.306221717483969e-12, 2.530290016404743e-08, 3.7420271519078777e-06, 9.545632365685548e-05, 0.0009342593085285661, 0.005011805345900281, 0.017834413337174938, 0.04720414041006681, 0.10011137751711852, 0.1791016893893094, 0.28055248446465053, 0.3957536891653386, 0.5138324764643201, 0.624841379404814, 0.721781771324157, 0.8012179800427394, 0.862804683508823, 0.9082898557320943, 0.9404735939683008, 0.9623949743328095, 0.9768272115050822, 0.9860433865399375, 0.9917690912647428, 0.9952389110094042, 0.997294707919533, 0.9984879380100298, 0.9991676303847037, 0.9995481977046955, 0.999757943690658, 0.9998718759729924], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=15, Sampling=30"}, {"y": [0.0, 4.960102241653535e-16, 1.188695512535943e-10, 9.12591467258926e-08, 6.717228821456126e-06, 0.0001376294446640841, 0.0012628197619833476, 0.006692092616799575, 0.023859982855543284, 0.06321264741055839, 0.13329661056800896, 0.23519742750879774, 0.36067003850662976, 0.4951909887817563, 0.6234987198519079, 0.7342456159097559, 0.8219394523495299, 0.8863580261337904, 0.9306636568980939, 0.9594146908429572, 0.9771325856489613, 0.9875589262733036, 0.9934455809336822, 0.996647407633198, 0.99833110358728, 0.999189754326441, 0.9996155920031273, 0.9998214633554456, 0.9999186941864354, 0.9999636410376993, 0.9999840126559174], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=20, Sampling=40"}, {"y": [0.0, 1.1004890634145517e-19, 5.758147914059017e-13, 2.293532234142043e-09, 4.867507484171914e-07, 2.08575935350365e-05, 0.0003269328609668054, 0.0025760490051403013, 0.01234809203287212, 0.040768307597722486, 0.10103869435972658, 0.200145448568139, 0.3323988141018698, 0.4806720959886619, 0.6241457781514864, 0.7465519178919526, 0.8402254402475245, 0.9054339588386242, 0.9472073993781442, 0.9720737311701156, 0.9859409360473821, 0.9932364178106874, 0.9968789198294505, 0.9986137902651117, 0.9994055701901915, 0.9997532000545, 0.9999005345823392, 0.9999609979834978, 0.9999850891840665, 0.9999944314441253, 0.9999979650075759], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=25, Sampling=50"}], {"title": "Error Probability with Unions and Sampling (80 modules, 20 cells per module)", "xaxis": {"title": "# of Representations in Union"}, "yaxis": {"title": "Probability of False Match"}}, {"linkText": "Export to plot.ly", "showLink": true})});




In [11]:

x = range(31)
subsamplingPlots = [
Scatter(x=x, y=[float(probSubsampling.subs(n, 20).subs(theta, threshold).subs(m, 80).subs(U, i).subs(s, sampling).evalf()) for i in x], name="Threshold={}, Sampling={}".format(threshold, sampling))
for sampling, threshold in ((20, 10), (30, 20), (40, 30), (50, 40))]

layout = Layout(
title= 'Error Probability with Unions and Sampling (80 modules, 20 cells per module)',
xaxis= dict(
title= '# of Representations in Union',
),
yaxis=dict(
title= 'Probability of False Match',
),
)
fig = Figure(data=subsamplingPlots, layout=layout)

iplot(fig)




require(["plotly"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";Plotly.newPlot("84474022-4180-46e5-b752-1fd60a20be34", [{"y": [0.0, 1.1340718060930294e-08, 5.690598288873146e-06, 0.0001619066357082006, 0.0014290267273632043, 0.00666751929969381, 0.020859420401325837, 0.04969229922243941, 0.09728320418558796, 0.16445344330775283, 0.24837529613481674, 0.3434873181179816, 0.4430342286545002, 0.5405567790198483, 0.6309182690758436, 0.7107580001752644, 0.7784693663131694, 0.8338845293326499, 0.8778417477504095, 0.91176196543986, 0.9373039673920476, 0.9561213345884841, 0.969715326785307, 0.9793638844208885, 0.9861035370105452, 0.9907434809593251, 0.9938959511212281, 0.9960121164558217, 0.9974170312914695, 0.9983403373344378, 0.9989414955169171], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=10, Sampling=20"}, {"y": [0.0, 1.7595102128836152e-19, 6.840653560730264e-14, 8.487830318278435e-11, 1.0052870346676663e-08, 3.2971483421098523e-07, 4.813209756379219e-06, 4.0287383703537634e-05, 0.0002249307206236185, 0.0009236555700185448, 0.002982513726004849, 0.0079437847458851, 0.018081409533021318, 0.036136128791486345, 0.06476587014254645, 0.10587530561529195, 0.16005712021099247, 0.22633350039957545, 0.30226583563895987, 0.3843715301118323, 0.4687041482435751, 0.5514363122534268, 0.6293209959440963, 0.6999679364982069, 0.7619310303705491, 0.8146435824134832, 0.8582567218061354, 0.8934360185379152, 0.9211596870926316, 0.9425459493326972, 0.9587222798602598], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=20, Sampling=30"}, {"y": [0.0, 4.808153959583734e-31, 1.472874501239228e-22, 8.120404342546864e-18, 1.3177749954665879e-14, 3.1096729022574677e-12, 2.1739261896685108e-10, 6.5813260413071135e-09, 1.0823472333412066e-07, 1.1192584251278146e-06, 8.042784132551152e-06, 4.3129938183839156e-05, 0.00018195067523593542, 0.0006285732487846795, 0.001834685824544862, 0.004638384483800628, 0.010363566778406424, 0.020805298542861516, 0.038049768027804874, 0.06413627014169568, 0.10063606252893165, 0.14826485775697235, 0.20664188319830987, 0.274262440283747, 0.34868508679803756, 0.42687609151858974, 0.5056215732352908, 0.581916658824893, 0.6532639896150624, 0.7178480717364544, 0.7745848993140289], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=30, Sampling=40"}, {"y": [0.0, 5.666373935445186e-43, 1.3736690830521499e-31, 3.38139807974563e-25, 7.55826382378336e-21, 1.2907394318285605e-17, 4.348838229894576e-15, 4.795454022156705e-13, 2.3411341540969718e-11, 6.149167457106999e-10, 9.92694175088499e-09, 1.0831201700387774e-07, 8.567918006922113e-07, 5.182883983088683e-06, 2.4991103997496665e-05, 9.927071143638177e-05, 0.0003336028468517921, 0.0009693506072058996, 0.0024799445403707143, 0.005671894723455675, 0.011747968173479144, 0.022283030953211862, 0.0390793898801015, 0.063906186312117, 0.09817176142563891, 0.14260898269937147, 0.19705706968433004, 0.26039808984228696, 0.3306630070633164, 0.40527781024613285, 0.4813894973765426], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], "type": "scatter", "name": "Threshold=40, Sampling=50"}], {"title": "Error Probability with Unions and Sampling (80 modules, 20 cells per module)", "xaxis": {"title": "# of Representations in Union"}, "yaxis": {"title": "Probability of False Match"}}, {"linkText": "Export to plot.ly", "showLink": true})});



# False Match with Multiple Cells Subsampling



In [12]:

numUnionNotMatches = Sum(binomial(m, t)*(E ** t)*((n-E) ** (m-t)), (t, 0, theta-1))
numUnionNotMatches = numUnionNotMatches.subs(E, expectedUnionSize)

numSubsamplingNotMatches = binomial(m, s) * numUnionNotMatches.subs(m, s)

numSubsamplingMatchesMulti = Sum(binomial(phi, o) * (numSubsamplingMatches ** o) * (numSubsamplingNotMatches ** (phi - o)), (o, omega, phi))
numSubsamplingTotalMulti =  numSubsamplingTotal ** phi
probMatchMulti = numSubsamplingMatchesMulti / numSubsamplingTotalMulti
probMatchMulti




Out[12]:

$$\left(n^{s} {\binom{m}{s}}\right)^{- \phi} \sum_{o=\omega}^{\phi} \left(\left({\binom{m}{s}} \sum_{t=0}^{\theta - 1} \left(n \left(\frac{1}{n} \left(n - 1\right)\right)^{U}\right)^{s - t} \left(- n \left(\frac{1}{n} \left(n - 1\right)\right)^{U} + n\right)^{t} {\binom{s}{t}}\right)^{- o + \phi}\right) \left(\left({\binom{m}{s}} \sum_{t=\theta}^{s} \left(n \left(\frac{1}{n} \left(n - 1\right)\right)^{U}\right)^{s - t} \left(- n \left(\frac{1}{n} \left(n - 1\right)\right)^{U} + n\right)^{t} {\binom{s}{t}}\right)^{o}\right) {\binom{\phi}{o}}$$




In [13]:

probSubsampling.subs(n, 20).subs(theta, 20).subs(m, 80).subs(s, 20).subs(U, 20).evalf()




Out[13]:

$$0.000139355406433676$$




In [14]:

# Make sure we get the same results as before when phi, omega are 1
probMatchMulti.subs(phi, 1).subs(omega, 1).subs(n, 20).subs(theta, 20).subs(m, 80).subs(s, 20).subs(U, 20).evalf()




Out[14]:

$$0.000139355406433676$$




In [15]:

# Debugging, ignore
unionRange = range(11)
[[probMatchMulti.subs(phi, 20).subs(omega, 10).subs(n, 20).subs(theta, 10).subs(m, 80).subs(s, sampling).subs(U, i).evalf() for i in unionRange] for sampling in (20,)]




Out[15]:

$$\left [ \left [ 0, \quad 6.50135742842162 \cdot 10^{-75}, \quad 6.5789316322732 \cdot 10^{-48}, \quad 2.28352496898893 \cdot 10^{-33}, \quad 6.47674336402067 \cdot 10^{-24}, \quad 3.0188607969385 \cdot 10^{-17}, \quad 2.37977333397903 \cdot 10^{-12}, \quad 1.06929875918232 \cdot 10^{-8}, \quad 5.57726916218864 \cdot 10^{-6}, \quad 0.000535942963473399, \quad 0.0132313603910653\right ]\right ]$$




In [16]:

x = range(21)
plots = [
Scatter(x=x, y=[float(probMatchMulti.subs(phi, 20).subs(omega, 10).subs(n, 30).subs(theta, 20).subs(m, 80).subs(U, i).subs(s, 20).evalf()) for i in x], name="30 sampling cells, 20 must match"),
Scatter(x=x, y=[float(probMatchMulti.subs(phi, 1).subs(omega, 1).subs(n, 30).subs(theta, 20).subs(m, 80).subs(U, i).subs(s, 20).evalf()) for i in x], name="1 sampling cell"),
]

layout = Layout(
title= 'Error Probability with Multiple Subsampling Cells (80 modules, 20 cells per module)',
xaxis= dict(
title= '# of Representations in Union',
),
yaxis=dict(
title= 'Probability of False Match',
),
)
fig = Figure(data=plots, layout=layout)

iplot(fig)




require(["plotly"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {};window.PLOTLYENV.BASE_URL="https://plot.ly";Plotly.newPlot("49a68c2d-5898-4a59-879f-abfd46dba7a7", [{"y": [0.0, 6.955808343601177e-291, 3.877028483702514e-232, 2.2657922339810736e-198, 7.938735404484994e-175, 7.028281517045024e-157, 1.8050971564794976e-142, 1.6887388400960642e-130, 2.6058621692022616e-120, 1.7568873661984196e-111, 1.0064921314541648e-103, 7.873849729875255e-97, 1.1939892651773506e-90, 4.5790285072992664e-85, 5.461355656425397e-80, 2.3865010692530914e-75, 4.360489921575902e-71, 3.711503304984871e-67, 1.609443792198225e-63, 3.832393915457215e-60, 5.33915724145263e-57], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], "type": "scatter", "name": "30 sampling cells, 20 must match"}, {"y": [0.0, 2.8679719907924415e-30, 2.1487705545473463e-24, 5.1151945155099194e-21, 1.1569518306811282e-18, 7.211490899037366e-17, 1.990628433474109e-15, 3.1339812518065294e-14, 3.27291923895636e-13, 2.4992778305822976e-12, 1.4914928758823817e-11, 7.293884084026653e-11, 3.0271935350056793e-10, 1.0950084739042294e-09, 3.5242782710649645e-09, 1.0259266519996365e-08, 2.737119031699259e-08, 6.765426134802651e-08, 1.5631756741373722e-07, 3.401631725922353e-07, 7.015969510173282e-07], "x": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], "type": "scatter", "name": "1 sampling cell"}], {"title": "Error Probability with Multiple Subsampling Cells (80 modules, 20 cells per module)", "xaxis": {"title": "# of Representations in Union"}, "yaxis": {"title": "Probability of False Match"}}, {"linkText": "Export to plot.ly", "showLink": true})});



# Validate with Simulations

Below are two sets of simulations that empirically compute the probabilities calculated above. Several values are used to show that the results match.



In [17]:

import numpy as np




In [18]:

def check(m, n, t):
r1 = np.random.randint(n, size=m)
r2 = np.random.randint(n, size=m)
overlap = np.equal(r1, r2).nonzero()[0].size
return overlap >= t

numTrials = 1000000

def estimateProb(m, n, t, U):
collisions = 0
for _ in xrange(numTrials):
collisions += int(check(m, n, t))

prob = float(collisions) / float(numTrials)
#print "m={} n={} t={} U={} and prob is {}".format(m, n, t, U, prob)
return prob

for numCells in (1, 2, 3, 4, 5, 6):
simProb = estimateProb(20, numCells, 10, 1)
calcProb = float(collision.subs(m, 20).subs(n, numCells).subs(theta, 10).evalf())
print "With m={} n={} t={} U={}, calculated prob is:\n{}\nand simulated prob is:\n{}\n".format(20, numCells, 10, 1, calcProb, simProb)




With m=20 n=1 t=10 U=1, calculated prob is:
1.0
and simulated prob is:
1.0

With m=20 n=2 t=10 U=1, calculated prob is:
0.588098526001
and simulated prob is:
0.588963

With m=20 n=3 t=10 U=1, calculated prob is:
0.0918957744873
and simulated prob is:
0.092305

With m=20 n=4 t=10 U=1, calculated prob is:
0.0138644169438
and simulated prob is:
0.01392

With m=20 n=5 t=10 U=1, calculated prob is:
0.00259482740067
and simulated prob is:
0.002496

With m=20 n=6 t=10 U=1, calculated prob is:
0.000598503936604
and simulated prob is:
0.000589




In [8]:

def check(m, n, t, U):
r1 = np.random.randint(n, size=(m, U))
r2 = np.random.randint(n, size=(m, 1))
overlap = 0
for module in xrange(r1.shape[0]):
match = np.intersect1d(r1[module], r2[module])
if len(match) > 0:
overlap += 1
return overlap >= t

numTrials = 1000000

def estimateProb(m, n, t, U):
collisions = 0
for _ in xrange(numTrials):
collisions += int(check(m, n, t, U))

prob = float(collisions) / float(numTrials)
#print "m={} n={} t={} U={} and prob is {}".format(m, n, t, U, prob)
return prob

for unionSize in (1, 2, 3, 4, 5, 6):
simProb = estimateProb(20, 10, 10, unionSize)
calcProb = float(unionProb.subs(m, 20).subs(n, 10).subs(theta, 10).subs(U, unionSize).evalf())
print "With m={} n={} t={} U={}, calculated prob is:\n{}\nand simulated prob is:\n{}\n".format(20, 10, 10, unionSize, calcProb, simProb)




With m=20 n=10 t=10 U=1, calculated prob is:
7.15090402108e-06
and simulated prob is:
8e-06

With m=20 n=10 t=10 U=2, calculated prob is:
0.00173091693285
and simulated prob is:
0.00175

With m=20 n=10 t=10 U=3, calculated prob is:
0.0244030388572
and simulated prob is:
0.024449

With m=20 n=10 t=10 U=4, calculated prob is:
0.110198023549
and simulated prob is:
0.110114

With m=20 n=10 t=10 U=5, calculated prob is:
0.273263643935
and simulated prob is:
0.273117

With m=20 n=10 t=10 U=6, calculated prob is:
0.475182009823
and simulated prob is:
0.475516




In [ ]: