In [171]:
# Estimate how often a random draw of `support` labels, taken from the files
# in TEMP, contains at least one repeated label.
from os import listdir
from os.path import isfile, join
import random

mypath = 'TEMP'
# Regular files directly inside `mypath`; anything that is not a file is skipped.
onlyfiles = [join(mypath, f) for f in listdir(mypath) if isfile(join(mypath, f))]

allf = []
for f in onlyfiles:
    with open(f, 'rb') as infile:
        # The label is read from the fifth line; the first four are discarded.
        for ii in range(4):
            infile.readline()
        line = infile.readline()
    # find() is evaluated on line[1:] but its result is used as an end index
    # into `line`, so the slice drops the first character and also the
    # character immediately before ';'.  If there is no ';' find() returns -1
    # and only the first and last characters are dropped.
    # NOTE(review): presumably this strips a pair of enclosing brackets -
    # confirm against the file format.
    label = line[1: line[1:].find(';')].replace('v', '').replace('*', '-')
    # NOTE(review): every label is stored twice in this cell, so `allf` always
    # holds duplicates and is twice as long as the number of files.  The later
    # variant of this cell appends only once - confirm the doubling is intended.
    allf.append(label)
    allf.append(label)

trials = 10000
support = 900
# Set support = len(allf) instead to test the whole collection in each trial.
if support > len(allf):
    # Without this check the draw below would silently use fewer labels than
    # requested (the original indexing loop failed with a bare IndexError).
    raise ValueError('support (%d) exceeds the number of labels (%d)'
                     % (support, len(allf)))

# Kept as a float from the start so the printed ratio is always a float,
# including when no trial contains a duplicate (Python 2 integer division).
bad = 0.0
for trial in range(trials):
    # After an in-place shuffle the first `support` entries are a uniform
    # random draw without replacement.
    random.shuffle(allf)
    uniq = set(allf[:support])
    if len(uniq) != support:
        bad += 1.0

# Fraction of trials whose draw contained a repeated label, then the total
# number of stored labels.  print(x) with one argument also works on Python 2.
print(bad / trials)
print(len(allf))
In [194]:
# Single-pass check: are all labels extracted from the files in TEMP distinct?
# With trials = 1 and support = len(allf) the printed ratio is 0 when every
# label is unique and 1.0 otherwise.
from os import listdir
from os.path import isfile, join
import random

mypath = 'TEMP'

# Collect the regular files that sit directly inside `mypath`.
onlyfiles = []
for entry in listdir(mypath):
    candidate = join(mypath, entry)
    if isfile(candidate):
        onlyfiles.append(candidate)

allf = []
for path in onlyfiles:
    with open(path, 'rb') as infile:
        # Five readline() calls; only the fifth line is used.
        first_five = [infile.readline() for skipped in range(5)]
    fifth_line = first_five[4]

    # `cut` is an offset into `tail` (the line without its first character)
    # but is applied as an end index to `fifth_line` itself, so the character
    # right before ';' is dropped along with the first one.  When no ';' is
    # present find() yields -1 and the slice drops the first and last
    # characters instead.
    tail = fifth_line[1:]
    cut = tail.find(';')
    label = fifth_line[1:cut]
    allf.append(label.replace('v', '').replace('*', '-'))

trials = 1
support = len(allf)
bad = 0
for trial in range(trials):
    random.shuffle(allf)
    # The first `support` shuffled entries; here that is the whole list.
    uniq = set(allf[:support])
    if len(uniq) != support:
        bad += 1.0

print(bad / trials)
print(len(allf))
In [206]:
# Synthetic control: 13000 integers in which only the value 0 occurs twice.
# Estimates how often a random draw of `support` entries contains both copies.
from os import listdir
from os.path import isfile, join
import random

# 0 followed by 0..12998, i.e. exactly one duplicated value.
allf = list(range(12999))
allf.insert(0, 0)

trials = 10000
support = 9500
# Set support = len(allf) instead to draw the whole list in every trial.
bad = 0
for trial in range(trials):
    # In-place shuffle; the leading `support` entries form the random draw.
    random.shuffle(allf)
    uniq = set(allf[:support])
    if len(uniq) != support:
        bad += 1.0

# Fraction of trials that hit the duplicate, then the list length.
print(bad / trials)
print(len(allf))
In [ ]:
In [ ]: