In [171]:
from os import listdir
from os.path import isfile, join

mypath = 'TEMP'

onlyfiles = [join(mypath,f) for f in listdir(mypath) if isfile(join(mypath, f))]

allf = []
for f in onlyfiles:
    with open(f,'rb') as infile:
        for ii in range(4):
            infile.readline()
            
        l = infile.readline()
        allf.append(l[1: l[1:].find(';')].replace('v','').replace('*','-'))

allf.append(l[1: l[1:].find(';')].replace('v','').replace('*','-'))

import random
trials = 10000
support = 900
#support = len(allf)
bad = 0
for trial in range(trials):
    uniq = set()
    random.shuffle(allf)
    for s in range(support):
        uniq.add(allf[s])
    if len(uniq) != support:
        bad += 1.0
print bad/trials
print len(allf)


---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-171-147b9c96255a> in <module>()
     24 for trial in range(trials):
     25     uniq = set()
---> 26     random.shuffle(allf)
     27     for s in range(support):
     28         uniq.add(allf[s])

/usr/lib/python2.7/random.pyc in shuffle(self, x, random)
    288         for i in reversed(xrange(1, len(x))):
    289             # pick an element in x[:i+1] with which to exchange x[i]
--> 290             j = _int(random() * (i+1))
    291             x[i], x[j] = x[j], x[i]
    292 

KeyboardInterrupt: 

In [194]:
from os import listdir
from os.path import isfile, join

mypath = 'TEMP'

onlyfiles = [join(mypath,f) for f in listdir(mypath) if isfile(join(mypath, f))]

allf = []
for f in onlyfiles:
    with open(f,'rb') as infile:
        for ii in range(4):
            infile.readline()
            
        l = infile.readline()
        allf.append(l[1: l[1:].find(';')].replace('v','').replace('*','-'))


import random
trials = 1
support = len(allf)
bad = 0
for trial in range(trials):
    uniq = set()
    random.shuffle(allf)
    for s in range(support):
        uniq.add(allf[s])
    if len(uniq) != support:
        bad += 1.0
print bad/trials
print len(allf)


0
12814

In [206]:
from os import listdir
from os.path import isfile, join



allf = [0] + list(range(12999))

import random
trials = 10000
support = 9500
#support = len(allf)
bad = 0
for trial in range(trials):
    uniq = set()
    random.shuffle(allf)
    for s in range(support):
        uniq.add(allf[s])
    if len(uniq) != support:
        bad += 1.0
print bad/trials
print len(allf)


0.5345
13000

In [ ]:


In [ ]: