In [1]:
import numpy as np
In [7]:
# Fix the global NumPy RNG state so that later np.random calls in this
# notebook produce the same results on every fresh Restart & Run All.
np.random.seed(seed=50)
In [8]:
# Candidate row indices 0 .. 40428966 (presumably one per line of the input
# files -- TODO confirm against the actual line count).
# Materialized as a real list because np.random.shuffle permutes its argument
# in place and therefore needs a mutable sequence; on Python 3 a bare range()
# is immutable and cannot be shuffled. list(range(...)) behaves the same on
# Python 2, where range() already returned a list.
indexlist = list(range(0, 40428967))
In [9]:
# Permute indexlist in place using the global NumPy RNG. The resulting order
# depends on the RNG state at this point, so it is reproducible only when the
# seed has been set earlier in the same kernel session.
np.random.shuffle(indexlist)
In [10]:
# Collect the first 20214483 entries of indexlist (20214483 == 40428967 // 2)
# into a set, so that per-line membership checks are O(1) hash lookups.
indexset = set()
indexset.update(indexlist[:20214483])
In [11]:
%%time
i = 0
with open('train1.txt', 'wb') as outfile:
for line in open("train.txt"):
if i in indexset:
outfile.write(line)
i+=1
In [25]:
%%time
i = 0
with open('train1.fm', 'wb') as outfile:
for line in open("train.fm"):
if i in indexset:
outfile.write(line)
i+=1
In [ ]: