In [33]:
# Extract the sample_images archive into the temporary working directory.
import os
import tarfile

# Directory the archive is unpacked into.
tmp_path = '../../tmp/'

print('Extracting sample_images files...')
# The context manager closes the archive even if extraction raises.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only run this on archives from a trusted source.
with tarfile.open("../../input/sample_images.tar") as tar:
    tar.extractall(path=tmp_path)
print('done.')


Extracting sample_images files...
done.

In [64]:
# Build the label table for the extracted sample images: a header row followed
# by every stage1 label row whose id matches a directory under sample_images.
print('Reading stage1_labels...')
import csv
import numpy as np

# newline='' lets the csv module do its own line-ending handling.
with open('../../input/stage1_labels.csv', 'r', newline='') as f:
    reader = csv.reader(f, delimiter=',', quotechar='\'')
    stage1_labels = list(reader)

print('Creating sample_labels csv...')
# Index the label rows by their first column once, so each directory is a
# single dictionary lookup instead of a full rescan of stage1_labels.
# Rows sharing an id are all kept, in file order.
rows_by_id = {}
for s1_label in stage1_labels:
    if s1_label:  # csv.reader yields [] for blank lines; skip them
        rows_by_id.setdefault(s1_label[0], []).append(s1_label)

dirs = os.listdir(os.path.join(tmp_path, 'sample_images'))

# Collect the cells in a flat list (header first) and reshape once at the end,
# instead of re-copying a growing array with np.append on every match.
flat_cells = ['id', 'cancer']
for d in dirs:
    for s1_label in rows_by_id.get(d, ()):
        flat_cells.extend(s1_label)

# Two columns per row: id, cancer.
sample_labels = np.reshape(flat_cells, (-1, 2))
print(sample_labels)
print('done.')


Reading stage1_labels...
Creating sample_labels csv...
[['id' 'cancer']
 ['0de72529c30fe642bc60dcb75c87f6bd' '0']
 ['0ddeb08e9c97227853422bd71a2a695e' '0']
 ['0d19f1c627df49eb223771c28548350e' '0']
 ['0c0de3749d4fe175b7a5098b060982a1' '1']
 ['0ca943d821204ceb089510f836a367fd' '0']
 ['0d06d764d3c07572074d468b4cff954f' '1']
 ['0d941a3ad6c889ac451caf89c46cb92a' '0']
 ['0c59313f52304e25d5a7dcf9877633b1' '0']
 ['0c37613214faddf8701ca41e6d43f56e' '1']
 ['0a0c32c9e08cc2ea76a71649de56be6d' '0']
 ['0a38e7597ca26f9374f8ea2770ba870d' '0']
 ['0bd0e3056cbf23a1cb7f0f0b18446068' '0']
 ['0c60f4b87afcb3e2dfa65abbbf3ef2f9' '1']
 ['00cba091fa4ad62cc3200a657aeb957e' '0']
 ['0a099f2549429d29b32f349e95fb2244' '0']
 ['0c9d8314f9c69840e25febabb1229fa4' '0']
 ['0acbebb8d463b4b9ca88cf38431aac69' '1']
 ['0d2fcf787026fece4e57be167d079383' '0']
 ['0c98fcb55e3f36d0c2b6507f62f4c5f1' '0']]
done.

In [70]:
# Write the sample label table to sample_labels.csv, quoting every field.
print('Writing sample_labels.csv')
import csv

# The context manager flushes and closes the file so all rows reach disk.
# newline='' stops the csv module's own line endings from being translated
# again, which would otherwise produce blank rows on Windows.
with open('sample_labels.csv', 'w', newline='') as myfile:
    wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    wr.writerows(sample_labels)
print('done.')


Writing sample_labels.csv
done.

In [ ]: