In [3]:
import os, math, glob
import ujson as json
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
In [2]:
# Dataset locations, relative to the notebook's working directory.
TRAIN_DIR = "../data/train/"
TEST_DIR = "../data/test_stg1/"

# Class labels; each one is also used below as a sub-directory name of TRAIN_DIR.
FISH_CLASSES = [
    "ALB",
    "BET",
    "DOL",
    "LAG",
    "NoF",
    "OTHER",
    "SHARK",
    "YFT",
]
In [5]:
# Write one XML annotation file per training image that has bounding boxes.
#
# For every fish class except 'NoF', read ../data/BBannotations/<class>.json and
# emit ./Annotations/<image basename>.xml holding the image size and one
# <object> element per bounding box. Images whose entry has no annotations are
# reported on stdout and skipped. The element layout looks like the Pascal VOC
# annotation format -- TODO confirm against whatever consumes these files.


def _annotation_xml(folder, filename, img_width, img_height, boxes):
    """Return the XML text describing one image.

    folder and filename fill the <folder>/<filename> elements, img_width and
    img_height fill <size>, and every item of boxes (a dict with "class", "x",
    "y", "width" and "height" keys) becomes one <object> element.
    """
    parts = [
        '<annotation>\n',
        '\t<folder>' + folder + '</folder>\n',
        '\t<filename>' + filename + '</filename>\n',
        '\t<source>\n\t\t<database>Source</database>\n\t</source>\n',
        '\t<size>\n\t\t<width>' + str(img_width) + '</width>\n\t\t<height>'
        + str(img_height) + '</height>\n\t\t<depth>3</depth>\n\t</size>',
        '\n\t<segmented>0</segmented>',
    ]
    for box in boxes:
        # The JSON gives the minimum corner plus an extent; the XML stores
        # both corners of the box.
        xmin = box["x"]
        ymin = box["y"]
        xmax = xmin + box["width"]
        ymax = ymin + box["height"]
        # The object name is the lower-cased class label. To collapse all
        # classes into a single label, write 'fish' here instead.
        parts.append(
            '\n\t<object>'
            + '\n\t\t<name>' + box["class"].lower() + '</name>\n\t\t<pose>Unspecified</pose>'
            + '\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>'
            + '\n\t\t<bndbox>\n\t\t\t<xmin>' + str(xmin) + '</xmin>'
            + '\n\t\t\t<ymin>' + str(ymin) + '</ymin>'
            + '\n\t\t\t<xmax>' + str(xmax) + '</xmax>'
            + '\n\t\t\t<ymax>' + str(ymax) + '</ymax>'
            + '\n\t\t</bndbox>'
            + '\n\t</object>'
        )
    parts.append('</annotation>')
    return ''.join(parts)


annotations_dir = './Annotations'
os.makedirs(annotations_dir, exist_ok=True)

# Start from an empty directory so files from an earlier run cannot linger.
for stale_path in glob.glob(os.path.join(annotations_dir, '*')):
    os.remove(stale_path)

# 'NoF' images contain no fish, so there is no bounding-box file for them.
crop_classes = [c for c in FISH_CLASSES if c != 'NoF']

for fish in crop_classes:
    # Close the JSON file as soon as it is parsed instead of leaking the handle.
    with open('../data/BBannotations/{}.json'.format(fish), 'r') as json_file:
        entries = json.load(json_file)

    for entry in entries:
        filename = entry["filename"]
        tail = os.path.basename(filename)
        basename = os.path.splitext(tail)[0]

        if len(entry["annotations"]) == 0:
            print(filename)
            print("no bbox")
            continue

        # Read the image size before creating the XML file, so a missing or
        # unreadable image cannot leave a truncated annotation behind.
        with Image.open(TRAIN_DIR + fish + '/' + tail) as im:
            img_width, img_height = im.size

        xml_text = _annotation_xml(fish, tail, img_width, img_height,
                                   entry["annotations"])
        with open(os.path.join(annotations_dir, basename + '.xml'), 'w') as xml_file:
            xml_file.write(xml_text)
In [3]:
# Write the image-id lists under ./ImageSets/Main.
#
# Produces train.txt / val.txt / trainval.txt (an 80/20 split of every image
# found in the non-'NoF' class directories of TRAIN_DIR) plus, for each class,
# <class>_train.txt / <class>_val.txt / <class>_trainval.txt in which every id
# is followed by ' 1' when the image belongs to that class and ' -1' otherwise.


def _write_lines(path, lines):
    """Write every item of lines to path, each followed by a newline."""
    with open(path, 'w') as out:
        for line in lines:
            out.write(line + '\n')


image_sets_dir = './ImageSets/Main'
# makedirs also creates ./ImageSets when it is missing; plain mkdir would fail.
os.makedirs(image_sets_dir, exist_ok=True)

# Start from an empty directory so lists from an earlier run cannot linger.
for stale_path in glob.glob(os.path.join(image_sets_dir, '*')):
    os.remove(stale_path)

crop_classes = [c for c in FISH_CLASSES if c != 'NoF']

# Image ids (file names without extension), overall and per class.
imgs = []
ids_by_class = {}
for fish in crop_classes:
    fish_dir = TRAIN_DIR + '{}'.format(fish)
    # os.listdir order is arbitrary; sort so the seeded shuffle below always
    # starts from the same sequence.
    imgs_fish = [os.path.splitext(im)[0] for im in sorted(os.listdir(fish_dir))]
    ids_by_class[fish] = set(imgs_fish)
    imgs.extend(imgs_fish)

# A dedicated, seeded generator keeps the split reproducible across runs
# without touching NumPy's global random state.
split_rng = np.random.RandomState(0)
index = split_rng.permutation(len(imgs))
imgs = [imgs[i] for i in index]
num_train = int(math.ceil(len(imgs) * 0.8))

train = sorted(imgs[:num_train])
val = sorted(imgs[num_train:])
trainval = sorted(imgs)

splits = (('train', train), ('val', val), ('trainval', trainval))
for split_name, split_ids in splits:
    _write_lines(os.path.join(image_sets_dir, split_name + '.txt'), split_ids)

# Manual follow-up noted by the original author: delete img_00568 and
# img_07439 (presumably from the lists written above -- TODO confirm).

for fish in crop_classes:
    members = ids_by_class[fish]  # set membership instead of scanning a list
    for split_name, split_ids in splits:
        labelled = [im + (' 1' if im in members else ' -1') for im in split_ids]
        _write_lines(
            os.path.join(image_sets_dir, fish + '_' + split_name + '.txt'),
            labelled,
        )
In [5]:
# List every training image, all classes including 'NoF', together with its
# class label: one "<image id> <class>" line per image, ids sorted per class.
with open('./ImageSets/Main/train_test.txt', 'w') as listing:
    for label in FISH_CLASSES:
        class_dir = TRAIN_DIR + '{}'.format(label)
        stems = sorted(os.path.splitext(entry)[0] for entry in os.listdir(class_dir))
        listing.writelines(stem + ' {}\n'.format(label) for stem in stems)
# Manual follow-up noted by the original author: in the generated file, change
# the label of img_00568 and img_07439 from ALB to NoF.
In [6]:
# Write ./ImageSets/Main/test.txt: the sorted ids of every image in TEST_DIR
# followed by the sorted ids of every training image listed in train_test.txt.
fish_dir = TEST_DIR
imgs_test = [os.path.splitext(im)[0] for im in os.listdir(fish_dir)]

with open("./ImageSets/Main/train_test.txt", "r") as f:
    ims = f.readlines()

# Each line is "<image id> <class>". Drop the trailing label by splitting on
# the last space rather than slicing a fixed 9 characters, so ids of any length
# survive intact. Blank lines (e.g. left by manual editing) are skipped.
imgs_train = [
    line.rstrip('\n').rsplit(' ', 1)[0]
    for line in ims
    if line.strip()
]

with open('./ImageSets/Main/test.txt', 'w') as f:
    for im in sorted(imgs_test):
        f.write(im + '\n')
    for im in sorted(imgs_train):
        f.write(im + '\n')