In [3]:
import os, math, glob
import ujson as json
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
# Input locations, relative to the notebook's working directory.
TRAIN_DIR = '../data/train/'      # one sub-directory per class name below
TEST_DIR = '../data/test_stg1/'   # flat directory of test images

# Class labels; each one is also the name of a sub-directory of TRAIN_DIR.
# 'NoF' is the only class the bounding-box cells further down leave out.
FISH_CLASSES = [
    'ALB',
    'BET',
    'DOL',
    'LAG',
    'NoF',
    'OTHER',
    'SHARK',
    'YFT',
]

In [5]:
# Write one XML annotation file per annotated training image into ./Annotations
# (the tag layout resembles Pascal VOC). The directory is emptied first so that
# files from a previous run never linger.
if not os.path.exists('./Annotations'):
    os.mkdir('./Annotations')
for stale_path in glob.glob('./Annotations/*'):
    os.remove(stale_path)

# Bounding-box JSON files are read for every class except 'NoF'.
crop_classes = FISH_CLASSES[:]
crop_classes.remove('NoF')

for c in crop_classes:
    # `with` closes the JSON handle deterministically instead of leaking it.
    with open('../data/BBannotations/{}.json'.format(c), 'r') as json_file:
        entries = json.load(json_file)

    for entry in entries:
        filename = entry["filename"]
        tail = os.path.split(filename)[1]
        basename = os.path.splitext(tail)[0]

        if len(entry["annotations"]) == 0:
            # No XML is written for images without boxes; report them instead.
            print(filename)
            print("no bbox")
            continue

        # Read the image size before creating the XML file, and release the
        # image handle right away. A missing/corrupt image therefore no longer
        # leaves a half-written annotation file behind.
        with Image.open(TRAIN_DIR + c + '/' + tail) as im:
            (img_width, img_height) = im.size

        xml_parts = [
            "<annotation>" + '\n',
            '\t<folder>' + c + '</folder>' + '\n',
            '\t<filename>' + tail + '</filename>' + '\n',
            '\t<source>\n\t\t<database>Source</database>\n\t</source>\n',
            '\t<size>\n\t\t<width>' + str(img_width) + '</width>\n\t\t<height>'
            + str(img_height) + '</height>\n\t\t<depth>3</depth>\n\t</size>',
            '\n\t<segmented>0</segmented>',
        ]

        for box in entry["annotations"]:
            # The JSON stores the corner given by x/y plus width/height; the
            # XML wants two corners. Box names are kept separate from the
            # image's own width/height above.
            xmin = box["x"]
            ymin = box["y"]
            xmax = xmin + box["width"]
            ymax = ymin + box["height"]
            # The object name is the lower-cased class from the JSON. To label
            # every box with a single class, write 'fish' here instead.
            xml_parts.append(
                '\n\t<object>'
                + '\n\t\t<name>' + box["class"].lower() + '</name>\n\t\t<pose>Unspecified</pose>'
                + '\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>'
                + '\n\t\t<bndbox>\n\t\t\t<xmin>' + str(xmin) + '</xmin>'
                + '\n\t\t\t<ymin>' + str(ymin) + '</ymin>'
                + '\n\t\t\t<xmax>' + str(xmax) + '</xmax>'
                + '\n\t\t\t<ymax>' + str(ymax) + '</ymax>'
                + '\n\t\t</bndbox>'
                + '\n\t</object>'
            )
        xml_parts.append('</annotation>')

        # The whole document is built in memory and written in one go, so the
        # file is either complete or absent, and it is always closed.
        with open('Annotations/' + basename + '.xml', 'w') as xml_file:
            xml_file.write(''.join(xml_parts))


ALB/img_00568.jpg
no bbox
ALB/img_07439.jpg
no bbox

In [3]:
# Write the image-set lists under ./ImageSets/Main:
#   train.txt / val.txt / trainval.txt  - one image stem per line
#   <CLASS>_{train,val,trainval}.txt    - stem followed by '  1' or ' -1'
# makedirs also creates the parent './ImageSets'; os.mkdir raised when that
# parent did not exist yet.
if not os.path.exists('./ImageSets/Main'):
    os.makedirs('./ImageSets/Main')
for stale_path in glob.glob('./ImageSets/Main/*'):
    os.remove(stale_path)

crop_classes = FISH_CLASSES[:]
crop_classes.remove('NoF')

# List every class directory once. The per-class sets give O(1) membership
# tests when the per-class files are written further down.
imgs = []
imgs_by_class = {}
for fish in crop_classes:
    fish_dir = TRAIN_DIR + '{}'.format(fish)
    imgs_fish = [os.path.splitext(im)[0] for im in os.listdir(fish_dir)]
    imgs_by_class[fish] = set(imgs_fish)
    imgs.extend(imgs_fish)

# Shuffle with a fixed seed so the 80/20 split is identical on every run.
# A local RandomState leaves numpy's global random state untouched.
SPLIT_SEED = 0
index = np.random.RandomState(SPLIT_SEED).permutation(len(imgs))
imgs = [imgs[i] for i in index]
# int() keeps the slice bound an integer even where math.ceil returns a float.
num_train = int(math.ceil(len(imgs) * 0.8))

train = sorted(imgs[:num_train])
val = sorted(imgs[num_train:])
trainval = sorted(imgs)
splits = (('train', train), ('val', val), ('trainval', trainval))

for split_name, split_imgs in splits:
    with open('./ImageSets/Main/' + split_name + '.txt', 'w') as f:
        for im in split_imgs:
            f.write(im + '\n')

# Original author's note: "del img_00568 and img_07439" - the two images the
# annotation step reports as having no bbox. Presumably they are meant to be
# removed from the lists by hand; that step is NOT automated here.

for fish in crop_classes:
    members = imgs_by_class[fish]
    for split_name, split_imgs in splits:
        with open('./ImageSets/Main/' + fish + '_' + split_name + '.txt', 'w') as f:
            for im in split_imgs:
                # '  1' marks an image from this class's directory, ' -1' any other.
                f.write(im + ('  1' if im in members else ' -1') + '\n')

In [5]:
# Write train_test.txt: one "<image stem> <class>" line per file found in each
# class directory (all of FISH_CLASSES, in list order), stems sorted per class.
with open('./ImageSets/Main/train_test.txt', 'w') as listing:
    for label in FISH_CLASSES:
        class_dir = TRAIN_DIR + '{}'.format(label)
        stems = [os.path.splitext(entry)[0] for entry in os.listdir(class_dir)]
        stems.sort()
        listing.writelines(stem + ' {}\n'.format(label) for stem in stems)

# Manual follow-up: in the written file, change the label of img_00568 and
# img_07439 from ALB to NoF.

In [6]:
# Write test.txt: the sorted stems of every file in TEST_DIR, followed by the
# sorted image stems recorded in train_test.txt.
fish_dir = TEST_DIR
imgs_test = [os.path.splitext(im)[0] for im in os.listdir(fish_dir)]
with open("./ImageSets/Main/train_test.txt", "r") as f:
    ims = f.readlines()

# Each line reads "<image stem> <class>". Splitting on the last space recovers
# the stem whatever its length; the previous fixed 9-character slice only
# worked for names shaped exactly like 'img_00000'. Blank lines (easy to
# introduce when the file is edited by hand) are skipped rather than written
# out as empty entries.
imgs_train = [im.rstrip('\n').rsplit(' ', 1)[0] for im in ims if im.strip()]

with open('./ImageSets/Main/test.txt', 'w') as f:
    for im in sorted(imgs_test):
        f.write(im + '\n')
    for im in sorted(imgs_train):
        f.write(im + '\n')