In [ ]:
#This code filters all .jpg images such that only images with big enough galaxies will be used for training
#It's recommended to run jpg_filters before this file
#p_radius is the minimum petrosian radius for which a galaxy will be considered valid
#sample indicates if you are configuring the training or validation set. Choose between 'train' and 'val'
#When a .jpg image has a big enough galaxy, the nth filter of said image will be copied to outdirn (e.g. outdir1)
#In each outdir, two folders must be created: /images and /labels. Inside both folders must be two folders called /train and /val
#A folder called /trainall must also be created
#Depending on sample, train.txt or val.txt will be created in each outdir. These files list the paths of all the images copied to that outdir, which darknet will read
#Depending on sample, trainall.txt or valall.txt will be created in /trainall. Use this file for darknet if you want to use all filters for training

In [ ]:
import numpy as np
from PIL import Image
from astropy.table import Table
import copy
import os.path

In [ ]:
# Rows of unique.data to process: indices istart, istart+1, ..., iend-1
istart, iend = 0, 34000

# Which set is being built: 'train' (training) or 'val' (validation)
sample = 'train'

# Size threshold: the processing cell compares this against petrorad_r/0.396127
# to decide whether an image contains a big enough galaxy
p_radius = 20

# Input folders holding the .jpg images, one folder per filter.
# Five filters are assumed; every per-filter statement in this notebook is
# grouped together, so adding or removing a filter means editing each group.
jpg_loc1, jpg_loc2, jpg_loc3, jpg_loc4, jpg_loc5 = (
    'filtro1',
    'filtro2',
    'filtro3',
    'filtro4',
    'filtro5',
)

# Output folders, one per filter. Each must already contain
# images/<sample> and labels/<sample>.
outdir1, outdir2, outdir3, outdir4, outdir5 = (
    '/mnt/data3/sdss/yolo1',
    '/mnt/data3/sdss/yolo2',
    '/mnt/data3/sdss/yolo3',
    '/mnt/data3/sdss/yolo4',
    '/mnt/data3/sdss/yolo5',
)

# Input tables (created by 01_gettables.ipynb):
#   xid   - the galaxy catalogue read from zoospecnewall2.data
#   xuniq - the table read from unique.data; its 'imagename' column drives the main loop
xid = Table.read('zoospecnewall2.data', format='ascii')
print(xid.colnames)
xuniq = Table.read('unique.data', format='ascii')
ngal, nuniq = len(xid), len(xuniq)

In [ ]:
# Build the per-filter darknet data sets.
#
# For every image name in rows istart..iend-1 of xuniq:
#   * skip it if the .jpg of any filter is missing;
#   * skip it unless at least one of its galaxies has petrorad_r/0.396127 > p_radius;
#   * otherwise write one label file per filter (same content for all filters),
#     re-save each filter's image into <outdir>/images/<sample>/ and append the
#     saved image path to <outdir>/<sample>.txt.
# Finally all per-filter list files are concatenated into one combined list.
#
# Counters left in the namespace: nfiles (images kept), nann (label lines
# written) and n1..n5 (label lines per class 0..4).

xidname = xid.group_by('imagename')

# One entry per filter; add/remove entries here to change the number of filters.
jpg_locs = [jpg_loc1, jpg_loc2, jpg_loc3, jpg_loc4, jpg_loc5]
outdirs = [outdir1, outdir2, outdir3, outdir4, outdir5]

# Per-filter list files (train.txt / val.txt) read by darknet.
list_paths = ['%s/%s.txt'%(outdir,sample) for outdir in outdirs]

nfiles=0
nann=0
n1=0 #yolo classes 5
n2=0
n3=0
n4=0
n5=0

# Never index past the end of xuniq, even if iend is larger than the table.
last = min(iend, len(xuniq))
if last < iend:
    print('iend=%d is beyond the %d rows of xuniq, stopping at row %d'%(iend,len(xuniq),last))

list_files = []
try:
    for list_path in list_paths:
        list_files.append(open(list_path,'w'))

    for i in range(istart, last):
        imagename=xuniq['imagename'][i]

        src_jpgs = ['%s/%s.jpg'%(loc,imagename) for loc in jpg_locs]
        out_jpgs = ['%s/images/%s/%s.jpg'%(outdir,sample,imagename) for outdir in outdirs]
        out_labels = ['%s/labels/%s/%s.txt'%(outdir,sample,imagename) for outdir in outdirs]

        # All catalogue rows belonging to this image.
        mask = xidname.groups.keys['imagename'] == imagename
        xidn = xidname.groups[mask]
        nthis=len(xidn)

        # The image is only usable when every filter has its .jpg;
        # report the file that is actually missing.
        missing = next((path for path in src_jpgs if not os.path.isfile(path)), None)
        if missing is not None:
            print('i=%d file does not exist %s skip...'%(i,missing))
            continue

        # Largest radius among the galaxies of this image.
        # NOTE(review): 0.396127 is presumably the pixel scale (arcsec/pixel),
        # i.e. this converts petrorad_r to pixels - confirm.
        maxprad=0
        for j in range(nthis):
            prad=xidn['petrorad_r'][j]/0.396127
            if prad > maxprad:
                maxprad=prad

        if (nthis>=1 and maxprad>p_radius):
            images = []
            try:
                for path in src_jpgs:
                    images.append(Image.open(path))

                # Box coordinates are normalised with the size of the first filter's image.
                w, h = images[0].size

                label_lines = []
                for j in range(nthis):
                    prad=2.1*xidn['petrorad_r'][j]/0.396127
                    colc=xidn['colc'][j]
                    rowc=xidn['rowc'][j]
                    #'p_el', 'p_cw', 'p_acw', 'p_edge', 'p_dk', 'p_mg', 'p_cs'
                    p_el =xidn['p_el'][j]
                    p_cw =xidn['p_cw'][j]
                    p_acw =xidn['p_acw'][j]
                    p_edge =xidn['p_edge'][j]
                    p_dk =xidn['p_dk'][j]
                    p_mg =xidn['p_mg'][j]
                    p_cs =xidn['p_cs'][j]

                    # Class = index of the largest probability. When p_cs beats p_el
                    # the elliptical class (0) is excluded from the choice.
                    if p_cs <= p_el:
                        gtype = np.argmax([p_el,p_cw+p_acw,p_edge,p_dk,p_mg])
                    elif p_cs > p_el:
                        gtype = np.argmax([p_cw+p_acw,p_edge,p_dk,p_mg])+1
                        prad=prad*1.1 # extended obj

                    # Class 0 is only counted (and, below, only written) when big enough.
                    if (gtype==0) and (prad>p_radius):
                        n1=n1+1
                    if gtype==1:
                        n2=n2+1
                    elif gtype==2:
                        n3=n3+1
                    elif gtype==3:
                        n4=n4+1
                    elif gtype==4:
                        n5=n5+1
                        prad=prad*1.2

                    if (prad > p_radius) or (gtype>0):
                        # class, centre x, centre y, box width, box height
                        # (all but the class divided by the image size)
                        label_lines.append('%d %f %f %f %f\n'%(gtype,colc/w,rowc/h,prad/w,prad/h))
                        nann=nann+1

                # Every filter gets the same label file.
                for label_path in out_labels:
                    with open(label_path,'w') as label_file:
                        label_file.writelines(label_lines)

                for image, out_jpg in zip(images, out_jpgs):
                    image.save(out_jpg,'jpeg',quality=96)
            finally:
                for image in images:
                    image.close()

            for list_file, out_jpg in zip(list_files, out_jpgs):
                list_file.write('%s\n'%out_jpg)

            if i%50==0:
                print('i=%d %s w=%d h=%d nthis=%d maxprad=%f nfiles=%d anntot=%d'%(i,src_jpgs[0],w,h,nthis,maxprad,nfiles,nann))
            nfiles=nfiles+1

        if i%50==0:
            print('classes = %d %d %d %d %d'%(n1,n2,n3,n4,n5))
finally:
    # Close (and flush) the list files even if the loop fails part-way.
    for list_file in list_files:
        list_file.close()


# Concatenate the per-filter lists into one file covering all filters.
# The list files are re-opened by path: the handles above are already closed.
# NOTE(review): this path is at the filesystem root, while the notebook header
# says the combined list belongs in the /trainall folder - confirm the
# intended location.
all_list_path = '/%sall.txt'%(sample)
with open(all_list_path, 'w') as outfile:
    for list_path in list_paths:
        with open(list_path) as infile:
            for line in infile:
                outfile.write(line)


print('Final classes = %d %d %d %d %d tot=%d'%(n1,n2,n3,n4,n5,n1+n2+n3+n4+n5))
print('nfiles=%d nann=%d'%(nfiles,nann))