In [ ]:
#This code filters all .jpg images so that only images with big enough galaxies will be used for training
#It's recommended to run jpg_filters before this file
#p_radius is the minimum petrosian radius for which a galaxy will be considered valid
#sample indicates if you are configuring the training or validation set. Choose between 'train' and 'val'
#When a .jpg image has a big enough galaxy, the nth filter of said image will be copied to outdirn (e.g. outdir1)
#In each outdir, two folders must be created: /images and /labels. Inside both folders must be two folders called /train and /val
#A folder called /trainall must also be created
#Depending on sample, train.txt or val.txt will be created in each outdir. These files contain the paths to all the copied images, which darknet will read
#Depending on sample, trainall.txt or valall.txt will be created in /trainall. Use this file for darknet if you want to use all filters for training
In [ ]:
import numpy as np
from PIL import Image
from astropy.table import Table
import copy
import os.path
In [ ]:
# Row range of unique.data to process: rows istart up to (not including) iend.
istart, iend = 0, 34000
# Which set is being built: 'train' for the training set, 'val' for validation.
sample = 'train'
# A galaxy is only considered when its Petrosian radius exceeds this minimum.
p_radius = 20

# Input folders holding the .jpg images, one folder per filter.
# Five filters are assumed. Everything done per filter is written as a group
# of consecutive lines, so adding or removing a filter means adding or
# removing one line in each of those groups.
jpg_loc1 = 'filtro1'
jpg_loc2 = 'filtro2'
jpg_loc3 = 'filtro3'
jpg_loc4 = 'filtro4'
jpg_loc5 = 'filtro5'

# Output folders, one per filter. Each of them must already contain the
# /images and /labels sub-folders.
outdir1 = '/mnt/data3/sdss/yolo1'
outdir2 = '/mnt/data3/sdss/yolo2'
outdir3 = '/mnt/data3/sdss/yolo3'
outdir4 = '/mnt/data3/sdss/yolo4'
outdir5 = '/mnt/data3/sdss/yolo5'

# Input tables produced earlier in the pipeline (by 01_gettables.ipynb).
xid = Table.read('zoospecnewall2.data', format='ascii')
print(xid.colnames)
xuniq = Table.read('unique.data', format='ascii')
# Row counts of both tables.
ngal, nuniq = len(xid), len(xuniq)
In [ ]:
# Group the galaxy table by image name so that every galaxy belonging to one
# image can be looked up together.
xidname = xid.group_by('imagename')

# One image-list file (<sample>.txt) per output folder, opened for writing.
f1, f2, f3, f4, f5 = [
    open('%s/%s.txt' % (outdir, sample), 'w')
    for outdir in (outdir1, outdir2, outdir3, outdir4, outdir5)
]

# Running totals: number of images kept and number of annotations written.
nfiles = nann = 0
# Per-class annotation counters, one for each of the five YOLO classes.
n1 = n2 = n3 = n4 = n5 = 0
# Main loop: for every image in rows istart..iend-1 of xuniq that is present in
# all five filter folders and holds at least one big enough galaxy, write one
# YOLO label file per filter, copy the five .jpg files into the output folders
# and append their paths to the per-filter list files.

# Per-filter inputs and outputs, kept in lockstep. To use a different number of
# filters, edit these three tuples.
jpg_locs = (jpg_loc1, jpg_loc2, jpg_loc3, jpg_loc4, jpg_loc5)
outdirs = (outdir1, outdir2, outdir3, outdir4, outdir5)
listfiles = (f1, f2, f3, f4, f5)
# Divisor applied to petrorad_r before comparing it with p_radius and the image
# size (presumably the SDSS pixel scale in arcsec/pixel -- TODO confirm).
pixscale = 0.396127

# Clamp the upper bound so that an iend larger than the table cannot raise an
# IndexError.
for i in range(istart, min(iend, len(xuniq))):
    imagename = xuniq['imagename'][i]
    # Source image, destination image and label path for each filter.
    fjpgs = ['%s/%s.jpg' % (loc, imagename) for loc in jpg_locs]
    ojpgs = ['%s/images/%s/%s.jpg' % (out, sample, imagename) for out in outdirs]
    olabs = ['%s/labels/%s/%s.txt' % (out, sample, imagename) for out in outdirs]

    # All catalogue rows (galaxies) that belong to this image.
    mask = xidname.groups.keys['imagename'] == imagename
    xidn = xidname.groups[mask]
    nthis = len(xidn)

    missing = [path for path in fjpgs if not os.path.isfile(path)]
    if missing:
        # Report the file that is actually absent, not always the first filter.
        print('i=%d file does not exist %s skip...' % (i, missing[0]))
    else:
        # Largest scaled Petrosian radius among the galaxies of this image.
        maxprad = 0
        for petrorad in xidn['petrorad_r']:
            prad = petrorad / pixscale
            if prad > maxprad:
                maxprad = prad

        if nthis >= 1 and maxprad > p_radius:
            # Only the image size is needed to normalise the boxes; it is taken
            # from the first filter and applied to all of them.
            with Image.open(fjpgs[0]) as first:
                w, h = first.size

            labels = []
            for row in xidn:
                # Box side: 2.1 times the scaled Petrosian radius.
                prad = 2.1 * row['petrorad_r'] / pixscale
                colc = row['colc']
                rowc = row['rowc']
                # Candidate classes, in class-index order:
                # 0: p_el, 1: p_cw + p_acw, 2: p_edge, 3: p_dk, 4: p_mg
                non_el = [row['p_cw'] + row['p_acw'], row['p_edge'],
                          row['p_dk'], row['p_mg']]
                if row['p_cs'] > row['p_el']:
                    # p_el is excluded from the vote, hence the +1 offset.
                    gtype = np.argmax(non_el) + 1
                    prad = prad * 1.1  # extended obj
                else:
                    # Also taken when the comparison is undefined (NaN), so
                    # gtype is never left over from a previous galaxy.
                    gtype = np.argmax([row['p_el']] + non_el)
                # NOTE(review): the 1.1 and 1.2 scalings are applied only inside
                # their own branches; the original nesting was lost -- confirm.

                if gtype == 0:
                    # Class 0 is only annotated (and counted) when big enough.
                    if prad > p_radius:
                        n1 = n1 + 1
                elif gtype == 1:
                    n2 = n2 + 1
                elif gtype == 2:
                    n3 = n3 + 1
                elif gtype == 3:
                    n4 = n4 + 1
                elif gtype == 4:
                    n5 = n5 + 1
                    prad = prad * 1.2

                if (prad > p_radius) or (gtype > 0):
                    # YOLO label line: class, x centre, y centre, width, height,
                    # all normalised by the image size.
                    labels.append('%d %f %f %f %f\n'
                                  % (gtype, colc / w, rowc / h, prad / w, prad / h))
                    nann = nann + 1

            # The annotations are identical for every filter.
            for olab in olabs:
                with open(olab, 'w') as ff:
                    ff.writelines(labels)
            for fjpg, ojpg in zip(fjpgs, ojpgs):
                with Image.open(fjpg) as image:
                    image.save(ojpg, 'jpeg', quality=96)
            for flist, ojpg in zip(listfiles, ojpgs):
                flist.write('%s\n' % ojpg)

            if i % 50 == 0:
                print('i=%d %s w=%d h=%d nthis=%d maxprad=%f nfiles=%d anntot=%d'
                      % (i, fjpgs[0], w, h, nthis, maxprad, nfiles, nann))
            nfiles = nfiles + 1

    if i % 50 == 0:
        print('classes = %d %d %d %d %d' % (n1, n2, n3, n4, n5))
# Finish the per-filter list files, merge them into a single list covering all
# filters, and print the final statistics.
for flist in (f1, f2, f3, f4, f5):
    flist.close()

# The merge must re-open the list files by PATH; passing the closed file
# objects f1..f5 to open() raises TypeError.
filenames = ['%s/%s.txt' % (outdir, sample)
             for outdir in (outdir1, outdir2, outdir3, outdir4, outdir5)]
# NOTE(review): this path points at the filesystem root ('/trainall.txt' or
# '/valall.txt'), while the file header says the merged list belongs in a
# /trainall folder -- confirm the intended location.
with open('/%sall.txt' % (sample), 'w') as outfile:
    for fname in filenames:
        with open(fname) as infile:
            outfile.writelines(infile)

print('Final classes = %d %d %d %d %d tot=%d' % (n1, n2, n3, n4, n5, n1 + n2 + n3 + n4 + n5))
print('nfiles=%d nann=%d' % (nfiles, nann))