Out[3]:
Counter({0.0: 682, 1.0: 4057, 2.0: 38})
logloss of train is 0.11326009097947366
OrderedDict([('ALB', 527), ('BET', 15), ('DOL', 13), ('LAG', 38), ('NoF', 179), ('OTHER', 73), ('SHARK', 15), ('YFT', 140)])
OrderedDict([('ALB', 1731), ('BET', 200), ('DOL', 116), ('LAG', 67), ('NoF', 453), ('OTHER', 299), ('SHARK', 175), ('YFT', 736)])
OrderedDict([('ALB', 1719), ('BET', 200), ('DOL', 117), ('LAG', 67), ('NoF', 465), ('OTHER', 299), ('SHARK', 176), ('YFT', 734)])
OrderedDict([('ALB', 1990.479), ('BET', 56.655), ('DOL', 49.101), ('LAG', 143.526), ('NoF', 676.083), ('OTHER', 275.721), ('SHARK', 56.655), ('YFT', 528.78)])
# Crop test images by detections_full_AGNOSTICnms.pkl
# For each image keep every detection whose best fish-class score clears
# CONF_THRESH; if none qualify, fall back to the single best detection.
FISH_CLASSES = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
import pickle
with open('../data/RFCN_detections/detections_full_AGNOSTICnms_resnet101_rfcn_ohem_iter_30000.pkl','rb') as f:
    detections_full_AGNOSTICnms = pickle.load(f, encoding='latin1')
CONF_THRESH = 0.5
outputs = []
count = np.zeros(len(detections_full_AGNOSTICnms))
for im, detects_im in enumerate(detections_full_AGNOSTICnms):
    # columns 0:4 look like bbox coords, 4 the NoF score, 5: the fish-class
    # scores — TODO confirm against the detector's output layout
    kept = [row for row in detects_im if np.max(row[5:]) >= CONF_THRESH]
    count[im] = len(kept)
    if not kept:
        # nothing confident enough: take the detection with the highest
        # fish-class score so every image contributes at least one row
        best = np.argmax(np.max(detects_im[:, 5:], axis=1))
        kept.append(detects_im[best, :])
    outputs.append(np.asarray(kept))
print(sum(o.shape[0] for o in outputs))
# Submission from detections_full_AGNOSTICnms.pkl
# Average the class scores of confident detections per image, clip and
# renormalize, score log-loss on the train tail, write a Kaggle CSV.
FISH_CLASSES = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
import pickle
import math
with open('../data/RFCN_detections/detections_full_AGNOSTICnms_resnet101_rfcn_ohem_iter_30000.pkl','rb') as f:
    detections_full_AGNOSTICnms = pickle.load(f, encoding='latin1')
CONF_THRESH = 0.5
outputs = np.ndarray((len(detections_full_AGNOSTICnms), len(FISH_CLASSES)), dtype=np.float64)
for im in range(len(detections_full_AGNOSTICnms)):
    detects_im = detections_full_AGNOSTICnms[im]
    # keep detections whose best fish-class score clears the threshold
    outputs_im = [detects_im[i, :] for i in range(len(detects_im))
                  if np.max(detects_im[i, 5:]) >= CONF_THRESH]
    if len(outputs_im) == 0:
        # fall back to the most confident detection; prepend 4 zeros so the
        # row keeps the same 12-column layout as a full detection row
        ind = np.argmax(np.max(detects_im[:, 5:], axis=1))
        l = [0, 0, 0, 0]
        l.extend(np.ndarray.tolist(detects_im[ind, 4:]))
        outputs_im.append(l)
    # mean class-score vector (columns 4:) over the kept detections
    outputs[im] = np.mean(np.asarray(outputs_im), axis=0)[4:]
CLIP_THRESH = 0.02
# clip away over-confident zeros, then renormalize rows to sum to 1
outputs_cliped = np.clip(outputs, CLIP_THRESH, 1, out=None)
outputs_cliped = outputs_cliped / np.sum(outputs_cliped, axis=1, keepdims=True)
test_preds = outputs_cliped[:1000]
train_preds = outputs_cliped[1000:]
with open("../RFCN/ImageSets/Main/test.txt", "r") as f:
    ims = f.readlines()
# rstrip('\n') instead of [:-1]: the original chopped a real character off
# the last line when the file lacks a trailing newline
test_files = [im.rstrip('\n') + '.jpg' for im in ims[:1000]]
train_files = [im.rstrip('\n') for im in ims[1000:]]
with open("../RFCN/ImageSets/Main/train_test.txt", "r") as f:
    train_file_labels = f.readlines()
# BUG FIX: the original inner loop had no break, so a train file with no
# matching label silently reused the label left over from the previous
# iteration. Build an explicit id -> label map instead (lines look like
# 'img_00001 ALB'; last duplicate wins, matching the original loop).
label_map = {line[:9]: line[10:].rstrip('\n') for line in train_file_labels}
log_losses = []
for i in range(len(train_preds)):
    label = label_map.get(train_files[i])
    if label is None:
        continue  # no ground truth for this image — skip rather than misuse a stale label
    index = FISH_CLASSES.index(label)
    log_losses.append(-math.log(train_preds[i][index]))
log_loss = sum(log_losses) / float(len(log_losses))
print('logloss of train is', log_loss)
import datetime
submission = pd.DataFrame(test_preds, columns=FISH_CLASSES)
submission.insert(0, 'image', test_files)
now = datetime.datetime.now()
info = 'RFCN_AGONOSTICnms_resnet101_conf{:.1f}_'.format(CONF_THRESH) + 'clip{:.2f}_'.format(CLIP_THRESH) + '{:.4f}'.format(log_loss)
sub_file = 'submission_' + info + '_' + str(now.strftime("%Y-%m-%d-%H-%M")) + '.csv'
submission.to_csv(sub_file, index=False)
# Crop test images by detections_full.pkl
# Per-class detections: a row qualifies when its own class score (column
# 4+cls) clears CONF_THRESH; otherwise fall back to the overall best row.
FISH_CLASSES = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
import pickle
with open('../data/RFCN_detections/detections_full_resnet101_rfcn_ohem_iter_30000.pkl','rb') as f:
    detections_full = pickle.load(f, encoding='latin1')
CONF_THRESH = 0.5
n_images = len(detections_full[0])
outputs = []
count = np.zeros(n_images)
for im in range(n_images):
    outputs_im = []
    # class 0 is background ('NoF' slot) — skip it, as the original did
    for cls in range(1, len(FISH_CLASSES)):
        for row in detections_full[cls][im]:
            if row[4 + cls] >= CONF_THRESH:
                outputs_im.append(row)
    count[im] = len(outputs_im)
    # stack every class's detections so the fallback can search them all
    detects_im = np.vstack([detections_full[cls][im] for cls in range(1, len(FISH_CLASSES))])
    if not outputs_im:
        best = np.argmax(np.max(detects_im[:, 5:], axis=1))
        outputs_im.append(detects_im[best, :])
    outputs.append(np.asarray(outputs_im))
print(sum(o.shape[0] for o in outputs))
# Submission from detections_full.pkl
# Same pipeline as the AGNOSTICnms variant, but detections are stored
# per class: a row qualifies when its own class score clears the threshold.
FISH_CLASSES = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
import pickle
import math
with open('../data/RFCN_detections/detections_full_resnet101_rfcn_ohem_iter_30000.pkl','rb') as f:
    detections_full = pickle.load(f, encoding='latin1')
CONF_THRESH = 0.5
outputs = np.ndarray((len(detections_full[0]), len(FISH_CLASSES)), dtype=np.float64)
for im in range(len(detections_full[0])):
    outputs_im = []
    for cls in range(1, len(FISH_CLASSES)):  # class 0 is background
        detects_im_cls = detections_full[cls][im]
        for i in range(len(detects_im_cls)):
            # column 4+cls is this class's own score (scalar; the original
            # wrapped it in a no-op np.max)
            if detects_im_cls[i, 4 + cls] >= CONF_THRESH:
                outputs_im.append(detects_im_cls[i, :])
    if len(outputs_im) == 0:
        # hoisted into the branch: the original built this vstack for every
        # image even though it is only needed for the empty fallback
        detects_im = np.vstack([detections_full[cls][im] for cls in range(1, len(FISH_CLASSES))])
        ind = np.argmax(np.max(detects_im[:, 5:], axis=1))
        outputs_im.append(detects_im[ind, :])
    # mean class-score vector (columns 4:) over the kept detections
    outputs[im] = np.mean(np.asarray(outputs_im), axis=0)[4:]
CLIP_THRESH = 0.02
# clip away over-confident zeros, then renormalize rows to sum to 1
outputs_cliped = np.clip(outputs, CLIP_THRESH, 1, out=None)
outputs_cliped = outputs_cliped / np.sum(outputs_cliped, axis=1, keepdims=True)
test_preds = outputs_cliped[:1000]
train_preds = outputs_cliped[1000:]
with open("../RFCN/ImageSets/Main/test.txt", "r") as f:
    ims = f.readlines()
# rstrip('\n') instead of [:-1]: the original chopped a real character off
# the last line when the file lacks a trailing newline
test_files = [im.rstrip('\n') + '.jpg' for im in ims[:1000]]
train_files = [im.rstrip('\n') for im in ims[1000:]]
with open("../RFCN/ImageSets/Main/train_test.txt", "r") as f:
    train_file_labels = f.readlines()
# BUG FIX: the original inner loop had no break, so a train file with no
# matching label silently reused the label left over from the previous
# iteration. Build an explicit id -> label map instead (lines look like
# 'img_00001 ALB'; last duplicate wins, matching the original loop).
label_map = {line[:9]: line[10:].rstrip('\n') for line in train_file_labels}
log_losses = []
for i in range(len(train_preds)):
    label = label_map.get(train_files[i])
    if label is None:
        continue  # no ground truth for this image — skip rather than misuse a stale label
    index = FISH_CLASSES.index(label)
    log_losses.append(-math.log(train_preds[i][index]))
log_loss = sum(log_losses) / float(len(log_losses))
print('logloss of train is', log_loss)
import datetime
submission = pd.DataFrame(test_preds, columns=FISH_CLASSES)
submission.insert(0, 'image', test_files)
now = datetime.datetime.now()
info = 'RFCN_resnet101_conf{:.2f}_'.format(CONF_THRESH) + 'clip{:.2f}_'.format(CLIP_THRESH) + '{:.4f}'.format(log_loss)
sub_file = 'submission_' + info + '_' + str(now.strftime("%Y-%m-%d-%H-%M")) + '.csv'
submission.to_csv(sub_file, index=False)