In [1]:
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from skimage import measure
%matplotlib inline
import os
import numpy as np # linear algebra
In [2]:
def fishskin(i, show, sens, min_points=100):
    """Outline a fish by tracing grey-level contours in an image.

    Parameters
    ----------
    i : str
        Path of the image file to open.
    show : bool
        When true, plot the image with the retained contours drawn on top.
    sens : float
        Sensitivity: the grey level handed to ``measure.find_contours`` as
        the contour level of the greyscale ('L' mode) image.
    min_points : int, optional
        Minimum number of vertices a contour needs in order to be kept.
        The default of 100 matches the original ``len(contour) > 99`` test.

    Returns
    -------
    list
        The retained contours, each an array of vertex coordinates. They are
        plotted below with column 1 on the x axis and column 0 on the y axis.
    """
    # The context manager releases the file handle even if contour finding
    # raises, which matters when this is called for thousands of images.
    with Image.open(i) as img:
        # find_contours works on a 2-D array; convert the PIL image explicitly
        # instead of relying on implicit coercion of the Image object.
        gray = np.asarray(img.convert('L'), dtype=float)
        contours_long = [
            contour
            for contour in measure.find_contours(gray, sens)
            if len(contour) >= min_points
        ]
        if show:
            fig, ax = plt.subplots()
            ax.imshow(img, interpolation='nearest', cmap=plt.cm.gray)
            for contour in contours_long:
                ax.plot(contour[:, 1], contour[:, 0], linewidth=1)
            ax.axis('image')
            ax.set_xticks([])
            ax.set_yticks([])
            plt.show()
    return contours_long
In [3]:
# Smoke test: outline one ALB image at sensitivity 100 and show the plot.
test_cons = fishskin(i='../data/raw/train/ALB/test.jpg', show=True, sens=100)
In [4]:
# Same image at sensitivity 80, for comparison with the run at 100.
test_cons2 = fishskin(i='../data/raw/train/ALB/test.jpg', show=True, sens=80)
In [5]:
# File names of the training images, one list per class.
# os.listdir returns entries in arbitrary order, and the per-class tables
# built later are matched to these names purely by position, so sort for a
# reproducible order. Hidden entries (names starting with '.') are not
# images and are skipped.
albs = sorted(f for f in os.listdir('../data/raw/train/ALB/') if not f.startswith('.'))
bets = sorted(f for f in os.listdir('../data/raw/train/BET/') if not f.startswith('.'))
dols = sorted(f for f in os.listdir('../data/raw/train/DOL/') if not f.startswith('.'))
lags = sorted(f for f in os.listdir('../data/raw/train/LAG/') if not f.startswith('.'))
nofs = sorted(f for f in os.listdir('../data/raw/train/NoF/') if not f.startswith('.'))
oths = sorted(f for f in os.listdir('../data/raw/train/OTHER/') if not f.startswith('.'))
shas = sorted(f for f in os.listdir('../data/raw/train/SHARK/') if not f.startswith('.'))
yfts = sorted(f for f in os.listdir('../data/raw/train/YFT/') if not f.startswith('.'))
In [6]:
# Preview the outlines of the first five ALB images.
for fname in albs[:5]:
    fishskin(i='../data/raw/train/ALB/' + fname, show=True, sens=100)
In [7]:
# Preview the outlines of the first five BET images.
for fname in bets[:5]:
    fishskin(i='../data/raw/train/BET/' + fname, show=True, sens=100)
In [8]:
# Preview the outlines of the first five DOL images.
for fname in dols[:5]:
    fishskin(i='../data/raw/train/DOL/' + fname, show=True, sens=100)
In [9]:
# Preview the outlines of the first five LAG images.
for fname in lags[:5]:
    fishskin(i='../data/raw/train/LAG/' + fname, show=True, sens=100)
In [10]:
# Preview the outlines of the first five NoF images.
for fname in nofs[:5]:
    fishskin(i='../data/raw/train/NoF/' + fname, show=True, sens=100)
In [11]:
# Preview the outlines of the first five OTHER images.
for fname in oths[:5]:
    fishskin(i='../data/raw/train/OTHER/' + fname, show=True, sens=100)
In [12]:
# Preview the outlines of the first five SHARK images.
for fname in shas[:5]:
    fishskin(i='../data/raw/train/SHARK/' + fname, show=True, sens=100)
In [13]:
# Preview the outlines of the first five YFT images.
for fname in yfts[:5]:
    fishskin(i='../data/raw/train/YFT/' + fname, show=True, sens=100)
In [25]:
# Contours for every ALB image, one list entry per file in `albs`.
#
# Some images raise an error at sensitivity 100. It is not caused by the
# number of albacore images: a problem image fails on its own too, yet gets
# through at a lower sensitivity. So the sensitivity is lowered in steps of
# 10 only for images that fail, and an image that still fails at 50 is
# recorded as np.nan so the list stays aligned with `albs`.
albs_con_list = []
for fname in albs:
    for level in (100, 90, 80, 70, 60, 50):
        try:
            found = fishskin('../data/raw/train/ALB/' + fname, False, level)
        except Exception:
            # Exception (not a bare except) so Ctrl-C still interrupts the run.
            continue
        albs_con_list.append(found)
        break
    else:
        # Every level failed: skip the image but keep its slot.
        albs_con_list.append(np.nan)
In [18]:
# Contours for every LAG image at sensitivity 100 (this cell has no retry at
# lower sensitivities).
lags_con_list = [
    fishskin('../data/raw/train/LAG/' + fname, False, 100)
    for fname in lags
]
In [19]:
# Contours for every BET image, one list entry per file in `bets`.
# Start at sensitivity 100 and step down by 10 for images that raise
# (img_00441.jpg, img_01481.jpg and img_04028.jpg were the files being
# excluded while debugging). An image that still fails at 50 is recorded as
# np.nan so the list stays aligned with `bets`.
bets_con_list = []
for fname in bets:
    for level in (100, 90, 80, 70, 60, 50):
        try:
            found = fishskin('../data/raw/train/BET/' + fname, False, level)
        except Exception:
            # Exception (not a bare except) so Ctrl-C still interrupts the run.
            continue
        bets_con_list.append(found)
        break
    else:
        # Every level failed: skip the image but keep its slot.
        bets_con_list.append(np.nan)
In [20]:
# Contours for every DOL image, one list entry per file in `dols`.
# Start at sensitivity 100 and step down by 10 for images that raise; an
# image that still fails at 50 is recorded as np.nan so the list stays
# aligned with `dols`.
dols_con_list = []
for fname in dols:
    for level in (100, 90, 80, 70, 60, 50):
        try:
            found = fishskin('../data/raw/train/DOL/' + fname, False, level)
        except Exception:
            # Exception (not a bare except) so Ctrl-C still interrupts the run.
            continue
        dols_con_list.append(found)
        break
    else:
        # Every level failed: skip the image but keep its slot.
        dols_con_list.append(np.nan)
In [21]:
# Contours for every NoF image, one list entry per file in `nofs`.
# Start at sensitivity 100 and step down by 10 for images that raise; an
# image that still fails at 50 is recorded as np.nan so the list stays
# aligned with `nofs`.
nofs_con_list = []
for fname in nofs:
    for level in (100, 90, 80, 70, 60, 50):
        try:
            found = fishskin('../data/raw/train/NoF/' + fname, False, level)
        except Exception:
            # Exception (not a bare except) so Ctrl-C still interrupts the run.
            continue
        nofs_con_list.append(found)
        break
    else:
        # Every level failed: skip the image but keep its slot.
        nofs_con_list.append(np.nan)
In [22]:
# Contours for every OTHER image, one list entry per file in `oths`.
# Start at sensitivity 100 and step down by 10 for images that raise; an
# image that still fails at 50 is recorded as np.nan so the list stays
# aligned with `oths`.
oths_con_list = []
for fname in oths:
    for level in (100, 90, 80, 70, 60, 50):
        try:
            found = fishskin('../data/raw/train/OTHER/' + fname, False, level)
        except Exception:
            # Exception (not a bare except) so Ctrl-C still interrupts the run.
            continue
        oths_con_list.append(found)
        break
    else:
        # Every level failed: skip the image but keep its slot.
        oths_con_list.append(np.nan)
In [23]:
# Contours for every SHARK image, one list entry per file in `shas`.
# Start at sensitivity 100 and step down by 10 for images that raise; an
# image that still fails at 50 is recorded as np.nan so the list stays
# aligned with `shas`.
shas_con_list = []
for fname in shas:
    for level in (100, 90, 80, 70, 60, 50):
        try:
            found = fishskin('../data/raw/train/SHARK/' + fname, False, level)
        except Exception:
            # Exception (not a bare except) so Ctrl-C still interrupts the run.
            continue
        shas_con_list.append(found)
        break
    else:
        # Every level failed: skip the image but keep its slot.
        shas_con_list.append(np.nan)
In [24]:
# Contours for every YFT image, one list entry per file in `yfts`.
# Start at sensitivity 100 and step down by 10 for images that raise; an
# image that still fails at 50 is recorded as np.nan so the list stays
# aligned with `yfts`.
yfts_con_list = []
for fname in yfts:
    for level in (100, 90, 80, 70, 60, 50):
        try:
            found = fishskin('../data/raw/train/YFT/' + fname, False, level)
        except Exception:
            # Exception (not a bare except) so Ctrl-C still interrupts the run.
            continue
        yfts_con_list.append(found)
        break
    else:
        # Every level failed: skip the image but keep its slot.
        yfts_con_list.append(np.nan)
In [ ]:
# Debugging aid: open one of the BET images that was being investigated.
# `test` is not read by any other cell visible in this notebook.
test = Image.open ('../data/raw/train/BET/img_00441.jpg')
In [ ]:
# Re-run the outline on that BET image at sensitivity 90 with the plot shown.
fishskin('../data/raw/train/BET/img_00441.jpg', True, 90)
# Observation: this image does not raise errors at the lower sensitivity.
In [30]:
# Tabulate the ALB contours and save them to the processed-data folder.
# NOTE(review): each cell appears to hold a whole contour array, so the CSV
# presumably stores its text representation -- confirm it can be read back.
albs_df = pd.DataFrame(albs_con_list)
albs_df.to_csv('../data/processed/albs_contours.csv')
In [31]:
# Tabulate the contours of the remaining classes, then write each table to
# the processed-data folder.
lags_df, bets_df, dols_df, nofs_df, oths_df, shas_df, yfts_df = [
    pd.DataFrame(con_list)
    for con_list in (
        lags_con_list,
        bets_con_list,
        dols_con_list,
        nofs_con_list,
        oths_con_list,
        shas_con_list,
        yfts_con_list,
    )
]
for frame, out_path in (
    (lags_df, '../data/processed/lags_contours.csv'),
    (bets_df, '../data/processed/bets_contours.csv'),
    (dols_df, '../data/processed/dols_contours.csv'),
    (nofs_df, '../data/processed/nofs_contours.csv'),
    (oths_df, '../data/processed/oths_contours.csv'),
    (shas_df, '../data/processed/shas_contours.csv'),
    (yfts_df, '../data/processed/yfts_contours.csv'),
):
    frame.to_csv(out_path)
In [37]:
# Sanity check: dimensions of the LAG contour table.
lags_df.shape
# Recorded result: 67 rows, 76 columns.
Out[37]:
In [47]:
# Number of LAG image files, to compare with the row count of lags_df.
len(lags)
# Recorded result: 67 LAG images. So when the contour lists are coerced into
# a DataFrame, each row is an image and each column is a contour.
Out[47]:
In [49]:
# Number of contours kept for the first LAG image.
len(lags_con_list[0])
Out[49]:
In [ ]:
# Ideas to try next:
# - skimage.exposure.equalize_hist (histogram equalization)
# - blurring the image may also get rid of some unnecessary detail
# - the "fill holes" webpage that Jason found
In [129]:
#some possible features... average number of contours, average length of those contours, average color of image within contour
In [130]:
# One feature table per class, seeded with that class's image file names.
albs_info = pd.DataFrame({'filename': albs})
bets_info = pd.DataFrame({'filename': bets})
dols_info = pd.DataFrame({'filename': dols})
lags_info = pd.DataFrame({'filename': lags})
nofs_info = pd.DataFrame({'filename': nofs})
oths_info = pd.DataFrame({'filename': oths})
shas_info = pd.DataFrame({'filename': shas})
yfts_info = pd.DataFrame({'filename': yfts})
In [131]:
# Number of retained contours per image, for every class.
# Images whose extraction failed at every sensitivity are stored as np.nan
# (not a list) in their *_con_list, and len() raises TypeError on that, so
# those rows get NaN instead.
for info, con_list in (
    (albs_info, albs_con_list),
    (bets_info, bets_con_list),
    (dols_info, dols_con_list),
    (lags_info, lags_con_list),
    (nofs_info, nofs_con_list),
    (oths_info, oths_con_list),
    (shas_info, shas_con_list),
    (yfts_info, yfts_con_list),
):
    info['n_cons'] = [
        len(contours) if isinstance(contours, list) else np.nan
        for contours in con_list
    ]
In [132]:
# Mean number of vertices per contour for each ALB image.
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
albs_info['avg_con_len'] = np.nan
for idx, contours in enumerate(albs_con_list):
    # Failed extractions are stored as np.nan, and an image may have no
    # retained contours; both keep the NaN default.
    if isinstance(contours, list) and contours:
        albs_info.loc[idx, 'avg_con_len'] = np.mean([len(c) for c in contours])
In [133]:
# Mean number of vertices per contour for each BET image.
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
bets_info['avg_con_len'] = np.nan
for idx, contours in enumerate(bets_con_list):
    # Failed extractions are stored as np.nan, and an image may have no
    # retained contours; both keep the NaN default.
    if isinstance(contours, list) and contours:
        bets_info.loc[idx, 'avg_con_len'] = np.mean([len(c) for c in contours])
In [134]:
# Mean number of vertices per contour for each DOL image.
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
dols_info['avg_con_len'] = np.nan
for idx, contours in enumerate(dols_con_list):
    # Failed extractions are stored as np.nan, and an image may have no
    # retained contours; both keep the NaN default.
    if isinstance(contours, list) and contours:
        dols_info.loc[idx, 'avg_con_len'] = np.mean([len(c) for c in contours])
In [135]:
# Mean number of vertices per contour for each LAG image.
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
lags_info['avg_con_len'] = np.nan
for idx, contours in enumerate(lags_con_list):
    # An entry that is not a list, or an image with no retained contours,
    # keeps the NaN default.
    if isinstance(contours, list) and contours:
        lags_info.loc[idx, 'avg_con_len'] = np.mean([len(c) for c in contours])
In [136]:
# Mean number of vertices per contour for each NoF image.
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
nofs_info['avg_con_len'] = np.nan
for idx, contours in enumerate(nofs_con_list):
    # Failed extractions are stored as np.nan, and an image may have no
    # retained contours; both keep the NaN default.
    if isinstance(contours, list) and contours:
        nofs_info.loc[idx, 'avg_con_len'] = np.mean([len(c) for c in contours])
In [137]:
# Mean number of vertices per contour for each OTHER image.
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
oths_info['avg_con_len'] = np.nan
for idx, contours in enumerate(oths_con_list):
    # Failed extractions are stored as np.nan, and an image may have no
    # retained contours; both keep the NaN default.
    if isinstance(contours, list) and contours:
        oths_info.loc[idx, 'avg_con_len'] = np.mean([len(c) for c in contours])
In [138]:
# Mean number of vertices per contour for each SHARK image.
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
shas_info['avg_con_len'] = np.nan
for idx, contours in enumerate(shas_con_list):
    # Failed extractions are stored as np.nan, and an image may have no
    # retained contours; both keep the NaN default.
    if isinstance(contours, list) and contours:
        shas_info.loc[idx, 'avg_con_len'] = np.mean([len(c) for c in contours])
In [139]:
# Mean number of vertices per contour for each YFT image.
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
yfts_info['avg_con_len'] = np.nan
for idx, contours in enumerate(yfts_con_list):
    # Failed extractions are stored as np.nan, and an image may have no
    # retained contours; both keep the NaN default.
    if isinstance(contours, list) and contours:
        yfts_info.loc[idx, 'avg_con_len'] = np.mean([len(c) for c in contours])
In [144]:
# Bounding-box features for each ALB image, averaged over its contours:
#   avg_length = mean extent along contour coordinate 0
#   avg_width  = mean extent along contour coordinate 1
#   avg_shape  = mean of length / width
#   avg_area   = mean of length * width
# NOTE(review): fishskin plots coordinate 1 on the x axis, so "length" is
# presumably the vertical extent -- confirm the naming is intended.
for col in ('avg_shape', 'avg_length', 'avg_width', 'avg_area'):
    albs_info[col] = np.nan
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
for fish, contours in enumerate(albs_con_list):
    # Failed extractions (np.nan) and images without contours keep NaN.
    if not isinstance(contours, list) or not contours:
        continue
    # One (coordinate-0 extent, coordinate-1 extent) pair per contour.
    spans = np.array([np.ptp(np.asarray(c, dtype=float), axis=0) for c in contours])
    lengths, widths = spans[:, 0], spans[:, 1]
    albs_info.loc[fish, 'avg_shape'] = np.mean(lengths / widths)
    albs_info.loc[fish, 'avg_length'] = np.mean(lengths)
    albs_info.loc[fish, 'avg_width'] = np.mean(widths)
    albs_info.loc[fish, 'avg_area'] = np.mean(lengths * widths)
In [141]:
# Bounding boxes give some idea of shape (i.e. the width-to-length ratio)
# and, later, of color. Each item in a *_con_list is one fish image, each
# image holds a list of contours, and each contour is an array of coordinate
# pairs. The box corners are the min/max of each coordinate. The raw mins
# and maxes cannot go into bets_info directly because they are per CONTOUR,
# not per FISH, so they are averaged over the image's contours:
#   avg_length = mean extent along contour coordinate 0
#   avg_width  = mean extent along contour coordinate 1
#   avg_shape  = mean of length / width
#   avg_area   = mean of length * width
# NOTE(review): fishskin plots coordinate 1 on the x axis, so "length" is
# presumably the vertical extent -- confirm the naming is intended.
for col in ('avg_shape', 'avg_length', 'avg_width', 'avg_area'):
    bets_info[col] = np.nan
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
for fish, contours in enumerate(bets_con_list):
    # Failed extractions (np.nan) and images without contours keep NaN.
    if not isinstance(contours, list) or not contours:
        continue
    # One (coordinate-0 extent, coordinate-1 extent) pair per contour.
    spans = np.array([np.ptp(np.asarray(c, dtype=float), axis=0) for c in contours])
    lengths, widths = spans[:, 0], spans[:, 1]
    bets_info.loc[fish, 'avg_shape'] = np.mean(lengths / widths)
    bets_info.loc[fish, 'avg_length'] = np.mean(lengths)
    bets_info.loc[fish, 'avg_width'] = np.mean(widths)
    bets_info.loc[fish, 'avg_area'] = np.mean(lengths * widths)
In [145]:
# Bounding-box features for each DOL image, averaged over its contours:
#   avg_length = mean extent along contour coordinate 0
#   avg_width  = mean extent along contour coordinate 1
#   avg_shape  = mean of length / width
#   avg_area   = mean of length * width
# NOTE(review): fishskin plots coordinate 1 on the x axis, so "length" is
# presumably the vertical extent -- confirm the naming is intended.
for col in ('avg_shape', 'avg_length', 'avg_width', 'avg_area'):
    dols_info[col] = np.nan
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
for fish, contours in enumerate(dols_con_list):
    # Failed extractions (np.nan) and images without contours keep NaN.
    if not isinstance(contours, list) or not contours:
        continue
    # One (coordinate-0 extent, coordinate-1 extent) pair per contour.
    spans = np.array([np.ptp(np.asarray(c, dtype=float), axis=0) for c in contours])
    lengths, widths = spans[:, 0], spans[:, 1]
    dols_info.loc[fish, 'avg_shape'] = np.mean(lengths / widths)
    dols_info.loc[fish, 'avg_length'] = np.mean(lengths)
    dols_info.loc[fish, 'avg_width'] = np.mean(widths)
    dols_info.loc[fish, 'avg_area'] = np.mean(lengths * widths)
In [146]:
# Bounding-box features for each LAG image, averaged over its contours:
#   avg_length = mean extent along contour coordinate 0
#   avg_width  = mean extent along contour coordinate 1
#   avg_shape  = mean of length / width
#   avg_area   = mean of length * width
# NOTE(review): fishskin plots coordinate 1 on the x axis, so "length" is
# presumably the vertical extent -- confirm the naming is intended.
for col in ('avg_shape', 'avg_length', 'avg_width', 'avg_area'):
    lags_info[col] = np.nan
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
for fish, contours in enumerate(lags_con_list):
    # Non-list entries and images without contours keep NaN.
    if not isinstance(contours, list) or not contours:
        continue
    # One (coordinate-0 extent, coordinate-1 extent) pair per contour.
    spans = np.array([np.ptp(np.asarray(c, dtype=float), axis=0) for c in contours])
    lengths, widths = spans[:, 0], spans[:, 1]
    lags_info.loc[fish, 'avg_shape'] = np.mean(lengths / widths)
    lags_info.loc[fish, 'avg_length'] = np.mean(lengths)
    lags_info.loc[fish, 'avg_width'] = np.mean(widths)
    lags_info.loc[fish, 'avg_area'] = np.mean(lengths * widths)
In [147]:
# Bounding-box features for each NoF image, averaged over its contours:
#   avg_length = mean extent along contour coordinate 0
#   avg_width  = mean extent along contour coordinate 1
#   avg_shape  = mean of length / width
#   avg_area   = mean of length * width
# NOTE(review): fishskin plots coordinate 1 on the x axis, so "length" is
# presumably the vertical extent -- confirm the naming is intended.
for col in ('avg_shape', 'avg_length', 'avg_width', 'avg_area'):
    nofs_info[col] = np.nan
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
for fish, contours in enumerate(nofs_con_list):
    # Failed extractions (np.nan) and images without contours keep NaN.
    if not isinstance(contours, list) or not contours:
        continue
    # One (coordinate-0 extent, coordinate-1 extent) pair per contour.
    spans = np.array([np.ptp(np.asarray(c, dtype=float), axis=0) for c in contours])
    lengths, widths = spans[:, 0], spans[:, 1]
    nofs_info.loc[fish, 'avg_shape'] = np.mean(lengths / widths)
    nofs_info.loc[fish, 'avg_length'] = np.mean(lengths)
    nofs_info.loc[fish, 'avg_width'] = np.mean(widths)
    nofs_info.loc[fish, 'avg_area'] = np.mean(lengths * widths)
In [148]:
# Bounding-box features for each OTHER image, averaged over its contours:
#   avg_length = mean extent along contour coordinate 0
#   avg_width  = mean extent along contour coordinate 1
#   avg_shape  = mean of length / width
#   avg_area   = mean of length * width
# NOTE(review): fishskin plots coordinate 1 on the x axis, so "length" is
# presumably the vertical extent -- confirm the naming is intended.
for col in ('avg_shape', 'avg_length', 'avg_width', 'avg_area'):
    oths_info[col] = np.nan
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
for fish, contours in enumerate(oths_con_list):
    # Failed extractions (np.nan) and images without contours keep NaN.
    if not isinstance(contours, list) or not contours:
        continue
    # One (coordinate-0 extent, coordinate-1 extent) pair per contour.
    spans = np.array([np.ptp(np.asarray(c, dtype=float), axis=0) for c in contours])
    lengths, widths = spans[:, 0], spans[:, 1]
    oths_info.loc[fish, 'avg_shape'] = np.mean(lengths / widths)
    oths_info.loc[fish, 'avg_length'] = np.mean(lengths)
    oths_info.loc[fish, 'avg_width'] = np.mean(widths)
    oths_info.loc[fish, 'avg_area'] = np.mean(lengths * widths)
In [149]:
# Bounding-box features for each SHARK image, averaged over its contours:
#   avg_length = mean extent along contour coordinate 0
#   avg_width  = mean extent along contour coordinate 1
#   avg_shape  = mean of length / width
#   avg_area   = mean of length * width
# NOTE(review): fishskin plots coordinate 1 on the x axis, so "length" is
# presumably the vertical extent -- confirm the naming is intended.
for col in ('avg_shape', 'avg_length', 'avg_width', 'avg_area'):
    shas_info[col] = np.nan
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
for fish, contours in enumerate(shas_con_list):
    # Failed extractions (np.nan) and images without contours keep NaN.
    if not isinstance(contours, list) or not contours:
        continue
    # One (coordinate-0 extent, coordinate-1 extent) pair per contour.
    spans = np.array([np.ptp(np.asarray(c, dtype=float), axis=0) for c in contours])
    lengths, widths = spans[:, 0], spans[:, 1]
    shas_info.loc[fish, 'avg_shape'] = np.mean(lengths / widths)
    shas_info.loc[fish, 'avg_length'] = np.mean(lengths)
    shas_info.loc[fish, 'avg_width'] = np.mean(widths)
    shas_info.loc[fish, 'avg_area'] = np.mean(lengths * widths)
In [150]:
# Bounding-box features for each YFT image, averaged over its contours:
#   avg_length = mean extent along contour coordinate 0
#   avg_width  = mean extent along contour coordinate 1
#   avg_shape  = mean of length / width
#   avg_area   = mean of length * width
# NOTE(review): fishskin plots coordinate 1 on the x axis, so "length" is
# presumably the vertical extent -- confirm the naming is intended.
for col in ('avg_shape', 'avg_length', 'avg_width', 'avg_area'):
    yfts_info[col] = np.nan
# enumerate() visits every image; the earlier range(0, len - 1) stopped one
# short and left the last row as NaN.
for fish, contours in enumerate(yfts_con_list):
    # Failed extractions (np.nan) and images without contours keep NaN.
    if not isinstance(contours, list) or not contours:
        continue
    # One (coordinate-0 extent, coordinate-1 extent) pair per contour.
    spans = np.array([np.ptp(np.asarray(c, dtype=float), axis=0) for c in contours])
    lengths, widths = spans[:, 0], spans[:, 1]
    yfts_info.loc[fish, 'avg_shape'] = np.mean(lengths / widths)
    yfts_info.loc[fish, 'avg_length'] = np.mean(lengths)
    yfts_info.loc[fish, 'avg_width'] = np.mean(widths)
    yfts_info.loc[fish, 'avg_area'] = np.mean(lengths * widths)
In [162]:
# Histograms of avg_shape for the ALB and DOL classes. The equivalent calls
# for BET, LAG, NoF, OTHER, SHARK and YFT were left disabled.
albs_info.hist(column='avg_shape', figsize=(3, 3))
dols_info.hist(column='avg_shape', figsize=(3, 3))
Out[162]:
In [156]:
# Summary statistics of the ALB feature table.
albs_info.describe()
Out[156]:
In [159]:
# Summary statistics of the NoF (no fish) feature table, for comparison.
nofs_info.describe()
Out[159]:
In [167]:
# Save the per-class feature tables next to the contour CSVs.
# Paths are relative to the notebook, like the *_contours.csv exports,
# instead of pointing into one user's home directory, so the notebook runs
# on any checkout of the project.
albs_info.to_csv('../data/processed/albs_shape_info.csv')
bets_info.to_csv('../data/processed/bets_shape_info.csv')
dols_info.to_csv('../data/processed/dols_shape_info.csv')
lags_info.to_csv('../data/processed/lags_shape_info.csv')
nofs_info.to_csv('../data/processed/nofs_shape_info.csv')
oths_info.to_csv('../data/processed/oths_shape_info.csv')
shas_info.to_csv('../data/processed/shas_shape_info.csv')
yfts_info.to_csv('../data/processed/yfts_shape_info.csv')