In [1]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
df = pd.read_csv('../urls/data/scraped_urls.psv.gz', sep='|')
In [5]:
df.head()
Out[5]:
In [9]:
for i in range(2,5):
    print(i)
In [3]:
import numpy as np
In [3]:
import subprocess
In [253]:
subprocess.call(['mkdir','-p','temp'])
for fish in list(set(df.fish)):
    subprocess.call(['mkdir','-p','temp/'+fish.replace(' ','_')])
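Shelling out to mkdir -p only works on Unix-like systems; a portable alternative is os.makedirs with exist_ok (a minimal sketch using the same layout as the cell above):
In [ ]:
import os
os.makedirs('temp', exist_ok=True)
for fish in df.fish.unique():
    os.makedirs(os.path.join('temp', fish.replace(' ', '_')), exist_ok=True)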
In [ ]:
i
In [249]:
ioffset = 1998
In [4]:
from urllib.request import urlretrieve  # needed here; only imported later in the notebook

prev_fish='alewife'
for i in range(1415,1420):#len(df)):
    row = df.iloc[i]
    if np.mod(i,50) == 0:
        print(i/(len(df))*100., '%')
    url = df.iloc[i].url
    if (url[-4:] == '.jpg') and (url.find(' ') == -1):
        try:
            urlretrieve(url,'temp/' + row.fish.replace(' ','_') + '/' + row.fish.replace(' ','_') + '-' + str(i) + '.jpg')
        except Exception:
            print('bad url: ',url)
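The except above swallows the actual failure; a slightly more defensive variant (a sketch, with the hypothetical helper name fetch_jpeg, using requests instead of urlretrieve) checks the HTTP status and content type and bounds how long a hung server can stall the crawl:
In [ ]:
import requests

def fetch_jpeg(url, dest, timeout=10):
    # hypothetical helper: download url to dest only if it really looks like an image
    try:
        r = requests.get(url, timeout=timeout)
        if r.status_code != 200 or 'image' not in r.headers.get('Content-Type', ''):
            return False
        with open(dest, 'wb') as f:
            f.write(r.content)
        return True
    except requests.RequestException:
        return False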
In [251]:
i
Out[251]:
In [6]:
url = df.iloc[0].url
In [7]:
url
Out[7]:
In [15]:
from PIL import Image
import requests
from io import StringIO, BytesIO
In [11]:
response = requests.get(url)
img = Image.open(StringIO(response.content))  # fails: response.content is bytes, so BytesIO is needed (next cell)
In [33]:
response = requests.get(url)
Image.open(BytesIO(response.content))
Out[33]:
In [48]:
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions
import numpy as np
model = ResNet50(weights='imagenet')
In [49]:
from urllib.request import urlretrieve
urlretrieve(url,url.split('/')[-1])
Out[49]:
In [50]:
from urllib.request import urlretrieve
urlretrieve(url,url.split('/')[-1])
img_path = url.split('/')[-1]
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)
In [ ]:
from keras import applications  # not yet imported at this point in the notebook
model = applications.VGG16(include_top=False, weights='imagenet')
In [53]:
preds = model.predict(x)
# decode the results into a list of tuples (class, description, probability)
# (one such list for each sample in the batch)
print('Predicted:', decode_predictions(preds, top=3)[0])
In [54]:
from keras.applications.vgg16 import VGG16
model2 = VGG16(weights='imagenet', include_top=False)
In [56]:
preds = model.predict(x)
# decode the results into a list of tuples (class, description, probability)
# (one such list for each sample in the batch)
print('Predicted:', decode_predictions(preds, top=3)[0])
In [58]:
response = requests.get(url)
img = Image.open(BytesIO(response.content)).convert('RGB').resize((224, 224))  # assign and resize so the array matches ResNet50's expected input
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)  # same ImageNet preprocessing as the earlier ResNet50 cell
In [132]:
Image.open('data/test/carp/carp-527.jpg')
Out[132]:
In [ ]:
img = Image.open('data/test/carp/carp-527.jpg')
In [59]:
preds = model.predict(x)
In [61]:
from keras.applications.vgg16 import decode_predictions
In [67]:
decode_predictions(preds, top=3)[0]
In [63]:
model3 = VGG16(weights='imagenet')
In [64]:
preds = model3.predict(x)
In [66]:
preds = model2.predict(x)
In [69]:
np.shape(preds)
Out[69]:
In [74]:
x = np.array([1,2,3])
In [76]:
x[0]
Out[76]:
In [78]:
x = np.concatenate((preds,preds))
In [79]:
np.shape(x)
Out[79]:
In [82]:
df.groupby('fish').count()
Out[82]:
In [6]:
carp = df[df.fish == 'carp']
walleye = df[df.fish == 'walleye']
In [7]:
carp.head()
Out[7]:
In [94]:
for i in range(len(carp)):
    url = carp.iloc[i].url
    try:
        if url.split('.')[-1].find('jpg') >= 0:
            urlretrieve(url,'carp/carp-' + str(i) + '.jpg')
    except Exception:
        print('bad url: ',url)
In [96]:
for i in range(len(walleye)):
    url = walleye.iloc[i].url
    try:
        if url.split('.')[-1].find('jpg') >= 0:
            urlretrieve(url,'walleye/waleye-' + str(i) + '.jpg')  # 'waleye' misspelling kept: later cells load these exact filenames
    except Exception:
        print('bad url: ',url)
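The carp and walleye loops differ only in the slice and filename prefix; a small helper (hypothetical name download_species) removes the duplication. The 'waleye' prefix is kept deliberately, since later cells load paths like data/train/walleye/waleye-119.jpg:
In [ ]:
from urllib.request import urlretrieve

def download_species(frame, out_dir, prefix):
    # hypothetical consolidation of the two loops above
    for i in range(len(frame)):
        url = frame.iloc[i].url
        if url.split('.')[-1].find('jpg') < 0:
            continue
        try:
            urlretrieve(url, out_dir + '/' + prefix + '-' + str(i) + '.jpg')
        except Exception:
            print('bad url: ', url)

download_species(carp, 'carp', 'carp')
download_species(walleye, 'walleye', 'waleye')  # original misspelled prefix kept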
In [3]:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
import os
# dimensions of our images.
img_width, img_height = 150, 150
top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'data/train'
test_data_dir = 'data/test'
epochs = 50
batch_size = 5
def save_bottlebeck_features(data_dir):
    # count images across the per-fish subdirectories
    fish = os.listdir(data_dir)
    fish_count = 0
    for f in fish:
        fish_count += len(os.listdir(data_dir + '/' + f))
    # build the VGG16 network
    model = applications.VGG16(include_top=False, weights='imagenet')
    datagen = ImageDataGenerator(rescale=1. / 255)
    generator = datagen.flow_from_directory(
        data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=False)  # keep file order fixed so labels built separately line up
    return model.predict_generator(generator, fish_count // batch_size)

def train_top_model(train_data, validation_data):
    # train_labels and validation_labels must already be defined in the
    # session (per-class counts, e.g. np.array([0]*n0 + [1]*n1))
    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)
    return model
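train_top_model expects train_labels and validation_labels to already exist. One way to build them, assuming flow_from_directory's alphabetical class ordering and the shuffle=False generator above (a sketch with the hypothetical helper make_labels, matching the np.array([0]*n + [1]*m) pattern used later in this notebook):
In [ ]:
def make_labels(data_dir):
    # 0 for the first class directory, 1 for the second (binary case);
    # order matches flow_from_directory, which sorts class subdirs alphabetically
    counts = [len(os.listdir(os.path.join(data_dir, d)))
              for d in sorted(os.listdir(data_dir))]
    labels = np.array([0] * counts[0] + [1] * counts[1])
    # predict_generator above yields (count // batch_size) * batch_size samples,
    # so the labels may need the same truncation
    return labels[:(len(labels) // batch_size) * batch_size]

train_labels = make_labels(train_data_dir)
validation_labels = make_labels(test_data_dir)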
In [31]:
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K

img_width, img_height = 150, 150
train_data_dir = 'data/train'
test_data_dir = 'data/test'
epochs = 50
batch_size = 32

def get_generator(data_dir):
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True)
    return datagen.flow_from_directory(
        data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='binary')

def train_inception(train_data, validation_data):
    # create the base pre-trained model
    base_model = InceptionV3(weights='imagenet', include_top=False)
    # add a global spatial average pooling layer
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    # let's add a fully-connected layer
    x = Dense(1024, activation='relu')(x)
    # and a logistic layer for the binary carp/walleye decision
    predictions = Dense(1, activation='sigmoid')(x)
    # this is the model we will train
    model = Model(inputs=base_model.input, outputs=predictions)
    # first: train only the top layers (which were randomly initialized),
    # i.e. freeze all convolutional InceptionV3 layers
    for layer in base_model.layers:
        layer.trainable = False
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy', metrics=['accuracy'])
    model.fit_generator(train_data,
                        steps_per_epoch=50,
                        epochs=epochs,
                        validation_data=validation_data,
                        validation_steps=25)
    return model
In [32]:
test_data = get_generator(test_data_dir)
train_data = get_generator(train_data_dir)
In [33]:
inception_model = train_inception(train_data, test_data)
In [34]:
inception_model.save_weights('inception_v1_model.h5')  # save the model trained above ('model' still names the earlier ResNet50)
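train_inception only fits the new head; the usual second stage of the recipe this cell follows (the Keras fine-tuning example) unfreezes the top two inception blocks, which start at layer 249 in Keras's InceptionV3, and retrains with a small learning rate. A sketch, assuming the inception_model just trained:
In [ ]:
from keras.optimizers import SGD

for layer in inception_model.layers[:249]:
    layer.trainable = False
for layer in inception_model.layers[249:]:
    layer.trainable = True

# recompile so the trainability change takes effect, with a low LR
inception_model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                        loss='binary_crossentropy', metrics=['accuracy'])
inception_model.fit_generator(train_data, steps_per_epoch=50, epochs=epochs,
                              validation_data=test_data, validation_steps=25)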
In [5]:
from keras.applications.resnet50 import ResNet50
In [6]:
model = ResNet50(weights='imagenet')
In [13]:
model.layers.pop()  # drop the final softmax layer from the layer list
Out[13]:
In [14]:
for layer in model.layers:
layer.trainable = False
In [15]:
model.add(Dense(1, activation='sigmoid'))  # fails: the functional ResNet50 Model has no .add(); see the functional-API sketch below
In [8]:
len(model.layers)
Out[8]:
In [ ]:
model.layers
In [11]:
model.layers_by_depth?
In [10]:
model2 = Sequential()
for layer in model.layers_by_depth:
    model2.add(layer)  # layers_by_depth maps depth -> layers, so this iterates over dict keys (ints) and fails; model.layers is the iterable wanted
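Popping layers and calling .add() fails because ResNet50 is a functional Model, not a Sequential. The functional API gets the intended sigmoid head directly (a sketch, with the hypothetical name resnet_binary):
In [ ]:
from keras.applications.resnet50 import ResNet50
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model

base = ResNet50(weights='imagenet', include_top=False)
for layer in base.layers:
    layer.trainable = False  # freeze the convolutional base

x = GlobalAveragePooling2D()(base.output)
out = Dense(1, activation='sigmoid')(x)  # the binary head the cells above were after
resnet_binary = Model(inputs=base.input, outputs=out)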
In [7]:
ResNet50?
In [4]:
test_data = save_bottlebeck_features(test_data_dir)
In [ ]:
train_data = save_bottlebeck_features(train_data_dir)
In [194]:
model = train_top_model(train_data, validation_data)
In [195]:
premodel = applications.VGG16(include_top=False, weights='imagenet')
In [196]:
Image.open('data/train/carp/carp-107.jpg')
Out[196]:
In [217]:
Image.open('data/train/walleye/waleye-119.jpg')
Out[217]:
In [216]:
img = image.load_img('data/train/walleye/waleye-119.jpg', target_size=(img_width, img_height))
x = image.img_to_array(img)
x = np.expand_dims(x, axis=0)
x = preprocess_input(x)  # note: this is the ResNet50 preprocess_input imported earlier, while the bottleneck features were trained on rescale=1./255 inputs, so the scales disagree
preds = model.predict(premodel.predict(x))
print(preds)
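For spot-checking more files, the two-stage prediction (VGG16 bottleneck features into the trained top model) can be wrapped up; a sketch with the hypothetical name predict_fish, using the 1./255 rescale the top model was actually trained on rather than the ResNet preprocess_input used above:
In [ ]:
def predict_fish(img_path):
    # hypothetical wrapper: VGG16 features -> top model -> sigmoid score
    img = image.load_img(img_path, target_size=(img_width, img_height))
    x = image.img_to_array(img) / 255.  # match the training-time rescale
    x = np.expand_dims(x, axis=0)
    return float(model.predict(premodel.predict(x))[0][0])

predict_fish('data/train/walleye/waleye-119.jpg')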
In [126]:
np.shape(train_data)
Out[126]:
In [187]:
np.shape(validation_data)
Out[187]:
In [112]:
np.array([0]*5 + [1]*4)
Out[112]:
In [91]:
url
Out[91]:
In [85]:
i=1
urlretrieve(url,'carp/carp-' + str(i) + '.' + url.split('.')[-1])
Out[85]: