In [1]:
import mltoolbox.image.classification as model
# Explicit imports instead of `import *` so readers can see exactly which
# Datalab ML helpers this notebook relies on.
from google.datalab.ml import (
    BigQueryDataSet,
    TensorBoard,
    Summary,
    ConfusionMatrix,
    FeatureSliceView,
)

# Local working area for this run; deleted by the cleanup cell at the end.
worker_dir = '/content/datalab/tmp/coast'
preprocessed_dir = worker_dir + '/coast300'  # preprocessed training data
model_dir = worker_dir + '/model300'         # trained model + TensorBoard events
In [2]:
# Sample a small fraction (~4%) of the training table and preprocess the
# referenced images into `preprocessed_dir`.
sample_query = 'SELECT image_url, label FROM coast.train WHERE rand() < 0.04'
train_set = BigQueryDataSet(sample_query)
model.preprocess(train_set, preprocessed_dir)
In [3]:
import logging

# Show INFO-level logs while training so progress is visible; the try/finally
# guarantees the level is dropped back to WARNING even if training raises
# (the original code would leave the root logger stuck at INFO on failure).
logging.getLogger().setLevel(logging.INFO)
try:
    # Positional args: input dir, then 30 and 1000 — presumably batch size
    # and max training steps; confirm against the mltoolbox train() docs.
    model.train(preprocessed_dir, 30, 1000, model_dir)
finally:
    logging.getLogger().setLevel(logging.WARNING)
You can start a hosted TensorBoard instance to inspect the training events.
In [4]:
# Launch a hosted TensorBoard pointed at the model directory; keep the id
# so the instance can be stopped in the cleanup cell at the end.
tb_id = TensorBoard.start(model_dir)
In [5]:
# Load the event summaries written during training and list the available
# event tags. (`summary` is reused by the plotting cell below.)
summary = Summary(model_dir)
summary.list_events()
Out[5]:
In [6]:
# Plot both training curves recorded in the event files.
for event_name in ('accuracy', 'loss'):
    summary.plot(event_name)
We will evaluate further on more data using batch prediction.
In [7]:
# A few evaluation images, with their ground-truth labels noted inline.
images = [
    'gs://tamucc_coastline/esi_images/IMG_2849_SecDE_Spr12.jpg',      # 3B
    'gs://tamucc_coastline/esi_images/IMG_0047_SecBC_Spr12.jpg',      # 10A
    'gs://tamucc_coastline/esi_images/IMG_0617_SecBC_Spr12.jpg',      # 7
    'gs://tamucc_coastline/esi_images/IMG_2034_SecEGH_Sum12_Pt2.jpg', # 10A
]
# Flip show_image to True to render the images alongside the predictions.
model.predict(model_dir, images, show_image=False)
Out[7]:
Run batch prediction. Note that we sample the evaluation data, so roughly 200 instances are used.
In [8]:
# Sample ~10% of the evaluation table and write the batch predictions to a
# BigQuery results table for analysis in the following cells.
eval_query = 'select * from coast.eval WHERE rand()<0.1'
eval_set = BigQueryDataSet(eval_query)
model.batch_predict(eval_set, model_dir, output_bq_table='coast.eval200tinymodel')
In [9]:
# Render a confusion matrix directly from the batch-prediction results table.
ConfusionMatrix.from_bigquery('select * from coast.eval200tinymodel').plot()
Compute accuracy per label.
In [10]:
%%bq query --name accuracy
-- Per-label accuracy over the batch-prediction results: number of correct
-- predictions, total examples, and their ratio, grouped by true label.
SELECT
target,
SUM(CASE WHEN target=predicted THEN 1 ELSE 0 END) as correct,
COUNT(*) as total,
SUM(CASE WHEN target=predicted THEN 1 ELSE 0 END)/COUNT(*) as accuracy
FROM
coast.eval200tinymodel
GROUP BY
target
In [11]:
# Run the `accuracy` query defined above and display the result table.
accuracy.execute().result()
Out[11]:
You can view the results using the Feature Slice View widget. This time we look at log loss.
In [12]:
%%bq query --name logloss
-- Average log loss and example count per true label ("feature").
-- Innermost query: a 1/0 correctness flag plus the probability the model
-- assigned to the true label. Middle query: per-row log term — LOG(prob)
-- when correct, LOG(1-prob) otherwise. Outer query: negate and average.
-- NOTE(review): LOG(1-prob) is undefined when prob is exactly 1 — confirm
-- the prediction table never contains exact 1.0 probabilities.
SELECT feature, AVG(-logloss) as logloss, COUNT(*) as count FROM
(
SELECT feature, CASE WHEN correct=1 THEN LOG(prob) ELSE LOG(1-prob) END as logloss
FROM
(
SELECT
target as feature,
CASE WHEN target=predicted THEN 1 ELSE 0 END as correct,
target_prob as prob
FROM coast.eval200tinymodel
)
)
GROUP BY feature
In [13]:
# Plot the per-label log loss in the interactive Feature Slice View widget.
FeatureSliceView().plot(logloss)
In [14]:
import shutil
import google.datalab.bigquery as bq

# Clean up everything this notebook created: the hosted TensorBoard
# instance, the BigQuery results table, and the local working directory.
TensorBoard.stop(tb_id)
bq.Table('coast.eval200tinymodel').delete()
# ignore_errors makes this cell safe to re-run after the directory is
# already gone (the bare rmtree would raise FileNotFoundError).
shutil.rmtree(worker_dir, ignore_errors=True)
In [ ]: