In [1]:
import mltoolbox.image.classification as model
from google.datalab.ml import *
import os
# All artifacts of this notebook live in a GCS bucket named "gs://<project id>-coast".
bucket = ''.join(['gs://', datalab_project_id(), '-coast'])
# Folders inside that bucket: preprocessing output, temporary/staging files,
# and the trained model.
preprocessed_dir = '/'.join([bucket, 'preprocessed'])
staging_dir = '/'.join([bucket, 'staging'])
model_dir = '/'.join([bucket, 'model'])
In [3]:
# The training set is the (image_url, label) rows of the BigQuery table coast.train.
train_set = BigQueryDataSet('SELECT image_url, label FROM coast.train')

# Start preprocessing into preprocessed_dir with a cloud config asking for
# 5 workers, then block until the job is done.
job = model.preprocess_async(
    train_set,
    preprocessed_dir,
    cloud={'num_workers': 5}
)
job.wait()
In [ ]:
# Train from the preprocessed data and write the model to model_dir, waiting
# for the job to complete.
# NOTE(review): 64 and 8000 are presumably the batch size and the number of
# training steps -- confirm against the mltoolbox train_async signature.
model.train_async(
    preprocessed_dir,
    64,
    8000,
    model_dir,
    cloud=CloudTrainingConfig('us-central1', 'BASIC')
).wait()
Check your job status. You can run:
Job('inception_train_170216_235400').describe()
to see job status.
In [ ]:
# Start TensorBoard on model_dir; keep the returned id so the instance can be
# stopped later with TensorBoard.stop(tb_id).
tb_id = TensorBoard.start(model_dir)
In [ ]:
# Build a Summary over model_dir and list its events.
# Presumably these are the event names that summary.plot() accepts -- confirm.
summary = Summary(model_dir)
summary.list_events()
In [ ]:
# Plot the 'accuracy' and 'loss' events; needs `summary` from the previous cell.
summary.plot('accuracy')
summary.plot('loss')
In [ ]:
# Create the model 'coast', then deploy the contents of model_dir as its
# version 'v1'. Presumably the model must exist before a version can be
# deployed to it, so keep this order.
Models().create('coast')
ModelVersions('coast').deploy('v1', model_dir)
In [ ]:
# Show the details of version 'v1' of the 'coast' model.
ModelVersions('coast').describe('v1')
In [ ]:
# Sample images for online prediction. The trailing comment on each entry is
# the label noted for that image (presumably the expected class -- confirm).
images = [
    'gs://tamucc_coastline/esi_images/IMG_2849_SecDE_Spr12.jpg',        # 3B
    'gs://tamucc_coastline/esi_images/IMG_0047_SecBC_Spr12.jpg',        # 10A
    'gs://tamucc_coastline/esi_images/IMG_0617_SecBC_Spr12.jpg',        # 7
    'gs://tamucc_coastline/esi_images/IMG_2034_SecEGH_Sum12_Pt2.jpg'    # 10A
]

# Predict against the deployed model version 'coast.v1'.
# resize=True because otherwise the images are too large to send for online prediction.
model.predict('coast.v1', images, resize=True, cloud=True)
In [ ]:
# Batch-predict every row of coast.eval with the model in model_dir and write
# the results to the BigQuery table coast.eval_results_full. The cloud config
# asks for 3 workers and uses staging_dir as the temp location. Blocks until
# the job finishes.
eval_set = BigQueryDataSet(sql='select * from coast.eval')
model.batch_predict_async(
    eval_set,
    model_dir,
    output_bq_table='coast.eval_results_full',
    cloud={'num_workers': 3, 'temp_location': staging_dir}
).wait()
Check the evaluation results. They are much better than the results of the local run; of course, we used much more data and ran more training steps.
In [ ]:
%%bq query --name accuracy
-- Per-label accuracy over the batch prediction results.
-- The inner query flags each row with 1 when predicted equals target and 0
-- otherwise; the outer query aggregates those flags for every target label.
SELECT
  target,
  SUM(is_correct) AS correct,
  COUNT(*) AS total,
  SUM(is_correct)/COUNT(*) AS accuracy
FROM
(
  SELECT
    target,
    CASE WHEN target=predicted THEN 1 ELSE 0 END AS is_correct
  FROM coast.eval_results_full
)
GROUP BY
  target
In [ ]:
# Run the query registered as `accuracy` (via `%%bq query --name accuracy`)
# and display its result.
accuracy.execute().result()
In [ ]:
%%bq query --name logloss
-- Average log loss per target label (exposed as "feature"), with row counts.
-- Each row contributes LOG(target_prob) when the prediction matches the target
-- and LOG(1-target_prob) otherwise; the outer query averages the negated value.
SELECT
  feature,
  AVG(-log_prob) AS logloss,
  COUNT(*) AS count
FROM
(
  SELECT
    target AS feature,
    CASE WHEN target=predicted THEN LOG(target_prob) ELSE LOG(1-target_prob) END AS log_prob
  FROM coast.eval_results_full
)
GROUP BY feature
In [ ]:
# Hand the query registered as `logloss` (via `%%bq query --name logloss`)
# to FeatureSliceView for plotting.
FeatureSliceView().plot(logloss)
In [ ]:
import shutil
import google.datalab.bigquery as bq
# Clean up everything this notebook created. These calls are destructive.
# Stop the TensorBoard instance started earlier (tb_id comes from TensorBoard.start).
TensorBoard.stop(tb_id)
# Drop the batch prediction results table.
bq.Table('coast.eval_results_full').delete()
# Remove version 'v1' first, then the 'coast' model itself.
ModelVersions('coast').delete('v1')
Models().delete('coast')
# Recursively delete the preprocessing output and the trained model from GCS.
# NOTE(review): staging_dir (the temp_location passed to batch prediction) is
# not removed here -- confirm whether it should be cleaned up as well.
!gsutil -m rm -r {preprocessed_dir}
!gsutil -m rm -r {model_dir}
In [ ]: