In [ ]:
PROJECT = 'cloud-training-demos'
REGION = 'us-central1'
In [ ]:
import os
os.environ['PROJECT'] = PROJECT
os.environ['REGION'] = REGION
In [ ]:
%bash
gcloud config set project $PROJECT
gcloud config set compute/region $REGION
In [ ]:
%%bq query
SELECT
*
FROM
publicdata.samples.natality
WHERE
year > 2000
AND gestation_weeks > 0
AND mother_age > 0
AND plurality > 0
AND weight_pounds > 0
LIMIT 10
In [ ]:
%%bq query
SELECT
weight_pounds, -- this is the label; because it is continuous, we need to use regression
CAST(is_male AS STRING) AS is_male,
mother_age,
CAST(plurality AS STRING) AS plurality,
gestation_weeks,
ABS(FARM_FINGERPRINT(CONCAT(CAST(YEAR AS STRING), CAST(month AS STRING)))) AS hashmonth
FROM
publicdata.samples.natality
WHERE
year > 2000
AND gestation_weeks > 0
AND mother_age > 0
AND plurality > 0
AND weight_pounds > 0
LIMIT 10
In [ ]:
%%bash
bq --location=US mk -d demo
In [ ]:
%%bq query
CREATE or REPLACE MODEL demo.babyweight_model_asis
OPTIONS
(model_type='linear_reg', labels=['weight_pounds']) AS
WITH natality_data AS (
SELECT
weight_pounds,-- this is the label; because it is continuous, we need to use regression
CAST(is_male AS STRING) AS is_male,
mother_age,
CAST(plurality AS STRING) AS plurality,
gestation_weeks,
ABS(FARM_FINGERPRINT(CONCAT(CAST(YEAR AS STRING), CAST(month AS STRING)))) AS hashmonth
FROM
publicdata.samples.natality
WHERE
year > 2000
AND gestation_weeks > 0
AND mother_age > 0
AND plurality > 0
AND weight_pounds > 0
)
SELECT
weight_pounds,
is_male,
mother_age,
plurality,
gestation_weeks
FROM
natality_data
WHERE
MOD(hashmonth, 4) < 3 -- select 75% of the data as training
In [ ]:
%%bq query
SELECT * FROM ML.TRAINING_INFO(MODEL demo.babyweight_model_asis);
In [ ]:
import google.datalab.bigquery as bq
df = bq.Query("SELECT * FROM ML.TRAINING_INFO(MODEL demo.babyweight_model_asis)").execute().result().to_dataframe()
# plot both lines in same graph
import matplotlib.pyplot as plt
plt.plot( 'iteration', 'loss', data=df, marker='o', color='orange', linewidth=2)
plt.plot( 'iteration', 'eval_loss', data=df, marker='', color='green', linewidth=2, linestyle='dashed')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.legend();
In [ ]:
%%bq query
SELECT
*
FROM
ml.PREDICT(MODEL demo.babyweight_model_asis,
(SELECT
weight_pounds,
CAST(is_male AS STRING) AS is_male,
mother_age,
CAST(plurality AS STRING) AS plurality,
gestation_weeks
FROM
publicdata.samples.natality
WHERE
year > 2000
AND gestation_weeks > 0
AND mother_age > 0
AND plurality > 0
AND weight_pounds > 0
))
LIMIT 100
In [ ]:
%%bq query
SELECT
weight_pounds,
CAST(is_male AS STRING) AS is_male,
IF(mother_age < 18, 'LOW',
IF(mother_age > 45, 'HIGH',
CAST(mother_age AS STRING))) AS mother_age,
CAST(plurality AS STRING) AS plurality,
CAST(gestation_weeks AS STRING) AS gestation_weeks,
ABS(FARM_FINGERPRINT(CONCAT(CAST(YEAR AS STRING), CAST(month AS STRING)))) AS hashmonth
FROM
publicdata.samples.natality
WHERE
year > 2000
AND gestation_weeks > 0
AND mother_age > 0
AND plurality > 0
AND weight_pounds > 0
LIMIT 25
In [ ]:
%%bq query
SELECT
weight_pounds,
'Unknown' AS is_male,
IF(mother_age < 18, 'LOW',
IF(mother_age > 45, 'HIGH',
CAST(mother_age AS STRING))) AS mother_age,
IF(plurality > 1, 'Multiple', 'Single') AS plurality,
CAST(gestation_weeks AS STRING) AS gestation_weeks,
ABS(FARM_FINGERPRINT(CONCAT(CAST(YEAR AS STRING), CAST(month AS STRING)))) AS hashmonth
FROM
publicdata.samples.natality
WHERE
year > 2000
AND gestation_weeks > 0
AND mother_age > 0
AND plurality > 0
AND weight_pounds > 0
LIMIT 25
In [ ]:
%%bq query
WITH with_ultrasound AS (
SELECT
weight_pounds,
CAST(is_male AS STRING) AS is_male,
IF(mother_age < 18, 'LOW',
IF(mother_age > 45, 'HIGH',
CAST(mother_age AS STRING))) AS mother_age,
CAST(plurality AS STRING) AS plurality,
CAST(gestation_weeks AS STRING) AS gestation_weeks,
ABS(FARM_FINGERPRINT(CONCAT(CAST(YEAR AS STRING), CAST(month AS STRING)))) AS hashmonth
FROM
publicdata.samples.natality
WHERE
year > 2000
AND gestation_weeks > 0
AND mother_age > 0
AND plurality > 0
AND weight_pounds > 0
),
without_ultrasound AS (
SELECT
weight_pounds,
'Unknown' AS is_male,
IF(mother_age < 18, 'LOW',
IF(mother_age > 45, 'HIGH',
CAST(mother_age AS STRING))) AS mother_age,
IF(plurality > 1, 'Multiple', 'Single') AS plurality,
CAST(gestation_weeks AS STRING) AS gestation_weeks,
ABS(FARM_FINGERPRINT(CONCAT(CAST(YEAR AS STRING), CAST(month AS STRING)))) AS hashmonth
FROM
publicdata.samples.natality
WHERE
year > 2000
AND gestation_weeks > 0
AND mother_age > 0
AND plurality > 0
AND weight_pounds > 0
),
preprocessed AS (
SELECT * from with_ultrasound
UNION ALL
SELECT * from without_ultrasound
)
SELECT
weight_pounds,
is_male,
mother_age,
plurality,
gestation_weeks
FROM
preprocessed
WHERE
MOD(hashmonth, 4) < 3
LIMIT 25
In [ ]:
%%bq query
CREATE or REPLACE MODEL demo.babyweight_model_fc
OPTIONS
(model_type='linear_reg', labels=['weight_pounds']) AS
WITH with_ultrasound AS (
SELECT
weight_pounds,
CAST(is_male AS STRING) AS is_male,
IF(mother_age < 18, 'LOW',
IF(mother_age > 45, 'HIGH',
CAST(mother_age AS STRING))) AS mother_age,
CAST(plurality AS STRING) AS plurality,
CAST(gestation_weeks AS STRING) AS gestation_weeks,
ABS(FARM_FINGERPRINT(CONCAT(CAST(YEAR AS STRING), CAST(month AS STRING)))) AS hashmonth
FROM
publicdata.samples.natality
WHERE
year > 2000
AND gestation_weeks > 0
AND mother_age > 0
AND plurality > 0
AND weight_pounds > 0
),
without_ultrasound AS (
SELECT
weight_pounds,
'Unknown' AS is_male,
IF(mother_age < 18, 'LOW',
IF(mother_age > 45, 'HIGH',
CAST(mother_age AS STRING))) AS mother_age,
IF(plurality > 1, 'Multiple', 'Single') AS plurality,
CAST(gestation_weeks AS STRING) AS gestation_weeks,
ABS(FARM_FINGERPRINT(CONCAT(CAST(YEAR AS STRING), CAST(month AS STRING)))) AS hashmonth
FROM
publicdata.samples.natality
WHERE
year > 2000
AND gestation_weeks > 0
AND mother_age > 0
AND plurality > 0
AND weight_pounds > 0
),
preprocessed AS (
SELECT * from with_ultrasound
UNION ALL
SELECT * from without_ultrasound
)
SELECT
weight_pounds,
is_male,
mother_age,
plurality,
gestation_weeks
FROM
preprocessed
WHERE
MOD(hashmonth, 4) < 3
In [ ]:
import google.datalab.bigquery as bq
df = bq.Query("SELECT * FROM ML.TRAINING_INFO(MODEL demo.babyweight_model_fc)").execute().result().to_dataframe()
# plot both lines in same graph
import matplotlib.pyplot as plt
plt.plot( 'iteration', 'loss', data=df, marker='o', color='orange', linewidth=2)
plt.plot( 'iteration', 'eval_loss', data=df, marker='', color='green', linewidth=2, linestyle='dashed')
plt.xlabel('iteration')
plt.ylabel('loss')
plt.legend();
In [ ]:
%%bq query
SELECT
*
FROM
ml.PREDICT(MODEL demo.babyweight_model_fc,
(SELECT
'True' AS is_male,
'28' AS mother_age,
'1' AS plurality,
'38' AS gestation_weeks
))