Disclaimer: The following code demonstrates a sample BQML model creation based on GA-BQ export. This is meant for inspiration only. We expect analysts/data scientists to identify the right set of features to create retargeted audiences based on their business needs.
In [ ]:
project_id = "my_project" #@param {type:"string"}
example_dataset = "my_schema" #@param {type:"string"}
In [ ]:
from google.colab import auth
auth.authenticate_user()
In [ ]:
%%bigquery --project=$project_id df
WITH sample_raw_data AS (
SELECT CAST(CEIL(RAND() * 100) AS INT64) AS clientId, * EXCEPT (clientId) FROM `bigquery-public-data.google_analytics_sample.ga_sessions_20170801` LIMIT 1000
),
visit_data AS (
SELECT clientId, SUM(totals.visits) AS all_visits, CAST(ROUND(RAND() * 1) AS INT64) AS converted
FROM sample_raw_data
GROUP BY clientId
)
SELECT *
FROM visit_data
In [ ]:
df.head()
In [ ]:
import math
training_data_size = math.ceil(df.shape[0] * 0.7)
In [ ]:
training_data = df[:training_data_size]
test_data = df[training_data_size:]
In [ ]:
training_data.to_csv('training.csv', index=False)
test_data.to_csv('test.csv', index=False)
In [ ]:
!bq load --project_id $project_id --autodetect --source_format='CSV' $example_dataset".training_data" training.csv
!bq load --project_id $project_id --autodetect --source_format='CSV' $example_dataset".test_data" test.csv
In [ ]:
%%bigquery --project=$project_id
CREATE OR REPLACE MODEL `my_schema.test_model`
OPTIONS (model_type='logistic_reg', input_label_cols=['converted'], auto_class_weights=True) AS
(
SELECT all_visits, converted
FROM `my_schema.training_data`
)
In [ ]:
%%bigquery --project=$project_id
SELECT *
FROM ML.EVALUATE(MODEL `my_schema.test_model`,
(SELECT * FROM `my_schema.test_data`))
In [ ]:
%%bigquery --project=$project_id predicted_df
SELECT clientId AS ga_userId, NTILE(10) OVER (ORDER BY prob DESC) AS ga_dimension1
FROM ML.PREDICT(MODEL `my_schema.test_model`,
(SELECT * FROM `my_schema.test_data`)),
UNNEST(predicted_converted_probs)
In [ ]:
predicted_df.head()