In [ ]:
%load_ext sql
In [ ]:
# %sql postgresql://gpdbchina@10.194.10.68:55000/madlib
%sql postgresql://fmcquillan@localhost:5432/madlib
In [34]:
%sql select madlib.version();
Out[34]:
In [35]:
%%sql
DROP TABLE IF EXISTS test_set;
CREATE TABLE test_set(
pred FLOAT8, -- predicted values
obs FLOAT8 -- actual observed values
);
INSERT INTO test_set VALUES
(37.5,53.1), (12.3,34.2), (74.2,65.4), (91.1,82.1);
SELECT * FROM test_set;
Out[35]:
In [36]:
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.mean_abs_error( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;
Out[36]:
In [37]:
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.mean_abs_perc_error( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;
Out[37]:
In [38]:
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.mean_perc_error( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;
Out[38]:
In [ ]:
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.mean_squared_error( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;
In [ ]:
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.r2_score( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;
In [39]:
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.adjusted_r2_score( 'test_set', 'table_out', 'pred', 'obs', 3, 100);
SELECT * FROM table_out;
Out[39]:
Create the sample data for binary classifier metrics:
In [40]:
%%sql
DROP TABLE IF EXISTS test_set;
CREATE TABLE test_set AS
SELECT ((a*8)::integer)/8.0 pred, -- prediction probability TRUE
((a*0.5+random()*0.5)>0.5) obs -- actual observations
FROM (select random() as a from generate_series(1,100)) x;
SELECT * FROM test_set;
Out[40]:
Run the Binary Classifier metrics function and View the True Positive Rate and the False Positive Rate:
In [41]:
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.binary_classifier( 'test_set', 'table_out', 'pred', 'obs');
SELECT threshold, tpr, fpr FROM table_out ORDER BY threshold;
Out[41]:
View all metrics at a given threshold value:
In [ ]:
%%sql
SELECT * FROM table_out WHERE threshold=0.5;
Run the Area Under ROC curve function:
In [42]:
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.area_under_roc( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;
Out[42]:
Create the sample data for confusion matrix.
In [ ]:
%%sql
DROP TABLE IF EXISTS test_set;
CREATE TABLE test_set AS
SELECT (x+y)%5+1 AS pred,
(x*y)%5 AS obs
FROM generate_series(1,5) x,
generate_series(1,5) y;
SELECT * FROM test_set;
In [ ]:
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.confusion_matrix( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out ORDER BY class;