Prediction metrics

This module provides a set of metrics to evaluate the quality of predictions of a model. A typical function will take a set of "prediction" and "observation" values and use them to calculate the desired metric, unless noted otherwise.



In [ ]:

    
%load_ext sql



In [ ]:

    
# %sql postgresql://gpdbchina@10.194.10.68:55000/madlib
%sql postgresql://fmcquillan@localhost:5432/madlib



In [34]:

    
%sql select madlib.version();









    



1 rows affected.






    Out[34]:





    
        version
    
    
        MADlib version: 1.9.1, git revision: rc/v1.9-rc1-39-g1929aed, cmake configuration time: Tue Aug 30 00:17:02 UTC 2016, build type: RelWithDebInfo, build system: Darwin-14.5.0, C compiler: Clang, C++ compiler: Clang

Continuous variables



In [35]:

    
%%sql 
DROP TABLE IF EXISTS test_set;
CREATE TABLE test_set(
                  pred FLOAT8,  -- predicted values
                  obs FLOAT8    -- actual observed values
                );
INSERT INTO test_set VALUES
  (37.5,53.1), (12.3,34.2), (74.2,65.4), (91.1,82.1);

SELECT * FROM test_set;









    



Done.
Done.
4 rows affected.
4 rows affected.






    Out[35]:





    
        pred
        obs
    
    
        37.5
        53.1
    
    
        12.3
        34.2
    
    
        74.2
        65.4
    
    
        91.1
        82.1



In [36]:

    
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.mean_abs_error( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;









    



Done.
1 rows affected.
1 rows affected.






    Out[36]:





    
        mean_abs_error
    
    
        13.825



In [37]:

    
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.mean_abs_perc_error( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;









    



Done.
1 rows affected.
1 rows affected.






    Out[37]:





    
        mean_abs_perc_error
    
    
        0.294578793636



In [38]:

    
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.mean_perc_error( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;









    



Done.
1 rows affected.
1 rows affected.






    Out[38]:





    
        mean_perc_error
    
    
        -0.172489300328



In [ ]:

    
%%sql 
DROP TABLE IF EXISTS table_out;
SELECT madlib.mean_squared_error( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;



In [ ]:

    
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.r2_score( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;



In [39]:

    
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.adjusted_r2_score( 'test_set', 'table_out', 'pred', 'obs', 3, 100);
SELECT * FROM table_out;









    



Done.
1 rows affected.
1 rows affected.






    Out[39]:





    
        r2_score
        adjusted_r2_score
    
    
        0.279929088443
        0.257426872457

Binary classification

Create the sample data for binary classifier metrics:



In [40]:

    
%%sql
DROP TABLE IF EXISTS test_set;
CREATE TABLE test_set AS
    SELECT ((a*8)::integer)/8.0 pred,   -- prediction probability TRUE
        ((a*0.5+random()*0.5)>0.5) obs  -- actual observations
    FROM (select random() as a from generate_series(1,100)) x;
SELECT * FROM test_set;









    



Done.
100 rows affected.
100 rows affected.






    Out[40]:





    
        pred
        obs
    
    
        0.87500000000000000000
        True
    
    
        0E-20
        False
    
    
        1.00000000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        1.00000000000000000000
        True
    
    
        0.25000000000000000000
        False
    
    
        1.00000000000000000000
        True
    
    
        0E-20
        False
    
    
        0.87500000000000000000
        True
    
    
        0.50000000000000000000
        False
    
    
        0E-20
        False
    
    
        0.12500000000000000000
        False
    
    
        0E-20
        False
    
    
        0.75000000000000000000
        True
    
    
        0.25000000000000000000
        True
    
    
        0.62500000000000000000
        False
    
    
        0.50000000000000000000
        False
    
    
        1.00000000000000000000
        True
    
    
        0E-20
        False
    
    
        0.87500000000000000000
        True
    
    
        0.62500000000000000000
        False
    
    
        0.37500000000000000000
        False
    
    
        0.12500000000000000000
        False
    
    
        0.87500000000000000000
        True
    
    
        0.75000000000000000000
        True
    
    
        0.37500000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        0.50000000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        0.75000000000000000000
        False
    
    
        1.00000000000000000000
        True
    
    
        0.50000000000000000000
        False
    
    
        0.87500000000000000000
        True
    
    
        0.62500000000000000000
        True
    
    
        0.37500000000000000000
        False
    
    
        0.87500000000000000000
        True
    
    
        0.75000000000000000000
        True
    
    
        0.62500000000000000000
        True
    
    
        0.75000000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        0.75000000000000000000
        False
    
    
        0.25000000000000000000
        False
    
    
        0.75000000000000000000
        True
    
    
        0.50000000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        0.62500000000000000000
        False
    
    
        0.25000000000000000000
        False
    
    
        0.75000000000000000000
        True
    
    
        0.37500000000000000000
        False
    
    
        0.50000000000000000000
        False
    
    
        0.12500000000000000000
        False
    
    
        0.50000000000000000000
        True
    
    
        0.25000000000000000000
        False
    
    
        0.12500000000000000000
        False
    
    
        0.62500000000000000000
        True
    
    
        0.37500000000000000000
        True
    
    
        0.50000000000000000000
        False
    
    
        0.62500000000000000000
        True
    
    
        0.75000000000000000000
        True
    
    
        0.12500000000000000000
        False
    
    
        0.50000000000000000000
        True
    
    
        1.00000000000000000000
        True
    
    
        0.25000000000000000000
        False
    
    
        0.62500000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        0.87500000000000000000
        False
    
    
        0.50000000000000000000
        False
    
    
        0.75000000000000000000
        True
    
    
        0.62500000000000000000
        True
    
    
        0.75000000000000000000
        True
    
    
        0.75000000000000000000
        False
    
    
        0.12500000000000000000
        False
    
    
        0.25000000000000000000
        True
    
    
        0.12500000000000000000
        False
    
    
        0.50000000000000000000
        False
    
    
        0.62500000000000000000
        True
    
    
        0.12500000000000000000
        False
    
    
        0.62500000000000000000
        False
    
    
        0.37500000000000000000
        False
    
    
        0.87500000000000000000
        True
    
    
        0.50000000000000000000
        True
    
    
        0.25000000000000000000
        False
    
    
        0E-20
        False
    
    
        0.75000000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        0.75000000000000000000
        False
    
    
        0.87500000000000000000
        True
    
    
        0.87500000000000000000
        False
    
    
        0E-20
        False
    
    
        0.37500000000000000000
        True
    
    
        0.25000000000000000000
        False
    
    
        0.37500000000000000000
        True
    
    
        0.87500000000000000000
        True
    
    
        0.25000000000000000000
        False
    
    
        1.00000000000000000000
        True
    
    
        0.50000000000000000000
        False

Run the Binary Classifier metrics function and View the True Positive Rate and the False Positive Rate:



In [41]:

    
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.binary_classifier( 'test_set', 'table_out', 'pred', 'obs');
SELECT threshold, tpr, fpr FROM table_out ORDER BY threshold;









    



Done.
1 rows affected.
9 rows affected.






    Out[41]:





    
        threshold
        tpr
        fpr
    
    
        0E-20
        1.0
        1.0
    
    
        0.12500000000000000000
        1.0
        0.844444444444
    
    
        0.25000000000000000000
        1.0
        0.666666666667
    
    
        0.37500000000000000000
        0.963636363636
        0.488888888889
    
    
        0.50000000000000000000
        0.890909090909
        0.4
    
    
        0.62500000000000000000
        0.8
        0.222222222222
    
    
        0.75000000000000000000
        0.672727272727
        0.133333333333
    
    
        0.87500000000000000000
        0.490909090909
        0.0444444444444
    
    
        1.00000000000000000000
        0.127272727273
        0.0

View all metrics at a given threshold value:



In [ ]:

    
%%sql
SELECT * FROM table_out WHERE threshold=0.5;

Run the Area Under ROC curve function:



In [42]:

    
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.area_under_roc( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out;









    



Done.
1 rows affected.
1 rows affected.






    Out[42]:





    
        area_under_roc
    
    
        0.87151515151515151515251515151515151515150

Multi-class classification

Create the sample data for confusion matrix.



In [ ]:

    
%%sql
DROP TABLE IF EXISTS test_set;
CREATE TABLE test_set AS
    SELECT (x+y)%5+1 AS pred,
        (x*y)%5 AS obs
    FROM generate_series(1,5) x,
        generate_series(1,5) y;
SELECT * FROM test_set;



In [ ]:

    
%%sql
DROP TABLE IF EXISTS table_out;
SELECT madlib.confusion_matrix( 'test_set', 'table_out', 'pred', 'obs');
SELECT * FROM table_out ORDER BY class;

pred	obs
0.87500000000000000000	True
0E-20	False
1.00000000000000000000	True
0.87500000000000000000	True
0.87500000000000000000	True
0.87500000000000000000	True
1.00000000000000000000	True
0.25000000000000000000	False
1.00000000000000000000	True
0E-20	False
0.87500000000000000000	True
0.50000000000000000000	False
0E-20	False
0.12500000000000000000	False
0E-20	False
0.75000000000000000000	True
0.25000000000000000000	True
0.62500000000000000000	False
0.50000000000000000000	False
1.00000000000000000000	True
0E-20	False
0.87500000000000000000	True
0.62500000000000000000	False
0.37500000000000000000	False
0.12500000000000000000	False
0.87500000000000000000	True
0.75000000000000000000	True
0.37500000000000000000	True
0.87500000000000000000	True
0.50000000000000000000	True
0.87500000000000000000	True
0.75000000000000000000	False
1.00000000000000000000	True
0.50000000000000000000	False
0.87500000000000000000	True
0.62500000000000000000	True
0.37500000000000000000	False
0.87500000000000000000	True
0.75000000000000000000	True
0.62500000000000000000	True
0.75000000000000000000	True
0.87500000000000000000	True
0.75000000000000000000	False
0.25000000000000000000	False
0.75000000000000000000	True
0.50000000000000000000	True
0.87500000000000000000	True
0.87500000000000000000	True
0.62500000000000000000	False
0.25000000000000000000	False
0.75000000000000000000	True
0.37500000000000000000	False
0.50000000000000000000	False
0.12500000000000000000	False
0.50000000000000000000	True
0.25000000000000000000	False
0.12500000000000000000	False
0.62500000000000000000	True
0.37500000000000000000	True
0.50000000000000000000	False
0.62500000000000000000	True
0.75000000000000000000	True
0.12500000000000000000	False
0.50000000000000000000	True
1.00000000000000000000	True
0.25000000000000000000	False
0.62500000000000000000	True
0.87500000000000000000	True
0.87500000000000000000	False
0.50000000000000000000	False
0.75000000000000000000	True
0.62500000000000000000	True
0.75000000000000000000	True
0.75000000000000000000	False
0.12500000000000000000	False
0.25000000000000000000	True
0.12500000000000000000	False
0.50000000000000000000	False
0.62500000000000000000	True
0.12500000000000000000	False
0.62500000000000000000	False
0.37500000000000000000	False
0.87500000000000000000	True
0.50000000000000000000	True
0.25000000000000000000	False
0E-20	False
0.75000000000000000000	True
0.87500000000000000000	True
0.87500000000000000000	True
0.75000000000000000000	False
0.87500000000000000000	True
0.87500000000000000000	False
0E-20	False
0.37500000000000000000	True
0.25000000000000000000	False
0.37500000000000000000	True
0.87500000000000000000	True
0.25000000000000000000	False
1.00000000000000000000	True
0.50000000000000000000	False

threshold	tpr	fpr
0E-20	1.0	1.0
0.12500000000000000000	1.0	0.844444444444
0.25000000000000000000	1.0	0.666666666667
0.37500000000000000000	0.963636363636	0.488888888889
0.50000000000000000000	0.890909090909	0.4
0.62500000000000000000	0.8	0.222222222222
0.75000000000000000000	0.672727272727	0.133333333333
0.87500000000000000000	0.490909090909	0.0444444444444
1.00000000000000000000	0.127272727273	0.0