In [ ]:
%load_ext sql
In [ ]:
# %sql postgresql://gpdbchina@10.194.10.68:55000/madlib
%sql postgresql://fmcquillan@localhost:5432/madlib
In [36]:
%sql select madlib.version();
Out[36]:
Create a simple dataset to demonstrate a basic pivot:
In [37]:
%%sql
DROP TABLE IF EXISTS pivset CASCADE;
CREATE TABLE pivset(
id INTEGER,
piv INTEGER,
val FLOAT8
);
INSERT INTO pivset VALUES
(0, 10, 1),
(0, 10, 2),
(0, 20, 3),
(1, 20, 4),
(1, 30, 5),
(1, 30, 6),
(1, 10, 7),
(NULL, 10, 8),
(1, NULL, 9),
(1, 10, NULL);
SELECT * FROM pivset ORDER BY id;
Out[37]:
In [38]:
%%sql
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset', -- source data
'pivout', -- output data
'id', -- index (rows in the output table)
'piv', -- pivot columns
'val'); -- values to be summarized in the output table
SELECT * FROM pivout ORDER BY id;
Out[38]:
Now let's add some more columns to our data set and create a view:
In [39]:
%%sql
DROP VIEW IF EXISTS pivset_ext;
CREATE VIEW pivset_ext AS
SELECT *,
COALESCE(id + (val / 3)::integer, 0) AS id2,
COALESCE(100*(val / 3)::integer, 0) AS piv2,
COALESCE(val + 10, 0) AS val2
FROM pivset;
SELECT id,id2,piv,piv2,val,val2 FROM pivset_ext
ORDER BY id,id2,piv,piv2,val,val2;
Out[39]:
Let's use a different aggregate function on the view we just created:
In [40]:
%%sql
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum');
SELECT * FROM pivout ORDER BY id;
Out[40]:
Now create a custom aggregate. Note that the aggregate must have a strict transition function:
In [41]:
%%sql
DROP FUNCTION IF EXISTS array_add1 (ANYARRAY, ANYELEMENT) CASCADE;
CREATE FUNCTION array_add1(ANYARRAY, ANYELEMENT) RETURNS ANYARRAY AS $$
SELECT $1 || $2
$$ LANGUAGE sql STRICT;
DROP AGGREGATE IF EXISTS array_accum1 (anyelement);
CREATE AGGREGATE array_accum1 (anyelement) (
sfunc = array_add1, -- state transition function
stype = anyarray, -- current internal state of the aggregate (temp variable)
initcond = '{}' -- IC is NULL
);
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'array_accum1'); -- OK since STRICT
-- SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'array_agg'); -- this will throw error since not STRICT
SELECT * FROM pivout ORDER BY id;
Out[41]:
Keep null values in the pivot column:
In [ ]:
%%sql
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', True);
SELECT * FROM pivout ORDER BY id;
Fill null results with a value of interest:
In [ ]:
%%sql
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', '111');
SELECT * FROM pivout ORDER BY id;
Use multiple index columns:
In [ ]:
%%sql
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id,id2', 'piv', 'val');
SELECT * FROM pivout ORDER BY id,id2;
Use multiple pivot columns:
In [ ]:
%%sql
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val');
SELECT * FROM pivout ORDER BY id;
Use multiple value columns:
In [ ]:
%%sql
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2');
SELECT * FROM pivout ORDER BY id;
Use multiple aggregate functions on the same value column (cross product):
In [ ]:
%%sql
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'avg, sum');
SELECT * FROM pivout ORDER BY id;
Use different aggregate functions for different value columns:
In [ ]:
%%sql
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2',
'val=avg, val2=sum');
SELECT * FROM pivout ORDER BY id;
Use multiple aggregate functions for different value columns:
In [ ]:
%%sql
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
'val=avg, val2=[avg,sum]', '111', True);
SELECT * FROM pivout ORDER BY id,id2;
Combine all of the options:
In [42]:
%%sql
DROP TABLE IF EXISTS pivout;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
'val=avg, val2=[avg,sum]', '111', True);
SELECT * FROM pivout ORDER BY id,id2;
Out[42]:
Create a dictionary for output column names:
In [ ]:
%%sql
DROP TABLE IF EXISTS pivout, pivout_dictionary;
SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
'val=avg, val2=[avg,sum]', '111', True, True);
SELECT * FROM pivout_dictionary;
In [ ]:
%%sql
SELECT * FROM pivout ORDER BY id,id2;