src/ports/postgres/modules/utilities/pivot.sql_in - madlib - Git at Google

 /* ----------------------------------------------------------------------- *//**
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  *
  *
  * @file pivot.sql_in
  *
  * @brief SQL functions for pivoting
  * @date June 2016
  *
  * @sa Creates a pivot table for data summarization.
  *
  *//* ----------------------------------------------------------------------- */

 m4_include(`SQLCommon.m4')

 /**
 @addtogroup grp_pivot

 <div class="toc"><b>Contents</b>
 <ul>
 <li><a href="#pivoting">Pivoting</a></li>
 <li><a href="#notes">Notes</a></li>
 <li><a href="#examples">Examples</a></li>
 <li><a href="#literature">Literature</a></li>
 </ul>
 </div>

 @brief Pivoting and data summarization tools for preparing data
 for modeling operations.

 @anchor pivoting
 The goal of the MADlib pivot function is to provide a data summarization tool
 that can do basic OLAP type operations on data stored in one table and output
 the summarized data to a second table.


 <pre class="syntax">
 pivot(
 	source_table,
     output_table,
     index,
     pivot_cols,
     pivot_values,
     aggregate_func,
     fill_value,
     keep_null,
     output_col_dictionary,
     output_type
     )
 </pre>
 \b Arguments
 <dl class="arglist">
     <dt>source_table</dt>
     <dd>VARCHAR. Name of the source table (or view) containing data to
     pivot.</dd>

     <dt>output_table</dt>
     <dd>VARCHAR. Name of output table that contains the pivoted data.
     The output table contains all the columns present in
     the <em>'index'</em> column list, plus additional columns for each
     distinct value in <em>'pivot_cols'</em>.

     @note The names of the columns in the output table are auto-generated.
     Please see the examples section below to see how this works in practice.
     The convention used is to concatenate the following strings and separate
     each by an underscore '_' :
     - name of the value column <em>'pivot_values'</em>
     - aggregate function
     - name of the pivot column <em>'pivot_cols'</em>
     - values in the pivot column
     </dd>

     <dt>index </dt>
     <dd>VARCHAR. Comma-separated columns that will form the index of the output
     pivot table.  By index we mean the values to group by; these are the rows
     in the output pivot table.</dd>

     <dt>pivot_cols </dt>
     <dd>VARCHAR. Comma-separated columns that will form the columns of the
     output pivot table.</dd>

     <dt>pivot_values </dt>
     <dd>VARCHAR. Comma-separated columns that contain the values to be
     summarized in the output pivot table.</dd>

     <dt>aggregate_func (optional)</dt>
     <dd>VARCHAR. default: 'AVG'. A comma-separated list of aggregates to be
     applied to values. These can be PostgreSQL built-in aggregates [1] or UDAs. It is
     possible to assign a set of aggregates per value column. Please refer to the
      examples 12\-14 below for syntax details.</dd>

      @note  Only aggregates with
      strict transition functions are permitted here.
      A strict transition function means rows with null values are ignored;
      the function is not called and the previous state value is retained.
      If you need some other behavior for null inputs, this should
      be done prior to calling the pivot function.
      Aggregates with strict transition
      functions are described in [2,3].

     <dt>fill_value (optional)</dt>
     <dd>VARCHAR. default: NULL. If specified, determines how to fill NULL
     values resulting from pivot operation. This is a global parameter (not
     applied per aggregate) and is applied post-aggregation to the output
     table.</dd>

     <dt>keep_null (optional)</dt>
     <dd>BOOLEAN. default: FALSE. If TRUE, then pivot columns are created
     corresponding to NULL categories. If FALSE, then no pivot columns will be
     created for NULL categories.</dd>

     <dt>output_col_dictionary (optional)</dt>
     <dd>BOOLEAN. default: FALSE. This parameter is used to handle
     auto-generated column names that exceed the PostgreSQL limit of 63 bytes
     (could be a common occurrence). If TRUE, column names will be set as
     numerical IDs and will create a dictionary table called output_table
     appended with _dictionary. If FALSE, will auto-generate column names in
     the usual way unless the limit of 63 bytes will be exceeded. In this case,
     a dictionary output file will be created and a message given to the user.
     </dd>

     <dt>output_type (optional)</dt>
     <dd>VARCHAR. default: 'column'.  This parameter controls the output format
     of the pivoted variables. If 'column', a column is created for each pivot
     variable. PostgreSQL limits the number of columns in a table
     (250 - 1600 depending on column types).
     If the total number of output columns exceeds this limit, then make this
     parameter either 'array' (to combine the output columns into an array) or
     'svec' (to cast the array output to <em>'madlib.svec'</em> type).
     If you have an 'aggregate_func' that has an array return type,
     it cannot be combined with 'output_type'='array' or 'svec'.

     A dictionary will be created (<em>output_col_dictionary=TRUE</em>)
     when 'output_type' is 'array' or 'svec' to define each index into the array.
     </dd>

 </dl>

 @anchor notes
 @note
 - NULLs in the index column are treated like any other value.
 - NULLs in the pivot column are ignored unless keep_null is TRUE.
 - Only strict transition functions are
 allowed so NULLs are ignored.
 - It is not allowed to set the fill_value parameter without setting the
 aggregate_func parameter due to possible ambiguity. Set
 aggregate_func to NULL for the default behavior and use fill_value as desired.
 Please note that full_value must be of the same type as the output of the
 aggregate_func (or capable of being cast to the same type by PostgreSQL),
 or else an error will result.
 - It is not allowed to set the output_col_dictionary parameter without setting
 the keep_null parameter due to possible ambiguity. Set
 keep_null to NULL for the default behavior and use output_col_dictionary as
 desired.
 - Expressions (instead of column names) are not supported. Create a view with the
 desired expressions and pass it as the input table (see example 3 below).
 - It is allowed to pass a partial mapping for the aggregate_func parameter. The
 missing value columns will be aggregated using the default function (average).

 @anchor examples
 @examp

 -# Create a simple dataset to demonstrate a basic pivot:
 <pre class="example">
 DROP TABLE IF EXISTS pivset CASCADE; -- View below may depend on table so use CASCADE
 CREATE TABLE pivset(
                   id INTEGER,
                   piv INTEGER,
                   val FLOAT8
                 );
 INSERT INTO pivset VALUES
 	(0, 10, 1),
 	(0, 10, 2),
 	(0, 20, 3),
 	(1, 20, 4),
 	(1, 30, 5),
 	(1, 30, 6),
 	(1, 10, 7),
 	(NULL, 10, 8),
 	(1, NULL, 9),
 	(1, 10, NULL);
 </pre>

 -# Pivot the table:
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset', 'pivout', 'id', 'piv', 'val');
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
  id | val_avg_piv_10 | val_avg_piv_20 | val_avg_piv_30
 ----+----------------+----------------+----------------
   0 |            1.5 |              3 |
   1 |              7 |              4 |            5.5
     |              8 |                |
 </pre>
 Here NULL is showing as an empty cell in the output.

 -# Now let's add some more columns to our data set and create a view:
 <pre class="example">
 DROP VIEW IF EXISTS pivset_ext;
 CREATE VIEW pivset_ext AS
     SELECT *,
     COALESCE(id + (val / 3)::integer, 0) AS id2,
     COALESCE(100*(val / 3)::integer, 0) AS piv2,
     COALESCE(val + 10, 0) AS val2
    FROM pivset;
 SELECT id,id2,piv,piv2,val,val2 FROM pivset_ext
 ORDER BY id,id2,piv,piv2,val,val2;
 </pre>
 <pre class="result">
  id | id2 | piv | piv2 | val | val2
 ----+-----+-----+------+-----+------
   0 |   0 |  10 |    0 |   1 |   11
   0 |   1 |  10 |  100 |   2 |   12
   0 |   1 |  20 |  100 |   3 |   13
   1 |   0 |  10 |    0 |     |    0
   1 |   2 |  20 |  100 |   4 |   14
   1 |   3 |  10 |  200 |   7 |   17
   1 |   3 |  30 |  200 |   5 |   15
   1 |   3 |  30 |  200 |   6 |   16
   1 |   4 |     |  300 |   9 |   19
     |   0 |  10 |  300 |   8 |   18
 (10 rows)
 </pre>

 -# Let's use a different aggregate function on the view we
 just created:
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum');
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
  id | val_sum_piv_10 | val_sum_piv_20 | val_sum_piv_30
 ----+----------------+----------------+----------------
   0 |              3 |              3 |
   1 |              7 |              4 |             11
     |              8 |                |
 </pre>

 -# Now create a custom aggregate.  Note that the aggregate
 must have a strict transition function:
 <pre class="example">
 DROP FUNCTION IF EXISTS array_add1 (ANYARRAY, ANYELEMENT) CASCADE;
 CREATE FUNCTION array_add1(ANYARRAY, ANYELEMENT) RETURNS ANYARRAY AS $$
   SELECT $1 || $2
 $$ LANGUAGE sql STRICT;
 DROP AGGREGATE IF EXISTS array_accum1 (anyelement);
 CREATE AGGREGATE array_accum1 (anyelement) (
     sfunc = array_add1,
     stype = anyarray,
     initcond = '{}'
 );
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'array_accum1');
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
   id | val_array_accum1_piv_10 | val_array_accum1_piv_20 | val_array_accum1_piv_30
 ----+-------------------------+-------------------------+-------------------------
   0 | {1,2}                   | {3}                     | {}
   1 | {7}                     | {4}                     | {5,6}
     | {8}                     | {}                      | {}
 </pre>

 -# Keep null values in the pivot column:
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', True);
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
  id | val_sum_piv_10 | val_sum_piv_20 | val_sum_piv_30 | val_sum_piv_null
 ----+----------------+----------------+----------------+------------------
   0 |              3 |              3 |                |
   1 |              7 |              4 |             11 |                9
     |              8 |                |                |
 </pre>

 -# Fill null results with a value of interest:
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'sum', '111');
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
  id | val_sum_piv_10 | val_sum_piv_20 | val_sum_piv_30
 ----+----------------+----------------+----------------
   0 |              3 |              3 |            111
   1 |              7 |              4 |             11
     |              8 |            111 |            111
 </pre>

 -# Use multiple index columns:
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id,id2', 'piv', 'val');
 SELECT * FROM pivout ORDER BY id,id2;
 </pre>
 <pre class="result">
  id | id2 | val_avg_piv_10 | val_avg_piv_20 | val_avg_piv_30
 ----+-----+----------------+----------------+----------------
   0 |   0 |              1 |                |
   0 |   1 |              2 |              3 |
   1 |   0 |                |                |
   1 |   2 |                |              4 |
   1 |   3 |              7 |                |            5.5
   1 |   4 |                |                |
     |   0 |              8 |                |
 </pre>

 -# Use multiple pivot columns with columnar output:
 <pre class="example">
 \\x on
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val');
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
 -[ RECORD 1 ]-----------+----
 id                      | 0
 val_avg_piv_10_piv2_0   | 1
 val_avg_piv_10_piv2_100 | 2
 val_avg_piv_10_piv2_200 |
 val_avg_piv_10_piv2_300 |
 val_avg_piv_20_piv2_0   |
 val_avg_piv_20_piv2_100 | 3
 val_avg_piv_20_piv2_200 |
 val_avg_piv_20_piv2_300 |
 val_avg_piv_30_piv2_0   |
 val_avg_piv_30_piv2_100 |
 val_avg_piv_30_piv2_200 |
 val_avg_piv_30_piv2_300 |
 -[ RECORD 2 ]-----------+----
 id                      | 1
 val_avg_piv_10_piv2_0   |
 val_avg_piv_10_piv2_100 |
 val_avg_piv_10_piv2_200 | 7
 val_avg_piv_10_piv2_300 |
 val_avg_piv_20_piv2_0   |
 val_avg_piv_20_piv2_100 | 4
 val_avg_piv_20_piv2_200 |
 val_avg_piv_20_piv2_300 |
 val_avg_piv_30_piv2_0   |
 val_avg_piv_30_piv2_100 |
 val_avg_piv_30_piv2_200 | 5.5
 val_avg_piv_30_piv2_300 |
 ...
 </pre>

 -# Use multiple pivot columns (same as above) with an array output:
 <pre class="example">
 DROP TABLE IF EXISTS pivout, pivout_dictionary;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv, piv2', 'val',
                     NULL, NULL, FALSE, FALSE, 'array');
 \\x off
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
    id   |                          val_avg
 --------+------------------------------------------------------------
       0 | {1,2,NULL,NULL,NULL,3,NULL,NULL,NULL,NULL,NULL,NULL}
       1 | {NULL,NULL,7,NULL,NULL,4,NULL,NULL,NULL,NULL,5.5,NULL}
  [NULL] | {NULL,NULL,NULL,8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
 </pre>
 <pre class="example">
 -- Use the dictionary to understand what each index of an array corresponds to
 SELECT * FROM pivout_dictionary;
 </pre>
 <pre class="result">
  __pivot_cid__ | pval | agg | piv | piv2 |         col_name
 ---------------+------+-----+-----+------+---------------------------
  1             | val  | avg |  10 |    0 | "val_avg_piv_10_piv2_0"
  2             | val  | avg |  10 |  100 | "val_avg_piv_10_piv2_100"
  3             | val  | avg |  10 |  200 | "val_avg_piv_10_piv2_200"
  4             | val  | avg |  10 |  300 | "val_avg_piv_10_piv2_300"
  5             | val  | avg |  20 |    0 | "val_avg_piv_20_piv2_0"
  6             | val  | avg |  20 |  100 | "val_avg_piv_20_piv2_100"
  7             | val  | avg |  20 |  200 | "val_avg_piv_20_piv2_200"
  8             | val  | avg |  20 |  300 | "val_avg_piv_20_piv2_300"
  9             | val  | avg |  30 |    0 | "val_avg_piv_30_piv2_0"
  10            | val  | avg |  30 |  100 | "val_avg_piv_30_piv2_100"
  11            | val  | avg |  30 |  200 | "val_avg_piv_30_piv2_200"
  12            | val  | avg |  30 |  300 | "val_avg_piv_30_piv2_300"
 </pre>

 -# Use multiple value columns:
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2');
 \\x on
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
 -[ RECORD 1 ]---+-----
 id              | 0
 val_avg_piv_10  | 1.5
 val_avg_piv_20  | 3
 val_avg_piv_30  |
 val2_avg_piv_10 | 11.5
 val2_avg_piv_20 | 13
 val2_avg_piv_30 |
 -[ RECORD 2 ]---+-----
 id              | 1
 val_avg_piv_10  | 7
 val_avg_piv_20  | 4
 val_avg_piv_30  | 5.5
 val2_avg_piv_10 | 8.5
 val2_avg_piv_20 | 14
 val2_avg_piv_30 | 15.5
 ...
 </pre>


 -# Use multiple aggregate functions on the same value column (cross product):
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val', 'avg, sum');
 \\x on
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
 -[ RECORD 1 ]--+----
 id             | 0
 val_avg_piv_10 | 1.5
 val_avg_piv_20 | 3
 val_avg_piv_30 |
 val_sum_piv_10 | 3
 val_sum_piv_20 | 3
 val_sum_piv_30 |
 -[ RECORD 2 ]--+----
 id             | 1
 val_avg_piv_10 | 7
 val_avg_piv_20 | 4
 val_avg_piv_30 | 5.5
 val_sum_piv_10 | 7
 val_sum_piv_20 | 4
 val_sum_piv_30 | 11
 ...
 </pre>

 -# Use different aggregate functions for different value columns:
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2',
     'val=avg, val2=sum');
 \\x on
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
 -[ RECORD 1 ]---+----
 id              | 0
 val_avg_piv_10  | 1.5
 val_avg_piv_20  | 3
 val_avg_piv_30  |
 val2_sum_piv_10 | 23
 val2_sum_piv_20 | 13
 val2_sum_piv_30 |
 -[ RECORD 2 ]---+----
 id              | 1
 val_avg_piv_10  | 7
 val_avg_piv_20  | 4
 val_avg_piv_30  | 5.5
 val2_sum_piv_10 | 17
 val2_sum_piv_20 | 14
 val2_sum_piv_30 | 31
 ...
 </pre>

 -# Use multiple aggregate functions for different value columns:
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id', 'piv', 'val, val2',
     'val=avg, val2=[avg,sum]');
 \\x on
 SELECT * FROM pivout ORDER BY id;
 </pre>
 <pre class="result">
 -[ RECORD 1 ]---+-----
 id              | 0
 val_avg_piv_10  | 1.5
 val_avg_piv_20  | 3
 val_avg_piv_30  |
 val2_avg_piv_10 | 11.5
 val2_avg_piv_20 | 13
 val2_avg_piv_30 |
 val2_sum_piv_10 | 23
 val2_sum_piv_20 | 13
 val2_sum_piv_30 |
 -[ RECORD 2 ]---+-----
 id              | 1
 val_avg_piv_10  | 7
 val_avg_piv_20  | 4
 val_avg_piv_30  | 5.5
 val2_avg_piv_10 | 8.5
 val2_avg_piv_20 | 14
 val2_avg_piv_30 | 15.5
 val2_sum_piv_10 | 17
 val2_sum_piv_20 | 14
 val2_sum_piv_30 | 31
 ...
 </pre>

 -# Combine all of the options:
 <pre class="example">
 DROP TABLE IF EXISTS pivout;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
     'val=avg, val2=[avg,sum]', '111', True);
 \\x on
 SELECT * FROM pivout ORDER BY id,id2;
 </pre>
 <pre class="result">
 -[ RECORD 1 ]--------------+-----
 id                         | 0
 id2                        | 0
 val_avg_piv_null_piv2_0    | 111
 val_avg_piv_null_piv2_100  | 111
 val_avg_piv_null_piv2_200  | 111
 val_avg_piv_null_piv2_300  | 111
 val_avg_piv_10_piv2_0      | 1
 val_avg_piv_10_piv2_100    | 111
 val_avg_piv_10_piv2_200    | 111
 val_avg_piv_10_piv2_300    | 111
 val_avg_piv_20_piv2_0      | 111
 val_avg_piv_20_piv2_100    | 111
 val_avg_piv_20_piv2_200    | 111
 val_avg_piv_20_piv2_300    | 111
 val_avg_piv_30_piv2_0      | 111
 val_avg_piv_30_piv2_100    | 111
 val_avg_piv_30_piv2_200    | 111
 val_avg_piv_30_piv2_300    | 111
 val2_avg_piv_null_piv2_0   | 111
 val2_avg_piv_null_piv2_100 | 111
 val2_avg_piv_null_piv2_200 | 111
 val2_avg_piv_null_piv2_300 | 111
 val2_avg_piv_10_piv2_0     | 11
 val2_avg_piv_10_piv2_100   | 111
 ...
 -[ RECORD 2 ]--------------+-----
 id                         | 0
 id2                        | 1
 val_avg_piv_null_piv2_0    | 111
 val_avg_piv_null_piv2_100  | 111
 val_avg_piv_null_piv2_200  | 111
 val_avg_piv_null_piv2_300  | 111
 val_avg_piv_10_piv2_0      | 111
 val_avg_piv_10_piv2_100    | 2
 val_avg_piv_10_piv2_200    | 111
 val_avg_piv_10_piv2_300    | 111
 val_avg_piv_20_piv2_0      | 111
 val_avg_piv_20_piv2_100    | 3
 val_avg_piv_20_piv2_200    | 111
 val_avg_piv_20_piv2_300    | 111
 val_avg_piv_30_piv2_0      | 111
 val_avg_piv_30_piv2_100    | 111
 val_avg_piv_30_piv2_200    | 111
 val_avg_piv_30_piv2_300    | 111
 val2_avg_piv_null_piv2_0   | 111
 val2_avg_piv_null_piv2_100 | 111
 val2_avg_piv_null_piv2_200 | 111
 val2_avg_piv_null_piv2_300 | 111
 ...
 </pre>

 -# Create a dictionary for output column names:
 <pre class="example">
 DROP TABLE IF EXISTS pivout, pivout_dictionary;
 SELECT madlib.pivot('pivset_ext', 'pivout', 'id, id2', 'piv, piv2', 'val, val2',
     'val=avg, val2=[avg,sum]', '111', True, True);
 \\x off
 SELECT * FROM pivout_dictionary order by __pivot_cid__;
 </pre>
 <pre class="result">
 __pivot_cid__ | pval | agg |  piv   | piv2 |           col_name
 ---------------+------+-----+--------+------+------------------------------
  __p_1__       | val  | avg | [NULL] |    0 | "val_avg_piv_null_piv2_0"
  __p_2__       | val  | avg | [NULL] |  100 | "val_avg_piv_null_piv2_100"
  __p_3__       | val  | avg | [NULL] |  200 | "val_avg_piv_null_piv2_200"
  __p_4__       | val  | avg | [NULL] |  300 | "val_avg_piv_null_piv2_300"
  __p_5__       | val  | avg |     10 |    0 | "val_avg_piv_10_piv2_0"
  __p_6__       | val  | avg |     10 |  100 | "val_avg_piv_10_piv2_100"
  __p_7__       | val  | avg |     10 |  200 | "val_avg_piv_10_piv2_200"
  __p_8__       | val  | avg |     10 |  300 | "val_avg_piv_10_piv2_300"
  __p_9__       | val  | avg |     20 |    0 | "val_avg_piv_20_piv2_0"
  __p_10__      | val  | avg |     20 |  100 | "val_avg_piv_20_piv2_100"
  __p_11__      | val  | avg |     20 |  200 | "val_avg_piv_20_piv2_200"
  __p_12__      | val  | avg |     20 |  300 | "val_avg_piv_20_piv2_300"
  __p_13__      | val  | avg |     30 |    0 | "val_avg_piv_30_piv2_0"
  __p_14__      | val  | avg |     30 |  100 | "val_avg_piv_30_piv2_100"
  __p_15__      | val  | avg |     30 |  200 | "val_avg_piv_30_piv2_200"
  __p_16__      | val  | avg |     30 |  300 | "val_avg_piv_30_piv2_300"
  __p_17__      | val2 | avg | [NULL] |    0 | "val2_avg_piv_null_piv2_0"
  __p_18__      | val2 | avg | [NULL] |  100 | "val2_avg_piv_null_piv2_100"
  __p_19__      | val2 | avg | [NULL] |  200 | "val2_avg_piv_null_piv2_200"
  __p_20__      | val2 | avg | [NULL] |  300 | "val2_avg_piv_null_piv2_300"
  __p_21__      | val2 | avg |     10 |    0 | "val2_avg_piv_10_piv2_0"
 ...
 (48 rows)
 </pre>
 <pre class="example">
 \\x on
 SELECT * FROM pivout ORDER BY id,id2;
 </pre>
 <pre class="result">
 -[ RECORD 1 ]----
 id       | 0
 id2      | 0
 __p_1__  | 111
 __p_2__  | 111
 __p_3__  | 111
 __p_4__  | 111
 __p_5__  | 1
 __p_6__  | 111
 __p_7__  | 111
 __p_8__  | 111
 __p_9__  | 111
 __p_10__ | 111
 __p_11__ | 111
 __p_12__ | 111
 __p_13__ | 111
 ...
 -[ RECORD 2 ]----
 id       | 0
 id2      | 1
 __p_1__  | 111
 __p_2__  | 111
 __p_3__  | 111
 __p_4__  | 111
 __p_5__  | 111
 __p_6__  | 2
 __p_7__  | 111
 __p_8__  | 111
 __p_9__  | 111
 __p_10__ | 3
 __p_11__ | 111
 __p_12__ | 111
 __p_13__ | 111
 ...
 -[ RECORD 3 ]----
 id       | 1
 id2      | 0
 __p_1__  | 111
 __p_2__  | 111
 __p_3__  | 111
 __p_4__  | 111
 __p_5__  | 111
 __p_6__  | 111
 __p_7__  | 111
 __p_8__  | 111
 __p_9__  | 111
 __p_10__ | 111
 __p_11__ | 111
 __p_12__ | 111
 __p_13__ | 111
 ...
 </pre>

 @anchor literature
 @literature

 NOTE: The following links refer to documentation resources for the
 current PostgreSQL database version. Depending upon your database
 platform version, you may need to change "current" references in the
 links to your database version.

 If your database platform uses the Greenplum Database (or related
 variants), please check with the project community and/or your
 database vendor to identify the PostgreSQL version it is based on.

 @anchor svm-lit-1
 [1] https://www.postgresql.org/docs/current/static/functions-aggregate.html

 [2] https://www.postgresql.org/docs/current/static/sql-createaggregate.html

 [3] https://www.postgresql.org/docs/current/static/xaggr.html
 */

 -------------------------------------------------------------------------


 /**
  * @brief Helper function that can be used to pivot tables
  *
  * @param source_table          The original data table
  * @param output_table          The output table that contains the dummy
  *                              variable columns
  * @param index                 The index columns to group by the records by
  * @param pivot_cols            The columns to pivot the table
  * @param pivot_values          The value columns to be summarized in the
  *                              pivoted table
  * @param aggregate_func        The aggregate function to be applied to the
  *                              values
  * @param fill_value            If specified, determines how to fill NULL
  *                              values resulting from pivot operation
  * @param keep_null             The flag for determining how to handle NULL
  *                              values in pivot columns
  * @param output_col_dictionary The flag for enabling the creation of the
  *                              output dictionary for shorter column names
  * @return Void
  *
  */

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
     source_table            TEXT,
     output_table            TEXT,
     index                   TEXT,
     pivot_cols              TEXT,
     pivot_values            TEXT,
     aggregate_func          TEXT,
     fill_value              TEXT,
     keep_null               BOOLEAN,
     output_col_dictionary   BOOLEAN,
     output_type             TEXT

 ) RETURNS VOID AS $$
     PythonFunction(utilities, pivot, pivot)
 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `MODIFIES SQL DATA', `');

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
     source_table            TEXT,
     output_table            TEXT,
     index                   TEXT,
     pivot_cols              TEXT,
     pivot_values            TEXT,
     aggregate_func          TEXT,
     fill_value              TEXT,
     keep_null               BOOLEAN,
     output_col_dictionary   BOOLEAN

 ) RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.pivot($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL)
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
     source_table        TEXT,
     output_table        TEXT,
     index               TEXT,
     pivot_cols          TEXT,
     pivot_values        TEXT,
     aggregate_func      TEXT,
     fill_value          TEXT,
     keep_null           BOOLEAN

 ) RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.pivot($1, $2, $3, $4, $5, $6, $7, $8, FALSE)
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
     source_table        TEXT,
     output_table        TEXT,
     index               TEXT,
     pivot_cols          TEXT,
     pivot_values        TEXT,
     aggregate_func      TEXT,
     fill_value          TEXT

 ) RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.pivot($1, $2, $3, $4, $5, $6, $7, FALSE, FALSE)
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
     source_table        TEXT,
     output_table        TEXT,
     index               TEXT,
     pivot_cols          TEXT,
     pivot_values        TEXT,
     aggregate_func      TEXT,
     keep_null           BOOLEAN

 ) RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.pivot($1, $2, $3, $4, $5, $6, NULL, $7, FALSE)
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');


 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
     source_table        TEXT,
     output_table        TEXT,
     index               TEXT,
     pivot_cols          TEXT,
     pivot_values        TEXT,
     aggregate_func      TEXT

 ) RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.pivot($1, $2, $3, $4, $5, $6, NULL, FALSE, FALSE)
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
     source_table        TEXT,
     output_table        TEXT,
     index               TEXT,
     pivot_cols          TEXT,
     pivot_values        TEXT,
     keep_null           BOOLEAN

 ) RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.pivot($1, $2, $3, $4, $5, NULL, NULL, $6, FALSE)
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
     source_table        TEXT,
     output_table        TEXT,
     index               TEXT,
     pivot_cols          TEXT,
     pivot_values        TEXT

 ) RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.pivot($1, $2, $3, $4, $5, NULL, NULL, FALSE, FALSE)
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');

 -- Online help
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot(
     message VARCHAR
 ) RETURNS VARCHAR AS $$
     PythonFunction(utilities, pivot, pivot_help)
 $$ LANGUAGE plpythonu IMMUTABLE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');

 --------------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.pivot()
 RETURNS VARCHAR AS $$
     SELECT MADLIB_SCHEMA.pivot('');
 $$ LANGUAGE sql IMMUTABLE
 m4_ifdef(`\_\_HAS_FUNCTION_PROPERTIES\_\_', `CONTAINS SQL', `');
 --------------------------------------------------------------------------------