blob: 8397fca718c73484b45ee6237ee6296202f45bfb [file] [log] [blame]
/* ----------------------------------------------------------------------- */
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* @file path.sql_in
*
* @brief SQL functions for pathing functions
* @date Sep 2015
*
*/
/* ----------------------------------------------------------------------- */
DROP TABLE IF EXISTS "Weblog", "Path_output", "Path_output_tuples" CASCADE;
CREATE TABLE "Weblog" (event_timestamp TIMESTAMP,
user_id INT,
age_group INT,
income_group INT,
gender TEXT,
region TEXT,
household_size INT,
"Click_event" INT,
purchase_event INT,
revenue FLOAT,
"Margin" FLOAT);
INSERT INTO "Weblog" VALUES
(to_timestamp('04/14/2012 23:43:00', 'MM/DD/YYYY HH24:MI:SS'), 102201, 3, 3, 'Female', 'East', 3, 1, 1, 112, 36),
(to_timestamp('04/14/2012 23:56:00', 'MM/DD/YYYY HH24:MI:SS'), 101881, 2, 4, 'Male', 'West', 5, 0, 0, 0, 0),
(to_timestamp('04/15/2012 01:04:00', 'MM/DD/YYYY HH24:MI:SS'), 100821, 1, 4, 'Unknown', 'West', 3, 0, 0, 0, 0),
(to_timestamp('04/15/2012 01:15:00', 'MM/DD/YYYY HH24:MI:SS'), 101121, 2, 2, 'Unknown', 'West', 4, 0, 0, 0, 0),
(to_timestamp('04/15/2012 02:53:00', 'MM/DD/YYYY HH24:MI:SS'), 102201, 3, 3, 'Female', 'East', 3, 1, 1, 117, 28),
(to_timestamp('04/15/2012 04:11:00', 'MM/DD/YYYY HH24:MI:SS'), 103711, 4, 3, 'Female', 'Central', 5, 0, 0, 0, 0),
(to_timestamp('04/15/2012 04:25:00', 'MM/DD/YYYY HH24:MI:SS'), 100821, 1, 4, 'Unknown', 'West', 3, 1, 1, 91, 28),
(to_timestamp('04/15/2012 06:26:00', 'MM/DD/YYYY HH24:MI:SS'), 102871, 3, 4, 'Female', 'Central', 5, 0, 0, 0, 0),
(to_timestamp('04/15/2012 06:32:00', 'MM/DD/YYYY HH24:MI:SS'), 100821, 1, 4, 'Unknown', 'West', 3, 0, 0, 0, 0),
(to_timestamp('04/15/2012 07:02:00', 'MM/DD/YYYY HH24:MI:SS'), 100821, 1, 4, 'Unknown', 'West', 3, 1, 1, 118, 39),
(to_timestamp('04/15/2012 08:51:00', 'MM/DD/YYYY HH24:MI:SS'), 102201, 3, 3, 'Female', 'East', 3, 0, 0, 0, 0),
(to_timestamp('04/15/2012 09:28:00', 'MM/DD/YYYY HH24:MI:SS'), 101121, 2, 2, 'Unknown', 'West', 4, 1, 1, 103, 32),
(to_timestamp('04/15/2012 10:19:00', 'MM/DD/YYYY HH24:MI:SS'), 103711, 4, 3, 'Female', 'Central', 5, 0, 0, 0, 0),
(to_timestamp('04/15/2012 11:40:00', 'MM/DD/YYYY HH24:MI:SS'), 100821, 1, 4, 'Unknown', 'West', 3, 0, 0, 0, 0),
(to_timestamp('04/15/2012 12:58:00', 'MM/DD/YYYY HH24:MI:SS'), 101121, 2, 2, 'Unknown', 'West', 4, 1, 1, 148, 23),
(to_timestamp('04/15/2012 14:18:00', 'MM/DD/YYYY HH24:MI:SS'), 101121, 2, 2, 'Unknown', 'West', 4, 1, 1, 113, 29),
(to_timestamp('04/15/2012 22:20:00', 'MM/DD/YYYY HH24:MI:SS'), 101121, 2, 2, 'Unknown', 'West', 4, 1, 1, 108, 38),
(to_timestamp('04/15/2012 23:13:00', 'MM/DD/YYYY HH24:MI:SS'), 102201, 3, 3, 'Female', 'East', 3, 0, 0, 0, 0),
(to_timestamp('04/15/2012 23:14:00', 'MM/DD/YYYY HH24:MI:SS'), 103711, 4, 3, 'Female', 'Central', 5, 0, 0, 0, 0),
(to_timestamp('04/16/2012 01:55:00', 'MM/DD/YYYY HH24:MI:SS'), 101121, 2, 2, 'Unknown', 'West', 4, 0, 0, 0, 0),
(to_timestamp('04/16/2012 02:12:00', 'MM/DD/YYYY HH24:MI:SS'), 100821, 1, 4, 'Unknown', 'West', 3, 1, 1, 153, 26),
(to_timestamp('04/16/2012 04:20:00', 'MM/DD/YYYY HH24:MI:SS'), 102201, 3, 3, 'Female', 'East', 3, 0, 0, 0, 0),
(to_timestamp('04/16/2012 05:38:00', 'MM/DD/YYYY HH24:MI:SS'), 101121, 2, 2, 'Unknown', 'West', 4, 1, 0, 0, 0),
(to_timestamp('04/16/2012 05:44:00', 'MM/DD/YYYY HH24:MI:SS'), 102201, 3, 3, 'Female', 'East', 3, 1, 0, 0, 0),
(to_timestamp('04/16/2012 05:59:00', 'MM/DD/YYYY HH24:MI:SS'), 102871, 3, 4, 'Female', 'Central', 5, 1, 0, 0, 0),
(to_timestamp('04/16/2012 09:35:00', 'MM/DD/YYYY HH24:MI:SS'), 102871, 3, 4, 'Female', 'Central', 5, 1, 0, 0, 0),
(to_timestamp('04/16/2012 10:40:00', 'MM/DD/YYYY HH24:MI:SS'), 101331, 2, 4, 'Female', 'East', 5, 0, 0, 0, 0),
(to_timestamp('04/16/2012 14:23:00', 'MM/DD/YYYY HH24:MI:SS'), 102871, 3, 4, 'Female', 'Central', 5, 0, 0, 0, 0),
(to_timestamp('04/16/2012 20:46:00', 'MM/DD/YYYY HH24:MI:SS'), 101121, 2, 2, 'Unknown', 'West', 4, 1, 1, 131, 28),
(to_timestamp('04/16/2012 21:11:00', 'MM/DD/YYYY HH24:MI:SS'), 101331, 2, 4, 'Female', 'East', 5, 1, 1, 127, 27),
(to_timestamp('04/16/2012 22:35:00', 'MM/DD/YYYY HH24:MI:SS'), 101121, 2, 2, 'Unknown', 'West', 4, 0, 0, 0, 0),
(to_timestamp('04/16/2012 23:51:00', 'MM/DD/YYYY HH24:MI:SS'), 101881, 2, 4, 'Male', 'West', 5, 0, 0, 0, 0),
(to_timestamp('04/16/2012 23:55:00', 'MM/DD/YYYY HH24:MI:SS'), 101331, 2, 4, 'Female', 'East', 5, 0, 0, 0, 0),
(to_timestamp('04/16/2012 23:56:00', 'MM/DD/YYYY HH24:MI:SS'), 101331, 2, 4, 'Female', 'East', 5, 1, 0, 0, 0),
(to_timestamp('04/16/2012 23:57:00', 'MM/DD/YYYY HH24:MI:SS'), 101331, 2, 4, 'Female', 'East', 5, 1, 1, 456, 77);
/*
SELECT * FROM weblog ORDER BY event_timestamp ASC;
*/
SELECT path(
'"Weblog"', -- Name of the table
'"Path_output"', -- Table name to store the path results
'user_id, age_group > 1, income_group > 1', -- Partition expression to group the data table
'event_timestamp ASC', -- Order expression to sort the tuples of the data table
'I:="Click_event"=0 AND purchase_event=0, Click:="Click_event"=1 AND purchase_event=0, Conv:=purchase_event=1', -- Definition of various symbols used in the pattern definition
'I(click){1}(CONV){1}', -- Definition of the path pattern to search for
'COUNT(*)' -- Aggregate/window functions to be applied on the matched paths
,TRUE
);
------------------------------------------------------------
SELECT assert(relative_error(array_agg(count), ARRAY[3, 3]) < 1e-6,
'wrong results in path')
FROM "Path_output";
SELECT * FROM "Path_output_tuples";
SELECT * FROM "Path_output";
DROP TABLE "Path_output", "Path_output_tuples";
SELECT path(
'"Weblog"', -- Name of the table
'"Path_output"', -- Table name to store the path results
NULL, -- Partition expression to group the data table
'event_timestamp ASC', -- Order expression to sort the tuples of the data table
'I:="Click_event"=0 AND purchase_event=0, Click:="Click_event"=1 AND purchase_event=0, Conv:=purchase_event=1', -- Definition of various symbols used in the pattern definition
'I(click){1}(CONV){1}', -- Definition of the path pattern to search for
'COUNT(*) as count' -- Aggregate/window functions to be applied on the matched paths
,TRUE
);
------------------------------------------------------------
SELECT assert(count::integer=3::integer, 'wrong results in path')
FROM "Path_output";
INSERT INTO "Weblog" VALUES
(to_timestamp('04/15/2012 02:15:00', 'MM/DD/YYYY HH24:MI:SS'), 101331, 2, 4, 'Female', 'East', 5, 0, 0, 0, 0),
(to_timestamp('04/15/2012 02:59:00', 'MM/DD/YYYY HH24:MI:SS'), 101331, 2, 4, 'Female', 'East', 5, 1, 0, 0, 0),
(to_timestamp('04/15/2012 04:32:00', 'MM/DD/YYYY HH24:MI:SS'), 101331, 2, 4, 'Female', 'East', 5, 1, 1, 112, 36);
DROP TABLE "Path_output", "Path_output_tuples";
SELECT path(
'"Weblog"', -- Name of the table
'"Path_output"', -- Table name to store the path results
'user_id', -- Partition expression to group the data table
'event_timestamp ASC', -- Order expression to sort the tuples of the data table
'I:="Click_event"=0 AND purchase_event=0, Click_event:="Click_event"=1 AND purchase_event=0, Conv:=purchase_event=1', -- Definition of various symbols used in the pattern definition
'I(click_event){1}(CONV){1}', -- Definition of the path pattern to search for
'SUM("Margin") as sum_of_margin, SUM(revenue) as sum_of_revenue', -- Aggregate/window functions to be applied on the matched paths
TRUE
);
------------------------------------------------------------
SELECT * FROM "Path_output_tuples";
SELECT * FROM "Path_output";