blob: 2041b1f3b640d8a403880c46aadfb59f7e95ce0b [file] [log] [blame]
---------------------------------------------------------------------------
-- Rules:
-- ------
-- 1) Any DB objects should be created w/o schema prefix,
-- since this file is executed in a separate schema context.
-- 2) There should be no DROP statements in this script, since
-- all objects created in the default schema will be cleaned-up outside.
---------------------------------------------------------------------------
---------------------------------------------------------------------------
-- Setup:
---------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION assoc_array_eq
(
arr1 TEXT[],
arr2 TEXT[]
)
RETURNS BOOL AS $$
SELECT COUNT(*) = array_upper($1, 1) AND array_upper($1, 1) = array_upper($2, 1)
FROM (SELECT unnest($1) id) t1, (SELECT unnest($2) id) t2
WHERE t1.id = t2.id;
$$ LANGUAGE sql IMMUTABLE;
CREATE OR REPLACE FUNCTION install_test() RETURNS VOID AS $$
declare
result1 TEXT;
result2 TEXT;
result3 TEXT;
result4 TEXT;
result5 TEXT;
result_maxiter TEXT;
res MADLIB_SCHEMA.assoc_rules_results;
output_schema TEXT;
output_table TEXT;
total_rules INT;
total_time INTERVAL;
begin
DROP TABLE IF EXISTS test_data1;
CREATE TABLE test_data1 (
trans_id INT
, product INT
);
DROP TABLE IF EXISTS test_data2;
CREATE TABLE test_data2 (
trans_id INT
, product VARCHAR
);
INSERT INTO test_data1 VALUES (1,1);
INSERT INTO test_data1 VALUES (1,2);
INSERT INTO test_data1 VALUES (3,3);
INSERT INTO test_data1 VALUES (8,4);
INSERT INTO test_data1 VALUES (10,1);
INSERT INTO test_data1 VALUES (10,2);
INSERT INTO test_data1 VALUES (10,3);
INSERT INTO test_data1 VALUES (19,2);
INSERT INTO test_data2 VALUES (1, 'beer');
INSERT INTO test_data2 VALUES (1, 'diapers');
INSERT INTO test_data2 VALUES (1, 'chips');
INSERT INTO test_data2 VALUES (2, 'beer');
INSERT INTO test_data2 VALUES (2, 'diapers');
INSERT INTO test_data2 VALUES (3, 'beer');
INSERT INTO test_data2 VALUES (3, 'diapers');
INSERT INTO test_data2 VALUES (4, 'beer');
INSERT INTO test_data2 VALUES (4, 'chips');
INSERT INTO test_data2 VALUES (5, 'beer');
INSERT INTO test_data2 VALUES (6, 'beer');
INSERT INTO test_data2 VALUES (6, 'diapers');
INSERT INTO test_data2 VALUES (6, 'chips');
INSERT INTO test_data2 VALUES (7, 'beer');
INSERT INTO test_data2 VALUES (7, 'diapers');
DROP TABLE IF EXISTS test1_exp_result;
CREATE TABLE test1_exp_result (
ruleid integer,
pre text[],
post text[],
support double precision,
confidence double precision,
lift double precision,
conviction double precision
) ;
DROP TABLE IF EXISTS test2_exp_result;
CREATE TABLE test2_exp_result (
ruleid integer,
pre text[],
post text[],
support double precision,
confidence double precision,
lift double precision,
conviction double precision,
lhs_1d BOOL,
rhs_1d BOOL
) ;
INSERT INTO test1_exp_result VALUES (7, '{3}', '{1}', 0.20000000000000001, 0.5, 1.2499999999999998, 1.2);
INSERT INTO test1_exp_result VALUES (4, '{2}', '{1}', 0.40000000000000002, 0.66666666666666674, 1.6666666666666667, 1.8000000000000003);
INSERT INTO test1_exp_result VALUES (1, '{1}', '{2,3}', 0.20000000000000001, 0.5, 2.4999999999999996, 1.6000000000000001);
INSERT INTO test1_exp_result VALUES (9, '{2,3}', '{1}', 0.20000000000000001, 1, 2.4999999999999996, 0);
INSERT INTO test1_exp_result VALUES (6, '{1,2}', '{3}', 0.20000000000000001, 0.5, 1.2499999999999998, 1.2);
INSERT INTO test1_exp_result VALUES (8, '{3}', '{2}', 0.20000000000000001, 0.5, 0.83333333333333337, 0.80000000000000004);
INSERT INTO test1_exp_result VALUES (5, '{1}', '{2}', 0.40000000000000002, 1, 1.6666666666666667, 0);
INSERT INTO test1_exp_result VALUES (2, '{3}', '{2,1}', 0.20000000000000001, 0.5, 1.2499999999999998, 1.2);
INSERT INTO test1_exp_result VALUES (10, '{3,1}', '{2}', 0.20000000000000001, 1, 1.6666666666666667, 0);
INSERT INTO test1_exp_result VALUES (3, '{1}', '{3}', 0.20000000000000001, 0.5, 1.2499999999999998, 1.2);
INSERT INTO test2_exp_result VALUES (7, '{chips,diapers}', '{beer}', 0.2857142857142857, 1, 1, 0, false, true);
INSERT INTO test2_exp_result VALUES (2, '{chips}', '{diapers}', 0.2857142857142857, 0.66666666666666663, 0.93333333333333324, 0.85714285714285698, true, true);
INSERT INTO test2_exp_result VALUES (1, '{chips}', '{diapers,beer}', 0.2857142857142857, 0.66666666666666663, 0.93333333333333324, 0.85714285714285698, true, false);
INSERT INTO test2_exp_result VALUES (6, '{diapers}', '{beer}', 0.7142857142857143, 1, 1, 0, true, true);
INSERT INTO test2_exp_result VALUES (4, '{beer}', '{diapers}', 0.7142857142857143, 0.7142857142857143, 1, 1, true, true);
INSERT INTO test2_exp_result VALUES (3, '{chips,beer}', '{diapers}', 0.2857142857142857, 0.66666666666666663, 0.93333333333333324, 0.85714285714285698, false, true);
INSERT INTO test2_exp_result VALUES (5, '{chips}', '{beer}', 0.42857142857142855, 1, 1, 0, true, true);
res = MADLIB_SCHEMA.assoc_rules (.1, .5, 'trans_id', 'product', 'test_data1','madlib_installcheck_assoc_rules', false);
-- ensure the members in the returned type was not changed by mistake.
output_schema = res.output_schema;
output_table = res.output_table;
total_rules = res.total_rules;
total_time = res.total_time;
SELECT INTO result1 CASE WHEN count(*) = 10 then 'PASS' ELSE 'FAIL' END
FROM assoc_rules t1, test1_exp_result t2
WHERE assoc_array_eq(t1.pre, t2.pre) AND
assoc_array_eq(t1.post, t2.post) AND
abs(t1.support - t2.support) < 1E-10 AND
abs(t1.confidence - t2.confidence) < 1E-10;
IF result1 = 'FAIL' THEN
RAISE EXCEPTION 'Association rules mining failed. No results were returned for result 1.';
END IF;
DROP TABLE IF EXISTS madlib_installcheck_assoc_rules.assoc_rules;
PERFORM MADLIB_SCHEMA.assoc_rules (.1, .5, 'trans_id', 'product', 'test_data2','madlib_installcheck_assoc_rules', false);
SELECT INTO result2 CASE WHEN count(*) = 7 then 'PASS' ELSE 'FAIL' END
FROM assoc_rules t1, test2_exp_result t2
WHERE assoc_array_eq(t1.pre, t2.pre) AND
assoc_array_eq(t1.post, t2.post) AND
abs(t1.support - t2.support) < 1E-10 AND
abs(t1.confidence - t2.confidence) < 1E-10;
IF (result2 = 'FAIL') THEN
RAISE EXCEPTION 'Association rules mining failed. No results were returned for result 2.';
END IF;
-- Test for max_RHS_size=2. No rules with RHS greater than 1 item must exist.
DROP TABLE IF EXISTS madlib_installcheck_assoc_rules.assoc_rules;
PERFORM MADLIB_SCHEMA.assoc_rules (.1, .5, 'trans_id', 'product', 'test_data2','madlib_installcheck_assoc_rules', false, NULL, NULL, 1);
SELECT INTO result3 CASE WHEN count(*) = 6 then 'PASS' ELSE 'FAIL' END
FROM assoc_rules t1, test2_exp_result t2
WHERE assoc_array_eq(t1.pre, t2.pre) AND
assoc_array_eq(t1.post, t2.post) AND
abs(t1.support - t2.support) < 1E-10 AND
abs(t1.confidence - t2.confidence) < 1E-10 AND
rhs_1d=true;
IF result3 = 'FAIL' THEN
RAISE EXCEPTION 'Association rules mining failed. Assertion failed when max_RHS_size=1';
END IF;
-- Test for max_LHS_size=2. No rules with LHS greater than 1 item must exist.
DROP TABLE IF EXISTS madlib_installcheck_assoc_rules.assoc_rules;
PERFORM MADLIB_SCHEMA.assoc_rules (.1, .5, 'trans_id', 'product', 'test_data2','madlib_installcheck_assoc_rules', false, NULL, 1, NULL);
SELECT INTO result4 CASE WHEN count(*) = 5 then 'PASS' ELSE 'FAIL' END
FROM assoc_rules t1, test2_exp_result t2
WHERE assoc_array_eq(t1.pre, t2.pre) AND
assoc_array_eq(t1.post, t2.post) AND
abs(t1.support - t2.support) < 1E-10 AND
abs(t1.confidence - t2.confidence) < 1E-10 AND
lhs_1d=true;
IF result4 = 'FAIL' THEN
RAISE EXCEPTION 'Association rules mining failed. Assertion failed when max_LHS_size=1';
END IF;
-- Test for max_itemset_size=2. No rules with either LHS or RHS should contain greater than 1 item.
DROP TABLE IF EXISTS madlib_installcheck_assoc_rules.assoc_rules;
PERFORM MADLIB_SCHEMA.assoc_rules (.1, .5, 'trans_id', 'product', 'test_data2','madlib_installcheck_assoc_rules', false, 2, NULL, NULL);
SELECT INTO result5 CASE WHEN count(*) = 4 then 'PASS' ELSE 'FAIL' END
FROM assoc_rules t1, test2_exp_result t2
WHERE assoc_array_eq(t1.pre, t2.pre) AND
assoc_array_eq(t1.post, t2.post) AND
abs(t1.support - t2.support) < 1E-10 AND
abs(t1.confidence - t2.confidence) < 1E-10 AND
rhs_1d=true AND
lhs_1d=true;
IF result5 = 'FAIL' THEN
RAISE EXCEPTION 'Association rules mining failed. Assertion failed when max_itemset_size=2';
END IF;
DROP TABLE IF EXISTS madlib_installcheck_assoc_rules.assoc_rules;
PERFORM MADLIB_SCHEMA.assoc_rules (.1, .5, 'trans_id', 'product', 'test_data2','madlib_installcheck_assoc_rules', false, 2);
SELECT INTO result_maxiter CASE WHEN count(*) = 4 then 'PASS' ELSE 'FAIL' END
FROM assoc_rules;
DROP TABLE IF EXISTS test2_exp_result;
DROP TABLE IF EXISTS test1_exp_result;
IF result_maxiter = 'FAIL' THEN
RAISE EXCEPTION 'Association rules mining error when max_iter parameter specified.';
END IF;
RAISE INFO 'Association rules dev check output test cases passed.';
RETURN;
end $$ language plpgsql;
---------------------------------------------------------------------------
-- Test
---------------------------------------------------------------------------
SELECT install_test();
-- Input test cases.
SELECT MADLIB_SCHEMA.assert(MADLIB_SCHEMA.trap_error($TRAP$
SELECT MADLIB_SCHEMA.assoc_rules (.1, .5, 'trans_id', 'product', 'test_data2','madlib_installcheck_assoc_rules', false, 2, 0, NULL);
$TRAP$) = 1, 'Should error out if max_LHS_size is < 1');
SELECT MADLIB_SCHEMA.assert(MADLIB_SCHEMA.trap_error($TRAP$
SELECT MADLIB_SCHEMA.assoc_rules (.1, .5, 'trans_id', 'product', 'test_data2','madlib_installcheck_assoc_rules', false, NULL, 5, -1);
$TRAP$) = 1, 'Should error out if max_RHS_size is < 1');
DROP TABLE IF EXISTS test_data1;
DROP TABLE IF EXISTS test_data2;