blob: 46c353bb3fc2552c42bdd40ef2a44fdf4fb879fb [file]
CREATE EXTENSION bloom;
CREATE TABLE tst (
i int4,
t text
);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (col1 = 3);
ALTER INDEX bloomidx SET (length=80);
SET enable_seqscan=on;
SET enable_bitmapscan=off;
SET enable_indexscan=off;
SELECT count(*) FROM tst WHERE i = 7;
count
-------
200
(1 row)
SELECT count(*) FROM tst WHERE t = '5';
count
-------
112
(1 row)
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
count
-------
13
(1 row)
SET enable_seqscan=off;
SET enable_bitmapscan=on;
SET enable_indexscan=on;
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7;
QUERY PLAN
------------------------------------------------
Aggregate
-> Gather Motion 1:1 (slice1; segments: 1)
-> Seq Scan on tst
Filter: (i = 7)
Optimizer: Pivotal Optimizer (GPORCA)
(5 rows)
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE t = '5';
QUERY PLAN
------------------------------------------------
Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on tst
Filter: (t = '5'::text)
Optimizer: Pivotal Optimizer (GPORCA)
(5 rows)
EXPLAIN (COSTS OFF) SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
QUERY PLAN
-----------------------------------------------------
Aggregate
-> Gather Motion 1:1 (slice1; segments: 1)
-> Seq Scan on tst
Filter: ((i = 7) AND (t = '5'::text))
Optimizer: Pivotal Optimizer (GPORCA)
(5 rows)
SELECT count(*) FROM tst WHERE i = 7;
count
-------
200
(1 row)
SELECT count(*) FROM tst WHERE t = '5';
count
-------
112
(1 row)
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
count
-------
13
(1 row)
DELETE FROM tst;
INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
VACUUM ANALYZE tst;
SELECT count(*) FROM tst WHERE i = 7;
count
-------
200
(1 row)
SELECT count(*) FROM tst WHERE t = '5';
count
-------
112
(1 row)
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
count
-------
13
(1 row)
DELETE FROM tst WHERE i > 1 OR t = '5';
VACUUM tst;
INSERT INTO tst SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
SELECT count(*) FROM tst WHERE i = 7;
count
-------
200
(1 row)
SELECT count(*) FROM tst WHERE t = '5';
count
-------
112
(1 row)
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
count
-------
13
(1 row)
VACUUM FULL tst;
SELECT count(*) FROM tst WHERE i = 7;
count
-------
200
(1 row)
SELECT count(*) FROM tst WHERE t = '5';
count
-------
112
(1 row)
SELECT count(*) FROM tst WHERE i = 7 AND t = '5';
count
-------
13
(1 row)
-- Try an unlogged table too
CREATE UNLOGGED TABLE tstu (
i int4,
t text
);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Apache Cloudberry data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
INSERT INTO tstu SELECT i%10, substr(md5(i::text), 1, 1) FROM generate_series(1,2000) i;
CREATE INDEX bloomidxu ON tstu USING bloom (i, t) WITH (col2 = 4);
SET enable_seqscan=off;
SET enable_bitmapscan=on;
SET enable_indexscan=on;
EXPLAIN (COSTS OFF) SELECT count(*) FROM tstu WHERE i = 7;
QUERY PLAN
------------------------------------------------
Aggregate
-> Gather Motion 1:1 (slice1; segments: 1)
-> Seq Scan on tstu
Filter: (i = 7)
Optimizer: Pivotal Optimizer (GPORCA)
(5 rows)
EXPLAIN (COSTS OFF) SELECT count(*) FROM tstu WHERE t = '5';
QUERY PLAN
------------------------------------------------
Aggregate
-> Gather Motion 3:1 (slice1; segments: 3)
-> Seq Scan on tstu
Filter: (t = '5'::text)
Optimizer: Pivotal Optimizer (GPORCA)
(5 rows)
EXPLAIN (COSTS OFF) SELECT count(*) FROM tstu WHERE i = 7 AND t = '5';
QUERY PLAN
-----------------------------------------------------
Aggregate
-> Gather Motion 1:1 (slice1; segments: 1)
-> Seq Scan on tstu
Filter: ((i = 7) AND (t = '5'::text))
Optimizer: Pivotal Optimizer (GPORCA)
(5 rows)
SELECT count(*) FROM tstu WHERE i = 7;
count
-------
200
(1 row)
SELECT count(*) FROM tstu WHERE t = '5';
count
-------
112
(1 row)
SELECT count(*) FROM tstu WHERE i = 7 AND t = '5';
count
-------
13
(1 row)
RESET enable_seqscan;
RESET enable_bitmapscan;
RESET enable_indexscan;
-- Run amvalidator function on our opclasses
SELECT opcname, amvalidate(opc.oid)
FROM pg_opclass opc JOIN pg_am am ON am.oid = opcmethod
WHERE amname = 'bloom'
ORDER BY 1;
opcname | amvalidate
----------+------------
int4_ops | t
text_ops | t
(2 rows)
--
-- relation options
--
DROP INDEX bloomidx;
CREATE INDEX bloomidx ON tst USING bloom (i, t) WITH (length=7, col1=4);
SELECT reloptions FROM pg_class WHERE oid = 'bloomidx'::regclass;
reloptions
-------------------
{length=7,col1=4}
(1 row)
-- check for min and max values
\set VERBOSITY terse
CREATE INDEX bloomidx2 ON tst USING bloom (i, t) WITH (length=0);
ERROR: value 0 out of bounds for option "length"
CREATE INDEX bloomidx2 ON tst USING bloom (i, t) WITH (col1=0);
ERROR: value 0 out of bounds for option "col1"