blob: 0366798f389e7ea94c6dd9c8d93fe8f0e370ad5e [file] [log] [blame]
--
-- Setup external tables to allow:
-- * "reading" from the database to parallel streams
-- * "writing" into the database from parallel streams
create extension if not exists plpython3u;
--
-- This table is our example database table
--
DROP TABLE IF EXISTS example CASCADE;
CREATE TABLE example(
id varchar(10),
name varchar(20),
value1 float8,
value2 float8,
value3 float8,
value4 float8
)
WITH (appendonly = true) DISTRIBUTED BY (id);
--
-- Define our binary formatter function
--
CREATE OR REPLACE FUNCTION formatter_export(record) RETURNS bytea
AS '$libdir/gpformatter.so', 'formatter_export'
LANGUAGE C IMMUTABLE;
CREATE OR REPLACE FUNCTION formatter_import() RETURNS record
AS '$libdir/gpformatter.so', 'formatter_import'
LANGUAGE C IMMUTABLE;
--
-- Setup an external table to read from this (e.g, have the database WRITE out)
--
DROP EXTERNAL TABLE IF EXISTS example_out;
CREATE WRITABLE EXTERNAL WEB TABLE example_out(like example)
EXECUTE '$GPHOME/bin/sassender localhost 2000' FORMAT 'CUSTOM' (FORMATTER='formatter_export') DISTRIBUTED BY (id);
--
-- Setup an external table to write into the table (e.g. have the database read in new rows)
--
DROP EXTERNAL TABLE IF EXISTS example_in;
CREATE EXTERNAL WEB TABLE example_in(like example)
EXECUTE '$GPHOME/bin/sasreceiver localhost 2000' ON ALL FORMAT 'CUSTOM' (FORMATTER='formatter_import');
---
--- DATA GENERATION
---
CREATE OR REPLACE FUNCTION dat_gen(c int) RETURNS SETOF example as $$
import random, math
random.seed(1)
alphabet = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
for x in range(c):
id = ''.join(random.sample(alphabet, int(random.random()*7+3)))
name = ''.join(random.sample(alphabet, int(random.random()*12+8)))
value1 = random.random() * 100 + 20
value2 = value1 - 10 + random.random()*math.log(value1)*5
value3 = (value1 * value2) / (value1 + value2)**2
value4 = math.log(value1) + random.random()*3
yield {'id': id, 'name': name, 'value1': value1, 'value2': value2, 'value3': value3, 'value4': value4}
$$ language plpython3u;
-- Add 1K records on each segment
\echo 'loading data...'
INSERT INTO example SELECT (a).* FROM (SELECT dat_gen(1000) a from gp_dist_random('gp_id')) q;
select 'loaded ' || pg_size_pretty(pg_relation_size('example')) || ' of data' as "data loaded";
-- Done with the data generation function
DROP FUNCTION dat_gen(int);