-- @@@ START COPYRIGHT @@@
--
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
--
-- @@@ END COPYRIGHT @@@
set pattern $$QUOTE$$ ''''; -- Needed for metadata insert of DLL path
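-- $$QUOTE$$ expands to a single quote character, so that, e.g., the create
-- library statement in section register_functions expands to something like
--   create library TEST002 file '<regression run dir>/TEST002.dll';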
-- We may use fake hardware settings to parallelize UDFs
--control query default ARKCMP_FAKE_HW 'ON';
--control query default DEF_NUM_SMP_CPUS '4';
--control query default DEF_NUM_NODES_IN_ACTIVE_CLUSTERS '1';
--control query default ATTEMPT_ESP_PARALLELISM 'ON';
obey TEST002(clean_up);
log LOG002 clear;
sh rm -f LOG002-SECONDARY;
obey TEST002(compile_libs);
obey TEST002(create_tables);
obey TEST002(register_functions);
obey TEST002(tests);
log;
--obey TEST002(clean_up);
exit;
?section clean_up
--------------------------------------------------------------------------
drop table doc_table;
drop table t002_Timeseries;
drop table_mapping function timeseries;
drop table_mapping function tokenizer;
drop table_mapping function tokenizer1;
drop library TEST002;
?section compile_libs
--------------------------------------------------------------------------
log;
sh rm -f ./TEST002.dll;
sh sh $$scriptsdir$$/tools/dll-compile.ksh TEST002.cpp
  2>&1 | tee -a LOG002-SECONDARY;
set pattern $$DLL$$ TEST002.dll;
log LOG002;
?section create_tables
--------------------------------------------------------------------------
create table doc_table (a int not null, text_data varchar(4000), primary key (a));
insert into doc_table values
  (1, 'Hello world! Hello world! This is a sample ');
create table t002_Timeseries(p int,
                             ts timestamp,
                             val1 int,
                             val2 numeric(9,2));
insert into t002_Timeseries values
  (1, timestamp '2015-01-01 00:00:00', 1, 1.00),
  (1, timestamp '2015-01-01 00:01:00', 60, 60.00),
  (2, timestamp '2015-01-01 00:00:03', 1, 1.00),
  (2, timestamp '2015-01-01 00:01:00', 60, 60.00),
  (3, timestamp '2015-01-01 00:00:00', 1, 1.00),
  (3, timestamp '2015-01-01 00:00:00', 2, 2.00),
  (3, timestamp '2015-01-01 00:00:06', 6, 6.00),
  (3, timestamp '2015-01-01 00:00:07', 7, 6.00),
  (3, timestamp '2015-01-01 00:00:30', 30, null),
  (3, timestamp '2015-01-01 00:00:35', null, 35),
  (3, timestamp '2015-01-01 00:00:40', null, null),
  (3, timestamp '2015-01-01 00:01:00', 60, 60.00);
?section register_functions
--------------------------------------------------------------------------
create library TEST002 file $$QUOTE$$ $$REGRRUNDIR$$/$$DLL$$ $$QUOTE$$;
create table_mapping function tokenizer(pattern char(1))
returns (outval char(40))
external name 'TOKENIZER'
language cpp
library TEST002;
-- Register tokenizer1 without column information
create table_mapping function tokenizer1(pattern char(1))
external name 'TOKENIZER'
language cpp
library TEST002;
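-- without a RETURNS clause, the output columns of tokenizer1 are supplied at
-- compile time by the UDF's C++ compiler interface (presumably an override of
-- UDR::describeParamsAndColumns() in TEST002.cpp)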
?section tests
--------------------------------------------------------------------------
select token, count(*) from
UDF(tokenizer(TABLE(select text_data from doc_table), ' ')) XO(token)
group by token order by 2,1;
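-- assuming the tokenizer simply splits on the delimiter, the row inserted
-- above should yield 'Hello' and 'world!' twice each, plus 'This', 'is',
-- 'a', and 'sample' once each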
prepare s1 from
select * from
UDF(tokenizer(TABLE(select text_data from doc_table where a < ?p2),
              cast(?p1 as char(1)))) XO(token);
set param ?p1 ' ' ;
set param ?p2 2;
execute s1 ;
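-- s1 can be re-executed with different parameter values, e.g. (commented out
-- here, to keep the expected output stable):
--set param ?p2 99;
--execute s1;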
select token, count(*) from
UDF(tokenizer(TABLE(select * from (values
                      ('This is the first row'),
                      ('This is the second row'),
                      ('This is the third row'),
                      ('This is the fourth row'),
                      ('This is the fifth row'),
                      ('This is the sixth row'),
                      ('This is the seventh row'),
                      ('This is the eighth row')
                    ) as val(text_data) ), ' ')) XO(token)
group by token
order by token;
select * from
UDF(tokenizer(TABLE(select text_data from doc_table), 1)) XO(token);
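-- negative test, the delimiter parameter must be a character string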
select * from
UDF(tokenizer(TABLE(select text_data from doc_table), ' ', 1)) XO(token);
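-- negative test, too many arguments; tokenizer takes a single delimiter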
prepare s1 from
select token, count(*) from
UDF(tokenizer1(TABLE(select * from (values
                       (trim($$QUOTE$$ $$scriptsdir$$ $$QUOTE$$) || '/udr/romeo1.txt'),
                       (trim($$QUOTE$$ $$scriptsdir$$ $$QUOTE$$) || '/udr/romeo2.txt'),
                       (trim($$QUOTE$$ $$scriptsdir$$ $$QUOTE$$) || '/udr/romeo3.txt'),
                       (trim($$QUOTE$$ $$scriptsdir$$ $$QUOTE$$) || '/udr/romeo4.txt')
                     ) as val(text_data) ), ' ')) XO(token)
group by token
order by 2,1;
execute s1 ;
-- some simple tests for the event_log_reader predefined TMUDF
-- (result of the UDF is not deterministic)
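-- ([last 0] runs the query but returns no rows, keeping the logged output stable)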
select [last 0] * from udf(event_log_reader());
select [last 0] log_ts + interval '1' day,
                severity || 'x',
                substring(component from 1 for 2),
                node_number + 1,
                cpu + 2,
                pin * 4,
                process_name || query_id,
                sql_code,
                message,
                log_file_node,
                log_file_name,
                log_file_line - 1,
                case when parse_status = ' ' then 'ok' else parse_status end
from udf(event_log_reader('f'));
select coalesce(min(log_file_node), 0) from udf(event_log_reader('f')) x
where x.log_file_line <100;
-- copy a file with known content to the log directory and see whether
-- we get the correct results
sh cp $$scriptsdir$$/udr/TEST002.sample_events $$TRAF_HOME$$/logs/master_exec_regr_999_99999.log;
select count(*) as num_events,
       max(log_file_line) as num_lines,
       count(distinct query_id) as num_query_ids,
       sum(case when parse_status = ' ' then 0 else 1 end) as num_parse_errors
from udf(event_log_reader('f'))
where log_file_name = 'master_exec_regr_999_99999.log';
-- there are 22 events in 33 lines in this file, with 13 unique query ids and 3 parse errors
select * from udf(event_log_reader('f'))
where log_file_name = 'master_exec_regr_999_99999.log'
and (log_file_line in (1,3,8,16,23) or parse_status <> ' ')
order by log_file_line;
-- lines 20, 30 and 31 have parse errors and are also displayed
sh rm $$TRAF_HOME$$/logs/master_exec_regr_999_99999.log;
-- some negative test cases
prepare s from select * from udf(event_log_reader(10));
-- parameter must be a string
prepare s from select * from udf(event_log_reader(?));
-- parameter must be available at compile time
prepare s from select * from udf(event_log_reader(table(select * from (values (1)) as x)));
-- table-valued input not allowed
prepare s from select query_id, log_file_name, parse_status
from udf(event_log_reader());
-- log_file_name and parse_status columns not available without 'f' option
-- some simple tests for the timeseries builtin UDF
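-- after the table argument, the slice column name, and the slice width,
-- arguments come in pairs of (column name, instruction); in the instruction
-- codes used below, the first letter F/L selects the first/last value in a
-- time slice, the second letter C/L selects constant/linear interpolation,
-- and a trailing I/i appears to make the UDF ignore NULL values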
select *
from udf(timeseries(table(select * from t002_Timeseries
                          partition by p
                          order by ts),
                    'SLICE_TIME',
                    interval '5' second,
                    'VAL1', 'FC',
                    'VAL1', 'LC',
                    'VAL2', 'FC',
                    'VAL2', 'LC'))
order by p, slice_time;
select *
from udf(timeseries(table(select * from t002_Timeseries
                          partition by p
                          order by ts),
                    'SLICE_TIME',
                    interval '5' second,
                    'VAL1', 'FCI',
                    'VAL1', 'LCI',
                    'VAL2', 'FCI',
                    'VAL2', 'LCI',
                    'VAL1', 'FL',
                    'VAL1', 'LL',
                    'VAL2', 'FL',
                    'VAL2', 'LL',
                    'VAL1', 'FLi',
                    'VAL1', 'LLi',
                    'VAL2', 'FLi',
                    'VAL2', 'LLi'))
order by p, slice_time;
-- JDBC queries through a TMUDF. Note that this requires the Trafodion T4
-- driver jar to exist in $TRAF_HOME/udr/external_libs.
sh mkdir -p $$TRAF_HOME$$/udr/external_libs;
sh cp $$TRAF_HOME$$/export/lib/jdbcT4-*.jar $$TRAF_HOME$$/udr/external_libs;
-- with default ports, $$QUOTE$$$$JDBC_T4_URL$$$$QUOTE$$ should be
-- 'jdbc:t4jdbc://localhost:23400/:'
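-- arguments to the predefined jdbc() TMUDF, in order: driver jar file name
-- (looked up in $TRAF_HOME/udr/external_libs), driver class name, connection
-- string, user id, password, statement type ('source' marks the statement
-- that produces the result set), and the statement text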
prepare s_traf from
select *
from udf(jdbc('jdbcT4-${TRAFODION_VER}.jar',
              'org.trafodion.jdbc.t4.T4Driver',
              $$QUOTE$$$$JDBC_T4_URL$$$$QUOTE$$,
              'any', -- no user id
              'any', -- no password
              'source',
              'select * from (values (''Hello'', ''World''), (''Hallo'', ''Welt'')) T(a,b)'));
execute s_traf;
-- negative tests
select * from udf(timeseries());
-- must have one table-valued input
select *
from udf(timeseries(table(select * from t002_Timeseries
                          order by p),
                    'SLICE_TIME',
                    interval '5' second,
                    'VAL1', 'FC',
                    'VAL1', 'LC',
                    'VAL2', 'FC',
                    'VAL2', 'LC'));
-- order by must be for a timestamp value
select *
from udf(timeseries(table(select * from t002_Timeseries
                          order by ts),
                    'SLICE_TIME',
                    5,
                    'VAL1', 'FC',
                    'VAL1', 'LC',
                    'VAL2', 'FC',
                    'VAL2', 'LC'));
-- time slice width must be an interval
select *
from udf(timeseries(table(select * from t002_Timeseries
                          partition by p
                          order by ts),
                    'SLICE_TIME',
                    interval '5' second,
                    'VAL1', 'AB',
                    'VAL1', 'LC'));
-- 'AB' is not a valid instruction
select *
from udf(timeseries(table(select * from t002_Timeseries
                          partition by p
                          order by ts),
                    'SLICE_TIME',
                    interval '5' second,
                    'VAL9', 'FC'));
-- There is no column VAL9
select *
from udf(timeseries(table(select * from t002_Timeseries
                          partition by p
                          order by ts),
                    'SLICE_TIME',
                    interval '5' second,
                    'VAL9', 'FC',
                    'VAL1'));
-- Arguments not in pairs of column names and instructions
-- eclipse (override) the timeseries built-in function with a true UDF of the same name
create table_mapping function timeseries(pattern char(1))
returns (outval char(40))
external name 'TOKENIZER'
library TEST002;
select token, count(*) from
UDF(timeseries(TABLE(select text_data from doc_table), ' ')) XO(token)
group by token order by 2,1;
-- should tokenize (using the eclipsing UDF), not compute a time series
drop table_mapping function timeseries;
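-- once the eclipsing UDF is dropped, the name timeseries resolves to the
-- predefined function again (not re-tested here, to keep the expected output stable)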