src/test/regress/sql/conversion.sql - cloudberry - Git at Google

 --
 -- create user defined conversion
 --

 -- directory paths and dlsuffix are passed to us in environment variables
 \getenv libdir PG_LIBDIR
 \getenv dlsuffix PG_DLSUFFIX

 \set regresslib :libdir '/regress' :dlsuffix

 CREATE FUNCTION test_enc_setup() RETURNS void
     AS :'regresslib', 'test_enc_setup'
     LANGUAGE C STRICT;
 SELECT FROM test_enc_setup();

 CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
     AS :'regresslib', 'test_enc_conversion'
     LANGUAGE C STRICT;

 SELECT FROM test_enc_setup();

 CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
 SET SESSION AUTHORIZATION regress_conversion_user;
 CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
 --
 -- cannot make same name conversion in same schema
 --
 CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
 --
 -- create default conversion with qualified name
 --
 CREATE DEFAULT CONVERSION public.mydef FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
 --
 -- cannot make default conversion with same schema/for_encoding/to_encoding
 --
 CREATE DEFAULT CONVERSION public.mydef2 FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
 -- test comments
 COMMENT ON CONVERSION myconv_bad IS 'foo';
 COMMENT ON CONVERSION myconv IS 'bar';
 COMMENT ON CONVERSION myconv IS NULL;
 --
 -- rename conversion
 --
 ALTER CONVERSION myconv RENAME TO myconv1;
 ALTER CONVERSION myconv1 OWNER TO regress_conversion_user;

 --
 -- list all conversions
 --
 \dc;

 --
 -- drop user defined conversion
 --
 DROP CONVERSION myconv1;
 DROP CONVERSION mydef;
 --
 -- Note: the built-in conversions are exercised in opr_sanity.sql,
 -- so there's no need to do that here.
 --
 --
 -- return to the superuser
 --
 RESET SESSION AUTHORIZATION;
 DROP USER regress_conversion_user;

 --
 -- Test built-in conversion functions.
 --

 -- Helper function to test a conversion. Uses the test_enc_conversion function
 -- that was created in the create_function_0 test.
 create or replace function test_conv(
   input IN bytea,
   src_encoding IN text,
   dst_encoding IN text,

   result OUT bytea,
   errorat OUT bytea,
   error OUT text)
 language plpgsql as
 $$
 declare
   validlen int;
 begin
   -- First try to perform the conversion with noError = false. If that errors out,
   -- capture the error message, and try again with noError = true. The second call
   -- should succeed and return the position of the error, return that too.
   begin
     select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, false);
     errorat = NULL;
     error := NULL;
   exception when others then
     error := sqlerrm;
     select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, true);
     errorat = substr(input, validlen + 1);
   end;
   return;
 end;
 $$;


 --
 -- UTF-8
 --
 -- The description column must be unique.
 CREATE TABLE utf8_verification_inputs (inbytes bytea, description text PRIMARY KEY);
 insert into utf8_verification_inputs  values
   ('\x66006f',	'NUL byte'),
   ('\xaf',		'bare continuation'),
   ('\xc5',		'missing second byte in 2-byte char'),
   ('\xc080',	'smallest 2-byte overlong'),
   ('\xc1bf',	'largest 2-byte overlong'),
   ('\xc280',	'next 2-byte after overlongs'),
   ('\xdfbf',	'largest 2-byte'),
   ('\xe9af',	'missing third byte in 3-byte char'),
   ('\xe08080',	'smallest 3-byte overlong'),
   ('\xe09fbf',	'largest 3-byte overlong'),
   ('\xe0a080',	'next 3-byte after overlong'),
   ('\xed9fbf',	'last before surrogates'),
   ('\xeda080',	'smallest surrogate'),
   ('\xedbfbf',	'largest surrogate'),
   ('\xee8080',	'next after surrogates'),
   ('\xefbfbf',	'largest 3-byte'),
   ('\xf1afbf',	'missing fourth byte in 4-byte char'),
   ('\xf0808080',	'smallest 4-byte overlong'),
   ('\xf08fbfbf',	'largest 4-byte overlong'),
   ('\xf0908080',	'next 4-byte after overlong'),
   ('\xf48fbfbf',	'largest 4-byte'),
   ('\xf4908080',	'smallest too large'),
   ('\xfa9a9a8a8a',	'5-byte');

 -- Test UTF-8 verification slow path
 select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_verification_inputs;

 -- Test UTF-8 verification with ASCII padding appended to provide
 -- coverage for algorithms that work on multiple bytes at a time.
 -- The error message for a sequence starting with a 4-byte lead
 -- will contain all 4 bytes if they are present, so various
 -- expressions below add 3 ASCII bytes to the end to ensure
 -- consistent error messages.
 -- The number 64 below needs to be at least the value of STRIDE_LENGTH in wchar.c.

 -- Test multibyte verification in fast path
 with test_bytes as (
   select
     inbytes,
     description,
     (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
   from utf8_verification_inputs
 ), test_padded as (
   select
     description,
     (test_conv(inbytes || repeat('.', 64)::bytea, 'utf8', 'utf8')).error
   from test_bytes
 )
 select
   description,
   b.error as orig_error,
   p.error as error_after_padding
 from test_padded p
 join test_bytes b
 using (description)
 where p.error is distinct from b.error
 order by description;

 -- Test ASCII verification in fast path where incomplete
 -- UTF-8 sequences fall at the end of the preceding chunk.
 with test_bytes as (
   select
     inbytes,
     description,
     (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
   from utf8_verification_inputs
 ), test_padded as (
   select
     description,
     (test_conv(repeat('.', 64 - length(inbytes))::bytea || inbytes || repeat('.', 64)::bytea, 'utf8', 'utf8')).error
   from test_bytes
 )
 select
   description,
   b.error as orig_error,
   p.error as error_after_padding
 from test_padded p
 join test_bytes b
 using (description)
 where p.error is distinct from b.error
 order by description;

 -- Test cases where UTF-8 sequences within short text
 -- come after the fast path returns.
 with test_bytes as (
   select
     inbytes,
     description,
     (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
   from utf8_verification_inputs
 ), test_padded as (
   select
     description,
     (test_conv(repeat('.', 64)::bytea || inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
   from test_bytes
 )
 select
   description,
   b.error as orig_error,
   p.error as error_after_padding
 from test_padded p
 join test_bytes b
 using (description)
 where p.error is distinct from b.error
 order by description;

 -- Test cases where incomplete UTF-8 sequences fall at the
 -- end of the part checked by the fast path.
 with test_bytes as (
   select
     inbytes,
     description,
     (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
   from utf8_verification_inputs
 ), test_padded as (
   select
     description,
     (test_conv(repeat('.', 64 - length(inbytes))::bytea || inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
   from test_bytes
 )
 select
   description,
   b.error as orig_error,
   p.error as error_after_padding
 from test_padded p
 join test_bytes b
 using (description)
 where p.error is distinct from b.error
 order by description;

 CREATE TABLE utf8_inputs (inbytes bytea, description text);
 insert into utf8_inputs  values
   ('\x666f6f',		'valid, pure ASCII'),
   ('\xc3a4c3b6',	'valid, extra latin chars'),
   ('\xd184d0bed0be',	'valid, cyrillic'),
   ('\x666f6fe8b1a1',	'valid, kanji/Chinese'),
   ('\xe382abe3829a',	'valid, two chars that combine to one in EUC_JIS_2004'),
   ('\xe382ab',		'only first half of combined char in EUC_JIS_2004'),
   ('\xe382abe382',	'incomplete combination when converted EUC_JIS_2004'),
   ('\xecbd94eb81bceba6ac', 'valid, Hangul, Korean'),
   ('\x666f6fefa8aa',	'valid, needs mapping function to convert to GB18030'),
   ('\x66e8b1ff6f6f',	'invalid byte sequence'),
   ('\x66006f',		'invalid, NUL byte'),
   ('\x666f6fe8b100',	'invalid, NUL byte'),
   ('\x666f6fe8b1',	'incomplete character at end');

 -- Test UTF-8 verification
 select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_inputs;
 -- Test conversions from UTF-8
 select description, inbytes, (test_conv(inbytes, 'utf8', 'euc_jis_2004')).* from utf8_inputs;
 select description, inbytes, (test_conv(inbytes, 'utf8', 'latin1')).* from utf8_inputs;
 select description, inbytes, (test_conv(inbytes, 'utf8', 'latin2')).* from utf8_inputs;
 select description, inbytes, (test_conv(inbytes, 'utf8', 'latin5')).* from utf8_inputs;
 select description, inbytes, (test_conv(inbytes, 'utf8', 'koi8r')).* from utf8_inputs;
 select description, inbytes, (test_conv(inbytes, 'utf8', 'gb18030')).* from utf8_inputs;

 --
 -- EUC_JIS_2004
 --
 CREATE TABLE euc_jis_2004_inputs (inbytes bytea, description text);
 insert into euc_jis_2004_inputs  values
   ('\x666f6f',		'valid, pure ASCII'),
   ('\x666f6fbedd',	'valid'),
   ('\xa5f7',		'valid, translates to two UTF-8 chars '),
   ('\xbeddbe',		'incomplete char '),
   ('\x666f6f00bedd',	'invalid, NUL byte'),
   ('\x666f6fbe00dd',	'invalid, NUL byte'),
   ('\x666f6fbedd00',	'invalid, NUL byte'),
   ('\xbe04',		'invalid byte sequence');

 -- Test EUC_JIS_2004 verification
 select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'euc_jis_2004')).* from euc_jis_2004_inputs;
 -- Test conversions from EUC_JIS_2004
 select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'utf8')).* from euc_jis_2004_inputs;

 --
 -- SHIFT-JIS-2004
 --
 CREATE TABLE shiftjis2004_inputs (inbytes bytea, description text);
 insert into shiftjis2004_inputs  values
   ('\x666f6f',		'valid, pure ASCII'),
   ('\x666f6f8fdb',	'valid'),
   ('\x666f6f81c0',	'valid, no translation to UTF-8'),
   ('\x666f6f82f5',	'valid, translates to two UTF-8 chars '),
   ('\x666f6f8fdb8f',	'incomplete char '),
   ('\x666f6f820a',	'incomplete char, followed by newline '),
   ('\x666f6f008fdb',	'invalid, NUL byte'),
   ('\x666f6f8f00db',	'invalid, NUL byte'),
   ('\x666f6f8fdb00',	'invalid, NUL byte');

 -- Test SHIFT-JIS-2004 verification
 select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'shiftjis2004')).* from shiftjis2004_inputs;
 -- Test conversions from SHIFT-JIS-2004
 select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'utf8')).* from shiftjis2004_inputs;
 select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'euc_jis_2004')).* from shiftjis2004_inputs;

 --
 -- GB18030
 --
 CREATE TABLE gb18030_inputs (inbytes bytea, description text);
 insert into gb18030_inputs  values
   ('\x666f6f',		'valid, pure ASCII'),
   ('\x666f6fcff3',	'valid'),
   ('\x666f6f8431a530',	'valid, no translation to UTF-8'),
   ('\x666f6f84309c38',	'valid, translates to UTF-8 by mapping function'),
   ('\x666f6f84309c',	'incomplete char '),
   ('\x666f6f84309c0a',	'incomplete char, followed by newline '),
   ('\x666f6f84',		'incomplete char at end'),
   ('\x666f6f84309c3800', 'invalid, NUL byte'),
   ('\x666f6f84309c0038', 'invalid, NUL byte');

 -- Test GB18030 verification.  Round-trip through text so the backing of the
 -- bytea values is palloc, not shared_buffers.  This lets Valgrind detect
 -- reads past the end.
 select description, inbytes, (test_conv(inbytes::text::bytea, 'gb18030', 'gb18030')).* from gb18030_inputs;
 -- Test conversions from GB18030
 select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;


 --
 -- ISO-8859-5
 --
 CREATE TABLE iso8859_5_inputs (inbytes bytea, description text);
 insert into iso8859_5_inputs  values
   ('\x666f6f',		'valid, pure ASCII'),
   ('\xe4dede',		'valid'),
   ('\x00',		'invalid, NUL byte'),
   ('\xe400dede',	'invalid, NUL byte'),
   ('\xe4dede00',	'invalid, NUL byte');

 -- Test ISO-8859-5 verification
 select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* from iso8859_5_inputs;
 -- Test conversions from ISO-8859-5
 select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
 select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
 select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;

 --
 -- Big5
 --
 CREATE TABLE big5_inputs (inbytes bytea, description text);
 insert into big5_inputs  values
   ('\x666f6f',		'valid, pure ASCII'),
   ('\x666f6fb648',	'valid'),
   ('\x666f6fa27f',	'valid, no translation to UTF-8'),
   ('\x666f6fb60048',	'invalid, NUL byte'),
   ('\x666f6fb64800',	'invalid, NUL byte');

 -- Test Big5 verification
 select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
 -- Test conversions from Big5
 select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
 select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;

 --
 -- MULE_INTERNAL
 --
 CREATE TABLE mic_inputs (inbytes bytea, description text);
 insert into mic_inputs  values
   ('\x666f6f',		'valid, pure ASCII'),
   ('\x8bc68bcf8bcf',	'valid (in KOI8R)'),
   ('\x8bc68bcf8b',	'invalid,incomplete char'),
   ('\x92bedd',		'valid (in SHIFT_JIS)'),
   ('\x92be',		'invalid, incomplete char)'),
   ('\x666f6f95a3c1',	'valid (in Big5)'),
   ('\x666f6f95a3',	'invalid, incomplete char'),
   ('\x9200bedd',	'invalid, NUL byte'),
   ('\x92bedd00',	'invalid, NUL byte'),
   ('\x8b00c68bcf8bcf',	'invalid, NUL byte');

 -- Test MULE_INTERNAL verification
 select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
 -- Test conversions from MULE_INTERNAL
 select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
 select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
 select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
 select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
 select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
	--
	-- create user defined conversion
	--

	-- directory paths and dlsuffix are passed to us in environment variables
	\getenv libdir PG_LIBDIR
	\getenv dlsuffix PG_DLSUFFIX

	\set regresslib :libdir '/regress' :dlsuffix

	CREATE FUNCTION test_enc_setup() RETURNS void
	AS :'regresslib', 'test_enc_setup'
	LANGUAGE C STRICT;
	SELECT FROM test_enc_setup();

	CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
	AS :'regresslib', 'test_enc_conversion'
	LANGUAGE C STRICT;

	SELECT FROM test_enc_setup();

	CREATE USER regress_conversion_user WITH NOCREATEDB NOCREATEROLE;
	SET SESSION AUTHORIZATION regress_conversion_user;
	CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
	--
	-- cannot make same name conversion in same schema
	--
	CREATE CONVERSION myconv FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
	--
	-- create default conversion with qualified name
	--
	CREATE DEFAULT CONVERSION public.mydef FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
	--
	-- cannot make default conversion with same schema/for_encoding/to_encoding
	--
	CREATE DEFAULT CONVERSION public.mydef2 FOR 'LATIN1' TO 'UTF8' FROM iso8859_1_to_utf8;
	-- test comments
	COMMENT ON CONVERSION myconv_bad IS 'foo';
	COMMENT ON CONVERSION myconv IS 'bar';
	COMMENT ON CONVERSION myconv IS NULL;
	--
	-- rename conversion
	--
	ALTER CONVERSION myconv RENAME TO myconv1;
	ALTER CONVERSION myconv1 OWNER TO regress_conversion_user;

	--
	-- list all conversions
	--
	\dc;

	--
	-- drop user defined conversion
	--
	DROP CONVERSION myconv1;
	DROP CONVERSION mydef;
	--
	-- Note: the built-in conversions are exercised in opr_sanity.sql,
	-- so there's no need to do that here.
	--
	--
	-- return to the superuser
	--
	RESET SESSION AUTHORIZATION;
	DROP USER regress_conversion_user;

	--
	-- Test built-in conversion functions.
	--

	-- Helper function to test a conversion. Uses the test_enc_conversion function
	-- that was created in the create_function_0 test.
	create or replace function test_conv(
	input IN bytea,
	src_encoding IN text,
	dst_encoding IN text,

	result OUT bytea,
	errorat OUT bytea,
	error OUT text)
	language plpgsql as
	$$
	declare
	validlen int;
	begin
	-- First try to perform the conversion with noError = false. If that errors out,
	-- capture the error message, and try again with noError = true. The second call
	-- should succeed and return the position of the error, return that too.
	begin
	select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, false);
	errorat = NULL;
	error := NULL;
	exception when others then
	error := sqlerrm;
	select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, true);
	errorat = substr(input, validlen + 1);
	end;
	return;
	end;
	$$;


	--
	-- UTF-8
	--
	-- The description column must be unique.
	CREATE TABLE utf8_verification_inputs (inbytes bytea, description text PRIMARY KEY);
	insert into utf8_verification_inputs values
	('\x66006f', 'NUL byte'),
	('\xaf', 'bare continuation'),
	('\xc5', 'missing second byte in 2-byte char'),
	('\xc080', 'smallest 2-byte overlong'),
	('\xc1bf', 'largest 2-byte overlong'),
	('\xc280', 'next 2-byte after overlongs'),
	('\xdfbf', 'largest 2-byte'),
	('\xe9af', 'missing third byte in 3-byte char'),
	('\xe08080', 'smallest 3-byte overlong'),
	('\xe09fbf', 'largest 3-byte overlong'),
	('\xe0a080', 'next 3-byte after overlong'),
	('\xed9fbf', 'last before surrogates'),
	('\xeda080', 'smallest surrogate'),
	('\xedbfbf', 'largest surrogate'),
	('\xee8080', 'next after surrogates'),
	('\xefbfbf', 'largest 3-byte'),
	('\xf1afbf', 'missing fourth byte in 4-byte char'),
	('\xf0808080', 'smallest 4-byte overlong'),
	('\xf08fbfbf', 'largest 4-byte overlong'),
	('\xf0908080', 'next 4-byte after overlong'),
	('\xf48fbfbf', 'largest 4-byte'),
	('\xf4908080', 'smallest too large'),
	('\xfa9a9a8a8a', '5-byte');

	-- Test UTF-8 verification slow path
	select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_verification_inputs;

	-- Test UTF-8 verification with ASCII padding appended to provide
	-- coverage for algorithms that work on multiple bytes at a time.
	-- The error message for a sequence starting with a 4-byte lead
	-- will contain all 4 bytes if they are present, so various
	-- expressions below add 3 ASCII bytes to the end to ensure
	-- consistent error messages.
	-- The number 64 below needs to be at least the value of STRIDE_LENGTH in wchar.c.

	-- Test multibyte verification in fast path
	with test_bytes as (
	select
	inbytes,
	description,
	(test_conv(inbytes \|\| repeat('.', 3)::bytea, 'utf8', 'utf8')).error
	from utf8_verification_inputs
	), test_padded as (
	select
	description,
	(test_conv(inbytes \|\| repeat('.', 64)::bytea, 'utf8', 'utf8')).error
	from test_bytes
	)
	select
	description,
	b.error as orig_error,
	p.error as error_after_padding
	from test_padded p
	join test_bytes b
	using (description)
	where p.error is distinct from b.error
	order by description;

	-- Test ASCII verification in fast path where incomplete
	-- UTF-8 sequences fall at the end of the preceding chunk.
	with test_bytes as (
	select
	inbytes,
	description,
	(test_conv(inbytes \|\| repeat('.', 3)::bytea, 'utf8', 'utf8')).error
	from utf8_verification_inputs
	), test_padded as (
	select
	description,
	(test_conv(repeat('.', 64 - length(inbytes))::bytea \|\| inbytes \|\| repeat('.', 64)::bytea, 'utf8', 'utf8')).error
	from test_bytes
	)
	select
	description,
	b.error as orig_error,
	p.error as error_after_padding
	from test_padded p
	join test_bytes b
	using (description)
	where p.error is distinct from b.error
	order by description;

	-- Test cases where UTF-8 sequences within short text
	-- come after the fast path returns.
	with test_bytes as (
	select
	inbytes,
	description,
	(test_conv(inbytes \|\| repeat('.', 3)::bytea, 'utf8', 'utf8')).error
	from utf8_verification_inputs
	), test_padded as (
	select
	description,
	(test_conv(repeat('.', 64)::bytea \|\| inbytes \|\| repeat('.', 3)::bytea, 'utf8', 'utf8')).error
	from test_bytes
	)
	select
	description,
	b.error as orig_error,
	p.error as error_after_padding
	from test_padded p
	join test_bytes b
	using (description)
	where p.error is distinct from b.error
	order by description;

	-- Test cases where incomplete UTF-8 sequences fall at the
	-- end of the part checked by the fast path.
	with test_bytes as (
	select
	inbytes,
	description,
	(test_conv(inbytes \|\| repeat('.', 3)::bytea, 'utf8', 'utf8')).error
	from utf8_verification_inputs
	), test_padded as (
	select
	description,
	(test_conv(repeat('.', 64 - length(inbytes))::bytea \|\| inbytes \|\| repeat('.', 3)::bytea, 'utf8', 'utf8')).error
	from test_bytes
	)
	select
	description,
	b.error as orig_error,
	p.error as error_after_padding
	from test_padded p
	join test_bytes b
	using (description)
	where p.error is distinct from b.error
	order by description;

	CREATE TABLE utf8_inputs (inbytes bytea, description text);
	insert into utf8_inputs values
	('\x666f6f', 'valid, pure ASCII'),
	('\xc3a4c3b6', 'valid, extra latin chars'),
	('\xd184d0bed0be', 'valid, cyrillic'),
	('\x666f6fe8b1a1', 'valid, kanji/Chinese'),
	('\xe382abe3829a', 'valid, two chars that combine to one in EUC_JIS_2004'),
	('\xe382ab', 'only first half of combined char in EUC_JIS_2004'),
	('\xe382abe382', 'incomplete combination when converted EUC_JIS_2004'),
	('\xecbd94eb81bceba6ac', 'valid, Hangul, Korean'),
	('\x666f6fefa8aa', 'valid, needs mapping function to convert to GB18030'),
	('\x66e8b1ff6f6f', 'invalid byte sequence'),
	('\x66006f', 'invalid, NUL byte'),
	('\x666f6fe8b100', 'invalid, NUL byte'),
	('\x666f6fe8b1', 'incomplete character at end');

	-- Test UTF-8 verification
	select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_inputs;
	-- Test conversions from UTF-8
	select description, inbytes, (test_conv(inbytes, 'utf8', 'euc_jis_2004')).* from utf8_inputs;
	select description, inbytes, (test_conv(inbytes, 'utf8', 'latin1')).* from utf8_inputs;
	select description, inbytes, (test_conv(inbytes, 'utf8', 'latin2')).* from utf8_inputs;
	select description, inbytes, (test_conv(inbytes, 'utf8', 'latin5')).* from utf8_inputs;
	select description, inbytes, (test_conv(inbytes, 'utf8', 'koi8r')).* from utf8_inputs;
	select description, inbytes, (test_conv(inbytes, 'utf8', 'gb18030')).* from utf8_inputs;

	--
	-- EUC_JIS_2004
	--
	CREATE TABLE euc_jis_2004_inputs (inbytes bytea, description text);
	insert into euc_jis_2004_inputs values
	('\x666f6f', 'valid, pure ASCII'),
	('\x666f6fbedd', 'valid'),
	('\xa5f7', 'valid, translates to two UTF-8 chars '),
	('\xbeddbe', 'incomplete char '),
	('\x666f6f00bedd', 'invalid, NUL byte'),
	('\x666f6fbe00dd', 'invalid, NUL byte'),
	('\x666f6fbedd00', 'invalid, NUL byte'),
	('\xbe04', 'invalid byte sequence');

	-- Test EUC_JIS_2004 verification
	select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'euc_jis_2004')).* from euc_jis_2004_inputs;
	-- Test conversions from EUC_JIS_2004
	select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'utf8')).* from euc_jis_2004_inputs;

	--
	-- SHIFT-JIS-2004
	--
	CREATE TABLE shiftjis2004_inputs (inbytes bytea, description text);
	insert into shiftjis2004_inputs values
	('\x666f6f', 'valid, pure ASCII'),
	('\x666f6f8fdb', 'valid'),
	('\x666f6f81c0', 'valid, no translation to UTF-8'),
	('\x666f6f82f5', 'valid, translates to two UTF-8 chars '),
	('\x666f6f8fdb8f', 'incomplete char '),
	('\x666f6f820a', 'incomplete char, followed by newline '),
	('\x666f6f008fdb', 'invalid, NUL byte'),
	('\x666f6f8f00db', 'invalid, NUL byte'),
	('\x666f6f8fdb00', 'invalid, NUL byte');

	-- Test SHIFT-JIS-2004 verification
	select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'shiftjis2004')).* from shiftjis2004_inputs;
	-- Test conversions from SHIFT-JIS-2004
	select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'utf8')).* from shiftjis2004_inputs;
	select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'euc_jis_2004')).* from shiftjis2004_inputs;

	--
	-- GB18030
	--
	CREATE TABLE gb18030_inputs (inbytes bytea, description text);
	insert into gb18030_inputs values
	('\x666f6f', 'valid, pure ASCII'),
	('\x666f6fcff3', 'valid'),
	('\x666f6f8431a530', 'valid, no translation to UTF-8'),
	('\x666f6f84309c38', 'valid, translates to UTF-8 by mapping function'),
	('\x666f6f84309c', 'incomplete char '),
	('\x666f6f84309c0a', 'incomplete char, followed by newline '),
	('\x666f6f84', 'incomplete char at end'),
	('\x666f6f84309c3800', 'invalid, NUL byte'),
	('\x666f6f84309c0038', 'invalid, NUL byte');

	-- Test GB18030 verification. Round-trip through text so the backing of the
	-- bytea values is palloc, not shared_buffers. This lets Valgrind detect
	-- reads past the end.
	select description, inbytes, (test_conv(inbytes::text::bytea, 'gb18030', 'gb18030')).* from gb18030_inputs;
	-- Test conversions from GB18030
	select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;


	--
	-- ISO-8859-5
	--
	CREATE TABLE iso8859_5_inputs (inbytes bytea, description text);
	insert into iso8859_5_inputs values
	('\x666f6f', 'valid, pure ASCII'),
	('\xe4dede', 'valid'),
	('\x00', 'invalid, NUL byte'),
	('\xe400dede', 'invalid, NUL byte'),
	('\xe4dede00', 'invalid, NUL byte');

	-- Test ISO-8859-5 verification
	select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* from iso8859_5_inputs;
	-- Test conversions from ISO-8859-5
	select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
	select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
	select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;

	--
	-- Big5
	--
	CREATE TABLE big5_inputs (inbytes bytea, description text);
	insert into big5_inputs values
	('\x666f6f', 'valid, pure ASCII'),
	('\x666f6fb648', 'valid'),
	('\x666f6fa27f', 'valid, no translation to UTF-8'),
	('\x666f6fb60048', 'invalid, NUL byte'),
	('\x666f6fb64800', 'invalid, NUL byte');

	-- Test Big5 verification
	select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
	-- Test conversions from Big5
	select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
	select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;

	--
	-- MULE_INTERNAL
	--
	CREATE TABLE mic_inputs (inbytes bytea, description text);
	insert into mic_inputs values
	('\x666f6f', 'valid, pure ASCII'),
	('\x8bc68bcf8bcf', 'valid (in KOI8R)'),
	('\x8bc68bcf8b', 'invalid,incomplete char'),
	('\x92bedd', 'valid (in SHIFT_JIS)'),
	('\x92be', 'invalid, incomplete char)'),
	('\x666f6f95a3c1', 'valid (in Big5)'),
	('\x666f6f95a3', 'invalid, incomplete char'),
	('\x9200bedd', 'invalid, NUL byte'),
	('\x92bedd00', 'invalid, NUL byte'),
	('\x8b00c68bcf8bcf', 'invalid, NUL byte');

	-- Test MULE_INTERNAL verification
	select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
	-- Test conversions from MULE_INTERNAL
	select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
	select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
	select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
	select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
	select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;