src/test/regress/expected/gpcopy_encoding.out - cloudberry - Git at Google

 --
 -- Test different combinations of client and server encodings with COPY.
 --
 CREATE DATABASE utf8db ENCODING 'utf8' TEMPLATE=template0 LC_COLLATE='C' LC_CTYPE='C';
 CREATE DATABASE latin1db ENCODING 'latin1' TEMPLATE=template0 LC_COLLATE='C' LC_CTYPE='C';
 -- First, connect to the UTF-8 database, and use COPY TO with non-ASCII data.
 -- Use both explicit ENCODING, and client_encoding, to specify the output
 -- encoding.
 \c utf8db
 set client_encoding='utf8';
 CREATE TABLE enctest (t text);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 't' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 insert into enctest values (chr(196)); -- Latin Capital Letter a with Diaeresis
 -- with UTF-8 as the server encoding, it should be stored as two bytes.
 select octet_length(t) from enctest;
  octet_length
 --------------
             2
 (1 row)

 copy enctest to '/tmp/enctest_utf_to_latin1-1' encoding 'latin1';
 set client_encoding='latin1';
 copy enctest to stdout;
 Ä
 copy enctest to '/tmp/enctest_utf_to_latin1-2';
 -- Connect to 'latin1' database, and load back the files we just created.
 -- This is to check that they were created correctly, and that the ENCODING
 -- option works correctly also in COPY FROM.
 \c latin1db
 CREATE TABLE enctest (t text);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 't' as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
 set client_encoding='latin1';
 copy enctest from '/tmp/enctest_utf_to_latin1-1';
 copy enctest from '/tmp/enctest_utf_to_latin1-2';
 set client_encoding='utf8';
 copy enctest from '/tmp/enctest_utf_to_latin1-1' encoding 'latin1';
 copy enctest from '/tmp/enctest_utf_to_latin1-2' encoding 'latin1';
 -- with latin1 as the server encoding, the character we used in the tests should be
 -- stored as one byte.
 select octet_length(t) from enctest;
  octet_length
 --------------
             1
             1
             1
             1
 (4 rows)

 select * from enctest;
  t
 ---
  Ä
  Ä
  Ä
  Ä
 (4 rows)

 copy enctest to stdout;
 Ä
 Ä
 Ä
 Ä
 \c regression
 drop database utf8db;
 drop database latin1db;
	--
	-- Test different combinations of client and server encodings with COPY.
	--
	CREATE DATABASE utf8db ENCODING 'utf8' TEMPLATE=template0 LC_COLLATE='C' LC_CTYPE='C';
	CREATE DATABASE latin1db ENCODING 'latin1' TEMPLATE=template0 LC_COLLATE='C' LC_CTYPE='C';
	-- First, connect to the UTF-8 database, and use COPY TO with non-ASCII data.
	-- Use both explicit ENCODING, and client_encoding, to specify the output
	-- encoding.
	\c utf8db
	set client_encoding='utf8';
	CREATE TABLE enctest (t text);
	NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 't' as the Apache Cloudberry data distribution key for this table.
	HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
	insert into enctest values (chr(196)); -- Latin Capital Letter a with Diaeresis
	-- with UTF-8 as the server encoding, it should be stored as two bytes.
	select octet_length(t) from enctest;
	octet_length
	--------------
	2
	(1 row)

	copy enctest to '/tmp/enctest_utf_to_latin1-1' encoding 'latin1';
	set client_encoding='latin1';
	copy enctest to stdout;
	Ä
	copy enctest to '/tmp/enctest_utf_to_latin1-2';
	-- Connect to 'latin1' database, and load back the files we just created.
	-- This is to check that they were created correctly, and that the ENCODING
	-- option works correctly also in COPY FROM.
	\c latin1db
	CREATE TABLE enctest (t text);
	NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 't' as the Apache Cloudberry data distribution key for this table.
	HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
	set client_encoding='latin1';
	copy enctest from '/tmp/enctest_utf_to_latin1-1';
	copy enctest from '/tmp/enctest_utf_to_latin1-2';
	set client_encoding='utf8';
	copy enctest from '/tmp/enctest_utf_to_latin1-1' encoding 'latin1';
	copy enctest from '/tmp/enctest_utf_to_latin1-2' encoding 'latin1';
	-- with latin1 as the server encoding, the character we used in the tests should be
	-- stored as one byte.
	select octet_length(t) from enctest;
	octet_length
	--------------
	1
	1
	1
	1
	(4 rows)

	select * from enctest;
	t
	---
	Ä
	Ä
	Ä
	Ä
	(4 rows)

	copy enctest to stdout;
	Ä
	Ä
	Ä
	Ä
	\c regression
	drop database utf8db;
	drop database latin1db;