src/test/regress/expected/random.out - cloudberry - Git at Google

 --
 -- RANDOM
 -- Test random() and allies
 --
 -- Tests in this file may have a small probability of failure,
 -- since we are dealing with randomness.  Try to keep the failure
 -- risk for any one test case under 1e-9.
 --
 -- There should be no duplicates in 1000 random() values.
 -- (Assuming 52 random bits in the float8 results, we could
 -- take as many as 3000 values and still have less than 1e-9 chance
 -- of failure, per https://en.wikipedia.org/wiki/Birthday_problem)
 SELECT r, count(*)
 FROM (SELECT random() r FROM generate_series(1, 1000)) ss
 GROUP BY r HAVING count(*) > 1;
  r | count
 ---+-------
 (0 rows)

 -- The range should be [0, 1).  We can expect that at least one out of 2000
 -- random values is in the lowest or highest 1% of the range with failure
 -- probability less than about 1e-9.
 SELECT count(*) FILTER (WHERE r < 0 OR r >= 1) AS out_of_range,
        (count(*) FILTER (WHERE r < 0.01)) > 0 AS has_small,
        (count(*) FILTER (WHERE r > 0.99)) > 0 AS has_large
 FROM (SELECT random() r FROM generate_series(1, 2000)) ss;
  out_of_range | has_small | has_large
 --------------+-----------+-----------
             0 | t         | t
 (1 row)

 -- Check for uniform distribution using the Kolmogorov-Smirnov test.
 CREATE FUNCTION ks_test_uniform_random()
 RETURNS boolean AS
 $$
 DECLARE
   n int := 1000;        -- Number of samples
   c float8 := 1.94947;  -- Critical value for 99.9% confidence
   ok boolean;
 BEGIN
   ok := (
     WITH samples AS (
       SELECT random() r FROM generate_series(1, n) ORDER BY 1
     ), indexed_samples AS (
       SELECT (row_number() OVER())-1.0 i, r FROM samples
     )
     SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
   );
   RETURN ok;
 END
 $$
 LANGUAGE plpgsql;
 -- As written, ks_test_uniform_random() returns true about 99.9%
 -- of the time.  To get down to a roughly 1e-9 test failure rate,
 -- just run it 3 times and accept if any one of them passes.
 SELECT ks_test_uniform_random() OR
        ks_test_uniform_random() OR
        ks_test_uniform_random() AS uniform;
  uniform
 ---------
  t
 (1 row)

 -- now test random_normal()
 -- As above, there should be no duplicates in 1000 random_normal() values.
 SELECT r, count(*)
 FROM (SELECT random_normal() r FROM generate_series(1, 1000)) ss
 GROUP BY r HAVING count(*) > 1;
  r | count
 ---+-------
 (0 rows)

 -- ... unless we force the range (standard deviation) to zero.
 -- This is a good place to check that the mean input does something, too.
 SELECT r, count(*)
 FROM (SELECT random_normal(10, 0) r FROM generate_series(1, 100)) ss
 GROUP BY r;
  r  | count
 ----+-------
  10 |   100
 (1 row)

 SELECT r, count(*)
 FROM (SELECT random_normal(-10, 0) r FROM generate_series(1, 100)) ss
 GROUP BY r;
   r  | count
 -----+-------
  -10 |   100
 (1 row)

 -- Check standard normal distribution using the Kolmogorov-Smirnov test.
 CREATE FUNCTION ks_test_normal_random()
 RETURNS boolean AS
 $$
 DECLARE
   n int := 1000;        -- Number of samples
   c float8 := 1.94947;  -- Critical value for 99.9% confidence
   ok boolean;
 BEGIN
   ok := (
     WITH samples AS (
       SELECT random_normal() r FROM generate_series(1, n) ORDER BY 1
     ), indexed_samples AS (
       SELECT (row_number() OVER())-1.0 i, r FROM samples
     )
     SELECT max(abs((1+erf(r/sqrt(2)))/2 - i/n)) < c / sqrt(n)
     FROM indexed_samples
   );
   RETURN ok;
 END
 $$
 LANGUAGE plpgsql;
 -- As above, ks_test_normal_random() returns true about 99.9%
 -- of the time, so try it 3 times and accept if any test passes.
 SELECT ks_test_normal_random() OR
        ks_test_normal_random() OR
        ks_test_normal_random() AS standard_normal;
  standard_normal
 -----------------
  t
 (1 row)

 -- setseed() should produce a reproducible series of random() values.
 SELECT setseed(0.5);
  setseed
 ---------

 (1 row)

 SELECT random() FROM generate_series(1, 10);
        random
 ---------------------
   0.9851677175347999
    0.825301858027981
  0.12974610012450416
  0.16356291958601088
      0.6476186144084
   0.8822771983038762
   0.1404566845227775
  0.15619865764623442
   0.5145227426983392
   0.7712969548127826
 (10 rows)

 -- Likewise for random_normal(); however, since its implementation relies
 -- on libm functions that have different roundoff behaviors on different
 -- machines, we have to round off the results a bit to get consistent output.
 SET extra_float_digits = -1;
 SELECT random_normal() FROM generate_series(1, 10);
    random_normal
 -------------------
   0.20853464493838
   0.26453024054096
  -0.60675246790043
   0.82579942785265
    1.7011161173536
  -0.22344546371619
     0.249712419191
   -1.2494722990669
   0.12562715204368
   0.47539161454401
 (10 rows)

 SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10);
         r
 ------------------
   1.0060597281173
     1.09685453015
   1.0286920613201
  0.90947567671234
  0.98372476313426
  0.93934454957762
   1.1871350020636
  0.96225768429293
  0.91444120680041
  0.96403105557543
 (10 rows)
	--
	-- RANDOM
	-- Test random() and allies
	--
	-- Tests in this file may have a small probability of failure,
	-- since we are dealing with randomness. Try to keep the failure
	-- risk for any one test case under 1e-9.
	--
	-- There should be no duplicates in 1000 random() values.
	-- (Assuming 52 random bits in the float8 results, we could
	-- take as many as 3000 values and still have less than 1e-9 chance
	-- of failure, per https://en.wikipedia.org/wiki/Birthday_problem)
	SELECT r, count(*)
	FROM (SELECT random() r FROM generate_series(1, 1000)) ss
	GROUP BY r HAVING count(*) > 1;
	r \| count
	---+-------
	(0 rows)

	-- The range should be [0, 1). We can expect that at least one out of 2000
	-- random values is in the lowest or highest 1% of the range with failure
	-- probability less than about 1e-9.
	SELECT count(*) FILTER (WHERE r < 0 OR r >= 1) AS out_of_range,
	(count(*) FILTER (WHERE r < 0.01)) > 0 AS has_small,
	(count(*) FILTER (WHERE r > 0.99)) > 0 AS has_large
	FROM (SELECT random() r FROM generate_series(1, 2000)) ss;
	out_of_range \| has_small \| has_large
	--------------+-----------+-----------
	0 \| t \| t
	(1 row)

	-- Check for uniform distribution using the Kolmogorov-Smirnov test.
	CREATE FUNCTION ks_test_uniform_random()
	RETURNS boolean AS
	$$
	DECLARE
	n int := 1000; -- Number of samples
	c float8 := 1.94947; -- Critical value for 99.9% confidence
	ok boolean;
	BEGIN
	ok := (
	WITH samples AS (
	SELECT random() r FROM generate_series(1, n) ORDER BY 1
	), indexed_samples AS (
	SELECT (row_number() OVER())-1.0 i, r FROM samples
	)
	SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples
	);
	RETURN ok;
	END
	$$
	LANGUAGE plpgsql;
	-- As written, ks_test_uniform_random() returns true about 99.9%
	-- of the time. To get down to a roughly 1e-9 test failure rate,
	-- just run it 3 times and accept if any one of them passes.
	SELECT ks_test_uniform_random() OR
	ks_test_uniform_random() OR
	ks_test_uniform_random() AS uniform;
	uniform
	---------
	t
	(1 row)

	-- now test random_normal()
	-- As above, there should be no duplicates in 1000 random_normal() values.
	SELECT r, count(*)
	FROM (SELECT random_normal() r FROM generate_series(1, 1000)) ss
	GROUP BY r HAVING count(*) > 1;
	r \| count
	---+-------
	(0 rows)

	-- ... unless we force the range (standard deviation) to zero.
	-- This is a good place to check that the mean input does something, too.
	SELECT r, count(*)
	FROM (SELECT random_normal(10, 0) r FROM generate_series(1, 100)) ss
	GROUP BY r;
	r \| count
	----+-------
	10 \| 100
	(1 row)

	SELECT r, count(*)
	FROM (SELECT random_normal(-10, 0) r FROM generate_series(1, 100)) ss
	GROUP BY r;
	r \| count
	-----+-------
	-10 \| 100
	(1 row)

	-- Check standard normal distribution using the Kolmogorov-Smirnov test.
	CREATE FUNCTION ks_test_normal_random()
	RETURNS boolean AS
	$$
	DECLARE
	n int := 1000; -- Number of samples
	c float8 := 1.94947; -- Critical value for 99.9% confidence
	ok boolean;
	BEGIN
	ok := (
	WITH samples AS (
	SELECT random_normal() r FROM generate_series(1, n) ORDER BY 1
	), indexed_samples AS (
	SELECT (row_number() OVER())-1.0 i, r FROM samples
	)
	SELECT max(abs((1+erf(r/sqrt(2)))/2 - i/n)) < c / sqrt(n)
	FROM indexed_samples
	);
	RETURN ok;
	END
	$$
	LANGUAGE plpgsql;
	-- As above, ks_test_normal_random() returns true about 99.9%
	-- of the time, so try it 3 times and accept if any test passes.
	SELECT ks_test_normal_random() OR
	ks_test_normal_random() OR
	ks_test_normal_random() AS standard_normal;
	standard_normal
	-----------------
	t
	(1 row)

	-- setseed() should produce a reproducible series of random() values.
	SELECT setseed(0.5);
	setseed
	---------

	(1 row)

	SELECT random() FROM generate_series(1, 10);
	random
	---------------------
	0.9851677175347999
	0.825301858027981
	0.12974610012450416
	0.16356291958601088
	0.6476186144084
	0.8822771983038762
	0.1404566845227775
	0.15619865764623442
	0.5145227426983392
	0.7712969548127826
	(10 rows)

	-- Likewise for random_normal(); however, since its implementation relies
	-- on libm functions that have different roundoff behaviors on different
	-- machines, we have to round off the results a bit to get consistent output.
	SET extra_float_digits = -1;
	SELECT random_normal() FROM generate_series(1, 10);
	random_normal
	-------------------
	0.20853464493838
	0.26453024054096
	-0.60675246790043
	0.82579942785265
	1.7011161173536
	-0.22344546371619
	0.249712419191
	-1.2494722990669
	0.12562715204368
	0.47539161454401
	(10 rows)

	SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10);
	r
	------------------
	1.0060597281173
	1.09685453015
	1.0286920613201
	0.90947567671234
	0.98372476313426
	0.93934454957762
	1.1871350020636
	0.96225768429293
	0.91444120680041
	0.96403105557543
	(10 rows)