blob: f7429f61bdb22a5be015b9771f2cf4b98060efeb [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_pythonudf_performance") {
// Test Python UDF performance and correctness with large data volumes
def runtime_version = "3.10.12"
try {
// Create simple Python UDF
sql """ DROP FUNCTION IF EXISTS py_perf_double(INT); """
sql """
CREATE FUNCTION py_perf_double(INT)
RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "${runtime_version}"
)
AS \$\$
def evaluate(x):
if x is None:
return None
return x * 2
\$\$;
"""
sql """ DROP FUNCTION IF EXISTS py_perf_concat(STRING, STRING); """
sql """
CREATE FUNCTION py_perf_concat(STRING, STRING)
RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "${runtime_version}"
)
AS \$\$
def evaluate(s1, s2):
if s1 is None or s2 is None:
return None
return s1 + '_' + s2
\$\$;
"""
// Create test table
sql """ DROP TABLE IF EXISTS performance_test_table; """
sql """
CREATE TABLE performance_test_table (
id INT,
value INT,
category STRING,
amount DOUBLE
) ENGINE=OLAP
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 4
PROPERTIES("replication_num" = "1");
"""
// Insert test data (large data volume)
sql """
INSERT INTO performance_test_table
SELECT
number AS id,
number % 1000 AS value,
CASE
WHEN number % 4 = 0 THEN 'A'
WHEN number % 4 = 1 THEN 'B'
WHEN number % 4 = 2 THEN 'C'
ELSE 'D'
END AS category,
number * 1.5 AS amount
FROM numbers("number" = "10000");
"""
// Test 1: Simple UDF call performance
qt_select_perf_simple """
SELECT COUNT(*) AS total_count
FROM performance_test_table
WHERE py_perf_double(value) > 1000;
"""
// Test 2: UDF performance in aggregate queries
qt_select_perf_aggregate """
SELECT
category,
COUNT(*) AS count,
AVG(py_perf_double(value)) AS avg_doubled_value
FROM performance_test_table
GROUP BY category
ORDER BY category;
"""
// Test 3: Multiple UDFs combined usage
qt_select_perf_multiple_udf """
SELECT
category,
COUNT(*) AS count
FROM performance_test_table
WHERE py_perf_double(value) > 500
GROUP BY category
ORDER BY count DESC
LIMIT 10;
"""
// Test 4: String UDF performance
qt_select_perf_string """
SELECT
category,
COUNT(DISTINCT py_perf_concat(category, CAST(value AS STRING))) AS unique_combinations
FROM performance_test_table
GROUP BY category
ORDER BY category;
"""
// Test 5: UDF with complex calculations
sql """ DROP FUNCTION IF EXISTS py_perf_complex(INT, DOUBLE); """
sql """
CREATE FUNCTION py_perf_complex(INT, DOUBLE)
RETURNS DOUBLE
PROPERTIES (
"type" = "PYTHON_UDF",
"symbol" = "evaluate",
"runtime_version" = "${runtime_version}"
)
AS \$\$
def evaluate(v, a):
if v is None or a is None:
return None
result = (v * 1.5 + a * 0.8) / 2.0
return result
\$\$;
"""
qt_select_perf_complex """
SELECT
category,
AVG(py_perf_complex(value, amount)) AS avg_complex_result,
MAX(py_perf_complex(value, amount)) AS max_complex_result,
MIN(py_perf_complex(value, amount)) AS min_complex_result
FROM performance_test_table
GROUP BY category
ORDER BY category;
"""
// Test 6: UDF in nested queries
qt_select_perf_nested """
SELECT
category,
avg_doubled
FROM (
SELECT
category,
AVG(py_perf_double(value)) AS avg_doubled
FROM performance_test_table
GROUP BY category
) t
WHERE avg_doubled > 500
ORDER BY avg_doubled DESC;
"""
// Test 7: Performance test with NULL value handling
sql """ DROP TABLE IF EXISTS performance_null_test; """
sql """
CREATE TABLE performance_null_test (
id INT,
value INT,
nullable_value INT
) ENGINE=OLAP
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 4
PROPERTIES("replication_num" = "1");
"""
sql """
INSERT INTO performance_null_test
SELECT
number AS id,
number % 100 AS value,
CASE WHEN number % 5 = 0 THEN NULL ELSE number % 50 END AS nullable_value
FROM numbers("number" = "5000");
"""
qt_select_perf_null """
SELECT
COUNT(*) AS total,
COUNT(py_perf_double(nullable_value)) AS non_null_count,
AVG(py_perf_double(nullable_value)) AS avg_result
FROM performance_null_test;
"""
// Test 8: Sorting performance
qt_select_perf_order """
SELECT
id,
value,
py_perf_double(value) AS doubled_value
FROM performance_test_table
ORDER BY py_perf_double(value) DESC, id DESC
LIMIT 20;
"""
} finally {
try_sql("DROP FUNCTION IF EXISTS py_perf_double(INT);")
try_sql("DROP FUNCTION IF EXISTS py_perf_concat(STRING, STRING);")
try_sql("DROP FUNCTION IF EXISTS py_perf_complex(INT, DOUBLE);")
try_sql("DROP TABLE IF EXISTS performance_test_table;")
try_sql("DROP TABLE IF EXISTS performance_null_test;")
}
}