blob: c39e92c0ec89067e9d6095e7365d6c87732577e9 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_pythonudf_module_vector") {
// Test vectorized Python UDF using module mode with pandas.Series
def pyPath = """${context.file.parent}/udf_scripts/python_udf_vector_ops.zip"""
scp_udf_file_to_all_be(pyPath)
def runtime_version = "3.10.12"
log.info("Python module path: ${pyPath}".toString())
try {
// Create test table
sql """ DROP TABLE IF EXISTS vector_module_test_table; """
sql """
CREATE TABLE vector_module_test_table (
id INT,
int_a INT,
int_b INT,
double_a DOUBLE,
double_b DOUBLE,
string_a STRING,
string_b STRING,
bool_a BOOLEAN,
bool_b BOOLEAN
) ENGINE=OLAP
DUPLICATE KEY(id)
DISTRIBUTED BY HASH(id) BUCKETS 1
PROPERTIES("replication_num" = "1");
"""
sql """
INSERT INTO vector_module_test_table VALUES
(1, 10, 20, 1.5, 2.5, 'hello world', 'python udf', true, true),
(2, 30, 15, 3.5, 4.5, 'foo bar', 'test case', false, true),
(3, 50, 50, 5.5, 2.0, 'data science', 'machine learning', true, false),
(4, 5, 25, 7.5, 1.5, 'apache doris', 'database system', false, false),
(5, 100, 10, 9.5, 3.5, 'vector operations', 'pandas series', true, true);
"""
// Test 1: Vector addition with constant
sql """ DROP FUNCTION IF EXISTS py_vec_add_const(INT, INT); """
sql """
CREATE FUNCTION py_vec_add_const(INT, INT)
RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_add_with_constant",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_add_const """
SELECT
id,
int_a,
int_b,
py_vec_add_const(int_a, int_b) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 2: Vector multiplication and rounding
sql """ DROP FUNCTION IF EXISTS py_vec_multiply_round(DOUBLE, DOUBLE); """
sql """
CREATE FUNCTION py_vec_multiply_round(DOUBLE, DOUBLE)
RETURNS DOUBLE
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_multiply_and_round",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_multiply_round """
SELECT
id,
double_a,
double_b,
py_vec_multiply_round(double_a, double_b) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 3: Vector string concatenation with separator
sql """ DROP FUNCTION IF EXISTS py_vec_concat_sep(STRING, STRING); """
sql """
CREATE FUNCTION py_vec_concat_sep(STRING, STRING)
RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_string_concat_with_separator",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_concat_sep """
SELECT
id,
string_a,
string_b,
py_vec_concat_sep(string_a, string_b) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 4: Vector string title case
sql """ DROP FUNCTION IF EXISTS py_vec_title_case(STRING); """
sql """
CREATE FUNCTION py_vec_title_case(STRING)
RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_string_title_case",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_title_case """
SELECT
id,
string_a,
py_vec_title_case(string_a) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 5: Vector conditional value (max of two values)
sql """ DROP FUNCTION IF EXISTS py_vec_conditional(INT, INT); """
sql """
CREATE FUNCTION py_vec_conditional(INT, INT)
RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_conditional_value",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_conditional """
SELECT
id,
int_a,
int_b,
py_vec_conditional(int_a, int_b) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 6: Vector percentage calculation
sql """ DROP FUNCTION IF EXISTS py_vec_percentage(DOUBLE, DOUBLE); """
sql """
CREATE FUNCTION py_vec_percentage(DOUBLE, DOUBLE)
RETURNS DOUBLE
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_percentage_calculation",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_percentage """
SELECT
id,
double_a,
double_b,
py_vec_percentage(double_a, double_b) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 7: Vector range check
sql """ DROP FUNCTION IF EXISTS py_vec_in_range(INT, INT, INT); """
sql """
CREATE FUNCTION py_vec_in_range(INT, INT, INT)
RETURNS BOOLEAN
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_is_in_range",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_in_range """
SELECT
id,
int_a,
py_vec_in_range(int_a, 10, 50) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 8: Vector safe division
sql """ DROP FUNCTION IF EXISTS py_vec_safe_div(DOUBLE, DOUBLE); """
sql """
CREATE FUNCTION py_vec_safe_div(DOUBLE, DOUBLE)
RETURNS DOUBLE
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_safe_divide",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_safe_div """
SELECT
id,
double_a,
double_b,
py_vec_safe_div(double_a, double_b) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 9: Vector exponential decay
sql """ DROP FUNCTION IF EXISTS py_vec_exp_decay(DOUBLE, INT); """
sql """
CREATE FUNCTION py_vec_exp_decay(DOUBLE, INT)
RETURNS DOUBLE
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_exponential_decay",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_exp_decay """
SELECT
id,
double_a,
int_a,
py_vec_exp_decay(double_a, int_a) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 10: Vector string extract first word
sql """ DROP FUNCTION IF EXISTS py_vec_first_word(STRING); """
sql """
CREATE FUNCTION py_vec_first_word(STRING)
RETURNS STRING
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_string_extract_first_word",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_first_word """
SELECT
id,
string_a,
py_vec_first_word(string_a) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 11: Vector absolute difference
sql """ DROP FUNCTION IF EXISTS py_vec_abs_diff(INT, INT); """
sql """
CREATE FUNCTION py_vec_abs_diff(INT, INT)
RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_abs_difference",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_abs_diff """
SELECT
id,
int_a,
int_b,
py_vec_abs_diff(int_a, int_b) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 12: Vector power operation
sql """ DROP FUNCTION IF EXISTS py_vec_power(DOUBLE, DOUBLE); """
sql """
CREATE FUNCTION py_vec_power(DOUBLE, DOUBLE)
RETURNS DOUBLE
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_power",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_power """
SELECT
id,
double_a,
py_vec_power(double_a, 2.0) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 13: Vector boolean AND operation
sql """ DROP FUNCTION IF EXISTS py_vec_bool_and(BOOLEAN, BOOLEAN); """
sql """
CREATE FUNCTION py_vec_bool_and(BOOLEAN, BOOLEAN)
RETURNS BOOLEAN
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_boolean_and",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_bool_and """
SELECT
id,
bool_a,
bool_b,
py_vec_bool_and(bool_a, bool_b) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 14: Vector boolean OR operation
sql """ DROP FUNCTION IF EXISTS py_vec_bool_or(BOOLEAN, BOOLEAN); """
sql """
CREATE FUNCTION py_vec_bool_or(BOOLEAN, BOOLEAN)
RETURNS BOOLEAN
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_boolean_or",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_bool_or """
SELECT
id,
bool_a,
bool_b,
py_vec_bool_or(bool_a, bool_b) AS result
FROM vector_module_test_table
ORDER BY id;
"""
// Test 15: Vector clip values
sql """ DROP FUNCTION IF EXISTS py_vec_clip(INT, INT, INT); """
sql """
CREATE FUNCTION py_vec_clip(INT, INT, INT)
RETURNS INT
PROPERTIES (
"type" = "PYTHON_UDF",
"file" = "file://${pyPath}",
"symbol" = "python_udf_vector_ops.vec_clip_values",
"runtime_version" = "${runtime_version}",
"always_nullable" = "true"
);
"""
qt_vec_clip """
SELECT
id,
int_a,
py_vec_clip(int_a, 20, 60) AS result
FROM vector_module_test_table
ORDER BY id;
"""
} finally {
try_sql("DROP FUNCTION IF EXISTS py_vec_add_const(INT, INT);")
try_sql("DROP FUNCTION IF EXISTS py_vec_multiply_round(DOUBLE, DOUBLE);")
try_sql("DROP FUNCTION IF EXISTS py_vec_concat_sep(STRING, STRING);")
try_sql("DROP FUNCTION IF EXISTS py_vec_title_case(STRING);")
try_sql("DROP FUNCTION IF EXISTS py_vec_conditional(INT, INT);")
try_sql("DROP FUNCTION IF EXISTS py_vec_percentage(DOUBLE, DOUBLE);")
try_sql("DROP FUNCTION IF EXISTS py_vec_in_range(INT, INT, INT);")
try_sql("DROP FUNCTION IF EXISTS py_vec_safe_div(DOUBLE, DOUBLE);")
try_sql("DROP FUNCTION IF EXISTS py_vec_exp_decay(DOUBLE, INT);")
try_sql("DROP FUNCTION IF EXISTS py_vec_first_word(STRING);")
try_sql("DROP FUNCTION IF EXISTS py_vec_abs_diff(INT, INT);")
try_sql("DROP FUNCTION IF EXISTS py_vec_power(DOUBLE, DOUBLE);")
try_sql("DROP FUNCTION IF EXISTS py_vec_bool_and(BOOLEAN, BOOLEAN);")
try_sql("DROP FUNCTION IF EXISTS py_vec_bool_or(BOOLEAN, BOOLEAN);")
try_sql("DROP FUNCTION IF EXISTS py_vec_clip(INT, INT, INT);")
try_sql("DROP TABLE IF EXISTS vector_module_test_table;")
}
}