[Feat](udf) Support Python UDF for Doris (#57329)
### What problem does this PR solve?
Issue Number: close #xxx
Related PR: #xxx
Problem Summary:
Add Python UDF support to the Doris BE: new BE config options, a python environment scanner (conda/venv), a per-version worker process pool, and an Arrow Flight DoExchange channel for evaluating UDFs on Arrow RecordBatches.
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
Co-authored-by: yangshijie <jasonsjyang@tencent.com>

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index c2ce46b..1aeccc9 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -35,6 +35,7 @@
#include <mutex>
#include <random>
#include <string>
+#include <string_view>
#include <utility>
#include <vector>
@@ -45,6 +46,7 @@
#include "io/fs/file_writer.h"
#include "io/fs/local_file_system.h"
#include "util/cpu_info.h"
+#include "util/string_util.h"
namespace doris::config {
#include "common/compile_check_avoid_begin.h"
@@ -1082,6 +1084,25 @@
// enable java udf and jdbc scannode
DEFINE_Bool(enable_java_support, "true");
+// enable python udf
+DEFINE_Bool(enable_python_udf_support, "false");
+// python env mode, options: conda, venv
+DEFINE_String(python_env_mode, "");
+// root path of the conda installation; used when python_env_mode is conda
+DEFINE_String(python_conda_root_path, "");
+// root path of the venv runtimes; used when python_env_mode is venv
+DEFINE_String(python_venv_root_path, "${DORIS_HOME}/lib/udf/python");
+// python interpreter paths used by venv, e.g. /usr/bin/python3.7:/usr/bin/python3.6
+DEFINE_String(python_venv_interpreter_paths, "");
+// package index URL for resolving python UDF dependencies
+DEFINE_String(python_deps_index_url, "https://pypi.org/simple/");
+// minimum number of python processes in the pool
+DEFINE_Int32(min_python_process_nums, "16");
+// maximum number of python processes in the pool
+DEFINE_Int32(max_python_process_nums, "256");
+// timeout in milliseconds when waiting for available python process
+DEFINE_Int32(python_process_pool_wait_timeout_ms, "30000");
+
// Set config randomly to check more issues in github workflow
DEFINE_Bool(enable_fuzzy_mode, "false");
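Taken together, the new BE flags form a small configuration surface. A minimal `be.conf` fragment for venv mode might look like this (the interpreter paths are illustrative; the remaining values are the defaults defined above):

```
enable_python_udf_support = true
python_env_mode = venv
python_venv_root_path = ${DORIS_HOME}/lib/udf/python
python_venv_interpreter_paths = /usr/bin/python3.9:/usr/bin/python3.8
python_deps_index_url = https://pypi.org/simple/
min_python_process_nums = 16
max_python_process_nums = 256
python_process_pool_wait_timeout_ms = 30000
```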
diff --git a/be/src/common/config.h b/be/src/common/config.h
index df83ae4..0d530ac 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1113,6 +1113,25 @@
// enable java udf and jdbc scannode
DECLARE_Bool(enable_java_support);
+// enable python udf
+DECLARE_Bool(enable_python_udf_support);
+// python env mode, options: conda, venv
+DECLARE_String(python_env_mode);
+// root path of the conda installation; used when python_env_mode is conda
+DECLARE_String(python_conda_root_path);
+// root path of the venv runtimes; used when python_env_mode is venv
+DECLARE_String(python_venv_root_path);
+// python interpreter paths used by venv, e.g. /usr/bin/python3.7:/usr/bin/python3.6
+DECLARE_String(python_venv_interpreter_paths);
+// package index URL for resolving python UDF dependencies
+DECLARE_String(python_deps_index_url);
+// minimum number of python processes in the pool
+DECLARE_Int32(min_python_process_nums);
+// maximum number of python processes in the pool
+DECLARE_Int32(max_python_process_nums);
+// timeout in milliseconds when waiting for available python process
+DECLARE_Int32(python_process_pool_wait_timeout_ms);
+
// Set config randomly to check more issues in github workflow
DECLARE_Bool(enable_fuzzy_mode);
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index 78bdfa3..a175dd3 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -99,6 +99,7 @@
#include "service/backend_options.h"
#include "service/backend_service.h"
#include "service/point_query_executor.h"
+#include "udf/python/python_udf_server.h"
#include "util/bfd_parser.h"
#include "util/bit_util.h"
#include "util/brpc_client_cache.h"
@@ -889,6 +890,7 @@
_s_tracking_memory = false;
clear_storage_resource();
+ PythonUDFServerManager::instance().shutdown();
LOG(INFO) << "Doris exec envorinment is destoried.";
}
diff --git a/be/src/runtime/user_function_cache.cpp b/be/src/runtime/user_function_cache.cpp
index ce6453f..d54c2f4 100644
--- a/be/src/runtime/user_function_cache.cpp
+++ b/be/src/runtime/user_function_cache.cpp
@@ -20,6 +20,7 @@
// IWYU pragma: no_include <bthread/errno.h>
#include <errno.h> // IWYU pragma: keep
#include <glog/logging.h>
+#include <minizip/unzip.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
@@ -41,6 +42,7 @@
#include "io/fs/local_file_system.h"
#include "runtime/exec_env.h"
#include "runtime/plugin/cloud_plugin_downloader.h"
+#include "util/defer_op.h"
#include "util/dynamic_util.h"
#include "util/md5.h"
#include "util/string_util.h"
@@ -88,6 +90,9 @@
// And this is used to indicate whether library is downloaded.
bool is_downloaded = false;
+ // Indicates whether the zip file has been unzipped.
+ bool is_unzipped = false;
+
// used to lookup a symbol
void* lib_handle = nullptr;
@@ -144,9 +149,12 @@
lib_type = LibType::SO;
} else if (ends_with(file, ".jar")) {
lib_type = LibType::JAR;
+ } else if (ends_with(file, ".zip") && _check_cache_is_python_udf(dir, file)) {
+ lib_type = LibType::PY_ZIP;
} else {
return Status::InternalError(
- "unknown library file format. the file type is not end with xxx.jar or xxx.so : " +
+ "unknown library file format. the file type is not end with xxx.jar or xxx.so"
+ " or xxx.zip : " +
file);
}
@@ -249,15 +257,120 @@
RETURN_IF_ERROR(_download_lib(url, entry));
}
+ if (!entry->is_unzipped && entry->type == LibType::PY_ZIP) {
+ RETURN_IF_ERROR(_unzip_lib(entry->lib_file));
+ entry->lib_file = entry->lib_file.substr(0, entry->lib_file.size() - 4);
+ entry->is_unzipped = true;
+ }
+
if (entry->type == LibType::SO) {
RETURN_IF_ERROR(_load_cache_entry_internal(entry));
- } else if (entry->type != LibType::JAR) {
+ } else if (entry->type != LibType::JAR && entry->type != LibType::PY_ZIP) {
return Status::InvalidArgument(
- "Unsupported lib type! Make sure your lib type is one of 'so' and 'jar'!");
+ "Unsupported lib type! Make sure your lib type is one of 'so' and 'jar' and "
+ "python 'zip'!");
}
return Status::OK();
}
+Status UserFunctionCache::_check_cache_is_python_udf(const std::string& dir,
+ const std::string& file) {
+ const std::string& full_path = dir + "/" + file;
+ RETURN_IF_ERROR(_unzip_lib(full_path));
+ std::string unzip_dir = full_path.substr(0, full_path.size() - 4);
+
+ bool has_python_file = false;
+
+ auto scan_cb = [&has_python_file](const io::FileInfo& file) {
+ if (file.is_file && ends_with(file.file_name, ".py")) {
+ has_python_file = true;
+ return false; // Stop iteration once we find a Python file
+ }
+ return true;
+ };
+ RETURN_IF_ERROR(io::global_local_filesystem()->iterate_directory(unzip_dir, scan_cb));
+ if (!has_python_file) {
+ return Status::InternalError("No Python file found in the unzipped directory.");
+ }
+ return Status::OK();
+}
+
+Status UserFunctionCache::_unzip_lib(const std::string& zip_file) {
+ std::string unzip_dir = zip_file.substr(0, zip_file.size() - 4);
+ RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(unzip_dir));
+
+ unzFile zip_file_handle = unzOpen(zip_file.c_str());
+ if (zip_file_handle == nullptr) {
+ return Status::InternalError("Failed to open zip file: " + zip_file);
+ }
+
+ Defer defer([&] { unzClose(zip_file_handle); });
+
+ unz_global_info global_info;
+ if (unzGetGlobalInfo(zip_file_handle, &global_info) != UNZ_OK) {
+ return Status::InternalError("Failed to get global info from zip file: " + zip_file);
+ }
+
+ for (uLong i = 0; i < global_info.number_entry; ++i) {
+ unz_file_info file_info;
+ char filename[256];
+ if (unzGetCurrentFileInfo(zip_file_handle, &file_info, filename, sizeof(filename), nullptr,
+ 0, nullptr, 0) != UNZ_OK) {
+ return Status::InternalError("Failed to get file info from zip file: " + zip_file);
+ }
+
+ if (std::string(filename).find("__MACOSX") != std::string::npos) {
+ if ((i + 1) < global_info.number_entry) {
+ if (unzGoToNextFile(zip_file_handle) != UNZ_OK) {
+ return Status::InternalError("Failed to go to next file in zip: " + zip_file);
+ }
+ }
+ continue;
+ }
+
+ std::string full_filename = unzip_dir + "/" + filename;
+ if (full_filename.length() > PATH_MAX) {
+ return Status::InternalError(
+ fmt::format("File path {}... is too long, maximum path length is {}",
+ full_filename.substr(0, 50), PATH_MAX));
+ }
+
+ if (filename[strlen(filename) - 1] == '/') {
+ RETURN_IF_ERROR(io::global_local_filesystem()->create_directory(full_filename));
+ } else {
+ if (unzOpenCurrentFile(zip_file_handle) != UNZ_OK) {
+ return Status::InternalError("Failed to open file in zip: " +
+ std::string(filename));
+ }
+
+ FILE* out = fopen(full_filename.c_str(), "wb");
+ if (out == nullptr) {
+ unzCloseCurrentFile(zip_file_handle);
+ return Status::InternalError("Failed to create file: " + full_filename);
+ }
+ char buffer[8192];
+ int bytes_read;
+ while ((bytes_read = unzReadCurrentFile(zip_file_handle, buffer, sizeof(buffer))) > 0) {
+ fwrite(buffer, bytes_read, 1, out);
+ }
+ fclose(out);
+ unzCloseCurrentFile(zip_file_handle);
+ if (bytes_read < 0) {
+ return Status::InternalError("Failed to read file in zip: " +
+ std::string(filename));
+ }
+ }
+
+ if ((i + 1) < global_info.number_entry) {
+ if (unzGoToNextFile(zip_file_handle) != UNZ_OK) {
+ return Status::InternalError("Failed to go to next file in zip: " + zip_file);
+ }
+ }
+ }
+
+ return Status::OK();
+}
+
// entry's lock must be held
Status UserFunctionCache::_download_lib(const std::string& url,
std::shared_ptr<UserFunctionCacheEntry> entry) {
@@ -348,6 +461,8 @@
ss << _lib_dir << '/' << shard << '/' << function_id << '.' << checksum;
if (type == LibType::JAR) {
ss << '.' << file_name;
+ } else if (type == LibType::PY_ZIP) {
+ ss << '.' << file_name;
} else {
ss << ".so";
}
@@ -362,6 +477,14 @@
return Status::OK();
}
+Status UserFunctionCache::get_pypath(int64_t fid, const std::string& url,
+ const std::string& checksum, std::string* libpath) {
+ std::shared_ptr<UserFunctionCacheEntry> entry = nullptr;
+ RETURN_IF_ERROR(_get_cache_entry(fid, url, checksum, entry, LibType::PY_ZIP));
+ *libpath = entry->lib_file;
+ return Status::OK();
+}
+
std::vector<std::string> UserFunctionCache::_split_string_by_checksum(const std::string& file) {
std::vector<std::string> result;
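For context, a sketch of how the new entry point is meant to be consumed, assuming the existing `UserFunctionCache::instance()` accessor (the fid, URL, and checksum below are illustrative):

```cpp
// Resolve the local directory of an unzipped Python UDF package.
// fid/url/checksum come from the function's catalog metadata.
std::string pypath;
Status st = UserFunctionCache::instance()->get_pypath(
        /*fid=*/10001, "http://fe-host:8030/udf/my_udf.zip",
        "a1b2c3d4", &pypath);
if (st.ok()) {
    // pypath now points at the extracted directory (the cached
    // "<fid>.<checksum>.my_udf.zip" with the ".zip" suffix stripped).
}
```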
diff --git a/be/src/runtime/user_function_cache.h b/be/src/runtime/user_function_cache.h
index f5a04a5..1596f4c 100644
--- a/be/src/runtime/user_function_cache.h
+++ b/be/src/runtime/user_function_cache.h
@@ -43,7 +43,7 @@
// with id, this function library is valid. And when user wants to
// change its implementation(URL), Doris will generate a new function
// id.
-enum class LibType { JAR, SO };
+enum class LibType { JAR, SO, PY_ZIP };
class UserFunctionCache {
public:
@@ -59,6 +59,9 @@
Status get_jarpath(int64_t fid, const std::string& url, const std::string& checksum,
std::string* libpath);
+ Status get_pypath(int64_t fid, const std::string& url, const std::string& checksum,
+ std::string* libpath);
+
private:
Status _load_cached_lib();
Status _load_entry_from_lib(const std::string& dir, const std::string& file);
@@ -66,6 +69,14 @@
std::shared_ptr<UserFunctionCacheEntry>& output_entry, LibType type);
Status _load_cache_entry(const std::string& url, std::shared_ptr<UserFunctionCacheEntry> entry);
Status _download_lib(const std::string& url, std::shared_ptr<UserFunctionCacheEntry> entry);
+ /**
+ * Unzip the user-uploaded python UDF package.
+ */
+ Status _unzip_lib(const std::string& file);
+ /**
+ * Check whether the cached zip file is a python UDF package,
+ * i.e. contains at least one .py file after unzipping.
+ */
+ Status _check_cache_is_python_udf(const std::string& dir, const std::string& file);
Status _load_cache_entry_internal(std::shared_ptr<UserFunctionCacheEntry> entry);
std::string _make_lib_file(int64_t function_id, const std::string& checksum, LibType type,
diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp
index 401456c..69154b6 100644
--- a/be/src/service/doris_main.cpp
+++ b/be/src/service/doris_main.cpp
@@ -24,6 +24,7 @@
// IWYU pragma: no_include <bthread/errno.h>
#include <errno.h> // IWYU pragma: keep
#include <fcntl.h>
+#include <fmt/core.h>
#if !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && \
!defined(THREAD_SANITIZER) && !defined(USE_JEMALLOC)
#include <gperftools/malloc_extension.h> // IWYU pragma: keep
@@ -76,9 +77,11 @@
#include "service/backend_service.h"
#include "service/brpc_service.h"
#include "service/http_service.h"
+#include "udf/python/python_env.h"
#include "util/debug_util.h"
#include "util/disk_info.h"
#include "util/mem_info.h"
+#include "util/string_util.h"
#include "util/thrift_rpc_helper.h"
#include "util/thrift_server.h"
#include "util/uid_util.h"
@@ -499,6 +502,70 @@
}
}
+ if (doris::config::enable_python_udf_support) {
+ if (std::string python_udf_root_path =
+ fmt::format("{}/lib/udf/python", std::getenv("DORIS_HOME"));
+ !std::filesystem::exists(python_udf_root_path)) {
+ std::filesystem::create_directories(python_udf_root_path);
+ }
+
+ // Normalize and trim all Python-related config parameters
+ std::string python_env_mode =
+ std::string(doris::trim(doris::to_lower(doris::config::python_env_mode)));
+ std::string python_conda_root_path =
+ std::string(doris::trim(doris::config::python_conda_root_path));
+ std::string python_venv_root_path =
+ std::string(doris::trim(doris::config::python_venv_root_path));
+ std::string python_venv_interpreter_paths =
+ std::string(doris::trim(doris::config::python_venv_interpreter_paths));
+
+ if (python_env_mode == "conda") {
+ if (python_conda_root_path.empty()) {
+ LOG(ERROR)
+ << "Python conda root path is empty, please set `python_conda_root_path` "
+ "or set `enable_python_udf_support` to `false`";
+ exit(1);
+ }
+ LOG(INFO) << "Doris backend python version manager is initialized. Python conda "
+ "root path: "
+ << python_conda_root_path;
+ status = doris::PythonVersionManager::instance().init(doris::PythonEnvType::CONDA,
+ python_conda_root_path, "");
+ } else if (python_env_mode == "venv") {
+ if (python_venv_root_path.empty()) {
+ LOG(ERROR)
+ << "Python venv root path is empty, please set `python_venv_root_path` or "
+ "set `enable_python_udf_support` to `false`";
+ exit(1);
+ }
+ if (python_venv_interpreter_paths.empty()) {
+ LOG(ERROR)
+ << "Python interpreter paths is empty, please set "
+ "`python_venv_interpreter_paths` or set `enable_python_udf_support` to "
+ "`false`";
+ exit(1);
+ }
+ LOG(INFO) << "Doris backend python version manager is initialized. Python venv "
+ "root path: "
+ << python_venv_root_path
+ << ", python interpreter paths: " << python_venv_interpreter_paths;
+ status = doris::PythonVersionManager::instance().init(doris::PythonEnvType::VENV,
+ python_venv_root_path,
+ python_venv_interpreter_paths);
+ } else {
+ status = Status::InvalidArgument(
+ "Python env mode is invalid, should be `conda` or `venv`. If you don't want to "
+ "enable the Python UDF function, please set `enable_python_udf_support` to "
+ "`false`");
+ }
+
+ if (!status.ok()) {
+ LOG(ERROR) << "Failed to initialize python version manager: " << status;
+ exit(1);
+ }
+ LOG(INFO) << doris::PythonVersionManager::instance().to_string();
+ }
+
// Doris own signal handler must be register after jvm is init.
// Or our own sig-handler for SIGINT & SIGTERM will not be chained ...
// https://www.oracle.com/java/technologies/javase/signals.html
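For completeness, the conda counterpart of the venv setup validated above can be sketched as the following `be.conf` fragment (the root path is illustrative); environments are then discovered under `<python_conda_root_path>/envs/` by the scanner added below:

```
enable_python_udf_support = true
python_env_mode = conda
python_conda_root_path = /opt/miniconda3
```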
diff --git a/be/src/udf/CMakeLists.txt b/be/src/udf/CMakeLists.txt
index 60ea86c..34e6eec 100755
--- a/be/src/udf/CMakeLists.txt
+++ b/be/src/udf/CMakeLists.txt
@@ -20,7 +20,13 @@
# where to put generated binaries
set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/udf")
+set(UDF_SOURCES udf.cpp)
+
+file(GLOB PYTHON_UDF_SOURCES "python/*.cpp")
+
+list(APPEND UDF_SOURCES ${PYTHON_UDF_SOURCES})
+
# Build this library twice. Once to be linked into the main Doris. This version
# can have dependencies on our other libs. The second version is shipped as part
# of the UDF sdk, which can't use other libs.
-add_library(Udf STATIC udf.cpp)
\ No newline at end of file
+add_library(Udf STATIC ${UDF_SOURCES})
diff --git a/be/src/udf/python/python_env.cpp b/be/src/udf/python/python_env.cpp
new file mode 100644
index 0000000..0b29be8
--- /dev/null
+++ b/be/src/udf/python/python_env.cpp
@@ -0,0 +1,290 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "python_env.h"
+
+#include <fmt/core.h>
+
+#include <filesystem>
+#include <memory>
+#include <regex>
+#include <vector>
+
+#include "common/status.h"
+#include "udf/python/python_udf_server.h"
+#include "util/string_util.h"
+
+namespace doris {
+
+namespace fs = std::filesystem;
+
+// Extract the python version by running `python --version`, e.g. "3.9.16" from "Python 3.9.16".
+// @param python_path: path to python executable, e.g. "/opt/miniconda3/envs/myenv/bin/python"
+// @param version: extracted python version, e.g. "3.9.16"
+static Status extract_python_version(const std::string& python_path, std::string* version) {
+ static std::regex python_version_re(R"(^Python (\d+\.\d+\.\d+))");
+
+ if (!fs::exists(python_path)) {
+ return Status::NotFound("Python executable not found: {}", python_path);
+ }
+
+ std::string cmd = fmt::format("\"{}\" --version", python_path);
+ FILE* pipe = popen(cmd.c_str(), "r");
+ if (!pipe) {
+ return Status::InternalError("Failed to run: {}", cmd);
+ }
+
+ std::string result;
+ char buf[128];
+ while (fgets(buf, sizeof(buf), pipe)) {
+ result += buf;
+ }
+ pclose(pipe);
+
+ std::smatch match;
+ if (std::regex_search(result, match, python_version_re)) {
+ *version = match[1].str();
+ return Status::OK();
+ }
+
+ return Status::InternalError("Failed to extract Python version from path: {}, result: {}",
+ python_path, result);
+}
+
+PythonEnvironment::PythonEnvironment(const std::string& name, const PythonVersion& python_version)
+ : env_name(name), python_version(python_version) {}
+
+std::string PythonEnvironment::to_string() const {
+ return fmt::format(
+ "[env_name: {}, env_base_path: {}, python_base_path: {}, python_full_version: {}]",
+ env_name, python_version.base_path, python_version.executable_path,
+ python_version.full_version);
+}
+
+bool PythonEnvironment::is_valid() const {
+ if (!python_version.is_valid()) return false;
+
+ auto perms = fs::status(python_version.executable_path).permissions();
+ if ((perms & fs::perms::owner_exec) == fs::perms::none) {
+ return false;
+ }
+
+ std::string version;
+ if (!extract_python_version(python_version.executable_path, &version).ok()) {
+ LOG(WARNING) << "Failed to extract python version from path: "
+ << python_version.executable_path;
+ return false;
+ }
+
+ return python_version.full_version == version;
+}
+
+// Scan for python environments under the {conda_root_path}/envs directory.
+Status PythonEnvironment::scan_from_conda_root_path(const fs::path& conda_root_path,
+ std::vector<PythonEnvironment>* environments) {
+ DCHECK(!conda_root_path.empty() && environments != nullptr);
+
+ fs::path envs_dir = conda_root_path / "envs";
+ if (!fs::exists(envs_dir) || !fs::is_directory(envs_dir)) {
+ return Status::NotFound("Conda envs directory not found: {}", envs_dir.string());
+ }
+
+ for (const auto& entry : fs::directory_iterator(envs_dir)) {
+ if (!entry.is_directory()) continue;
+
+ std::string env_name = entry.path().filename(); // e.g. "myenv"
+ std::string env_base_path = entry.path(); // e.g. "/opt/miniconda3/envs/myenv"
+ std::string python_path =
+ env_base_path + "/bin/python"; // e.g. "/{env_base_path}/bin/python"
+ std::string python_full_version; // e.g. "3.9.16"
+ RETURN_IF_ERROR(extract_python_version(python_path, &python_full_version));
+ size_t pos = python_full_version.find_last_of('.');
+
+ if (UNLIKELY(pos == std::string::npos)) {
+ return Status::InvalidArgument("Invalid python version: {}", python_full_version);
+ }
+
+ PythonVersion python_version(python_full_version, env_base_path, python_path);
+ PythonEnvironment conda_env(env_name, python_version);
+
+ if (UNLIKELY(!conda_env.is_valid())) {
+ LOG(WARNING) << "Invalid conda environment: " << conda_env.to_string();
+ continue;
+ }
+
+ environments->push_back(std::move(conda_env));
+ }
+
+ if (environments->empty()) {
+ return Status::NotFound("No conda python environments found");
+ }
+
+ return Status::OK();
+}
+
+Status PythonEnvironment::scan_from_venv_root_path(
+ const fs::path& venv_root_path, const std::vector<std::string>& interpreter_paths,
+ std::vector<PythonEnvironment>* environments) {
+ DCHECK(!venv_root_path.empty() && environments != nullptr);
+
+ for (const auto& interpreter_path : interpreter_paths) {
+ if (!fs::exists(interpreter_path) || !fs::is_regular_file(interpreter_path)) {
+ return Status::NotFound("Interpreter path not found: {}", interpreter_path);
+ }
+ std::string python_full_version;
+ RETURN_IF_ERROR(extract_python_version(interpreter_path, &python_full_version));
+ size_t pos = python_full_version.find_last_of('.');
+ if (UNLIKELY(pos == std::string::npos)) {
+ return Status::InvalidArgument("Invalid python version: {}", python_full_version);
+ }
+ // Extract major.minor version (e.g., "3.12" from "3.12.0")
+ std::string python_major_minor_version = python_full_version.substr(0, pos);
+
+ std::string env_name = fmt::format("python{}", python_full_version); // e.g. "python3.9.16"
+ std::string env_base_path = fmt::format("{}/{}", venv_root_path.string(),
+ env_name); // e.g. "/opt/venv/python3.9.16"
+ std::string python_path =
+ fmt::format("{}/bin/python", env_base_path); // e.g. "/{venv_base_path}/bin/python"
+
+ if (!fs::exists(env_base_path) || !fs::exists(python_path)) {
+ fs::create_directories(env_base_path);
+ std::string create_venv_cmd =
+ fmt::format("{} -m venv {}", interpreter_path, env_base_path);
+
+ if (system(create_venv_cmd.c_str()) != 0 || !fs::exists(python_path)) {
+ return Status::RuntimeError("Failed to create python virtual environment, cmd: {}",
+ create_venv_cmd);
+ }
+ }
+
+ // Use major.minor version for site-packages path (e.g., "python3.12")
+ std::string python_dependency_path = fmt::format("{}/lib/python{}/site-packages",
+ env_base_path, python_major_minor_version);
+
+ if (!fs::exists(python_dependency_path)) {
+ return Status::NotFound("Python dependency path not found: {}", python_dependency_path);
+ }
+
+ PythonVersion python_version(python_full_version, env_base_path, python_path);
+ PythonEnvironment venv_env(env_name, python_version);
+
+ if (UNLIKELY(!venv_env.is_valid())) {
+ LOG(WARNING) << "Invalid venv environment: " << venv_env.to_string();
+ continue;
+ }
+
+ environments->push_back(std::move(venv_env));
+ }
+
+ if (environments->empty()) {
+ return Status::NotFound("No venv python environments found");
+ }
+
+ return Status::OK();
+}
+
+Status PythonEnvScanner::get_versions(std::vector<PythonVersion>* versions) const {
+ DCHECK(versions != nullptr);
+ if (_envs.empty()) {
+ return Status::InternalError("not found available version");
+ }
+ for (const auto& env : _envs) {
+ versions->push_back(env.python_version);
+ }
+ return Status::OK();
+}
+
+Status PythonEnvScanner::get_version(const std::string& runtime_version,
+ PythonVersion* version) const {
+ if (_envs.empty()) {
+ return Status::InternalError("not found available version");
+ }
+ std::string_view runtime_version_view(runtime_version);
+ runtime_version_view = trim(runtime_version_view);
+ for (const auto& env : _envs) {
+ if (env.python_version.full_version == runtime_version_view) {
+ *version = env.python_version;
+ return Status::OK();
+ }
+ }
+ return Status::NotFound("not found runtime version: {}", runtime_version);
+}
+
+Status CondaEnvScanner::scan() {
+ RETURN_IF_ERROR(PythonEnvironment::scan_from_conda_root_path(_env_root_path, &_envs));
+ return Status::OK();
+}
+
+std::string CondaEnvScanner::to_string() const {
+ std::stringstream ss;
+ ss << "Conda environments: ";
+ for (const auto& conda_env : _envs) {
+ ss << conda_env.to_string() << ", ";
+ }
+ return ss.str();
+}
+
+Status VenvEnvScanner::scan() {
+ RETURN_IF_ERROR(PythonEnvironment::scan_from_venv_root_path(_env_root_path, _interpreter_paths,
+ &_envs));
+ return Status::OK();
+}
+
+std::string VenvEnvScanner::to_string() const {
+ std::stringstream ss;
+ ss << "Venv environments: ";
+ for (const auto& venv_env : _envs) {
+ ss << venv_env.to_string() << ", ";
+ }
+ return ss.str();
+}
+
+Status PythonVersionManager::init(PythonEnvType env_type, const fs::path& python_root_path,
+ const std::string& python_venv_interpreter_paths) {
+ switch (env_type) {
+ case PythonEnvType::CONDA: {
+ if (!fs::exists(python_root_path) || !fs::is_directory(python_root_path)) {
+ return Status::InvalidArgument("Invalid conda root path: {}",
+ python_root_path.string());
+ }
+ _env_scanner = std::make_unique<CondaEnvScanner>(python_root_path);
+ break;
+ }
+ case PythonEnvType::VENV: {
+ if (!fs::exists(python_root_path) || !fs::is_directory(python_root_path)) {
+ return Status::InvalidArgument("Invalid venv root path: {}", python_root_path.string());
+ }
+ std::vector<std::string> interpreter_paths = split(python_venv_interpreter_paths, ":");
+ if (interpreter_paths.empty()) {
+ return Status::InvalidArgument("Invalid python interpreter paths: {}",
+ python_venv_interpreter_paths);
+ }
+ _env_scanner = std::make_unique<VenvEnvScanner>(python_root_path, interpreter_paths);
+ break;
+ }
+ default:
+ return Status::NotSupported("Unsupported python runtime type: {}",
+ static_cast<int>(env_type));
+ }
+ std::vector<PythonVersion> versions;
+ RETURN_IF_ERROR(_env_scanner->scan());
+ RETURN_IF_ERROR(_env_scanner->get_versions(&versions));
+ RETURN_IF_ERROR(PythonUDFServerManager::instance().init(versions));
+ return Status::OK();
+}
+
+} // namespace doris
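Once `init()` succeeds, later lookups only need the UDF's runtime version string; a minimal sketch:

```cpp
// Look up a scanned interpreter by its full version string.
doris::PythonVersion version;
doris::Status st = doris::PythonVersionManager::instance().get_version("3.9.16", &version);
if (st.ok()) {
    LOG(INFO) << "python executable: " << version.get_executable_path();
}
```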
diff --git a/be/src/udf/python/python_env.h b/be/src/udf/python/python_env.h
new file mode 100644
index 0000000..4d3a5ac
--- /dev/null
+++ b/be/src/udf/python/python_env.h
@@ -0,0 +1,164 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <filesystem>
+
+#include "common/status.h"
+
+namespace doris {
+
+namespace fs = std::filesystem;
+
+enum class PythonEnvType { CONDA, VENV };
+
+struct PythonVersion {
+ std::string full_version; // e.g. "3.9.16"
+ std::string base_path; // e.g. "/root/anaconda3/envs/python3.9"
+ std::string executable_path; // e.g. "{base_path}/bin/python3"
+
+ PythonVersion() = default;
+
+ explicit PythonVersion(std::string full_version, std::string base_path,
+ std::string executable_path)
+ : full_version(std::move(full_version)),
+ base_path(std::move(base_path)),
+ executable_path(std::move(executable_path)) {}
+
+ bool operator==(const PythonVersion& other) const {
+ return full_version == other.full_version && base_path == other.base_path &&
+ executable_path == other.executable_path;
+ }
+
+ const std::string& get_base_path() const { return base_path; }
+
+ const std::string& get_executable_path() const { return executable_path; }
+
+ bool is_valid() const {
+ return !full_version.empty() && !base_path.empty() && !executable_path.empty() &&
+ fs::exists(base_path) && fs::exists(executable_path);
+ }
+
+ std::string to_string() const {
+ return fmt::format("[full_version: {}, base_path: {}, executable_path: {}]", full_version,
+ base_path, executable_path);
+ }
+};
+
+struct PythonEnvironment {
+ std::string env_name; // e.g. "base" or "myenv"
+ PythonVersion python_version;
+
+ PythonEnvironment(const std::string& name, const PythonVersion& python_version);
+
+ std::string to_string() const;
+
+ bool is_valid() const;
+
+ static Status scan_from_conda_root_path(const fs::path& conda_root_path,
+ std::vector<PythonEnvironment>* environments);
+
+ static Status scan_from_venv_root_path(const fs::path& venv_root_path,
+ const std::vector<std::string>& interpreter_paths,
+ std::vector<PythonEnvironment>* environments);
+};
+
+class PythonEnvScanner {
+public:
+ PythonEnvScanner(const fs::path& env_root_path) : _env_root_path(env_root_path) {}
+
+ virtual ~PythonEnvScanner() = default;
+
+ virtual Status scan() = 0;
+
+ Status get_versions(std::vector<PythonVersion>* versions) const;
+
+ Status get_version(const std::string& runtime_version, PythonVersion* version) const;
+
+ std::string root_path() const { return _env_root_path.string(); }
+
+ virtual PythonEnvType env_type() const = 0;
+
+ virtual std::string to_string() const = 0;
+
+protected:
+ fs::path _env_root_path;
+ std::vector<PythonEnvironment> _envs;
+};
+
+class CondaEnvScanner : public PythonEnvScanner {
+public:
+ CondaEnvScanner(const fs::path& python_root_path) : PythonEnvScanner(python_root_path) {}
+
+ ~CondaEnvScanner() override = default;
+
+ Status scan() override;
+
+ std::string to_string() const override;
+
+ PythonEnvType env_type() const override { return PythonEnvType::CONDA; }
+};
+
+class VenvEnvScanner : public PythonEnvScanner {
+public:
+ VenvEnvScanner(const fs::path& python_root_path,
+ const std::vector<std::string>& interpreter_paths)
+ : PythonEnvScanner(python_root_path), _interpreter_paths(interpreter_paths) {}
+
+ ~VenvEnvScanner() override = default;
+
+ Status scan() override;
+
+ std::string to_string() const override;
+
+ PythonEnvType env_type() const override { return PythonEnvType::VENV; }
+
+private:
+ std::vector<std::string> _interpreter_paths;
+};
+
+class PythonVersionManager {
+public:
+ static PythonVersionManager& instance() {
+ static PythonVersionManager instance;
+ return instance;
+ }
+
+ Status init(PythonEnvType env_type, const fs::path& python_root_path,
+ const std::string& python_venv_interpreter_paths);
+
+ Status get_version(const std::string& runtime_version, PythonVersion* version) const {
+ return _env_scanner->get_version(runtime_version, version);
+ }
+
+ std::string to_string() const { return _env_scanner->to_string(); }
+
+private:
+ std::unique_ptr<PythonEnvScanner> _env_scanner;
+};
+
+} // namespace doris
+
+namespace std {
+template <>
+struct hash<doris::PythonVersion> {
+ size_t operator()(const doris::PythonVersion& v) const noexcept {
+ return hash<string> {}(v.full_version);
+ }
+};
+} // namespace std
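The `std::hash` specialization above (together with `operator==`) lets `PythonVersion` key the per-version process-pool map used later in this patch; a minimal sketch:

```cpp
#include <unordered_map>

// PythonVersion can key an unordered_map: equality compares all three
// fields, while the hash uses full_version only.
std::unordered_map<doris::PythonVersion, int> pools;
doris::PythonVersion v("3.9.16", "/opt/venv/python3.9.16",
                       "/opt/venv/python3.9.16/bin/python");
pools[v] = 1;
```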
diff --git a/be/src/udf/python/python_udf_client.cpp b/be/src/udf/python/python_udf_client.cpp
new file mode 100644
index 0000000..8164d9f
--- /dev/null
+++ b/be/src/udf/python/python_udf_client.cpp
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "udf/python/python_udf_client.h"
+
+#include <utility>
+
+#include "arrow/flight/client.h"
+#include "arrow/flight/server.h"
+#include "common/status.h"
+#include "udf/python/python_udf_meta.h"
+#include "udf/python/python_udf_runtime.h"
+#include "util/arrow/utils.h"
+
+namespace doris {
+
+Status PythonUDFClient::create(const PythonUDFMeta& func_meta, ProcessPtr process,
+ PythonUDFClientPtr* client) {
+ PythonUDFClientPtr python_udf_client = std::make_shared<PythonUDFClient>();
+ RETURN_IF_ERROR(python_udf_client->init(func_meta, std::move(process)));
+ *client = std::move(python_udf_client);
+ return Status::OK();
+}
+
+Status PythonUDFClient::init(const PythonUDFMeta& func_meta, ProcessPtr process) {
+ if (_inited) {
+ return Status::InternalError("PythonUDFClient has already been initialized");
+ }
+ arrow::flight::Location location;
+ RETURN_DORIS_STATUS_IF_RESULT_ERROR(location,
+ arrow::flight::Location::Parse(process->get_uri()));
+ RETURN_DORIS_STATUS_IF_RESULT_ERROR(_arrow_client, FlightClient::Connect(location));
+ std::string command;
+ RETURN_IF_ERROR(func_meta.serialize_to_json(&command));
+ FlightDescriptor descriptor = FlightDescriptor::Command(command);
+ arrow::flight::FlightClient::DoExchangeResult exchange_res;
+ RETURN_DORIS_STATUS_IF_RESULT_ERROR(exchange_res, _arrow_client->DoExchange(descriptor));
+ _reader = std::move(exchange_res.reader);
+ _writer = std::move(exchange_res.writer);
+ _process = std::move(process);
+ _inited = true;
+ return Status::OK();
+}
+
+Status PythonUDFClient::evaluate(const arrow::RecordBatch& input,
+ std::shared_ptr<arrow::RecordBatch>* output) {
+ if (!_process->is_alive()) {
+ return Status::RuntimeError("Python UDF process is not alive");
+ }
+
+ // Step 1: Begin exchange with schema (only once)
+ if (UNLIKELY(!_begin)) {
+ auto begin_res = _writer->Begin(input.schema());
+ if (!begin_res.ok()) {
+ return handle_error(begin_res);
+ }
+ _begin = true;
+ }
+
+ // Step 2: Write the record batch to server
+ auto write_res = _writer->WriteRecordBatch(input);
+ if (!write_res.ok()) {
+ return handle_error(write_res);
+ }
+
+ // Step 3: Read response from server
+ auto read_res = _reader->Next();
+ if (!read_res.ok()) {
+ return handle_error(read_res.status());
+ }
+
+ arrow::flight::FlightStreamChunk chunk = std::move(*read_res);
+ if (!chunk.data) {
+ _process->shutdown();
+ return Status::InternalError("Received empty RecordBatch from Python UDF server");
+ }
+ *output = std::move(chunk.data);
+ return Status::OK();
+}
+
+Status PythonUDFClient::handle_error(arrow::Status status) {
+ DCHECK(!status.ok());
+ _writer.reset();
+ _reader.reset();
+ _process->shutdown();
+ std::string msg = status.message();
+ size_t pos = msg.find("The above exception was the direct cause");
+ if (pos != std::string::npos) {
+ msg = msg.substr(0, pos);
+ }
+ return Status::RuntimeError(trim(msg));
+}
+
+Status PythonUDFClient::close() {
+ if (!_inited || !_writer) return Status::OK();
+ auto writer_res = _writer->Close();
+ if (!writer_res.ok()) {
+ return handle_error(writer_res);
+ }
+ _inited = false;
+ _begin = false;
+ _arrow_client.reset();
+ _writer.reset();
+ _reader.reset();
+ if (auto* pool = _process->pool(); pool) {
+ pool->return_process(std::move(_process));
+ }
+ return Status::OK();
+}
+
+} // namespace doris
\ No newline at end of file
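A sketch of the intended client lifecycle (`func_meta`, `process`, and `input_batch` are assumed to come from the surrounding runtime):

```cpp
// Create a client bound to one pooled worker process, stream Arrow batches
// through the DoExchange channel, then close (which returns the process).
doris::PythonUDFClientPtr client;
RETURN_IF_ERROR(doris::PythonUDFClient::create(func_meta, std::move(process), &client));

std::shared_ptr<arrow::RecordBatch> output;
RETURN_IF_ERROR(client->evaluate(*input_batch, &output));

RETURN_IF_ERROR(client->close());
```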
diff --git a/be/src/udf/python/python_udf_client.h b/be/src/udf/python/python_udf_client.h
new file mode 100644
index 0000000..9d88b79
--- /dev/null
+++ b/be/src/udf/python/python_udf_client.h
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <arrow/status.h>
+
+#include "arrow/flight/client.h"
+#include "common/status.h"
+#include "udf/python/python_udf_meta.h"
+#include "udf/python/python_udf_runtime.h"
+#include "util/arrow/utils.h"
+
+namespace doris {
+
+class PythonUDFClient;
+class PythonUDFProcessPool;
+
+using PythonUDFClientPtr = std::shared_ptr<PythonUDFClient>;
+
+class PythonUDFClient {
+public:
+ using FlightDescriptor = arrow::flight::FlightDescriptor;
+ using FlightClient = arrow::flight::FlightClient;
+ using FlightStreamWriter = arrow::flight::FlightStreamWriter;
+ using FlightStreamReader = arrow::flight::FlightStreamReader;
+
+ PythonUDFClient() = default;
+
+ ~PythonUDFClient() = default;
+
+ static Status create(const PythonUDFMeta& func_meta, ProcessPtr process,
+ PythonUDFClientPtr* client);
+
+ Status init(const PythonUDFMeta& func_meta, ProcessPtr process);
+
+ Status evaluate(const arrow::RecordBatch& input, std::shared_ptr<arrow::RecordBatch>* output);
+
+ Status close();
+
+ Status handle_error(arrow::Status status);
+
+ std::string print_process() const { return _process->to_string(); }
+
+private:
+ DISALLOW_COPY_AND_ASSIGN(PythonUDFClient);
+
+ bool _inited = false;
+ bool _begin = false;
+ std::unique_ptr<FlightClient> _arrow_client;
+ std::unique_ptr<FlightStreamWriter> _writer;
+ std::unique_ptr<FlightStreamReader> _reader;
+ ProcessPtr _process;
+};
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/udf/python/python_udf_meta.cpp b/be/src/udf/python/python_udf_meta.cpp
new file mode 100644
index 0000000..ba3105d
--- /dev/null
+++ b/be/src/udf/python/python_udf_meta.cpp
@@ -0,0 +1,179 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "udf/python/python_udf_meta.h"
+
+#include <arrow/util/base64.h>
+#include <fmt/core.h>
+#include <rapidjson/stringbuffer.h>
+#include <rapidjson/writer.h>
+
+#include <sstream>
+
+#include "common/status.h"
+#include "util/arrow/utils.h"
+#include "util/string_util.h"
+
+namespace doris {
+
+Status PythonUDFMeta::convert_types_to_schema(const vectorized::DataTypes& types,
+ const std::string& timezone,
+ std::shared_ptr<arrow::Schema>* schema) {
+ assert(!types.empty());
+ arrow::SchemaBuilder builder;
+ for (size_t i = 0; i < types.size(); ++i) {
+ std::shared_ptr<arrow::DataType> arrow_type;
+ RETURN_IF_ERROR(convert_to_arrow_type(types[i], &arrow_type, timezone));
+ std::shared_ptr<arrow::Field> field = std::make_shared<arrow::Field>(
+ "arg" + std::to_string(i), arrow_type, types[i]->is_nullable());
+ RETURN_DORIS_STATUS_IF_ERROR(builder.AddField(field));
+ }
+ RETURN_DORIS_STATUS_IF_RESULT_ERROR(schema, builder.Finish());
+ return Status::OK();
+}
+
+Status PythonUDFMeta::serialize_arrow_schema(const std::shared_ptr<arrow::Schema>& schema,
+ std::shared_ptr<arrow::Buffer>* out) {
+ RETURN_DORIS_STATUS_IF_RESULT_ERROR(
+ out, arrow::ipc::SerializeSchema(*schema, arrow::default_memory_pool()));
+ return Status::OK();
+}
+
+/*
+ json format:
+ {
+ "name": "xxx",
+ "symbol": "xxx",
+ "location": "xxx",
+ "udf_load_type": 0 or 1,
+ "runtime_version": "x.xx.xx",
+ "always_nullable": true,
+ "inline_code": "base64_inline_code",
+ "input_types": "base64_input_types",
+ "return_type": "base64_return_type"
+ }
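+
+ Example (illustrative values only; base64 payloads truncated):
+ {
+ "name": "add_one",
+ "symbol": "add_one",
+ "location": "",
+ "udf_load_type": 0,
+ "runtime_version": "3.9.16",
+ "always_nullable": true,
+ "inline_code": "ZGVmIGFkZF9vbmUoeCk6...",
+ "input_types": "/////4gAAAAQ...",
+ "return_type": "/////4gAAAAQ..."
+ }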
+*/
+Status PythonUDFMeta::serialize_to_json(std::string* json_str) const {
+ rapidjson::Document doc;
+ doc.SetObject();
+ auto& allocator = doc.GetAllocator();
+ doc.AddMember("name", rapidjson::Value().SetString(_name.c_str(), allocator), allocator);
+ doc.AddMember("symbol", rapidjson::Value().SetString(_symbol.c_str(), allocator), allocator);
+ doc.AddMember("location", rapidjson::Value().SetString(_location.c_str(), allocator),
+ allocator);
+ doc.AddMember("udf_load_type", rapidjson::Value().SetInt(static_cast<int>(_type)), allocator);
+ doc.AddMember("runtime_version",
+ rapidjson::Value().SetString(_runtime_version.c_str(), allocator), allocator);
+ doc.AddMember("always_nullable", rapidjson::Value().SetBool(_always_nullable), allocator);
+
+ {
+ // Serialize base64 inline code to json
+ std::string base64_str = arrow::util::base64_encode(_inline_code);
+ doc.AddMember("inline_code", rapidjson::Value().SetString(base64_str.c_str(), allocator),
+ allocator);
+ }
+ {
+ // Serialize base64 input types to json
+ std::shared_ptr<arrow::Schema> input_schema;
+ RETURN_IF_ERROR(convert_types_to_schema(_input_types, TimezoneUtils::default_time_zone,
+ &input_schema));
+ std::shared_ptr<arrow::Buffer> input_schema_buffer;
+ RETURN_IF_ERROR(serialize_arrow_schema(input_schema, &input_schema_buffer));
+ std::string base64_str =
+ arrow::util::base64_encode({input_schema_buffer->data_as<char>(),
+ static_cast<size_t>(input_schema_buffer->size())});
+ doc.AddMember("input_types", rapidjson::Value().SetString(base64_str.c_str(), allocator),
+ allocator);
+ }
+ {
+ // Serialize base64 return type to json
+ std::shared_ptr<arrow::Schema> return_schema;
+ RETURN_IF_ERROR(convert_types_to_schema({_return_type}, TimezoneUtils::default_time_zone,
+ &return_schema));
+ std::shared_ptr<arrow::Buffer> return_schema_buffer;
+ RETURN_IF_ERROR(serialize_arrow_schema(return_schema, &return_schema_buffer));
+ std::string base64_str =
+ arrow::util::base64_encode({return_schema_buffer->data_as<char>(),
+ static_cast<size_t>(return_schema_buffer->size())});
+ doc.AddMember("return_type", rapidjson::Value().SetString(base64_str.c_str(), allocator),
+ allocator);
+ }
+
+ // Convert document to json string
+ rapidjson::StringBuffer buffer;
+ rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+ doc.Accept(writer);
+ *json_str = std::string(buffer.GetString(), buffer.GetSize());
+ return Status::OK();
+}
+
+std::string PythonUDFMeta::to_string() const {
+ std::stringstream input_types_ss;
+ input_types_ss << "<";
+ for (size_t i = 0; i < _input_types.size(); ++i) {
+ input_types_ss << _input_types[i]->get_name();
+ if (i != _input_types.size() - 1) {
+ input_types_ss << ", ";
+ }
+ }
+ input_types_ss << ">";
+ return fmt::format(
+ "[name: {}, symbol: {}, location: {}, runtime_version: {}, always_nullable: {}, "
+ "inline_code: {}][input_types: {}][return_type: {}]",
+ _name, _symbol, _location, _runtime_version, _always_nullable, _inline_code,
+ input_types_ss.str(), _return_type->get_name());
+}
+
+Status PythonUDFMeta::check() const {
+ if (trim(_name).empty()) {
+ return Status::InvalidArgument("Python UDF name is empty");
+ }
+
+ if (trim(_symbol).empty()) {
+ return Status::InvalidArgument("Python UDF symbol is empty");
+ }
+
+ if (trim(_runtime_version).empty()) {
+ return Status::InvalidArgument("Python UDF runtime version is empty");
+ }
+
+ if (_input_types.empty()) {
+ return Status::InvalidArgument("Python UDF input types is empty");
+ }
+
+ if (!_return_type) {
+ return Status::InvalidArgument("Python UDF return type is empty");
+ }
+
+ if (_type == PythonUDFLoadType::UNKNOWN) {
+ return Status::InvalidArgument(
+ "Python UDF load type is invalid, please check inline code or file path");
+ }
+
+ if (_type == PythonUDFLoadType::MODULE) {
+ if (trim(_location).empty()) {
+ return Status::InvalidArgument("Non-inline Python UDF location is empty");
+ }
+ if (trim(_checksum).empty()) {
+ return Status::InvalidArgument("Non-inline Python UDF checksum is empty");
+ }
+ }
+
+ return Status::OK();
+}
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/udf/python/python_udf_meta.h b/be/src/udf/python/python_udf_meta.h
new file mode 100644
index 0000000..71c808a
--- /dev/null
+++ b/be/src/udf/python/python_udf_meta.h
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <sys/types.h>
+
+#include "arrow/buffer.h"
+#include "arrow/flight/client.h"
+#include "arrow/flight/server.h"
+#include "common/status.h"
+#include "util/arrow/row_batch.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris {
+
+enum class PythonUDFLoadType : uint8_t { INLINE = 0, MODULE = 1, UNKNOWN = 2 };
+
+struct PythonUDFMeta {
+ int64_t _id;
+ std::string _name;
+ std::string _symbol;
+ std::string _location;
+ std::string _checksum;
+ std::string _runtime_version;
+ std::string _inline_code;
+ bool _always_nullable;
+ vectorized::DataTypes _input_types;
+ vectorized::DataTypePtr _return_type;
+ PythonUDFLoadType _type;
+
+ static Status convert_types_to_schema(const vectorized::DataTypes& types,
+ const std::string& timezone,
+ std::shared_ptr<arrow::Schema>* schema);
+
+ static Status serialize_arrow_schema(const std::shared_ptr<arrow::Schema>& schema,
+ std::shared_ptr<arrow::Buffer>* out);
+
+ Status serialize_to_json(std::string* json_str) const;
+
+ std::string to_string() const;
+
+ Status check() const;
+};
+
+} // namespace doris
\ No newline at end of file
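A sketch of how the BE might populate this struct for an inline UDF before handing it to the client (the `DataTypeInt32` input/return types are illustrative):

```cpp
// Assemble metadata for an inline Python UDF and validate it.
doris::PythonUDFMeta meta;
meta._name = "add_one";
meta._symbol = "add_one";
meta._runtime_version = "3.9.16";
meta._inline_code = "def add_one(x):\n    return x + 1";
meta._always_nullable = true;
meta._input_types = {std::make_shared<doris::vectorized::DataTypeInt32>()};
meta._return_type = std::make_shared<doris::vectorized::DataTypeInt32>();
meta._type = doris::PythonUDFLoadType::INLINE;
RETURN_IF_ERROR(meta.check());

std::string command;
RETURN_IF_ERROR(meta.serialize_to_json(&command));
```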
diff --git a/be/src/udf/python/python_udf_runtime.cpp b/be/src/udf/python/python_udf_runtime.cpp
new file mode 100644
index 0000000..3683fc4
--- /dev/null
+++ b/be/src/udf/python/python_udf_runtime.cpp
@@ -0,0 +1,185 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "udf/python/python_udf_runtime.h"
+
+#include <butil/fd_utility.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <boost/process.hpp>
+
+#include "common/config.h"
+#include "common/logging.h"
+#include "common/status.h"
+#include "udf/python/python_udf_server.h"
+
+namespace doris {
+
+void PythonUDFProcess::remove_unix_socket() {
+ if (_uri.empty() || _unix_socket_file_path.empty()) return;
+
+ if (unlink(_unix_socket_file_path.c_str()) == 0) {
+ LOG(INFO) << "Successfully removed unix socket: " << _unix_socket_file_path;
+ return;
+ }
+
+ if (errno == ENOENT) {
+ // File does not exist, this is fine, no need to warn
+ LOG(INFO) << "Unix socket not found (already removed): " << _uri;
+ } else {
+ LOG(WARNING) << "Failed to remove unix socket " << _uri << ": " << std::strerror(errno)
+ << " (errno=" << errno << ")";
+ }
+}
+
+void PythonUDFProcess::shutdown() {
+ if (!_child.valid() || _is_shutdown) return;
+
+ _child.terminate();
+ bool graceful = false;
+ constexpr std::chrono::milliseconds retry_interval(100); // 100ms
+
+ for (int i = 0; i < TERMINATE_RETRY_TIMES; ++i) {
+ if (!_child.running()) {
+ graceful = true;
+ break;
+ }
+ std::this_thread::sleep_for(retry_interval);
+ }
+
+ if (!graceful) {
+ LOG(WARNING) << "Python process did not terminate gracefully, sending SIGKILL";
+ ::kill(_child.id(), SIGKILL);
+ _child.wait();
+ }
+
+ if (int exit_code = _child.exit_code(); exit_code > 128 && exit_code <= 255) {
+ int signal = exit_code - 128;
+ LOG(INFO) << "Python process was killed by signal " << signal;
+ } else {
+ LOG(INFO) << "Python process exited normally with code: " << exit_code;
+ }
+
+ _output_stream.close();
+ remove_unix_socket();
+ _is_shutdown = true;
+}
+
+std::string PythonUDFProcess::to_string() const {
+ return fmt::format(
+ "PythonUDFProcess(child_pid={}, uri={}, "
+ "unix_socket_file_path={}, is_shutdown={})",
+ _child.id(), _uri, _unix_socket_file_path, _is_shutdown);
+}
+
+Status PythonUDFProcessPool::init() {
+ if (_init_pool_size > _max_pool_size) {
+ return Status::InvalidArgument("min_idle cannot be greater than max_pool_size");
+ }
+
+ std::lock_guard<std::mutex> lock(_mtx);
+ for (size_t i = 0; i < _init_pool_size; ++i) {
+ ProcessPtr process;
+ RETURN_IF_ERROR(PythonUDFServerManager::instance().fork(this, &process));
+ _idle_processes.push(std::move(process));
+ ++_current_size;
+ }
+
+ return Status::OK();
+}
+
+Status PythonUDFProcessPool::borrow_process(ProcessPtr* process) {
+ std::unique_lock<std::mutex> lock(_mtx);
+
+ if (_is_shutdown) {
+ return Status::RuntimeError("UDF process pool is shutdown");
+ }
+
+ // Try to get an idle process or create a new one
+ while (true) {
+ // If there's an idle process, return it immediately
+ if (!_idle_processes.empty()) {
+ *process = std::move(_idle_processes.front());
+ _idle_processes.pop();
+ return Status::OK();
+ }
+
+ // If we can create a new process, do it
+ if (_current_size < _max_pool_size) {
+ RETURN_IF_ERROR(PythonUDFServerManager::instance().fork(this, process));
+ ++_current_size;
+ return Status::OK();
+ }
+
+ // Pool is exhausted, wait for a process to be returned
+ LOG(INFO) << "Python UDF process pool exhausted (current size: " << _current_size
+ << ", max size: " << _max_pool_size << "), waiting for available process...";
+
+ auto timeout = std::chrono::milliseconds(config::python_process_pool_wait_timeout_ms);
+ std::cv_status wait_result = _cv.wait_for(lock, timeout);
+
+ // Check if shutdown during wait
+ if (_is_shutdown) {
+ return Status::RuntimeError("UDF process pool is shutdown");
+ }
+
+ // If timeout occurred and still no idle processes
+ if (wait_result == std::cv_status::timeout && _idle_processes.empty()) {
+ return Status::RuntimeError(
+ "UDF process pool exhausted (max size = {}), waited for {} ms but no "
+ "process became available. Please increase max_python_process_nums parameter "
+ "or python_process_pool_wait_timeout_ms and restart BE",
+ _max_pool_size, config::python_process_pool_wait_timeout_ms);
+ }
+
+ // If notified or spurious wakeup, loop back to check conditions
+ }
+}
+
+void PythonUDFProcessPool::return_process(ProcessPtr process) {
+ {
+ std::lock_guard<std::mutex> lock(_mtx);
+
+ if (!process || _is_shutdown) return;
+
+ if (!process->is_alive()) {
+ --_current_size;
+ LOG(WARNING) << "return dead process: " << process->to_string();
+ return;
+ }
+
+ _idle_processes.push(std::move(process));
+ }
+ // Notify one waiting thread that a process is available
+ _cv.notify_one();
+}
+
+void PythonUDFProcessPool::shutdown() {
+ std::lock_guard<std::mutex> lock(_mtx);
+
+ if (_is_shutdown) return;
+
+ while (!_idle_processes.empty()) {
+ _idle_processes.front()->shutdown();
+ _idle_processes.pop();
+ }
+
+ _is_shutdown = true;
+}
+
+} // namespace doris
\ No newline at end of file
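A sketch of how a pool is driven (`version` is an already-scanned `PythonVersion`; error handling elided):

```cpp
// Borrow a worker, use it, hand it back. return_process() wakes one thread
// blocked in borrow_process() via the condition variable.
doris::PythonUDFProcessPool pool(version, /*max_pool_size=*/16, /*min_idle=*/4);
RETURN_IF_ERROR(pool.init());

doris::ProcessPtr process;
RETURN_IF_ERROR(pool.borrow_process(&process));
// ... hand the process to a PythonUDFClient ...
pool.return_process(std::move(process));
```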
diff --git a/be/src/udf/python/python_udf_runtime.h b/be/src/udf/python/python_udf_runtime.h
new file mode 100644
index 0000000..aa95414
--- /dev/null
+++ b/be/src/udf/python/python_udf_runtime.h
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <boost/process.hpp>
+#include <condition_variable>
+#include <queue>
+
+#include "common/status.h"
+#include "python_env.h"
+
+namespace doris {
+
+static const char* UNIX_SOCKET_PREFIX = "grpc+unix://";
+static const char* BASE_UNIX_SOCKET_PATH_TEMPLATE = "{}{}/lib/udf/python/python_udf";
+static const char* UNIX_SOCKET_PATH_TEMPLATE = "{}_{}.sock";
+static const char* FLIGHT_SERVER_PATH_TEMPLATE = "{}/plugins/python_udf/{}";
+static const char* FLIGHT_SERVER_FILENAME = "python_udf_server.py";
+static const char* EXECUTABLE_PYTHON_FILENAME = "python";
+
+inline std::string get_base_unix_socket_path() {
+ return fmt::format(BASE_UNIX_SOCKET_PATH_TEMPLATE, UNIX_SOCKET_PREFIX,
+ std::getenv("DORIS_HOME"));
+}
+
+inline std::string get_unix_socket_path(pid_t child_pid) {
+ return fmt::format(UNIX_SOCKET_PATH_TEMPLATE, get_base_unix_socket_path(), child_pid);
+}
+
+inline std::string get_unix_socket_file_path(pid_t child_pid) {
+ return fmt::format(UNIX_SOCKET_PATH_TEMPLATE,
+ fmt::format(BASE_UNIX_SOCKET_PATH_TEMPLATE, "", std::getenv("DORIS_HOME")),
+ child_pid);
+}
+
+inline std::string get_fight_server_path() {
+ return fmt::format(FLIGHT_SERVER_PATH_TEMPLATE, std::getenv("DORIS_HOME"),
+ FLIGHT_SERVER_FILENAME);
+}
+
+class PythonUDFProcess;
+class PythonUDFProcessPool;
+
+using ProcessPtr = std::unique_ptr<PythonUDFProcess>;
+using PythonUDFProcessPoolPtr = std::unique_ptr<PythonUDFProcessPool>;
+
+class PythonUDFProcess {
+public:
+ PythonUDFProcess(boost::process::child child, boost::process::ipstream output_stream,
+ PythonUDFProcessPool* pool)
+ : _is_shutdown(false),
+ _uri(get_unix_socket_path(child.id())),
+ _unix_socket_file_path(get_unix_socket_file_path(child.id())),
+ _child(std::move(child)),
+ _output_stream(std::move(output_stream)),
+ _pool(pool) {}
+
+ ~PythonUDFProcess() { shutdown(); }
+
+ std::string get_uri() const { return _uri; }
+
+ const std::string& get_socket_file_path() const { return _unix_socket_file_path; }
+
+ bool is_shutdown() const { return _is_shutdown; }
+
+ bool is_alive() const {
+ if (_is_shutdown) return false;
+ return _child.running();
+ }
+
+ void remove_unix_socket();
+
+ void shutdown();
+
+ std::string to_string() const;
+
+ PythonUDFProcessPool* pool() const { return _pool; }
+
+private:
+ constexpr static int TERMINATE_RETRY_TIMES = 10;
+ constexpr static size_t MAX_ACCUMULATED_LOG_SIZE = 65536;
+
+ bool _is_shutdown {false};
+ std::string _uri;
+ std::string _unix_socket_file_path;
+ mutable boost::process::child _child;
+ boost::process::ipstream _output_stream;
+ std::string _accumulated_log;
+ PythonUDFProcessPool* _pool {nullptr};
+};
+
+class PythonUDFProcessPool {
+public:
+ explicit PythonUDFProcessPool(PythonVersion version, size_t max_pool_size, size_t min_idle)
+ : _python_version(version),
+ _max_pool_size(max_pool_size),
+ _init_pool_size(min_idle),
+ _current_size(0),
+ _is_shutdown(false) {}
+
+ explicit PythonUDFProcessPool(PythonVersion version)
+ : _python_version(version),
+ _max_pool_size(16),
+ _init_pool_size(4),
+ _current_size(0),
+ _is_shutdown(false) {}
+
+ Status init();
+
+ Status borrow_process(ProcessPtr* process);
+
+ void return_process(ProcessPtr process);
+
+ void shutdown();
+
+ const PythonVersion& get_python_version() const { return _python_version; }
+
+private:
+ PythonVersion _python_version;
+ size_t _max_pool_size;
+ size_t _init_pool_size;
+ size_t _current_size;
+ bool _is_shutdown;
+ std::queue<ProcessPtr> _idle_processes;
+ // protect _idle_processes, _is_shutdown and _current_size
+ mutable std::mutex _mtx;
+ // condition variable to notify waiting threads when a process is returned
+ std::condition_variable _cv;
+};
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/udf/python/python_udf_server.cpp b/be/src/udf/python/python_udf_server.cpp
new file mode 100644
index 0000000..a74008d
--- /dev/null
+++ b/be/src/udf/python/python_udf_server.cpp
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "udf/python/python_udf_server.h"
+
+#include <butil/fd_utility.h>
+#include <dirent.h>
+#include <fmt/core.h>
+#include <sys/poll.h>
+
+#include <boost/asio.hpp>
+#include <boost/process.hpp>
+
+#include "common/config.h"
+#include "udf/python/python_udf_client.h"
+
+namespace doris {
+
+Status PythonUDFServerManager::init(const std::vector<PythonVersion>& versions) {
+ std::lock_guard<std::mutex> lock(_pools_mutex);
+ for (const auto& version : versions) {
+ if (_pools.find(version) != _pools.end()) continue;
+ PythonUDFProcessPoolPtr new_pool = std::make_unique<PythonUDFProcessPool>(
+ version, config::max_python_process_nums, config::min_python_process_nums);
+ RETURN_IF_ERROR(new_pool->init());
+ _pools[version] = std::move(new_pool);
+ }
+ return Status::OK();
+}
+
+Status PythonUDFServerManager::get_client(const PythonUDFMeta& func_meta,
+ const PythonVersion& version,
+ PythonUDFClientPtr* client) {
+ PythonUDFProcessPoolPtr* pool = nullptr;
+ {
+ std::lock_guard<std::mutex> lock(_pools_mutex);
+ if (_pools.find(version) == _pools.end()) {
+ PythonUDFProcessPoolPtr new_pool = std::make_unique<PythonUDFProcessPool>(
+ version, config::max_python_process_nums, config::min_python_process_nums);
+ RETURN_IF_ERROR(new_pool->init());
+ _pools[version] = std::move(new_pool);
+ }
+ pool = &_pools[version];
+ }
+ ProcessPtr process;
+ RETURN_IF_ERROR((*pool)->borrow_process(&process));
+ RETURN_IF_ERROR(PythonUDFClient::create(func_meta, std::move(process), client));
+ return Status::OK();
+}
+
+Status PythonUDFServerManager::fork(PythonUDFProcessPool* pool, ProcessPtr* process) {
+ DCHECK(pool != nullptr);
+ const PythonVersion& version = pool->get_python_version();
+ // e.g. /usr/local/python3.7/bin/python3
+ std::string python_executable_path = version.get_executable_path();
+ // e.g. /{DORIS_HOME}/plugins/python_udf/python_udf_server.py
+    std::string flight_server_path = get_flight_server_path();
+ // e.g. grpc+unix:///home/doris/output/be/lib/udf/python/python_udf
+ std::string base_unix_socket_path = get_base_unix_socket_path();
+    std::vector<std::string> args = {"-u", // unbuffered output
+                                     flight_server_path, base_unix_socket_path};
+ boost::process::environment env = boost::this_process::environment();
+ boost::process::ipstream child_output; // input stream from child
+
+ try {
+ boost::process::child c(
+ python_executable_path, args, boost::process::std_out > child_output,
+ boost::process::env = env,
+ boost::process::on_exit([](int exit_code, const std::error_code& ec) {
+ if (ec) {
+ LOG(WARNING) << "Python UDF server exited with error: " << ec.message();
+ }
+ }));
+
+ std::string log_line;
+ std::string full_log;
+ bool started_successfully = false;
+ std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
+ const auto timeout = std::chrono::milliseconds(5000);
+
+        // NOTE: getline() blocks, so the timeout is only re-checked between lines
+        while (std::chrono::steady_clock::now() - start < timeout) {
+ if (std::getline(child_output, log_line)) {
+ full_log += log_line + "\n";
+ LOG(INFO) << fmt::format("Start python server, log_line: {}, full_log: {}",
+ log_line, full_log);
+ if (log_line == "Start python server successfully") {
+ started_successfully = true;
+ break;
+ }
+ } else {
+ if (!c.running()) {
+ break;
+ }
+ std::this_thread::sleep_for(std::chrono::milliseconds(10));
+ }
+ }
+
+ if (!started_successfully) {
+ if (c.running()) {
+                c.terminate(); // terminate() sends SIGKILL on POSIX
+ c.wait(); // wait for exit to avoid zombie processes
+ }
+
+ std::string error_msg = full_log.empty() ? "No output from Python server" : full_log;
+ LOG(ERROR) << "Python server start failed:\n" << error_msg;
+            return Status::InternalError("Python server start failed:\n{}", error_msg);
+ }
+
+ *process = std::make_unique<PythonUDFProcess>(std::move(c), std::move(child_output), pool);
+ } catch (const std::exception& e) {
+ return Status::InternalError("Failed to start Python UDF server: {}", e.what());
+ }
+
+ return Status::OK();
+}
+
+void PythonUDFServerManager::shutdown() {
+ std::lock_guard lock(_pools_mutex);
+ for (auto& pool : _pools) {
+ pool.second->shutdown();
+ }
+ _pools.clear();
+ LOG(INFO) << "Python UDF server manager shutdown successfully";
+}
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/udf/python/python_udf_server.h b/be/src/udf/python/python_udf_server.h
new file mode 100644
index 0000000..b21b874
--- /dev/null
+++ b/be/src/udf/python/python_udf_server.h
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/status.h"
+#include "udf/python/python_udf_client.h"
+#include "udf/python/python_udf_meta.h"
+#include "udf/python/python_udf_runtime.h"
+
+namespace doris {
+
+class PythonUDFServerManager {
+public:
+ PythonUDFServerManager() = default;
+
+ ~PythonUDFServerManager() = default;
+
+ static PythonUDFServerManager& instance() {
+ static PythonUDFServerManager instance;
+ return instance;
+ }
+
+ Status init(const std::vector<PythonVersion>& versions);
+
+ Status get_client(const PythonUDFMeta& func_meta, const PythonVersion& version,
+ PythonUDFClientPtr* client);
+
+ Status fork(PythonUDFProcessPool* pool, ProcessPtr* process);
+
+ void shutdown();
+
+private:
+ std::unordered_map<PythonVersion, PythonUDFProcessPoolPtr> _pools;
+ // protect _pools
+ std::mutex _pools_mutex;
+};
+
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/udf/python/python_udf_server.py b/be/src/udf/python/python_udf_server.py
new file mode 100644
index 0000000..7095b52
--- /dev/null
+++ b/be/src/udf/python/python_udf_server.py
@@ -0,0 +1,1054 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import argparse
+import base64
+import importlib
+import inspect
+import json
+import sys
+import os
+import traceback
+import logging
+import time
+import threading
+from abc import ABC, abstractmethod
+from contextlib import contextmanager
+from typing import Any, Callable, Optional, Tuple, get_origin
+from datetime import datetime
+from enum import Enum
+from pathlib import Path
+
+import pandas as pd
+import pyarrow as pa
+from pyarrow import flight
+
+
+class ServerState:
+ """Global server state container."""
+
+ unix_socket_path: str = ""
+
+ @staticmethod
+ def setup_logging():
+ """Setup logging configuration for the UDF server."""
+ doris_home = os.getenv("DORIS_HOME")
+ if not doris_home:
+ # Fallback to current directory if DORIS_HOME is not set
+ doris_home = os.getcwd()
+
+ log_dir = os.path.join(doris_home, "lib", "udf", "python")
+ os.makedirs(log_dir, exist_ok=True)
+ log_file = os.path.join(log_dir, "python_udf_output.log")
+
+ logging.basicConfig(
+ level=logging.INFO,
+ format="[%(asctime)s] [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s",
+ handlers=[
+ logging.FileHandler(log_file, mode="a", encoding="utf-8"),
+ logging.StreamHandler(sys.stderr), # Also log to stderr for debugging
+ ],
+ )
+ logging.info("Logging initialized. Log file: %s", log_file)
+
+ @staticmethod
+ def extract_base_unix_socket_path(unix_socket_uri: str) -> str:
+ """
+ Extract the file system path from a gRPC Unix socket URI.
+
+ Args:
+ unix_socket_uri: URI in format 'grpc+unix:///path/to/socket'
+
+ Returns:
+ The file system path without the protocol prefix
+ """
+ if unix_socket_uri.startswith("grpc+unix://"):
+ unix_socket_uri = unix_socket_uri[len("grpc+unix://") :]
+ return unix_socket_uri
+
+ @staticmethod
+ def remove_unix_socket(unix_socket_uri: str) -> None:
+ """
+ Remove the Unix domain socket file if it exists.
+
+ Args:
+ unix_socket_uri: URI of the Unix socket to remove
+ """
+ if unix_socket_uri is None:
+ return
+ base_unix_socket_path = ServerState.extract_base_unix_socket_path(
+ unix_socket_uri
+ )
+ if os.path.exists(base_unix_socket_path):
+ try:
+ os.unlink(base_unix_socket_path)
+ logging.info(
+ "Removed UNIX socket %s successfully", base_unix_socket_path
+ )
+ except OSError as e:
+ logging.error(
+ "Failed to remove UNIX socket %s: %s", base_unix_socket_path, e
+ )
+ else:
+ logging.warning("UNIX socket %s does not exist", base_unix_socket_path)
+
+ @staticmethod
+ def monitor_parent_exit():
+ """
+ Monitor the parent process and exit gracefully if it dies.
+ This prevents orphaned UDF server processes.
+ """
+ parent_pid = os.getppid()
+ if parent_pid == 1:
+ # Parent process is init, no need to monitor
+ logging.info("Parent process is init (PID 1), skipping parent monitoring")
+ return
+
+ logging.info("Started monitoring parent process (PID: %s)", parent_pid)
+
+ while True:
+ try:
+ # os.kill(pid, 0) only checks whether the process exists
+ # without sending an actual signal
+ os.kill(parent_pid, 0)
+ except OSError:
+ # Parent process died
+ ServerState.remove_unix_socket(ServerState.unix_socket_path)
+ logging.error(
+ "Parent process %s died, exiting UDF server, unix socket path: %s",
+ parent_pid,
+ ServerState.unix_socket_path,
+ )
+ os._exit(0)
+ # Check every 2 seconds
+ time.sleep(2)
+
+
+ServerState.setup_logging()
+monitor_thread = threading.Thread(target=ServerState.monitor_parent_exit, daemon=True)
+monitor_thread.start()
+
+
+@contextmanager
+def temporary_sys_path(path: str):
+ """
+ Context manager to temporarily add a path to sys.path.
+ Ensures the path is removed after use to avoid pollution.
+
+ Args:
+ path: Directory path to add to sys.path
+
+ Yields:
+ None
+ """
+ path_added = False
+ if path not in sys.path:
+ sys.path.insert(0, path)
+ path_added = True
+ logging.debug("Temporarily added to sys.path: %s", path)
+
+ try:
+ yield
+ finally:
+ if path_added and path in sys.path:
+ sys.path.remove(path)
+ logging.debug("Removed from sys.path: %s", path)
+
+
+class VectorType(Enum):
+ """Enum representing supported vector types."""
+
+ LIST = "list"
+ PANDAS_SERIES = "pandas.Series"
+ ARROW_ARRAY = "pyarrow.Array"
+
+ @property
+ def python_type(self):
+ """
+ Returns the Python type corresponding to this VectorType.
+
+ Returns:
+ The Python type class (list, pd.Series, or pa.Array)
+ """
+ mapping = {
+ VectorType.LIST: list,
+ VectorType.PANDAS_SERIES: pd.Series,
+ VectorType.ARROW_ARRAY: pa.Array,
+ }
+ return mapping[self]
+
+ @staticmethod
+ def resolve_vector_type(param: inspect.Parameter):
+ """
+ Resolves the param's type annotation to the corresponding VectorType enum.
+ Returns None if the type is unsupported or not a vector type.
+ """
+ if (
+ param is None
+ or param.annotation is None
+ or param.annotation is inspect.Parameter.empty
+ ):
+ return None
+
+ annotation = param.annotation
+ origin = get_origin(annotation)
+ raw_type = origin if origin is not None else annotation
+
+ if raw_type is list:
+ return VectorType.LIST
+ if raw_type is pd.Series:
+ return VectorType.PANDAS_SERIES
+
+ return None
+
+
+class PythonUDFMeta:
+ """Metadata container for a Python UDF."""
+
+ def __init__(
+ self,
+ name: str,
+ symbol: str,
+ location: str,
+ udf_load_type: int,
+ runtime_version: str,
+ always_nullable: bool,
+ inline_code: bytes,
+ input_types: pa.Schema,
+ output_type: pa.DataType,
+ ) -> None:
+ """
+ Initialize Python UDF metadata.
+
+ Args:
+ name: UDF function name
+ symbol: Symbol to load (function name or module.function)
+ location: File path or directory containing the UDF
+ udf_load_type: 0 for inline code, 1 for module
+ runtime_version: Python runtime version requirement
+ always_nullable: Whether the UDF can return NULL values
+ inline_code: Base64-encoded inline Python code (if applicable)
+ input_types: PyArrow schema for input parameters
+ output_type: PyArrow data type for return value
+ """
+ self.name = name
+ self.symbol = symbol
+ self.location = location
+ self.udf_load_type = udf_load_type
+ self.runtime_version = runtime_version
+ self.always_nullable = always_nullable
+ self.inline_code = inline_code
+ self.input_types = input_types
+ self.output_type = output_type
+
+ def __str__(self) -> str:
+ """Returns a string representation of the UDF metadata."""
+ udf_load_type_str = "INLINE" if self.udf_load_type == 0 else "MODULE"
+ return (
+ f"PythonUDFMeta(name={self.name}, symbol={self.symbol}, "
+ f"location={self.location}, udf_load_type={udf_load_type_str}, runtime_version={self.runtime_version}, "
+ f"always_nullable={self.always_nullable}, inline_code={self.inline_code}, "
+ f"input_types={self.input_types}, output_type={self.output_type})"
+ )
+
+
+class AdaptivePythonUDF:
+ """
+ A wrapper around a UDF function that supports both scalar and vectorized execution modes.
+ The mode is determined by the type hints of the function parameters.
+ """
+
+ def __init__(self, python_udf_meta: PythonUDFMeta, func: Callable) -> None:
+ """
+ Initialize the adaptive UDF wrapper.
+
+ Args:
+ python_udf_meta: Metadata describing the UDF
+ func: The actual Python function to execute
+ """
+ self.python_udf_meta = python_udf_meta
+ self._eval_func = func
+
+ def __str__(self) -> str:
+ """Returns a string representation of the UDF wrapper."""
+ input_type_strs = [str(t) for t in self.python_udf_meta.input_types.types]
+ output_type_str = str(self.python_udf_meta.output_type)
+ eval_func_str = f"{self.python_udf_meta.name}({', '.join(input_type_strs)}) -> {output_type_str}"
+ return f"AdaptivePythonUDF(python_udf_meta: {self.python_udf_meta}, eval_func: {eval_func_str})"
+
+ def __call__(self, record_batch: pa.RecordBatch) -> pa.Array:
+ """
+ Executes the UDF on the given record batch. Supports both scalar and vectorized modes.
+
+ :param record_batch: Input data with N columns, each of length num_rows
+ :return: Output array of length num_rows
+ """
+ if record_batch.num_rows == 0:
+ return pa.array([], type=self._get_output_type())
+
+ if self._should_use_vectorized():
+ logging.info("Using vectorized mode for UDF: %s", self.python_udf_meta.name)
+ return self._vectorized_call(record_batch)
+
+ logging.info("Using scalar mode for UDF: %s", self.python_udf_meta.name)
+ return self._scalar_call(record_batch)
+
+ @staticmethod
+ def _cast_arrow_to_vector(arrow_array: pa.Array, vec_type: VectorType):
+ """
+ Convert a pa.Array to an instance of the specified VectorType.
+ """
+ if vec_type == VectorType.LIST:
+ return arrow_array.to_pylist()
+ elif vec_type == VectorType.PANDAS_SERIES:
+ return arrow_array.to_pandas()
+ else:
+ raise ValueError(f"Unsupported vector type: {vec_type}")
+
+ def _should_use_vectorized(self) -> bool:
+ """
+ Determines whether to use vectorized mode based on parameter type annotations.
+ Returns True if any parameter is annotated as:
+ - list
+ - pd.Series
+ """
+ try:
+ signature = inspect.signature(self._eval_func)
+ except ValueError:
+ # Cannot inspect built-in or C functions; default to scalar
+ return False
+
+ for param in signature.parameters.values():
+ if VectorType.resolve_vector_type(param):
+ return True
+
+ return False
+
+ def _convert_from_arrow_to_py(self, field):
+ if field is None:
+ return None
+
+ if pa.types.is_map(field.type):
+ # pyarrow.lib.MapScalar's as_py() returns a list of tuples, convert to dict
+ list_of_tuples = field.as_py()
+ return dict(list_of_tuples) if list_of_tuples is not None else None
+ return field.as_py()
+
+ def _scalar_call(self, record_batch: pa.RecordBatch) -> pa.Array:
+ """
+ Applies the UDF in scalar mode: one row at a time.
+
+ Args:
+ record_batch: Input data batch
+
+ Returns:
+ Output array with results for each row
+ """
+ columns = record_batch.columns
+ num_rows = record_batch.num_rows
+ result = []
+
+ for i in range(num_rows):
+ converted_args = [self._convert_from_arrow_to_py(col[i]) for col in columns]
+
+ try:
+ res = self._eval_func(*converted_args)
+ # Check if result is None when always_nullable is False
+ if res is None and not self.python_udf_meta.always_nullable:
+ raise RuntimeError(
+ f"the result of row {i} is null, but the return type is not nullable, "
+ f"please check the always_nullable property in create function statement, "
+ f"it should be true"
+ )
+ result.append(res)
+ except Exception as e:
+ logging.error(
+ "Error in scalar UDF execution at row %s: %s\nArgs: %s\nTraceback: %s",
+ i,
+ e,
+ converted_args,
+ traceback.format_exc(),
+ )
+ # Return None for failed rows if always_nullable is True
+ if self.python_udf_meta.always_nullable:
+ result.append(None)
+ else:
+ raise
+
+ return pa.array(result, type=self._get_output_type(), from_pandas=True)
+
+ def _vectorized_call(self, record_batch: pa.RecordBatch) -> pa.Array:
+ """
+ Applies the UDF in vectorized mode: processes entire columns at once.
+
+ Args:
+ record_batch: Input data batch
+
+ Returns:
+ Output array with results
+ """
+ column_args = record_batch.columns
+ logging.info("Vectorized call with %s columns", len(column_args))
+
+ sig = inspect.signature(self._eval_func)
+ params = list(sig.parameters.values())
+
+ if len(column_args) != len(params):
+ raise ValueError(f"UDF expects {len(params)} args, got {len(column_args)}")
+
+ converted_args = []
+ for param, arrow_col in zip(params, column_args):
+ vec_type = VectorType.resolve_vector_type(param)
+
+ if vec_type is None:
+ # For scalar types (int, float, str, etc.), extract the first value
+ # instead of converting to list
+ pylist = arrow_col.to_pylist()
+ if len(pylist) > 0:
+ converted = pylist[0]
+ logging.info(
+ "Converted %s to scalar (first value): %s",
+ param.name,
+ type(converted).__name__,
+ )
+ else:
+ converted = None
+ logging.info(
+ "Converted %s to scalar (None, empty column)", param.name
+ )
+ else:
+ converted = self._cast_arrow_to_vector(arrow_col, vec_type)
+ logging.info("Converted %s: %s", param.name, vec_type)
+
+ converted_args.append(converted)
+
+ try:
+ result = self._eval_func(*converted_args)
+ except Exception as e:
+ logging.error(
+ "Error in vectorized UDF: %s\nTraceback: %s", e, traceback.format_exc()
+ )
+ raise RuntimeError(f"Error in vectorized UDF: {e}") from e
+
+ # Convert result to PyArrow Array
+ result_array = None
+ if isinstance(result, pa.Array):
+ result_array = result
+ elif isinstance(result, pa.ChunkedArray):
+ # Combine chunks into a single array
+ result_array = pa.concat_arrays(result.chunks)
+ elif isinstance(result, pd.Series):
+ result_array = pa.array(result, type=self._get_output_type())
+ elif isinstance(result, list):
+ result_array = pa.array(
+ result, type=self._get_output_type(), from_pandas=True
+ )
+ else:
+ # Scalar result - broadcast to all rows
+ out_type = self._get_output_type()
+ logging.warning(
+ "UDF returned scalar value, broadcasting to %s rows",
+ record_batch.num_rows,
+ )
+ result_array = pa.array([result] * record_batch.num_rows, type=out_type)
+
+ # Check for None values when always_nullable is False
+ if not self.python_udf_meta.always_nullable:
+ null_count = result_array.null_count
+ if null_count > 0:
+ # Find the first null index for error message
+ for i, value in enumerate(result_array):
+                    if not value.is_valid:
+                        raise RuntimeError(
+                            f"the result of row {i} is null, but the return type is not "
+                            f"nullable; set always_nullable to true in the CREATE "
+                            f"FUNCTION statement"
+                        )
+
+ return result_array
+
+ def _get_output_type(self) -> pa.DataType:
+ """
+ Returns the expected output type for the UDF.
+
+ Returns:
+ PyArrow DataType for the output
+ """
+ return self.python_udf_meta.output_type or pa.null()
+
+
+class UDFLoader(ABC):
+ """Abstract base class for loading UDFs from different sources."""
+
+ def __init__(self, python_udf_meta: PythonUDFMeta) -> None:
+ """
+ Initialize the UDF loader.
+
+ Args:
+ python_udf_meta: Metadata describing the UDF to load
+ """
+ self.python_udf_meta = python_udf_meta
+
+ @abstractmethod
+ def load(self) -> AdaptivePythonUDF:
+ """Load the UDF and return an AdaptivePythonUDF wrapper."""
+ raise NotImplementedError("Subclasses must implement load().")
+
+
+class InlineUDFLoader(UDFLoader):
+ """Loads a UDF defined directly in inline code."""
+
+ def load(self) -> AdaptivePythonUDF:
+ """
+ Load and execute inline Python code to extract the UDF function.
+
+ Returns:
+ AdaptivePythonUDF wrapper around the loaded function
+
+ Raises:
+ RuntimeError: If code execution fails
+ ValueError: If the function is not found or not callable
+ """
+ symbol = self.python_udf_meta.symbol
+ inline_code = self.python_udf_meta.inline_code.decode("utf-8")
+ env: dict[str, Any] = {}
+ logging.info("Loading inline code for function '%s'", symbol)
+ logging.debug("Inline code:\n%s", inline_code)
+
+ try:
+ # Execute the code in a clean environment
+ # pylint: disable=exec-used
+ # Note: exec() is necessary here for dynamic UDF loading from inline code
+ exec(inline_code, env) # nosec B102
+ except Exception as e:
+ logging.error(
+ "Failed to exec inline code: %s\nTraceback: %s",
+ e,
+ traceback.format_exc(),
+ )
+ raise RuntimeError(f"Failed to exec inline code: {e}") from e
+
+ func = env.get(symbol)
+ if func is None:
+ available_funcs = [
+ k for k, v in env.items() if callable(v) and not k.startswith("_")
+ ]
+ logging.error(
+ "Function '%s' not found in inline code. Available functions: %s",
+ symbol,
+ available_funcs,
+ )
+ raise ValueError(f"Function '{symbol}' not found in inline code.")
+
+ if not callable(func):
+ logging.error(
+ "'%s' exists but is not callable (type: %s)", symbol, type(func)
+ )
+ raise ValueError(f"'{symbol}' is not a callable function.")
+
+ logging.info("Successfully loaded function '%s' from inline code", symbol)
+ return AdaptivePythonUDF(self.python_udf_meta, func)
+
+
+class ModuleUDFLoader(UDFLoader):
+ """Loads a UDF from a Python module file (.py)."""
+
+ def load(self) -> AdaptivePythonUDF:
+ """
+ Loads a UDF from a Python module file.
+
+ Returns:
+ AdaptivePythonUDF instance wrapping the loaded function
+
+ Raises:
+ ValueError: If module file not found
+ TypeError: If symbol is not callable
+ """
+ symbol = self.python_udf_meta.symbol # [package_name.]module_name.function_name
+ location = self.python_udf_meta.location # /path/to/module_name[.py]
+
+ if not os.path.exists(location):
+ raise ValueError(f"Module file not found: {location}")
+
+ package_name, module_name, func_name = self.parse_symbol(symbol)
+ func = self.load_udf_from_module(location, package_name, module_name, func_name)
+
+ if not callable(func):
+ raise TypeError(
+ f"'{symbol}' exists but is not callable (type: {type(func).__name__})"
+ )
+
+ logging.info(
+ "Successfully loaded function '%s' from module: %s", symbol, location
+ )
+ return AdaptivePythonUDF(self.python_udf_meta, func)
+
+ def parse_symbol(self, symbol: str):
+ """
+ Parse symbol into (package_name, module_name, func_name)
+
+ Supported formats:
+ - "module.func" → (None, module, func)
+ - "package.module.func" → (package, "module", func)
+ """
+ if not symbol or "." not in symbol:
+ raise ValueError(
+ f"Invalid symbol format: '{symbol}'. "
+ "Expected 'module.function' or 'package.module.function'"
+ )
+
+ parts = symbol.split(".")
+ if len(parts) == 2:
+ # module.func → Single-file mode
+ module_name, func_name = parts
+ package_name = None
+ if not module_name or not module_name.strip():
+ raise ValueError(f"Module name is empty in symbol: '{symbol}'")
+ if not func_name or not func_name.strip():
+ raise ValueError(f"Function name is empty in symbol: '{symbol}'")
+ elif len(parts) > 2:
+ package_name = parts[0]
+ module_name = ".".join(parts[1:-1])
+ func_name = parts[-1]
+ if not package_name or not package_name.strip():
+ raise ValueError(f"Package name is empty in symbol: '{symbol}'")
+ if not module_name or not module_name.strip():
+ raise ValueError(f"Module name is empty in symbol: '{symbol}'")
+ if not func_name or not func_name.strip():
+ raise ValueError(f"Function name is empty in symbol: '{symbol}'")
+ else:
+ raise ValueError(f"Invalid symbol format: '{symbol}'")
+
+ logging.debug(
+ "Parsed symbol: package=%s, module=%s, func=%s",
+ package_name,
+ module_name,
+ func_name,
+ )
+ return package_name, module_name, func_name
+
+ def _validate_location(self, location: str) -> None:
+ """Validate that the location is a valid directory."""
+ if not os.path.isdir(location):
+ raise ValueError(f"Location is not a directory: {location}")
+
+ def _get_or_import_module(self, location: str, full_module_name: str) -> Any:
+ """Get module from cache or import it."""
+ if full_module_name in sys.modules:
+ logging.warning(
+ "Module '%s' already loaded, using cached version", full_module_name
+ )
+ return sys.modules[full_module_name]
+
+ with temporary_sys_path(location):
+ return importlib.import_module(full_module_name)
+
+ def _extract_function(
+ self, module: Any, func_name: str, module_name: str
+ ) -> Callable:
+ """Extract and validate function from module."""
+ func = getattr(module, func_name, None)
+ if func is None:
+ raise AttributeError(
+ f"Function '{func_name}' not found in module '{module_name}'"
+ )
+ if not callable(func):
+ raise TypeError(f"'{func_name}' is not callable")
+ return func
+
+ def _load_single_file_udf(
+ self, location: str, module_name: str, func_name: str
+ ) -> Callable:
+ """Load UDF from a single Python file."""
+ py_file = os.path.join(location, f"{module_name}.py")
+ if not os.path.isfile(py_file):
+ raise ImportError(f"Python file not found: {py_file}")
+
+ try:
+ udf_module = self._get_or_import_module(location, module_name)
+ return self._extract_function(udf_module, func_name, module_name)
+ except (ImportError, AttributeError, TypeError) as e:
+ raise ImportError(
+ f"Failed to load single-file UDF '{module_name}.{func_name}': {e}"
+ ) from e
+ except Exception as e:
+ logging.error(
+ "Unexpected error loading UDF: %s\n%s", e, traceback.format_exc()
+ )
+ raise
+
+ def _ensure_package_init(self, package_path: str, package_name: str) -> None:
+ """Ensure __init__.py exists in the package directory."""
+ init_path = os.path.join(package_path, "__init__.py")
+ if not os.path.exists(init_path):
+ logging.warning(
+ "__init__.py not found in package '%s', attempting to create it",
+ package_name,
+ )
+ try:
+ with open(init_path, "w", encoding="utf-8") as f:
+ f.write(
+ "# Auto-generated by UDF loader to make directory a Python package\n"
+ )
+ logging.info("Created __init__.py in %s", package_path)
+ except OSError as e:
+ raise ImportError(
+ f"Cannot create __init__.py in package '{package_name}': {e}"
+ ) from e
+
+ def _build_full_module_name(self, package_name: str, module_name: str) -> str:
+ """Build the full module name for package mode."""
+ if module_name == "__init__":
+ return package_name
+ return f"{package_name}.{module_name}"
+
+ def _load_package_udf(
+ self, location: str, package_name: str, module_name: str, func_name: str
+ ) -> Callable:
+ """Load UDF from a Python package."""
+ package_path = os.path.join(location, package_name)
+ if not os.path.isdir(package_path):
+ raise ImportError(f"Package '{package_name}' not found in '{location}'")
+
+ self._ensure_package_init(package_path, package_name)
+
+ try:
+ full_module_name = self._build_full_module_name(package_name, module_name)
+ udf_module = self._get_or_import_module(location, full_module_name)
+ return self._extract_function(udf_module, func_name, full_module_name)
+ except (ImportError, AttributeError, TypeError) as e:
+ raise ImportError(
+ f"Failed to load packaged UDF '{package_name}.{module_name}.{func_name}': {e}"
+ ) from e
+ except Exception as e:
+ logging.error(
+ "Unexpected error loading packaged UDF: %s\n%s",
+ e,
+ traceback.format_exc(),
+ )
+ raise
+
+ def load_udf_from_module(
+ self,
+ location: str,
+ package_name: Optional[str],
+ module_name: str,
+ func_name: str,
+ ) -> Callable:
+ """
+ Load a UDF from a Python module, supporting both:
+ 1. Single-file mode: package_name=None, module_name="your_file"
+ 2. Package mode: package_name="your_pkg", module_name="submodule" or "__init__"
+
+ Args:
+ location:
+ - In package mode: parent directory of the package
+ - In single-file mode: directory containing the .py file
+ package_name:
+ - If None or empty: treat as single-file mode
+ - Else: standard package name
+ module_name:
+ - In package mode: submodule name (e.g., "main") or "__init__"
+ - In single-file mode: filename without .py (e.g., "udf_script")
+ func_name: name of the function to load
+
+ Returns:
+ The callable UDF function.
+ """
+ self._validate_location(location)
+
+ if not package_name or package_name.strip() == "":
+ return self._load_single_file_udf(location, module_name, func_name)
+ else:
+ return self._load_package_udf(
+ location, package_name, module_name, func_name
+ )
+
+
+class UDFLoaderFactory:
+ """Factory to select the appropriate loader based on UDF location."""
+
+ @staticmethod
+ def get_loader(python_udf_meta: PythonUDFMeta) -> UDFLoader:
+ """
+ Factory method to create the appropriate UDF loader based on metadata.
+
+ Args:
+ python_udf_meta: UDF metadata containing load type and location
+
+ Returns:
+ Appropriate UDFLoader instance (InlineUDFLoader or ModuleUDFLoader)
+
+ Raises:
+ ValueError: If UDF load type or location is unsupported
+ """
+ location = python_udf_meta.location
+ udf_load_type = python_udf_meta.udf_load_type # 0: inline, 1: module
+
+ if udf_load_type == 0:
+ return InlineUDFLoader(python_udf_meta)
+ elif udf_load_type == 1:
+ if UDFLoaderFactory.check_module(location):
+ return ModuleUDFLoader(python_udf_meta)
+ else:
+ raise ValueError(f"Unsupported UDF location: {location}")
+ else:
+ raise ValueError(f"Unsupported UDF load type: {udf_load_type}")
+
+ @staticmethod
+ def check_module(location: str) -> bool:
+ """
+ Checks if a location is a valid Python module or package.
+
+ A valid module is either:
+ - A .py file, or
+ - A directory containing __init__.py (i.e., a package).
+
+ Raises:
+ ValueError: If the location does not exist or contains no Python module.
+
+ Returns:
+ True if valid.
+ """
+ if not os.path.exists(location):
+ raise ValueError(f"Module not found: {location}")
+
+ if os.path.isfile(location):
+ if location.endswith(".py"):
+ return True
+ else:
+ raise ValueError(f"File is not a Python module (.py): {location}")
+
+ if os.path.isdir(location):
+ if UDFLoaderFactory.has_python_file_recursive(location):
+ return True
+ else:
+ raise ValueError(
+ f"Directory contains no Python (.py) files: {location}"
+ )
+
+ raise ValueError(f"Invalid module location (not file or directory): {location}")
+
+ @staticmethod
+ def has_python_file_recursive(location: str) -> bool:
+ """
+ Recursively checks if a directory contains any Python (.py) files.
+
+ Args:
+ location: Directory path to search
+
+ Returns:
+ True if at least one .py file is found, False otherwise
+ """
+ path = Path(location)
+ if not path.is_dir():
+ return False
+ return any(path.rglob("*.py"))
+
+
+class UDFFlightServer(flight.FlightServerBase):
+ """Arrow Flight server for executing Python UDFs."""
+
+ @staticmethod
+ def parse_python_udf_meta(
+ descriptor: flight.FlightDescriptor,
+ ) -> Optional[PythonUDFMeta]:
+ """Parses UDF metadata from a command descriptor."""
+
+ if descriptor.descriptor_type != flight.DescriptorType.CMD:
+ logging.error("Invalid descriptor type: %s", descriptor.descriptor_type)
+ return None
+
+ cmd_json = json.loads(descriptor.command)
+ name = cmd_json["name"]
+ symbol = cmd_json["symbol"]
+ location = cmd_json["location"]
+ udf_load_type = cmd_json["udf_load_type"]
+ runtime_version = cmd_json["runtime_version"]
+ always_nullable = cmd_json["always_nullable"]
+
+ inline_code = base64.b64decode(cmd_json["inline_code"])
+ input_binary = base64.b64decode(cmd_json["input_types"])
+ output_binary = base64.b64decode(cmd_json["return_type"])
+
+ input_schema = pa.ipc.read_schema(pa.BufferReader(input_binary))
+ output_schema = pa.ipc.read_schema(pa.BufferReader(output_binary))
+
+ if len(output_schema) != 1:
+ logging.error(
+ "Output schema must have exactly one field: %s", output_schema
+ )
+ return None
+
+ output_type = output_schema.field(0).type
+
+ return PythonUDFMeta(
+ name=name,
+ symbol=symbol,
+ location=location,
+ udf_load_type=udf_load_type,
+ runtime_version=runtime_version,
+ always_nullable=always_nullable,
+ inline_code=inline_code,
+ input_types=input_schema,
+ output_type=output_type,
+ )
+
+ @staticmethod
+ def check_schema(
+ record_batch: pa.RecordBatch, expected_schema: pa.Schema
+ ) -> Tuple[bool, str]:
+ """
+ Validates that the input RecordBatch schema matches the expected schema.
+ Checks that field count and types match, but field names can differ.
+
+ :return: (result, error_message)
+ """
+ actual = record_batch.schema
+ expected = expected_schema
+
+ logging.info(f"Actual schema: {actual}")
+ logging.info(f"Expected schema: {expected}")
+
+ # Check field count
+ if len(actual) != len(expected):
+ return (
+ False,
+ f"Schema length mismatch, got {len(actual)} fields, expected {len(expected)} fields",
+ )
+
+ # Check each field type (ignore field names)
+ for i, (actual_field, expected_field) in enumerate(zip(actual, expected)):
+ if not actual_field.type.equals(expected_field.type):
+ return False, (
+ f"Type mismatch at field index {i}, "
+ f"got {actual_field.type}, expected {expected_field.type}"
+ )
+
+ return True, ""
+
+ def do_exchange(
+ self,
+ context: flight.ServerCallContext,
+ descriptor: flight.FlightDescriptor,
+ reader: flight.MetadataRecordBatchReader,
+ writer: flight.MetadataRecordBatchWriter,
+ ) -> None:
+ """Handles bidirectional streaming UDF execution."""
+ logging.info("Received exchange request for UDF: %s", descriptor)
+
+ python_udf_meta = UDFFlightServer.parse_python_udf_meta(descriptor)
+ if not python_udf_meta:
+ raise ValueError("Invalid or missing UDF metadata in descriptor")
+
+ loader = UDFLoaderFactory.get_loader(python_udf_meta)
+ udf = loader.load()
+ logging.info("Loaded UDF: %s", udf)
+
+ started = False
+ for chunk in reader:
+ if not chunk.data:
+ logging.info("Empty chunk received, skipping")
+ continue
+
+ check_schema_result, error_msg = UDFFlightServer.check_schema(
+ chunk.data, python_udf_meta.input_types
+ )
+ if not check_schema_result:
+ logging.error("Schema mismatch: %s", error_msg)
+ raise ValueError(f"Schema mismatch: {error_msg}")
+
+ result_array = udf(chunk.data)
+
+ if not python_udf_meta.output_type.equals(result_array.type):
+ logging.error(
+ "Output type mismatch: got %s, expected %s",
+ result_array.type,
+ python_udf_meta.output_type,
+ )
+ raise ValueError(
+ f"Output type mismatch: got {result_array.type}, expected {python_udf_meta.output_type}"
+ )
+
+ result_batch = pa.RecordBatch.from_arrays([result_array], ["result"])
+ if not started:
+ writer.begin(result_batch.schema)
+ started = True
+ writer.write_batch(result_batch)
+
+
+def check_unix_socket_path(unix_socket_path: str) -> bool:
+ """Validates the Unix domain socket path format."""
+ if not unix_socket_path:
+ logging.error("Unix socket path is empty")
+ return False
+
+    if not unix_socket_path.startswith("grpc+unix://"):
+        logging.error("Unix socket URI must start with 'grpc+unix://': %s", unix_socket_path)
+        return False
+
+ socket_path = unix_socket_path[len("grpc+unix://") :].strip()
+ if not socket_path:
+ logging.error("Extracted socket path is empty")
+ return False
+
+ return True
+
+
+def main(unix_socket_path: str) -> None:
+ """
+ Main entry point for the Python UDF server.
+
+ Args:
+ unix_socket_path: Base path for the Unix domain socket
+
+ Raises:
+ SystemExit: If socket path is invalid or server fails to start
+ """
+ try:
+ if not check_unix_socket_path(unix_socket_path):
+ print(f"ERROR: Invalid socket path: {unix_socket_path}", flush=True)
+ sys.exit(1)
+
+ current_pid = os.getpid()
+ ServerState.unix_socket_path = f"{unix_socket_path}_{current_pid}.sock"
+ server = UDFFlightServer(ServerState.unix_socket_path)
+ print("Start python server successfully", flush=True)
+ logging.info("##### PYTHON UDF SERVER STARTED AT %s #####", datetime.now())
+ server.wait()
+
+ except Exception as e:
+ print(
+ f"ERROR: Failed to start Python UDF server: {type(e).__name__}: {e}",
+ flush=True,
+ )
+ tb_lines = traceback.format_exception(type(e), e, e.__traceback__)
+ if len(tb_lines) > 1:
+ print(f"DETAIL: {tb_lines[-2].strip()}", flush=True)
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Run an Arrow Flight UDF server over Unix socket."
+ )
+ parser.add_argument(
+ "unix_socket_path",
+ type=str,
+ help="Path to the Unix socket (e.g., grpc+unix:///path/to/socket)",
+ )
+ args = parser.parse_args()
+ main(args.unix_socket_path)
diff --git a/be/src/util/arrow/block_convertor.cpp b/be/src/util/arrow/block_convertor.cpp
index 0da5c22..ab8da71 100644
--- a/be/src/util/arrow/block_convertor.cpp
+++ b/be/src/util/arrow/block_convertor.cpp
@@ -53,38 +53,7 @@
namespace doris {
#include "common/compile_check_begin.h"
-class FromBlockConverter {
-public:
- FromBlockConverter(const vectorized::Block& block, const std::shared_ptr<arrow::Schema>& schema,
- arrow::MemoryPool* pool, const cctz::time_zone& timezone_obj)
- : _block(block),
- _schema(schema),
- _pool(pool),
- _cur_field_idx(-1),
- _timezone_obj(timezone_obj) {}
-
- ~FromBlockConverter() = default;
-
- Status convert(std::shared_ptr<arrow::RecordBatch>* out);
-
-private:
- const vectorized::Block& _block;
- const std::shared_ptr<arrow::Schema>& _schema;
- arrow::MemoryPool* _pool;
-
- size_t _cur_field_idx;
- size_t _cur_start;
- size_t _cur_rows;
- vectorized::ColumnPtr _cur_col;
- vectorized::DataTypePtr _cur_type;
- arrow::ArrayBuilder* _cur_builder = nullptr;
-
- const cctz::time_zone& _timezone_obj;
-
- std::vector<std::shared_ptr<arrow::Array>> _arrays;
-};
-
-Status FromBlockConverter::convert(std::shared_ptr<arrow::RecordBatch>* out) {
+Status FromBlockToRecordBatchConverter::convert(std::shared_ptr<arrow::RecordBatch>* out) {
int num_fields = _schema->num_fields();
if (_block.columns() != num_fields) {
return Status::InvalidArgument("number fields not match");
@@ -127,13 +96,44 @@
return Status::OK();
}
+Status FromRecordBatchToBlockConverter::convert(vectorized::Block* block) {
+ DCHECK(block);
+ int num_fields = _batch->num_columns();
+ if ((size_t)num_fields != _types.size()) {
+ return Status::InvalidArgument("number fields not match");
+ }
+
+ int64_t num_rows = _batch->num_rows();
+ _columns.reserve(num_fields);
+
+ for (int idx = 0; idx < num_fields; ++idx) {
+ auto doris_type = _types[idx];
+ auto doris_column = doris_type->create_column();
+ auto arrow_column = _batch->column(idx);
+ DCHECK_EQ(arrow_column->length(), num_rows);
+ RETURN_IF_ERROR(doris_type->get_serde()->read_column_from_arrow(
+ *doris_column, &*arrow_column, 0, num_rows, _timezone_obj));
+ _columns.emplace_back(std::move(doris_column), std::move(doris_type), std::to_string(idx));
+ }
+
+ block->swap(_columns);
+ return Status::OK();
+}
+
Status convert_to_arrow_batch(const vectorized::Block& block,
const std::shared_ptr<arrow::Schema>& schema, arrow::MemoryPool* pool,
std::shared_ptr<arrow::RecordBatch>* result,
const cctz::time_zone& timezone_obj) {
- FromBlockConverter converter(block, schema, pool, timezone_obj);
+ FromBlockToRecordBatchConverter converter(block, schema, pool, timezone_obj);
return converter.convert(result);
}
+Status convert_from_arrow_batch(const std::shared_ptr<arrow::RecordBatch>& batch,
+ const vectorized::DataTypes& types, vectorized::Block* block,
+ const cctz::time_zone& timezone_obj) {
+ FromRecordBatchToBlockConverter converter(batch, types, timezone_obj);
+ return converter.convert(block);
+}
+
#include "common/compile_check_end.h"
} // namespace doris
diff --git a/be/src/util/arrow/block_convertor.h b/be/src/util/arrow/block_convertor.h
index 6c3163b..7cda9c3 100644
--- a/be/src/util/arrow/block_convertor.h
+++ b/be/src/util/arrow/block_convertor.h
@@ -22,7 +22,9 @@
#include <memory>
#include "common/status.h"
+#include "vec/columns/column.h"
#include "vec/core/block.h"
+#include "vec/data_types/data_type.h"
// This file will convert Doris Block to/from Arrow's RecordBatch
// Block is used by Doris query engine to exchange data between
@@ -38,9 +40,63 @@
namespace doris {
+class FromBlockToRecordBatchConverter {
+public:
+ FromBlockToRecordBatchConverter(const vectorized::Block& block,
+ const std::shared_ptr<arrow::Schema>& schema,
+ arrow::MemoryPool* pool, const cctz::time_zone& timezone_obj)
+ : _block(block),
+ _schema(schema),
+ _pool(pool),
+ _cur_field_idx(-1),
+ _timezone_obj(timezone_obj) {}
+
+ ~FromBlockToRecordBatchConverter() = default;
+
+ Status convert(std::shared_ptr<arrow::RecordBatch>* out);
+
+private:
+ const vectorized::Block& _block;
+ const std::shared_ptr<arrow::Schema>& _schema;
+ arrow::MemoryPool* _pool;
+
+ size_t _cur_field_idx;
+ size_t _cur_start;
+ size_t _cur_rows;
+ vectorized::ColumnPtr _cur_col;
+ vectorized::DataTypePtr _cur_type;
+ arrow::ArrayBuilder* _cur_builder = nullptr;
+
+ const cctz::time_zone& _timezone_obj;
+
+ std::vector<std::shared_ptr<arrow::Array>> _arrays;
+};
+
+class FromRecordBatchToBlockConverter {
+public:
+ FromRecordBatchToBlockConverter(const std::shared_ptr<arrow::RecordBatch>& batch,
+ const vectorized::DataTypes& types,
+ const cctz::time_zone& timezone_obj)
+ : _batch(batch), _types(types), _timezone_obj(timezone_obj) {}
+
+ ~FromRecordBatchToBlockConverter() = default;
+
+ Status convert(vectorized::Block* block);
+
+private:
+ const std::shared_ptr<arrow::RecordBatch>& _batch;
+ const vectorized::DataTypes& _types;
+ const cctz::time_zone& _timezone_obj;
+ vectorized::ColumnsWithTypeAndName _columns;
+};
+
Status convert_to_arrow_batch(const vectorized::Block& block,
const std::shared_ptr<arrow::Schema>& schema, arrow::MemoryPool* pool,
std::shared_ptr<arrow::RecordBatch>* result,
const cctz::time_zone& timezone_obj);
+Status convert_from_arrow_batch(const std::shared_ptr<arrow::RecordBatch>& batch,
+ const vectorized::DataTypes& types, vectorized::Block* block,
+ const cctz::time_zone& timezone_obj);
+
} // namespace doris
diff --git a/be/src/util/arrow/utils.h b/be/src/util/arrow/utils.h
index 0a731ba..7794906 100644
--- a/be/src/util/arrow/utils.h
+++ b/be/src/util/arrow/utils.h
@@ -17,8 +17,11 @@
#pragma once
+#include <arrow/result.h>
+
#include <iostream>
+#include "common/compiler_util.h"
#include "common/status.h"
// This files contains some utilities to convert Doris internal
@@ -72,4 +75,33 @@
Status to_doris_status(const arrow::Status& status);
arrow::Status to_arrow_status(const Status& status);
+template <typename T>
+inline void assign_from_result(T& output, const arrow::Result<T>& result) {
+ output = *result;
+}
+
+template <typename T>
+inline void assign_from_result(T& output, arrow::Result<T>&& result) {
+ output = std::move(*result);
+}
+
+template <typename T>
+inline void assign_from_result(T* output, const arrow::Result<T>& result) {
+ *output = *result;
+}
+
+template <typename T>
+inline void assign_from_result(T* output, arrow::Result<T>&& result) {
+ *output = std::move(*result);
+}
+
+#define RETURN_DORIS_STATUS_IF_RESULT_ERROR(output, result_expr) \
+ do { \
+ auto&& _result_ = (result_expr); \
+ if (UNLIKELY(!_result_.ok())) { \
+ return to_doris_status(_result_.status()); \
+ } \
+ assign_from_result(output, std::forward<decltype(_result_)>(_result_)); \
+ } while (0)
+
} // namespace doris
diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp
index cc61a69..553cb76 100644
--- a/be/src/vec/exprs/vectorized_fn_call.cpp
+++ b/be/src/vec/exprs/vectorized_fn_call.cpp
@@ -17,6 +17,7 @@
#include "vec/exprs/vectorized_fn_call.h"
+#include <fmt/compile.h>
#include <fmt/format.h>
#include <fmt/ranges.h> // IWYU pragma: keep
#include <gen_cpp/Opcodes_types.h>
@@ -57,6 +58,7 @@
#include "vec/functions/function_agg_state.h"
#include "vec/functions/function_fake.h"
#include "vec/functions/function_java_udf.h"
+#include "vec/functions/function_python_udf.h"
#include "vec/functions/function_rpc.h"
#include "vec/functions/simple_function_factory.h"
#include "vec/utils/util.hpp"
@@ -115,6 +117,17 @@
"Java UDF is not enabled, you can change be config enable_java_support to true "
"and restart be.");
}
+ } else if (_fn.binary_type == TFunctionBinaryType::PYTHON_UDF) {
+ if (config::enable_python_udf_support) {
+ _function = PythonFunctionCall::create(_fn, argument_template, _data_type);
+ LOG(INFO) << fmt::format(
+ "create python function call: {}, runtime version: {}, function code: {}",
+ _fn.name.function_name, _fn.runtime_version, _fn.function_code);
+ } else {
+ return Status::InternalError(
+ "Python UDF is not enabled, you can change be config enable_python_udf_support "
+ "to true and restart be.");
+ }
} else if (_fn.binary_type == TFunctionBinaryType::AGG_STATE) {
DataTypes argument_types;
for (auto column : argument_template) {
diff --git a/be/src/vec/functions/function_python_udf.cpp b/be/src/vec/functions/function_python_udf.cpp
new file mode 100644
index 0000000..63c0c1d
--- /dev/null
+++ b/be/src/vec/functions/function_python_udf.cpp
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/function_python_udf.h"
+
+#include <arrow/record_batch.h>
+#include <arrow/type_fwd.h>
+#include <fmt/core.h>
+#include <glog/logging.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <ctime>
+#include <memory>
+
+#include "common/status.h"
+#include "runtime/user_function_cache.h"
+#include "udf/python/python_udf_meta.h"
+#include "udf/python/python_udf_server.h"
+#include "util/arrow/block_convertor.h"
+#include "util/arrow/row_batch.h"
+#include "util/timezone_utils.h"
+#include "vec/core/block.h"
+#include "vec/exec/jni_connector.h"
+
+namespace doris::vectorized {
+
+PythonFunctionCall::PythonFunctionCall(const TFunction& fn, const DataTypes& argument_types,
+ const DataTypePtr& return_type)
+ : _fn(fn), _argument_types(argument_types), _return_type(return_type) {}
+
+Status PythonFunctionCall::open(FunctionContext* context,
+ FunctionContext::FunctionStateScope scope) {
+ if (scope == FunctionContext::FunctionStateScope::FRAGMENT_LOCAL) {
+ LOG(INFO) << "Open python UDF fragment local";
+ return Status::OK();
+ }
+
+ PythonVersion version;
+ PythonUDFMeta func_meta;
+ func_meta._id = _fn.id;
+ func_meta._name = _fn.name.function_name;
+ func_meta._symbol = _fn.scalar_fn.symbol;
+ if (!_fn.function_code.empty()) {
+ func_meta._type = PythonUDFLoadType::INLINE;
+ func_meta._location = "inline";
+ func_meta._inline_code = _fn.function_code;
+ } else if (!_fn.hdfs_location.empty()) {
+ func_meta._type = PythonUDFLoadType::MODULE;
+ func_meta._location = _fn.hdfs_location;
+ func_meta._checksum = _fn.checksum;
+ } else {
+ func_meta._type = PythonUDFLoadType::UNKNOWN;
+ func_meta._location = "unknown";
+ }
+
+ func_meta._input_types = _argument_types;
+ func_meta._return_type = _return_type;
+
+ if (_fn.__isset.runtime_version && !_fn.runtime_version.empty()) {
+ RETURN_IF_ERROR(
+ PythonVersionManager::instance().get_version(_fn.runtime_version, &version));
+ } else {
+ return Status::InvalidArgument("Python UDF runtime version is not set");
+ }
+
+ func_meta._runtime_version = version.full_version;
+ RETURN_IF_ERROR(func_meta.check());
+ func_meta._always_nullable = _return_type->is_nullable();
+ LOG(INFO) << fmt::format("runtime_version: {}, func_meta: {}", version.to_string(),
+ func_meta.to_string());
+
+ if (func_meta._type == PythonUDFLoadType::MODULE) {
+ RETURN_IF_ERROR(UserFunctionCache::instance()->get_pypath(
+ func_meta._id, func_meta._location, func_meta._checksum, &func_meta._location));
+ }
+
+ PythonUDFClientPtr client = nullptr;
+ RETURN_IF_ERROR(PythonUDFServerManager::instance().get_client(func_meta, version, &client));
+
+ if (!client) {
+ return Status::InternalError("Python UDF client is null");
+ }
+
+ context->set_function_state(FunctionContext::THREAD_LOCAL, client);
+ LOG(INFO) << fmt::format("Successfully get python UDF client, process: {}",
+ client->print_process());
+ return Status::OK();
+}
+
+Status PythonFunctionCall::execute_impl(FunctionContext* context, Block& block,
+ const ColumnNumbers& arguments, uint32_t result,
+ size_t num_rows) const {
+ auto client = reinterpret_cast<PythonUDFClient*>(
+ context->get_function_state(FunctionContext::THREAD_LOCAL));
+ if (!client) {
+ LOG(WARNING) << "Python UDF client is null";
+ return Status::InternalError("Python UDF client is null");
+ }
+
+ int64_t input_rows = block.rows();
+ uint32_t input_columns = block.columns();
+ DCHECK(input_columns > 0 && result < input_columns &&
+ _argument_types.size() == arguments.size());
+ vectorized::Block input_block;
+ vectorized::Block output_block;
+
+ if (!_return_type->equals(*block.get_by_position(result).type)) {
+ return Status::InternalError(fmt::format("Python UDF output type {} not equal to {}",
+ block.get_by_position(result).type->get_name(),
+ _return_type->get_name()));
+ }
+
+ for (uint32_t i = 0; i < arguments.size(); ++i) {
+ if (!_argument_types[i]->equals(*block.get_by_position(arguments[i]).type)) {
+ return Status::InternalError(
+ fmt::format("Python UDF input type {} not equal to {}",
+ block.get_by_position(arguments[i]).type->get_name(),
+ _argument_types[i]->get_name()));
+ }
+ input_block.insert(block.get_by_position(arguments[i]));
+ }
+
+ std::shared_ptr<arrow::Schema> schema;
+ RETURN_IF_ERROR(
+ get_arrow_schema_from_block(input_block, &schema, TimezoneUtils::default_time_zone));
+ std::shared_ptr<arrow::RecordBatch> input_batch;
+ std::shared_ptr<arrow::RecordBatch> output_batch;
+    cctz::time_zone timezone_obj; // default time zone (UTC)
+    RETURN_IF_ERROR(convert_to_arrow_batch(input_block, schema, arrow::default_memory_pool(),
+                                           &input_batch, timezone_obj));
+ RETURN_IF_ERROR(client->evaluate(*input_batch, &output_batch));
+ int64_t output_rows = output_batch->num_rows();
+
+ if (output_batch->num_columns() != 1) {
+ return Status::InternalError(fmt::format("Python UDF output columns {} not equal to 1",
+ output_batch->num_columns()));
+ }
+
+ if (input_rows != output_rows) {
+ return Status::InternalError(fmt::format(
+ "Python UDF output rows {} not equal to input rows {}", output_rows, input_rows));
+ }
+
+    RETURN_IF_ERROR(
+            convert_from_arrow_batch(output_batch, {_return_type}, &output_block, timezone_obj));
+ DCHECK_EQ(output_block.columns(), 1);
+ block.replace_by_position(result, std::move(output_block.get_by_position(0).column));
+ return Status::OK();
+}
+
+Status PythonFunctionCall::close(FunctionContext* context,
+ FunctionContext::FunctionStateScope scope) {
+ auto client = reinterpret_cast<PythonUDFClient*>(
+ context->get_function_state(FunctionContext::THREAD_LOCAL));
+ if (!client) {
+ LOG(WARNING) << "Python UDF client is null";
+ return Status::InternalError("Python UDF client is null");
+ }
+ RETURN_IF_ERROR(client->close());
+ return Status::OK();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_python_udf.h b/be/src/vec/functions/function_python_udf.h
new file mode 100644
index 0000000..e13bf49
--- /dev/null
+++ b/be/src/vec/functions/function_python_udf.h
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <gen_cpp/Types_types.h>
+
+#include <functional>
+#include <memory>
+
+#include "common/status.h"
+#include "udf/udf.h"
+#include "vec/core/block.h"
+#include "vec/core/column_numbers.h"
+#include "vec/core/columns_with_type_and_name.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+class PythonUDFPreparedFunction : public PreparedFunctionImpl {
+public:
+ using execute_call_back = std::function<Status(FunctionContext* context, Block& block,
+ const ColumnNumbers& arguments, uint32_t result,
+ size_t input_rows_count)>;
+
+ explicit PythonUDFPreparedFunction(const execute_call_back& func, const std::string& name)
+ : callback_function(func), name(name) {}
+
+ String get_name() const override { return name; }
+
+protected:
+ Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+ uint32_t result, size_t input_rows_count) const override {
+ return callback_function(context, block, arguments, result, input_rows_count);
+ }
+
+ bool use_default_implementation_for_nulls() const override { return false; }
+
+private:
+ execute_call_back callback_function;
+ std::string name;
+};
+
+class PythonFunctionCall : public IFunctionBase {
+public:
+ PythonFunctionCall(const TFunction& fn, const DataTypes& argument_types,
+ const DataTypePtr& return_type);
+
+ static FunctionBasePtr create(const TFunction& fn, const ColumnsWithTypeAndName& argument_types,
+ const DataTypePtr& return_type) {
+ DataTypes data_types(argument_types.size());
+ for (size_t i = 0; i < argument_types.size(); ++i) {
+ data_types[i] = argument_types[i].type;
+ }
+ return std::make_shared<PythonFunctionCall>(fn, data_types, return_type);
+ }
+
+ /// Get the main function name.
+ String get_name() const override { return _fn.name.function_name; }
+
+ const DataTypes& get_argument_types() const override { return _argument_types; }
+ const DataTypePtr& get_return_type() const override { return _return_type; }
+
+ PreparedFunctionPtr prepare(FunctionContext* context, const Block& sample_block,
+ const ColumnNumbers& arguments, uint32_t result) const override {
+ return std::make_shared<PythonUDFPreparedFunction>(
+ [this](auto&& PH1, auto&& PH2, auto&& PH3, auto&& PH4, auto&& PH5) {
+ return PythonFunctionCall::execute_impl(
+ std::forward<decltype(PH1)>(PH1), std::forward<decltype(PH2)>(PH2),
+ std::forward<decltype(PH3)>(PH3), std::forward<decltype(PH4)>(PH4),
+ std::forward<decltype(PH5)>(PH5));
+ },
+ _fn.name.function_name);
+ }
+
+ Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override;
+
+ Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+ uint32_t result, size_t input_rows_count) const;
+
+ Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override;
+
+ bool is_use_default_implementation_for_constants() const override { return true; }
+
+ bool is_udf_function() const override { return true; }
+
+private:
+ const TFunction& _fn;
+ const DataTypes _argument_types;
+ const DataTypePtr _return_type {nullptr};
+};
+
+} // namespace doris::vectorized
diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
index 24886fa..a0600d6 100644
--- a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
@@ -23,6 +23,7 @@
#include <arrow/record_batch.h>
#include <arrow/status.h>
#include <arrow/type.h>
+#include <arrow/type_fwd.h>
#include <arrow/util/decimal.h>
#include <arrow/visit_type_inline.h>
#include <arrow/visitor.h>
@@ -42,6 +43,7 @@
#include <vector>
#include "olap/hll.h"
+#include "runtime/define_primitive_type.h"
#include "runtime/descriptors.cpp"
#include "util/arrow/block_convertor.h"
#include "util/arrow/row_batch.h"
@@ -77,8 +79,8 @@
namespace doris::vectorized {
-void serialize_and_deserialize_arrow_test(std::vector<PrimitiveType> cols, int row_num,
- bool is_nullable) {
+std::shared_ptr<Block> create_test_block(std::vector<PrimitiveType> cols, int row_num,
+ bool is_nullable) {
auto block = std::make_shared<Block>();
for (int i = 0; i < cols.size(); i++) {
std::string col_name = std::to_string(i);
@@ -398,6 +400,12 @@
LOG(FATAL) << "error column type";
}
}
+ return block;
+}
+
+void serialize_and_deserialize_arrow_test(std::vector<PrimitiveType> cols, int row_num,
+ bool is_nullable) {
+ std::shared_ptr<Block> block = create_test_block(cols, row_num, is_nullable);
std::shared_ptr<arrow::RecordBatch> record_batch =
CommonDataTypeSerdeTest::serialize_arrow(block);
auto assert_block = std::make_shared<Block>(block->clone_empty());
@@ -405,6 +413,25 @@
CommonDataTypeSerdeTest::compare_two_blocks(block, assert_block);
}
+void block_converter_test(std::vector<PrimitiveType> cols, int row_num, bool is_nullable) {
+ std::shared_ptr<Block> source_block = create_test_block(cols, row_num, is_nullable);
+ std::shared_ptr<arrow::RecordBatch> record_batch;
+ std::shared_ptr<arrow::Schema> schema;
+ Status status = Status::OK();
+ status = get_arrow_schema_from_block(*source_block, &schema, TimezoneUtils::default_time_zone);
+ ASSERT_TRUE(status.ok() && schema);
+    cctz::time_zone default_timezone; // default UTC
+ status = convert_to_arrow_batch(*source_block, schema, arrow::default_memory_pool(),
+ &record_batch, default_timezone);
+ ASSERT_TRUE(status.ok() && record_batch);
+ auto target_block = std::make_shared<Block>(source_block->clone_empty());
+ DataTypes source_data_types = source_block->get_data_types();
+ status = convert_from_arrow_batch(record_batch, source_data_types, &*target_block,
+ default_timezone);
+ ASSERT_TRUE(status.ok() && target_block);
+ CommonDataTypeSerdeTest::compare_two_blocks(source_block, target_block);
+}
+
TEST(DataTypeSerDeArrowTest, DataTypeScalaSerDeTest) {
std::vector<PrimitiveType> cols = {
TYPE_INT, TYPE_INT, TYPE_STRING, TYPE_DECIMAL128I, TYPE_BOOLEAN,
@@ -486,4 +513,14 @@
CommonDataTypeSerdeTest::compare_two_blocks(block, assert_block);
}
+TEST(DataTypeSerDeArrowTest, BlockConverterTest) {
+ std::vector<PrimitiveType> cols = {
+ TYPE_INT, TYPE_INT, TYPE_STRING, TYPE_DECIMAL128I, TYPE_BOOLEAN,
+ TYPE_DECIMAL32, TYPE_DECIMAL64, TYPE_IPV4, TYPE_IPV6, TYPE_DATETIME,
+ TYPE_DATETIMEV2, TYPE_DATE, TYPE_DATEV2,
+ };
+ block_converter_test(cols, 7, true);
+ block_converter_test(cols, 7, false);
+}
+
} // namespace doris::vectorized
diff --git a/build.sh b/build.sh
index 048fa48..516fc2a 100755
--- a/build.sh
+++ b/build.sh
@@ -937,9 +937,11 @@
mkdir -p "${DORIS_OUTPUT}/be/storage"
mkdir -p "${DORIS_OUTPUT}/be/plugins/jdbc_drivers/"
mkdir -p "${DORIS_OUTPUT}/be/plugins/java_udf/"
+ mkdir -p "${DORIS_OUTPUT}/be/plugins/python_udf/"
mkdir -p "${DORIS_OUTPUT}/be/plugins/connectors/"
mkdir -p "${DORIS_OUTPUT}/be/plugins/hadoop_conf/"
mkdir -p "${DORIS_OUTPUT}/be/plugins/java_extensions/"
+ cp -r -p "${DORIS_HOME}/be/src/udf/python/python_udf_server.py" "${DORIS_OUTPUT}/be/plugins/python_udf/"
fi
if [[ "${BUILD_BROKER}" -eq 1 ]]; then
diff --git a/conf/be.conf b/conf/be.conf
index d7e815e..7e35191 100644
--- a/conf/be.conf
+++ b/conf/be.conf
@@ -94,4 +94,4 @@
# Error = 4
azure_log_level = 4
## If you are not running in aws cloud, you can disable EC2 metadata
-AWS_EC2_METADATA_DISABLED=true
+AWS_EC2_METADATA_DISABLED=true
\ No newline at end of file
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index d1b95e4..798e800 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -2832,6 +2832,14 @@
public static boolean enable_udf_in_load = false;
@ConfField(description = {
+ "开启python_udf, 默认为false。如果该配置为false,则禁止创建和使用python_udf。在一些场景下关闭该配置可防止命令注入攻击。",
+ "Used to enable python_udf, default is true. if this configuration is false, creation and use of python_udf is "
+ + "disabled. in some scenarios it may be necessary to disable this configuration to prevent "
+ + "command injection attacks."
+ })
+ public static boolean enable_python_udf = false;
+
+ @ConfField(description = {
"是否忽略 Image 文件中未知的模块。如果为 true,不在 PersistMetaModules.MODULE_NAMES 中的元数据模块将被忽略并跳过。"
+ "默认为 false,如果 Image 文件中包含未知的模块,Doris 将会抛出异常。"
+ "该参数主要用于降级操作中,老版本可以兼容新版本的 Image 文件。",
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
index fa9f119..35cb4b0 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4
@@ -56,6 +56,7 @@
RIGHT_BRACKET: ']';
LEFT_BRACE: '{';
RIGHT_BRACE: '}';
+DOLLAR_QUOTED_STRING: '$$' ( ~'$' | '$' ~'$' )* '$$';
// TODO: add a doc to list reserved words
diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
index 8a485a9..c33d5d2 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4
@@ -215,7 +215,8 @@
(TABLES | AGGREGATE)? FUNCTION (IF NOT EXISTS)?
functionIdentifier LEFT_PAREN functionArguments? RIGHT_PAREN
RETURNS returnType=dataType (INTERMEDIATE intermediateType=dataType)?
- properties=propertyClause? #createUserDefineFunction
+ properties=propertyClause?
+ (AS functionCode=dollarQuotedString)? #createUserDefineFunction
| CREATE statementScope? ALIAS FUNCTION (IF NOT EXISTS)?
functionIdentifier LEFT_PAREN functionArguments? RIGHT_PAREN
WITH PARAMETER LEFT_PAREN parameters=identifierSeq? RIGHT_PAREN
@@ -1867,6 +1868,10 @@
| SUBTRACT? (EXPONENT_VALUE | DECIMAL_VALUE) #decimalLiteral
;
+dollarQuotedString
+ : DOLLAR_QUOTED_STRING
+ ;
+
// there are 1 kinds of keywords in Doris.
// - Non-reserved keywords:
// normal version of non-reserved keywords.
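
Together, the new lexer token and the optional `AS functionCode=dollarQuotedString` clause let `CREATE FUNCTION` carry inline Python source between `$$` delimiters; note the token definition `'$$' ( ~'$' | '$' ~'$' )* '$$'` means the body itself must not contain `$$`. A hedged sketch of the resulting DDL — `symbol` and `runtime_version` are the property names referenced later in `CreateFunctionCommand`, while `"type" = "PYTHON_UDF"` is an assumption based on the new thrift enum value, not something this diff confirms:

```sql
CREATE FUNCTION py_add_one(INT) RETURNS INT PROPERTIES (
    "type" = "PYTHON_UDF",          -- assumed property value (thrift: PYTHON_UDF)
    "symbol" = "add_one",           -- entry function inside the $$ body
    "runtime_version" = "3.10.2"    -- validated by CreateFunctionCommand
) AS $$
def add_one(x):
    return None if x is None else x + 1
$$;
```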
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java
index 4d7d8f9..71a0a1d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java
@@ -155,6 +155,10 @@
protected boolean isStaticLoad = false;
@SerializedName("eT")
protected long expirationTime = 360; // default 6 hours;
+ @SerializedName("rv")
+ protected String runtimeVersion;
+ @SerializedName("fc")
+ protected String functionCode;
// Only used for serialization
protected Function() {
@@ -332,6 +336,22 @@
isGlobal = global;
}
+ public String getRuntimeVersion() {
+ return runtimeVersion;
+ }
+
+ public void setRuntimeVersion(String runtimeVersion) {
+ this.runtimeVersion = runtimeVersion;
+ }
+
+ public String getFunctionCode() {
+ return functionCode;
+ }
+
+ public void setFunctionCode(String functionCode) {
+ this.functionCode = functionCode;
+ }
+
// TODO(cmy): Currently we judge whether it is UDF by wheter the 'location' is set.
// Maybe we should use a separate variable to identify,
// but additional variables need to modify the persistence information.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionUtil.java
index fb6444e..edfc580 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionUtil.java
@@ -25,7 +25,9 @@
import org.apache.doris.nereids.trees.expressions.functions.udf.JavaUdaf;
import org.apache.doris.nereids.trees.expressions.functions.udf.JavaUdf;
import org.apache.doris.nereids.trees.expressions.functions.udf.JavaUdtf;
+import org.apache.doris.nereids.trees.expressions.functions.udf.PythonUdf;
import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.thrift.TFunctionBinaryType;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
@@ -182,7 +184,11 @@
if (function.isUDTFunction()) {
JavaUdtf.translateToNereidsFunction(dbName, ((ScalarFunction) function));
} else {
- JavaUdf.translateToNereidsFunction(dbName, ((ScalarFunction) function));
+ if (function.getBinaryType() == TFunctionBinaryType.JAVA_UDF) {
+ JavaUdf.translateToNereidsFunction(dbName, ((ScalarFunction) function));
+ } else if (function.getBinaryType() == TFunctionBinaryType.PYTHON_UDF) {
+ PythonUdf.translateToNereidsFunction(dbName, (ScalarFunction) function);
+ }
}
} else if (function instanceof AggregateFunction) {
JavaUdaf.translateToNereidsFunction(dbName, ((AggregateFunction) function));
@@ -213,4 +219,9 @@
}
}
+ public static void checkEnablePythonUdf() throws AnalysisException {
+ if (!Config.enable_python_udf) {
+ throw new AnalysisException("python_udf has been disabled.");
+ }
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java
index c0a3f05..cba3c83 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarFunction.java
@@ -25,6 +25,7 @@
import org.apache.doris.thrift.TFunctionBinaryType;
import org.apache.doris.thrift.TScalarFunction;
+import com.google.common.base.Strings;
import com.google.common.collect.Maps;
import com.google.gson.Gson;
import com.google.gson.annotations.SerializedName;
@@ -253,11 +254,18 @@
public TFunction toThrift(Type realReturnType, Type[] realArgTypes, Boolean[] realArgTypeNullables) {
TFunction fn = super.toThrift(realReturnType, realArgTypes, realArgTypeNullables);
fn.setScalarFn(new TScalarFunction());
- if (getBinaryType() == TFunctionBinaryType.JAVA_UDF || getBinaryType() == TFunctionBinaryType.RPC) {
+ if (getBinaryType() == TFunctionBinaryType.JAVA_UDF || getBinaryType() == TFunctionBinaryType.RPC
+ || getBinaryType() == TFunctionBinaryType.PYTHON_UDF) {
fn.getScalarFn().setSymbol(symbolName);
} else {
fn.getScalarFn().setSymbol("");
}
+ if (getBinaryType() == TFunctionBinaryType.PYTHON_UDF) {
+ if (!Strings.isNullOrEmpty(functionCode)) {
+ fn.setFunctionCode(functionCode);
+ }
+ fn.setRuntimeVersion(runtimeVersion);
+ }
if (dictFunction != null) {
fn.setDictFunction(dictFunction);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
index a6cb178..a3a9cd4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
@@ -102,6 +102,7 @@
import org.apache.doris.nereids.trees.expressions.functions.udf.JavaUdaf;
import org.apache.doris.nereids.trees.expressions.functions.udf.JavaUdf;
import org.apache.doris.nereids.trees.expressions.functions.udf.JavaUdtf;
+import org.apache.doris.nereids.trees.expressions.functions.udf.PythonUdf;
import org.apache.doris.nereids.trees.expressions.functions.window.WindowFunction;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.literal.NullLiteral;
@@ -875,6 +876,16 @@
return functionCallExpr;
}
+ @Override
+ public Expr visitPythonUdf(PythonUdf udf, PlanTranslatorContext context) {
+ FunctionParams exprs = new FunctionParams(udf.children().stream()
+ .map(expression -> expression.accept(this, context))
+ .collect(Collectors.toList()));
+ FunctionCallExpr functionCallExpr = new FunctionCallExpr(udf.getCatalogFunction(), exprs);
+ functionCallExpr.setNullableFromNereids(udf.nullable());
+ return functionCallExpr;
+ }
+
// TODO: Supports for `distinct`
private Expr translateAggregateFunction(AggregateFunction function,
List<Expression> currentPhaseArguments, List<Expr> aggFnArguments,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
index 5c0a4ca..dc0bea1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java
@@ -5034,9 +5034,10 @@
Map<String, String> properties = ctx.propertyClause() != null
? Maps.newHashMap(visitPropertyClause(ctx.propertyClause()))
: Maps.newHashMap();
+ String functionCode = ctx.dollarQuotedString() != null ? ctx.dollarQuotedString().getText() : "";
return new CreateFunctionCommand(statementScope, ifNotExists, isAggFunction, false, isTableFunction,
function, functionArgTypesInfo, returnType, intermediateType,
- null, null, properties);
+ null, null, properties, functionCode);
}
@Override
@@ -5054,7 +5055,7 @@
Expression originFunction = getExpression(ctx.expression());
return new CreateFunctionCommand(statementScope, ifNotExists, false, true, false,
function, functionArgTypesInfo, VarcharType.MAX_VARCHAR_TYPE, null,
- parameters, originFunction, null);
+ parameters, originFunction, null, null);
}
@Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/udf/PythonUdf.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/udf/PythonUdf.java
new file mode 100644
index 0000000..464d614
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/udf/PythonUdf.java
@@ -0,0 +1,186 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.udf;
+
+import org.apache.doris.analysis.FunctionName;
+import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.Function;
+import org.apache.doris.catalog.Function.NullableMode;
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.catalog.Type;
+import org.apache.doris.common.util.URI;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.VirtualSlotReference;
+import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.Udf;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.ScalarFunction;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.thrift.TFunctionBinaryType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * Python UDF for Nereids
+ */
+public class PythonUdf extends ScalarFunction implements ExplicitlyCastableSignature, Udf {
+ private final String dbName;
+ private final long functionId;
+ private final TFunctionBinaryType binaryType;
+ private final FunctionSignature signature;
+ private final NullableMode nullableMode;
+ private final String objectFile;
+ private final String symbol;
+ private final String prepareFn;
+ private final String closeFn;
+ private final String checkSum;
+ private final boolean isStaticLoad;
+ private final long expirationTime;
+ private final String runtimeVersion;
+ private final String functionCode;
+
+ /**
+ * Constructor of UDF
+ */
+ public PythonUdf(String name, long functionId, String dbName, TFunctionBinaryType binaryType,
+ FunctionSignature signature,
+ NullableMode nullableMode, String objectFile, String symbol, String prepareFn, String closeFn,
+ String checkSum, boolean isStaticLoad, long expirationTime,
+ String runtimeVersion, String functionCode, Expression... args) {
+ super(name, args);
+ this.dbName = dbName;
+ this.functionId = functionId;
+ this.binaryType = binaryType;
+ this.signature = signature;
+ this.nullableMode = nullableMode;
+ this.objectFile = objectFile;
+ this.symbol = symbol;
+ this.prepareFn = prepareFn;
+ this.closeFn = closeFn;
+ this.checkSum = checkSum;
+ this.isStaticLoad = isStaticLoad;
+ this.expirationTime = expirationTime;
+ this.runtimeVersion = runtimeVersion;
+ this.functionCode = functionCode;
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return ImmutableList.of(signature);
+ }
+
+ @Override
+ public boolean hasVarArguments() {
+ return signature.hasVarArgs;
+ }
+
+ @Override
+ public int arity() {
+ return signature.argumentsTypes.size();
+ }
+
+ @Override
+ public NullableMode getNullableMode() {
+ return nullableMode;
+ }
+
+ /**
+ * withChildren.
+ */
+ @Override
+ public PythonUdf withChildren(List<Expression> children) {
+ Preconditions.checkArgument(children.size() == this.children.size());
+ return new PythonUdf(getName(), functionId, dbName, binaryType, signature, nullableMode,
+ objectFile, symbol, prepareFn, closeFn, checkSum, isStaticLoad, expirationTime,
+ runtimeVersion, functionCode, children.toArray(new Expression[0]));
+ }
+
+ /**
+     * translate catalog python udf to nereids python udf
+ */
+ public static void translateToNereidsFunction(String dbName, org.apache.doris.catalog.ScalarFunction scalar) {
+ String fnName = scalar.functionName();
+ DataType retType = DataType.fromCatalogType(scalar.getReturnType());
+ List<DataType> argTypes = Arrays.stream(scalar.getArgs())
+ .map(DataType::fromCatalogType)
+ .collect(Collectors.toList());
+
+ FunctionSignature.FuncSigBuilder sigBuilder = FunctionSignature.ret(retType);
+ FunctionSignature sig = scalar.hasVarArgs()
+ ? sigBuilder.varArgs(argTypes.toArray(new DataType[0]))
+ : sigBuilder.args(argTypes.toArray(new DataType[0]));
+
+ VirtualSlotReference[] virtualSlots = argTypes.stream()
+ .map(type -> new VirtualSlotReference(type.toString(), type, Optional.empty(),
+ (shape) -> ImmutableList.of()))
+ .toArray(VirtualSlotReference[]::new);
+
+ PythonUdf udf = new PythonUdf(fnName, scalar.getId(), dbName, scalar.getBinaryType(), sig,
+ scalar.getNullableMode(),
+ scalar.getLocation() == null ? null : scalar.getLocation().getLocation(),
+ scalar.getSymbolName(),
+ scalar.getPrepareFnSymbol(),
+ scalar.getCloseFnSymbol(),
+ scalar.getChecksum(), scalar.isStaticLoad(), scalar.getExpirationTime(),
+ scalar.getRuntimeVersion(),
+ scalar.getFunctionCode(),
+ virtualSlots);
+
+ PythonUdfBuilder builder = new PythonUdfBuilder(udf);
+ Env.getCurrentEnv().getFunctionRegistry().addUdf(dbName, fnName, builder);
+ }
+
+ @Override
+ public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+ return visitor.visitPythonUdf(this, context);
+ }
+
+ @Override
+ public Function getCatalogFunction() {
+ try {
+ org.apache.doris.catalog.ScalarFunction expr = org.apache.doris.catalog.ScalarFunction.createUdf(
+ binaryType,
+ new FunctionName(dbName, getName()),
+ signature.argumentsTypes.stream().map(DataType::toCatalogDataType).toArray(Type[]::new),
+ signature.returnType.toCatalogDataType(),
+ signature.hasVarArgs,
+ objectFile == null ? null : URI.create(objectFile),
+ symbol,
+ prepareFn,
+ closeFn
+ );
+ expr.setNullableMode(nullableMode);
+ expr.setChecksum(checkSum);
+ expr.setId(functionId);
+ expr.setStaticLoad(isStaticLoad);
+ expr.setExpirationTime(expirationTime);
+ expr.setRuntimeVersion(runtimeVersion);
+ expr.setFunctionCode(functionCode);
+ return expr;
+ } catch (Exception e) {
+ throw new AnalysisException(e.getMessage(), e.getCause());
+ }
+ }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/udf/PythonUdfBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/udf/PythonUdfBuilder.java
new file mode 100644
index 0000000..7185594
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/udf/PythonUdfBuilder.java
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.udf;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.common.Pair;
+import org.apache.doris.common.util.ReflectionUtils;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.BoundFunction;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.util.TypeCoercionUtils;
+
+import com.google.common.base.Suppliers;
+import com.google.common.collect.Lists;
+
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * function builder for python udf
+ */
+public class PythonUdfBuilder extends UdfBuilder {
+ private final PythonUdf udf;
+ private final int arity;
+ private final boolean isVarArgs;
+
+ public PythonUdfBuilder(PythonUdf udf) {
+ this.udf = udf;
+ this.isVarArgs = udf.hasVarArguments();
+ this.arity = udf.arity();
+ }
+
+ @Override
+ public List<DataType> getArgTypes() {
+ return Suppliers.memoize(() -> udf.getSignatures().get(0).argumentsTypes.stream()
+ .map(DataType.class::cast)
+ .collect(Collectors.toList())).get();
+ }
+
+ @Override
+ public List<FunctionSignature> getSignatures() {
+ return udf.getSignatures();
+ }
+
+ @Override
+ public Class<? extends BoundFunction> functionClass() {
+        return PythonUdf.class;
+ }
+
+ @Override
+ public boolean canApply(List<?> arguments) {
+ if ((isVarArgs && arity > arguments.size() + 1) || (!isVarArgs && arguments.size() != arity)) {
+ return false;
+ }
+ for (Object argument : arguments) {
+ if (!(argument instanceof Expression)) {
+ Optional<Class> primitiveType = ReflectionUtils.getPrimitiveType(argument.getClass());
+ if (!primitiveType.isPresent() || !Expression.class.isAssignableFrom(primitiveType.get())) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public Pair<PythonUdf, PythonUdf> build(String name, List<?> arguments) {
+ List<Expression> exprs = arguments.stream().map(Expression.class::cast).collect(Collectors.toList());
+ List<DataType> argTypes = udf.getSignatures().get(0).argumentsTypes;
+
+ List<Expression> processedExprs = Lists.newArrayList();
+ for (int i = 0; i < exprs.size(); ++i) {
+ processedExprs.add(TypeCoercionUtils.castIfNotSameType(exprs.get(i), argTypes.get(i)));
+ }
+ return Pair.ofSame(udf.withChildren(processedExprs));
+ }
+
+ @Override
+ public String parameterDisplayString() {
+ StringBuilder string = new StringBuilder("(");
+ for (int i = 0; i < udf.getArgumentsTypes().size(); ++i) {
+ if (i > 0) {
+ string.append(", ");
+ }
+ string.append(udf.getArgumentsTypes().get(i));
+ if (isVarArgs && i + 1 == udf.getArgumentsTypes().size()) {
+ string.append("...");
+ }
+ }
+ return string.append(")").toString();
+ }
+}
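
Note that `build` runs every argument through `TypeCoercionUtils.castIfNotSameType` against the declared signature before binding, so call sites need not match the declared types exactly. A small sketch, assuming a hypothetical `py_add_one` declared as `(INT) -> INT`:

```sql
-- The SMALLINT value is implicitly cast to INT by PythonUdfBuilder.build
-- before the UDF is bound.
SELECT py_add_one(CAST(7 AS SMALLINT));
```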
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
index 630fcbe..8285cc9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java
@@ -531,6 +531,7 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsSub;
import org.apache.doris.nereids.trees.expressions.functions.udf.AliasUdf;
import org.apache.doris.nereids.trees.expressions.functions.udf.JavaUdf;
+import org.apache.doris.nereids.trees.expressions.functions.udf.PythonUdf;
/**
* ScalarFunctionVisitor.
@@ -2460,6 +2461,10 @@
return visitScalarFunction(javaUdf, context);
}
+ default R visitPythonUdf(PythonUdf pythonUdf, C context) {
+ return visitScalarFunction(pythonUdf, context);
+ }
+
default R visitAliasUdf(AliasUdf aliasUdf, C context) {
return visitScalarFunction(aliasUdf, context);
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java
index 317d004..f06a72e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java
@@ -91,6 +91,8 @@
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.collections.map.CaseInsensitiveMap;
import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.io.InputStream;
@@ -109,6 +111,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
@@ -143,6 +146,10 @@
// iff is static load, BE will be cache the udf class load, so only need load once
public static final String IS_STATIC_LOAD = "static_load";
public static final String EXPIRATION_TIME = "expiration_time";
+ public static final String RUNTIME_VERSION = "runtime_version";
+
+ private static final Pattern PYTHON_VERSION_PATTERN = Pattern.compile("^3\\.\\d{1,2}(?:\\.\\d{1,2})?$");
+ private static final Logger LOG = LogManager.getLogger(CreateFunctionCommand.class);
// timeout for both connection and read. 10 seconds is long enough.
private static final int HTTP_TIMEOUT_MS = 10000;
@@ -170,14 +177,16 @@
// if not, will core dump when input is not null column, but need return null
// like https://github.com/apache/doris/pull/14002/files
private NullableMode returnNullMode = NullableMode.ALWAYS_NULLABLE;
+ private String runtimeVersion;
+ private String functionCode;
/**
* CreateFunctionCommand
*/
public CreateFunctionCommand(SetType setType, boolean ifNotExists, boolean isAggregate, boolean isAlias,
- boolean isTableFunction, FunctionName functionName, FunctionArgTypesInfo argsDef,
- DataType returnType, DataType intermediateType, List<String> parameters,
- Expression originFunction, Map<String, String> properties) {
+ boolean isTableFunction, FunctionName functionName, FunctionArgTypesInfo argsDef,
+ DataType returnType, DataType intermediateType, List<String> parameters,
+ Expression originFunction, Map<String, String> properties, String functionCode) {
super(PlanType.CREATE_FUNCTION_COMMAND);
this.setType = setType;
this.ifNotExists = ifNotExists;
@@ -199,6 +208,7 @@
} else {
this.properties = ImmutableSortedMap.copyOf(properties, String.CASE_INSENSITIVE_ORDER);
}
+ this.functionCode = functionCode;
}
@Override
@@ -336,22 +346,48 @@
if (staticLoad != null && staticLoad) {
isStaticLoad = true;
}
- String expirationTimeString = properties.get(EXPIRATION_TIME);
- if (expirationTimeString != null) {
- long timeMinutes = 0;
- try {
- timeMinutes = Long.parseLong(expirationTimeString);
- } catch (NumberFormatException e) {
- throw new AnalysisException(e.getMessage());
- }
- if (timeMinutes <= 0) {
- throw new AnalysisException("expirationTime should greater than zero: ");
- }
- this.expirationTime = timeMinutes;
+ extractExpirationTime();
+ } else if (binaryType == TFunctionBinaryType.PYTHON_UDF) {
+ FunctionUtil.checkEnablePythonUdf();
+
+        // always_nullable defaults to true; a null value means the property was not set and is treated as true
+ Boolean isReturnNull = parseBooleanFromProperties(IS_RETURN_NULL);
+ if (isReturnNull != null && !isReturnNull) {
+ returnNullMode = NullableMode.ALWAYS_NOT_NULLABLE;
}
+ extractExpirationTime();
+ String runtimeVersionString = properties.get(RUNTIME_VERSION);
+ if (runtimeVersionString == null) {
+ throw new AnalysisException("Python runtime version is not set");
+ } else if (!validatePythonRuntimeVersion(runtimeVersionString)) {
+ throw new AnalysisException(
+ String.format("Invalid Python runtime version: '%s'. Expected format:"
+ + "'3.X.X' or '3.XX.XX' (e.g. '3.10.2').", runtimeVersionString));
+ }
+ runtimeVersion = runtimeVersionString;
}
}
+ private void extractExpirationTime() throws AnalysisException {
+ String expirationTimeString = properties.get(EXPIRATION_TIME);
+ if (expirationTimeString != null) {
+ long timeMinutes = 0;
+ try {
+ timeMinutes = Long.parseLong(expirationTimeString);
+ } catch (NumberFormatException e) {
+ throw new AnalysisException(e.getMessage());
+ }
+ if (timeMinutes <= 0) {
+ throw new AnalysisException("expirationTime should greater than zero: ");
+ }
+ this.expirationTime = timeMinutes;
+ }
+ }
+
+ private static boolean validatePythonRuntimeVersion(String runtimeVersionString) {
+ return runtimeVersionString != null && PYTHON_VERSION_PATTERN.matcher(runtimeVersionString).matches();
+ }
+
private Boolean parseBooleanFromProperties(String propertyString) throws AnalysisException {
String valueOfString = properties.get(propertyString);
if (valueOfString == null) {
@@ -519,6 +555,8 @@
checkRPCUdf(symbol);
} else if (binaryType == TFunctionBinaryType.JAVA_UDF) {
analyzeJavaUdf(symbol);
+ } else if (binaryType == TFunctionBinaryType.PYTHON_UDF) {
+ analyzePythonUdf(symbol);
}
URI location;
if (!Strings.isNullOrEmpty(originalUserFile)) {
@@ -534,6 +572,8 @@
function.setNullableMode(returnNullMode);
function.setStaticLoad(isStaticLoad);
function.setExpirationTime(expirationTime);
+ function.setRuntimeVersion(runtimeVersion);
+ function.setFunctionCode(functionCode);
}
private void analyzeJavaUdaf(String clazz) throws AnalysisException {
@@ -714,6 +754,26 @@
}
}
+    private void analyzePythonUdf(String symbol) throws AnalysisException {
+        if (Strings.isNullOrEmpty(symbol)) {
+            throw new AnalysisException("No symbol (entry function name) provided for Python UDF");
+ }
+
+ if (Strings.isNullOrEmpty(this.functionCode)) {
+ return;
+ }
+
+ this.functionCode = this.functionCode.trim();
+ if (!(this.functionCode.startsWith("$$") && this.functionCode.endsWith("$$"))) {
+ throw new AnalysisException("Inline Python UDF code must be start with $$ and end with $$");
+ }
+
+ this.functionCode = this.functionCode.substring(2, this.functionCode.length() - 2);
+ if (this.functionCode.isEmpty()) {
+ throw new AnalysisException("Inline Python UDF is empty");
+ }
+ }
+
private void checkUdfClass(String clazz, ClassLoader cl) throws ClassNotFoundException, AnalysisException {
Class udfClass = cl.loadClass(clazz);
List<Method> evalList = Arrays.stream(udfClass.getMethods())
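
The FE-side validation above requires `runtime_version` to match `^3\.\d{1,2}(?:\.\d{1,2})?$` and strips the `$$` delimiters from inline code only after confirming they are present. Sketches of statements the checks reject (property names as in the earlier example; `"type" = "PYTHON_UDF"` remains an assumption):

```sql
-- Rejected: "2.7" does not match the 3.x version pattern.
CREATE FUNCTION py_bad(INT) RETURNS INT PROPERTIES (
    "type" = "PYTHON_UDF",
    "symbol" = "f",
    "runtime_version" = "2.7"
) AS $$
def f(x):
    return x
$$;

-- Rejected: "Python runtime version is not set".
CREATE FUNCTION py_bad_no_version(INT) RETURNS INT PROPERTIES (
    "type" = "PYTHON_UDF",
    "symbol" = "f"
) AS $$
def f(x):
    return x
$$;
```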
diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift
index 34d2220..1594ff9 100644
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@@ -331,7 +331,9 @@
JAVA_UDF = 5,
- AGG_STATE = 6
+    AGG_STATE = 6,
+    PYTHON_UDF = 7
}
// Represents a fully qualified function name.
@@ -407,6 +409,8 @@
15: optional bool is_static_load = false
16: optional i64 expiration_time //minutes
17: optional TDictFunction dict_function
+ 18: optional string runtime_version
+ 19: optional string function_code
}
enum TJdbcOperation {
diff --git a/regression-test/data/pythonudf_p0/sanity/test_pythonudf_assertequal.out b/regression-test/data/pythonudf_p0/sanity/test_pythonudf_assertequal.out
new file mode 100644
index 0000000..3376296
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/sanity/test_pythonudf_assertequal.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select --
+23.34 == 23.34
+
diff --git a/regression-test/data/pythonudf_p0/sanity/test_pythonudf_assertlessthan.out b/regression-test/data/pythonudf_p0/sanity/test_pythonudf_assertlessthan.out
new file mode 100644
index 0000000..41cb521
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/sanity/test_pythonudf_assertlessthan.out
@@ -0,0 +1,5 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select --
+0.123 < 0.124
+23.34 < 23.35
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_aggregate.out b/regression-test/data/pythonudf_p0/test_pythonudf_aggregate.out
new file mode 100644
index 0000000..ac41889
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_aggregate.out
@@ -0,0 +1,38 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_grades --
+1 Alice English 88 B
+1 Alice Math 95 A
+1 Alice Science 92 A
+2 Bob English 85 B
+2 Bob Math 78 C
+2 Bob Science 80 B
+3 Charlie English 70 C
+3 Charlie Math 65 D
+3 Charlie Science 68 D
+4 David English 60 D
+4 David Math 55 F
+4 David Science 58 F
+
+-- !select_group_by_grade --
+A 2 93.5
+B 3 84.33333333333333
+C 2 74
+D 3 64.33333333333333
+F 2 56.5
+
+-- !select_aggregate_with_udf --
+1 Alice 91.66666666666667 A
+2 Bob 81 B
+3 Charlie 67.66666666666667 D
+4 David 57.66666666666666 F
+
+-- !select_age_group_aggregate --
+Adult 2 85000 90000 80000
+Minor 1 0 0 0
+Senior 2 105000 110000 100000
+Young Adult 3 51666.66666666666 60000 45000
+
+-- !select_having_with_udf --
+1 Alice 91.66666666666667
+2 Bob 81
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_always_nullable.out b/regression-test/data/pythonudf_p0/test_pythonudf_always_nullable.out
new file mode 100644
index 0000000..055bbad
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_always_nullable.out
@@ -0,0 +1,42 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_nullable_true_normal --
+20
+
+-- !select_nullable_true_null --
+\N
+
+-- !select_nullable_true_negative --
+\N
+
+-- !select_nullable_false_normal --
+20
+
+-- !select_nullable_false_null --
+0
+
+-- !select_nullable_false_returns_none_normal --
+20
+
+-- !select_table_nullable_true --
+1 10 20
+2 \N \N
+3 -5 \N
+4 0 0
+5 100 200
+
+-- !select_table_nullable_false --
+1 10 20
+2 \N 0
+3 -5 -10
+4 0 0
+5 100 200
+
+-- !select_string_nullable --
+HELLO
+
+-- !select_string_nullable_null --
+\N
+
+-- !select_string_nullable_empty --
+\N
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_array.out b/regression-test/data/pythonudf_p0/test_pythonudf_array.out
new file mode 100644
index 0000000..eda9275
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_array.out
@@ -0,0 +1,109 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+1 2 a1b
+2 4 a2b
+3 6 a3b
+4 8 a4b
+5 10 a5b
+6 12 a6b
+7 14 a7b
+8 16 a8b
+9 18 a9b
+10 20 a10b
+
+-- !select_1 --
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+
+-- !select_2 --
+\N
+
+-- !select_3 --
+[1] 2
+[2] 4
+[3] 6
+[4] 8
+[5] 10
+[6] 12
+[7] 14
+[8] 16
+[9] 18
+[10] 20
+
+-- !select_4 --
+[2] 2
+[4] 4
+[6] 6
+[8] 8
+[10] 10
+[12] 12
+[14] 14
+[16] 16
+[18] 18
+[20] 20
+
+-- !select_5 --
+\N
+
+-- !select_6 --
+["a1b"] 2
+["a2b"] 4
+["a3b"] 6
+["a4b"] 8
+["a5b"] 10
+["a6b"] 12
+["a7b"] 14
+["a8b"] 16
+["a9b"] 18
+["a10b"] 20
+
+-- !select_7 --
+["a1b1"] 2
+["a2b2"] 4
+["a3b3"] 6
+["a4b4"] 8
+["a5b5"] 10
+["a6b6"] 12
+["a7b7"] 14
+["a8b8"] 16
+["a9b9"] 18
+["a10b10"] 20
+
+-- !select_8 --
+\N
+
+-- !select_9 --
+a1b 2
+a2b 4
+a3b 6
+a4b 8
+a5b 10
+a6b 12
+a7b 14
+a8b 16
+a9b 18
+a10b 20
+
+-- !select_10 --
+a1b1 2
+a2b2 4
+a3b3 6
+a4b4 8
+a5b5 10
+a6b6 12
+a7b7 14
+a8b8 16
+a9b9 18
+a10b10 20
+
+-- !select_11 --
+\N
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_base_data_type.out b/regression-test/data/pythonudf_p0/test_pythonudf_base_data_type.out
new file mode 100644
index 0000000..781ea31
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_base_data_type.out
@@ -0,0 +1,15 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_1 --
+True,127,32767,2147483647,9223372036854775807,170141183460469231731687303715884105727,1.2300000190734863,4.56789,123456.780000000,12345678901.230000000,123456789012345678901.234567890,2023-01-01,2023-01-01 20:34:56+08:00,char_data_1,varchar_data_1,string_data_1
+False,-128,-32768,-2147483648,-9223372036854775808,-170141183460469231731687303715884105728,-2.3399999141693115,-5.6789,-987654.320000000,-98765432.110000000,-987654321098765432.109876540,2024-05-15,2024-05-15 16:22:10+08:00,char_data_2,varchar_data_2,string_data_2
+True,0,0,0,0,0,0.0,0.0,0E-9,0E-9,0E-9,2025-10-15,2025-10-15 08:00:00+08:00,char_zero,varchar_zero,string_zero
+False,100,20000,300000000,4000000000000000000,99999999999999999999999999999999999999,3.140000104904175,2.71828,999999.990000000,99999999999999.990000000,100000000000000000000000.000000000,2022-12-31,2023-01-01 07:59:59+08:00,char_max,varchar_max,string_max
+True,-50,-10000,-100000000,-5000000000000000000,-99999999999999999999999999999999999999,-1.409999966621399,-0.57721,-0.010000000,-0.010000000,0E-9,2021-07-04,2021-07-04 22:30:00+08:00,char_neg,varchar_neg,string_neg
+
+-- !select_2 --
+True,127,32767,2147483647,9223372036854775807,170141183460469231731687303715884105727,1.2300000190734863,4.56789,123456.780000000,12345678901.230000000,123456789012345678901.234567890,2023-01-01,2023-01-01 20:34:56+08:00,char_data_1,varchar_data_1,string_data_1
+False,-128,-32768,-2147483648,-9223372036854775808,-170141183460469231731687303715884105728,-2.3399999141693115,-5.6789,-987654.320000000,-98765432.110000000,-987654321098765432.109876540,2024-05-15,2024-05-15 16:22:10+08:00,char_data_2,varchar_data_2,string_data_2
+True,0,0,0,0,0,0.0,0.0,0E-9,0E-9,0E-9,2025-10-15,2025-10-15 08:00:00+08:00,char_zero,varchar_zero,string_zero
+False,100,20000,300000000,4000000000000000000,99999999999999999999999999999999999999,3.140000104904175,2.71828,999999.990000000,99999999999999.990000000,100000000000000000000000.000000000,2022-12-31,2023-01-01 07:59:59+08:00,char_max,varchar_max,string_max
+True,-50,-10000,-100000000,-5000000000000000000,-99999999999999999999999999999999999999,-1.409999966621399,-0.57721,-0.010000000,-0.010000000,0E-9,2021-07-04,2021-07-04 22:30:00+08:00,char_neg,varchar_neg,string_neg
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_boolean.out b/regression-test/data/pythonudf_p0/test_pythonudf_boolean.out
new file mode 100644
index 0000000..e43b5026
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_boolean.out
@@ -0,0 +1,28 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+111 true
+112 false
+113 false
+114 true
+
+-- !select --
+false
+
+-- !select --
+true
+
+-- !select --
+false
+
+-- !select --
+true
+
+-- !select --
+true
+
+-- !select --
+111 false
+112 true
+113 true
+114 false
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_complex_data_type.out b/regression-test/data/pythonudf_p0/test_pythonudf_complex_data_type.out
new file mode 100644
index 0000000..3f3d821
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_complex_data_type.out
@@ -0,0 +1,37 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_1 --
+[1,2,3]|[a,b,c]|[[1,2],[3,4]]
+[]|[]|[]
+NULL|[x,NULL,z]|NULL
+[0,-1,2147483647]|[hello,world]|[[],[1]]
+
+-- !select_2 --
+[1,2,3]|[a,b,c]|[[1,2],[3,4]]
+[]|[]|[]
+NULL|[x,NULL,z]|NULL
+[0,-1,2147483647]|[hello,world]|[[],[1]]
+
+-- !select_3 --
+{1:one,2:two}|{e:2.718,pi:3.14}
+{}|{}
+NULL|{null_key:NULL}
+{-1:minus_one,0:zero}|{max:1.79769e+308}
+
+-- !select_4 --
+{1:one,2:two}|{e:2.718,pi:3.14}
+{}|{}
+NULL|{null_key:NULL}
+{-1:minus_one,0:zero}|{max:1.79769e+308}
+
+-- !select_5 --
+(Alice,30,75000.50)|(1.5,2.5,[red,blue])
+(NULL,NULL,NULL)|(0.0,0.0,[])
+(Bob,25,60000.00)|(NULL,3.14,[tag1,NULL,tag3])
+(,0,0.00)|(-1.0,-2.0,NULL)
+
+-- !select_6 --
+(Alice,30,75000.50)|(1.5,2.5,[red,blue])
+(NULL,NULL,NULL)|(0.0,0.0,[])
+(Bob,25,60000.00)|(NULL,3.14,[tag1,NULL,tag3])
+(,0,0.00)|(-1.0,-2.0,NULL)
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_data_types.out b/regression-test/data/pythonudf_p0/test_pythonudf_data_types.out
new file mode 100644
index 0000000..a79b499
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_data_types.out
@@ -0,0 +1,24 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_tinyint --
+11
+
+-- !select_smallint --
+2000
+
+-- !select_bigint --
+1000001000000
+
+-- !select_decimal --
+\N
+
+-- !select_date --
+2024-01-15
+
+-- !select_datetime --
+2024-01-15 18:30:45+08:00
+
+-- !select_table_types --
+1 11 200 1010000
+2 21 400 1020000
+3 \N \N \N
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_error_handling.out b/regression-test/data/pythonudf_p0/test_pythonudf_error_handling.out
new file mode 100644
index 0000000..2dc0c07
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_error_handling.out
@@ -0,0 +1,50 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_divide_normal --
+5
+
+-- !select_divide_zero --
+\N
+
+-- !select_divide_null --
+\N
+
+-- !select_substring_valid --
+e
+
+-- !select_substring_invalid --
+\N
+
+-- !select_substring_negative --
+\N
+
+-- !select_parse_valid --
+123
+
+-- !select_parse_invalid --
+\N
+
+-- !select_parse_empty --
+\N
+
+-- !select_array_valid --
+20
+
+-- !select_array_invalid --
+\N
+
+-- !select_table_error_handling --
+1 100 10 10 123 123
+2 50 0 \N abc \N
+3 \N 5 \N \N
+4 75 \N \N 456 456
+5 25 5 5 xyz \N
+
+-- !select_length_normal --
+5
+
+-- !select_length_empty --
+0
+
+-- !select_length_null --
+\N
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_file_protocol.out b/regression-test/data/pythonudf_p0/test_pythonudf_file_protocol.out
new file mode 100644
index 0000000..8d1d3a5
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_file_protocol.out
@@ -0,0 +1,22 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_file_int --
+100
+
+-- !select_file_string --
+123****890
+
+-- !select_file_float --
+\N
+
+-- !select_file_bool_true --
+false
+
+-- !select_file_bool_false --
+true
+
+-- !select_table_file --
+1 10 11 hello h***o
+2 20 21 world w***d
+3 30 31 python p****n
+4 40 41 doris d***s
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_float.out b/regression-test/data/pythonudf_p0/test_pythonudf_float.out
new file mode 100644
index 0000000..6baa525
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_float.out
@@ -0,0 +1,45 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+111 11111.11 222222.3 12345678.34455677 1111112
+112 1234556.0 222222.3 222222222.3333333 4444444444444.556
+113 8.765432E7 \N 6666666666.666667 \N
+
+-- !select --
+-108.2747
+
+-- !select --
+-108.2747
+
+-- !select --
+\N
+
+-- !select --
+\N
+
+-- !select --
+111 -211111.2
+112 1012334.0
+113 \N
+
+-- !select --
+111 -211111.2
+112 1012334.0
+113 \N
+
+-- !select --
+113.9475611
+
+-- !select --
+113.9475611
+
+-- !select --
+\N
+
+-- !select --
+\N
+
+-- !select --
+111 24691356.68911354
+112 444444444.6666667
+113 13333333333.33333
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_global_function.out b/regression-test/data/pythonudf_p0/test_pythonudf_global_function.out
new file mode 100644
index 0000000..ad3d84a
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_global_function.out
@@ -0,0 +1,23 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_global_multiply --
+56
+
+-- !select_global_lower --
+hello world
+
+-- !select_local_add --
+40
+
+-- !select_table_global --
+1 5 6 30 APPLE apple
+2 10 20 200 BANANA banana
+3 3 7 21 CHERRY cherry
+4 \N 5 \N DATE date
+5 8 9 72 \N \N
+
+-- !select_global_power --
+8
+
+-- !select_global_power_decimal --
+2.23606797749979
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_inline_complex.out b/regression-test/data/pythonudf_p0/test_pythonudf_inline_complex.out
new file mode 100644
index 0000000..22015af
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_inline_complex.out
@@ -0,0 +1,19 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_array_sum --
+15
+
+-- !select_reverse --
+olleH
+
+-- !select_weighted_avg --
+84
+
+-- !select_format_name --
+DOE, John
+
+-- !select_in_range_true --
+true
+
+-- !select_in_range_false --
+false
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_inline_scalar.out b/regression-test/data/pythonudf_p0/test_pythonudf_inline_scalar.out
new file mode 100644
index 0000000..9632ff9
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_inline_scalar.out
@@ -0,0 +1,28 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_add --
+30
+
+-- !select_add_null --
+\N
+
+-- !select_concat --
+Hello World
+
+-- !select_concat_null --
+\N
+
+-- !select_square --
+25
+
+-- !select_square_negative --
+9
+
+-- !select_positive --
+true
+
+-- !select_negative --
+false
+
+-- !select_zero --
+false
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_inline_vector.out b/regression-test/data/pythonudf_p0/test_pythonudf_inline_vector.out
new file mode 100644
index 0000000..de95543
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_inline_vector.out
@@ -0,0 +1,85 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !vec_add_int --
+1 10 20 31
+2 30 40 71
+3 \N 50 \N
+4 60 \N \N
+5 70 80 151
+
+-- !vec_multiply_double --
+1 1.5 2.5 3.75
+2 3.5 4.5 15.75
+3 5.5 \N \N
+4 \N 6.5 \N
+5 7.5 8.5 63.75
+
+-- !vec_concat_string --
+1 hello world hello_world
+2 foo bar foo_bar
+3 \N test \N
+4 data \N \N
+5 python udf python_udf
+
+-- !vec_max_int --
+1 10 20 20
+2 30 40 40
+3 \N 50 \N
+4 60 \N \N
+5 70 80 80
+
+-- !vec_sqrt_double --
+1 1.5 1.224744871391589
+2 3.5 1.870828693386971
+3 5.5 2.345207879911715
+4 \N \N
+5 7.5 2.738612787525831
+
+-- !vec_upper_string --
+1 hello HELLO
+2 foo FOO
+3 \N \N
+4 data DATA
+5 python PYTHON
+
+-- !vec_weighted_sum --
+1 10 20 17
+2 30 40 37
+3 \N 50 \N
+4 60 \N \N
+5 70 80 77
+
+-- !vec_not_bool --
+1 true false
+2 false true
+3 true false
+4 false true
+5 true false
+
+-- !vec_greater_than --
+1 10 20 false
+2 30 40 false
+3 \N 50 false
+4 60 \N false
+5 70 80 false
+
+-- !vec_string_length --
+1 hello 5
+2 foo 3
+3 \N \N
+4 data 4
+5 python 6
+
+-- !vec_fill_null_int --
+1 10 10
+2 30 30
+3 \N 0
+4 60 60
+5 70 70
+
+-- !vec_cumsum_int --
+1 10 10
+2 30 40
+3 \N \N
+4 60 100
+5 70 170
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_int.out b/regression-test/data/pythonudf_p0/test_pythonudf_int.out
new file mode 100644
index 0000000..cd3a1de
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_int.out
@@ -0,0 +1,112 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+1 2 3 4
+2 4 6 8
+3 6 9 12
+4 8 12 16
+5 10 15 20
+6 12 18 24
+7 14 21 28
+8 16 24 32
+9 18 27 36
+10 20 30 40
+
+-- !select --
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+
+-- !select --
+\N
+
+-- !select --
+3
+5
+7
+9
+11
+13
+15
+17
+19
+21
+
+-- !select --
+\N
+
+-- !select --
+4
+7
+10
+13
+16
+19
+22
+25
+28
+31
+
+-- !select --
+\N
+
+-- !select --
+5
+9
+13
+17
+21
+25
+29
+33
+37
+41
+
+-- !select --
+\N
+
+-- !select_global_1 --
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+
+-- !select_global_2 --
+\N
+
+-- !select_global_3 --
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+
+-- !select_global_4 --
+4
+4
+4
+4
+4
+4
+4
+4
+4
+4
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_map.out b/regression-test/data/pythonudf_p0/test_pythonudf_map.out
new file mode 100644
index 0000000..7c7cf58
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_map.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_1 --
+{1:1, 10:1, 100:1} 111
+{2:1, 20:1, 200:1, 2000:1} 2222
+{3:1} 3
+
+-- !select_2 --
+{"114":"514", "1919":"810"} 1145141919810
+{"a":"bc", "def":"g", "hij":"k"} abcdefghijk
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_mixed_params.out b/regression-test/data/pythonudf_p0/test_pythonudf_mixed_params.out
new file mode 100644
index 0000000..45d661e
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_mixed_params.out
@@ -0,0 +1,77 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_1 --
+1 100 150
+2 200 300
+3 150 225
+4 300 450
+5 250 375
+6 180 270
+7 220 330
+8 120 180
+9 280 420
+10 350 525
+
+-- !select_2 --
+1 100 5 550
+2 200 3 660
+3 150 8 1320
+4 300 2 660
+5 250 6 1650
+
+-- !select_3 --
+1 100 0.1 90
+2 200 0.15 170
+3 150 0.2 120
+4 300 0.05 285
+5 250 0.12 220
+
+-- !select_4 --
+1 100 5 0.1 460
+2 200 3 0.15 520
+3 150 8 0.2 970
+4 300 2 0.05 580
+5 250 6 0.12 1330
+
+-- !select_5 --
+1 A CAT_A
+2 B CAT_B
+3 A CAT_A
+4 C CAT_C
+5 B CAT_B
+
+-- !select_6 --
+1 5 15
+2 3 13
+3 8 18
+4 2 12
+5 6 16
+
+-- !select_7 --
+1 100 0.1 100
+2 200 0.15 170
+3 150 0.2 150
+4 300 0.05 285
+5 250 0.12 220
+6 180 0.18 180
+7 220 0.08 202.4
+8 120 0.25 120
+9 280 0.1 252
+10 350 0.15 297.5
+
+-- !select_8 --
+1 100 5 600
+2 200 3 720
+3 150 8 1440
+
+-- !select_9 --
+1 100 5 120
+2 200 3 200
+3 150 8 180
+4 300 2 300
+5 250 6 300
+
+-- !select_10 --
+1 100 109.5
+2 200 214
+3 150 161.75
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_module.out b/regression-test/data/pythonudf_p0/test_pythonudf_module.out
new file mode 100644
index 0000000..a1a8c21
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_module.out
@@ -0,0 +1,13 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select --
+1001 5 10 500 62.19368581839511
+1002 40 1 20 2.679441541679836
+1003 15 5 300 40.4622349294233
+1004 -1 3 100 \N
+1005 \N 2 200 \N
+1006 7 \N 150 \N
+1007 30 0 \N \N
+1008 0 100 5000 100
+1009 100 2 10 3.595836866004329
+1010 8 8 800 68.85254329722605
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_module_advanced.out b/regression-test/data/pythonudf_p0/test_pythonudf_module_advanced.out
new file mode 100644
index 0000000..67bbf40
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_module_advanced.out
@@ -0,0 +1,57 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_module_ltv_normal --
+100
+
+-- !select_module_ltv_null --
+\N
+
+-- !select_module_ltv_zero --
+100
+
+-- !select_customer_analytics --
+1001 Premium 5 50 10000 100
+1002 Regular 30 10 2000 67.19368581839511
+1003 Inactive 60 5 500 20.37527840768416
+1004 VIP 2 100 25000 100
+1005 Regular 15 25 5000 100
+1006 Regular \N 30 6000 \N
+1007 Regular 10 \N 3000 \N
+1008 Inactive 45 8 \N \N
+1009 VIP 0 200 50000 100
+1010 Churned 90 2 100 6.295836866004329
+
+-- !select_segment_analysis --
+Churned 1 100 6.295836866004329
+Inactive 2 500 20.37527840768416
+Premium 1 10000 100
+Regular 4 4000 83.59684290919756
+VIP 2 37500 100
+
+-- !select_high_value_customers --
+
+-- !select_sorted_by_ltv --
+1009 VIP 100
+1005 Regular 100
+1004 VIP 100
+1001 Premium 100
+1002 Regular 67.19368581839511
+
+-- !select_complex_query --
+1001 Premium 5 50 10000 100 Low Value
+1004 VIP 2 100 25000 100 Low Value
+1005 Regular 15 25 5000 100 Low Value
+1009 VIP 0 200 50000 100 Low Value
+1002 Regular 30 10 2000 67.19368581839511 Low Value
+1003 Inactive 60 5 500 20.37527840768416 Low Value
+1010 Churned 90 2 100 6.295836866004329 Low Value
+1007 Regular 10 \N 3000 \N Unknown
+1008 Inactive 45 8 \N \N Unknown
+1006 Regular \N 30 6000 \N Unknown
+
+-- !select_join_with_module_udf --
+1001 Alice Johnson Premium 10000 100
+1004 Diana Prince VIP 25000 100
+1005 Eve Wilson Regular 5000 100
+1002 Bob Smith Regular 2000 67.19368581839511
+1003 Charlie Brown Inactive 500 20.37527840768416
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_module_scalar.out b/regression-test/data/pythonudf_p0/test_pythonudf_module_scalar.out
new file mode 100644
index 0000000..534da01
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_module_scalar.out
@@ -0,0 +1,210 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !add_three --
+1 10 20 30 60
+2 5 15 25 45
+3 100 50 25 175
+4 7 3 11 21
+5 17 19 23 59
+
+-- !safe_div --
+1 100 10 10
+2 200 20 10
+3 150 0 \N
+4 80 5 16
+5 300 15 20
+
+-- !discount --
+1 100 90 75
+2 200 180 150
+3 150 135 112.5
+4 80 72 60
+5 300 270 225
+
+-- !compound_interest --
+1 100 162.8894626777442
+2 200 325.7789253554884
+3 150 244.3341940166163
+4 80 130.3115701421954
+5 300 488.6683880332326
+
+-- !bmi --
+1 22.86 29.39
+2 22.86 29.39
+3 22.86 29.39
+4 22.86 29.39
+5 22.86 29.39
+
+-- !fibonacci --
+1 10 55
+2 5 5
+4 7 13
+5 17 1597
+
+-- !is_prime --
+1 10 20 30 false false false
+2 5 15 25 true false false
+3 100 50 25 false false false
+4 7 3 11 true true true
+5 17 19 23 true true true
+
+-- !gcd --
+1 10 20 10
+2 5 15 5
+3 100 50 50
+4 7 3 1
+5 17 19 1
+
+-- !lcm --
+1 10 20 20
+2 5 15 15
+3 100 50 100
+4 7 3 21
+5 17 19 323
+
+-- !reverse --
+1 hello world dlrow olleh
+2 foo bar baz zab rab oof
+3 racecar racecar
+4 a man a plan a canal panama amanap lanac a nalp a nam a
+5 python udf test tset fdu nohtyp
+
+-- !count_vowels --
+1 hello world 3
+2 foo bar baz 4
+3 racecar 3
+4 a man a plan a canal panama 10
+5 python udf test 3
+
+-- !count_words --
+1 hello world 2
+2 foo bar baz 3
+3 racecar 1
+4 a man a plan a canal panama 7
+5 python udf test 3
+
+-- !capitalize --
+1 hello world Hello World
+2 foo bar baz Foo Bar Baz
+3 racecar Racecar
+4 a man a plan a canal panama A Man A Plan A Canal Panama
+5 python udf test Python Udf Test
+
+-- !is_palindrome --
+1 hello world false
+2 foo bar baz false
+3 racecar true
+4 a man a plan a canal panama true
+5 python udf test false
+
+-- !similarity --
+1 hello world 50
+2 foo bar baz 10
+3 racecar 14.29
+4 a man a plan a canal panama 10
+5 python udf test 23.08
+
+-- !mask_email --
+1 test@example.com t***@example.com
+2 user@domain.com u***@domain.com
+3 admin@test.org a***@test.org
+4 info@company.net i***@company.net
+5 contact@site.io c***@site.io
+
+-- !extract_domain --
+1 test@example.com example.com
+2 user@domain.com domain.com
+3 admin@test.org test.org
+4 info@company.net company.net
+5 contact@site.io site.io
+
+-- !levenshtein --
+1 hello world 0
+2 foo bar baz 10
+3 racecar 10
+4 a man a plan a canal panama 24
+5 python udf test 13
+
+-- !days_between --
+1 2024-01-15 2024-01-20 5
+2 2024-02-10 2024-03-15 34
+3 2023-12-01 2024-01-01 31
+4 2024-06-15 2024-06-15 0
+5 2024-03-01 2024-12-31 305
+
+-- !is_weekend --
+1 2024-01-15 false
+2 2024-02-10 true
+3 2023-12-01 false
+4 2024-06-15 true
+5 2024-03-01 false
+
+-- !get_quarter --
+1 2024-01-15 1
+2 2024-02-10 1
+3 2023-12-01 4
+4 2024-06-15 2
+5 2024-03-01 1
+
+-- !age --
+1 34
+2 34
+3 33
+4 34
+5 34
+
+-- !in_range --
+1 10 true
+2 5 false
+3 100 false
+4 7 false
+5 17 true
+
+-- !xor --
+1 true true false
+2 false true true
+3 true false true
+4 false false false
+5 true true false
+
+-- !grade --
+1 100 A
+2 200 A
+3 150 A
+4 80 B
+5 300 A
+
+-- !categorize_age --
+1 10 Child
+2 5 Child
+3 100 Senior
+4 7 Child
+5 17 Teenager
+
+-- !tax --
+1 100 15
+2 200 30
+3 150 22.5
+4 80 12
+5 300 45
+
+-- !truncate --
+1 hello world hello w...
+2 foo bar baz foo bar...
+3 racecar racecar
+4 a man a plan a canal panama a man a...
+5 python udf test python ...
+
+-- !null_handling --
+1 10 20 30 60
+2 \N 20 30 \N
+3 10 \N 30 \N
+4 10 20 \N \N
+5 \N \N \N \N
+
+-- !string_edge --
+1 normal string gnirts lamron 3 2
+2 0 0
+3 0 0
+4 a a 1 1
+5 \N \N \N \N
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_module_vector.out b/regression-test/data/pythonudf_p0/test_pythonudf_module_vector.out
new file mode 100644
index 0000000..bfcab62
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_module_vector.out
@@ -0,0 +1,106 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !vec_add_const --
+1 10 20 130
+2 30 15 145
+3 50 50 200
+4 5 25 130
+5 100 10 210
+
+-- !vec_multiply_round --
+1 1.5 2.5 3.75
+2 3.5 4.5 15.75
+3 5.5 2 11
+4 7.5 1.5 11.25
+5 9.5 3.5 33.25
+
+-- !vec_concat_sep --
+1 hello world python udf hello world | python udf
+2 foo bar test case foo bar | test case
+3 data science machine learning data science | machine learning
+4 apache doris database system apache doris | database system
+5 vector operations pandas series vector operations | pandas series
+
+-- !vec_title_case --
+1 hello world Hello World
+2 foo bar Foo Bar
+3 data science Data Science
+4 apache doris Apache Doris
+5 vector operations Vector Operations
+
+-- !vec_conditional --
+1 10 20 20
+2 30 15 30
+3 50 50 50
+4 5 25 25
+5 100 10 100
+
+-- !vec_percentage --
+1 1.5 2.5 60
+2 3.5 4.5 77.78
+3 5.5 2 275
+4 7.5 1.5 500
+5 9.5 3.5 271.43
+
+-- !vec_in_range --
+1 10 true
+2 30 true
+3 50 true
+4 5 false
+5 100 false
+
+-- !vec_safe_div --
+1 1.5 2.5 0.6
+2 3.5 4.5 0.7777777777777778
+3 5.5 2 2.75
+4 7.5 1.5 5
+5 9.5 3.5 2.714285714285714
+
+-- !vec_exp_decay --
+1 1.5 10 1.074796965860684
+2 3.5 30 1.287578044100048
+3 5.5 50 1.03881581560659
+4 7.5 5 6.348612936679606
+5 9.5 100 0.3389029367988978
+
+-- !vec_first_word --
+1 hello world hello
+2 foo bar foo
+3 data science data
+4 apache doris apache
+5 vector operations vector
+
+-- !vec_abs_diff --
+1 10 20 10
+2 30 15 15
+3 50 50 0
+4 5 25 20
+5 100 10 90
+
+-- !vec_power --
+1 1.5 2.25
+2 3.5 12.25
+3 5.5 30.25
+4 7.5 56.25
+5 9.5 90.25
+
+-- !vec_bool_and --
+1 true true true
+2 false true false
+3 true false false
+4 false false false
+5 true true true
+
+-- !vec_bool_or --
+1 true true true
+2 false true true
+3 true false true
+4 false false false
+5 true true true
+
+-- !vec_clip --
+1 10 20
+2 30 30
+3 50 50
+4 5 20
+5 100 60
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_multiline_inline.out b/regression-test/data/pythonudf_p0/test_pythonudf_multiline_inline.out
new file mode 100644
index 0000000..a6e0c9c
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_multiline_inline.out
@@ -0,0 +1,23 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_complex_calc --
+25
+
+-- !select_business_logic_vip --
+MEDIUM:3750.00
+
+-- !select_business_logic_regular --
+MEDIUM:1800.00
+
+-- !select_text_analyzer --
+len:15,words:3,upper:2,lower:8,digits:3
+
+-- !select_statistics --
+mean:25.00,std:11.18,max:40.00,min:10.00
+
+-- !select_table_multiline --
+1 VIP 15000 150 HIGH:11250.00 len:22,words:3,upper:1,lower:19,digits:0
+2 PREMIUM 8000 80 MEDIUM:6560.00 len:13,words:2,upper:1,lower:11,digits:0
+3 REGULAR 3000 40 MEDIUM:2700.00 len:13,words:2,upper:1,lower:11,digits:0
+4 VIP 500 10 LOW:400.00 len:15,words:3,upper:4,lower:9,digits:0
+5 REGULAR 12000 200 HIGH:10200.00 len:19,words:3,upper:1,lower:16,digits:0
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_performance.out b/regression-test/data/pythonudf_p0/test_pythonudf_performance.out
new file mode 100644
index 0000000..d82f52f
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_performance.out
@@ -0,0 +1,59 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_perf_simple --
+4990
+
+-- !select_perf_aggregate --
+A 2500 996
+B 2500 998
+C 2500 1000
+D 2500 1002
+
+-- !select_perf_multiple_udf --
+D 1880
+C 1870
+B 1870
+A 1870
+
+-- !select_perf_string --
+A 250
+B 250
+C 250
+D 250
+
+-- !select_perf_complex --
+A 3372.3 6744.6 0
+B 3373.65 6745.950000000001 1.35
+C 3375 6747.3 2.7
+D 3376.35 6748.650000000001 4.05
+
+-- !select_perf_nested --
+D 1002
+C 1000
+B 998
+A 996
+
+-- !select_perf_null --
+5000 4000 50
+
+-- !select_perf_order --
+9999 999 1998
+8999 999 1998
+7999 999 1998
+6999 999 1998
+5999 999 1998
+4999 999 1998
+3999 999 1998
+2999 999 1998
+1999 999 1998
+999 999 1998
+9998 998 1996
+8998 998 1996
+7998 998 1996
+6998 998 1996
+5998 998 1996
+4998 998 1996
+3998 998 1996
+2998 998 1996
+1998 998 1996
+998 998 1996
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_ret_map.out b/regression-test/data/pythonudf_p0/test_pythonudf_ret_map.out
new file mode 100644
index 0000000..b1160eb
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_ret_map.out
@@ -0,0 +1,17 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_1 --
+{1:1.1, 11:11.1} {10:11, 110:111}
+{2:2.2, 22:22.2} {20:22, 220:222}
+
+-- !select_2 --
+{1:1, 10:1, 100:1} {10:10, 100:10, 1000:10}
+{2:2, 20:2, 200:2} {20:20, 200:20, 2000:20}
+
+-- !select_3 --
+10 1.1 {"11410":"5141.1"}
+20 2.2 {"11420":"5142.2"}
+
+-- !select_4 --
+{"abc":"efg", "h":"i"} {"abc114":"efg514", "h114":"i514"}
+{"j":"k"} {"j114":"k514"}
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_runtime_version.out b/regression-test/data/pythonudf_p0/test_pythonudf_runtime_version.out
new file mode 100644
index 0000000..2658cd5
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_runtime_version.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_version_short --
+42
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_schema_check.out b/regression-test/data/pythonudf_p0/test_pythonudf_schema_check.out
new file mode 100644
index 0000000..a17c75e
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_schema_check.out
@@ -0,0 +1,112 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_1 --
+1 10 1000 1010
+2 20 2000 2020
+3 30 3000 3030
+4 40 4000 4040
+5 50 5000 5050
+
+-- !select_2 --
+1 100 1000 1100
+2 200 2000 2200
+3 300 3000 3300
+4 400 4000 4400
+5 500 5000 5500
+
+-- !select_3 --
+1 1000 10000 11000
+2 2000 20000 22000
+3 3000 30000 33000
+4 4000 40000 44000
+5 5000 50000 55000
+
+-- !select_4 --
+1 1.5 10.5 12
+2 2.5 20.5 23
+3 3.5 30.5 34
+4 4.5 40.5 45
+5 5.5 50.5 56
+
+-- !select_5 --
+1 10 100 1000 1110
+2 20 200 2000 2220
+3 30 300 3000 3330
+4 40 400 4000 4440
+5 50 500 5000 5550
+
+-- !select_6 --
+1 10 100 1000
+2 20 200 4000
+3 30 300 9000
+4 40 400 16000
+5 50 500 25000
+
+-- !select_7 --
+1 1.5 10.5 7
+2 2.5 20.5 8.199999999999999
+3 3.5 30.5 8.714285714285714
+4 4.5 40.5 9
+5 5.5 50.5 9.181818181818182
+
+-- !select_8 --
+1 1000 1.5 2001.5
+2 2000 2.5 4002.5
+3 3000 3.5 6003.5
+4 4000 4.5 8004.5
+5 5000 5.5 10005.5
+
+-- !select_9 --
+1 test1 TEST1
+2 test2 TEST2
+3 test3 TEST3
+4 test4 TEST4
+5 test5 TEST5
+
+-- !select_10 --
+1 true false
+2 false true
+3 true false
+4 false true
+5 true false
+
+-- !select_11 --
+1 10 10000 10010
+2 20 20000 20020
+3 30 30000 30030
+4 40 40000 40040
+5 50 50000 50050
+
+-- !select_12 --
+1 1000 1500
+2 2000 3000
+3 3000 4500
+4 4000 6000
+5 5000 7500
+
+-- !select_13 --
+1 test1 \N
+
+-- !select_14 --
+1 10000 11000
+
+-- !select_15 --
+1 10.5 12.0
+
+-- !select_16 --
+1 true 1001
+
+-- !select_17 --
+1 2024-01-01 2024-01-01
+
+-- !select_18 --
+1 1000 false
+
+-- !select_19 --
+1 test1 \N
+
+-- !select_20 --
+1 test1 true \N
+
+-- !select_22 --
+1 1.5 1001
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudf_string.out b/regression-test/data/pythonudf_p0/test_pythonudf_string.out
new file mode 100644
index 0000000..59f2f7c
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudf_string.out
@@ -0,0 +1,67 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+1 1 abcdefg1 poiuytre1abcdefg
+2 2 abcdefg2 poiuytre2abcdefg
+3 3 abcdefg3 poiuytre3abcdefg
+4 4 abcdefg4 poiuytre4abcdefg
+5 5 abcdefg5 poiuytre5abcdefg
+6 6 abcdefg6 poiuytre6abcdefg
+7 7 abcdefg7 poiuytre7abcdefg
+8 8 abcdefg8 poiuytre8abcdefg
+9 9 abcdefg9 poiuytre9abcdefg
+
+-- !select_default_2 --
+1 1 abcdefg1 poiuytre1abcdefg
+2 2 abcdefg2 poiuytre2abcdefg
+3 3 abcdefg3 poiuytre3abcdefg
+4 4 abcdefg4 poiuytre4abcdefg
+5 5 abcdefg5 poiuytre5abcdefg
+6 6 abcdefg6 poiuytre6abcdefg
+7 7 abcdefg7 poiuytre7abcdefg
+8 8 abcdefg8 poiuytre8abcdefg
+9 9 abcdefg9 poiuytre9abcdefg
+
+-- !select --
+ab***fg1
+ab***fg2
+ab***fg3
+ab***fg4
+ab***fg5
+ab***fg6
+ab***fg7
+ab***fg8
+ab***fg9
+
+-- !select --
+po***********efg
+po***********efg
+po***********efg
+po***********efg
+po***********efg
+po***********efg
+po***********efg
+po***********efg
+po***********efg
+
+-- !select --
+ab*def ab**efg
+ab*def ab**efg
+ab*def ab**efg
+ab*def ab**efg
+ab*def ab**efg
+ab*def ab**efg
+ab*def ab**efg
+ab*def ab**efg
+ab*def ab**efg
+
+-- !select_4 --
+ab***fg1 ab***fg1
+ab***fg2 ab***fg2
+ab***fg3 ab***fg3
+ab***fg4 ab***fg4
+ab***fg5 ab***fg5
+ab***fg6 ab***fg6
+ab***fg7 ab***fg7
+ab***fg8 ab***fg8
+ab***fg9 ab***fg9
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudtf_array.out b/regression-test/data/pythonudf_p0/test_pythonudtf_array.out
new file mode 100644
index 0000000..866fd08
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudtf_array.out
@@ -0,0 +1,28 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+1 2 a1b
+2 4 a2b
+3 6 a3b
+
+-- !select_1 --
+1 [1, 2, 3]
+1 [1, 2, 3]
+1 [1, 2, 3]
+2 [1, 2, 3]
+2 [1, 2, 3]
+2 [1, 2, 3]
+3 [1, 2, 3]
+3 [1, 2, 3]
+3 [1, 2, 3]
+
+-- !select_2 --
+1 ["Hi", "DataMind", "Good"]
+1 ["Hi", "DataMind", "Good"]
+1 ["Hi", "DataMind", "Good"]
+2 ["Hi", "DataMind", "Good"]
+2 ["Hi", "DataMind", "Good"]
+2 ["Hi", "DataMind", "Good"]
+3 ["Hi", "DataMind", "Good"]
+3 ["Hi", "DataMind", "Good"]
+3 ["Hi", "DataMind", "Good"]
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudtf_float.out b/regression-test/data/pythonudf_p0/test_pythonudtf_float.out
new file mode 100644
index 0000000..907e4e4
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudtf_float.out
@@ -0,0 +1,24 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+111 11111.111 222222.33 1.234567834455677E7 1111112.0
+112 1234556.1 222222.33 2.2222222233333334E8 4.444444444444556E12
+113 8.765432E7 \N 6.666666666666667E9 \N
+
+-- !select1 --
+111 1.234567834455677E7 1.234567834455677E8
+112 2.2222222233333334E8 2.2222222233333335E9
+113 6.666666666666667E9 6.666666666666667E10
+
+-- !select2 --
+111 1111112.0 1.111112E7
+112 4.444444444444556E12 4.4444444444445555E13
+
+-- !select3 --
+111 11111.111 11101.111
+112 1234556.1 1234546.1
+113 8.765432E7 8.7654312E7
+
+-- !select4 --
+111 222222.33 222212.33
+112 222222.33 222212.33
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudtf_int.out b/regression-test/data/pythonudf_p0/test_pythonudtf_int.out
new file mode 100644
index 0000000..7d35ab6
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudtf_int.out
@@ -0,0 +1,128 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+0 3 300 3000
+0 6 600 6000
+0 9 900 9000
+1 1 100 1000
+1 4 400 4000
+1 7 700 7000
+2 2 200 2000
+2 5 500 5000
+2 8 800 8000
+
+-- !select1 --
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+0 0
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+1 1
+2 2
+2 2
+2 2
+2 2
+2 2
+2 2
+2 2
+2 2
+2 2
+
+-- !select2 --
+1 1
+1 1
+1 1
+2 2
+2 2
+2 2
+3 3
+3 3
+3 3
+4 4
+4 4
+4 4
+5 5
+5 5
+5 5
+6 6
+6 6
+6 6
+7 7
+7 7
+7 7
+8 8
+8 8
+8 8
+9 9
+9 9
+9 9
+
+-- !select3 --
+100 100
+100 100
+100 100
+200 200
+200 200
+200 200
+300 300
+300 300
+300 300
+400 400
+400 400
+400 400
+500 500
+500 500
+500 500
+600 600
+600 600
+600 600
+700 700
+700 700
+700 700
+800 800
+800 800
+800 800
+900 900
+900 900
+900 900
+
+-- !select4 --
+1000 1000
+1000 1000
+1000 1000
+2000 2000
+2000 2000
+2000 2000
+3000 3000
+3000 3000
+3000 3000
+4000 4000
+4000 4000
+4000 4000
+5000 5000
+5000 5000
+5000 5000
+6000 6000
+6000 6000
+6000 6000
+7000 7000
+7000 7000
+7000 7000
+8000 8000
+8000 8000
+8000 8000
+9000 9000
+9000 9000
+9000 9000
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudtf_map.out b/regression-test/data/pythonudf_p0/test_pythonudtf_map.out
new file mode 100644
index 0000000..6255675
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudtf_map.out
@@ -0,0 +1,9 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_1 --
+1 {"114":514, "1919":810}
+1 {"114":514, "1919":810}
+1 {"114":514, "1919":810}
+2 {"a":11, "def":22, "hij":33}
+2 {"a":11, "def":22, "hij":33}
+2 {"a":11, "def":22, "hij":33}
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudtf_string.out b/regression-test/data/pythonudf_p0/test_pythonudtf_string.out
new file mode 100644
index 0000000..da31e54
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudtf_string.out
@@ -0,0 +1,32 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+1 1 abc,defg poiuytre,abcdefg
+2 2 abc,defg poiuytre,abcdefg
+0 3 abc,defg poiuytre,abcdefg
+1 4 abc,defg poiuytre,abcdefg
+2 5 abc,defg poiuytre,abcdefg
+0 6 abc,defg poiuytre,abcdefg
+1 7 abc,defg poiuytre,abcdefg
+2 8 abc,defg poiuytre,abcdefg
+9 9 ab,cdefg poiuytreabcde,fg
+
+-- !select1 --
+0 abc,defg abc
+0 abc,defg defg
+0 abc,defg abc
+0 abc,defg defg
+1 abc,defg abc
+1 abc,defg defg
+1 abc,defg abc
+1 abc,defg defg
+1 abc,defg abc
+1 abc,defg defg
+2 abc,defg abc
+2 abc,defg defg
+2 abc,defg abc
+2 abc,defg defg
+2 abc,defg abc
+2 abc,defg defg
+9 ab,cdefg ab
+9 ab,cdefg cdefg
+
diff --git a/regression-test/data/pythonudf_p0/test_pythonudtf_struct.out b/regression-test/data/pythonudf_p0/test_pythonudtf_struct.out
new file mode 100644
index 0000000..f641040
--- /dev/null
+++ b/regression-test/data/pythonudf_p0/test_pythonudtf_struct.out
@@ -0,0 +1,17 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_default --
+0
+1
+2
+
+-- !select1 --
+0 1 0.112 Hello, DataMind
+0 1 0.112 Hello, DataMind
+0 1 0.112 Hello, DataMind
+1 1 0.112 Hello, DataMind
+1 1 0.112 Hello, DataMind
+1 1 0.112 Hello, DataMind
+2 1 0.112 Hello, DataMind
+2 1 0.112 Hello, DataMind
+2 1 0.112 Hello, DataMind
+
diff --git a/regression-test/pipeline/p0/conf/be.conf b/regression-test/pipeline/p0/conf/be.conf
index aa533b0..ed07170 100644
--- a/regression-test/pipeline/p0/conf/be.conf
+++ b/regression-test/pipeline/p0/conf/be.conf
@@ -90,3 +90,8 @@
enable_graceful_exit_check=true
enable_prefill_all_dbm_agg_cache_after_compaction=true
+
+# enable python udf
+enable_python_udf_support = true
+python_env_mode = venv
+python_venv_interpreter_paths = /usr/bin/python3
diff --git a/regression-test/pipeline/p0/conf/fe.conf b/regression-test/pipeline/p0/conf/fe.conf
index d7fae1f..cef89d5 100644
--- a/regression-test/pipeline/p0/conf/fe.conf
+++ b/regression-test/pipeline/p0/conf/fe.conf
@@ -92,3 +92,6 @@
max_spilled_profile_num = 2000
check_table_lock_leaky=true
+
+# enable python udf
+enable_python_udf = true
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/sanity/test_pythonudf_assertequal.groovy b/regression-test/suites/pythonudf_p0/sanity/test_pythonudf_assertequal.groovy
new file mode 100644
index 0000000..09a15f5
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/sanity/test_pythonudf_assertequal.groovy
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_assertequal") {
+ def pyPath = """${context.file.parent}/../udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+ log.info("Python Zip path: ${pyPath}".toString())
+ try {
+ sql """ DROP TABLE IF EXISTS test_pythonudf_assertequal """
+ sql """
+ CREATE TABLE IF NOT EXISTS test_pythonudf_assertequal (
+ `col` varchar(10) NOT NULL,
+ `col_1` double NOT NULL,
+ `col_2` double NOT NULL
+ )
+ DISTRIBUTED BY HASH(col) PROPERTIES("replication_num" = "1");
+ """
+
+ sql """ INSERT INTO test_pythonudf_assertequal VALUES ('abc', 23.34, 23.34); """
+
+ File path1 = new File(pyPath)
+ if (!path1.exists()) {
+ throw new IllegalStateException("""${pyPath} doesn't exist! """)
+ }
+
+ sql """ CREATE FUNCTION asser_equal(double, double) RETURNS string PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="assert_equal_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+
+ qt_select """ SELECT asser_equal(col_1, col_2) as a FROM test_pythonudf_assertequal ORDER BY a; """
+
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS asser_equal(double, double); ")
+ try_sql("DROP TABLE IF EXISTS test_pythonudf_assertequal")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/sanity/test_pythonudf_assertlessthan.groovy b/regression-test/suites/pythonudf_p0/sanity/test_pythonudf_assertlessthan.groovy
new file mode 100644
index 0000000..08a6b1f
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/sanity/test_pythonudf_assertlessthan.groovy
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_assertlessthan") {
+ def tableName = "test_pythonudf_assertlessthan"
+ def pyPath = """${context.file.parent}/../udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+ log.info("Python Zip path: ${pyPath}".toString())
+ try {
+ sql """ DROP TABLE IF EXISTS test_pythonudf_assertlessthan """
+ sql """
+ CREATE TABLE IF NOT EXISTS test_pythonudf_assertlessthan (
+ `col` varchar(10) NOT NULL,
+ `col_1` double NOT NULL,
+ `col_2` double NOT NULL
+ )
+ DISTRIBUTED BY HASH(col) PROPERTIES("replication_num" = "1");
+ """
+
+ sql """ INSERT INTO test_pythonudf_assertlessthan VALUES ('abc', 23.34, 23.35), ('bcd', 0.123, 0.124); """
+
+ File path1 = new File(pyPath)
+ if (!path1.exists()) {
+ throw new IllegalStateException("""${pyPath} doesn't exist! """)
+ }
+
+ sql """ CREATE FUNCTION asser_lessthan(double, double) RETURNS string PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="assert_lessthan_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+
+ qt_select """ SELECT asser_lessthan(col_1, col_2) as a FROM test_pythonudf_assertlessthan ORDER BY a; """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS asser_lessthan(double, double); ")
+ try_sql("DROP TABLE IF EXISTS test_pythonudf_assertlessthan")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_aggregate.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_aggregate.groovy
new file mode 100644
index 0000000..ef74f9f
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_aggregate.groovy
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_aggregate") {
+ def runtime_version = "3.10.12"
+
+ try {
+        // Test 1: Python UDFs are mainly scalar functions, so rather than creating a
+        // true aggregate function, verify that a scalar UDF composes with aggregate queries
+ sql """ DROP FUNCTION IF EXISTS py_score_grade(DOUBLE); """
+ sql """
+ CREATE FUNCTION py_score_grade(DOUBLE)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(score):
+ if score is None:
+ return None
+ if score >= 90:
+ return 'A'
+ elif score >= 80:
+ return 'B'
+ elif score >= 70:
+ return 'C'
+ elif score >= 60:
+ return 'D'
+ else:
+ return 'F'
+\$\$;
+ """
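+        // Note: the function above is created inline -- the Python source is embedded
+        // between the $$ delimiters of the statement itself, so no zip file needs to be
+        // shipped to the BEs (contrast with the file://-based suites above).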
+
+ // Create test table
+ sql """ DROP TABLE IF EXISTS student_scores; """
+ sql """
+ CREATE TABLE student_scores (
+ student_id INT,
+ student_name STRING,
+ subject STRING,
+ score DOUBLE
+ ) ENGINE=OLAP
+ DUPLICATE KEY(student_id)
+ DISTRIBUTED BY HASH(student_id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO student_scores VALUES
+ (1, 'Alice', 'Math', 95.0),
+ (1, 'Alice', 'English', 88.0),
+ (1, 'Alice', 'Science', 92.0),
+ (2, 'Bob', 'Math', 78.0),
+ (2, 'Bob', 'English', 85.0),
+ (2, 'Bob', 'Science', 80.0),
+ (3, 'Charlie', 'Math', 65.0),
+ (3, 'Charlie', 'English', 70.0),
+ (3, 'Charlie', 'Science', 68.0),
+ (4, 'David', 'Math', 55.0),
+ (4, 'David', 'English', 60.0),
+ (4, 'David', 'Science', 58.0);
+ """
+
+ // Test using UDF in SELECT
+ qt_select_grades """
+ SELECT
+ student_id,
+ student_name,
+ subject,
+ score,
+ py_score_grade(score) AS grade
+ FROM student_scores
+ ORDER BY student_id, subject;
+ """
+
+ // Test using UDF in GROUP BY
+ qt_select_group_by_grade """
+ SELECT
+ py_score_grade(score) AS grade,
+ COUNT(*) AS count,
+ AVG(score) AS avg_score
+ FROM student_scores
+ GROUP BY py_score_grade(score)
+ ORDER BY grade;
+ """
+
+ // Test using UDF in aggregate functions
+ qt_select_aggregate_with_udf """
+ SELECT
+ student_id,
+ student_name,
+ AVG(score) AS avg_score,
+ py_score_grade(AVG(score)) AS avg_grade
+ FROM student_scores
+ GROUP BY student_id, student_name
+ ORDER BY student_id;
+ """
+
+ // Test 2: Create classification function for aggregate analysis
+ sql """ DROP FUNCTION IF EXISTS py_age_group(INT); """
+ sql """
+ CREATE FUNCTION py_age_group(INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(age):
+ if age is None:
+ return None
+ if age < 18:
+ return 'Minor'
+ elif age < 30:
+ return 'Young Adult'
+ elif age < 50:
+ return 'Adult'
+ else:
+ return 'Senior'
+\$\$;
+ """
+
+ sql """ DROP TABLE IF EXISTS users; """
+ sql """
+ CREATE TABLE users (
+ user_id INT,
+ name STRING,
+ age INT,
+ salary DOUBLE
+ ) ENGINE=OLAP
+ DUPLICATE KEY(user_id)
+ DISTRIBUTED BY HASH(user_id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO users VALUES
+ (1, 'User1', 16, 0),
+ (2, 'User2', 25, 50000),
+ (3, 'User3', 35, 80000),
+ (4, 'User4', 55, 100000),
+ (5, 'User5', 28, 60000),
+ (6, 'User6', 45, 90000),
+ (7, 'User7', 22, 45000),
+ (8, 'User8', 60, 110000);
+ """
+
+ qt_select_age_group_aggregate """
+ SELECT
+ py_age_group(age) AS age_group,
+ COUNT(*) AS user_count,
+ AVG(salary) AS avg_salary,
+ MAX(salary) AS max_salary,
+ MIN(salary) AS min_salary
+ FROM users
+ GROUP BY py_age_group(age)
+ ORDER BY age_group;
+ """
+
+ // Test 3: Use UDF in HAVING clause
+ qt_select_having_with_udf """
+ SELECT
+ student_id,
+ student_name,
+ AVG(score) AS avg_score
+ FROM student_scores
+ GROUP BY student_id, student_name
+ HAVING py_score_grade(AVG(score)) IN ('A', 'B')
+ ORDER BY student_id;
+ """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_score_grade(DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_age_group(INT);")
+ try_sql("DROP TABLE IF EXISTS student_scores;")
+ try_sql("DROP TABLE IF EXISTS users;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_always_nullable.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_always_nullable.groovy
new file mode 100644
index 0000000..a66317b
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_always_nullable.groovy
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_always_nullable") {
+ // Test different configurations of always_nullable parameter
+
+ def runtime_version = "3.10.12"
+ try {
+ // Test 1: always_nullable = true (default value)
+ sql """ DROP FUNCTION IF EXISTS py_nullable_true(INT); """
+ sql """
+ CREATE FUNCTION py_nullable_true(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(x):
+ if x is None:
+ return None
+ if x < 0:
+ return None
+ return x * 2
+\$\$;
+ """
+
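+        // With always_nullable = true a Python None maps to SQL NULL, so both the NULL
+        // input and the negative input below are expected to return NULL.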
+ qt_select_nullable_true_normal """ SELECT py_nullable_true(10) AS result; """
+ qt_select_nullable_true_null """ SELECT py_nullable_true(NULL) AS result; """
+ qt_select_nullable_true_negative """ SELECT py_nullable_true(-5) AS result; """
+
+ // Test 2: always_nullable = false
+ sql """ DROP FUNCTION IF EXISTS py_nullable_false(INT); """
+ sql """
+ CREATE FUNCTION py_nullable_false(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "always_nullable" = "false",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(x):
+ if x is None:
+ return 0
+ return x * 2
+\$\$;
+ """
+
+ qt_select_nullable_false_normal """ SELECT py_nullable_false(10) AS result; """
+ qt_select_nullable_false_null """ SELECT py_nullable_false(NULL) AS result; """
+
+ // Test 3: always_nullable = false but function returns None
+ // This tests the edge case where the function violates the always_nullable contract
+ sql """ DROP FUNCTION IF EXISTS py_nullable_false_returns_none(INT); """
+ sql """
+ CREATE FUNCTION py_nullable_false_returns_none(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "always_nullable" = "false",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(x):
+ if x < 0:
+ return None # Returns None even though always_nullable is false
+ return x * 2
+\$\$;
+ """
+
+ qt_select_nullable_false_returns_none_normal """ SELECT py_nullable_false_returns_none(10) AS result; """
+
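+        // With always_nullable = false the result column is declared non-nullable, so a
+        // Python None coming back at runtime is rejected by the BE (error asserted below)
+        // instead of being converted to NULL.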
+ test {
+ sql """ SELECT py_nullable_false_returns_none(-5) AS result; """
+ exception "but the return type is not nullable, please check the always_nullable property in create function statement, it should be true"
+ }
+
+ // Test 4: Test nullable behavior on table data
+ sql """ DROP TABLE IF EXISTS nullable_test_table; """
+ sql """
+ CREATE TABLE nullable_test_table (
+ id INT,
+ value INT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO nullable_test_table VALUES
+ (1, 10),
+ (2, NULL),
+ (3, -5),
+ (4, 0),
+ (5, 100);
+ """
+
+ qt_select_table_nullable_true """
+ SELECT
+ id,
+ value,
+ py_nullable_true(value) AS result
+ FROM nullable_test_table
+ ORDER BY id;
+ """
+
+ qt_select_table_nullable_false """
+ SELECT
+ id,
+ value,
+ py_nullable_false(value) AS result
+ FROM nullable_test_table
+ ORDER BY id;
+ """
+
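+        // py_nullable_false_returns_none has no None guard, so the NULL value in row
+        // id = 2 raises a Python TypeError inside evaluate; the assertion below expects
+        // that interpreter error message.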
+ test {
+ sql """
+ SELECT
+ id,
+ value,
+ py_nullable_false_returns_none(value) AS result
+ FROM nullable_test_table
+ ORDER BY id;
+ """
+ exception "'<' not supported between instances of 'NoneType' and 'int'"
+ }
+
+ // Test 5: Nullable test for string type
+ sql """ DROP FUNCTION IF EXISTS py_string_nullable(STRING); """
+ sql """
+ CREATE FUNCTION py_string_nullable(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(s):
+ if s is None or s == "":
+ return None
+ return s.upper()
+\$\$;
+ """
+
+ qt_select_string_nullable """ SELECT py_string_nullable('hello') AS result; """
+ qt_select_string_nullable_null """ SELECT py_string_nullable(NULL) AS result; """
+ qt_select_string_nullable_empty """ SELECT py_string_nullable('') AS result; """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_nullable_true(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_nullable_false(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_nullable_false_returns_none(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_string_nullable(STRING);")
+ try_sql("DROP TABLE IF EXISTS nullable_test_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_array.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_array.groovy
new file mode 100644
index 0000000..c107fa9
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_array.groovy
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_array") {
+ def pyPath = """${context.file.parent}/udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+ log.info("Python Zip path: ${pyPath}".toString())
+ try {
+ sql """ DROP TABLE IF EXISTS test_pythonudf_array """
+ sql """
+ CREATE TABLE IF NOT EXISTS test_pythonudf_array (
+ `user_id` INT NOT NULL COMMENT "",
+ `tinyint_col` TINYINT NOT NULL COMMENT "",
+ `string_col` STRING NOT NULL COMMENT ""
+ )
+ DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
+ """
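+    // Build a 10-row VALUES list of the form (i, i*2, 'a{i}b'); the final row is
+    // appended without a trailing comma so the INSERT below stays valid SQL.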
+ StringBuilder sb = new StringBuilder()
+ int i = 1
+ for (; i < 10; i ++) {
+ sb.append("""
+ (${i},${i}*2,'a${i}b'),
+ """)
+ }
+ sb.append("""
+ (${i},${i}*2,'a${i}b')
+ """)
+ sql """ INSERT INTO test_pythonudf_array VALUES
+ ${sb.toString()}
+ """
+ qt_select_default """ SELECT * FROM test_pythonudf_array t ORDER BY user_id; """
+
+ File path = new File(pyPath)
+ if (!path.exists()) {
+ throw new IllegalStateException("""${pyPath} doesn't exist! """)
+ }
+
+ sql """ DROP FUNCTION IF EXISTS python_udf_array_int_test(array<int>); """
+ sql """ CREATE FUNCTION python_udf_array_int_test(array<int>) RETURNS int PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="array_int_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+ qt_select_1 """ SELECT python_udf_array_int_test(array(user_id)) result FROM test_pythonudf_array ORDER BY result; """
+ qt_select_2 """ SELECT python_udf_array_int_test(null) result ; """
+
+
+ sql """ DROP FUNCTION IF EXISTS python_udf_array_return_int_test(array<int>); """
+ sql """ CREATE FUNCTION python_udf_array_return_int_test(array<int>) RETURNS array<int> PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="array_return_array_int_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+ qt_select_3 """ SELECT python_udf_array_return_int_test(array(user_id)), tinyint_col as result FROM test_pythonudf_array ORDER BY result; """
+ qt_select_4 """ SELECT python_udf_array_return_int_test(array(user_id,user_id)), tinyint_col as result FROM test_pythonudf_array ORDER BY result; """
+ qt_select_5 """ SELECT python_udf_array_return_int_test(null) result ; """
+
+
+ sql """ DROP FUNCTION IF EXISTS python_udf_array_return_string_test(array<string>); """
+ sql """ CREATE FUNCTION python_udf_array_return_string_test(array<string>) RETURNS array<string> PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="array_return_array_string_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+ qt_select_6 """ SELECT python_udf_array_return_string_test(array(string_col)), tinyint_col as result FROM test_pythonudf_array ORDER BY result; """
+ qt_select_7 """ SELECT python_udf_array_return_string_test(array(string_col, cast(user_id as string))), tinyint_col as result FROM test_pythonudf_array ORDER BY result; """
+ qt_select_8 """ SELECT python_udf_array_return_string_test(null) result ; """
+
+ sql """ DROP FUNCTION IF EXISTS python_udf_array_string_test(array<string>); """
+ sql """ CREATE FUNCTION python_udf_array_string_test(array<string>) RETURNS string PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="array_string_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+ qt_select_9 """ SELECT python_udf_array_string_test(array(string_col)), tinyint_col as result FROM test_pythonudf_array ORDER BY result; """
+ qt_select_10 """ SELECT python_udf_array_string_test(array(string_col, cast(user_id as string))), tinyint_col as result FROM test_pythonudf_array ORDER BY result; """
+ qt_select_11 """ SELECT python_udf_array_string_test(null) result ; """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS python_udf_array_int_test(array<int>);")
+ try_sql("DROP FUNCTION IF EXISTS python_udf_array_return_int_test(array<int>);")
+ try_sql("DROP FUNCTION IF EXISTS python_udf_array_return_string_test(array<string>);")
+ try_sql("DROP FUNCTION IF EXISTS python_udf_array_string_test(array<string>);")
+ try_sql("DROP TABLE IF EXISTS test_pythonudf_array")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_base_data_type.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_base_data_type.groovy
new file mode 100644
index 0000000..a7fa5ff
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_base_data_type.groovy
@@ -0,0 +1,323 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_base_data_type") {
+ def pyPath = """${context.file.parent}/udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+ log.info("Python Zip path: ${pyPath}".toString())
+
+ // TEST INLINE CASE
+ try {
+ sql """
+ DROP FUNCTION IF EXISTS row_to_csv_all(
+ BOOLEAN,
+ TINYINT,
+ SMALLINT,
+ INT,
+ BIGINT,
+ LARGEINT,
+ FLOAT,
+ DOUBLE,
+ DECIMAL,
+ DECIMAL,
+ DECIMAL,
+ DATE,
+ DATETIME,
+ CHAR,
+ VARCHAR,
+ STRING
+ );
+ """
+ sql """
+CREATE FUNCTION row_to_csv_all(
+ BOOLEAN,
+ TINYINT,
+ SMALLINT,
+ INT,
+ BIGINT,
+ LARGEINT,
+ FLOAT,
+ DOUBLE,
+ DECIMAL,
+ DECIMAL,
+ DECIMAL,
+ DATE,
+ DATETIME,
+ CHAR,
+ VARCHAR,
+ STRING
+)
+RETURNS STRING
+PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "row_to_csv_all_impl",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+)
+AS \$\$
+def row_to_csv_all_impl(
+ bool_col, tinyint_col, smallint_col, int_col, bigint_col, largeint_col,
+ float_col, double_col, decimal32_col, decimal64_col, decimal128_col,
+ date_col, datetime_col, char_col, varchar_col, string_col
+):
+ cols = [
+ bool_col, tinyint_col, smallint_col, int_col, bigint_col, largeint_col,
+ float_col, double_col, decimal32_col, decimal64_col, decimal128_col,
+ date_col, datetime_col, char_col, varchar_col, string_col
+ ]
+
+ def safe_str(x):
+ return 'NULL' if x is None else str(x)
+
+ return ','.join(safe_str(col) for col in cols)
+\$\$;
+ """
+ sql """ DROP TABLE IF EXISTS test_datatype_table; """
+ sql """
+ CREATE TABLE test_datatype_table (
+ id INT,
+ bool_value BOOLEAN,
+ tinyint_value TINYINT,
+ smallint_value SMALLINT,
+ int_value INT,
+ bigint_value BIGINT,
+ largeint_value LARGEINT,
+ float_value float,
+ double_value DOUBLE,
+ decimal32_value DECIMAL(8, 2),
+ decimal64_value DECIMAL(16, 2),
+ decimal128_value DECIMAL(32, 8),
+ -- decimal256_value DECIMAL(64, 10),
+ date_value DATE,
+ datetime_value DATETIME,
+ char_value CHAR(100),
+ varchar_value VARCHAR(100),
+ string_value STRING
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+ sql """
+ INSERT INTO test_datatype_table VALUES
+ (1, TRUE, 127, 32767, 2147483647, 9223372036854775807, 170141183460469231731687303715884105727,
+ 1.23, 4.56789, 123456.78, 12345678901.2345, 123456789012345678901.234567890,
+ '2023-01-01', '2023-01-01 12:34:56', 'char_data_1', 'varchar_data_1', 'string_data_1'),
+
+ (2, FALSE, -128, -32768, -2147483648, -9223372036854775808, -170141183460469231731687303715884105728,
+ -2.34, -5.6789, -987654.32, -98765432.109876543, -987654321098765432.10987654321,
+ '2024-05-15', '2024-05-15 08:22:10', 'char_data_2', 'varchar_data_2', 'string_data_2'),
+
+ (3, TRUE, 0, 0, 0, 0, 0,
+ 0.0, 0.0, 0.00, 0.00, 0.00000000,
+ '2025-10-15', '2025-10-15 00:00:00', 'char_zero', 'varchar_zero', 'string_zero'),
+
+ (4, FALSE, 100, 20000, 300000000, 4000000000000000000, 99999999999999999999999999999999999999,
+ 3.14, 2.71828, 999999.99, 99999999999999.99, 99999999999999999999999.999999999999999,
+ '2022-12-31', '2022-12-31 23:59:59', 'char_max', 'varchar_max', 'string_max'),
+
+ (5, TRUE, -50, -10000, -100000000, -5000000000000000000, -99999999999999999999999999999999999999,
+ -1.41, -0.57721, -0.01, -0.01, -0.000000001,
+ '2021-07-04', '2021-07-04 14:30:00', 'char_neg', 'varchar_neg', 'string_neg');
+ """
+
+ qt_select_1 """
+ SELECT row_to_csv_all(
+ bool_value,
+ tinyint_value,
+ smallint_value,
+ int_value,
+ bigint_value,
+ largeint_value,
+ float_value,
+ double_value,
+ decimal32_value,
+ decimal64_value,
+ decimal128_value,
+ date_value,
+ datetime_value,
+ char_value,
+ varchar_value,
+ string_value
+ ) AS csv_row
+ FROM test_datatype_table;
+ """
+ } finally {
+ try_sql("""DROP FUNCTION IF EXISTS row_to_csv_all(
+ BOOLEAN,
+ TINYINT,
+ SMALLINT,
+ INT,
+ BIGINT,
+ LARGEINT,
+ FLOAT,
+ DOUBLE,
+ DECIMAL,
+ DECIMAL,
+ DECIMAL,
+ DATE,
+ DATETIME,
+ CHAR,
+ VARCHAR,
+ STRING
+ );""")
+ try_sql("DROP TABLE IF EXISTS test_datatype_table;")
+ }
+
+ // TEST MODULE CASE
+ try {
+ sql """
+ DROP FUNCTION IF EXISTS row_to_csv_all(
+ BOOLEAN,
+ TINYINT,
+ SMALLINT,
+ INT,
+ BIGINT,
+ LARGEINT,
+ FLOAT,
+ DOUBLE,
+ DECIMAL,
+ DECIMAL,
+ DECIMAL,
+ DATE,
+ DATETIME,
+ CHAR,
+ VARCHAR,
+ STRING
+ );
+ """
+ sql """
+ CREATE FUNCTION row_to_csv_all(
+ BOOLEAN,
+ TINYINT,
+ SMALLINT,
+ INT,
+ BIGINT,
+ LARGEINT,
+ FLOAT,
+ DOUBLE,
+ DECIMAL,
+ DECIMAL,
+ DECIMAL,
+ DATE,
+ DATETIME,
+ CHAR,
+ VARCHAR,
+ STRING
+ )
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_data_type.row_to_csv_all_impl",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
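+    // Same conversion function as the inline case above, but this time resolved from
+    // the shipped zip via "file" and a dotted module.function symbol path.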
+ sql """ DROP TABLE IF EXISTS test_datatype_table; """
+ sql """
+ CREATE TABLE test_datatype_table (
+ id INT,
+ bool_value BOOLEAN,
+ tinyint_value TINYINT,
+ smallint_value SMALLINT,
+ int_value INT,
+ bigint_value BIGINT,
+ largeint_value LARGEINT,
+ float_value float,
+ double_value DOUBLE,
+ decimal32_value DECIMAL(8, 2),
+ decimal64_value DECIMAL(16, 2),
+ decimal128_value DECIMAL(32, 8),
+ -- decimal256_value DECIMAL(64, 10),
+ date_value DATE,
+ datetime_value DATETIME,
+ char_value CHAR(100),
+ varchar_value VARCHAR(100),
+ string_value STRING
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+ sql """
+ INSERT INTO test_datatype_table VALUES
+ (1, TRUE, 127, 32767, 2147483647, 9223372036854775807, 170141183460469231731687303715884105727,
+ 1.23, 4.56789, 123456.78, 12345678901.2345, 123456789012345678901.234567890,
+ '2023-01-01', '2023-01-01 12:34:56', 'char_data_1', 'varchar_data_1', 'string_data_1'),
+
+ (2, FALSE, -128, -32768, -2147483648, -9223372036854775808, -170141183460469231731687303715884105728,
+ -2.34, -5.6789, -987654.32, -98765432.109876543, -987654321098765432.10987654321,
+ '2024-05-15', '2024-05-15 08:22:10', 'char_data_2', 'varchar_data_2', 'string_data_2'),
+
+ (3, TRUE, 0, 0, 0, 0, 0,
+ 0.0, 0.0, 0.00, 0.00, 0.00000000,
+ '2025-10-15', '2025-10-15 00:00:00', 'char_zero', 'varchar_zero', 'string_zero'),
+
+ (4, FALSE, 100, 20000, 300000000, 4000000000000000000, 99999999999999999999999999999999999999,
+ 3.14, 2.71828, 999999.99, 99999999999999.99, 99999999999999999999999.999999999999999,
+ '2022-12-31', '2022-12-31 23:59:59', 'char_max', 'varchar_max', 'string_max'),
+
+ (5, TRUE, -50, -10000, -100000000, -5000000000000000000, -99999999999999999999999999999999999999,
+ -1.41, -0.57721, -0.01, -0.01, -0.000000001,
+ '2021-07-04', '2021-07-04 14:30:00', 'char_neg', 'varchar_neg', 'string_neg');
+ """
+
+ qt_select_2 """
+ SELECT row_to_csv_all(
+ bool_value,
+ tinyint_value,
+ smallint_value,
+ int_value,
+ bigint_value,
+ largeint_value,
+ float_value,
+ double_value,
+ decimal32_value,
+ decimal64_value,
+ decimal128_value,
+ date_value,
+ datetime_value,
+ char_value,
+ varchar_value,
+ string_value
+ ) AS csv_row
+ FROM test_datatype_table;
+ """
+ } finally {
+ try_sql("""DROP FUNCTION IF EXISTS row_to_csv_all(
+ BOOLEAN,
+ TINYINT,
+ SMALLINT,
+ INT,
+ BIGINT,
+ LARGEINT,
+ FLOAT,
+ DOUBLE,
+ DECIMAL,
+ DECIMAL,
+ DECIMAL,
+ DATE,
+ DATETIME,
+ CHAR,
+ VARCHAR,
+ STRING
+ );""")
+ try_sql("DROP TABLE IF EXISTS test_datatype_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_benchmark.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_benchmark.groovy
new file mode 100644
index 0000000..2ce0564
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_benchmark.groovy
@@ -0,0 +1,341 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_benchmark") {
+ // Benchmark test for Python UDF with large dataset
+
+ def scalarPyPath = """${context.file.parent}/udf_scripts/python_udf_scalar_ops.zip"""
+ def vectorPyPath = """${context.file.parent}/udf_scripts/python_udf_vector_ops.zip"""
+ scp_udf_file_to_all_be(scalarPyPath)
+ scp_udf_file_to_all_be(vectorPyPath)
+ def runtime_version = "3.10.12"
+
+ sql "CREATE DATABASE IF NOT EXISTS test_pythonudf_benchmark"
+ sql "USE test_pythonudf_benchmark"
+
+ log.info("Python scalar module path: ${scalarPyPath}".toString())
+ log.info("Python vector module path: ${vectorPyPath}".toString())
+
+ try {
+ // ==================== Create Large Test Table ====================
+ sql """ DROP TABLE IF EXISTS python_udf_benchmark_table; """
+ sql """
+ CREATE TABLE python_udf_benchmark_table (
+ id BIGINT,
+ int_val INT,
+ double_val DOUBLE,
+ string_val STRING,
+ email STRING,
+ bool_val BOOLEAN,
+ date_val DATE
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 10
+ PROPERTIES("replication_num" = "1");
+ """
+
+ log.info("Creating benchmark table with large dataset...")
+
+        // Load 1 million rows using streamLoad (much faster than row-by-row INSERTs)
+ def totalRows = 1000000
+
+ log.info("Loading ${totalRows} rows using streamLoad from CSV file...")
+ def loadStartTime = System.currentTimeMillis()
+
+ streamLoad {
+ db 'test_pythonudf_benchmark'
+ table "python_udf_benchmark_table"
+ set 'column_separator', '\t'
+ file 'benchmark_data_1m.csv'
+ time 120000 // 120 seconds timeout
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(json.NumberTotalRows, json.NumberLoadedRows)
+ assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+ }
+ }
+
+ def loadEndTime = System.currentTimeMillis()
+ log.info("Data loaded in ${loadEndTime - loadStartTime} ms (${String.format('%.2f', totalRows / ((loadEndTime - loadStartTime) / 1000.0))} rows/sec)")
+
+ sql "sync"
+
+ // Verify row count
+ def rowCount = sql "SELECT COUNT(*) FROM python_udf_benchmark_table"
+ log.info("Verified row count: ${rowCount[0][0]}")
+
+ // ==================== Benchmark 1: Simple Scalar UDF ====================
+ log.info("=== Benchmark 1: Simple Scalar UDF (multiply_with_default) ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_multiply(INT, INT, INT); """
+ sql """
+ CREATE FUNCTION py_multiply(INT, INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${scalarPyPath}",
+ "symbol" = "python_udf_scalar_ops.multiply_with_default",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
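+        // Each benchmark wraps the UDF call in COUNT(*) so the UDF is evaluated over
+        // every row while only a single aggregate value is returned to the client.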
+ def startTime1 = System.currentTimeMillis()
+ sql """
+ SELECT COUNT(*)
+ FROM (
+ SELECT id, py_multiply(int_val, 2, 1) AS result
+ FROM python_udf_benchmark_table
+ ) t;
+ """
+ def endTime1 = System.currentTimeMillis()
+ log.info("Scalar UDF (simple): ${endTime1 - startTime1} ms for ${totalRows} rows")
+
+ // ==================== Benchmark 2: Complex Scalar UDF ====================
+ log.info("=== Benchmark 2: Complex Scalar UDF (Levenshtein distance) ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_levenshtein(STRING, STRING); """
+ sql """
+ CREATE FUNCTION py_levenshtein(STRING, STRING)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${scalarPyPath}",
+ "symbol" = "python_udf_scalar_ops.levenshtein_distance",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ def startTime2 = System.currentTimeMillis()
+ sql """
+ SELECT COUNT(*)
+ FROM (
+ SELECT id, py_levenshtein(string_val, 'test_string_50') AS distance
+ FROM python_udf_benchmark_table
+ LIMIT 100000
+ ) t;
+ """
+ def endTime2 = System.currentTimeMillis()
+ log.info("Scalar UDF (complex): ${endTime2 - startTime2} ms for 100000 rows")
+
+ // ==================== Benchmark 3: String Processing Scalar UDF ====================
+ log.info("=== Benchmark 3: String Processing Scalar UDF (extract_domain) ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_extract_domain(STRING); """
+ sql """
+ CREATE FUNCTION py_extract_domain(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${scalarPyPath}",
+ "symbol" = "python_udf_scalar_ops.extract_domain",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ def startTime3 = System.currentTimeMillis()
+ sql """
+ SELECT COUNT(*)
+ FROM (
+ SELECT id, py_extract_domain(email) AS domain
+ FROM python_udf_benchmark_table
+ ) t;
+ """
+ def endTime3 = System.currentTimeMillis()
+ log.info("Scalar UDF (string): ${endTime3 - startTime3} ms for ${totalRows} rows")
+
+ // ==================== Benchmark 4: Simple Vector UDF ====================
+ log.info("=== Benchmark 4: Simple Vector UDF (add_constant) ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_add(INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_add(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${vectorPyPath}",
+ "symbol" = "python_udf_vector_ops.add_constant",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true",
+ "vectorized" = "true"
+ );
+ """
+
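+        // "vectorized" = "true" hands the UDF whole column batches rather than one row
+        // per call (the vector_ops module presumably operates on pandas Series); the
+        // later benchmarks compare this path against the scalar one.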
+ def startTime4 = System.currentTimeMillis()
+ sql """
+ SELECT COUNT(*)
+ FROM (
+ SELECT id, py_vec_add(int_val, 100) AS result
+ FROM python_udf_benchmark_table
+ ) t;
+ """
+ def endTime4 = System.currentTimeMillis()
+ log.info("Vector UDF (simple): ${endTime4 - startTime4} ms for ${totalRows} rows")
+
+ // ==================== Benchmark 5: Complex Vector UDF ====================
+ log.info("=== Benchmark 5: Complex Vector UDF (string_length) ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_strlen(STRING); """
+ sql """
+ CREATE FUNCTION py_vec_strlen(STRING)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${vectorPyPath}",
+ "symbol" = "python_udf_vector_ops.string_length",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true",
+ "vectorized" = "true"
+ );
+ """
+
+ def startTime5 = System.currentTimeMillis()
+ sql """
+ SELECT COUNT(*)
+ FROM (
+ SELECT id, py_vec_strlen(string_val) AS len
+ FROM python_udf_benchmark_table
+ ) t;
+ """
+ def endTime5 = System.currentTimeMillis()
+ log.info("Vector UDF (string): ${endTime5 - startTime5} ms for ${totalRows} rows")
+
+ // ==================== Benchmark 6: Scalar UDF with Grouping ====================
+ log.info("=== Benchmark 6: Scalar UDF with Grouping ===")
+
+ def startTime6 = System.currentTimeMillis()
+ sql """
+ SELECT
+ int_val % 100 AS bucket,
+ COUNT(*) AS cnt,
+ SUM(int_val) AS total
+ FROM python_udf_benchmark_table
+ WHERE py_multiply(int_val, 2, 1) > 1000
+ GROUP BY int_val % 100
+ ORDER BY bucket
+ LIMIT 10;
+ """
+ def endTime6 = System.currentTimeMillis()
+ log.info("Scalar UDF with Grouping (WHERE clause): ${endTime6 - startTime6} ms")
+
+ // ==================== Benchmark 7: Vector UDF with Grouping ====================
+ log.info("=== Benchmark 7: Vector UDF with Grouping ===")
+
+ def startTime7 = System.currentTimeMillis()
+ sql """
+ SELECT
+ int_val % 100 AS bucket,
+ COUNT(*) AS cnt,
+ SUM(int_val) AS total
+ FROM python_udf_benchmark_table
+ WHERE py_vec_add(int_val, 100) > 1000
+ GROUP BY int_val % 100
+ ORDER BY bucket
+ LIMIT 10;
+ """
+ def endTime7 = System.currentTimeMillis()
+ log.info("Vector UDF with Grouping (WHERE clause): ${endTime7 - startTime7} ms")
+
+ // ==================== Benchmark 8: Multiple UDFs in Single Query ====================
+ log.info("=== Benchmark 8: Multiple UDFs in Single Query ===")
+
+ def startTime8 = System.currentTimeMillis()
+ sql """
+ SELECT COUNT(*)
+ FROM (
+ SELECT
+ id,
+ py_multiply(int_val, 2, 1) AS mul_result,
+ py_extract_domain(email) AS domain,
+ py_vec_add(int_val, 100) AS vec_result
+ FROM python_udf_benchmark_table
+ LIMIT 100000
+ ) t;
+ """
+ def endTime8 = System.currentTimeMillis()
+ log.info("Multiple UDFs: ${endTime8 - startTime8} ms for 100000 rows")
+
+ // ==================== Benchmark 9: Filter with UDF ====================
+ log.info("=== Benchmark 9: Filter with UDF ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_is_prime(INT); """
+ sql """
+ CREATE FUNCTION py_is_prime(INT)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${scalarPyPath}",
+ "symbol" = "python_udf_scalar_ops.is_prime",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ def startTime9 = System.currentTimeMillis()
+ sql """
+ SELECT COUNT(*)
+ FROM python_udf_benchmark_table
+        WHERE py_is_prime(int_val) = true;
+ """
+ def endTime9 = System.currentTimeMillis()
+ log.info("Filter with UDF: ${endTime9 - startTime9} ms")
+
+ // ==================== Benchmark Summary ====================
+ log.info("\n" + "=" * 80 + "\nBENCHMARK SUMMARY\n" + "=" * 80 + "\nDataset size: ${totalRows} rows\n" +
+ "1. Scalar UDF (simple multiply): ${endTime1 - startTime1} ms\n" +
+ "2. Scalar UDF (complex Levenshtein): ${endTime2 - startTime2} ms (100K rows)\n" +
+ "3. Scalar UDF (string extract_domain): ${endTime3 - startTime3} ms\n" +
+ "4. Vector UDF (simple add): ${endTime4 - startTime4} ms\n" +
+ "5. Vector UDF (string length): ${endTime5 - startTime5} ms\n" +
+ "6. Aggregation with Scalar UDF: ${endTime6 - startTime6} ms\n" +
+ "7. Aggregation with Vector UDF: ${endTime7 - startTime7} ms\n" +
+ "8. Multiple UDFs in query: ${endTime8 - startTime8} ms (100K rows)\n" +
+ "9. Filter with UDF: ${endTime9 - startTime9} ms\n" +
+ "=" * 80)
+
+    // Calculate throughput; clamp elapsed time to 1 ms so a sub-millisecond run
+    // cannot cause division by zero
+    def scalarElapsedMs = Math.max(1, endTime1 - startTime1)
+    def vectorElapsedMs = Math.max(1, endTime4 - startTime4)
+    def throughput1 = totalRows / (scalarElapsedMs / 1000.0)
+    def throughput4 = totalRows / (vectorElapsedMs / 1000.0)
+    log.info("Scalar UDF throughput: ${String.format('%.2f', throughput1)} rows/sec")
+    log.info("Vector UDF throughput: ${String.format('%.2f', throughput4)} rows/sec")
+    log.info("Vector speedup: ${String.format('%.2f', scalarElapsedMs / vectorElapsedMs)}x")
+
+ } finally {
+ // Cleanup
+ log.info("Cleaning up benchmark resources...")
+
+ try_sql("DROP FUNCTION IF EXISTS py_multiply(INT, INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_levenshtein(STRING, STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_extract_domain(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_add(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_strlen(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_is_prime(INT);")
+
+ try_sql("DROP TABLE IF EXISTS python_udf_benchmark_table;")
+ try_sql("DROP DATABASE IF EXISTS test_pythonudf_benchmark;")
+ log.info("Benchmark cleanup completed.")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_boolean.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_boolean.groovy
new file mode 100644
index 0000000..7d2ba90
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_boolean.groovy
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_boolean") {
+ def pyPath = """${context.file.parent}/udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+ log.info("Python Zip path: ${pyPath}".toString())
+ try {
+ sql """ DROP TABLE IF EXISTS test_pythonudf_boolean """
+ sql """
+ CREATE TABLE IF NOT EXISTS test_pythonudf_boolean (
+ `user_id` INT NOT NULL COMMENT "",
+ `boo_1` BOOLEAN NOT NULL COMMENT ""
+ )
+ DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
+ """
+
+ sql """ INSERT INTO test_pythonudf_boolean (`user_id`,`boo_1`) VALUES
+ (111,true),
+ (112,false),
+ (113,0),
+ (114,1)
+ """
+ qt_select_default """ SELECT * FROM test_pythonudf_boolean t ORDER BY user_id; """
+
+ File path1 = new File(pyPath)
+ if (!path1.exists()) {
+ throw new IllegalStateException("""${pyPath} doesn't exist! """)
+ }
+
+ sql """ CREATE FUNCTION python_udf_boolean_test(BOOLEAN) RETURNS BOOLEAN PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="boolean_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+
+ qt_select """ SELECT python_udf_boolean_test(1) as result; """
+ qt_select """ SELECT python_udf_boolean_test(0) as result ; """
+ qt_select """ SELECT python_udf_boolean_test(true) as result ; """
+ qt_select """ SELECT python_udf_boolean_test(false) as result ; """
+ qt_select """ SELECT python_udf_boolean_test(null) as result ; """
+ qt_select """ SELECT user_id,python_udf_boolean_test(boo_1) as result FROM test_pythonudf_boolean order by user_id; """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS python_udf_boolean_test(BOOLEAN);")
+ try_sql("DROP TABLE IF EXISTS test_pythonudf_boolean")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_complex_data_type.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_complex_data_type.groovy
new file mode 100644
index 0000000..b071654
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_complex_data_type.groovy
@@ -0,0 +1,408 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_complex_data_type") {
+ def pyPath = """${context.file.parent}/udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+ log.info("Python Zip path: ${pyPath}".toString())
+
+ // TEST ARRAY INLINE CASE
+ try {
+ sql """
+ DROP FUNCTION IF EXISTS array_to_csv(
+ ARRAY<INT>,
+ ARRAY<STRING>,
+ ARRAY<ARRAY<INT>>
+ );
+ """
+ sql """
+CREATE FUNCTION array_to_csv(
+ ARRAY<INT>,
+ ARRAY<STRING>,
+ ARRAY<ARRAY<INT>>
+)
+RETURNS STRING
+PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "array_to_csv_impl",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+)
+AS \$\$
+def array_to_csv_impl(int_arr, str_arr, nested_arr):
+ def safe_str(x):
+ return 'NULL' if x is None else str(x)
+
+ def format_array(arr):
+ if arr is None:
+ return 'NULL'
+ return '[' + ','.join(safe_str(item) for item in arr) + ']'
+
+ def format_nested_array(arr):
+ if arr is None:
+ return 'NULL'
+ return '[' + ','.join(format_array(inner) for inner in arr) + ']'
+
+ parts = [
+ format_array(int_arr),
+ format_array(str_arr),
+ format_nested_array(nested_arr)
+ ]
+ return '|'.join(parts)
+\$\$;
+ """
+ sql """ DROP TABLE IF EXISTS test_array_table; """
+ sql """
+ CREATE TABLE test_array_table (
+ id INT,
+ int_array ARRAY<INT>,
+ string_array ARRAY<STRING>,
+ nested_array ARRAY<ARRAY<INT>>
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+ sql """
+ INSERT INTO test_array_table VALUES
+ (1, [1, 2, 3], ['a', 'b', 'c'], [[1,2], [3,4]]),
+ (2, [], [], []),
+ (3, NULL, ['x', NULL, 'z'], NULL),
+ (4, [0, -1, 2147483647], ['hello', 'world'], [[], [1]]);
+ """
+
+ qt_select_1 """
+ SELECT array_to_csv(int_array, string_array, nested_array) AS result FROM test_array_table;
+ """
+ } finally {
+ try_sql("""DROP FUNCTION IF EXISTS array_to_csv(
+ ARRAY<INT>,
+ ARRAY<STRING>,
+ ARRAY<ARRAY<INT>>
+ );""")
+ try_sql("DROP TABLE IF EXISTS test_array_table;")
+ }
+
+ // TEST ARRAY MODULE CASE
+ try {
+ sql """
+ DROP FUNCTION IF EXISTS array_to_csv(
+ ARRAY<INT>,
+ ARRAY<STRING>,
+ ARRAY<ARRAY<INT>>
+ );
+ """
+ sql """
+ CREATE FUNCTION array_to_csv(
+ ARRAY<INT>,
+ ARRAY<STRING>,
+ ARRAY<ARRAY<INT>>
+ )
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file"="file://${pyPath}",
+ "symbol" = "python_udf_array_type.array_to_csv_impl",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+ sql """ DROP TABLE IF EXISTS test_array_table; """
+ sql """
+ CREATE TABLE test_array_table (
+ id INT,
+ int_array ARRAY<INT>,
+ string_array ARRAY<STRING>,
+ nested_array ARRAY<ARRAY<INT>>
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+ sql """
+ INSERT INTO test_array_table VALUES
+ (1, [1, 2, 3], ['a', 'b', 'c'], [[1,2], [3,4]]),
+ (2, [], [], []),
+ (3, NULL, ['x', NULL, 'z'], NULL),
+ (4, [0, -1, 2147483647], ['hello', 'world'], [[], [1]]);
+ """
+
+ qt_select_2 """
+ SELECT array_to_csv(int_array, string_array, nested_array) AS result FROM test_array_table;
+ """
+ } finally {
+ try_sql("""DROP FUNCTION IF EXISTS array_to_csv(
+ ARRAY<INT>,
+ ARRAY<STRING>,
+ ARRAY<ARRAY<INT>>
+ );""")
+ try_sql("DROP TABLE IF EXISTS test_array_table;")
+ }
+
+ // TEST MAP INLINE CASE
+ try {
+ sql """
+ DROP FUNCTION IF EXISTS map_to_csv(
+ MAP<INT, STRING>,
+ MAP<STRING, DOUBLE>
+ );
+ """
+ sql """
+CREATE FUNCTION map_to_csv(
+ MAP<INT, STRING>,
+ MAP<STRING, DOUBLE>
+)
+RETURNS STRING
+PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "map_to_csv_impl",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+)
+AS \$\$
+def map_to_csv_impl(map1, map2):
+ def safe_str(x):
+ return 'NULL' if x is None else str(x)
+
+ def format_map(m):
+ if m is None:
+ return 'NULL'
+ # Doris passes MAP as Python dict
+ items = [f"{safe_str(k)}:{safe_str(v)}" for k, v in m.items()]
+ return '{' + ','.join(sorted(items)) + '}'
+
+ return '|'.join([format_map(map1), format_map(map2)])
+\$\$;
+ """
+ sql """ DROP TABLE IF EXISTS test_map_table; """
+ sql """
+ CREATE TABLE test_map_table (
+ id INT,
+ int_string_map MAP<INT, STRING>,
+ string_double_map MAP<STRING, DOUBLE>
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+ sql """
+ INSERT INTO test_map_table VALUES
+ (1, {1:'one', 2:'two'}, {'pi':3.14, 'e':2.718}),
+ (2, {}, {}),
+ (3, NULL, {'null_key': NULL}),
+ (4, {0:'zero', -1:'minus_one'}, {'max':1.79769e308});
+ """
+
+ qt_select_3 """
+ SELECT map_to_csv(int_string_map, string_double_map) AS result FROM test_map_table;
+ """
+ } finally {
+ try_sql("""DROP FUNCTION IF EXISTS map_to_csv(
+ MAP<INT, STRING>,
+ MAP<STRING, DOUBLE>
+ );""")
+ try_sql("DROP TABLE IF EXISTS test_map_table;")
+ }
+
+ // TEST MAP MODULE CASE
+ try {
+ sql """
+ DROP FUNCTION IF EXISTS map_to_csv(
+ MAP<INT, STRING>,
+ MAP<STRING, DOUBLE>
+ );
+ """
+ sql """
+ CREATE FUNCTION map_to_csv(
+ MAP<INT, STRING>,
+ MAP<STRING, DOUBLE>
+ )
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file"="file://${pyPath}",
+ "symbol" = "python_udf_map_type.map_to_csv_impl",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+ sql """ DROP TABLE IF EXISTS test_map_table; """
+ sql """
+ CREATE TABLE test_map_table (
+ id INT,
+ int_string_map MAP<INT, STRING>,
+ string_double_map MAP<STRING, DOUBLE>
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+ sql """
+ INSERT INTO test_map_table VALUES
+ (1, {1:'one', 2:'two'}, {'pi':3.14, 'e':2.718}),
+ (2, {}, {}),
+ (3, NULL, {'null_key': NULL}),
+ (4, {0:'zero', -1:'minus_one'}, {'max':1.79769e308});
+ """
+
+ qt_select_4 """
+ SELECT map_to_csv(int_string_map, string_double_map) AS result FROM test_map_table;
+ """
+ } finally {
+ try_sql("""DROP FUNCTION IF EXISTS map_to_csv(
+ MAP<INT, STRING>,
+ MAP<STRING, DOUBLE>
+ );""")
+ try_sql("DROP TABLE IF EXISTS test_map_table;")
+ }
+
+ // TEST STRUCT INLINE CASE
+ try {
+ sql """
+ DROP FUNCTION IF EXISTS struct_to_csv(
+ STRUCT<name: STRING, age: INT, salary: DECIMAL(12,2)>,
+ STRUCT<x: DOUBLE, y: DOUBLE, tags: ARRAY<STRING>>
+ );
+ """
+ sql """
+CREATE FUNCTION struct_to_csv(
+ STRUCT<name: STRING, age: INT, salary: DECIMAL(12,2)>,
+ STRUCT<x: DOUBLE, y: DOUBLE, tags: ARRAY<STRING>>
+)
+RETURNS STRING
+PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "struct_to_csv_impl",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+)
+AS \$\$
+def struct_to_csv_impl(person, point):
+ def safe_str(x):
+ return 'NULL' if x is None else str(x)
+
+ def format_array(arr):
+ if arr is None:
+ return 'NULL'
+ return '[' + ','.join(safe_str(item) for item in arr) + ']'
+
+ def format_struct_dict(s, field_names):
+ if s is None:
+ return 'NULL'
+ parts = []
+ for field in field_names:
+ val = s.get(field)
+ parts.append(safe_str(val))
+ return '(' + ','.join(parts) + ')'
+
+ person_str = format_struct_dict(person, ['name', 'age', 'salary'])
+
+ if point is None:
+ point_str = 'NULL'
+ else:
+ x_val = safe_str(point.get('x'))
+ y_val = safe_str(point.get('y'))
+ tags_val = format_array(point.get('tags'))
+ point_str = f"({x_val},{y_val},{tags_val})"
+
+ return '|'.join([person_str, point_str])
+\$\$;
+ """
+ sql """ DROP TABLE IF EXISTS test_struct_table; """
+ sql """
+ CREATE TABLE test_struct_table(
+ id INT,
+ person STRUCT<name: STRING, age: INT, salary: DECIMAL(12,2)>,
+ point STRUCT<x: DOUBLE, y: DOUBLE, tags: ARRAY<STRING>>
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+ sql """
+ INSERT INTO test_struct_table VALUES
+ (1, {'Alice', 30, 75000.50}, {1.5, 2.5, ['red', 'blue']}),
+ (2, {NULL, NULL, NULL}, {0.0, 0.0, []}),
+ (3, {'Bob', 25, 60000.00}, {NULL, 3.14, ['tag1', NULL, 'tag3']}),
+ (4, {'', 0, 0.0}, {-1.0, -2.0, NULL});
+ """
+
+ qt_select_5 """
+ SELECT struct_to_csv(person, point) AS result FROM test_struct_table;
+ """
+ } finally {
+ try_sql("""DROP FUNCTION IF EXISTS struct_to_csv(
+ STRUCT<name: STRING, age: INT, salary: DECIMAL(12,2)>,
+ STRUCT<x: DOUBLE, y: DOUBLE, tags: ARRAY<STRING>>
+ );""")
+ try_sql("DROP TABLE IF EXISTS test_struct_table;")
+ }
+
+ // TEST STRUCT MODULE CASE
+ try {
+ sql """
+ DROP FUNCTION IF EXISTS struct_to_csv(
+ STRUCT<name: STRING, age: INT, salary: DECIMAL(12,2)>,
+ STRUCT<x: DOUBLE, y: DOUBLE, tags: ARRAY<STRING>>
+ );
+ """
+ sql """
+ CREATE FUNCTION struct_to_csv(
+ STRUCT<name: STRING, age: INT, salary: DECIMAL(12,2)>,
+ STRUCT<x: DOUBLE, y: DOUBLE, tags: ARRAY<STRING>>
+ )
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file"="file://${pyPath}",
+ "symbol" = "python_udf_struct_type.struct_to_csv_impl",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+ sql """ DROP TABLE IF EXISTS test_struct_table; """
+ sql """
+ CREATE TABLE test_struct_table(
+ id INT,
+ person STRUCT<name: STRING, age: INT, salary: DECIMAL(12,2)>,
+ point STRUCT<x: DOUBLE, y: DOUBLE, tags: ARRAY<STRING>>
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+ sql """
+ INSERT INTO test_struct_table VALUES
+ (1, {'Alice', 30, 75000.50}, {1.5, 2.5, ['red', 'blue']}),
+ (2, {NULL, NULL, NULL}, {0.0, 0.0, []}),
+ (3, {'Bob', 25, 60000.00}, {NULL, 3.14, ['tag1', NULL, 'tag3']}),
+ (4, {'', 0, 0.0}, {-1.0, -2.0, NULL});
+ """
+
+ qt_select_6 """
+ SELECT struct_to_csv(person, point) AS result FROM test_struct_table;
+ """
+ } finally {
+ try_sql("""DROP FUNCTION IF EXISTS struct_to_csv(
+ STRUCT<name: STRING, age: INT, salary: DECIMAL(12,2)>,
+ STRUCT<x: DOUBLE, y: DOUBLE, tags: ARRAY<STRING>>
+ );""")
+ try_sql("DROP TABLE IF EXISTS test_struct_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_data_types.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_data_types.groovy
new file mode 100644
index 0000000..abbb22f
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_data_types.groovy
@@ -0,0 +1,189 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_data_types") {
+ // Test various data types supported by Python UDF
+ def runtime_version = "3.10.12"
+
+ try {
+ // Test 1: TINYINT type
+ sql """ DROP FUNCTION IF EXISTS py_tinyint_test(TINYINT); """
+ sql """
+ CREATE FUNCTION py_tinyint_test(TINYINT)
+ RETURNS TINYINT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(x):
+ if x is None:
+ return None
+ return x + 1
+\$\$;
+ """
+
+ qt_select_tinyint """ SELECT py_tinyint_test(CAST(10 AS TINYINT)) AS result; """
+
+ // Test 2: SMALLINT type
+ sql """ DROP FUNCTION IF EXISTS py_smallint_test(SMALLINT); """
+ sql """
+ CREATE FUNCTION py_smallint_test(SMALLINT)
+ RETURNS SMALLINT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(x):
+ if x is None:
+ return None
+ return x * 2
+\$\$;
+ """
+
+ qt_select_smallint """ SELECT py_smallint_test(CAST(1000 AS SMALLINT)) AS result; """
+
+ // Test 3: BIGINT type
+ sql """ DROP FUNCTION IF EXISTS py_bigint_test(BIGINT); """
+ sql """
+ CREATE FUNCTION py_bigint_test(BIGINT)
+ RETURNS BIGINT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(x):
+ if x is None:
+ return None
+ return x + 1000000
+\$\$;
+ """
+
+ qt_select_bigint """ SELECT py_bigint_test(1000000000000) AS result; """
+
+ // Test 4: DECIMAL type
+ sql """ DROP FUNCTION IF EXISTS py_decimal_test(DECIMAL(10,2)); """
+ sql """
+ CREATE FUNCTION py_decimal_test(DECIMAL(10,2))
+ RETURNS DECIMAL(10,2)
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+from decimal import Decimal
+
+def evaluate(x):
+    if x is None:
+        return None
+    # DECIMAL arguments are assumed to arrive as decimal.Decimal; multiplying a
+    # Decimal by a float literal raises TypeError, so use a Decimal factor
+    return Decimal(x) * Decimal('1.1')
+\$\$;
+ """
+
+ qt_select_decimal """ SELECT py_decimal_test(100.50) AS result; """
+
+ // Test 5: DATE type
+ sql """ DROP FUNCTION IF EXISTS py_date_test(DATE); """
+ sql """
+ CREATE FUNCTION py_date_test(DATE)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(d):
+ if d is None:
+ return None
+ return str(d)
+\$\$;
+ """
+
+ qt_select_date """ SELECT py_date_test('2024-01-15') AS result; """
+
+ // Test 6: DATETIME type
+ sql """ DROP FUNCTION IF EXISTS py_datetime_test(DATETIME); """
+ sql """
+ CREATE FUNCTION py_datetime_test(DATETIME)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(dt):
+ if dt is None:
+ return None
+ return str(dt)
+\$\$;
+ """
+
+ qt_select_datetime """ SELECT py_datetime_test('2024-01-15 10:30:45') AS result; """
+
+ // Test 7: Comprehensive test - create table and test multiple data types
+ sql """ DROP TABLE IF EXISTS data_types_test_table; """
+ sql """
+ CREATE TABLE data_types_test_table (
+ id INT,
+ tiny_val TINYINT,
+ small_val SMALLINT,
+ int_val INT,
+ big_val BIGINT,
+ float_val FLOAT,
+ double_val DOUBLE,
+ decimal_val DECIMAL(10,2),
+ string_val STRING,
+ bool_val BOOLEAN
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO data_types_test_table VALUES
+ (1, 10, 100, 1000, 10000, 1.5, 2.5, 100.50, 'test1', true),
+ (2, 20, 200, 2000, 20000, 2.5, 3.5, 200.75, 'test2', false),
+ (3, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+ """
+
+ qt_select_table_types """
+ SELECT
+ id,
+ py_tinyint_test(tiny_val) AS tiny_result,
+ py_smallint_test(small_val) AS small_result,
+ py_bigint_test(big_val) AS big_result
+ FROM data_types_test_table
+ ORDER BY id;
+ """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_tinyint_test(TINYINT);")
+ try_sql("DROP FUNCTION IF EXISTS py_smallint_test(SMALLINT);")
+ try_sql("DROP FUNCTION IF EXISTS py_bigint_test(BIGINT);")
+ try_sql("DROP FUNCTION IF EXISTS py_decimal_test(DECIMAL(10,2));")
+ try_sql("DROP FUNCTION IF EXISTS py_date_test(DATE);")
+ try_sql("DROP FUNCTION IF EXISTS py_datetime_test(DATETIME);")
+ try_sql("DROP TABLE IF EXISTS data_types_test_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_error_handling.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_error_handling.groovy
new file mode 100644
index 0000000..1871d81
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_error_handling.groovy
@@ -0,0 +1,190 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_error_handling") {
+ // Test error handling and exception cases for Python UDF
+
+ def runtime_version = "3.10.12"
+ try {
+ // Test 1: Division by zero error handling
+ sql """ DROP FUNCTION IF EXISTS py_safe_divide(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_safe_divide(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(a, b):
+ if a is None or b is None:
+ return None
+ if b == 0:
+ return None
+ return a / b
+\$\$;
+ """
+
+ qt_select_divide_normal """ SELECT py_safe_divide(10.0, 2.0) AS result; """
+ qt_select_divide_zero """ SELECT py_safe_divide(10.0, 0.0) AS result; """
+ qt_select_divide_null """ SELECT py_safe_divide(10.0, NULL) AS result; """
+
+ // Test 2: String index out of bounds handling
+ sql """ DROP FUNCTION IF EXISTS py_safe_substring(STRING, INT); """
+ sql """
+ CREATE FUNCTION py_safe_substring(STRING, INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(s, index):
+ if s is None or index is None:
+ return None
+ if index < 0 or index >= len(s):
+ return None
+ return s[index]
+\$\$;
+ """
+
+ qt_select_substring_valid """ SELECT py_safe_substring('hello', 1) AS result; """
+ qt_select_substring_invalid """ SELECT py_safe_substring('hello', 10) AS result; """
+ qt_select_substring_negative """ SELECT py_safe_substring('hello', -1) AS result; """
+
+ // Test 3: Type conversion error handling
+ sql """ DROP FUNCTION IF EXISTS py_safe_int_parse(STRING); """
+ sql """
+ CREATE FUNCTION py_safe_int_parse(STRING)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(s):
+ if s is None:
+ return None
+ try:
+ return int(s)
+ except (ValueError, TypeError):
+ return None
+\$\$;
+ """
+
+ qt_select_parse_valid """ SELECT py_safe_int_parse('123') AS result; """
+ qt_select_parse_invalid """ SELECT py_safe_int_parse('abc') AS result; """
+ qt_select_parse_empty """ SELECT py_safe_int_parse('') AS result; """
+
+ // Test 4: Array out of bounds handling
+ sql """ DROP FUNCTION IF EXISTS py_safe_array_get(ARRAY<INT>, INT); """
+ sql """
+ CREATE FUNCTION py_safe_array_get(ARRAY<INT>, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(arr, index):
+ if arr is None or index is None:
+ return None
+ if index < 0 or index >= len(arr):
+ return None
+ return arr[index]
+\$\$;
+ """
+
+ qt_select_array_valid """ SELECT py_safe_array_get([10, 20, 30], 1) AS result; """
+ qt_select_array_invalid """ SELECT py_safe_array_get([10, 20, 30], 5) AS result; """
+
+ // Test 5: Test error handling on table data
+ sql """ DROP TABLE IF EXISTS error_handling_test_table; """
+ sql """
+ CREATE TABLE error_handling_test_table (
+ id INT,
+ numerator DOUBLE,
+ denominator DOUBLE,
+ text STRING,
+ arr_index INT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO error_handling_test_table VALUES
+ (1, 100.0, 10.0, '123', 0),
+ (2, 50.0, 0.0, 'abc', 1),
+ (3, NULL, 5.0, '', 2),
+ (4, 75.0, NULL, '456', -1),
+ (5, 25.0, 5.0, 'xyz', 10);
+ """
+
+ qt_select_table_error_handling """
+ SELECT
+ id,
+ numerator,
+ denominator,
+ py_safe_divide(numerator, denominator) AS divide_result,
+ text,
+ py_safe_int_parse(text) AS parse_result
+ FROM error_handling_test_table
+ ORDER BY id;
+ """
+
+ // Test 6: Empty string handling
+ sql """ DROP FUNCTION IF EXISTS py_safe_length(STRING); """
+ sql """
+ CREATE FUNCTION py_safe_length(STRING)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(s):
+ if s is None:
+ return None
+ return len(s)
+\$\$;
+ """
+
+ qt_select_length_normal """ SELECT py_safe_length('hello') AS result; """
+ qt_select_length_empty """ SELECT py_safe_length('') AS result; """
+ qt_select_length_null """ SELECT py_safe_length(NULL) AS result; """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_safe_divide(DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_safe_substring(STRING, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_safe_int_parse(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_safe_array_get(ARRAY<INT>, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_safe_length(STRING);")
+ try_sql("DROP TABLE IF EXISTS error_handling_test_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_file_protocol.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_file_protocol.groovy
new file mode 100644
index 0000000..d4f8f21
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_file_protocol.groovy
@@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_file_protocol") {
+ // Test loading Python UDF from zip package using file:// protocol
+
+ def zipPath = """${context.file.parent}/udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(zipPath)
+ def runtime_version = "3.10.12"
+ log.info("Python zip path: ${zipPath}".toString())
+
+ try {
+ // Test 1: Load int_test.py from zip package using file:// protocol
+ sql """ DROP FUNCTION IF EXISTS py_file_int_add(INT); """
+ sql """
+ CREATE FUNCTION py_file_int_add(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${zipPath}",
+ "symbol" = "int_test.evaluate",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+
+ qt_select_file_int """ SELECT py_file_int_add(99) AS result; """
+
+ // Test 2: Load string_test.py from zip package using file:// protocol
+ sql """ DROP FUNCTION IF EXISTS py_file_string_mask(STRING, INT, INT); """
+ sql """
+ CREATE FUNCTION py_file_string_mask(STRING, INT, INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${zipPath}",
+ "symbol" = "string_test.evaluate",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+
+ qt_select_file_string """ SELECT py_file_string_mask('1234567890', 3, 3) AS result; """
+
+ // Test 3: Load float_test.py from zip package using file:// protocol
+ sql """ DROP FUNCTION IF EXISTS py_file_float_process(FLOAT); """
+ sql """
+ CREATE FUNCTION py_file_float_process(FLOAT)
+ RETURNS FLOAT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${zipPath}",
+ "symbol" = "float_test.evaluate",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+
+ qt_select_file_float """ SELECT py_file_float_process(3.14) AS result; """
+
+ // Test 4: Load boolean_test.py from zip package using file:// protocol
+ sql """ DROP FUNCTION IF EXISTS py_file_bool_not(BOOLEAN); """
+ sql """
+ CREATE FUNCTION py_file_bool_not(BOOLEAN)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${zipPath}",
+ "symbol" = "boolean_test.evaluate",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+
+ qt_select_file_bool_true """ SELECT py_file_bool_not(true) AS result; """
+ qt_select_file_bool_false """ SELECT py_file_bool_not(false) AS result; """
+
+ // Test 5: Test UDF with file:// protocol on table data
+ sql """ DROP TABLE IF EXISTS file_protocol_test_table; """
+ sql """
+ CREATE TABLE file_protocol_test_table (
+ id INT,
+ num INT,
+ text STRING
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO file_protocol_test_table VALUES
+ (1, 10, 'hello'),
+ (2, 20, 'world'),
+ (3, 30, 'python'),
+ (4, 40, 'doris');
+ """
+
+ qt_select_table_file """
+ SELECT
+ id,
+ num,
+ py_file_int_add(num) AS num_result,
+ text,
+ py_file_string_mask(text, 1, 1) AS text_result
+ FROM file_protocol_test_table
+ ORDER BY id;
+ """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_file_int_add(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_file_string_mask(STRING, INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_file_float_process(FLOAT);")
+ try_sql("DROP FUNCTION IF EXISTS py_file_bool_not(BOOLEAN);")
+ try_sql("DROP TABLE IF EXISTS file_protocol_test_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_float.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_float.groovy
new file mode 100644
index 0000000..5ac06aa
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_float.groovy
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_float") {
+ def pyPath = """${context.file.parent}/udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.12.0"
+ log.info("Python Zip path: ${pyPath}".toString())
+ try {
+ sql """ DROP TABLE IF EXISTS test_pythonudf_float """
+ sql """
+ CREATE TABLE IF NOT EXISTS test_pythonudf_float (
+ `user_id` INT NOT NULL COMMENT "",
+ `float_1` FLOAT NOT NULL COMMENT "",
+ `float_2` FLOAT COMMENT "",
+ `double_1` DOUBLE NOT NULL COMMENT "",
+ `double_2` DOUBLE COMMENT ""
+ )
+ DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
+ """
+
+ sql """ INSERT INTO test_pythonudf_float (`user_id`,`float_1`,`float_2`,double_1,double_2) VALUES
+ (111,11111.11111,222222.3333333,12345678.34455677,1111111.999999999999),
+ (112,1234556.11111,222222.3333333,222222222.3333333333333,4444444444444.555555555555),
+ (113,87654321.11111,null,6666666666.6666666666,null)
+ """
+ qt_select_default """ SELECT * FROM test_pythonudf_float t ORDER BY user_id; """
+
+ File path = new File(pyPath)
+ if (!path.exists()) {
+ throw new IllegalStateException("""${pyPath} doesn't exist! """)
+ }
+
+ sql """ DROP FUNCTION IF EXISTS python_udf_float_test(FLOAT,FLOAT) """
+
+ sql """ CREATE FUNCTION python_udf_float_test(FLOAT,FLOAT) RETURNS FLOAT PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="float_test.evaluate",
+ "type"="PYTHON_UDF",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ ); """
+
+ qt_select """ SELECT python_udf_float_test(cast(2.83645 as float),cast(111.1111111 as float)) as result; """
+ qt_select """ SELECT python_udf_float_test(2.83645,111.1111111) as result ; """
+ qt_select """ SELECT python_udf_float_test(2.83645,null) as result ; """
+ qt_select """ SELECT python_udf_float_test(cast(2.83645 as float),null) as result ; """
+ qt_select """ SELECT user_id,python_udf_float_test(float_1, float_2) as sum FROM test_pythonudf_float order by user_id; """
+ createMV("create materialized view udf_mv as SELECT user_id as a1,python_udf_float_test(float_1, float_2) as sum FROM test_pythonudf_float order by user_id;")
+ qt_select """ SELECT user_id,python_udf_float_test(float_1, float_2) as sum FROM test_pythonudf_float order by user_id; """
+
+ explain {
+ sql("SELECT user_id,python_udf_float_test(float_1, float_2) as sum FROM test_pythonudf_float order by user_id; ")
+ contains "(udf_mv)"
+ }
+
+ sql """ CREATE FUNCTION python_udf_double_test(DOUBLE,DOUBLE) RETURNS DOUBLE PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="double_test.evaluate",
+ "type"="PYTHON_UDF",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ ); """
+
+ qt_select """ SELECT python_udf_double_test(cast(2.83645 as DOUBLE),cast(111.1111111 as DOUBLE)) as result; """
+ qt_select """ SELECT python_udf_double_test(2.83645,111.1111111) as result ; """
+ qt_select """ SELECT python_udf_double_test(2.83645,null) as result ; """
+ qt_select """ SELECT python_udf_double_test(cast(2.83645 as DOUBLE),null) as result ; """
+ qt_select """ SELECT user_id,python_udf_double_test(double_1, double_1) as sum FROM test_pythonudf_float order by user_id; """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS python_udf_double_test(DOUBLE,DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS python_udf_float_test(FLOAT,FLOAT);")
+ try_sql("DROP TABLE IF EXISTS test_pythonudf_float")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_global_function.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_global_function.groovy
new file mode 100644
index 0000000..099ca46
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_global_function.groovy
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_global_function") {
+ // Test creating global Python UDF with GLOBAL keyword
+
+ def runtime_version = "3.10.12"
+ try {
+ // Test 1: Create GLOBAL function
+ sql """ DROP GLOBAL FUNCTION IF EXISTS py_global_multiply(INT, INT); """
+ sql """
+ CREATE GLOBAL FUNCTION py_global_multiply(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(a, b):
+ if a is None or b is None:
+ return None
+ return a * b
+\$\$;
+ """
+
+ qt_select_global_multiply """ SELECT py_global_multiply(7, 8) AS result; """
+
+ // Test 2: Create GLOBAL string function
+ sql """ DROP GLOBAL FUNCTION IF EXISTS py_global_lower(STRING); """
+ sql """
+ CREATE GLOBAL FUNCTION py_global_lower(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(s):
+ if s is None:
+ return None
+ return s.lower()
+\$\$;
+ """
+
+ qt_select_global_lower """ SELECT py_global_lower('HELLO WORLD') AS result; """
+
+ // Test 3: Create regular (non-GLOBAL) function for comparison
+ sql """ DROP FUNCTION IF EXISTS py_local_add(INT, INT); """
+ sql """
+ CREATE FUNCTION py_local_add(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(a, b):
+ if a is None or b is None:
+ return None
+ return a + b
+\$\$;
+ """
+
+ qt_select_local_add """ SELECT py_local_add(15, 25) AS result; """
+
+ // Test 4: Test GLOBAL function on table data
+ sql """ DROP TABLE IF EXISTS global_function_test_table; """
+ sql """
+ CREATE TABLE global_function_test_table (
+ id INT,
+ val1 INT,
+ val2 INT,
+ text STRING
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO global_function_test_table VALUES
+ (1, 5, 6, 'APPLE'),
+ (2, 10, 20, 'BANANA'),
+ (3, 3, 7, 'CHERRY'),
+ (4, NULL, 5, 'DATE'),
+ (5, 8, 9, NULL);
+ """
+
+ qt_select_table_global """
+ SELECT
+ id,
+ val1,
+ val2,
+ py_global_multiply(val1, val2) AS multiply_result,
+ text,
+ py_global_lower(text) AS lower_result
+ FROM global_function_test_table
+ ORDER BY id;
+ """
+
+ // Test 5: Mathematical calculation with GLOBAL function
+ sql """ DROP GLOBAL FUNCTION IF EXISTS py_global_power(DOUBLE, DOUBLE); """
+ sql """
+ CREATE GLOBAL FUNCTION py_global_power(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(base, exponent):
+ if base is None or exponent is None:
+ return None
+ return base ** exponent
+\$\$;
+ """
+
+ qt_select_global_power """ SELECT py_global_power(2.0, 3.0) AS result; """
+ qt_select_global_power_decimal """ SELECT py_global_power(5.0, 0.5) AS result; """
+
+ } finally {
+ try_sql("DROP GLOBAL FUNCTION IF EXISTS py_global_multiply(INT, INT);")
+ try_sql("DROP GLOBAL FUNCTION IF EXISTS py_global_lower(STRING);")
+ try_sql("DROP GLOBAL FUNCTION IF EXISTS py_global_power(DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_local_add(INT, INT);")
+ try_sql("DROP TABLE IF EXISTS global_function_test_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_inline_complex.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_inline_complex.groovy
new file mode 100644
index 0000000..0caf6c8
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_inline_complex.groovy
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_inline_complex") {
+ // Test complex Python UDF using Inline mode
+
+ def runtime_version = "3.10.12"
+ try {
+ // Test 1: Array processing
+ sql """ DROP FUNCTION IF EXISTS py_array_sum(ARRAY<INT>); """
+ sql """
+ CREATE FUNCTION py_array_sum(ARRAY<INT>)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(arr):
+ if arr is None:
+ return None
+ return sum(arr)
+\$\$;
+ """
+
+ qt_select_array_sum """ SELECT py_array_sum([1, 2, 3, 4, 5]) AS result; """
+
+ // Test 2: String processing - reverse
+ sql """ DROP FUNCTION IF EXISTS py_reverse_string(STRING); """
+ sql """
+ CREATE FUNCTION py_reverse_string(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(s):
+ if s is None:
+ return None
+ return s[::-1]
+\$\$;
+ """
+
+ qt_select_reverse """ SELECT py_reverse_string('Hello') AS result; """
+
+ // Test 3: Multi-parameter complex calculation
+ sql """ DROP FUNCTION IF EXISTS py_weighted_avg(DOUBLE, DOUBLE, DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_weighted_avg(DOUBLE, DOUBLE, DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(val1, weight1, val2, weight2):
+ if any(x is None for x in [val1, weight1, val2, weight2]):
+ return None
+ total_weight = weight1 + weight2
+ if total_weight == 0:
+ return None
+ return (val1 * weight1 + val2 * weight2) / total_weight
+\$\$;
+ """
+
+ qt_select_weighted_avg """ SELECT py_weighted_avg(80.0, 0.6, 90.0, 0.4) AS result; """
+
+ // Test 4: String formatting
+ sql """ DROP FUNCTION IF EXISTS py_format_name(STRING, STRING); """
+ sql """
+ CREATE FUNCTION py_format_name(STRING, STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(first_name, last_name):
+ if first_name is None or last_name is None:
+ return None
+ return f"{last_name.upper()}, {first_name.capitalize()}"
+\$\$;
+ """
+
+ qt_select_format_name """ SELECT py_format_name('john', 'doe') AS result; """
+
+ // Test 5: Numeric range validation
+ sql """ DROP FUNCTION IF EXISTS py_in_range(INT, INT, INT); """
+ sql """
+ CREATE FUNCTION py_in_range(INT, INT, INT)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(value, min_val, max_val):
+ if any(x is None for x in [value, min_val, max_val]):
+ return None
+ return min_val <= value <= max_val
+\$\$;
+ """
+
+ qt_select_in_range_true """ SELECT py_in_range(50, 0, 100) AS result; """
+ qt_select_in_range_false """ SELECT py_in_range(150, 0, 100) AS result; """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_array_sum(ARRAY<INT>);")
+ try_sql("DROP FUNCTION IF EXISTS py_reverse_string(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_weighted_avg(DOUBLE, DOUBLE, DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_format_name(STRING, STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_in_range(INT, INT, INT);")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_inline_priority.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_inline_priority.groovy
new file mode 100644
index 0000000..5693f7b
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_inline_priority.groovy
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_inline_priority") {
+ // Test that inline code has higher priority when both file and inline code are specified
+
+ // Disabled temporarily
+ return
+
+ def zipPath = """${context.file.parent}/udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(zipPath)
+ def runtime_version = "3.10.12"
+ log.info("Python zip path: ${zipPath}".toString())
+
+ try {
+ // Test 1: Specify both file and inline code, verify inline code takes priority
+ // Function in int_test.py returns arg + 1
+ // But inline code returns arg * 10
+ sql """ DROP FUNCTION IF EXISTS py_priority_test(INT); """
+ sql """
+ CREATE FUNCTION py_priority_test(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${zipPath}",
+ "symbol" = "int_test.evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(arg):
+ # inline code: returns arg * 10
+ if arg is None:
+ return None
+ return arg * 10
+\$\$;
+ """
+
+ // If using code from file, result should be 6 (5 + 1)
+ // If using inline code, result should be 50 (5 * 10)
+ qt_select_priority_inline """ SELECT py_priority_test(5) AS result; """
+
+ // Test 2: Another priority test - string processing
+ sql """ DROP FUNCTION IF EXISTS py_priority_string_test(STRING); """
+ sql """
+ CREATE FUNCTION py_priority_string_test(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(s):
+ # inline code: returns reversed string
+ if s is None:
+ return None
+ return s[::-1]
+\$\$;
+ """
+
+ // inline code should return reversed string
+ qt_select_priority_string """ SELECT py_priority_string_test('hello') AS result; """
+
+ // Test 3: Verify priority on table data
+ sql """ DROP TABLE IF EXISTS priority_test_table; """
+ sql """
+ CREATE TABLE priority_test_table (
+ id INT,
+ num INT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO priority_test_table VALUES
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5);
+ """
+
+ // Verify inline code priority: should return num * 10
+ qt_select_table_priority """
+ SELECT
+ id,
+ num,
+ py_priority_test(num) AS result
+ FROM priority_test_table
+ ORDER BY id;
+ """
+
+ // Test 4: Only file parameter, no inline code
+ sql """ DROP FUNCTION IF EXISTS py_file_only_test(INT); """
+ sql """
+ CREATE FUNCTION py_file_only_test(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${zipPath}",
+ "symbol" = "int_test.evaluate",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+
+ // Should use code from file: returns arg + 1
+ qt_select_file_only """ SELECT py_file_only_test(5) AS result; """
+
+ // Test 5: Only inline code, no file parameter
+ sql """ DROP FUNCTION IF EXISTS py_inline_only_test(INT); """
+ sql """
+ CREATE FUNCTION py_inline_only_test(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(arg):
+ if arg is None:
+ return None
+ return arg * 100
+\$\$;
+ """
+
+ // Should use inline code: returns arg * 100
+ qt_select_inline_only """ SELECT py_inline_only_test(5) AS result; """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_priority_test(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_priority_string_test(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_file_only_test(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_inline_only_test(INT);")
+ try_sql("DROP TABLE IF EXISTS priority_test_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_inline_scalar.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_inline_scalar.groovy
new file mode 100644
index 0000000..1ab003a
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_inline_scalar.groovy
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_inline_basic") {
+ // Test basic Python UDF using Inline mode
+
+ def runtime_version = "3.10.12"
+ try {
+ // Test 1: Simple integer addition
+ sql """ DROP FUNCTION IF EXISTS py_add(INT, INT); """
+ sql """
+ CREATE FUNCTION py_add(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(a, b):
+    if a is None or b is None:
+        return None
+    return a + b
+\$\$;
+ """
+
+ qt_select_add """ SELECT py_add(10, 20) AS result; """
+ qt_select_add_null """ SELECT py_add(NULL, 20) AS result; """
+
+ // Test 2: String concatenation
+ sql """ DROP FUNCTION IF EXISTS py_concat(STRING, STRING); """
+ sql """
+ CREATE FUNCTION py_concat(STRING, STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(s1, s2):
+ if s1 is None or s2 is None:
+ return None
+ return s1 + s2
+\$\$;
+ """
+
+ qt_select_concat """ SELECT py_concat('Hello', ' World') AS result; """
+ qt_select_concat_null """ SELECT py_concat('Hello', NULL) AS result; """
+
+ // Test 3: Mathematical operations
+ sql """ DROP FUNCTION IF EXISTS py_square(DOUBLE); """
+ sql """
+ CREATE FUNCTION py_square(DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(x):
+ if x is None:
+ return None
+ return x * x
+\$\$;
+ """
+
+ qt_select_square """ SELECT py_square(5.0) AS result; """
+ qt_select_square_negative """ SELECT py_square(-3.0) AS result; """
+
+ // Test 4: Conditional logic
+ sql """ DROP FUNCTION IF EXISTS py_is_positive(INT); """
+ sql """
+ CREATE FUNCTION py_is_positive(INT)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(num):
+ if num is None:
+ return None
+ return num > 0
+\$\$;
+ """
+
+ qt_select_positive """ SELECT py_is_positive(10) AS result; """
+ qt_select_negative """ SELECT py_is_positive(-5) AS result; """
+ qt_select_zero """ SELECT py_is_positive(0) AS result; """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_add(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_concat(STRING, STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_square(DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_is_positive(INT);")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_inline_vector.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_inline_vector.groovy
new file mode 100644
index 0000000..f321191
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_inline_vector.groovy
@@ -0,0 +1,409 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_inline_vector") {
+ // Test vectorized Python UDF using Inline mode with pandas.Series
+
+ def runtime_version = "3.10.12"
+ try {
+ // Create test table
+ sql """ DROP TABLE IF EXISTS vector_udf_test_table; """
+ sql """
+ CREATE TABLE vector_udf_test_table (
+ id INT,
+ int_col1 INT,
+ int_col2 INT,
+ double_col1 DOUBLE,
+ double_col2 DOUBLE,
+ string_col1 STRING,
+ string_col2 STRING,
+ bool_col BOOLEAN
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO vector_udf_test_table VALUES
+ (1, 10, 20, 1.5, 2.5, 'hello', 'world', true),
+ (2, 30, 40, 3.5, 4.5, 'foo', 'bar', false),
+ (3, NULL, 50, 5.5, NULL, NULL, 'test', true),
+ (4, 60, NULL, NULL, 6.5, 'data', NULL, false),
+ (5, 70, 80, 7.5, 8.5, 'python', 'udf', true);
+ """
+
+ // Test 1: Vector INT addition with pandas.Series
+ sql """ DROP FUNCTION IF EXISTS py_vec_add_int(INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_add_int(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "add",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def add(a: pd.Series, b: pd.Series) -> pd.Series:
+ return a + b + 1
+\$\$;
+ """
+
+ qt_vec_add_int """
+ SELECT
+ id,
+ int_col1,
+ int_col2,
+ py_vec_add_int(int_col1, int_col2) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+ // Test 2: Vector DOUBLE multiplication with pandas.Series
+ sql """ DROP FUNCTION IF EXISTS py_vec_multiply_double(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_multiply_double(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "multiply",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def multiply(a: pd.Series, b: pd.Series) -> pd.Series:
+ return a * b
+\$\$;
+ """
+
+ qt_vec_multiply_double """
+ SELECT
+ id,
+ double_col1,
+ double_col2,
+ py_vec_multiply_double(double_col1, double_col2) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+ // Test 3: Vector STRING concatenation with pandas.Series
+ sql """ DROP FUNCTION IF EXISTS py_vec_concat_string(STRING, STRING); """
+ sql """
+ CREATE FUNCTION py_vec_concat_string(STRING, STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "concat",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def concat(s1: pd.Series, s2: pd.Series) -> pd.Series:
+ return s1 + '_' + s2
+\$\$;
+ """
+
+ qt_vec_concat_string """
+ SELECT
+ id,
+ string_col1,
+ string_col2,
+ py_vec_concat_string(string_col1, string_col2) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+ // Test 4: Vector INT with conditional logic using pandas.Series
+ sql """ DROP FUNCTION IF EXISTS py_vec_max_int(INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_max_int(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "get_max",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+import numpy as np
+
+def get_max(a: pd.Series, b: pd.Series) -> pd.Series:
+ return pd.Series(np.maximum(a, b))
+\$\$;
+ """
+
+ qt_vec_max_int """
+ SELECT
+ id,
+ int_col1,
+ int_col2,
+ py_vec_max_int(int_col1, int_col2) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+ // Test 5: Vector DOUBLE with mathematical operations
+ sql """ DROP FUNCTION IF EXISTS py_vec_sqrt_double(DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_sqrt_double(DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "sqrt",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+import numpy as np
+
+def sqrt(x: pd.Series) -> pd.Series:
+ return np.sqrt(x)
+\$\$;
+ """
+
+ qt_vec_sqrt_double """
+ SELECT
+ id,
+ double_col1,
+ py_vec_sqrt_double(double_col1) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+ // Test 6: Vector STRING with upper case transformation
+ sql """ DROP FUNCTION IF EXISTS py_vec_upper_string(STRING); """
+ sql """
+ CREATE FUNCTION py_vec_upper_string(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "to_upper",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def to_upper(s: pd.Series) -> pd.Series:
+ return s.str.upper()
+\$\$;
+ """
+
+ qt_vec_upper_string """
+ SELECT
+ id,
+ string_col1,
+ py_vec_upper_string(string_col1) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+ // Test 7: Vector INT with complex calculation
+ sql """ DROP FUNCTION IF EXISTS py_vec_weighted_sum(INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_weighted_sum(INT, INT)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "weighted_sum",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def weighted_sum(a: pd.Series, b: pd.Series) -> pd.Series:
+ return a * 0.3 + b * 0.7
+\$\$;
+ """
+
+ qt_vec_weighted_sum """
+ SELECT
+ id,
+ int_col1,
+ int_col2,
+ py_vec_weighted_sum(int_col1, int_col2) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+ // Test 8: Vector BOOLEAN operations
+ sql """ DROP FUNCTION IF EXISTS py_vec_not_bool(BOOLEAN); """
+ sql """
+ CREATE FUNCTION py_vec_not_bool(BOOLEAN)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "negate",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def negate(b: pd.Series) -> pd.Series:
+ return ~b
+\$\$;
+ """
+
+ qt_vec_not_bool """
+ SELECT
+ id,
+ bool_col,
+ py_vec_not_bool(bool_col) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+ // Test 9: Vector INT comparison returning BOOLEAN
+ sql """ DROP FUNCTION IF EXISTS py_vec_greater_than(INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_greater_than(INT, INT)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "greater",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def greater(a: pd.Series, b: pd.Series) -> pd.Series:
+ return a > b
+\$\$;
+ """
+
+ qt_vec_greater_than """
+ SELECT
+ id,
+ int_col1,
+ int_col2,
+ py_vec_greater_than(int_col1, int_col2) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+ // Test 10: Vector STRING length calculation
+ sql """ DROP FUNCTION IF EXISTS py_vec_string_length(STRING); """
+ sql """
+ CREATE FUNCTION py_vec_string_length(STRING)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "str_len",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def str_len(s: pd.Series) -> pd.Series:
+ return s.str.len()
+\$\$;
+ """
+
+ qt_vec_string_length """
+ SELECT
+ id,
+ string_col1,
+ py_vec_string_length(string_col1) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+ // Test 11: Vector with NULL handling using fillna
+ sql """ DROP FUNCTION IF EXISTS py_vec_fill_null_int(INT); """
+ sql """
+ CREATE FUNCTION py_vec_fill_null_int(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "fill_null",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def fill_null(x: pd.Series) -> pd.Series:
+ return x.fillna(0)
+\$\$;
+ """
+
+ qt_vec_fill_null_int """
+ SELECT
+ id,
+ int_col1,
+ py_vec_fill_null_int(int_col1) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+        // Test 12: Vector with an aggregation-like operation (cumulative sum;
+        // the Series covers one execution batch, so this is a per-batch running total)
+ sql """ DROP FUNCTION IF EXISTS py_vec_cumsum_int(INT); """
+ sql """
+ CREATE FUNCTION py_vec_cumsum_int(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "cumsum",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def cumsum(x: pd.Series) -> pd.Series:
+ return x.cumsum()
+\$\$;
+ """
+
+ qt_vec_cumsum_int """
+ SELECT
+ id,
+ int_col1,
+ py_vec_cumsum_int(int_col1) AS result
+ FROM vector_udf_test_table
+ ORDER BY id;
+ """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_vec_add_int(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_multiply_double(DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_concat_string(STRING, STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_max_int(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_sqrt_double(DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_upper_string(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_weighted_sum(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_not_bool(BOOLEAN);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_greater_than(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_string_length(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_fill_null_int(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_cumsum_int(INT);")
+ try_sql("DROP TABLE IF EXISTS vector_udf_test_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_int.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_int.groovy
new file mode 100644
index 0000000..2a3906c
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_int.groovy
@@ -0,0 +1,124 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_int") {
+ def pyPath = """${context.file.parent}/udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+ log.info("Python Zip path: ${pyPath}".toString())
+ try {
+ sql """ DROP TABLE IF EXISTS test_pythonudf_int """
+ sql """
+ CREATE TABLE IF NOT EXISTS test_pythonudf_int (
+ `user_id` INT NOT NULL COMMENT "",
+ `tinyint_col` TINYINT NOT NULL COMMENT "",
+ `smallint_col` SMALLINT NOT NULL COMMENT "",
+ `bigint_col` BIGINT NOT NULL COMMENT ""
+ )
+ DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
+ """
+ StringBuilder sb = new StringBuilder()
+ int i = 1
+ for (; i < 10; i++) {
+ sb.append("""
+ (${i},${i}*2,${i}*3,${i}*4),
+ """)
+ }
+ sb.append("""
+ (${i},${i}*2,${i}*3,${i}*4)
+ """)
+ sql """ INSERT INTO test_pythonudf_int VALUES
+ ${sb.toString()}
+ """
+ qt_select_default """ SELECT * FROM test_pythonudf_int t ORDER BY user_id; """
+
+ File path = new File(pyPath)
+ if (!path.exists()) {
+ throw new IllegalStateException("""${pyPath} doesn't exist! """)
+ }
+
+ sql """ DROP FUNCTION IF EXISTS python_udf_int_test(int) """
+
+ sql """ CREATE FUNCTION python_udf_int_test(int) RETURNS int PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="int_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+
+ qt_select """ SELECT python_udf_int_test(user_id) result FROM test_pythonudf_int ORDER BY result; """
+ qt_select """ SELECT python_udf_int_test(null) result ; """
+
+
+ sql """ CREATE FUNCTION python_udf_tinyint_test(tinyint) RETURNS tinyint PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="int_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+
+ qt_select """ SELECT python_udf_tinyint_test(tinyint_col) result FROM test_pythonudf_int ORDER BY result; """
+ qt_select """ SELECT python_udf_tinyint_test(null) result ; """
+
+
+ sql """ CREATE FUNCTION python_udf_smallint_test(smallint) RETURNS smallint PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="int_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+
+ qt_select """ SELECT python_udf_smallint_test(smallint_col) result FROM test_pythonudf_int ORDER BY result; """
+ qt_select """ SELECT python_udf_smallint_test(null) result ; """
+
+
+ sql """ CREATE FUNCTION python_udf_bigint_test(bigint) RETURNS bigint PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="int_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+
+ qt_select """ SELECT python_udf_bigint_test(bigint_col) result FROM test_pythonudf_int ORDER BY result; """
+ qt_select """ SELECT python_udf_bigint_test(null) result ; """
+
+ sql """ CREATE GLOBAL FUNCTION python_udf_int_test_global(int) RETURNS int PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="int_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+
+ qt_select_global_1 """ SELECT python_udf_int_test_global(user_id) result FROM test_pythonudf_int ORDER BY result; """
+ qt_select_global_2 """ SELECT python_udf_int_test_global(null) result ; """
+ qt_select_global_3 """ SELECT python_udf_int_test_global(3) result FROM test_pythonudf_int ORDER BY result; """
+ qt_select_global_4 """ SELECT abs(python_udf_int_test_global(3)) result FROM test_pythonudf_int ORDER BY result; """
+
+ } finally {
+ try_sql("DROP GLOBAL FUNCTION IF EXISTS python_udf_int_test_global(int);")
+ try_sql("DROP FUNCTION IF EXISTS python_udf_tinyint_test(tinyint);")
+ try_sql("DROP FUNCTION IF EXISTS python_udf_smallint_test(smallint);")
+ try_sql("DROP FUNCTION IF EXISTS python_udf_bigint_test(bigint);")
+ try_sql("DROP FUNCTION IF EXISTS python_udf_int_test(int);")
+ try_sql("DROP TABLE IF EXISTS test_pythonudf_int")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_map.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_map.groovy
new file mode 100644
index 0000000..58be735
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_map.groovy
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_map") {
+ def pyPath = """${context.file.parent}/udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+ log.info("Python Zip path: ${pyPath}".toString())
+ try {
+ try_sql("DROP FUNCTION IF EXISTS udfii(Map<INT, INT>);")
+ try_sql("DROP FUNCTION IF EXISTS udfss(Map<String, String>);")
+ try_sql("DROP TABLE IF EXISTS map_ii")
+ try_sql("DROP TABLE IF EXISTS map_ss")
+ sql """
+ CREATE TABLE IF NOT EXISTS map_ii (
+ `id` INT(11) NULL COMMENT "",
+ `m` Map<INT, INT> NULL COMMENT ""
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ );
+ """
+ sql """ """
+ sql """ INSERT INTO map_ii VALUES(1, {1:1,10:1,100:1}); """
+ sql """ INSERT INTO map_ii VALUES(2, {2:1,20:1,200:1,2000:1}); """
+ sql """ INSERT INTO map_ii VALUES(3, {3:1}); """
+ sql """ DROP FUNCTION IF EXISTS udfii(Map<INT, INT>); """
+ sql """ CREATE FUNCTION udfii(Map<INT, INT>) RETURNS INT PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="map_int_int_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+
+
+ qt_select_1 """ select m,udfii(m) from map_ii order by id; """
+
+ sql """ CREATE TABLE IF NOT EXISTS map_ss (
+ `id` INT(11) NULL COMMENT "",
+ `m` Map<String, String> NULL COMMENT ""
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2"
+ ); """
+ sql """ INSERT INTO map_ss VALUES(1, {"114":"514","1919":"810"}); """
+ sql """ INSERT INTO map_ss VALUES(2, {"a":"bc","def":"g","hij":"k"}); """
+ sql """ DROP FUNCTION IF EXISTS udfss(Map<String, String>); """
+
+ sql """ CREATE FUNCTION udfss(Map<String, String>) RETURNS STRING PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="map_string_string_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+
+ qt_select_2 """ select m,udfss(m) from map_ss order by id; """
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS udfii(Map<INT, INT>);")
+ try_sql("DROP FUNCTION IF EXISTS udfss(Map<String, String>);")
+ try_sql("DROP TABLE IF EXISTS map_ii")
+ try_sql("DROP TABLE IF EXISTS map_ss")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_mixed_params.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_mixed_params.groovy
new file mode 100644
index 0000000..a9e6f6f
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_mixed_params.groovy
@@ -0,0 +1,443 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_mixed_params") {
+ // Test vectorized Python UDF with mixed parameter types (pd.Series + scalar)
+ // This tests the scenario where some parameters are vectorized (pd.Series)
+ // and some are scalar values (int, float, str)
+ //
+ // Key concept: In vectorized UDF, you can mix:
+ // - pd.Series parameters (process entire column)
+ // - scalar parameters (single value like int, float, str)
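+    //
+    // Assumed mapping at the call site (illustrative): a constant argument is
+    // handed to Python as a plain scalar, a column reference as a Series, e.g.
+    //
+    //   SELECT f(price, 1.5) ...  ->  def f(price: pd.Series, x: float)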
+
+ def runtime_version = "3.10.12"
+
+ try {
+ // Create test table
+ sql """ DROP TABLE IF EXISTS test_mixed_params_table; """
+ sql """
+ CREATE TABLE test_mixed_params_table (
+ id INT,
+ price DOUBLE,
+ quantity INT,
+ discount_rate DOUBLE,
+ category STRING
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 3
+ PROPERTIES("replication_num" = "1");
+ """
+
+ // Insert test data
+ sql """
+ INSERT INTO test_mixed_params_table VALUES
+ (1, 100.0, 5, 0.1, 'A'),
+ (2, 200.0, 3, 0.15, 'B'),
+ (3, 150.0, 8, 0.2, 'A'),
+ (4, 300.0, 2, 0.05, 'C'),
+ (5, 250.0, 6, 0.12, 'B'),
+ (6, 180.0, 4, 0.18, 'A'),
+ (7, 220.0, 7, 0.08, 'C'),
+ (8, 120.0, 9, 0.25, 'B'),
+ (9, 280.0, 1, 0.1, 'A'),
+ (10, 350.0, 5, 0.15, 'C');
+ """
+
+ sql "sync"
+
+ // ==================== Test 1: pd.Series + scalar float ====================
+ log.info("=== Test 1: pd.Series + scalar float ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_multiply_constant(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_multiply_constant(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_multiply_constant",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_multiply_constant(values: pd.Series, multiplier: float) -> pd.Series:
+ # values: pd.Series (vectorized column data)
+ # multiplier: float (scalar constant)
+ return values * multiplier
+\$\$;
+ """
+
+ qt_select_1 """
+ SELECT
+ id,
+ price,
+ py_vec_multiply_constant(price, 1.5) AS price_multiplied
+ FROM test_mixed_params_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 2: Multiple pd.Series + scalar float ====================
+ log.info("=== Test 2: Multiple pd.Series + scalar float ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_calc_total(DOUBLE, INT, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_calc_total(DOUBLE, INT, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_calc_total",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_calc_total(price: pd.Series, quantity: pd.Series, tax_rate: float) -> pd.Series:
+ # price: pd.Series (vectorized)
+ # quantity: pd.Series (vectorized)
+ # tax_rate: float (scalar constant)
+ subtotal = price * quantity
+ return subtotal * (1 + tax_rate)
+\$\$;
+ """
+
+ qt_select_2 """
+ SELECT
+ id,
+ price,
+ quantity,
+ py_vec_calc_total(price, quantity, 0.1) AS total_with_tax
+ FROM test_mixed_params_table
+ ORDER BY id
+ LIMIT 5;
+ """
+
+ // ==================== Test 3: Two pd.Series (both vectorized) ====================
+ log.info("=== Test 3: Two pd.Series (both vectorized) ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_apply_discount(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_apply_discount(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_apply_discount",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_apply_discount(price: pd.Series, discount_rate: pd.Series) -> pd.Series:
+ # Both are pd.Series (vectorized)
+ # Each row has its own discount rate from the column
+ return price * (1 - discount_rate)
+\$\$;
+ """
+
+ qt_select_3 """
+ SELECT
+ id,
+ price,
+ discount_rate,
+ py_vec_apply_discount(price, discount_rate) AS final_price
+ FROM test_mixed_params_table
+ ORDER BY id
+ LIMIT 5;
+ """
+
+ // ==================== Test 4: Complex Mixed Parameters (3 Series + 1 scalar) ====================
+ log.info("=== Test 4: Complex calculation with mixed params (3 Series + 1 scalar) ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_complex_calc(DOUBLE, INT, DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_complex_calc(DOUBLE, INT, DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_complex_calc",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_complex_calc(price: pd.Series, quantity: pd.Series, discount_rate: pd.Series, shipping_fee: float) -> pd.Series:
+ # price: pd.Series (vectorized)
+ # quantity: pd.Series (vectorized)
+ # discount_rate: pd.Series (vectorized, per-row discount)
+ # shipping_fee: float (scalar constant)
+
+ # Calculate: (price * quantity) * (1 - discount) + shipping_fee
+ subtotal = price * quantity
+ after_discount = subtotal * (1 - discount_rate)
+ return after_discount + shipping_fee
+\$\$;
+ """
+
+ qt_select_4 """
+ SELECT
+ id,
+ price,
+ quantity,
+ discount_rate,
+ py_vec_complex_calc(price, quantity, discount_rate, 10.0) AS final_total
+ FROM test_mixed_params_table
+ ORDER BY id
+ LIMIT 5;
+ """
+
+ // ==================== Test 5: String pd.Series + scalar str ====================
+ log.info("=== Test 5: String pd.Series + scalar str ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_add_prefix(STRING, STRING); """
+ sql """
+ CREATE FUNCTION py_vec_add_prefix(STRING, STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_add_prefix",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_add_prefix(categories: pd.Series, prefix: str) -> pd.Series:
+ # categories: pd.Series (vectorized string column)
+ # prefix: str (scalar constant)
+ return prefix + '_' + categories
+\$\$;
+ """
+
+ qt_select_5 """
+ SELECT
+ id,
+ category,
+ py_vec_add_prefix(category, 'CAT') AS prefixed_category
+ FROM test_mixed_params_table
+ ORDER BY id
+ LIMIT 5;
+ """
+
+ // ==================== Test 6: pd.Series + scalar int ====================
+ log.info("=== Test 6: pd.Series + scalar int ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_add_int(INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_add_int(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_add_int",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_add_int(quantities: pd.Series, bonus: int) -> pd.Series:
+ # quantities: pd.Series (vectorized int column)
+ # bonus: int (scalar constant)
+ return quantities + bonus
+\$\$;
+ """
+
+ qt_select_6 """
+ SELECT
+ id,
+ quantity,
+ py_vec_add_int(quantity, 10) AS quantity_with_bonus
+ FROM test_mixed_params_table
+ ORDER BY id
+ LIMIT 5;
+ """
+
+ // ==================== Test 7: Conditional Logic with Mixed Params ====================
+ log.info("=== Test 7: Conditional logic with mixed params (2 Series + 1 scalar) ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_conditional_discount(DOUBLE, DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_conditional_discount(DOUBLE, DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_conditional_discount",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+import numpy as np
+
+def py_vec_conditional_discount(price: pd.Series, discount_rate: pd.Series, threshold: float) -> pd.Series:
+ # price: pd.Series (vectorized)
+ # discount_rate: pd.Series (vectorized)
+ # threshold: float (scalar constant - minimum price for discount)
+
+ # Apply discount only if price >= threshold
+ result = np.where(price >= threshold,
+ price * (1 - discount_rate),
+ price)
+ return pd.Series(result)
+\$\$;
+ """
+
+ qt_select_7 """
+ SELECT
+ id,
+ price,
+ discount_rate,
+ py_vec_conditional_discount(price, discount_rate, 200.0) AS final_price
+ FROM test_mixed_params_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 8: Scalar first, then Series ====================
+ log.info("=== Test 8: Scalar parameter first, then Series ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_scale_and_add(DOUBLE, DOUBLE, INT); """
+ sql """
+ CREATE FUNCTION py_vec_scale_and_add(DOUBLE, DOUBLE, INT)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_scale_and_add",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_scale_and_add(scale_factor: float, prices: pd.Series, quantities: pd.Series) -> pd.Series:
+ # scale_factor: float (scalar constant)
+ # prices: pd.Series (vectorized)
+ # quantities: pd.Series (vectorized)
+ return (prices * quantities) * scale_factor
+\$\$;
+ """
+
+ qt_select_8 """
+ SELECT
+ id,
+ price,
+ quantity,
+ py_vec_scale_and_add(1.2, price, quantity) AS scaled_total
+ FROM test_mixed_params_table
+ ORDER BY id
+ LIMIT 3;
+ """
+
+ // ==================== Test 9: Alternating Series and Scalar ====================
+ log.info("=== Test 9: Alternating Series and scalar parameters ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_alternating(DOUBLE, DOUBLE, INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_alternating(DOUBLE, DOUBLE, INT, INT)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_alternating",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_alternating(prices: pd.Series, markup: float, quantities: pd.Series, min_qty: int) -> pd.Series:
+ # prices: pd.Series (vectorized)
+ # markup: float (scalar constant)
+ # quantities: pd.Series (vectorized)
+ # min_qty: int (scalar constant)
+
+ import numpy as np
+ # Apply markup only if quantity >= min_qty
+ result = np.where(quantities >= min_qty,
+ prices * (1 + markup),
+ prices)
+ return pd.Series(result)
+\$\$;
+ """
+
+ qt_select_9 """
+ SELECT
+ id,
+ price,
+ quantity,
+ py_vec_alternating(price, 0.2, quantity, 5) AS conditional_price
+ FROM test_mixed_params_table
+ ORDER BY id
+ LIMIT 5;
+ """
+
+ // ==================== Test 10: Multiple scalars with one Series ====================
+ log.info("=== Test 10: Multiple scalar parameters with one Series ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_multi_scalar(DOUBLE, DOUBLE, DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_multi_scalar(DOUBLE, DOUBLE, DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_multi_scalar",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_multi_scalar(prices: pd.Series, tax: float, discount: float, fee: float) -> pd.Series:
+ # prices: pd.Series (vectorized)
+ # tax: float (scalar constant)
+ # discount: float (scalar constant)
+ # fee: float (scalar constant)
+
+ # Calculate: (price * (1 - discount)) * (1 + tax) + fee
+ after_discount = prices * (1 - discount)
+ with_tax = after_discount * (1 + tax)
+ return with_tax + fee
+\$\$;
+ """
+
+ qt_select_10 """
+ SELECT
+ id,
+ price,
+ py_vec_multi_scalar(price, 0.1, 0.05, 5.0) AS final_price
+ FROM test_mixed_params_table
+ ORDER BY id
+ LIMIT 3;
+ """
+
+ log.info("All mixed parameter tests passed!")
+
+ } finally {
+        // Cleanup; try_sql so one failed drop does not abort the remaining drops
+        try_sql("DROP FUNCTION IF EXISTS py_vec_multiply_constant(DOUBLE, DOUBLE);")
+        try_sql("DROP FUNCTION IF EXISTS py_vec_calc_total(DOUBLE, INT, DOUBLE);")
+        try_sql("DROP FUNCTION IF EXISTS py_vec_apply_discount(DOUBLE, DOUBLE);")
+        try_sql("DROP FUNCTION IF EXISTS py_vec_complex_calc(DOUBLE, INT, DOUBLE, DOUBLE);")
+        try_sql("DROP FUNCTION IF EXISTS py_vec_add_prefix(STRING, STRING);")
+        try_sql("DROP FUNCTION IF EXISTS py_vec_add_int(INT, INT);")
+        try_sql("DROP FUNCTION IF EXISTS py_vec_conditional_discount(DOUBLE, DOUBLE, DOUBLE);")
+        try_sql("DROP FUNCTION IF EXISTS py_vec_scale_and_add(DOUBLE, DOUBLE, INT);")
+        try_sql("DROP FUNCTION IF EXISTS py_vec_alternating(DOUBLE, DOUBLE, INT, INT);")
+        try_sql("DROP FUNCTION IF EXISTS py_vec_multi_scalar(DOUBLE, DOUBLE, DOUBLE, DOUBLE);")
+        try_sql("DROP TABLE IF EXISTS test_mixed_params_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_module.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_module.groovy
new file mode 100644
index 0000000..a22fd88
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_module.groovy
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_module") {
+ def pyPath = """${context.file.parent}/udf_scripts/python_udf_module_test.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.12.0"
+ log.info("Python Zip path: ${pyPath}".toString())
+ try {
+ sql """ DROP FUNCTION IF EXISTS python_udf_ltv_score(BIGINT, BIGINT, DOUBLE); """
+ sql """
+ CREATE FUNCTION python_udf_ltv_score(BIGINT, BIGINT, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file"="file://${pyPath}",
+ "symbol" = "python_udf_module_test.main.safe_ltv",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+ sql """ DROP TABLE IF EXISTS user_behavior_test; """
+ sql """
+ CREATE TABLE user_behavior_test (
+ user_id BIGINT,
+ days_since_last_action BIGINT,
+ total_actions BIGINT,
+ total_spend DOUBLE
+ ) ENGINE=OLAP
+ DUPLICATE KEY(user_id)
+ DISTRIBUTED BY HASH(user_id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+ sql """
+ INSERT INTO user_behavior_test VALUES
+ (1001, 5, 10, 500.0),
+ (1002, 40, 1, 20.0),
+ (1003, 15, 5, 300.0),
+ (1004, -1, 3, 100.0),
+ (1005, NULL, 2, 200.0),
+ (1006, 7, NULL, 150.0),
+ (1007, 30, 0, NULL),
+ (1008, 0, 100, 5000.0),
+ (1009, 100, 2, 10.0),
+ (1010, 8, 8, 800.0);
+ """
+
+ qt_select """ SELECT
+ user_id,
+ days_since_last_action,
+ total_actions,
+ total_spend,
+ python_udf_ltv_score(days_since_last_action, total_actions, total_spend) AS ltv_score
+ FROM user_behavior_test
+ ORDER BY user_id; """
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS python_udf_ltv_score(BIGINT, BIGINT, DOUBLE);")
+ try_sql("DROP TABLE IF EXISTS user_behavior_test;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_module_advanced.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_module_advanced.groovy
new file mode 100644
index 0000000..77001bf
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_module_advanced.groovy
@@ -0,0 +1,180 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_module_advanced") {
+ // Test advanced Python UDF features using Module mode
+
+ def zipPath = """${context.file.parent}/udf_scripts/python_udf_module_test.zip"""
+ scp_udf_file_to_all_be(zipPath)
+ def runtime_version = "3.12.0"
+ log.info("Python Zip path: ${zipPath}".toString())
+
+ try {
+        // Test 1: Use a dotted module path inside the zip package
+ sql """ DROP FUNCTION IF EXISTS py_module_ltv(BIGINT, BIGINT, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_module_ltv(BIGINT, BIGINT, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${zipPath}",
+ "symbol" = "python_udf_module_test.main.safe_ltv",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_select_module_ltv_normal """ SELECT py_module_ltv(10, 100, 5000.0) AS result; """
+ qt_select_module_ltv_null """ SELECT py_module_ltv(NULL, 100, 5000.0) AS result; """
+ qt_select_module_ltv_zero """ SELECT py_module_ltv(0, 0, 5000.0) AS result; """
+
+ // Test 2: Use Module UDF in complex queries
+ sql """ DROP TABLE IF EXISTS customer_analytics; """
+ sql """
+ CREATE TABLE customer_analytics (
+ customer_id BIGINT,
+ days_inactive BIGINT,
+ total_orders BIGINT,
+ total_revenue DOUBLE,
+ customer_segment STRING
+ ) ENGINE=OLAP
+ DUPLICATE KEY(customer_id)
+ DISTRIBUTED BY HASH(customer_id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO customer_analytics VALUES
+ (1001, 5, 50, 10000.0, 'Premium'),
+ (1002, 30, 10, 2000.0, 'Regular'),
+ (1003, 60, 5, 500.0, 'Inactive'),
+ (1004, 2, 100, 25000.0, 'VIP'),
+ (1005, 15, 25, 5000.0, 'Regular'),
+ (1006, NULL, 30, 6000.0, 'Regular'),
+ (1007, 10, NULL, 3000.0, 'Regular'),
+ (1008, 45, 8, NULL, 'Inactive'),
+ (1009, 0, 200, 50000.0, 'VIP'),
+ (1010, 90, 2, 100.0, 'Churned');
+ """
+
+ qt_select_customer_analytics """
+ SELECT
+ customer_id,
+ customer_segment,
+ days_inactive,
+ total_orders,
+ total_revenue,
+ py_module_ltv(days_inactive, total_orders, total_revenue) AS ltv_score
+ FROM customer_analytics
+ ORDER BY customer_id;
+ """
+
+ // Test 3: Use Module UDF for group aggregation
+ qt_select_segment_analysis """
+ SELECT
+ customer_segment,
+ COUNT(*) AS customer_count,
+ AVG(total_revenue) AS avg_revenue,
+ AVG(py_module_ltv(days_inactive, total_orders, total_revenue)) AS avg_ltv_score
+ FROM customer_analytics
+ GROUP BY customer_segment
+ ORDER BY customer_segment;
+ """
+
+ // Test 4: Use Module UDF for filtering
+ qt_select_high_value_customers """
+ SELECT
+ customer_id,
+ customer_segment,
+ total_revenue,
+ py_module_ltv(days_inactive, total_orders, total_revenue) AS ltv_score
+ FROM customer_analytics
+ WHERE py_module_ltv(days_inactive, total_orders, total_revenue) > 100
+ ORDER BY ltv_score DESC;
+ """
+
+ // Test 5: Use Module UDF for sorting
+ qt_select_sorted_by_ltv """
+ SELECT
+ customer_id,
+ customer_segment,
+ py_module_ltv(days_inactive, total_orders, total_revenue) AS ltv_score
+ FROM customer_analytics
+ ORDER BY py_module_ltv(days_inactive, total_orders, total_revenue) DESC
+ LIMIT 5;
+ """
+
+ // Test 6: Use Module UDF with multiple conditions
+ qt_select_complex_query """
+ SELECT
+ customer_id,
+ customer_segment,
+ days_inactive,
+ total_orders,
+ total_revenue,
+ py_module_ltv(days_inactive, total_orders, total_revenue) AS ltv_score,
+ CASE
+ WHEN py_module_ltv(days_inactive, total_orders, total_revenue) > 200 THEN 'High Value'
+ WHEN py_module_ltv(days_inactive, total_orders, total_revenue) > 100 THEN 'Medium Value'
+ WHEN py_module_ltv(days_inactive, total_orders, total_revenue) IS NOT NULL THEN 'Low Value'
+ ELSE 'Unknown'
+ END AS value_category
+ FROM customer_analytics
+ ORDER BY ltv_score DESC;
+ """
+
+ // Test 7: Use Module UDF with JOIN operations
+ sql """ DROP TABLE IF EXISTS customer_info; """
+ sql """
+ CREATE TABLE customer_info (
+ customer_id BIGINT,
+ customer_name STRING,
+ registration_date DATE
+ ) ENGINE=OLAP
+ DUPLICATE KEY(customer_id)
+ DISTRIBUTED BY HASH(customer_id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO customer_info VALUES
+ (1001, 'Alice Johnson', '2023-01-15'),
+ (1002, 'Bob Smith', '2023-03-20'),
+ (1003, 'Charlie Brown', '2022-11-10'),
+ (1004, 'Diana Prince', '2023-05-01'),
+ (1005, 'Eve Wilson', '2023-02-14');
+ """
+
+ qt_select_join_with_module_udf """
+ SELECT
+ ci.customer_id,
+ ci.customer_name,
+ ca.customer_segment,
+ ca.total_revenue,
+ py_module_ltv(ca.days_inactive, ca.total_orders, ca.total_revenue) AS ltv_score
+ FROM customer_info ci
+ JOIN customer_analytics ca ON ci.customer_id = ca.customer_id
+ WHERE py_module_ltv(ca.days_inactive, ca.total_orders, ca.total_revenue) IS NOT NULL
+ ORDER BY ltv_score DESC;
+ """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_module_ltv(BIGINT, BIGINT, DOUBLE);")
+ try_sql("DROP TABLE IF EXISTS customer_analytics;")
+ try_sql("DROP TABLE IF EXISTS customer_info;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_module_scalar.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_module_scalar.groovy
new file mode 100644
index 0000000..af0a43f
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_module_scalar.groovy
@@ -0,0 +1,818 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_module_scalar") {
+ // Comprehensive test for scalar Python UDF using module mode
+
+ def pyPath = """${context.file.parent}/udf_scripts/python_udf_scalar_ops.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+
+ log.info("Python module path: ${pyPath}".toString())
+
+ try {
+ // Create test table with diverse data types
+ sql """ DROP TABLE IF EXISTS scalar_module_test_table; """
+ sql """
+ CREATE TABLE scalar_module_test_table (
+ id INT,
+ int_a INT,
+ int_b INT,
+ int_c INT,
+ double_a DOUBLE,
+ double_b DOUBLE,
+ string_a STRING,
+ string_b STRING,
+ bool_a BOOLEAN,
+ bool_b BOOLEAN,
+ date_a DATE,
+ date_b DATE
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO scalar_module_test_table VALUES
+ (1, 10, 20, 30, 100.0, 10.0, 'hello world', 'test@example.com', true, true, '2024-01-15', '2024-01-20'),
+ (2, 5, 15, 25, 200.0, 20.0, 'foo bar baz', 'user@domain.com', false, true, '2024-02-10', '2024-03-15'),
+ (3, 100, 50, 25, 150.0, 0.0, 'racecar', 'admin@test.org', true, false, '2023-12-01', '2024-01-01'),
+ (4, 7, 3, 11, 80.0, 5.0, 'a man a plan a canal panama', 'info@company.net', false, false, '2024-06-15', '2024-06-15'),
+ (5, 17, 19, 23, 300.0, 15.0, 'python udf test', 'contact@site.io', true, true, '2024-03-01', '2024-12-31');
+ """
+
+ // ==================== Numeric Operations Tests ====================
+
+ // Test 1: Add three numbers
+ sql """ DROP FUNCTION IF EXISTS py_add_three(INT, INT, INT); """
+ sql """
+ CREATE FUNCTION py_add_three(INT, INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.add_three_numbers",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_add_three """
+ SELECT
+ id,
+ int_a, int_b, int_c,
+ py_add_three(int_a, int_b, int_c) AS result
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 2: Safe division with precision
+ sql """ DROP FUNCTION IF EXISTS py_safe_div(DOUBLE, DOUBLE, INT); """
+ sql """
+ CREATE FUNCTION py_safe_div(DOUBLE, DOUBLE, INT)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.safe_divide_with_precision",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_safe_div """
+ SELECT
+ id,
+ double_a, double_b,
+ py_safe_div(double_a, double_b, 2) AS result
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 3: Calculate discount price
+ sql """ DROP FUNCTION IF EXISTS py_discount(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_discount(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.calculate_discount_price",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_discount """
+ SELECT
+ id,
+ double_a,
+ py_discount(double_a, 10.0) AS price_10_off,
+ py_discount(double_a, 25.0) AS price_25_off
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 4: Compound interest
+ sql """ DROP FUNCTION IF EXISTS py_compound_interest(DOUBLE, DOUBLE, INT); """
+ sql """
+ CREATE FUNCTION py_compound_interest(DOUBLE, DOUBLE, INT)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.compound_interest",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_compound_interest """
+ SELECT
+ id,
+ double_a,
+ py_compound_interest(double_a, 5.0, 10) AS future_value
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 5: Calculate BMI
+ sql """ DROP FUNCTION IF EXISTS py_bmi(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_bmi(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.calculate_bmi",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_bmi """
+ SELECT
+ id,
+ py_bmi(70.0, 1.75) AS bmi_normal,
+ py_bmi(90.0, 1.75) AS bmi_overweight
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 6: Fibonacci number
+ sql """ DROP FUNCTION IF EXISTS py_fibonacci(INT); """
+ sql """
+ CREATE FUNCTION py_fibonacci(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.fibonacci",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_fibonacci """
+ SELECT
+ id,
+ int_a,
+ py_fibonacci(int_a) AS fib_result
+ FROM scalar_module_test_table
+ WHERE int_a <= 20
+ ORDER BY id;
+ """
+
+ // Test 7: Is prime number
+ sql """ DROP FUNCTION IF EXISTS py_is_prime(INT); """
+ sql """
+ CREATE FUNCTION py_is_prime(INT)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.is_prime",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_is_prime """
+ SELECT
+ id,
+ int_a, int_b, int_c,
+ py_is_prime(int_a) AS a_is_prime,
+ py_is_prime(int_b) AS b_is_prime,
+ py_is_prime(int_c) AS c_is_prime
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 8: GCD (Greatest Common Divisor)
+ sql """ DROP FUNCTION IF EXISTS py_gcd(INT, INT); """
+ sql """
+ CREATE FUNCTION py_gcd(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.gcd",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_gcd """
+ SELECT
+ id,
+ int_a, int_b,
+ py_gcd(int_a, int_b) AS gcd_result
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 9: LCM (Least Common Multiple)
+ sql """ DROP FUNCTION IF EXISTS py_lcm(INT, INT); """
+ sql """
+ CREATE FUNCTION py_lcm(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.lcm",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_lcm """
+ SELECT
+ id,
+ int_a, int_b,
+ py_lcm(int_a, int_b) AS lcm_result
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // ==================== String Operations Tests ====================
+
+ // Test 10: Reverse string
+ sql """ DROP FUNCTION IF EXISTS py_reverse(STRING); """
+ sql """
+ CREATE FUNCTION py_reverse(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.reverse_string",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_reverse """
+ SELECT
+ id,
+ string_a,
+ py_reverse(string_a) AS reversed
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 11: Count vowels
+ sql """ DROP FUNCTION IF EXISTS py_count_vowels(STRING); """
+ sql """
+ CREATE FUNCTION py_count_vowels(STRING)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.count_vowels",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_count_vowels """
+ SELECT
+ id,
+ string_a,
+ py_count_vowels(string_a) AS vowel_count
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 12: Count words
+ sql """ DROP FUNCTION IF EXISTS py_count_words(STRING); """
+ sql """
+ CREATE FUNCTION py_count_words(STRING)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.count_words",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_count_words """
+ SELECT
+ id,
+ string_a,
+ py_count_words(string_a) AS word_count
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 13: Capitalize words
+ sql """ DROP FUNCTION IF EXISTS py_capitalize(STRING); """
+ sql """
+ CREATE FUNCTION py_capitalize(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.capitalize_words",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_capitalize """
+ SELECT
+ id,
+ string_a,
+ py_capitalize(string_a) AS capitalized
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 14: Is palindrome
+ sql """ DROP FUNCTION IF EXISTS py_is_palindrome(STRING); """
+ sql """
+ CREATE FUNCTION py_is_palindrome(STRING)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.is_palindrome",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_is_palindrome """
+ SELECT
+ id,
+ string_a,
+ py_is_palindrome(string_a) AS is_palindrome
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 15: String similarity
+ sql """ DROP FUNCTION IF EXISTS py_similarity(STRING, STRING); """
+ sql """
+ CREATE FUNCTION py_similarity(STRING, STRING)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.string_similarity",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_similarity """
+ SELECT
+ id,
+ string_a,
+ py_similarity(string_a, 'hello') AS similarity_to_hello
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 16: Mask email
+ sql """ DROP FUNCTION IF EXISTS py_mask_email(STRING); """
+ sql """
+ CREATE FUNCTION py_mask_email(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.mask_email",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_mask_email """
+ SELECT
+ id,
+ string_b,
+ py_mask_email(string_b) AS masked_email
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 17: Extract domain from email
+ sql """ DROP FUNCTION IF EXISTS py_extract_domain(STRING); """
+ sql """
+ CREATE FUNCTION py_extract_domain(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.extract_domain",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_extract_domain """
+ SELECT
+ id,
+ string_b,
+ py_extract_domain(string_b) AS domain
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 18: Levenshtein distance
+ sql """ DROP FUNCTION IF EXISTS py_levenshtein(STRING, STRING); """
+ sql """
+ CREATE FUNCTION py_levenshtein(STRING, STRING)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.levenshtein_distance",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_levenshtein """
+ SELECT
+ id,
+ string_a,
+ py_levenshtein(string_a, 'hello world') AS edit_distance
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // ==================== Date/Time Operations Tests ====================
+
+ // Test 19: Days between dates
+ sql """ DROP FUNCTION IF EXISTS py_days_between(DATE, DATE); """
+ sql """
+ CREATE FUNCTION py_days_between(DATE, DATE)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.days_between_dates",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_days_between """
+ SELECT
+ id,
+ date_a, date_b,
+ py_days_between(date_a, date_b) AS days_diff
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 20: Is weekend
+ sql """ DROP FUNCTION IF EXISTS py_is_weekend(DATE); """
+ sql """
+ CREATE FUNCTION py_is_weekend(DATE)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.is_weekend",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_is_weekend """
+ SELECT
+ id,
+ date_a,
+ py_is_weekend(date_a) AS is_weekend
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 21: Get quarter
+ sql """ DROP FUNCTION IF EXISTS py_get_quarter(DATE); """
+ sql """
+ CREATE FUNCTION py_get_quarter(DATE)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.get_quarter",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_get_quarter """
+ SELECT
+ id,
+ date_a,
+ py_get_quarter(date_a) AS quarter
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 22: Age in years
+ sql """ DROP FUNCTION IF EXISTS py_age(DATE, DATE); """
+ sql """
+ CREATE FUNCTION py_age(DATE, DATE)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.age_in_years",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_age """
+ SELECT
+ id,
+ py_age('1990-01-01', date_a) AS age
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // ==================== Boolean/Conditional Operations Tests ====================
+
+ // Test 23: Is in range
+ sql """ DROP FUNCTION IF EXISTS py_in_range(INT, INT, INT); """
+ sql """
+ CREATE FUNCTION py_in_range(INT, INT, INT)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.is_in_range",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_in_range """
+ SELECT
+ id,
+ int_a,
+ py_in_range(int_a, 10, 50) AS in_range_10_50
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 24: XOR operation
+ sql """ DROP FUNCTION IF EXISTS py_xor(BOOLEAN, BOOLEAN); """
+ sql """
+ CREATE FUNCTION py_xor(BOOLEAN, BOOLEAN)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.xor_operation",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_xor """
+ SELECT
+ id,
+ bool_a, bool_b,
+ py_xor(bool_a, bool_b) AS xor_result
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // ==================== Complex/Mixed Operations Tests ====================
+
+ // Test 25: Calculate grade
+ sql """ DROP FUNCTION IF EXISTS py_grade(DOUBLE); """
+ sql """
+ CREATE FUNCTION py_grade(DOUBLE)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.calculate_grade",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_grade """
+ SELECT
+ id,
+ double_a,
+ py_grade(double_a) AS grade
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 26: Categorize age
+ sql """ DROP FUNCTION IF EXISTS py_categorize_age(INT); """
+ sql """
+ CREATE FUNCTION py_categorize_age(INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.categorize_age",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_categorize_age """
+ SELECT
+ id,
+ int_a,
+ py_categorize_age(int_a) AS age_category
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 27: Calculate tax
+ sql """ DROP FUNCTION IF EXISTS py_tax(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_tax(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.calculate_tax",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_tax """
+ SELECT
+ id,
+ double_a,
+ py_tax(double_a, 15.0) AS tax_15_percent
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 28: Truncate string with suffix
+ sql """ DROP FUNCTION IF EXISTS py_truncate(STRING, INT, STRING); """
+ sql """
+ CREATE FUNCTION py_truncate(STRING, INT, STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_scalar_ops.truncate_string",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_truncate """
+ SELECT
+ id,
+ string_a,
+ py_truncate(string_a, 10, '...') AS truncated
+ FROM scalar_module_test_table
+ ORDER BY id;
+ """
+
+ // ==================== Edge Cases and NULL Handling Tests ====================
+
+ // Test 29: NULL handling in numeric operations
+ sql """ DROP TABLE IF EXISTS null_test_table; """
+ sql """
+ CREATE TABLE null_test_table (
+ id INT,
+ val1 INT,
+ val2 INT,
+ val3 INT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO null_test_table VALUES
+ (1, 10, 20, 30),
+ (2, NULL, 20, 30),
+ (3, 10, NULL, 30),
+ (4, 10, 20, NULL),
+ (5, NULL, NULL, NULL);
+ """
+
+ qt_null_handling """
+ SELECT
+ id,
+ val1, val2, val3,
+ py_add_three(val1, val2, val3) AS sum_result
+ FROM null_test_table
+ ORDER BY id;
+ """
+
+ // Test 30: Empty string handling
+ sql """ DROP TABLE IF EXISTS string_edge_test; """
+ sql """
+ CREATE TABLE string_edge_test (
+ id INT,
+ str_val STRING
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO string_edge_test VALUES
+ (1, 'normal string'),
+ (2, ''),
+ (3, ' '),
+ (4, 'a'),
+ (5, NULL);
+ """
+
+ qt_string_edge """
+ SELECT
+ id,
+ str_val,
+ py_reverse(str_val) AS reversed,
+ py_count_vowels(str_val) AS vowels,
+ py_count_words(str_val) AS words
+ FROM string_edge_test
+ ORDER BY id;
+ """
+
+ } finally {
+ // Cleanup all functions
+ try_sql("DROP FUNCTION IF EXISTS py_add_three(INT, INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_safe_div(DOUBLE, DOUBLE, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_discount(DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_compound_interest(DOUBLE, DOUBLE, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_bmi(DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_fibonacci(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_is_prime(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_gcd(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_lcm(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_reverse(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_count_vowels(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_count_words(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_capitalize(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_is_palindrome(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_similarity(STRING, STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_mask_email(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_extract_domain(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_levenshtein(STRING, STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_days_between(DATE, DATE);")
+ try_sql("DROP FUNCTION IF EXISTS py_is_weekend(DATE);")
+ try_sql("DROP FUNCTION IF EXISTS py_get_quarter(DATE);")
+ try_sql("DROP FUNCTION IF EXISTS py_age(DATE, DATE);")
+ try_sql("DROP FUNCTION IF EXISTS py_in_range(INT, INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_xor(BOOLEAN, BOOLEAN);")
+ try_sql("DROP FUNCTION IF EXISTS py_grade(DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_categorize_age(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_tax(DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_truncate(STRING, INT, STRING);")
+
+ // Cleanup tables
+ try_sql("DROP TABLE IF EXISTS scalar_module_test_table;")
+ try_sql("DROP TABLE IF EXISTS null_test_table;")
+ try_sql("DROP TABLE IF EXISTS string_edge_test;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_module_vector.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_module_vector.groovy
new file mode 100644
index 0000000..c39e92c
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_module_vector.groovy
@@ -0,0 +1,429 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_module_vector") {
+ // Test vectorized Python UDF using module mode with pandas.Series
+
+ def pyPath = """${context.file.parent}/udf_scripts/python_udf_vector_ops.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+
+ log.info("Python module path: ${pyPath}".toString())
+
+ try {
+ // Create test table
+ sql """ DROP TABLE IF EXISTS vector_module_test_table; """
+ sql """
+ CREATE TABLE vector_module_test_table (
+ id INT,
+ int_a INT,
+ int_b INT,
+ double_a DOUBLE,
+ double_b DOUBLE,
+ string_a STRING,
+ string_b STRING,
+ bool_a BOOLEAN,
+ bool_b BOOLEAN
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO vector_module_test_table VALUES
+ (1, 10, 20, 1.5, 2.5, 'hello world', 'python udf', true, true),
+ (2, 30, 15, 3.5, 4.5, 'foo bar', 'test case', false, true),
+ (3, 50, 50, 5.5, 2.0, 'data science', 'machine learning', true, false),
+ (4, 5, 25, 7.5, 1.5, 'apache doris', 'database system', false, false),
+ (5, 100, 10, 9.5, 3.5, 'vector operations', 'pandas series', true, true);
+ """
+
+ // Test 1: Vector addition with constant
+ sql """ DROP FUNCTION IF EXISTS py_vec_add_const(INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_add_const(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_add_with_constant",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_add_const """
+ SELECT
+ id,
+ int_a,
+ int_b,
+ py_vec_add_const(int_a, int_b) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 2: Vector multiplication and rounding
+ sql """ DROP FUNCTION IF EXISTS py_vec_multiply_round(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_multiply_round(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_multiply_and_round",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_multiply_round """
+ SELECT
+ id,
+ double_a,
+ double_b,
+ py_vec_multiply_round(double_a, double_b) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 3: Vector string concatenation with separator
+ sql """ DROP FUNCTION IF EXISTS py_vec_concat_sep(STRING, STRING); """
+ sql """
+ CREATE FUNCTION py_vec_concat_sep(STRING, STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_string_concat_with_separator",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_concat_sep """
+ SELECT
+ id,
+ string_a,
+ string_b,
+ py_vec_concat_sep(string_a, string_b) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 4: Vector string title case
+ sql """ DROP FUNCTION IF EXISTS py_vec_title_case(STRING); """
+ sql """
+ CREATE FUNCTION py_vec_title_case(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_string_title_case",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_title_case """
+ SELECT
+ id,
+ string_a,
+ py_vec_title_case(string_a) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 5: Vector conditional value (max of two values)
+ sql """ DROP FUNCTION IF EXISTS py_vec_conditional(INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_conditional(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_conditional_value",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_conditional """
+ SELECT
+ id,
+ int_a,
+ int_b,
+ py_vec_conditional(int_a, int_b) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 6: Vector percentage calculation
+ sql """ DROP FUNCTION IF EXISTS py_vec_percentage(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_percentage(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_percentage_calculation",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_percentage """
+ SELECT
+ id,
+ double_a,
+ double_b,
+ py_vec_percentage(double_a, double_b) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 7: Vector range check
+ sql """ DROP FUNCTION IF EXISTS py_vec_in_range(INT, INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_in_range(INT, INT, INT)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_is_in_range",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_in_range """
+ SELECT
+ id,
+ int_a,
+ py_vec_in_range(int_a, 10, 50) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 8: Vector safe division
+ sql """ DROP FUNCTION IF EXISTS py_vec_safe_div(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_safe_div(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_safe_divide",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_safe_div """
+ SELECT
+ id,
+ double_a,
+ double_b,
+ py_vec_safe_div(double_a, double_b) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 9: Vector exponential decay
+ sql """ DROP FUNCTION IF EXISTS py_vec_exp_decay(DOUBLE, INT); """
+ sql """
+ CREATE FUNCTION py_vec_exp_decay(DOUBLE, INT)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_exponential_decay",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_exp_decay """
+ SELECT
+ id,
+ double_a,
+ int_a,
+ py_vec_exp_decay(double_a, int_a) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 10: Vector string extract first word
+ sql """ DROP FUNCTION IF EXISTS py_vec_first_word(STRING); """
+ sql """
+ CREATE FUNCTION py_vec_first_word(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_string_extract_first_word",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_first_word """
+ SELECT
+ id,
+ string_a,
+ py_vec_first_word(string_a) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 11: Vector absolute difference
+ sql """ DROP FUNCTION IF EXISTS py_vec_abs_diff(INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_abs_diff(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_abs_difference",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_abs_diff """
+ SELECT
+ id,
+ int_a,
+ int_b,
+ py_vec_abs_diff(int_a, int_b) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 12: Vector power operation
+ sql """ DROP FUNCTION IF EXISTS py_vec_power(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_power(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_power",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_power """
+ SELECT
+ id,
+ double_a,
+ py_vec_power(double_a, 2.0) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 13: Vector boolean AND operation
+ sql """ DROP FUNCTION IF EXISTS py_vec_bool_and(BOOLEAN, BOOLEAN); """
+ sql """
+ CREATE FUNCTION py_vec_bool_and(BOOLEAN, BOOLEAN)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_boolean_and",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_bool_and """
+ SELECT
+ id,
+ bool_a,
+ bool_b,
+ py_vec_bool_and(bool_a, bool_b) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 14: Vector boolean OR operation
+ sql """ DROP FUNCTION IF EXISTS py_vec_bool_or(BOOLEAN, BOOLEAN); """
+ sql """
+ CREATE FUNCTION py_vec_bool_or(BOOLEAN, BOOLEAN)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_boolean_or",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_bool_or """
+ SELECT
+ id,
+ bool_a,
+ bool_b,
+ py_vec_bool_or(bool_a, bool_b) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ // Test 15: Vector clip values
+ sql """ DROP FUNCTION IF EXISTS py_vec_clip(INT, INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_clip(INT, INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${pyPath}",
+ "symbol" = "python_udf_vector_ops.vec_clip_values",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ qt_vec_clip """
+ SELECT
+ id,
+ int_a,
+ py_vec_clip(int_a, 20, 60) AS result
+ FROM vector_module_test_table
+ ORDER BY id;
+ """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_vec_add_const(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_multiply_round(DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_concat_sep(STRING, STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_title_case(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_conditional(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_percentage(DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_in_range(INT, INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_safe_div(DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_exp_decay(DOUBLE, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_first_word(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_abs_diff(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_power(DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_bool_and(BOOLEAN, BOOLEAN);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_bool_or(BOOLEAN, BOOLEAN);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_clip(INT, INT, INT);")
+ try_sql("DROP TABLE IF EXISTS vector_module_test_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_multiline_inline.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_multiline_inline.groovy
new file mode 100644
index 0000000..ff17fe2
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_multiline_inline.groovy
@@ -0,0 +1,211 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_multiline_inline") {
+ // Test complex multi-line inline Python code
+
+ def runtime_version = "3.10.12"
+ try {
+ // Test 1: Inline code with helper functions
+ sql """ DROP FUNCTION IF EXISTS py_complex_calculation(INT, INT); """
+ sql """
+ CREATE FUNCTION py_complex_calculation(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def helper_function(x):
+ return x * x
+
+def evaluate(a, b):
+ if a is None or b is None:
+ return None
+ result = helper_function(a) + helper_function(b)
+ return result
+\$\$;
+ """
+
+ qt_select_complex_calc """ SELECT py_complex_calculation(3, 4) AS result; """
+
+ // Test 2: Complex function with conditional logic
+ sql """ DROP FUNCTION IF EXISTS py_business_logic(STRING, DOUBLE, INT); """
+ sql """
+ CREATE FUNCTION py_business_logic(STRING, DOUBLE, INT)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(customer_type, amount, quantity):
+ if customer_type is None or amount is None or quantity is None:
+ return 'INVALID'
+
+ # Calculate discount
+ discount = 0
+ if customer_type == 'VIP':
+ discount = 0.2
+ elif customer_type == 'PREMIUM':
+ discount = 0.15
+ elif customer_type == 'REGULAR':
+ discount = 0.1
+ else:
+ discount = 0
+
+ # Bulk discount
+ if quantity >= 100:
+ discount += 0.05
+ elif quantity >= 50:
+ discount += 0.03
+
+ # Calculate final price
+ final_amount = amount * (1 - discount)
+
+ # Return result
+ if final_amount > 10000:
+ return f'HIGH:{final_amount:.2f}'
+ elif final_amount > 1000:
+ return f'MEDIUM:{final_amount:.2f}'
+ else:
+ return f'LOW:{final_amount:.2f}'
+\$\$;
+ """
+
+ qt_select_business_logic_vip """ SELECT py_business_logic('VIP', 5000.0, 120) AS result; """
+ qt_select_business_logic_regular """ SELECT py_business_logic('REGULAR', 2000.0, 30) AS result; """
+
+ // Test 3: Complex string processing logic
+ sql """ DROP FUNCTION IF EXISTS py_text_analyzer(STRING); """
+ sql """
+ CREATE FUNCTION py_text_analyzer(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(text):
+ if text is None:
+ return None
+
+ # Collect statistics
+ length = len(text)
+ words = text.split()
+ word_count = len(words)
+
+ # Count character types
+ upper_count = sum(1 for c in text if c.isupper())
+ lower_count = sum(1 for c in text if c.islower())
+ digit_count = sum(1 for c in text if c.isdigit())
+
+ # Build result
+ result = f"len:{length},words:{word_count},upper:{upper_count},lower:{lower_count},digits:{digit_count}"
+ return result
+\$\$;
+ """
+
+ qt_select_text_analyzer """ SELECT py_text_analyzer('Hello World 123') AS result; """
+
+ // Test 4: Complex mathematical calculation function
+ sql """ DROP FUNCTION IF EXISTS py_statistics(DOUBLE, DOUBLE, DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_statistics(DOUBLE, DOUBLE, DOUBLE, DOUBLE)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+import math
+
+def evaluate(v1, v2, v3, v4):
+ if any(x is None for x in [v1, v2, v3, v4]):
+ return None
+
+ values = [v1, v2, v3, v4]
+
+ # Calculate statistics
+ total = sum(values)
+ count = len(values)
+ mean = total / count
+
+ # Calculate variance
+ variance = sum((x - mean) ** 2 for x in values) / count
+
+    # Calculate standard deviation
+    std_dev = math.sqrt(variance)
+
+ # Find max and min values
+ max_val = max(values)
+ min_val = min(values)
+
+ result = f"mean:{mean:.2f},std:{std_dev:.2f},max:{max_val:.2f},min:{min_val:.2f}"
+ return result
+\$\$;
+ """
+
+ qt_select_statistics """ SELECT py_statistics(10.0, 20.0, 30.0, 40.0) AS result; """
+
+ // Test 5: Apply the complex inline UDFs to table data
+ sql """ DROP TABLE IF EXISTS multiline_test_table; """
+ sql """
+ CREATE TABLE multiline_test_table (
+ id INT,
+ customer_type STRING,
+ amount DOUBLE,
+ quantity INT,
+ description STRING
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO multiline_test_table VALUES
+ (1, 'VIP', 15000.0, 150, 'Premium customer order'),
+ (2, 'PREMIUM', 8000.0, 80, 'Good customer'),
+ (3, 'REGULAR', 3000.0, 40, 'Regular order'),
+ (4, 'VIP', 500.0, 10, 'Small VIP order'),
+ (5, 'REGULAR', 12000.0, 200, 'Large regular order');
+ """
+
+ qt_select_table_multiline """
+ SELECT
+ id,
+ customer_type,
+ amount,
+ quantity,
+ py_business_logic(customer_type, amount, quantity) AS pricing_result,
+ py_text_analyzer(description) AS text_analysis
+ FROM multiline_test_table
+ ORDER BY id;
+ """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_complex_calculation(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_business_logic(STRING, DOUBLE, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_text_analyzer(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_statistics(DOUBLE, DOUBLE, DOUBLE, DOUBLE);")
+ try_sql("DROP TABLE IF EXISTS multiline_test_table;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_performance.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_performance.groovy
new file mode 100644
index 0000000..f7429f6
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_performance.groovy
@@ -0,0 +1,222 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_performance") {
+ // Test Python UDF performance and correctness with large data volumes
+
+ def runtime_version = "3.10.12"
+ try {
+ // Create simple Python UDF
+ sql """ DROP FUNCTION IF EXISTS py_perf_double(INT); """
+ sql """
+ CREATE FUNCTION py_perf_double(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(x):
+ if x is None:
+ return None
+ return x * 2
+\$\$;
+ """
+
+ sql """ DROP FUNCTION IF EXISTS py_perf_concat(STRING, STRING); """
+ sql """
+ CREATE FUNCTION py_perf_concat(STRING, STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(s1, s2):
+ if s1 is None or s2 is None:
+ return None
+ return s1 + '_' + s2
+\$\$;
+ """
+
+ // Create test table
+ sql """ DROP TABLE IF EXISTS performance_test_table; """
+ sql """
+ CREATE TABLE performance_test_table (
+ id INT,
+ value INT,
+ category STRING,
+ amount DOUBLE
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 4
+ PROPERTIES("replication_num" = "1");
+ """
+
+ // Insert test data (large data volume)
+ sql """
+ INSERT INTO performance_test_table
+ SELECT
+ number AS id,
+ number % 1000 AS value,
+ CASE
+ WHEN number % 4 = 0 THEN 'A'
+ WHEN number % 4 = 1 THEN 'B'
+ WHEN number % 4 = 2 THEN 'C'
+ ELSE 'D'
+ END AS category,
+ number * 1.5 AS amount
+ FROM numbers("number" = "10000");
+ """
+
+ // Test 1: Simple UDF call performance
+ qt_select_perf_simple """
+ SELECT COUNT(*) AS total_count
+ FROM performance_test_table
+ WHERE py_perf_double(value) > 1000;
+ """
+
+ // Test 2: UDF performance in aggregate queries
+ qt_select_perf_aggregate """
+ SELECT
+ category,
+ COUNT(*) AS count,
+ AVG(py_perf_double(value)) AS avg_doubled_value
+ FROM performance_test_table
+ GROUP BY category
+ ORDER BY category;
+ """
+
+ // Test 3: UDF used for filtering combined with grouping and LIMIT
+ qt_select_perf_multiple_udf """
+ SELECT
+ category,
+ COUNT(*) AS count
+ FROM performance_test_table
+ WHERE py_perf_double(value) > 500
+ GROUP BY category
+ ORDER BY count DESC
+ LIMIT 10;
+ """
+
+ // Test 4: String UDF performance
+ qt_select_perf_string """
+ SELECT
+ category,
+ COUNT(DISTINCT py_perf_concat(category, CAST(value AS STRING))) AS unique_combinations
+ FROM performance_test_table
+ GROUP BY category
+ ORDER BY category;
+ """
+
+ // Test 5: UDF with complex calculations
+ sql """ DROP FUNCTION IF EXISTS py_perf_complex(INT, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_perf_complex(INT, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(v, a):
+ if v is None or a is None:
+ return None
+ result = (v * 1.5 + a * 0.8) / 2.0
+ return result
+\$\$;
+ """
+
+ qt_select_perf_complex """
+ SELECT
+ category,
+ AVG(py_perf_complex(value, amount)) AS avg_complex_result,
+ MAX(py_perf_complex(value, amount)) AS max_complex_result,
+ MIN(py_perf_complex(value, amount)) AS min_complex_result
+ FROM performance_test_table
+ GROUP BY category
+ ORDER BY category;
+ """
+
+ // Test 6: UDF in nested queries
+ qt_select_perf_nested """
+ SELECT
+ category,
+ avg_doubled
+ FROM (
+ SELECT
+ category,
+ AVG(py_perf_double(value)) AS avg_doubled
+ FROM performance_test_table
+ GROUP BY category
+ ) t
+ WHERE avg_doubled > 500
+ ORDER BY avg_doubled DESC;
+ """
+
+ // Test 7: Performance test with NULL value handling
+ sql """ DROP TABLE IF EXISTS performance_null_test; """
+ sql """
+ CREATE TABLE performance_null_test (
+ id INT,
+ value INT,
+ nullable_value INT
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 4
+ PROPERTIES("replication_num" = "1");
+ """
+
+ sql """
+ INSERT INTO performance_null_test
+ SELECT
+ number AS id,
+ number % 100 AS value,
+ CASE WHEN number % 5 = 0 THEN NULL ELSE number % 50 END AS nullable_value
+ FROM numbers("number" = "5000");
+ """
+
+ qt_select_perf_null """
+ SELECT
+ COUNT(*) AS total,
+ COUNT(py_perf_double(nullable_value)) AS non_null_count,
+ AVG(py_perf_double(nullable_value)) AS avg_result
+ FROM performance_null_test;
+ """
+
+ // Test 8: Sorting performance
+ qt_select_perf_order """
+ SELECT
+ id,
+ value,
+ py_perf_double(value) AS doubled_value
+ FROM performance_test_table
+ ORDER BY py_perf_double(value) DESC, id DESC
+ LIMIT 20;
+ """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_perf_double(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_perf_concat(STRING, STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_perf_complex(INT, DOUBLE);")
+ try_sql("DROP TABLE IF EXISTS performance_test_table;")
+ try_sql("DROP TABLE IF EXISTS performance_null_test;")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_performance_comparison.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_performance_comparison.groovy
new file mode 100644
index 0000000..35dc06c
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_performance_comparison.groovy
@@ -0,0 +1,239 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_performance_comparison") {
+ // Quick performance comparison: Scalar vs Vector Python UDF
+ // Lightweight test for quick performance checks
+
+ def scalarPyPath = """${context.file.parent}/udf_scripts/python_udf_scalar_ops.zip"""
+ def vectorPyPath = """${context.file.parent}/udf_scripts/python_udf_vector_ops.zip"""
+ scp_udf_file_to_all_be(scalarPyPath)
+ scp_udf_file_to_all_be(vectorPyPath)
+ def runtime_version = "3.10.12"
+
+ sql "CREATE DATABASE IF NOT EXISTS test_pythonudf_performance_comparison"
+ sql "USE test_pythonudf_performance_comparison"
+
+ // Quick test with a smaller dataset
+ def TEST_ROWS = 100000 // 100K rows for quick testing
+
+ log.info("=" * 80)
+ log.info("PYTHON UDF PERFORMANCE COMPARISON")
+ log.info("Quick test with ${TEST_ROWS} rows")
+ log.info("=" * 80)
+
+ try {
+ // Create test table
+ sql """ DROP TABLE IF EXISTS perf_comparison_table; """
+ sql """
+ CREATE TABLE perf_comparison_table (
+ id INT,
+ val1 INT,
+ val2 INT,
+ price DOUBLE,
+ discount DOUBLE,
+ text STRING
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 10
+ PROPERTIES("replication_num" = "1");
+ """
+
+ // Load test data using streamLoad from CSV file
+ log.info("Loading ${TEST_ROWS} rows using streamLoad from CSV file...")
+ def loadStartTime = System.currentTimeMillis()
+
+ streamLoad {
+ db 'test_pythonudf_performance_comparison'
+ table "perf_comparison_table"
+
+ // Set column separator to tab
+ set 'column_separator', '\t'
+
+ // File path relative to regression-test/data/pythonudf_p0/
+ file 'benchmark_data_100k.csv'
+
+ time 60000 // 60 seconds timeout
+
+ // Custom check callback
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(json.NumberTotalRows, json.NumberLoadedRows)
+ assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+ }
+ }
+
+ def loadEndTime = System.currentTimeMillis()
+ log.info("Data loaded in ${loadEndTime - loadStartTime} ms")
+
+ sql "sync"
+
+ def actualRows = sql "SELECT COUNT(*) FROM perf_comparison_table"
+ log.info("Verified row count: ${actualRows[0][0]}\nData ready. Starting performance tests...\n")
+
+ // Define test cases
+ def testCases = [
+ [
+ name: "Integer Multiplication",
+ scalar_symbol: "python_udf_scalar_ops.multiply_with_default",
+ vector_symbol: "python_udf_vector_ops.multiply_by_constant",
+ params: "(INT, INT, INT)",
+ returns: "INT",
+ query: "SELECT COUNT(*) FROM (SELECT id, {UDF}(val1, 10, 1) AS result FROM perf_comparison_table) t"
+ ],
+ [
+ name: "Price Calculation",
+ scalar_symbol: "python_udf_scalar_ops.calculate_discount_price",
+ vector_symbol: "python_udf_vector_ops.calculate_discount",
+ params: "(DOUBLE, DOUBLE)",
+ returns: "DOUBLE",
+ query: "SELECT COUNT(*) FROM (SELECT id, {UDF}(price, discount) AS result FROM perf_comparison_table) t"
+ ],
+ [
+ name: "String Length",
+ scalar_symbol: "python_udf_scalar_ops.string_length_custom",
+ vector_symbol: "python_udf_vector_ops.string_length",
+ params: "(STRING)",
+ returns: "INT",
+ query: "SELECT COUNT(*) FROM (SELECT id, {UDF}(text) AS result FROM perf_comparison_table) t"
+ ]
+ ]
+
+ def results = []
+
+ testCases.each { testCase ->
+ log.info("-" * 80)
+ log.info("Test: ${testCase.name}")
+ log.info("-" * 80)
+
+ // Test Scalar UDF
+ sql """ DROP FUNCTION IF EXISTS py_scalar_test${testCase.params}; """
+ sql """
+ CREATE FUNCTION py_scalar_test${testCase.params}
+ RETURNS ${testCase.returns}
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${scalarPyPath}",
+ "symbol" = "${testCase.scalar_symbol}",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ def scalarQuery = testCase.query.replace("{UDF}", "py_scalar_test")
+
+ // Warm up
+ sql scalarQuery
+
+ // Timed runs - execute 3 times and take the average
+ def scalarTimes = []
+ for (int i = 0; i < 3; i++) {
+ def start = System.currentTimeMillis()
+ sql scalarQuery
+ def end = System.currentTimeMillis()
+ scalarTimes.add(end - start)
+ }
+ def scalarAvg = scalarTimes.sum() / scalarTimes.size()
+
+ log.info(" Scalar UDF: ${scalarTimes} ms, Avg: ${scalarAvg} ms")
+
+ // Test Vector UDF
+ sql """ DROP FUNCTION IF EXISTS py_vector_test${testCase.params}; """
+ sql """
+ CREATE FUNCTION py_vector_test${testCase.params}
+ RETURNS ${testCase.returns}
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${vectorPyPath}",
+ "symbol" = "${testCase.vector_symbol}",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true",
+ "vectorized" = "true"
+ );
+ """
+
+ def vectorQuery = testCase.query.replace("{UDF}", "py_vector_test")
+
+ // Warm up
+ sql vectorQuery
+
+ // Timed runs - execute 3 times and take the average
+ def vectorTimes = []
+ for (int i = 0; i < 3; i++) {
+ def start = System.currentTimeMillis()
+ sql vectorQuery
+ def end = System.currentTimeMillis()
+ vectorTimes.add(end - start)
+ }
+ def vectorAvg = vectorTimes.sum() / vectorTimes.size()
+
+ log.info(" Vector UDF: ${vectorTimes} ms, Avg: ${vectorAvg} ms")
+
+ def speedup = scalarAvg / vectorAvg
+ def improvement = ((scalarAvg - vectorAvg) / scalarAvg * 100)
+
+ log.info(" Speedup: ${String.format('%.2f', speedup)}x")
+ log.info(" Improvement: ${String.format('%.1f', improvement)}%")
+
+ results.add([
+ name: testCase.name,
+ scalar: scalarAvg,
+ vector: vectorAvg,
+ speedup: speedup,
+ improvement: improvement
+ ])
+
+ // Cleanup
+ sql """ DROP FUNCTION IF EXISTS py_scalar_test${testCase.params}; """
+ sql """ DROP FUNCTION IF EXISTS py_vector_test${testCase.params}; """
+ }
+
+ // Print summary
+ def summary = new StringBuilder()
+ summary.append("\n" + "=" * 80 + "\n")
+ summary.append("PERFORMANCE COMPARISON SUMMARY\n")
+ summary.append("=" * 80 + "\n")
+ summary.append(String.format("%-30s %12s %12s %10s %12s", "Test Case", "Scalar(ms)", "Vector(ms)", "Speedup", "Improvement") + "\n")
+ summary.append("-" * 80 + "\n")
+
+ results.each { r ->
+ summary.append(String.format("%-30s %12.1f %12.1f %9.2fx %11.1f%%",
+ r.name, r.scalar, r.vector, r.speedup, r.improvement) + "\n")
+ }
+
+ def avgSpeedup = results.collect { it.speedup }.sum() / results.size()
+ def avgImprovement = results.collect { it.improvement }.sum() / results.size()
+
+ summary.append("-" * 80 + "\n")
+ summary.append(String.format("%-30s %12s %12s %9.2fx %11.1f%%",
+ "AVERAGE", "-", "-", avgSpeedup, avgImprovement) + "\n")
+ summary.append("=" * 80)
+
+ log.info(summary.toString())
+
+ } finally {
+ // Cleanup
+ try_sql("DROP TABLE IF EXISTS perf_comparison_table;")
+ try_sql("DROP DATABASE IF EXISTS test_pythonudf_performance_comparison;")
+ log.info("\nPerformance comparison completed.")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_ret_map.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_ret_map.groovy
new file mode 100644
index 0000000..7567a78
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_ret_map.groovy
@@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_ret_map") {
+ def pyPath = """${context.file.parent}/udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+ log.info("Python Zip path: ${pyPath}".toString())
+ try {
+ try_sql("DROP FUNCTION IF EXISTS retii(map<int,int>);")
+ try_sql("DROP FUNCTION IF EXISTS retss(map<String,String>);")
+ try_sql("DROP FUNCTION IF EXISTS retid(map<int,Double>);")
+ try_sql("DROP FUNCTION IF EXISTS retidss(int ,double);")
+ try_sql("DROP TABLE IF EXISTS db")
+ try_sql("DROP TABLE IF EXISTS dbss")
+ sql """
+ CREATE TABLE IF NOT EXISTS db(
+ `id` INT NULL COMMENT "",
+ `i` INT NULL COMMENT "",
+ `d` Double NULL COMMENT "",
+ `mii` Map<INT, INT> NULL COMMENT "",
+ `mid` Map<INT, Double> NULL COMMENT ""
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2");
+ """
+ sql """ INSERT INTO db VALUES(1, 10,1.1,{1:1,10:1,100:1},{1:1.1,11:11.1}); """
+ sql """ INSERT INTO db VALUES(2, 20,2.2,{2:2,20:2,200:2},{2:2.2,22:22.2}); """
+
+ sql """
+ CREATE TABLE IF NOT EXISTS dbss(
+ `id` INT NULL COMMENT "",
+ `m` Map<String, String> NULL COMMENT ""
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "storage_format" = "V2");
+ """
+
+ sql """ INSERT INTO dbss VALUES(1,{"abc":"efg","h":"i"}); """
+ sql """ INSERT INTO dbss VALUES(2,{"j":"k"}); """
+
+ sql """
+ CREATE FUNCTION retii(map<int,int>) RETURNS map<int,int> PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="map_ret_int_int_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+
+ sql """
+ CREATE FUNCTION retss(map<String,String>) RETURNS map<String,String> PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="map_ret_string_string_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+
+ sql """
+ CREATE FUNCTION retid(map<int,Double>) RETURNS map<int,Double> PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="map_ret_int_double_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+
+ sql """
+ CREATE FUNCTION retidss(int, double) RETURNS map<String,String> PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="map_int_double_ret_string_string_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ );
+ """
+
+ qt_select_1 """ select mid, retid(mid) from db order by id; """
+
+ qt_select_2 """ select mii, retii(mii) from db order by id; """
+
+ qt_select_3 """ select i, d, retidss(i, d) from db order by id; """
+
+ qt_select_4 """ select m, retss(m) from dbss order by id; """
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS retii(map<int,int>);")
+ try_sql("DROP FUNCTION IF EXISTS retss(map<String,String>);")
+ try_sql("DROP FUNCTION IF EXISTS retid(map<int,Double>);")
+ try_sql("DROP FUNCTION IF EXISTS retidss(int ,double);")
+ try_sql("DROP TABLE IF EXISTS db")
+ try_sql("DROP TABLE IF EXISTS dbss")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_runtime_version.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_runtime_version.groovy
new file mode 100644
index 0000000..675942f
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_runtime_version.groovy
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_runtime_version") {
+ // Test different configurations of runtime_version parameter
+
+ // Temporarily disabled; remove this early return to re-enable the suite
+ return
+
+ try {
+ // Test 1: Specify short version number (x.xx format) with inline code
+ sql """ DROP FUNCTION IF EXISTS py_version_test_short(INT); """
+ sql """
+ CREATE FUNCTION py_version_test_short(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "3.12"
+ )
+ AS \$\$
+def evaluate(x):
+ if x is None:
+ return None
+ return x * 2
+\$\$;
+ """
+
+ qt_select_version_short """ SELECT py_version_test_short(21) AS result; """
+
+ // Test 2: Specify full version number (x.xx.xx format) with inline code
+ sql """ DROP FUNCTION IF EXISTS py_version_test_full(INT); """
+ sql """
+ CREATE FUNCTION py_version_test_full(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "3.12.10"
+ )
+ AS \$\$
+def evaluate(x):
+ if x is None:
+ return None
+ return x * 3
+\$\$;
+ """
+
+ qt_select_version_full """ SELECT py_version_test_full(10) AS result; """
+
+ // Test 3: Do not specify runtime_version (use default)
+ sql """ DROP FUNCTION IF EXISTS py_version_test_default(INT); """
+ sql """
+ CREATE FUNCTION py_version_test_default(INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate"
+ )
+ AS \$\$
+def evaluate(x):
+ if x is None:
+ return None
+ return x + 100
+\$\$;
+ """
+
+ qt_select_version_default """ SELECT py_version_test_default(50) AS result; """
+
+ // Test 4: String function with runtime_version
+ sql """ DROP FUNCTION IF EXISTS py_version_string_test(STRING); """
+ sql """
+ CREATE FUNCTION py_version_string_test(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "3.12"
+ )
+ AS \$\$
+def evaluate(s):
+ if s is None:
+ return None
+ return s.upper()
+\$\$;
+ """
+
+ qt_select_version_string """ SELECT py_version_string_test('hello') AS result; """
+
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS py_version_test_short(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_version_test_full(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_version_test_default(INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_version_string_test(STRING);")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_schema_check.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_schema_check.groovy
new file mode 100644
index 0000000..07d6183
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_schema_check.groovy
@@ -0,0 +1,544 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_schema_check") {
+ // Test type compatibility in Python UDF
+ // Callers may pass compatible types instead of exactly matching types
+ // For example: TINYINT can be used where INT is expected
+
+ def runtime_version = "3.10.12"
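+
+ // For the vectorized cases below, type promotion is assumed to happen before
+ // the batch reaches Python, so the UDF sees one consistent dtype. A sketch of
+ // what the framework conceptually does (hypothetical helper, not this PR's API):
+ //
+ //   import pandas as pd
+ //
+ //   def promote(col: pd.Series, target: str) -> pd.Series:
+ //       # e.g. a TINYINT batch cast up to the declared INT argument type
+ //       return col.astype(target)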
+
+ try {
+ // Create test table with various integer types
+ sql """ DROP TABLE IF EXISTS test_type_compat_table; """
+ sql """
+ CREATE TABLE test_type_compat_table (
+ id INT,
+ tiny_val TINYINT,
+ small_val SMALLINT,
+ int_val INT,
+ big_val BIGINT,
+ float_val FLOAT,
+ double_val DOUBLE,
+ str_val STRING,
+ bool_val BOOLEAN,
+ date_val DATE
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 3
+ PROPERTIES("replication_num" = "1");
+ """
+
+ // Insert test data
+ sql """
+ INSERT INTO test_type_compat_table VALUES
+ (1, 10, 100, 1000, 10000, 1.5, 10.5, 'test1', true, '2024-01-01'),
+ (2, 20, 200, 2000, 20000, 2.5, 20.5, 'test2', false, '2024-01-02'),
+ (3, 30, 300, 3000, 30000, 3.5, 30.5, 'test3', true, '2024-01-03'),
+ (4, 40, 400, 4000, 40000, 4.5, 40.5, 'test4', false, '2024-01-04'),
+ (5, 50, 500, 5000, 50000, 5.5, 50.5, 'test5', true, '2024-01-05');
+ """
+
+ // ==================== Test 1: Integer Type Promotion (TINYINT -> INT) ====================
+ log.info("=== Test 1: TINYINT can be used where INT is expected ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_add_int(INT, INT); """
+ sql """
+ CREATE FUNCTION py_add_int(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_add_int",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def py_add_int(a, b):
+ if a is None or b is None:
+ return None
+ return a + b
+\$\$;
+ """
+
+ // Pass TINYINT where INT is expected
+ qt_select_1 """
+ SELECT
+ id,
+ tiny_val,
+ int_val,
+ py_add_int(tiny_val, int_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 2: Integer Type Promotion (SMALLINT -> INT) ====================
+ log.info("=== Test 2: SMALLINT can be used where INT is expected ===")
+
+ qt_select_2 """
+ SELECT
+ id,
+ small_val,
+ int_val,
+ py_add_int(small_val, int_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 3: Integer Type Promotion (INT -> BIGINT) ====================
+ log.info("=== Test 3: INT can be used where BIGINT is expected ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_add_bigint(BIGINT, BIGINT); """
+ sql """
+ CREATE FUNCTION py_add_bigint(BIGINT, BIGINT)
+ RETURNS BIGINT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_add_bigint",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def py_add_bigint(a, b):
+ return a + b
+\$\$;
+ """
+
+ qt_select_3 """
+ SELECT
+ id,
+ int_val,
+ big_val,
+ py_add_bigint(int_val, big_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 4: Float Type Promotion (FLOAT -> DOUBLE) ====================
+ log.info("=== Test 4: FLOAT can be used where DOUBLE is expected ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_add_double(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_add_double(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_add_double",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def py_add_double(a, b):
+ return a + b
+\$\$;
+ """
+
+ qt_select_4 """
+ SELECT
+ id,
+ float_val,
+ double_val,
+ py_add_double(float_val, double_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 5: Mixed Integer Types ====================
+ log.info("=== Test 5: Mixed integer types (TINYINT, SMALLINT, INT) ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_sum_three(INT, INT, INT); """
+ sql """
+ CREATE FUNCTION py_sum_three(INT, INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_sum_three",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def py_sum_three(a, b, c):
+ return a + b + c
+\$\$;
+ """
+
+ qt_select_5 """
+ SELECT
+ id,
+ tiny_val,
+ small_val,
+ int_val,
+ py_sum_three(tiny_val, small_val, int_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 6: Vectorized UDF with Type Promotion ====================
+ log.info("=== Test 6: Vectorized UDF with integer type promotion ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_multiply(INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_multiply(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_multiply",
+ "runtime_version" = "${runtime_version}",
+ "vectorized" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_multiply(a: pd.Series, b: pd.Series) -> pd.Series:
+ return a * b
+\$\$;
+ """
+
+ // Use TINYINT and SMALLINT where INT is expected
+ qt_select_6 """
+ SELECT
+ id,
+ tiny_val,
+ small_val,
+ py_vec_multiply(tiny_val, small_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 7: Vectorized UDF with Float Promotion ====================
+ log.info("=== Test 7: Vectorized UDF with float type promotion ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_divide(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_divide(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_divide",
+ "runtime_version" = "${runtime_version}",
+ "vectorized" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_divide(a: pd.Series, b: pd.Series) -> pd.Series:
+ return a / b
+\$\$;
+ """
+
+ // Use FLOAT where DOUBLE is expected
+ qt_select_7 """
+ SELECT
+ id,
+ float_val,
+ double_val,
+ py_vec_divide(double_val, float_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 8: Mixed Types in Vectorized UDF ====================
+ log.info("=== Test 8: Mixed integer and float types in vectorized UDF ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_calc(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_calc(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_calc",
+ "runtime_version" = "${runtime_version}",
+ "vectorized" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_calc(a: pd.Series, b: pd.Series) -> pd.Series:
+ return a * 2.0 + b
+\$\$;
+ """
+
+ // Use INT and FLOAT where DOUBLE is expected
+ qt_select_8 """
+ SELECT
+ id,
+ int_val,
+ float_val,
+ py_vec_calc(int_val, float_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 9: String Type Compatibility ====================
+ log.info("=== Test 9: String type compatibility ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_string_upper(STRING); """
+ sql """
+ CREATE FUNCTION py_string_upper(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_string_upper",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def py_string_upper(s):
+ return s.upper() if s else None
+\$\$;
+ """
+
+ qt_select_9 """
+ SELECT
+ id,
+ str_val,
+ py_string_upper(str_val) AS upper_str
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 10: Boolean Type ====================
+ log.info("=== Test 10: Boolean type compatibility ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_bool_not(BOOLEAN); """
+ sql """
+ CREATE FUNCTION py_bool_not(BOOLEAN)
+ RETURNS BOOLEAN
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_bool_not",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def py_bool_not(b):
+ return not b if b is not None else None
+\$\$;
+ """
+
+ qt_select_10 """
+ SELECT
+ id,
+ bool_val,
+ py_bool_not(bool_val) AS negated
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 11: Complex Type Promotion Chain ====================
+ log.info("=== Test 11: Complex type promotion chain (TINYINT -> BIGINT) ===")
+
+ qt_select_11 """
+ SELECT
+ id,
+ tiny_val,
+ big_val,
+ py_add_bigint(tiny_val, big_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 12: Vectorized with Mixed Scalar and Series ====================
+ log.info("=== Test 12: Vectorized UDF with type promotion and mixed params ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_vec_scale(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_vec_scale(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_vec_scale",
+ "runtime_version" = "${runtime_version}",
+ "vectorized" = "true"
+ )
+ AS \$\$
+import pandas as pd
+
+def py_vec_scale(values: pd.Series, factor: float) -> pd.Series:
+ return values * factor
+\$\$;
+ """
+
+ // Use INT (promoted to DOUBLE) with scalar FLOAT
+ qt_select_12 """
+ SELECT
+ id,
+ int_val,
+ py_vec_scale(int_val, 1.5) AS scaled
+ FROM test_type_compat_table
+ ORDER BY id;
+ """
+
+ // ==================== Test 13: Type Incompatibility - STRING to INT ====================
+ log.info("=== Test 13: Type incompatibility - STRING cannot be used where INT is expected ===")
+
+ qt_select_13 """
+ SELECT
+ id,
+ str_val,
+ py_add_int(str_val, int_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id
+ LIMIT 1;
+ """
+
+ // ==================== Test 14: Type Incompatibility - BIGINT to INT ====================
+ log.info("=== Test 14: Type incompatibility - BIGINT cannot be downcast to INT ===")
+
+ qt_select_14 """
+ SELECT
+ id,
+ big_val,
+ py_add_int(big_val, int_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id
+ LIMIT 1;
+ """
+
+ // ==================== Test 15: Type Incompatibility - DOUBLE to FLOAT ====================
+ log.info("=== Test 15: Type incompatibility - DOUBLE cannot be downcast to FLOAT ===")
+
+ sql """ DROP FUNCTION IF EXISTS py_add_float(FLOAT, FLOAT); """
+ sql """
+ CREATE FUNCTION py_add_float(FLOAT, FLOAT)
+ RETURNS FLOAT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "py_add_float",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def py_add_float(a, b):
+ return a + b
+\$\$;
+ """
+
+ qt_select_15 """
+ SELECT
+ id,
+ double_val,
+ py_add_float(double_val, float_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id
+ LIMIT 1;
+ """
+
+ // ==================== Test 16: Type Incompatibility - BOOLEAN to INT ====================
+ log.info("=== Test 16: Type incompatibility - BOOLEAN cannot be used where INT is expected ===")
+
+ qt_select_16 """
+ SELECT
+ id,
+ bool_val,
+ py_add_int(bool_val, int_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id
+ LIMIT 1;
+ """
+
+ // ==================== Test 17: Type Incompatibility - DATE to STRING ====================
+ log.info("=== Test 17: Type incompatibility - DATE cannot be directly used where STRING is expected ===")
+
+ qt_select_17 """
+ SELECT
+ id,
+ date_val,
+ py_string_upper(date_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id
+ LIMIT 1;
+ """
+
+ // ==================== Test 18: Type Incompatibility - INT to BOOLEAN ====================
+ log.info("=== Test 18: Type incompatibility - INT cannot be used where BOOLEAN is expected ===")
+
+ qt_select_18 """
+ SELECT
+ id,
+ int_val,
+ py_bool_not(int_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id
+ LIMIT 1;
+ """
+
+ // ==================== Test 19: Type Incompatibility in Vectorized UDF - STRING to INT ====================
+ log.info("=== Test 19: Type incompatibility in vectorized UDF - STRING to INT ===")
+
+ qt_select_19 """
+ SELECT
+ id,
+ str_val,
+ py_vec_multiply(str_val, int_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id
+ LIMIT 1;
+ """
+
+ // ==================== Test 20: Type Incompatibility - Mixed incompatible types ====================
+ log.info("=== Test 20: Type incompatibility - Mixed incompatible types ===")
+
+ qt_select_20 """
+ SELECT
+ id,
+ str_val,
+ bool_val,
+ py_add_int(str_val, bool_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id
+ LIMIT 1;
+ """
+
+ // ==================== Test 21: Wrong number of arguments ====================
+ log.info("=== Test 21: Wrong number of arguments ===")
+
+ test {
+ sql """
+ SELECT
+ id,
+ py_add_int(int_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id
+ LIMIT 1;
+ """
+ exception "Can not found function 'py_add_int' which has 1 arity. Candidate functions are: [py_add_int(INT, INT)]"
+ }
+
+ // ==================== Test 22: Type Incompatibility - FLOAT to INT ====================
+ log.info("=== Test 22: Type incompatibility - FLOAT cannot be used where INT is expected ===")
+
+ qt_select_22 """
+ SELECT
+ id,
+ float_val,
+ py_add_int(float_val, int_val) AS result
+ FROM test_type_compat_table
+ ORDER BY id
+ LIMIT 1;
+ """
+
+ log.info("All type compatibility tests (including negative tests) passed!")
+
+ } finally {
+ // Cleanup
+ sql """ DROP FUNCTION IF EXISTS py_add_int(INT, INT); """
+ sql """ DROP FUNCTION IF EXISTS py_add_bigint(BIGINT, BIGINT); """
+ sql """ DROP FUNCTION IF EXISTS py_add_double(DOUBLE, DOUBLE); """
+ sql """ DROP FUNCTION IF EXISTS py_add_float(FLOAT, FLOAT); """
+ sql """ DROP FUNCTION IF EXISTS py_sum_three(INT, INT, INT); """
+ sql """ DROP FUNCTION IF EXISTS py_vec_multiply(INT, INT); """
+ sql """ DROP FUNCTION IF EXISTS py_vec_divide(DOUBLE, DOUBLE); """
+ sql """ DROP FUNCTION IF EXISTS py_vec_calc(DOUBLE, DOUBLE); """
+ sql """ DROP FUNCTION IF EXISTS py_string_upper(STRING); """
+ sql """ DROP FUNCTION IF EXISTS py_bool_not(BOOLEAN); """
+ sql """ DROP FUNCTION IF EXISTS py_vec_scale(DOUBLE, DOUBLE); """
+ sql """ DROP TABLE IF EXISTS test_type_compat_table; """
+ }
+}
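
For reference, the inline `AS $$ ... $$` body registered in Test 15 is an ordinary Python function. A minimal local sanity check (a sketch run outside Doris, with made-up inputs) could be:

```python
# Standalone equivalent of the inline body from Test 15 (hypothetical local check).
def py_add_float(a, b):
    return a + b

# FLOAT addition: 1.5 + 2.25 == 3.75, checked with a small tolerance.
assert abs(py_add_float(1.5, 2.25) - 3.75) < 1e-9
```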
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_stress.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_stress.groovy
new file mode 100644
index 0000000..e3b0ff8
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_stress.groovy
@@ -0,0 +1,314 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_stress") {
+ // Stress test for Python UDF - configurable dataset size
+ // This test is designed to push Python UDF to its limits
+
+ def scalarPyPath = """${context.file.parent}/udf_scripts/python_udf_scalar_ops.zip"""
+ def vectorPyPath = """${context.file.parent}/udf_scripts/python_udf_vector_ops.zip"""
+ scp_udf_file_to_all_be(scalarPyPath)
+ scp_udf_file_to_all_be(vectorPyPath)
+ def runtime_version = "3.10.12"
+
+ sql "CREATE DATABASE IF NOT EXISTS test_pythonudf_stress"
+ sql "USE test_pythonudf_stress"
+
+ // Configuration: Adjust these for different stress levels
+ def TOTAL_ROWS = 5000000 // 5 million rows (change to 10M, 50M for extreme stress)
+ def BATCH_SIZE = 50000 // Insert batch size
+ def BUCKETS = 32 // Number of buckets for distribution
+
+ log.info("\n" + "=" * 80 + "\nPYTHON UDF STRESS TEST\n" + "=" * 80 + "\nConfiguration:\n" +
+ " Total Rows: ${TOTAL_ROWS}\n" +
+ " Batch Size: ${BATCH_SIZE}\n" +
+ " Buckets: ${BUCKETS}\n" +
+ "=" * 80)
+
+ try {
+ // ==================== Create Stress Test Table ====================
+ sql """ DROP TABLE IF EXISTS python_udf_stress_table; """
+ sql """
+ CREATE TABLE python_udf_stress_table (
+ id BIGINT,
+ category INT,
+ value1 INT,
+ value2 INT,
+ price DOUBLE,
+ discount DOUBLE,
+ name STRING,
+ description STRING,
+ email STRING,
+ is_active BOOLEAN,
+ created_date DATE
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS ${BUCKETS}
+ PROPERTIES("replication_num" = "1");
+ """
+
+ log.info("Loading ${TOTAL_ROWS} rows using streamLoad from CSV file...")
+ def loadStartTime = System.currentTimeMillis()
+
+ streamLoad {
+ db 'test_pythonudf_stress'
+ table "python_udf_stress_table"
+ set 'column_separator', '\t'
+ file 'benchmark_data_5m.csv'
+ time 300000 // 300 seconds (5 minutes) timeout
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertEquals(json.NumberTotalRows, json.NumberLoadedRows)
+ assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+ }
+ }
+
+ def loadEndTime = System.currentTimeMillis()
+ log.info("Data loaded in ${loadEndTime - loadStartTime} ms (${String.format('%.2f', TOTAL_ROWS / ((loadEndTime - loadStartTime) / 1000.0))} rows/sec)")
+
+ sql "sync"
+
+ def rowCount = sql "SELECT COUNT(*) FROM python_udf_stress_table"
+ log.info("Verified row count: ${rowCount[0][0]}")
+
+ // ==================== Define UDFs ====================
+
+ // Scalar UDF - Simple
+ sql """ DROP FUNCTION IF EXISTS py_calc_final_price(DOUBLE, DOUBLE); """
+ sql """
+ CREATE FUNCTION py_calc_final_price(DOUBLE, DOUBLE)
+ RETURNS DOUBLE
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${scalarPyPath}",
+ "symbol" = "python_udf_scalar_ops.calculate_discount_price",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ // Scalar UDF - Complex
+ sql """ DROP FUNCTION IF EXISTS py_extract_domain(STRING); """
+ sql """
+ CREATE FUNCTION py_extract_domain(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${scalarPyPath}",
+ "symbol" = "python_udf_scalar_ops.extract_domain",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ // Vector UDF - Simple
+ sql """ DROP FUNCTION IF EXISTS py_vec_multiply(INT, INT); """
+ sql """
+ CREATE FUNCTION py_vec_multiply(INT, INT)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${vectorPyPath}",
+ "symbol" = "python_udf_vector_ops.multiply_by_constant",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ // Vector UDF - String
+ sql """ DROP FUNCTION IF EXISTS py_vec_upper(STRING); """
+ sql """
+ CREATE FUNCTION py_vec_upper(STRING)
+ RETURNS STRING
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "file" = "file://${vectorPyPath}",
+ "symbol" = "python_udf_vector_ops.to_uppercase",
+ "runtime_version" = "${runtime_version}",
+ "always_nullable" = "true"
+ );
+ """
+
+ log.info("=" * 80)
+ log.info("STRESS TEST EXECUTION")
+ log.info("=" * 80)
+
+ // ==================== Stress Test 1: Full Table Scan with Scalar UDF ====================
+ log.info("Test 1: Full table scan with scalar UDF (${TOTAL_ROWS} rows)")
+
+ def test1Start = System.currentTimeMillis()
+ def result1 = sql """
+ SELECT COUNT(*), AVG(final_price)
+ FROM (
+ SELECT id, py_calc_final_price(price, discount) AS final_price
+ FROM python_udf_stress_table
+ ) t;
+ """
+ def test1End = System.currentTimeMillis()
+ def test1Time = test1End - test1Start
+ log.info(" Result: ${result1[0]}")
+ log.info(" Time: ${test1Time} ms")
+ log.info(" Throughput: ${String.format('%.2f', TOTAL_ROWS / (test1Time / 1000.0))} rows/sec")
+
+ // ==================== Stress Test 2: Full Table Scan with Vector UDF ====================
+ log.info("Test 2: Full table scan with vector UDF (${TOTAL_ROWS} rows)")
+
+ def test2Start = System.currentTimeMillis()
+ def result2 = sql """
+ SELECT COUNT(*), AVG(result)
+ FROM (
+ SELECT id, py_vec_multiply(value1, 10) AS result
+ FROM python_udf_stress_table
+ ) t;
+ """
+ def test2End = System.currentTimeMillis()
+ def test2Time = test2End - test2Start
+ log.info(" Result: ${result2[0]}")
+ log.info(" Time: ${test2Time} ms")
+ log.info(" Throughput: ${String.format('%.2f', TOTAL_ROWS / (test2Time / 1000.0))} rows/sec")
+ log.info(" Speedup vs Scalar: ${String.format('%.2f', test1Time / (test2Time * 1.0))}x")
+
+ // ==================== Stress Test 3: String Processing with Vector UDF ====================
+ log.info("Test 3: String processing with vector UDF (${TOTAL_ROWS} rows)")
+
+ def test3Start = System.currentTimeMillis()
+ def result3 = sql """
+ SELECT COUNT(DISTINCT upper_name)
+ FROM (
+ SELECT py_vec_upper(name) AS upper_name
+ FROM python_udf_stress_table
+ ) t;
+ """
+ def test3End = System.currentTimeMillis()
+ def test3Time = test3End - test3Start
+ log.info(" Result: ${result3[0]}")
+ log.info(" Time: ${test3Time} ms")
+ log.info(" Throughput: ${String.format('%.2f', TOTAL_ROWS / (test3Time / 1000.0))} rows/sec")
+
+ // ==================== Stress Test 4: Complex Aggregation ====================
+ log.info("Test 4: Complex aggregation with multiple UDFs")
+
+ def test4Start = System.currentTimeMillis()
+ def result4 = sql """
+ SELECT
+ category,
+ COUNT(*) AS cnt,
+ AVG(py_calc_final_price(price, discount)) AS avg_final_price,
+ AVG(py_vec_multiply(value1, 10)) AS avg_multiplied
+ FROM python_udf_stress_table
+ GROUP BY category
+ ORDER BY category
+ LIMIT 20;
+ """
+ def test4End = System.currentTimeMillis()
+ def test4Time = test4End - test4Start
+ log.info(" Processed ${result4.size()} groups")
+ log.info(" Time: ${test4Time} ms")
+
+ // ==================== Stress Test 5: Join with UDF ====================
+ log.info("Test 5: Self-join with UDF (limited to 1M rows)")
+
+ def test5Start = System.currentTimeMillis()
+ def result5 = sql """
+ SELECT COUNT(*)
+ FROM (
+ SELECT
+ a.id,
+ py_vec_multiply(a.value1, b.value2) AS result
+ FROM python_udf_stress_table a
+ JOIN python_udf_stress_table b ON a.category = b.category
+ WHERE a.id < 100000 AND b.id < 100000
+ ) t;
+ """
+ def test5End = System.currentTimeMillis()
+ def test5Time = test5End - test5Start
+ log.info(" Result: ${result5[0]}")
+ log.info(" Time: ${test5Time} ms")
+
+ // ==================== Stress Test 6: Concurrent UDF Calls ====================
+ log.info("Test 6: Multiple UDFs in single query")
+
+ def test6Start = System.currentTimeMillis()
+ def result6 = sql """
+ SELECT COUNT(*)
+ FROM (
+ SELECT
+ id,
+ py_calc_final_price(price, discount) AS final_price,
+ py_extract_domain(email) AS domain,
+ py_vec_multiply(value1, 5) AS vec_result,
+ py_vec_upper(name) AS upper_name
+ FROM python_udf_stress_table
+ LIMIT 500000
+ ) t;
+ """
+ def test6End = System.currentTimeMillis()
+ def test6Time = test6End - test6Start
+ log.info(" Result: ${result6[0]}")
+ log.info(" Time: ${test6Time} ms")
+ log.info(" Throughput: ${String.format('%.2f', 500000 / (test6Time / 1000.0))} rows/sec")
+
+ // ==================== Stress Test 7: Filter with UDF ====================
+ log.info("Test 7: Filter with UDF predicate")
+
+ def test7Start = System.currentTimeMillis()
+ def result7 = sql """
+ SELECT COUNT(*)
+ FROM python_udf_stress_table
+ WHERE py_vec_multiply(value1, 2) > 1000;
+ """
+ def test7End = System.currentTimeMillis()
+ def test7Time = test7End - test7Start
+ log.info(" Result: ${result7[0]}")
+ log.info(" Time: ${test7Time} ms")
+
+ // ==================== Final Summary ====================
+ log.info("=" * 80 + "\nSTRESS TEST SUMMARY\n" + "=" * 80 + "\nDataset: ${TOTAL_ROWS} rows\n\n" +
+ "Test Results:\n" +
+ " 1. Scalar UDF full scan: ${test1Time} ms (${String.format('%.2f', TOTAL_ROWS / (test1Time / 1000.0))} rows/sec)\n" +
+ " 2. Vector UDF full scan: ${test2Time} ms (${String.format('%.2f', TOTAL_ROWS / (test2Time / 1000.0))} rows/sec)\n" +
+ " 3. Vector string processing: ${test3Time} ms (${String.format('%.2f', TOTAL_ROWS / (test3Time / 1000.0))} rows/sec)\n" +
+ " 4. Complex aggregation: ${test4Time} ms\n" +
+ " 5. Join with UDF: ${test5Time} ms\n" +
+ " 6. Multiple UDFs: ${test6Time} ms (${String.format('%.2f', 500000 / (test6Time / 1000.0))} rows/sec)\n" +
+ " 7. Filter with UDF: ${test7Time} ms\n\n" +
+ "Performance Metrics:\n" +
+ " Vector vs Scalar speedup: ${String.format('%.2fx', test1Time / (test2Time * 1.0))}\n" +
+ " Total test time: ${(test7End - test1Start) / 1000.0} seconds\n" +
+ "=" * 80)
+
+ } finally {
+ // Cleanup
+ log.info("Cleaning up stress test resources...")
+
+ try_sql("DROP FUNCTION IF EXISTS py_calc_final_price(DOUBLE, DOUBLE);")
+ try_sql("DROP FUNCTION IF EXISTS py_extract_domain(STRING);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_multiply(INT, INT);")
+ try_sql("DROP FUNCTION IF EXISTS py_vec_upper(STRING);")
+
+ try_sql("DROP TABLE IF EXISTS python_udf_stress_table;")
+ try_sql("DROP DATABASE IF EXISTS test_pythonudf_stress;")
+ log.info("Stress test cleanup completed.")
+ }
+}
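
The suite stream-loads `benchmark_data_5m.csv`, which is not included in this patch. A hypothetical generator for a file of the same shape (11 tab-separated columns matching `python_udf_stress_table`; the real fixture's value distributions are unknown, so everything below is an assumption for illustration) might look like:

```python
# Hypothetical generator for a file shaped like benchmark_data_5m.csv.
# Column order and types follow the CREATE TABLE in the suite above;
# the actual fixture's contents are not part of this patch.
import random

def make_row(i):
    return "\t".join([
        str(i),                             # id           BIGINT
        str(i % 100),                       # category     INT
        str(random.randint(0, 10000)),      # value1       INT
        str(random.randint(0, 10000)),      # value2       INT
        f"{random.uniform(1, 1000):.2f}",   # price        DOUBLE
        f"{random.uniform(0, 50):.2f}",     # discount     DOUBLE
        f"name_{i % 1000}",                 # name         STRING
        f"description for row {i}",         # description  STRING
        f"user{i}@example.com",             # email        STRING
        str(i % 2),                         # is_active    BOOLEAN (0/1)
        "2024-01-01",                       # created_date DATE
    ])

with open("benchmark_data_5m.csv", "w") as f:
    for i in range(5_000_000):
        f.write(make_row(i) + "\n")
```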
diff --git a/regression-test/suites/pythonudf_p0/test_pythonudf_string.groovy b/regression-test/suites/pythonudf_p0/test_pythonudf_string.groovy
new file mode 100644
index 0000000..358023d
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/test_pythonudf_string.groovy
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_string") {
+ def tableName = "test_pythonudf_string"
+ def pyPath = """${context.file.parent}/udf_scripts/pyudf.zip"""
+ scp_udf_file_to_all_be(pyPath)
+ def runtime_version = "3.10.12"
+ log.info("Python Zip path: ${pyPath}".toString())
+ try {
+ sql """ DROP TABLE IF EXISTS test_pythonudf_string """
+ sql """ DROP TABLE IF EXISTS test_pythonudf_string_2 """
+ sql """
+ CREATE TABLE IF NOT EXISTS test_pythonudf_string (
+ `user_id` INT NOT NULL COMMENT "user id",
+ `char_col` CHAR NOT NULL COMMENT "",
+ `varchar_col` VARCHAR(10) NOT NULL COMMENT "",
+ `string_col` STRING NOT NULL COMMENT ""
+ )
+ DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
+ """
+ StringBuilder sb = new StringBuilder()
+ int i = 1
+ for (; i < 9; i++) {
+ sb.append("""
+ (${i}, '${i}','abcdefg${i}','poiuytre${i}abcdefg'),
+ """)
+ }
+ sb.append("""
+ (${i}, '${i}','abcdefg${i}','poiuytre${i}abcdefg')
+ """)
+ sql """ INSERT INTO test_pythonudf_string VALUES
+ ${sb.toString()}
+ """
+ sql """ create table test_pythonudf_string_2 like test_pythonudf_string """
+ sql """ insert into test_pythonudf_string_2 select * from test_pythonudf_string; """
+ qt_select_default """ SELECT * FROM test_pythonudf_string t ORDER BY user_id; """
+ qt_select_default_2 """ SELECT * FROM test_pythonudf_string_2 t ORDER BY user_id; """
+
+ File path = new File(pyPath)
+ if (!path.exists()) {
+ throw new IllegalStateException("""${pyPath} doesn't exist! """)
+ }
+
+ sql """ CREATE FUNCTION python_udf_string_test(string, int, int) RETURNS string PROPERTIES (
+ "file"="file://${pyPath}",
+ "symbol"="string_test.evaluate",
+ "type"="PYTHON_UDF",
+ "always_nullable" = "true",
+ "runtime_version" = "${runtime_version}"
+ ); """
+
+ qt_select """ SELECT python_udf_string_test(varchar_col, 2, 3) result FROM test_pythonudf_string ORDER BY result; """
+ qt_select """ SELECT python_udf_string_test(string_col, 2, 3) result FROM test_pythonudf_string ORDER BY result; """
+ qt_select """ SELECT python_udf_string_test('abcdef', 2, 3), python_udf_string_test('abcdefg', 2, 3) result FROM test_pythonudf_string ORDER BY result; """
+
+ qt_select_4 """
+ SELECT
+ COALESCE(
+ python_udf_string_test(test_pythonudf_string.varchar_col, 2, 3),
+ 'not1'
+ ),
+ COALESCE(
+ python_udf_string_test(test_pythonudf_string.varchar_col, 2, 3),
+ 'not2'
+ )
+ FROM
+ test_pythonudf_string
+ JOIN test_pythonudf_string_2 ON test_pythonudf_string.user_id = test_pythonudf_string_2.user_id order by 1,2;
+ """
+ } finally {
+ try_sql("DROP FUNCTION IF EXISTS python_udf_string_test(string, int, int);")
+ try_sql("DROP TABLE IF EXISTS test_pythonudf_string")
+ try_sql("DROP TABLE IF EXISTS test_pythonudf_string_2")
+ }
+}
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/array_int_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/array_int_test.py
new file mode 100644
index 0000000..ef30209
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/array_int_test.py
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(res):
+ value = 0
+ for data in res:
+ if data is not None:
+ value += data
+ return value
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/array_return_array_int_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/array_return_array_int_test.py
new file mode 100644
index 0000000..7781d78
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/array_return_array_int_test.py
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(res):
+ value = 0
+ for data in res:
+ if data is not None:
+ value += data
+ result = []
+ result.append(value)
+ return result
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/array_return_array_string_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/array_return_array_string_test.py
new file mode 100644
index 0000000..92864bc
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/array_return_array_string_test.py
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(res):
+ value = ""
+ for data in res:
+ if data is not None:
+ value += data
+ result = []
+ result.append(value)
+ return result
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/array_string_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/array_string_test.py
new file mode 100644
index 0000000..ede02c1
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/array_string_test.py
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(res):
+ value = ""
+ for data in res:
+ if data is not None:
+ value += data
+ return value
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/assert_equal_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/assert_equal_test.py
new file mode 100644
index 0000000..43501d1
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/assert_equal_test.py
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(val1, val2):
+ if val1 != val2:
+ raise RuntimeError("Assertion Not Met :: ! ( " + str(val1) + " == " + str(val2) + " )")
+ else:
+ return str(val1) + " == " + str(val2)
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/assert_lessthan_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/assert_lessthan_test.py
new file mode 100644
index 0000000..b4ca8ff
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/assert_lessthan_test.py
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(smaller, bigger):
+ if smaller is None or bigger is None:
+ raise RuntimeError("Null values found :: " + str(smaller) + " < " + str(bigger))
+ if not (smaller < bigger):
+ raise RuntimeError("Assertion Not Met :: ! ( " + str(smaller) + " < " + str(bigger) + " )")
+ else:
+ return str(smaller) + " < " + str(bigger)
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/boolean_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/boolean_test.py
new file mode 100644
index 0000000..b6443e3
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/boolean_test.py
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(arg):
+ if arg is True:
+ return False
+ else:
+ return True
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/double_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/double_test.py
new file mode 100644
index 0000000..8667bc0
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/double_test.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(arg1, arg2):
+ return arg1 + arg2
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/float_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/float_test.py
new file mode 100644
index 0000000..3b2d726f
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/float_test.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(arg1, arg2):
+ return arg1 - arg2
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/int_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/int_test.py
new file mode 100644
index 0000000..b96f6b0
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/int_test.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(arg):
+ return int(arg + 1)
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/map_int_double_ret_string_string_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/map_int_double_ret_string_string_test.py
new file mode 100644
index 0000000..f8be7d9
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/map_int_double_ret_string_string_test.py
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(i, d):
+ ans = {}
+ ans["114" + str(i)] = "514" + str(d)
+ return ans
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/map_int_int_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/map_int_int_test.py
new file mode 100644
index 0000000..87e27ec
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/map_int_int_test.py
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(hashMap):
+ mul = 0
+ for key, value in hashMap.items():
+ mul += key * value
+ return mul
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/map_ret_int_double_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/map_ret_int_double_test.py
new file mode 100644
index 0000000..3fc0028
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/map_ret_int_double_test.py
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(mid):
+ ans = {}
+ for key, value in mid.items():
+ ans[key * 10] = value * 10
+ return ans
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/map_ret_int_int_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/map_ret_int_int_test.py
new file mode 100644
index 0000000..5e57f3d
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/map_ret_int_int_test.py
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(mii):
+ ans = {}
+ for key, value in mii.items():
+ ans[key * 10] = value * 10
+ return ans
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/map_ret_string_string_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/map_ret_string_string_test.py
new file mode 100644
index 0000000..b6eb3a3
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/map_ret_string_string_test.py
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(mp):
+ ans = {}
+ for key, value in mp.items():
+ ans[key + "114"] = value + "514"
+ return ans
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/map_string_string_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/map_string_string_test.py
new file mode 100644
index 0000000..2121c92
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/map_string_string_test.py
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(hashMap):
+ sb = []
+ sortSet = set()
+
+ for key, value in hashMap.items():
+ sortSet.add(key + value)
+
+ for item in sorted(sortSet):
+ sb.append(item)
+
+ ans = ''.join(sb)
+ return ans
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_array_type.py b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_array_type.py
new file mode 100644
index 0000000..6d8af80
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_array_type.py
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def array_to_csv_impl(int_arr, str_arr, nested_arr):
+ def safe_str(x):
+ return 'NULL' if x is None else str(x)
+
+ def format_array(arr):
+ if arr is None:
+ return 'NULL'
+ return '[' + ','.join(safe_str(item) for item in arr) + ']'
+
+ def format_nested_array(arr):
+ if arr is None:
+ return 'NULL'
+ return '[' + ','.join(format_array(inner) for inner in arr) + ']'
+
+ parts = [
+ format_array(int_arr),
+ format_array(str_arr),
+ format_nested_array(nested_arr)
+ ]
+ return '|'.join(parts)
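
A minimal local check of `array_to_csv_impl` (illustrative values, run outside Doris with the function above in scope):

```python
# ARRAY arguments arrive as Python lists; NULL elements arrive as None.
print(array_to_csv_impl([1, None, 3], ["a", "b"], [[1, 2], None]))
# -> [1,NULL,3]|[a,b]|[[1,2],NULL]
```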
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_data_type.py b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_data_type.py
new file mode 100644
index 0000000..4786e97
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_data_type.py
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def row_to_csv_all_impl(
+ bool_col, tinyint_col, smallint_col, int_col, bigint_col, largeint_col,
+ float_col, double_col, decimal32_col, decimal64_col, decimal128_col,
+ date_col, datetime_col, char_col, varchar_col, string_col
+):
+ cols = [
+ bool_col, tinyint_col, smallint_col, int_col, bigint_col, largeint_col,
+ float_col, double_col, decimal32_col, decimal64_col, decimal128_col,
+ date_col, datetime_col, char_col, varchar_col, string_col
+ ]
+
+ def safe_str(x):
+ return 'NULL' if x is None else str(x)
+
+ return ','.join(safe_str(col) for col in cols)
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_map_type.py b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_map_type.py
new file mode 100644
index 0000000..bd6f099
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_map_type.py
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def map_to_csv_impl(map1, map2):
+ def safe_str(x):
+ return 'NULL' if x is None else str(x)
+
+ def format_map(m):
+ if m is None:
+ return 'NULL'
+ # Doris passes MAP as Python dict
+ items = [f"{safe_str(k)}:{safe_str(v)}" for k, v in m.items()]
+ return '{' + ','.join(sorted(items)) + '}'
+
+ return '|'.join([format_map(map1), format_map(map2)])
\ No newline at end of file
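
A minimal local check of `map_to_csv_impl` (run outside Doris). Per the in-code comment, Doris passes MAP values as Python dicts, and sorting the formatted entries keeps the output deterministic:

```python
# NULL map values arrive as None; a NULL map arrives as None itself.
print(map_to_csv_impl({"k1": "v1", "k2": None}, None))
# -> {k1:v1,k2:NULL}|NULL
```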
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_module_test.zip b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_module_test.zip
new file mode 100644
index 0000000..6dc6d95
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_module_test.zip
Binary files differ
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_scalar_ops.py b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_scalar_ops.py
new file mode 100644
index 0000000..95de4dc
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_scalar_ops.py
@@ -0,0 +1,413 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Scalar Python UDF operations - row-by-row processing
+"""
+
+import math
+import re
+from datetime import datetime, timedelta
+from decimal import Decimal
+
+
+# ==================== Numeric Operations ====================
+
+def add_three_numbers(a, b, c):
+ """Add three numbers"""
+ if a is None or b is None or c is None:
+ return None
+ return a + b + c
+
+
+def multiply_with_default(a, b, default=1):
+ """Multiply two numbers, return default if any is None"""
+ if a is None or b is None:
+ return default
+ return a * b
+
+
+def safe_divide_with_precision(numerator, denominator, precision=2):
+ """Safe division with specified decimal precision"""
+ if numerator is None or denominator is None or denominator == 0:
+ return None
+ result = numerator / denominator
+ return round(result, precision)
+
+
+def calculate_discount_price(original_price, discount_percent):
+ """Calculate price after discount"""
+ if original_price is None or discount_percent is None:
+ return None
+ if discount_percent < 0 or discount_percent > 100:
+ return original_price
+ return original_price * (1 - discount_percent / 100)
+
+
+def compound_interest(principal, rate, years):
+ """Calculate compound interest: P * (1 + r)^t"""
+ if principal is None or rate is None or years is None:
+ return None
+ if principal <= 0 or rate < 0 or years < 0:
+ return None
+ return principal * math.pow(1 + rate / 100, years)
+
+
+def calculate_bmi(weight_kg, height_m):
+ """Calculate Body Mass Index"""
+ if weight_kg is None or height_m is None or height_m <= 0:
+ return None
+ return round(weight_kg / (height_m * height_m), 2)
+
+
+def fibonacci(n):
+ """Calculate nth Fibonacci number"""
+ if n is None or n < 0:
+ return None
+ if n <= 1:
+ return n
+ a, b = 0, 1
+ for _ in range(2, n + 1):
+ a, b = b, a + b
+ return b
+
+
+def is_prime(n):
+ """Check if a number is prime"""
+ if n is None or n < 2:
+ return False
+ if n == 2:
+ return True
+ if n % 2 == 0:
+ return False
+ for i in range(3, int(math.sqrt(n)) + 1, 2):
+ if n % i == 0:
+ return False
+ return True
+
+
+def gcd(a, b):
+ """Calculate Greatest Common Divisor"""
+ if a is None or b is None:
+ return None
+ a, b = abs(a), abs(b)
+ while b:
+ a, b = b, a % b
+ return a
+
+
+def lcm(a, b):
+ """Calculate Least Common Multiple"""
+ if a is None or b is None or a == 0 or b == 0:
+ return None
+ return abs(a * b) // gcd(a, b)
+
+
+# ==================== String Operations ====================
+
+def reverse_string(s):
+ """Reverse a string"""
+ if s is None:
+ return None
+ return s[::-1]
+
+
+def count_vowels(s):
+ """Count number of vowels in a string"""
+ if s is None:
+ return None
+ vowels = 'aeiouAEIOU'
+ return sum(1 for char in s if char in vowels)
+
+
+def count_words(s):
+ """Count number of words in a string"""
+ if s is None:
+ return None
+ return len(s.split())
+
+
+def string_length_custom(s):
+ """Calculate string length (custom implementation for testing)"""
+ if s is None:
+ return None
+ return len(s)
+
+
+def capitalize_words(s):
+ """Capitalize first letter of each word"""
+ if s is None:
+ return None
+ return ' '.join(word.capitalize() for word in s.split())
+
+
+def remove_whitespace(s):
+ """Remove all whitespace from string"""
+ if s is None:
+ return None
+ return ''.join(s.split())
+
+
+def extract_numbers(s):
+ """Extract all numbers from string and concatenate"""
+ if s is None:
+ return None
+ numbers = re.findall(r'\d+', s)
+ return ','.join(numbers) if numbers else ''
+
+
+def is_palindrome(s):
+ """Check if string is a palindrome (case-insensitive)"""
+ if s is None:
+ return None
+ cleaned = ''.join(c.lower() for c in s if c.isalnum())
+ return cleaned == cleaned[::-1]
+
+
+def string_similarity(s1, s2):
+ """Calculate simple string similarity (0-100)"""
+ if s1 is None or s2 is None:
+ return None
+ if s1 == s2:
+ return 100.0
+ # Simple character overlap ratio
+ set1, set2 = set(s1.lower()), set(s2.lower())
+ if not set1 or not set2:
+ return 0.0
+ intersection = len(set1 & set2)
+ union = len(set1 | set2)
+ return round(intersection / union * 100, 2)
+
+
+def mask_email(email):
+ """Mask email address: user@domain.com -> u***@domain.com"""
+ if email is None or '@' not in email:
+ return None
+ parts = email.split('@')
+ if len(parts[0]) <= 1:
+ return email
+ masked_user = parts[0][0] + '***'
+ return f"{masked_user}@{parts[1]}"
+
+
+def extract_domain(email):
+ """Extract domain from email address"""
+ if email is None or '@' not in email:
+ return None
+ return email.split('@')[1]
+
+
+def truncate_string(s, max_length, suffix='...'):
+ """Truncate string to max length with suffix"""
+ if s is None:
+ return None
+ if len(s) <= max_length:
+ return s
+ return s[:max_length - len(suffix)] + suffix
+
+
+# ==================== Date/Time Operations ====================
+
+def days_between_dates(date1_str, date2_str):
+ """Calculate days between two dates (YYYY-MM-DD format)"""
+ if date1_str is None or date2_str is None:
+ return None
+ try:
+ d1 = datetime.strptime(str(date1_str), '%Y-%m-%d')
+ d2 = datetime.strptime(str(date2_str), '%Y-%m-%d')
+ return abs((d2 - d1).days)
+ except Exception:
+ return None
+
+
+def is_weekend(date_str):
+ """Check if date is weekend (Saturday or Sunday)"""
+ if date_str is None:
+ return None
+ try:
+ date = datetime.strptime(str(date_str), '%Y-%m-%d')
+ return date.weekday() >= 5 # 5=Saturday, 6=Sunday
+ except Exception:
+ return None
+
+
+def get_quarter(date_str):
+ """Get quarter (1-4) from date"""
+ if date_str is None:
+ return None
+ try:
+ date = datetime.strptime(str(date_str), '%Y-%m-%d')
+ return (date.month - 1) // 3 + 1
+ except Exception:
+ return None
+
+
+def age_in_years(birth_date_str, current_date_str):
+ """Calculate age in years"""
+ if birth_date_str is None or current_date_str is None:
+ return None
+ try:
+ birth = datetime.strptime(str(birth_date_str), '%Y-%m-%d')
+ current = datetime.strptime(str(current_date_str), '%Y-%m-%d')
+ age = current.year - birth.year
+ if (current.month, current.day) < (birth.month, birth.day):
+ age -= 1
+ return age
+ except Exception:
+ return None
+
+
+# ==================== Boolean/Conditional Operations ====================
+
+def is_in_range(value, min_val, max_val):
+ """Check if value is in range [min_val, max_val]"""
+ if value is None or min_val is None or max_val is None:
+ return None
+ return min_val <= value <= max_val
+
+
+def xor_operation(a, b):
+ """XOR operation on two booleans"""
+ if a is None or b is None:
+ return None
+ return (a or b) and not (a and b)
+
+
+def all_true(*args):
+ """Check if all arguments are True"""
+ if any(arg is None for arg in args):
+ return None
+ return all(args)
+
+
+def any_true(*args):
+ """Check if any argument is True"""
+ if any(arg is None for arg in args):
+ return None
+ return any(args)
+
+
+def count_true(*args):
+ """Count number of True values"""
+ if any(arg is None for arg in args):
+ return None
+ return sum(1 for arg in args if arg)
+
+
+# ==================== Complex/Mixed Operations ====================
+
+def calculate_grade(score):
+ """Convert numeric score to letter grade"""
+ if score is None:
+ return None
+ if score >= 90:
+ return 'A'
+ elif score >= 80:
+ return 'B'
+ elif score >= 70:
+ return 'C'
+ elif score >= 60:
+ return 'D'
+ else:
+ return 'F'
+
+
+def categorize_age(age):
+ """Categorize age into groups"""
+ if age is None:
+ return None
+ if age < 0:
+ return 'Invalid'
+ elif age < 13:
+ return 'Child'
+ elif age < 20:
+ return 'Teenager'
+ elif age < 60:
+ return 'Adult'
+ else:
+ return 'Senior'
+
+
+def calculate_tax(income, tax_rate):
+ """Calculate tax with progressive rates"""
+ if income is None or tax_rate is None:
+ return None
+ if income <= 0:
+ return 0.0
+ return round(income * tax_rate / 100, 2)
+
+
+def format_phone_number(phone):
+ """Format phone number: 1234567890 -> (123) 456-7890"""
+ if phone is None:
+ return None
+ digits = ''.join(c for c in str(phone) if c.isdigit())
+ if len(digits) != 10:
+ return phone
+ return f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"
+
+
+def validate_credit_card_luhn(card_number):
+ """Validate credit card using Luhn algorithm"""
+ if card_number is None:
+ return False
+ digits = [int(d) for d in str(card_number) if d.isdigit()]
+ if not digits:
+ return False
+
+ checksum = 0
+ for i, digit in enumerate(reversed(digits)):
+ if i % 2 == 1:
+ digit *= 2
+ if digit > 9:
+ digit -= 9
+ checksum += digit
+ return checksum % 10 == 0
+
+
+def json_extract_value(json_str, key):
+ """Extract value from simple JSON string"""
+ if json_str is None or key is None:
+ return None
+ try:
+ import json
+ data = json.loads(json_str)
+ return str(data.get(key, ''))
+ except Exception:
+ return None
+
+
+def levenshtein_distance(s1, s2):
+ """Calculate Levenshtein distance between two strings"""
+ if s1 is None or s2 is None:
+ return None
+ if len(s1) < len(s2):
+ return levenshtein_distance(s2, s1)
+ if len(s2) == 0:
+ return len(s1)
+
+ previous_row = range(len(s2) + 1)
+ for i, c1 in enumerate(s1):
+ current_row = [i + 1]
+ for j, c2 in enumerate(s2):
+ insertions = previous_row[j + 1] + 1
+ deletions = current_row[j] + 1
+ substitutions = previous_row[j] + (c1 != c2)
+ current_row.append(min(insertions, deletions, substitutions))
+ previous_row = current_row
+
+ return previous_row[-1]
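
A few quick local assertions over the scalar helpers above (plain Python, run outside Doris; values chosen for illustration):

```python
# Row-by-row helpers behave like ordinary Python functions when tested locally.
assert fibonacci(10) == 55
assert mask_email("user@domain.com") == "u***@domain.com"
assert levenshtein_distance("kitten", "sitting") == 3
assert calculate_grade(85) == 'B'
assert gcd(12, 18) == 6 and lcm(4, 6) == 12
```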
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_scalar_ops.zip b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_scalar_ops.zip
new file mode 100644
index 0000000..15192ef
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_scalar_ops.zip
Binary files differ
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_struct_type.py b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_struct_type.py
new file mode 100644
index 0000000..b785691
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_struct_type.py
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def struct_to_csv_impl(person, point):
+ def safe_str(x):
+ return 'NULL' if x is None else str(x)
+
+ def format_array(arr):
+ if arr is None:
+ return 'NULL'
+ return '[' + ','.join(safe_str(item) for item in arr) + ']'
+
+ def format_struct_dict(s, field_names):
+ if s is None:
+ return 'NULL'
+ parts = []
+ for field in field_names:
+ val = s.get(field)
+ parts.append(safe_str(val))
+ return '(' + ','.join(parts) + ')'
+
+ person_str = format_struct_dict(person, ['name', 'age', 'salary'])
+
+ if point is None:
+ point_str = 'NULL'
+ else:
+ x_val = safe_str(point.get('x'))
+ y_val = safe_str(point.get('y'))
+ tags_val = format_array(point.get('tags'))
+ point_str = f"({x_val},{y_val},{tags_val})"
+
+ return '|'.join([person_str, point_str])
\ No newline at end of file
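
A minimal local check of `struct_to_csv_impl` (run outside Doris). STRUCT values arrive as Python dicts keyed by field name:

```python
# Nested ARRAY fields inside a STRUCT arrive as Python lists.
person = {"name": "alice", "age": 30, "salary": 1000.5}
point = {"x": 1, "y": 2, "tags": ["a", None]}
print(struct_to_csv_impl(person, point))
# -> (alice,30,1000.5)|(1,2,[a,NULL])
```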
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_vector_ops.py b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_vector_ops.py
new file mode 100644
index 0000000..31dd411
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_vector_ops.py
@@ -0,0 +1,168 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Vector Python UDF operations using pandas.Series
+"""
+
+import pandas as pd
+import numpy as np
+
+
+def add_constant(a: pd.Series, constant: pd.Series) -> pd.Series:
+ """Add a constant to series"""
+ # constant is a series but we use the first value
+ const_val = constant.iloc[0] if len(constant) > 0 else 0
+ return a + const_val
+
+
+def multiply_by_constant(a: pd.Series, constant: pd.Series) -> pd.Series:
+ """Multiply series by a constant"""
+ const_val = constant.iloc[0] if len(constant) > 0 else 1
+ return a * const_val
+
+
+def calculate_discount(price: pd.Series, discount_percent: pd.Series) -> pd.Series:
+ """Calculate price after discount"""
+ return price * (1 - discount_percent)
+
+
+def string_length(s: pd.Series) -> pd.Series:
+ """Calculate length of each string in series"""
+ return s.str.len()
+
+
+def to_uppercase(s: pd.Series) -> pd.Series:
+ """Convert strings to uppercase"""
+ return s.str.upper()
+
+
+def vec_add_with_constant(a: pd.Series, b: pd.Series) -> pd.Series:
+ """Add two series and add a constant"""
+ return a + b + 100
+
+
+def vec_multiply_and_round(a: pd.Series, b: pd.Series) -> pd.Series:
+ """Multiply two series and round to 2 decimal places"""
+ return (a * b).round(2)
+
+
+def vec_string_concat_with_separator(s1: pd.Series, s2: pd.Series) -> pd.Series:
+ """Concatenate two string series with a separator"""
+ return s1 + ' | ' + s2
+
+
+def vec_string_title_case(s: pd.Series) -> pd.Series:
+ """Convert string series to title case"""
+ return s.str.title()
+
+
+def vec_conditional_value(a: pd.Series, b: pd.Series) -> pd.Series:
+ """Return a if a > b, else return b"""
+ return pd.Series(np.where(a > b, a, b))
+
+
+def vec_percentage_calculation(part: pd.Series, total: pd.Series) -> pd.Series:
+ """Calculate percentage: (part / total) * 100"""
+ return (part / total * 100).round(2)
+
+
+def vec_is_in_range(value: pd.Series, min_val: pd.Series, max_val: pd.Series) -> pd.Series:
+ """Check if value is between min_val and max_val"""
+ return (value >= min_val) & (value <= max_val)
+
+
+def vec_safe_divide(numerator: pd.Series, denominator: pd.Series) -> pd.Series:
+ """Safe division, return 0 when denominator is 0 or None"""
+ result = numerator / denominator
+ # Replace inf and -inf with 0
+ result = result.replace([np.inf, -np.inf], 0)
+ # Fill NaN with 0
+ return result.fillna(0)
+
+
+def vec_exponential_decay(value: pd.Series, days: pd.Series) -> pd.Series:
+ """Calculate exponential decay: value * exp(-days/30)"""
+ return value * np.exp(-days / 30.0)
+
+
+def vec_string_extract_first_word(s: pd.Series) -> pd.Series:
+ """Extract the first word from a string"""
+ return s.str.split().str[0]
+
+
+def vec_normalize_to_range(value: pd.Series) -> pd.Series:
+ """Normalize values to 0-1 range using min-max normalization"""
+ min_val = value.min()
+ max_val = value.max()
+    if max_val == min_val:
+        # Degenerate case: all values equal, map everything to the midpoint
+        return pd.Series(0.5, index=value.index)
+ return (value - min_val) / (max_val - min_val)
+
+
+def vec_moving_average(value: pd.Series) -> pd.Series:
+ """Calculate 3-point moving average"""
+ return value.rolling(window=3, min_periods=1).mean()
+
+
+def vec_z_score(value: pd.Series) -> pd.Series:
+ """Calculate z-score: (value - mean) / std"""
+ mean = value.mean()
+ std = value.std()
+    if std == 0 or pd.isna(std):
+        # Degenerate case: zero or undefined spread, return all zeros
+        return pd.Series(0.0, index=value.index)
+ return (value - mean) / std
+
+
+def vec_clip_values(value: pd.Series, min_val: pd.Series, max_val: pd.Series) -> pd.Series:
+ """Clip values to be within min_val and max_val"""
+ return value.clip(lower=min_val, upper=max_val)
+
+
+def vec_boolean_and(a: pd.Series, b: pd.Series) -> pd.Series:
+ """Logical AND operation on two boolean series"""
+ return a & b
+
+
+def vec_boolean_or(a: pd.Series, b: pd.Series) -> pd.Series:
+ """Logical OR operation on two boolean series"""
+ return a | b
+
+
+def vec_string_contains(s: pd.Series, pattern: pd.Series) -> pd.Series:
+ """Check if string contains pattern (case-insensitive)"""
+ # For simplicity, use the first pattern value for all rows
+ if len(pattern) > 0 and not pd.isna(pattern.iloc[0]):
+ pattern_str = str(pattern.iloc[0])
+ return s.str.contains(pattern_str, case=False, na=False)
+ return pd.Series([False] * len(s))
+
+
+def vec_abs_difference(a: pd.Series, b: pd.Series) -> pd.Series:
+ """Calculate absolute difference between two series"""
+ return (a - b).abs()
+
+
+def vec_power(base: pd.Series, exponent: pd.Series) -> pd.Series:
+ """Calculate base raised to the power of exponent"""
+ return base ** exponent
+
+
+def vec_log_transform(value: pd.Series) -> pd.Series:
+ """Calculate natural logarithm, return 0 for non-positive values"""
+ result = np.log(value)
+ return result.replace([np.inf, -np.inf], 0).fillna(0)
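+
+
+# Minimal local sanity check for a couple of the functions above; a sketch for
+# manual verification only, not part of the Doris UDF contract. It assumes the
+# vector calling convention shown by the signatures: one pandas.Series per
+# argument in, one Series of equal length out.
+if __name__ == "__main__":
+    a = pd.Series([1.0, 4.0, 0.0])
+    b = pd.Series([2.0, 0.0, 0.0])
+    # 4/0 -> inf -> 0, 0/0 -> NaN -> 0
+    assert vec_safe_divide(a, b).tolist() == [0.5, 0.0, 0.0]
+    assert vec_abs_difference(a, b).tolist() == [1.0, 4.0, 0.0]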
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_vector_ops.zip b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_vector_ops.zip
new file mode 100644
index 0000000..3efd381
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/python_udf_vector_ops.zip
Binary files differ
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/pyudf.zip b/regression-test/suites/pythonudf_p0/udf_scripts/pyudf.zip
new file mode 100644
index 0000000..b4ed70a
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/pyudf.zip
Binary files differ
diff --git a/regression-test/suites/pythonudf_p0/udf_scripts/string_test.py b/regression-test/suites/pythonudf_p0/udf_scripts/string_test.py
new file mode 100644
index 0000000..3505617
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udf_scripts/string_test.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(arg1, a, b):
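+    """Mask the middle of arg1 with '*', keeping the first a and last b characters."""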
+    # Guard b == 0: arg1[-0:] would return the whole string instead of ""
+    tail = arg1[-b:] if b > 0 else ""
+    return arg1[:a] + "*" * (len(arg1) - a - b) + tail
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udtf_scripts/array_int_test.py b/regression-test/suites/pythonudf_p0/udtf_scripts/array_int_test.py
new file mode 100644
index 0000000..78c1fce
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udtf_scripts/array_int_test.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(count):
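+    """UDTF: yield the array [1, 2, 3] three times, ignoring count."""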
+    for _ in range(3):
+ yield [1, 2, 3]
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udtf_scripts/array_string_test.py b/regression-test/suites/pythonudf_p0/udtf_scripts/array_string_test.py
new file mode 100644
index 0000000..7fb1f02
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udtf_scripts/array_string_test.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(count):
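+    """UDTF: yield the array ['Hi', 'DataMind', 'Good'] three times, ignoring count."""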
+    for _ in range(3):
+ yield ['Hi', 'DataMind', 'Good']
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udtf_scripts/double_test.py b/regression-test/suites/pythonudf_p0/udtf_scripts/double_test.py
new file mode 100644
index 0000000..275c493
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udtf_scripts/double_test.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(val):
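+    """UDTF: yield a single row, val * 10."""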
+ yield val * 10
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udtf_scripts/float_test.py b/regression-test/suites/pythonudf_p0/udtf_scripts/float_test.py
new file mode 100644
index 0000000..de321ba
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udtf_scripts/float_test.py
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(val):
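+    """UDTF: yield a single row, val - 10."""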
+ yield val - 10
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udtf_scripts/int_test.py b/regression-test/suites/pythonudf_p0/udtf_scripts/int_test.py
new file mode 100644
index 0000000..15ccedb
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udtf_scripts/int_test.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(count):
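+    """UDTF: yield count three times."""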
+    for _ in range(3):
+ yield count
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udtf_scripts/map_test.py b/regression-test/suites/pythonudf_p0/udtf_scripts/map_test.py
new file mode 100644
index 0000000..290da85
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udtf_scripts/map_test.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(val):
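+    """UDTF: yield the input map three times."""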
+    for _ in range(3):
+ yield val
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udtf_scripts/string_test.py b/regression-test/suites/pythonudf_p0/udtf_scripts/string_test.py
new file mode 100644
index 0000000..78939b8
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udtf_scripts/string_test.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(value, separator):
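+    """UDTF: split value on separator and yield one row per part."""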
+ for part in value.split(separator):
+ yield part
\ No newline at end of file
diff --git a/regression-test/suites/pythonudf_p0/udtf_scripts/struct_test.py b/regression-test/suites/pythonudf_p0/udtf_scripts/struct_test.py
new file mode 100644
index 0000000..1a93ba3
--- /dev/null
+++ b/regression-test/suites/pythonudf_p0/udtf_scripts/struct_test.py
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+def evaluate(val):
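+    """UDTF: yield the struct (1, 0.112, "Hello, DataMind") three times."""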
+    for _ in range(3):
+ yield 1, 0.112, "Hello, DataMind"
\ No newline at end of file