blob: 57cd3149547e9ae2730baadf079961d283d49ae9 [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "runtime/user_function_cache.h"

#include <gtest/gtest.h>
#include <minizip/zip.h>
#include <unistd.h>

#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#include "common/status.h"
namespace doris {
// Test fixture for UserFunctionCache.
//
// SetUp() records the current DORIS_HOME and creates a scratch directory
// named after the process id under the system temp directory; TearDown()
// restores DORIS_HOME and removes the scratch directory again. Tests may
// therefore call setenv("DORIS_HOME", ...) without cleaning up themselves.
class UserFunctionCacheTest : public ::testing::Test {
protected:
    UserFunctionCache ufc;
    // Scratch directory created in SetUp() and removed in TearDown().
    std::string test_dir_;

    void SetUp() override {
        // Save a COPY of the original DORIS_HOME. The pointer returned by
        // getenv() may be invalidated by the setenv()/unsetenv() calls the
        // tests make, so holding on to the raw pointer until TearDown() is
        // not safe.
        if (const char* doris_home = getenv("DORIS_HOME"); doris_home != nullptr) {
            original_doris_home_ = std::string(doris_home);
        } else {
            original_doris_home_.reset();
        }

        // Create the scratch directory for this test process.
        test_dir_ = std::filesystem::temp_directory_path().string() + "/ufc_test_" +
                    std::to_string(getpid());
        std::filesystem::create_directories(test_dir_);
    }

    void TearDown() override {
        // Restore DORIS_HOME to the state seen in SetUp().
        if (original_doris_home_.has_value()) {
            setenv("DORIS_HOME", original_doris_home_->c_str(), 1);
        } else {
            unsetenv("DORIS_HOME");
        }

        // Clean up the scratch directory.
        if (!test_dir_.empty() && std::filesystem::exists(test_dir_)) {
            std::filesystem::remove_all(test_dir_);
        }
    }

    // Creates a zip archive at `zip_path` with one entry per element of
    // `files` (first = entry name, second = entry content). An entry with
    // empty content is still added; the tests use names ending in '/' with
    // empty content as directory entries.
    //
    // Returns true only if every entry was written and both the entries and
    // the archive itself were closed successfully.
    bool create_zip_file(const std::string& zip_path,
                         const std::vector<std::pair<std::string, std::string>>& files) {
        zipFile zf = zipOpen(zip_path.c_str(), APPEND_STATUS_CREATE);
        if (zf == nullptr) {
            return false;
        }

        for (const auto& [filename, content] : files) {
            zip_fileinfo zi = {};
            if (zipOpenNewFileInZip(zf, filename.c_str(), &zi, nullptr, 0, nullptr, 0, nullptr,
                                    Z_DEFLATED, Z_DEFAULT_COMPRESSION) != ZIP_OK) {
                zipClose(zf, nullptr);
                return false;
            }

            const bool written =
                    content.empty() ||
                    zipWriteInFileInZip(zf, content.c_str(),
                                        static_cast<unsigned int>(content.size())) == ZIP_OK;
            // Always close the entry, even after a failed write, and treat a
            // failed close as an error: the entry is only finalised here.
            const bool entry_closed = zipCloseFileInZip(zf) == ZIP_OK;
            if (!written || !entry_closed) {
                zipClose(zf, nullptr);
                return false;
            }
        }

        // zipClose() finalises the archive, so a failure here means the
        // archive on disk is not usable.
        return zipClose(zf, nullptr) == ZIP_OK;
    }

    // Writes `content` to the file at `path`, creating or truncating it.
    void create_text_file(const std::string& path, const std::string& content) {
        std::ofstream ofs(path);
        ofs << content;
        ofs.close();
    }

private:
    // Value of DORIS_HOME captured in SetUp(); empty if it was not set.
    std::optional<std::string> original_doris_home_;
};
TEST_F(UserFunctionCacheTest, SplitStringByChecksumTest) {
    // A well-formed cache file name. The third expected part keeps its
    // internal dots ("postgresql-42.5.0"), so the name must not simply be
    // split on every '.'.
    std::string cache_file_name =
            "7119053928154065546.20c8228267b6c9ce620fddb39467d3eb.postgresql-42.5.0.jar";

    auto parts = ufc._split_string_by_checksum(cache_file_name);

    ASSERT_EQ(parts.size(), 4);
    EXPECT_EQ(parts[0], "7119053928154065546");
    EXPECT_EQ(parts[1], "20c8228267b6c9ce620fddb39467d3eb");
    EXPECT_EQ(parts[2], "postgresql-42.5.0");
    EXPECT_EQ(parts[3], "jar");
}
// Test _get_real_url method
TEST_F(UserFunctionCacheTest, TestGetRealUrlWithAbsoluteUrl) {
    // Every URL below contains ":/" and is expected to come back unchanged.
    const char* const kAbsoluteUrls[] = {
            "http://example.com/udf.jar",         // plain http
            "s3://bucket/path/udf.jar",           // S3
            "file:///path/to/udf.jar",            // local file URL
            "https://secure.example.com/udf.jar", // https
    };

    std::string resolved;
    for (const char* url : kAbsoluteUrls) {
        Status st = ufc._get_real_url(url, &resolved);
        EXPECT_TRUE(st.ok());
        EXPECT_EQ(resolved, url);
    }
}
TEST_F(UserFunctionCacheTest, TestGetRealUrlWithRelativeUrl) {
    // Set DORIS_HOME for testing; the fixture restores it in TearDown().
    setenv("DORIS_HOME", "/tmp/test_doris", 1);

    std::string result_url;
    // A URL without ":/" is treated as relative and should be resolved via
    // _check_and_return_default_java_udf_url.
    Status status = ufc._get_real_url("my-udf.jar", &result_url);

    // Require success outright, as TestEdgeCases and TestUrlDetectionLogic do
    // for the same call. The previous `ok() || !result_url.empty()` check
    // would also pass when the call failed after writing to result_url.
    EXPECT_TRUE(status.ok()) << status.to_string();
    // The resolved URL must still reference the requested file.
    EXPECT_TRUE(result_url.find("my-udf.jar") != std::string::npos);
}
// Test _check_and_return_default_java_udf_url method
TEST_F(UserFunctionCacheTest, TestCheckAndReturnDefaultJavaUdfUrlNonCloudMode) {
    // DORIS_HOME is overridden for this test only; the fixture restores it.
    setenv("DORIS_HOME", "/tmp/doris_test", 1);

    std::string url;
    Status st = ufc._check_and_return_default_java_udf_url("test-udf.jar", &url);
    EXPECT_TRUE(st.ok());

    // In non-cloud mode the result is expected to be a file:// URL that
    // points into the java_udf plugin directory and names the requested jar.
    const bool starts_with_file_scheme = url.rfind("file://", 0) == 0;
    const bool names_the_jar = url.find("test-udf.jar") != std::string::npos;
    const bool in_plugin_dir = url.find("/plugins/java_udf/") != std::string::npos;
    EXPECT_TRUE(starts_with_file_scheme);
    EXPECT_TRUE(names_the_jar);
    EXPECT_TRUE(in_plugin_dir);
}
TEST_F(UserFunctionCacheTest, TestPathConstruction) {
    // Input file name paired with the exact URL expected for DORIS_HOME=/test/doris.
    struct PathCase {
        const char* file_name;
        const char* expected_url;
    };
    const PathCase kCases[] = {
            // Plain file name
            {"my-udf.jar", "file:///test/doris/plugins/java_udf/my-udf.jar"},
            // Nested relative path
            {"nested/path/udf.jar", "file:///test/doris/plugins/java_udf/nested/path/udf.jar"},
    };

    setenv("DORIS_HOME", "/test/doris", 1);

    std::string url;
    for (const PathCase& path_case : kCases) {
        Status st = ufc._check_and_return_default_java_udf_url(path_case.file_name, &url);
        EXPECT_TRUE(st.ok());
        EXPECT_EQ(url, path_case.expected_url);
    }
}
TEST_F(UserFunctionCacheTest, TestEdgeCases) {
    // None of these inputs contains ":/", so each one is expected to be
    // handled as a relative URL and resolved successfully.
    const char* const kInputs[] = {
            "",            // empty URL
            "invalid:url", // colon without a following slash
            "my udf.jar",  // contains a space
    };

    setenv("DORIS_HOME", "/tmp/test", 1);

    std::string resolved;
    for (const char* input : kInputs) {
        Status st = ufc._get_real_url(input, &resolved);
        EXPECT_TRUE(st.ok());
    }
}
TEST_F(UserFunctionCacheTest, TestUrlDetectionLogic) {
    std::string resolved;

    // Part 1: URLs with a scheme separator are expected to pass through unchanged.
    std::vector<std::string> with_scheme = {
            "http://example.com/file.jar", "https://example.com/file.jar", "s3://bucket/file.jar",
            "file:///local/file.jar", "ftp://server/file.jar"};
    for (const std::string& candidate : with_scheme) {
        Status st = ufc._get_real_url(candidate, &resolved);
        EXPECT_TRUE(st.ok());
        EXPECT_EQ(resolved, candidate);
    }

    // Part 2: inputs without ":/" are expected to be resolved through
    // _check_and_return_default_java_udf_url, which needs DORIS_HOME.
    std::vector<std::string> without_scheme = {"file.jar", "path/file.jar", "invalid:no-slash",
                                               ""};
    setenv("DORIS_HOME", "/tmp", 1);
    for (const std::string& candidate : without_scheme) {
        Status st = ufc._get_real_url(candidate, &resolved);
        EXPECT_TRUE(st.ok());
        if (candidate.empty()) {
            // Nothing to look for in the resolved URL for the empty input.
            continue;
        }
        EXPECT_TRUE(resolved.find(candidate) != std::string::npos);
    }
}
TEST_F(UserFunctionCacheTest, TestConsistentBehavior) {
    setenv("DORIS_HOME", "/consistent/test", 1);

    // Resolve the same jar twice; both calls must succeed and agree.
    std::string first_url;
    Status first_status = ufc._check_and_return_default_java_udf_url("same-udf.jar", &first_url);

    std::string second_url;
    Status second_status = ufc._check_and_return_default_java_udf_url("same-udf.jar", &second_url);

    EXPECT_TRUE(first_status.ok());
    EXPECT_TRUE(second_status.ok());
    EXPECT_EQ(first_url, second_url);
}
// _unzip_lib
TEST_F(UserFunctionCacheTest, UnzipLibSuccessWithFiles) {
    // Archive with two Python sources and one plain text file.
    std::vector<std::pair<std::string, std::string>> entries = {
            {"main.py", "print('hello world')"},
            {"utils.py", "def helper(): pass"},
            {"config.txt", "config=value"}};
    std::string archive = test_dir_ + "/test_udf.zip";
    ASSERT_TRUE(create_zip_file(archive, entries));
    ASSERT_TRUE(std::filesystem::exists(archive));

    Status st = ufc._unzip_lib(archive);
    EXPECT_TRUE(st.ok()) << st.to_string();

    // The test expects the output in a sibling directory named after the
    // archive without its ".zip" suffix.
    const std::string extracted_dir = test_dir_ + "/test_udf";
    EXPECT_TRUE(std::filesystem::exists(extracted_dir));
    for (const auto& entry : entries) {
        EXPECT_TRUE(std::filesystem::exists(extracted_dir + "/" + entry.first));
    }

    // Spot-check the content of one extracted file.
    std::ifstream main_py(extracted_dir + "/main.py");
    std::string main_py_text;
    main_py_text.assign(std::istreambuf_iterator<char>(main_py),
                        std::istreambuf_iterator<char>());
    EXPECT_EQ(main_py_text, "print('hello world')");
}
TEST_F(UserFunctionCacheTest, UnzipLibSuccessWithDirectories) {
    // Entries whose name ends in '/' (with empty content) are directory entries.
    std::vector<std::pair<std::string, std::string>> entries = {
            {"subdir/", ""},
            {"subdir/script.py", "import os"},
            {"subdir/nested/", ""},
            {"subdir/nested/deep.py", "x = 1"}};
    std::string archive = test_dir_ + "/test_dir.zip";
    ASSERT_TRUE(create_zip_file(archive, entries));

    Status st = ufc._unzip_lib(archive);
    EXPECT_TRUE(st.ok()) << st.to_string();

    const std::filesystem::path root = std::filesystem::path(test_dir_) / "test_dir";
    const std::filesystem::path subdir = root / "subdir";
    // The top-level directory must exist and really be a directory.
    EXPECT_TRUE(std::filesystem::exists(subdir));
    EXPECT_TRUE(std::filesystem::is_directory(subdir));
    // Its file, the nested directory and the nested file must all be there.
    EXPECT_TRUE(std::filesystem::exists(subdir / "script.py"));
    EXPECT_TRUE(std::filesystem::exists(subdir / "nested"));
    EXPECT_TRUE(std::filesystem::exists(subdir / "nested" / "deep.py"));
}
TEST_F(UserFunctionCacheTest, UnzipLibSkipsMacOSXFiles) {
    // Zips created on macOS commonly carry a __MACOSX metadata tree next to
    // the real payload; mix both kinds of entries here.
    std::vector<std::pair<std::string, std::string>> entries = {
            {"script.py", "print('test')"},
            {"__MACOSX/", ""},
            {"__MACOSX/._script.py", "mac metadata"},
            {"__MACOSX/nested/._file", "more metadata"},
            {"real_file.py", "real content"}};
    std::string archive = test_dir_ + "/mac_zip.zip";
    ASSERT_TRUE(create_zip_file(archive, entries));

    Status st = ufc._unzip_lib(archive);
    EXPECT_TRUE(st.ok()) << st.to_string();

    const std::string extracted_dir = test_dir_ + "/mac_zip";
    // Payload files are extracted ...
    for (const char* payload : {"/script.py", "/real_file.py"}) {
        EXPECT_TRUE(std::filesystem::exists(extracted_dir + payload));
    }
    // ... while the metadata directory is not.
    EXPECT_FALSE(std::filesystem::exists(extracted_dir + "/__MACOSX"));
}
TEST_F(UserFunctionCacheTest, UnzipLibFailsForNonExistentFile) {
    // Nothing is ever created at this path.
    std::string missing_archive = test_dir_ + "/non_existent.zip";

    Status st = ufc._unzip_lib(missing_archive);

    EXPECT_FALSE(st.ok());
    const std::string message = st.to_string();
    EXPECT_TRUE(message.find("Failed to open zip file") != std::string::npos);
}
TEST_F(UserFunctionCacheTest, UnzipLibFailsForInvalidZipFile) {
    // A plain text file that merely carries a .zip extension.
    std::string bogus_archive = test_dir_ + "/invalid.zip";
    create_text_file(bogus_archive, "This is not a zip file content");

    Status st = ufc._unzip_lib(bogus_archive);

    EXPECT_FALSE(st.ok());
    const std::string message = st.to_string();
    EXPECT_TRUE(message.find("Failed to open zip file") != std::string::npos);
}
TEST_F(UserFunctionCacheTest, UnzipLibFailsForEmptyFile) {
    // A zero-byte file is not a valid archive, so unzipping must fail.
    std::string zero_byte_archive = test_dir_ + "/empty.zip";
    create_text_file(zero_byte_archive, "");

    Status st = ufc._unzip_lib(zero_byte_archive);

    EXPECT_FALSE(st.ok());
}
TEST_F(UserFunctionCacheTest, UnzipLibHandlesEmptyZip) {
    // A structurally valid archive that contains no entries at all.
    std::vector<std::pair<std::string, std::string>> no_entries;
    std::string archive = test_dir_ + "/empty_valid.zip";
    ASSERT_TRUE(create_zip_file(archive, no_entries));

    Status st = ufc._unzip_lib(archive);
    EXPECT_TRUE(st.ok()) << st.to_string();

    // The target directory is still expected to be created.
    const std::string extracted_dir = test_dir_ + "/empty_valid";
    EXPECT_TRUE(std::filesystem::exists(extracted_dir));
}
TEST_F(UserFunctionCacheTest, UnzipLibHandlesLargeFileContent) {
    // Create a zip with a larger file
    std::string zip_path = test_dir_ + "/large.zip";
    const std::string large_content(100000, 'x'); // 100KB of 'x'
    std::vector<std::pair<std::string, std::string>> files = {{"large.py", large_content}};
    ASSERT_TRUE(create_zip_file(zip_path, files));

    Status status = ufc._unzip_lib(zip_path);
    EXPECT_TRUE(status.ok()) << status.to_string();

    std::string unzip_dir = test_dir_ + "/large";
    std::ifstream ifs(unzip_dir + "/large.py", std::ios::binary);
    // Fail clearly if the file was never extracted, instead of reporting a
    // size mismatch against an empty string.
    ASSERT_TRUE(ifs.is_open());
    std::string content((std::istreambuf_iterator<char>(ifs)), std::istreambuf_iterator<char>());

    EXPECT_EQ(content.size(), large_content.size());
    // Compare the bytes as well as the length so that same-length corruption
    // is caught. EXPECT_TRUE keeps gtest from printing two 100KB strings on
    // failure.
    EXPECT_TRUE(content == large_content);
}
TEST_F(UserFunctionCacheTest, UnzipLibMultipleFilesInSameDirectory) {
    // file1.py .. file5.py, each holding "content<N>", all at the archive root.
    constexpr int kFileCount = 5;
    std::vector<std::pair<std::string, std::string>> entries;
    for (int idx = 1; idx <= kFileCount; ++idx) {
        const std::string suffix = std::to_string(idx);
        entries.emplace_back("file" + suffix + ".py", "content" + suffix);
    }

    std::string archive = test_dir_ + "/multi.zip";
    ASSERT_TRUE(create_zip_file(archive, entries));

    Status st = ufc._unzip_lib(archive);
    EXPECT_TRUE(st.ok()) << st.to_string();

    // Every entry must show up in the extraction directory.
    const std::string extracted_dir = test_dir_ + "/multi";
    for (const auto& entry : entries) {
        EXPECT_TRUE(std::filesystem::exists(extracted_dir + "/" + entry.first));
    }
}
// _check_cache_is_python_udf
TEST_F(UserFunctionCacheTest, CheckCacheIsPythonUdfSuccess) {
    // An archive whose top level contains Python sources should be accepted.
    std::vector<std::pair<std::string, std::string>> entries = {
            {"main.py", "def udf(): pass"}, {"helper.py", "def helper(): pass"}};
    std::string archive_name = "python_udf.zip";
    ASSERT_TRUE(create_zip_file(test_dir_ + "/" + archive_name, entries));

    Status st = ufc._check_cache_is_python_udf(test_dir_, archive_name);

    EXPECT_TRUE(st.ok()) << st.to_string();
}
TEST_F(UserFunctionCacheTest, CheckCacheIsPythonUdfFailsNoPythonFile) {
    // The archive holds only non-Python files, so the check must reject it.
    std::vector<std::pair<std::string, std::string>> entries = {
            {"config.txt", "config=value"}, {"data.json", "{\"key\": \"value\"}"}};
    std::string archive_name = "no_python.zip";
    ASSERT_TRUE(create_zip_file(test_dir_ + "/" + archive_name, entries));

    Status st = ufc._check_cache_is_python_udf(test_dir_, archive_name);

    EXPECT_FALSE(st.ok());
    const std::string message = st.to_string();
    EXPECT_TRUE(message.find("No Python file found") != std::string::npos);
}
TEST_F(UserFunctionCacheTest, CheckCacheIsPythonUdfWithNestedPythonFile) {
    // The only Python file lives inside a sub-directory of the archive.
    std::vector<std::pair<std::string, std::string>> entries = {
            {"subdir/", ""}, {"subdir/module.py", "x = 1"}, {"readme.txt", "readme"}};
    std::string archive_name = "nested_py.zip";
    ASSERT_TRUE(create_zip_file(test_dir_ + "/" + archive_name, entries));

    Status st = ufc._check_cache_is_python_udf(test_dir_, archive_name);

    // This pins the current behaviour: a Python file that is not at the top
    // level of the archive does not count, so the check fails with the
    // "no Python file" error.
    EXPECT_FALSE(st.ok());
    EXPECT_EQ(st.to_string(),
              "[INTERNAL_ERROR]No Python file found in the unzipped directory.");
}
TEST_F(UserFunctionCacheTest, CheckCacheIsPythonUdfInvalidZip) {
    // A text file with a .zip name cannot be unpacked, so the check must fail.
    std::string archive_name = "invalid_py.zip";
    create_text_file(test_dir_ + "/" + archive_name, "not a zip file");

    Status st = ufc._check_cache_is_python_udf(test_dir_, archive_name);

    EXPECT_FALSE(st.ok());
}
// _make_lib_file
TEST_F(UserFunctionCacheTest, MakeLibFilePyZip) {
    // Point the cache at the scratch directory so the result is predictable.
    ufc._lib_dir = test_dir_;

    std::string actual_path =
            ufc._make_lib_file(12345, "abc123checksum", LibType::PY_ZIP, "my_python_udf.zip");

    // Expected layout: <lib_dir>/<function_id % 128>/<function_id>.<checksum>.<file_name>
    const std::string shard_dir = std::to_string(12345 % 128);
    std::string expected_path = test_dir_;
    expected_path += "/";
    expected_path += shard_dir;
    expected_path += "/12345.abc123checksum.my_python_udf.zip";
    EXPECT_EQ(actual_path, expected_path);
}
TEST_F(UserFunctionCacheTest, MakeLibFileJar) {
    ufc._lib_dir = test_dir_;

    std::string actual_path =
            ufc._make_lib_file(99999, "md5checksum", LibType::JAR, "my_java_udf.jar");

    // For a JAR the original file name is expected to be kept after the
    // checksum, below the <function_id % 128> shard directory.
    const std::string shard_dir = std::to_string(99999 % 128);
    std::string expected_path = test_dir_;
    expected_path += "/";
    expected_path += shard_dir;
    expected_path += "/99999.md5checksum.my_java_udf.jar";
    EXPECT_EQ(actual_path, expected_path);
}
TEST_F(UserFunctionCacheTest, MakeLibFileSo) {
    ufc._lib_dir = test_dir_;

    std::string actual_path = ufc._make_lib_file(100, "somechecksum", LibType::SO, "ignored.so");

    // For SO libraries the supplied file name is expected to be dropped; the
    // path ends in <function_id>.<checksum>.so.
    const std::string shard_dir = std::to_string(100 % 128);
    std::string expected_path = test_dir_;
    expected_path += "/";
    expected_path += shard_dir;
    expected_path += "/100.somechecksum.so";
    EXPECT_EQ(actual_path, expected_path);
}
// _load_entry_from_lib with PY_ZIP
TEST_F(UserFunctionCacheTest, LoadEntryFromLibPyZip) {
    // The archive lives in a shard-style sub-directory of the scratch dir.
    std::string shard_dir = test_dir_ + "/0";
    std::filesystem::create_directories(shard_dir);

    // Name follows the cache convention: function_id.checksum.filename.zip
    std::string archive_name = "12345.abc123def456abc123def456abc1.my_udf.zip";
    std::vector<std::pair<std::string, std::string>> entries = {{"main.py", "def udf(): pass"}};
    ASSERT_TRUE(create_zip_file(shard_dir + "/" + archive_name, entries));

    Status st = ufc._load_entry_from_lib(shard_dir, archive_name);
    EXPECT_TRUE(st.ok()) << st.to_string();

    // Loading must have registered an entry under the function id from the name.
    EXPECT_EQ(ufc._entry_map.count(12345), 1);
}
TEST_F(UserFunctionCacheTest, LoadEntryFromLibZipWithoutPython) {
    std::string shard_dir = test_dir_ + "/1";
    std::filesystem::create_directories(shard_dir);

    // Correctly named archive, but it carries no Python source at all.
    std::string archive_name = "67890.checksum1234checksum1234che.no_python.zip";
    std::vector<std::pair<std::string, std::string>> entries = {{"data.txt", "some data"}};
    ASSERT_TRUE(create_zip_file(shard_dir + "/" + archive_name, entries));

    Status st = ufc._load_entry_from_lib(shard_dir, archive_name);

    // Loading is expected to be rejected because no Python file is present.
    EXPECT_FALSE(st.ok());
}
TEST_F(UserFunctionCacheTest, LoadEntryFromLibInvalidFileName) {
    std::string shard_dir = test_dir_ + "/2";
    std::filesystem::create_directories(shard_dir);

    // The name lacks the function_id/checksum parts of the cache naming scheme.
    std::string bad_name = "invalid_name.zip";
    create_text_file(shard_dir + "/" + bad_name, "dummy");

    Status st = ufc._load_entry_from_lib(shard_dir, bad_name);

    EXPECT_FALSE(st.ok());
}
} // namespace doris