| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "python_env.h" |
| |
| #include <fmt/core.h> |
| |
| #include <filesystem> |
| #include <memory> |
| #include <regex> |
| #include <vector> |
| |
| #include "common/status.h" |
| #include "udf/python/python_udf_server.h" |
| #include "util/string_util.h" |
| |
| namespace doris { |
| |
| namespace fs = std::filesystem; |
| |
| // extract python version by executing `python --version` and extract "3.9.16" from "Python 3.9.16" |
| // @param python_path: path to python executable, e.g. "/opt/miniconda3/envs/myenv/bin/python" |
| // @param version: extracted python version, e.g. "3.9.16" |
| static Status extract_python_version(const std::string& python_path, std::string* version) { |
| static std::regex python_version_re(R"(^Python (\d+\.\d+\.\d+))"); |
| |
| if (!fs::exists(python_path)) { |
| return Status::NotFound("Python executable not found: {}", python_path); |
| } |
| |
| std::string cmd = fmt::format("\"{}\" --version", python_path); |
| FILE* pipe = popen(cmd.c_str(), "r"); |
| if (!pipe) { |
| return Status::InternalError("Failed to run: {}", cmd); |
| } |
| |
| std::string result; |
| char buf[128]; |
| while (fgets(buf, sizeof(buf), pipe)) { |
| result += buf; |
| } |
| pclose(pipe); |
| |
| std::smatch match; |
| if (std::regex_search(result, match, python_version_re)) { |
| *version = match[1].str(); |
| return Status::OK(); |
| } |
| |
| return Status::InternalError("Failed to extract Python version from path: {}, result: {}", |
| python_path, result); |
| } |
| |
| PythonEnvironment::PythonEnvironment(const std::string& name, const PythonVersion& python_version) |
| : env_name(name), python_version(python_version) {} |
| |
| std::string PythonEnvironment::to_string() const { |
| return fmt::format( |
| "[env_name: {}, env_base_path: {}, python_base_path: {}, python_full_version: {}]", |
| env_name, python_version.base_path, python_version.executable_path, |
| python_version.full_version); |
| } |
| |
| bool PythonEnvironment::is_valid() const { |
| if (!python_version.is_valid()) return false; |
| |
| auto perms = fs::status(python_version.executable_path).permissions(); |
| if ((perms & fs::perms::owner_exec) == fs::perms::none) { |
| return false; |
| } |
| |
| std::string version; |
| if (!extract_python_version(python_version.executable_path, &version).ok()) { |
| LOG(WARNING) << "Failed to extract python version from path: " |
| << python_version.executable_path; |
| return false; |
| } |
| |
| return python_version.full_version == version; |
| } |
| |
| // Scan for environments under the /{conda_root_path}/envs directory from the conda root. |
| Status PythonEnvironment::scan_from_conda_root_path(const fs::path& conda_root_path, |
| std::vector<PythonEnvironment>* environments) { |
| DCHECK(!conda_root_path.empty() && environments != nullptr); |
| |
| fs::path envs_dir = conda_root_path / "envs"; |
| if (!fs::exists(envs_dir) || !fs::is_directory(envs_dir)) { |
| return Status::NotFound("Conda envs directory not found: {}", envs_dir.string()); |
| } |
| |
| for (const auto& entry : fs::directory_iterator(envs_dir)) { |
| if (!entry.is_directory()) continue; |
| |
| std::string env_name = entry.path().filename(); // e.g. "myenv" |
| std::string env_base_path = entry.path(); // e.g. "/opt/miniconda3/envs/myenv" |
| std::string python_path = |
| env_base_path + "/bin/python"; // e.g. "/{env_base_path}/bin/python" |
| std::string python_full_version; // e.g. "3.9.16" |
| RETURN_IF_ERROR(extract_python_version(python_path, &python_full_version)); |
| size_t pos = python_full_version.find_last_of('.'); |
| |
| if (UNLIKELY(pos == std::string::npos)) { |
| return Status::InvalidArgument("Invalid python version: {}", python_full_version); |
| } |
| |
| PythonVersion python_version(python_full_version, env_base_path, python_path); |
| PythonEnvironment conda_env(env_name, python_version); |
| |
| if (UNLIKELY(!conda_env.is_valid())) { |
| LOG(WARNING) << "Invalid conda environment: " << conda_env.to_string(); |
| continue; |
| } |
| |
| environments->push_back(std::move(conda_env)); |
| } |
| |
| if (environments->empty()) { |
| return Status::NotFound("No conda python environments found"); |
| } |
| |
| return Status::OK(); |
| } |
| |
| Status PythonEnvironment::scan_from_venv_root_path( |
| const fs::path& venv_root_path, const std::vector<std::string>& interpreter_paths, |
| std::vector<PythonEnvironment>* environments) { |
| DCHECK(!venv_root_path.empty() && environments != nullptr); |
| |
| for (const auto& interpreter_path : interpreter_paths) { |
| if (!fs::exists(interpreter_path) || !fs::is_regular_file(interpreter_path)) { |
| return Status::NotFound("Interpreter path not found: {}", interpreter_path); |
| } |
| std::string python_full_version; |
| RETURN_IF_ERROR(extract_python_version(interpreter_path, &python_full_version)); |
| size_t pos = python_full_version.find_last_of('.'); |
| if (UNLIKELY(pos == std::string::npos)) { |
| return Status::InvalidArgument("Invalid python version: {}", python_full_version); |
| } |
| // Extract major.minor version (e.g., "3.12" from "3.12.0") |
| std::string python_major_minor_version = python_full_version.substr(0, pos); |
| |
| std::string env_name = fmt::format("python{}", python_full_version); // e.g. "python3.9.16" |
| std::string env_base_path = fmt::format("{}/{}", venv_root_path.string(), |
| env_name); // e.g. "/opt/venv/python3.9.16" |
| std::string python_path = |
| fmt::format("{}/bin/python", env_base_path); // e.g. "/{venv_base_path}/bin/python" |
| |
| if (!fs::exists(env_base_path) || !fs::exists(python_path)) { |
| fs::create_directories(env_base_path); |
| std::string create_venv_cmd = |
| fmt::format("{} -m venv {}", interpreter_path, env_base_path); |
| |
| if (system(create_venv_cmd.c_str()) != 0 || !fs::exists(python_path)) { |
| return Status::RuntimeError("Failed to create python virtual environment, cmd: {}", |
| create_venv_cmd); |
| } |
| } |
| |
| // Use major.minor version for site-packages path (e.g., "python3.12") |
| std::string python_dependency_path = fmt::format("{}/lib/python{}/site-packages", |
| env_base_path, python_major_minor_version); |
| |
| if (!fs::exists(python_dependency_path)) { |
| return Status::NotFound("Python dependency path not found: {}", python_dependency_path); |
| } |
| |
| PythonVersion python_version(python_full_version, env_base_path, python_path); |
| PythonEnvironment venv_env(env_name, python_version); |
| |
| if (UNLIKELY(!venv_env.is_valid())) { |
| LOG(WARNING) << "Invalid venv environment: " << venv_env.to_string(); |
| continue; |
| } |
| |
| environments->push_back(std::move(venv_env)); |
| } |
| |
| if (environments->empty()) { |
| return Status::NotFound("No venv python environments found"); |
| } |
| |
| return Status::OK(); |
| } |
| |
| Status PythonEnvScanner::get_versions(std::vector<PythonVersion>* versions) const { |
| DCHECK(versions != nullptr); |
| if (_envs.empty()) { |
| return Status::InternalError("not found available version"); |
| } |
| for (const auto& env : _envs) { |
| versions->push_back(env.python_version); |
| } |
| return Status::OK(); |
| } |
| |
| Status PythonEnvScanner::get_version(const std::string& runtime_version, |
| PythonVersion* version) const { |
| if (_envs.empty()) { |
| return Status::InternalError("not found available version"); |
| } |
| std::string_view runtime_version_view(runtime_version); |
| runtime_version_view = trim(runtime_version_view); |
| for (const auto& env : _envs) { |
| if (env.python_version.full_version == runtime_version_view) { |
| *version = env.python_version; |
| return Status::OK(); |
| } |
| } |
| return Status::NotFound("not found runtime version: {}", runtime_version); |
| } |
| |
| Status CondaEnvScanner::scan() { |
| RETURN_IF_ERROR(PythonEnvironment::scan_from_conda_root_path(_env_root_path, &_envs)); |
| return Status::OK(); |
| } |
| |
| std::string CondaEnvScanner::to_string() const { |
| std::stringstream ss; |
| ss << "Conda environments: "; |
| for (const auto& conda_env : _envs) { |
| ss << conda_env.to_string() << ", "; |
| } |
| return ss.str(); |
| } |
| |
| Status VenvEnvScanner::scan() { |
| RETURN_IF_ERROR(PythonEnvironment::scan_from_venv_root_path(_env_root_path, _interpreter_paths, |
| &_envs)); |
| return Status::OK(); |
| } |
| |
| std::string VenvEnvScanner::to_string() const { |
| std::stringstream ss; |
| ss << "Venv environments: "; |
| for (const auto& venv_env : _envs) { |
| ss << venv_env.to_string() << ", "; |
| } |
| return ss.str(); |
| } |
| |
| Status PythonVersionManager::init(PythonEnvType env_type, const fs::path& python_root_path, |
| const std::string& python_venv_interpreter_paths) { |
| switch (env_type) { |
| case PythonEnvType::CONDA: { |
| if (!fs::exists(python_root_path) || !fs::is_directory(python_root_path)) { |
| return Status::InvalidArgument("Invalid conda root path: {}", |
| python_root_path.string()); |
| } |
| _env_scanner = std::make_unique<CondaEnvScanner>(python_root_path); |
| break; |
| } |
| case PythonEnvType::VENV: { |
| if (!fs::exists(python_root_path) || !fs::is_directory(python_root_path)) { |
| return Status::InvalidArgument("Invalid venv root path: {}", python_root_path.string()); |
| } |
| std::vector<std::string> interpreter_paths = split(python_venv_interpreter_paths, ":"); |
| if (interpreter_paths.empty()) { |
| return Status::InvalidArgument("Invalid python interpreter paths: {}", |
| python_venv_interpreter_paths); |
| } |
| _env_scanner = std::make_unique<VenvEnvScanner>(python_root_path, interpreter_paths); |
| break; |
| } |
| default: |
| return Status::NotSupported("Unsupported python runtime type: {}", |
| static_cast<int>(env_type)); |
| } |
| std::vector<PythonVersion> versions; |
| RETURN_IF_ERROR(_env_scanner->scan()); |
| RETURN_IF_ERROR(_env_scanner->get_versions(&versions)); |
| RETURN_IF_ERROR(PythonUDFServerManager::instance().init(versions)); |
| return Status::OK(); |
| } |
| |
| } // namespace doris |