| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // This file is copied from |
| // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/cpu-info.cpp |
| // and modified by Doris |
| |
| #include "util/cpu_info.h" |
| |
| #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) |
| #elif defined(__GNUC__) && defined(__ARM_NEON__) |
| /* GCC-compatible compiler, targeting ARM with NEON */ |
| #include <arm_neon.h> |
| #elif defined(__GNUC__) && defined(__IWMMXT__) |
| /* GCC-compatible compiler, targeting ARM with WMMX */ |
| #include <mmintrin.h> |
| #elif (defined(__GNUC__) || defined(__xlC__)) && (defined(__VEC__) || defined(__ALTIVEC__)) |
| /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */ |
| #include <altivec.h> |
| #elif defined(__GNUC__) && defined(__SPE__) |
| /* GCC-compatible compiler, targeting PowerPC with SPE */ |
| #include <spe.h> |
| #endif |
| |
| #ifndef __APPLE__ |
| #include <sys/sysinfo.h> |
| #else |
| #include <sys/sysctl.h> |
| #endif |
| |
| #include <gen_cpp/Metrics_types.h> |
| #include <sched.h> |
| #include <stdlib.h> |
| #include <unistd.h> |
| |
| #include <algorithm> |
| #include <boost/algorithm/string/predicate.hpp> |
| #include <boost/algorithm/string/trim.hpp> |
| // IWYU pragma: no_include <bits/chrono.h> |
| #include <chrono> // IWYU pragma: keep |
| #include <filesystem> |
| #include <fstream> |
| |
| #include "absl/strings/substitute.h" |
| #include "common/config.h" |
| #include "common/env_config.h" |
| #include "gflags/gflags.h" |
| #include "util/cgroup_util.h" |
| #include "util/pretty_printer.h" |
| |
| using boost::algorithm::contains; |
| using boost::algorithm::trim; |
| namespace fs = std::filesystem; |
| using std::max; |
| |
| DECLARE_bool(abort_on_config_error); |
| DEFINE_int32(num_cores, 0, |
| "(Advanced) If > 0, it sets the number of cores available to" |
| " Impala. Setting it to 0 means Impala will use all available cores on the machine" |
| " according to /proc/cpuinfo."); |
| |
| namespace doris { |
| // Helper function to warn if a given file does not contain an expected string as its |
| // first line. If the file cannot be opened, no error is reported. |
| void WarnIfFileNotEqual(const std::string& filename, const std::string& expected, |
| const std::string& warning_text) { |
| std::ifstream file(filename); |
| if (!file) return; |
| std::string line; |
| getline(file, line); |
| if (line != expected) { |
| LOG(ERROR) << "Expected " << expected << ", actual " << line << std::endl << warning_text; |
| } |
| } |
| } // namespace doris |
| |
| namespace doris { |
| |
| bool CpuInfo::initialized_ = false; |
| int64_t CpuInfo::hardware_flags_ = 0; |
| int64_t CpuInfo::original_hardware_flags_; |
| int64_t CpuInfo::cycles_per_ms_; |
| int CpuInfo::num_cores_ = 1; |
| int CpuInfo::max_num_cores_ = 1; |
| std::string CpuInfo::model_name_ = "unknown"; |
| int CpuInfo::max_num_numa_nodes_; |
| std::unique_ptr<int[]> CpuInfo::core_to_numa_node_; |
| std::vector<std::vector<int>> CpuInfo::numa_node_to_cores_; |
| std::vector<int> CpuInfo::numa_node_core_idx_; |
| |
| static struct { |
| std::string name; |
| int64_t flag; |
| } flag_mappings[] = { |
| {"ssse3", CpuInfo::SSSE3}, {"sse4_1", CpuInfo::SSE4_1}, {"sse4_2", CpuInfo::SSE4_2}, |
| {"popcnt", CpuInfo::POPCNT}, {"avx", CpuInfo::AVX}, {"avx2", CpuInfo::AVX2}, |
| }; |
| |
| // Helper function to parse for hardware flags. |
| // values contains a list of space-separated flags. check to see if the flags we |
| // care about are present. |
| // Returns a bitmap of flags. |
| int64_t ParseCPUFlags(const std::string& values) { |
| int64_t flags = 0; |
| for (auto& flag_mapping : flag_mappings) { |
| if (contains(values, flag_mapping.name)) { |
| flags |= flag_mapping.flag; |
| } |
| } |
| return flags; |
| } |
| |
| void CpuInfo::init() { |
| if (initialized_) return; |
| std::string line; |
| std::string name; |
| std::string value; |
| |
| float max_mhz = 0; |
| int physical_num_cores = 0; |
| |
| // maybe use std::thread::hardware_concurrency()? |
| // Read from /proc/cpuinfo |
| std::ifstream cpuinfo("/proc/cpuinfo"); |
| while (cpuinfo) { |
| getline(cpuinfo, line); |
| size_t colon = line.find(':'); |
| if (colon != std::string::npos) { |
| name = line.substr(0, colon - 1); |
| value = line.substr(colon + 1, std::string::npos); |
| trim(name); |
| trim(value); |
| if (name == "flags") { |
| hardware_flags_ |= ParseCPUFlags(value); |
| } else if (name == "cpu MHz") { |
| // Every core will report a different speed. We'll take the max, assuming |
| // that when impala is running, the core will not be in a lower power state. |
| // TODO: is there a more robust way to do this, such as |
| // Window's QueryPerformanceFrequency() |
| float mhz = atof(value.c_str()); |
| max_mhz = max(mhz, max_mhz); |
| } else if (name == "processor") { |
| ++physical_num_cores; |
| } else if (name == "model name") { |
| model_name_ = value; |
| } |
| } |
| } |
| |
| int num_cores = CGroupUtil::get_cgroup_limited_cpu_number(physical_num_cores); |
| if (max_mhz != 0) { |
| cycles_per_ms_ = int64_t(max_mhz) * 1000; |
| } else { |
| cycles_per_ms_ = 1000000; |
| } |
| original_hardware_flags_ = hardware_flags_; |
| |
| if (num_cores > 0) { |
| num_cores_ = num_cores; |
| } else { |
| num_cores_ = 1; |
| } |
| if (config::num_cores > 0) { |
| num_cores_ = config::num_cores; |
| } |
| |
| #ifdef __APPLE__ |
| size_t len = sizeof(max_num_cores_); |
| sysctlbyname("hw.logicalcpu", &max_num_cores_, &len, nullptr, 0); |
| #else |
| max_num_cores_ = get_nprocs_conf(); |
| #endif |
| |
| // Print a warning if something is wrong with sched_getcpu(). |
| #ifdef HAVE_SCHED_GETCPU |
| if (sched_getcpu() == -1) { |
| LOG(WARNING) << "Kernel does not support sched_getcpu(). Performance may be impacted."; |
| } |
| #else |
| LOG(WARNING) << "Built on a system without sched_getcpu() support. Performance may" |
| << " be impacted."; |
| #endif |
| |
| _init_numa(); |
| initialized_ = true; |
| } |
| |
| void CpuInfo::_init_numa() { |
| // Use the NUMA info in the /sys filesystem. which is part of the Linux ABI: |
| // see https://www.kernel.org/doc/Documentation/ABI/stable/sysfs-devices-node and |
| // https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu |
| // The filesystem entries are only present if the kernel was compiled with NUMA support. |
| core_to_numa_node_.reset(new int[max_num_cores_]); |
| |
| if (!fs::is_directory("/sys/devices/system/node")) { |
| LOG(WARNING) << "/sys/devices/system/node is not present - no NUMA support"; |
| // Assume a single NUMA node. |
| max_num_numa_nodes_ = 1; |
| std::fill_n(core_to_numa_node_.get(), max_num_cores_, 0); |
| _init_numa_node_to_cores(); |
| return; |
| } |
| |
| // Search for node subdirectories - node0, node1, node2, etc to determine possible |
| // NUMA nodes. |
| fs::directory_iterator dir_it("/sys/devices/system/node"); |
| max_num_numa_nodes_ = 0; |
| for (; dir_it != fs::directory_iterator(); ++dir_it) { |
| const std::string filename = dir_it->path().filename().string(); |
| if (filename.find("node") == 0) ++max_num_numa_nodes_; |
| } |
| if (max_num_numa_nodes_ == 0) { |
| LOG(WARNING) << "Could not find nodes in /sys/devices/system/node"; |
| max_num_numa_nodes_ = 1; |
| } |
| |
| // Check which NUMA node each core belongs to based on the existence of a symlink |
| // to the node subdirectory. |
| for (int core = 0; core < max_num_cores_; ++core) { |
| bool found_numa_node = false; |
| for (int node = 0; node < max_num_numa_nodes_; ++node) { |
| if (fs::exists(absl::Substitute("/sys/devices/system/cpu/cpu$0/node$1", core, node))) { |
| core_to_numa_node_[core] = node; |
| found_numa_node = true; |
| break; |
| } |
| } |
| if (!found_numa_node) { |
| LOG(WARNING) << "Could not determine NUMA node for core " << core |
| << " from /sys/devices/system/cpu/"; |
| core_to_numa_node_[core] = 0; |
| } |
| } |
| _init_numa_node_to_cores(); |
| } |
| |
| void CpuInfo::_init_fake_numa_for_test(int max_num_numa_nodes, |
| const std::vector<int>& core_to_numa_node) { |
| DCHECK_EQ(max_num_cores_, core_to_numa_node.size()); |
| max_num_numa_nodes_ = max_num_numa_nodes; |
| for (int i = 0; i < max_num_cores_; ++i) { |
| core_to_numa_node_[i] = core_to_numa_node[i]; |
| } |
| numa_node_to_cores_.clear(); |
| _init_numa_node_to_cores(); |
| } |
| |
| void CpuInfo::_init_numa_node_to_cores() { |
| DCHECK(numa_node_to_cores_.empty()); |
| numa_node_to_cores_.resize(max_num_numa_nodes_); |
| numa_node_core_idx_.resize(max_num_cores_); |
| for (int core = 0; core < max_num_cores_; ++core) { |
| std::vector<int>* cores_of_node = &numa_node_to_cores_[core_to_numa_node_[core]]; |
| numa_node_core_idx_[core] = cores_of_node->size(); |
| cores_of_node->push_back(core); |
| } |
| } |
| |
| void CpuInfo::verify_cpu_requirements() { |
| if (!CpuInfo::is_supported(CpuInfo::SSSE3)) { |
| LOG(ERROR) << "CPU does not support the Supplemental SSE3 (SSSE3) instruction set. " |
| << "This setup is generally unsupported and Impala might be unstable."; |
| } |
| } |
| |
| void CpuInfo::verify_performance_governor() { |
| for (int cpu_id = 0; cpu_id < CpuInfo::num_cores(); ++cpu_id) { |
| const std::string governor_file = |
| absl::Substitute("/sys/devices/system/cpu/cpu$0/cpufreq/scaling_governor", cpu_id); |
| const std::string warning_text = absl::Substitute( |
| "WARNING: CPU $0 is not using 'performance' governor. Note that changing the " |
| "governor to 'performance' will reset the no_turbo setting to 0.", |
| cpu_id); |
| WarnIfFileNotEqual(governor_file, "performance", warning_text); |
| } |
| } |
| |
| void CpuInfo::verify_turbo_disabled() { |
| WarnIfFileNotEqual( |
| "/sys/devices/system/cpu/intel_pstate/no_turbo", "1", |
| "WARNING: CPU turbo is enabled. This setting can change the clock frequency of CPU " |
| "cores during the benchmark run, which can lead to inaccurate results. You can " |
| "disable CPU turbo by writing a 1 to " |
| "/sys/devices/system/cpu/intel_pstate/no_turbo. Note that changing the governor to " |
| "'performance' will reset this to 0."); |
| } |
| |
| void CpuInfo::enable_feature(long flag, bool enable) { |
| DCHECK(initialized_); |
| if (!enable) { |
| hardware_flags_ &= ~flag; |
| } else { |
| // Can't turn something on that can't be supported |
| DCHECK((original_hardware_flags_ & flag) != 0); |
| hardware_flags_ |= flag; |
| } |
| } |
| |
| int CpuInfo::get_current_core() { |
| // sched_getcpu() is not supported on some old kernels/glibcs (like the versions that |
| // shipped with CentOS 5). In that case just pretend we're always running on CPU 0 |
| // so that we can build and run with degraded perf. |
| #ifdef HAVE_SCHED_GETCPU |
| int cpu = sched_getcpu(); |
| if (cpu < 0) return 0; |
| if (cpu >= max_num_cores_) { |
| LOG_FIRST_N(WARNING, 5) << "sched_getcpu() return value " << cpu |
| << ", which is greater than get_nprocs_conf() retrun value " |
| << max_num_cores_ << ", now is " << get_nprocs_conf(); |
| cpu %= max_num_cores_; |
| } |
| return cpu; |
| #else |
| return 0; |
| #endif |
| } |
| |
| void CpuInfo::_get_cache_info(long cache_sizes[NUM_CACHE_LEVELS], |
| long cache_line_sizes[NUM_CACHE_LEVELS]) { |
| #ifdef __APPLE__ |
| // On Mac OS X use sysctl() to get the cache sizes |
| size_t len = 0; |
| sysctlbyname("hw.cachesize", nullptr, &len, nullptr, 0); |
| uint64_t* data = static_cast<uint64_t*>(malloc(len)); |
| sysctlbyname("hw.cachesize", data, &len, nullptr, 0); |
| #ifndef __arm64__ |
| DCHECK(len / sizeof(uint64_t) >= 3); |
| for (size_t i = 0; i < NUM_CACHE_LEVELS; ++i) { |
| cache_sizes[i] = data[i]; |
| } |
| #else |
| for (size_t i = 0; i < NUM_CACHE_LEVELS; ++i) { |
| cache_sizes[i] = data[i + 1]; |
| } |
| #endif |
| size_t linesize; |
| size_t sizeof_linesize = sizeof(linesize); |
| sysctlbyname("hw.cachelinesize", &linesize, &sizeof_linesize, nullptr, 0); |
| for (size_t i = 0; i < NUM_CACHE_LEVELS; ++i) cache_line_sizes[i] = linesize; |
| #else |
| // Call sysconf to query for the cache sizes |
| // Note: on some systems (e.g. RHEL 5 on AWS EC2), this returns 0 instead of the |
| // actual cache line size. |
| cache_sizes[L1_CACHE] = sysconf(_SC_LEVEL1_DCACHE_SIZE); |
| cache_sizes[L2_CACHE] = sysconf(_SC_LEVEL2_CACHE_SIZE); |
| cache_sizes[L3_CACHE] = sysconf(_SC_LEVEL3_CACHE_SIZE); |
| |
| cache_line_sizes[L1_CACHE] = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); |
| cache_line_sizes[L2_CACHE] = sysconf(_SC_LEVEL2_CACHE_LINESIZE); |
| cache_line_sizes[L3_CACHE] = sysconf(_SC_LEVEL3_CACHE_LINESIZE); |
| #endif |
| } |
| |
| std::string CpuInfo::debug_string() { |
| DCHECK(initialized_); |
| std::stringstream stream; |
| long cache_sizes[NUM_CACHE_LEVELS]; |
| long cache_line_sizes[NUM_CACHE_LEVELS]; |
| _get_cache_info(cache_sizes, cache_line_sizes); |
| |
| std::string L1 = absl::Substitute( |
| "L1 Cache: $0 (Line: $1)", |
| PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L1_CACHE]), TUnit::BYTES), |
| PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L1_CACHE]), TUnit::BYTES)); |
| std::string L2 = absl::Substitute( |
| "L2 Cache: $0 (Line: $1)", |
| PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L2_CACHE]), TUnit::BYTES), |
| PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L2_CACHE]), TUnit::BYTES)); |
| std::string L3 = |
| cache_sizes[L3_CACHE] |
| ? absl::Substitute( |
| "L3 Cache: $0 (Line: $1)", |
| PrettyPrinter::print(static_cast<int64_t>(cache_sizes[L3_CACHE]), |
| TUnit::BYTES), |
| PrettyPrinter::print(static_cast<int64_t>(cache_line_sizes[L3_CACHE]), |
| TUnit::BYTES)) |
| : ""; |
| stream << "Cpu Info:" << std::endl |
| << " Model: " << model_name_ << std::endl |
| << " Cores: " << num_cores_ << std::endl |
| << " Max Possible Cores: " << max_num_cores_ << std::endl |
| << " " << L1 << std::endl |
| << " " << L2 << std::endl |
| << " " << L3 << std::endl |
| << " Hardware Supports:" << std::endl; |
| for (auto& flag_mapping : flag_mappings) { |
| if (is_supported(flag_mapping.flag)) { |
| stream << " " << flag_mapping.name << std::endl; |
| } |
| } |
| stream << " Numa Nodes: " << max_num_numa_nodes_ << std::endl; |
| stream << " Numa Nodes of Cores:"; |
| for (int core = 0; core < max_num_cores_; ++core) { |
| stream << " " << core << "->" << core_to_numa_node_[core] << " |"; |
| } |
| stream << std::endl; |
| return stream.str(); |
| } |
| |
| } // namespace doris |