| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // This file is copied from |
| // https://github.com/apache/impala/blob/branch-2.9.0/be/src/util/perf-counters.cpp |
| // and modified by Doris |
| |
| #include "util/perf_counters.h" |
| |
| #include <linux/perf_event.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/syscall.h> |
| #include <unistd.h> |
| |
| #include <boost/algorithm/string/trim.hpp> |
| #include <fstream> // IWYU pragma: keep |
| #include <iomanip> |
| #include <iostream> |
| #include <unordered_map> |
| #include <utility> |
| |
| #include "gutil/stringprintf.h" |
| #include "gutil/strings/substitute.h" |
| #include "util/pretty_printer.h" |
| #include "util/string_parser.hpp" |
| #include "util/string_util.h" |
| |
| namespace doris { |
| |
| #define COUNTER_SIZE (sizeof(void*)) |
| #define PRETTY_PRINT_WIDTH 13 |
| |
| static std::unordered_map<std::string, std::string> _process_state; |
| |
| int64_t PerfCounters::_vm_rss = 0; |
| std::string PerfCounters::_vm_rss_str = ""; |
| int64_t PerfCounters::_vm_hwm = 0; |
| int64_t PerfCounters::_vm_size = 0; |
| int64_t PerfCounters::_vm_peak = 0; |
| |
| // This is the order of the counters in /proc/self/io |
| enum PERF_IO_IDX { |
| PROC_IO_READ = 0, |
| PROC_IO_WRITE, |
| PROC_IO_SYS_RREAD, |
| PROC_IO_SYS_WRITE, |
| PROC_IO_DISK_READ, |
| PROC_IO_DISK_WRITE, |
| PROC_IO_CANCELLED_WRITE, |
| PROC_IO_LAST_COUNTER, |
| }; |
| |
| // Wrapper around sys call. This syscall is hard to use and this is how it is recommended |
| // to be used. |
| static inline int sys_perf_event_open(struct perf_event_attr* attr, pid_t pid, int cpu, |
| int group_fd, unsigned long flags) { |
| attr->size = sizeof(*attr); |
| return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); |
| } |
| |
| // Remap PerfCounters::Counter to Linux kernel enums |
| static bool init_event_attr(perf_event_attr* attr, PerfCounters::Counter counter) { |
| memset(attr, 0, sizeof(perf_event_attr)); |
| |
| switch (counter) { |
| case PerfCounters::PERF_COUNTER_SW_CPU_CLOCK: |
| attr->type = PERF_TYPE_SOFTWARE; |
| attr->config = PERF_COUNT_SW_CPU_CLOCK; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_SW_PAGE_FAULTS: |
| attr->type = PERF_TYPE_SOFTWARE; |
| attr->config = PERF_COUNT_SW_PAGE_FAULTS; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_SW_CONTEXT_SWITCHES: |
| attr->type = PERF_TYPE_SOFTWARE; |
| attr->config = PERF_COUNT_SW_PAGE_FAULTS; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_SW_CPU_MIGRATIONS: |
| attr->type = PERF_TYPE_SOFTWARE; |
| attr->config = PERF_COUNT_SW_CPU_MIGRATIONS; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_HW_CPU_CYCLES: |
| attr->type = PERF_TYPE_HARDWARE; |
| attr->config = PERF_COUNT_HW_CPU_CYCLES; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_HW_INSTRUCTIONS: |
| attr->type = PERF_TYPE_HARDWARE; |
| attr->config = PERF_COUNT_HW_INSTRUCTIONS; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_HW_CACHE_HIT: |
| attr->type = PERF_TYPE_HARDWARE; |
| attr->config = PERF_COUNT_HW_CACHE_REFERENCES; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_HW_CACHE_MISSES: |
| attr->type = PERF_TYPE_HARDWARE; |
| attr->config = PERF_COUNT_HW_CACHE_MISSES; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_HW_BRANCHES: |
| attr->type = PERF_TYPE_HARDWARE; |
| attr->config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_HW_BRANCH_MISSES: |
| attr->type = PERF_TYPE_HARDWARE; |
| attr->config = PERF_COUNT_HW_BRANCH_MISSES; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_HW_BUS_CYCLES: |
| attr->type = PERF_TYPE_HARDWARE; |
| attr->config = PERF_COUNT_HW_BUS_CYCLES; |
| break; |
| |
| default: |
| return false; |
| } |
| |
| return true; |
| } |
| |
| static std::string get_counter_name(PerfCounters::Counter counter) { |
| switch (counter) { |
| case PerfCounters::PERF_COUNTER_SW_CPU_CLOCK: |
| return "CPUTime"; |
| |
| case PerfCounters::PERF_COUNTER_SW_PAGE_FAULTS: |
| return "PageFaults"; |
| |
| case PerfCounters::PERF_COUNTER_SW_CONTEXT_SWITCHES: |
| return "ContextSwitches"; |
| |
| case PerfCounters::PERF_COUNTER_SW_CPU_MIGRATIONS: |
| return "CPUMigrations"; |
| |
| case PerfCounters::PERF_COUNTER_HW_CPU_CYCLES: |
| return "HWCycles"; |
| |
| case PerfCounters::PERF_COUNTER_HW_INSTRUCTIONS: |
| return "Instructions"; |
| |
| case PerfCounters::PERF_COUNTER_HW_CACHE_HIT: |
| return "CacheHit"; |
| |
| case PerfCounters::PERF_COUNTER_HW_CACHE_MISSES: |
| return "CacheMiss"; |
| |
| case PerfCounters::PERF_COUNTER_HW_BRANCHES: |
| return "Branches"; |
| |
| case PerfCounters::PERF_COUNTER_HW_BRANCH_MISSES: |
| return "BranchMiss"; |
| |
| case PerfCounters::PERF_COUNTER_HW_BUS_CYCLES: |
| return "BusCycles"; |
| |
| case PerfCounters::PERF_COUNTER_VM_USAGE: |
| return "VmUsage"; |
| |
| case PerfCounters::PERF_COUNTER_VM_PEAK_USAGE: |
| return "PeakVmUsage"; |
| |
| case PerfCounters::PERF_COUNTER_RESIDENT_SET_SIZE: |
| return "WorkingSet"; |
| |
| case PerfCounters::PERF_COUNTER_BYTES_READ: |
| return "BytesRead"; |
| |
| case PerfCounters::PERF_COUNTER_BYTES_WRITE: |
| return "BytesWritten"; |
| |
| case PerfCounters::PERF_COUNTER_DISK_READ: |
| return "DiskRead"; |
| |
| case PerfCounters::PERF_COUNTER_DISK_WRITE: |
| return "DiskWrite"; |
| |
| default: |
| return ""; |
| } |
| } |
| |
| bool PerfCounters::init_sys_counter(Counter counter) { |
| CounterData data; |
| data.counter = counter; |
| data.source = PerfCounters::SYS_PERF_COUNTER; |
| data.fd = -1; |
| perf_event_attr attr; |
| |
| if (!init_event_attr(&attr, counter)) { |
| return false; |
| } |
| |
| int fd = sys_perf_event_open(&attr, getpid(), -1, _group_fd, 0); |
| |
| if (fd < 0) { |
| return false; |
| } |
| |
| if (_group_fd == -1) { |
| _group_fd = fd; |
| } |
| |
| data.fd = fd; |
| |
| if (counter == PERF_COUNTER_SW_CPU_CLOCK) { |
| data.type = TUnit::TIME_NS; |
| } else { |
| data.type = TUnit::UNIT; |
| } |
| |
| _counters.push_back(data); |
| return true; |
| } |
| |
| bool PerfCounters::init_proc_self_io_counter(Counter counter) { |
| CounterData data; |
| data.counter = counter; |
| data.source = PerfCounters::PROC_SELF_IO; |
| data.type = TUnit::BYTES; |
| |
| switch (counter) { |
| case PerfCounters::PERF_COUNTER_BYTES_READ: |
| data.proc_io_line_number = PROC_IO_READ; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_BYTES_WRITE: |
| data.proc_io_line_number = PROC_IO_WRITE; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_DISK_READ: |
| data.proc_io_line_number = PROC_IO_DISK_READ; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_DISK_WRITE: |
| data.proc_io_line_number = PROC_IO_DISK_WRITE; |
| break; |
| |
| default: |
| return false; |
| } |
| |
| _counters.push_back(data); |
| return true; |
| } |
| |
| bool PerfCounters::init_proc_self_status_counter(Counter counter) { |
| CounterData data {}; |
| data.counter = counter; |
| data.source = PerfCounters::PROC_SELF_STATUS; |
| data.type = TUnit::BYTES; |
| |
| switch (counter) { |
| case PerfCounters::PERF_COUNTER_VM_USAGE: |
| data.proc_status_field = "VmSize"; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_VM_PEAK_USAGE: |
| data.proc_status_field = "VmPeak"; |
| break; |
| |
| case PerfCounters::PERF_COUNTER_RESIDENT_SET_SIZE: |
| data.proc_status_field = "VmRS"; |
| break; |
| |
| default: |
| return false; |
| } |
| |
| _counters.push_back(data); |
| return true; |
| } |
| |
| bool PerfCounters::get_sys_counters(std::vector<int64_t>& buffer) { |
| for (int i = 0; i < _counters.size(); i++) { |
| if (_counters[i].source == SYS_PERF_COUNTER) { |
| int num_bytes = read(_counters[i].fd, &buffer[i], COUNTER_SIZE); |
| |
| if (num_bytes != COUNTER_SIZE) { |
| return false; |
| } |
| |
| if (_counters[i].type == TUnit::TIME_NS) { |
| buffer[i] /= 1000000; |
| } |
| } |
| } |
| |
| return true; |
| } |
| |
| // Parse out IO counters from /proc/self/io. The file contains a list of |
| // (name,byte) pairs. |
| // For example: |
| // rchar: 210212 |
| // wchar: 94 |
| // syscr: 118 |
| // syscw: 3 |
| // read_bytes: 0 |
| // write_bytes: 0 |
| // cancelled_write_bytes: 0 |
| bool PerfCounters::get_proc_self_io_counters(std::vector<int64_t>& buffer) { |
| std::ifstream file("/proc/self/io", std::ios::in); |
| std::string buf; |
| int64_t values[PROC_IO_LAST_COUNTER]; |
| int ret = 0; |
| |
| for (int i = 0; i < PROC_IO_LAST_COUNTER; ++i) { |
| if (!file) { |
| ret = -1; |
| break; |
| } |
| |
| getline(file, buf); |
| size_t colon = buf.find(':'); |
| |
| if (colon == std::string::npos) { |
| ret = -1; |
| break; |
| } |
| |
| buf = buf.substr(colon + 1); |
| std::istringstream stream(buf); |
| stream >> values[i]; |
| } |
| |
| if (ret == 0) { |
| for (int i = 0; i < _counters.size(); ++i) { |
| if (_counters[i].source == PROC_SELF_IO) { |
| buffer[i] = values[_counters[i].proc_io_line_number]; |
| } |
| } |
| } |
| |
| if (file.is_open()) { |
| file.close(); |
| } |
| |
| return true; |
| } |
| |
| bool PerfCounters::get_proc_self_status_counters(std::vector<int64_t>& buffer) { |
| std::ifstream file("/proc/self/status", std::ios::in); |
| std::string buf; |
| |
| while (file) { |
| getline(file, buf); |
| |
| for (int i = 0; i < _counters.size(); ++i) { |
| if (_counters[i].source == PROC_SELF_STATUS) { |
| size_t field = buf.find(_counters[i].proc_status_field); |
| |
| if (field == std::string::npos) { |
| continue; |
| } |
| |
| size_t colon = field + _counters[i].proc_status_field.size() + 1; |
| buf = buf.substr(colon + 1); |
| std::istringstream stream(buf); |
| int64_t value; |
| stream >> value; |
| buffer[i] = value * 1024; // values in file are in kb |
| } |
| } |
| } |
| |
| if (file.is_open()) { |
| file.close(); |
| } |
| |
| return true; |
| } |
| |
| PerfCounters::PerfCounters() : _group_fd(-1) {} |
| |
| // Close all fds for the counters |
| PerfCounters::~PerfCounters() { |
| for (int i = 0; i < _counters.size(); ++i) { |
| if (_counters[i].source == SYS_PERF_COUNTER) { |
| close(_counters[i].fd); |
| } |
| } |
| } |
| |
| // Add here the default ones that are most useful |
| bool PerfCounters::add_default_counters() { |
| bool result = true; |
| result &= add_counter(PERF_COUNTER_SW_CPU_CLOCK); |
| // These hardware ones don't work on a vm, just ignore if they fail |
| // TODO: these don't work reliably and aren't that useful. Turn them off. |
| //add_counter(PERF_COUNTER_HW_INSTRUCTIONS); |
| //add_counter(PERF_COUNTER_HW_CPU_CYCLES); |
| //add_counter(PERF_COUNTER_HW_BRANCHES); |
| //add_counter(PERF_COUNTER_HW_BRANCH_MISSES); |
| //add_counter(PERF_COUNTER_HW_CACHE_MISSES); |
| add_counter(PERF_COUNTER_VM_USAGE); |
| add_counter(PERF_COUNTER_VM_PEAK_USAGE); |
| add_counter(PERF_COUNTER_RESIDENT_SET_SIZE); |
| result &= add_counter(PERF_COUNTER_DISK_READ); |
| return result; |
| } |
| |
| // Add a specific counter |
| bool PerfCounters::add_counter(Counter counter) { |
| // Ignore if it's already added. |
| for (int i = 0; i < _counters.size(); ++i) { |
| if (_counters[i].counter == counter) { |
| return true; |
| } |
| } |
| |
| bool result = false; |
| |
| switch (counter) { |
| case PerfCounters::PERF_COUNTER_SW_CPU_CLOCK: |
| case PerfCounters::PERF_COUNTER_SW_PAGE_FAULTS: |
| case PerfCounters::PERF_COUNTER_SW_CONTEXT_SWITCHES: |
| case PerfCounters::PERF_COUNTER_SW_CPU_MIGRATIONS: |
| case PerfCounters::PERF_COUNTER_HW_CPU_CYCLES: |
| case PerfCounters::PERF_COUNTER_HW_INSTRUCTIONS: |
| case PerfCounters::PERF_COUNTER_HW_CACHE_HIT: |
| case PerfCounters::PERF_COUNTER_HW_CACHE_MISSES: |
| case PerfCounters::PERF_COUNTER_HW_BRANCHES: |
| case PerfCounters::PERF_COUNTER_HW_BRANCH_MISSES: |
| case PerfCounters::PERF_COUNTER_HW_BUS_CYCLES: |
| result = init_sys_counter(counter); |
| break; |
| |
| case PerfCounters::PERF_COUNTER_BYTES_READ: |
| case PerfCounters::PERF_COUNTER_BYTES_WRITE: |
| case PerfCounters::PERF_COUNTER_DISK_READ: |
| case PerfCounters::PERF_COUNTER_DISK_WRITE: |
| result = init_proc_self_io_counter(counter); |
| break; |
| |
| case PerfCounters::PERF_COUNTER_VM_USAGE: |
| case PerfCounters::PERF_COUNTER_VM_PEAK_USAGE: |
| case PerfCounters::PERF_COUNTER_RESIDENT_SET_SIZE: |
| result = init_proc_self_status_counter(counter); |
| break; |
| |
| default: |
| return false; |
| } |
| |
| if (result) { |
| _counter_names.push_back(get_counter_name(counter)); |
| } |
| |
| return result; |
| } |
| |
| // Query all the counters right now and store the values in results |
| void PerfCounters::snapshot(const std::string& name) { |
| if (_counters.size() == 0) { |
| return; |
| } |
| |
| std::string fixed_name = name; |
| |
| if (fixed_name.size() == 0) { |
| std::stringstream ss; |
| ss << _snapshots.size() + 1; |
| fixed_name = ss.str(); |
| } |
| |
| std::vector<int64_t> buffer(_counters.size()); |
| |
| get_sys_counters(buffer); |
| get_proc_self_io_counters(buffer); |
| get_proc_self_status_counters(buffer); |
| |
| _snapshots.push_back(buffer); |
| _snapshot_names.push_back(fixed_name); |
| } |
| |
| const std::vector<int64_t>* PerfCounters::counters(int snapshot) const { |
| if (snapshot < 0 || snapshot >= _snapshots.size()) { |
| return nullptr; |
| } |
| |
| return &_snapshots[snapshot]; |
| } |
| |
| void PerfCounters::pretty_print(std::ostream* s) const { |
| std::ostream& stream = *s; |
| stream << std::setw(8) << "snapshot"; |
| |
| for (int i = 0; i < _counter_names.size(); ++i) { |
| stream << std::setw(PRETTY_PRINT_WIDTH) << _counter_names[i]; |
| } |
| |
| stream << std::endl; |
| |
| for (int s = 0; s < _snapshots.size(); s++) { |
| stream << std::setw(8) << _snapshot_names[s]; |
| const std::vector<int64_t>& snapshot = _snapshots[s]; |
| |
| for (int i = 0; i < snapshot.size(); ++i) { |
| stream << std::setw(PRETTY_PRINT_WIDTH) |
| << PrettyPrinter::print(snapshot[i], _counters[i].type); |
| } |
| |
| stream << std::endl; |
| } |
| |
| stream << std::endl; |
| } |
| |
| // Refactor below |
| |
| int PerfCounters::parse_int(const string& state_key) { |
| auto it = _process_state.find(state_key); |
| if (it != _process_state.end()) return atoi(it->second.c_str()); |
| return -1; |
| } |
| |
| int64_t PerfCounters::parse_int64(const string& state_key) { |
| auto it = _process_state.find(state_key); |
| if (it != _process_state.end()) { |
| StringParser::ParseResult result; |
| int64_t state_value = |
| StringParser::string_to_int<int64_t>(it->second.data(), it->second.size(), &result); |
| if (result == StringParser::PARSE_SUCCESS) return state_value; |
| } |
| return -1; |
| } |
| |
| string PerfCounters::parse_string(const string& state_key) { |
| auto it = _process_state.find(state_key); |
| if (it != _process_state.end()) return it->second; |
| return string(); |
| } |
| |
| int64_t PerfCounters::parse_bytes(const string& state_key) { |
| auto it = _process_state.find(state_key); |
| if (it != _process_state.end()) { |
| vector<string> fields = split(it->second, " "); |
| // We expect state_value such as, e.g., '16129508', '16129508 kB', '16129508 mB' |
| StringParser::ParseResult result; |
| int64_t state_value = |
| StringParser::string_to_int<int64_t>(fields[0].data(), fields[0].size(), &result); |
| if (result == StringParser::PARSE_SUCCESS) { |
| if (fields.size() < 2) return state_value; |
| if (fields[1].compare("kB") == 0) return state_value * 1024L; |
| } |
| } |
| return -1; |
| } |
| |
| void PerfCounters::refresh_proc_status() { |
| std::ifstream statusinfo("/proc/self/status", std::ios::in); |
| std::string line; |
| while (statusinfo.good() && !statusinfo.eof()) { |
| getline(statusinfo, line); |
| std::vector<std::string> fields = split(line, "\t"); |
| if (fields.size() < 2) continue; |
| boost::algorithm::trim(fields[1]); |
| std::string key = fields[0].substr(0, fields[0].size() - 1); |
| _process_state[strings::Substitute("status/$0", key)] = fields[1]; |
| } |
| |
| if (statusinfo.is_open()) statusinfo.close(); |
| |
| _vm_size = parse_bytes("status/VmSize"); |
| _vm_peak = parse_bytes("status/VmPeak"); |
| _vm_rss = parse_bytes("status/VmRSS"); |
| #ifdef ADDRESS_SANITIZER |
| _vm_rss_str = "[ASAN]" + PrettyPrinter::print(_vm_rss, TUnit::BYTES); |
| #else |
| _vm_rss_str = PrettyPrinter::print(_vm_rss, TUnit::BYTES); |
| #endif |
| _vm_hwm = parse_bytes("status/VmHWM"); |
| } |
| |
| void PerfCounters::get_proc_status(ProcStatus* out) { |
| out->vm_size = parse_bytes("status/VmSize"); |
| out->vm_peak = parse_bytes("status/VmPeak"); |
| out->vm_rss = parse_bytes("status/VmRSS"); |
| out->vm_hwm = parse_bytes("status/VmHWM"); |
| } |
| |
| } // namespace doris |