blob: a5868d43b981f89a78e03823f37b44baf73cfbda [file] [log] [blame]
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Abort as soon as any unguarded command fails.
set -e

# Work from the directory holding this script, so the relative paths used
# further down (e.g. build/) resolve the same way from any caller directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$SCRIPT_DIR"

# Option defaults; the command-line flags parsed below override them.
OUTPUT_DIR='profile_output'
DURATION='5'
FILTER=''
SERIALIZER=''
DATA=''

# ANSI color escapes (interpreted by `echo -e`).
NC='\033[0m' # reset to the terminal default
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'
# Help text and command-line argument parsing
# Print the help text to stdout and terminate the script with status 0.
# Takes no arguments; "$0" is interpolated into the usage and example lines.
usage() {
  cat << EOF
Usage: $0 [OPTIONS]

Generate flamegraph/profile for C++ benchmarks

Options:
 --filter <pattern> Custom benchmark filter (regex pattern)
 --data <struct|sample> Filter benchmark by data type
 --serializer <fory|protobuf> Filter benchmark by serializer
 --duration <seconds> Profiling duration (default: 5)
 --output-dir <dir> Output directory (default: profile_output)
 --help Show this help message

Examples:
 $0 # Profile all benchmarks
 $0 --data struct --serializer fory # Profile Fory Struct benchmarks
 $0 --serializer protobuf --duration 10
 $0 --filter BM_Fory_Struct_Serialize # Profile specific benchmark

Supported profiling tools (in order of preference):
 - samply (recommended): cargo install samply
 - perf (Linux)
 - sample (macOS built-in)
 - dtrace (macOS, may require sudo)
EOF
  exit 0
}
# Parse command-line options into the FILTER, DATA, SERIALIZER, DURATION and
# OUTPUT_DIR globals.
#
# Exit status:
#   0  after printing the help text for --help / -h (usage() exits itself)
#   1  for an unknown option, or for an option given without its value
while [[ $# -gt 0 ]]; do
  case "$1" in
    --filter | --data | --serializer | --duration | --output-dir)
      # Every one of these options consumes a value. Without this check a
      # trailing option makes `shift 2` fail, which under `set -e` kills the
      # script without any message.
      if [[ $# -lt 2 ]]; then
        echo -e "${RED}Missing value for option: $1${NC}" >&2
        exit 1
      fi
      case "$1" in
        --filter) FILTER="$2" ;;
        --data) DATA="$2" ;;
        --serializer) SERIALIZER="$2" ;;
        --duration) DURATION="$2" ;;
        --output-dir) OUTPUT_DIR="$2" ;;
      esac
      shift 2
      ;;
    --help | -h)
      usage
      ;;
    *)
      echo -e "${RED}Unknown option: $1${NC}" >&2
      # usage() finishes with `exit 0`; run it in a subshell so the help text
      # is still shown but the script itself reports failure.
      (usage) >&2 || true
      exit 1
      ;;
  esac
done
# Derive the benchmark filter from --data / --serializer when no explicit
# --filter was given. Each value gets its first letter upper-cased and the
# pieces are joined as "<Serializer>_<Data>"; either piece may be absent.
if [[ -z "$FILTER" ]]; then
  if [[ -n "$DATA" ]]; then
    first_letter="$(tr '[:lower:]' '[:upper:]' <<< "${DATA:0:1}")"
    DATA_CAP="${first_letter}${DATA:1}"
    FILTER="$DATA_CAP"
  fi
  if [[ -n "$SERIALIZER" ]]; then
    first_letter="$(tr '[:lower:]' '[:upper:]' <<< "${SERIALIZER:0:1}")"
    SER_CAP="${first_letter}${SERIALIZER:1}"
    # Prefix the serializer; the "_<data>" suffix appears only if DATA set one.
    FILTER="${SER_CAP}${FILTER:+_${FILTER}}"
  fi
fi
# Nothing can be profiled without the benchmark binary: stop with status 1
# unless build/fory_benchmark exists as a regular file.
[[ -f "build/fory_benchmark" ]] || {
  echo -e "${RED}Benchmark not found. Run ./run.sh first to build.${NC}"
  exit 1
}
# Locate the FlameGraph scripts and record the result in FLAMEGRAPH_DIR:
#   - the first existing directory among ~/FlameGraph and /usr/share/FlameGraph
#   - the literal marker "PATH" when flamegraph.pl is found on $PATH
#   - otherwise ~/FlameGraph, after cloning the repository into it
FLAMEGRAPH_DIR=""
for flamegraph_candidate in "$HOME/FlameGraph" "/usr/share/FlameGraph"; do
  if [[ -d "$flamegraph_candidate" ]]; then
    FLAMEGRAPH_DIR="$flamegraph_candidate"
    break
  fi
done
if [[ -z "$FLAMEGRAPH_DIR" ]]; then
  if command -v flamegraph.pl &> /dev/null; then
    # Marker value: later code calls the scripts by bare name in this case.
    FLAMEGRAPH_DIR="PATH"
  else
    # Auto-install FlameGraph tools
    echo -e "${YELLOW}FlameGraph tools not found. Installing to ~/FlameGraph...${NC}"
    git clone --depth 1 https://github.com/brendangregg/FlameGraph.git "$HOME/FlameGraph"
    FLAMEGRAPH_DIR="$HOME/FlameGraph"
    echo -e "${GREEN}FlameGraph installed successfully.${NC}"
  fi
fi
# Make sure the output directory exists, then continue from build/.
mkdir -p "$OUTPUT_DIR"
cd build

# OS name (used to pick a platform-specific profiler) and a timestamp that is
# embedded in the names of the generated files.
OS_TYPE="$(uname -s)"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)

# Benchmark command line; the filter flag is appended only when FILTER is set.
BENCH_CMD="./fory_benchmark --benchmark_min_time=${DURATION}s${FILTER:+ --benchmark_filter=$FILTER}"

# Summary banner
echo -e "${GREEN}=== Fory C++ Benchmark Profiler ===${NC}"
echo -e "Filter: ${FILTER:-all}"
echo -e "Duration: ${DURATION}s"
echo -e "Output: ${OUTPUT_DIR}"
echo ""
# Try the available profilers in order of preference: samply, then perf, then
# the macOS `sample` tool.
#
# Reads:  BENCH_CMD, OUTPUT_DIR, TIMESTAMP, OS_TYPE, DURATION, FLAMEGRAPH_DIR
# Writes: PERF_DATA / SAMPLE_OUTPUT / COLLAPSED_OUTPUT / FLAMEGRAPH_SVG (paths
#         of the files produced by the chosen profiler), BENCH_PID (macOS only)

# Split the command string into an argv array once so it can be executed as
# "${bench_argv[@]}". Words are split on whitespace exactly as an unquoted
# $BENCH_CMD would be, but no pathname expansion takes place, so glob
# characters in a regex filter (*, ?, [...]) always reach the benchmark as-is.
read -r -a bench_argv <<< "$BENCH_CMD"

# A relative output directory is addressed through "../"; an absolute one must
# be used unchanged, otherwise "../" + "/abs/dir" points at the wrong place.
case "$OUTPUT_DIR" in
  /*) output_base="$OUTPUT_DIR" ;;
  *) output_base="../${OUTPUT_DIR}" ;;
esac

if command -v samply &> /dev/null; then
  echo -e "${YELLOW}Profiling with samply...${NC}"
  echo -e "Running: samply record $BENCH_CMD"
  samply record "${bench_argv[@]}"
  echo -e "${GREEN}Done! Samply should have opened in your browser.${NC}"
elif command -v perf &> /dev/null; then
  echo -e "${YELLOW}Profiling with perf...${NC}"
  PERF_DATA="${output_base}/perf_${TIMESTAMP}.data"
  FLAMEGRAPH_SVG="${output_base}/flamegraph_${TIMESTAMP}.svg"
  echo -e "Running: perf record -g --call-graph dwarf -o $PERF_DATA $BENCH_CMD"
  perf record -g --call-graph dwarf -o "$PERF_DATA" "${bench_argv[@]}"
  echo -e "${GREEN}Profile saved to: $(realpath "${PERF_DATA}")${NC}"
  # Generate flamegraph SVG
  echo -e "${YELLOW}Generating flamegraph SVG...${NC}"
  if [[ "$FLAMEGRAPH_DIR" == "PATH" ]]; then
    # "PATH" marks that the FlameGraph scripts are callable by bare name.
    perf script -i "$PERF_DATA" | stackcollapse-perf.pl | flamegraph.pl > "$FLAMEGRAPH_SVG"
  else
    perf script -i "$PERF_DATA" | "$FLAMEGRAPH_DIR/stackcollapse-perf.pl" | "$FLAMEGRAPH_DIR/flamegraph.pl" > "$FLAMEGRAPH_SVG"
  fi
  echo -e "${GREEN}Flamegraph saved to: $(realpath "${FLAMEGRAPH_SVG}")${NC}"
elif [[ "$OS_TYPE" == "Darwin" ]]; then
  # macOS-specific profiling using sample command
  echo -e "${YELLOW}Profiling on macOS using sample command...${NC}"
  SAMPLE_OUTPUT="${output_base}/sample_${TIMESTAMP}.txt"
  COLLAPSED_OUTPUT="${output_base}/collapsed_${TIMESTAMP}.txt"
  FLAMEGRAPH_SVG="${output_base}/flamegraph_${TIMESTAMP}.svg"
  # Start benchmark in background
  echo -e "Starting benchmark: $BENCH_CMD"
  "${bench_argv[@]}" &
  BENCH_PID=$!
  # Wait a moment for the process to start
  sleep 0.5
  # Check if process is running
  if ! kill -0 "$BENCH_PID" 2> /dev/null; then
    echo -e "${RED}Benchmark process failed to start${NC}"
    exit 1
  fi
  echo -e "Sampling process $BENCH_PID for ${DURATION} seconds..."
  # Use sample command to profile (built-in on macOS). Best effort: a failure
  # here is detected below by the missing output file.
  sample "$BENCH_PID" "$DURATION" -file "$SAMPLE_OUTPUT" 2> /dev/null || true
  # Wait for benchmark to complete; its exit status is deliberately ignored.
  wait "$BENCH_PID" 2> /dev/null || true
  if [[ -f "$SAMPLE_OUTPUT" ]]; then
    echo -e "${GREEN}Sample output saved to: $(realpath "${SAMPLE_OUTPUT}")${NC}"
    # Convert sample output to collapsed format for flamegraph
    echo -e "${YELLOW}Converting to flamegraph format...${NC}"
    # Parse macOS sample output to collapsed stack format
    # The sample output has a tree format with indentation and branch indicators
python3 - "$SAMPLE_OUTPUT" "$COLLAPSED_OUTPUT" << 'PYTHON_SCRIPT'
import re
import sys

# One call-graph frame:
#   [indent/branch chars] COUNT FUNC_NAME (in MODULE) + OFFSET [ADDR] [FILE:LINE]
_FRAME_PATTERN = re.compile(
    r'^([\s+!:|]*)'                  # Branch indicators and indentation
    r'(\d+)\s+'                      # Sample count
    r'(.+?)'                         # Function name
    r'\s+\(in\s+([^)]+)\)'           # Module name
    r'(?:\s+\+\s+[\d,]+)?'           # Optional offset
    r'(?:\s+\[0x[0-9a-fA-F,]+\])?'   # Optional address
    r'(?:\s+[\w./]+:\d+)?'           # Optional file:line
)

# Non-indented headers that end the call-graph section. Lines in the sections
# that follow can look like frames, so parsing must stop at the first header.
_SECTION_HEADERS = ('Total number', 'Sort by top of stack', 'Binary Images')


def _call_graph_lines(lines):
    """Return the lines between the 'Call graph:' header and the next section."""
    section = []
    in_call_graph = False
    for line in lines:
        if not in_call_graph:
            in_call_graph = 'Call graph:' in line
            continue
        if line.startswith(_SECTION_HEADERS):
            break
        section.append(line)
    return section


def _stack_key(frames):
    """Join the known function names of a (depth, name) frame list with ';'."""
    return ';'.join(name for _, name in frames if name is not None)


def parse_sample_output(input_file, output_file):
    """
    Parse macOS sample output tree format into collapsed stacks.

    Sample format is a tree like:
        3718 start (in dyld) + 6076 [0x...]
          3718 main (in fory_benchmark) + 136 [0x...]
            3718 benchmark::Run() (in fory_benchmark) + 48 [0x...]
            + 3611 BM_Serialize() (in fory_benchmark) + 316 [0x...]
            + ! 3301 fory::serialize() (in fory_benchmark) + 468 [0x...]

    The counts in that tree are inclusive (a frame's count contains the counts
    of all of its callees). The collapsed format written here instead carries
    one "func1;func2;...;funcN COUNT" line per stack, where COUNT is the
    number of samples that ended in exactly that stack, i.e. the frame's
    inclusive count minus the inclusive counts of its direct callees. Stacks
    whose exclusive count is zero are omitted.

    Frames named '???' or starting with '0x' are left out of the stack text;
    their samples and callees are attributed to the nearest known caller.

    Args:
        input_file: path of the text file produced by `sample ... -file`.
        output_file: path of the collapsed-stack file to write.

    Returns:
        The number of collapsed stacks written.
    """
    with open(input_file, 'r', errors='replace') as f:
        lines = f.readlines()

    self_counts = {}    # stack key -> exclusive sample count
    current_stack = []  # [(depth, func_name or None for unknown frames), ...]
    for line in _call_graph_lines(lines):
        match = _FRAME_PATTERN.match(line)
        if not match:
            continue
        # Nesting depth from the prefix width (two characters per level).
        depth = len(match.group(1)) // 2
        count = int(match.group(2))
        func_name = match.group(3).strip()

        # Drop frames that are not ancestors of this one. This must happen for
        # unknown frames too, otherwise their callees would be attached to a
        # previous sibling that is still on the stack.
        while current_stack and current_stack[-1][0] >= depth:
            current_stack.pop()

        has_parent = bool(current_stack)
        parent_key = _stack_key(current_stack)

        if func_name == '???' or func_name.startswith('0x'):
            name = None  # keeps its depth slot but is invisible in the key
        else:
            # ';' is the frame separator of the collapsed format.
            name = func_name.replace(';', ':')
        current_stack.append((depth, name))

        # Credit the inclusive count to this stack and take it away from the
        # caller; once every callee is processed only exclusive time is left.
        # Accumulating (rather than assigning) merges identical stacks that
        # appear more than once, e.g. in several threads or call sites.
        stack_key = _stack_key(current_stack)
        self_counts[stack_key] = self_counts.get(stack_key, 0) + count
        if has_parent:
            self_counts[parent_key] = self_counts.get(parent_key, 0) - count

    stacks = {key: count for key, count in self_counts.items() if key and count > 0}
    ordered = sorted(stacks.items(), key=lambda item: (-item[1], item[0]))

    # Write collapsed format
    with open(output_file, 'w') as f:
        for stack, count in ordered:
            f.write(f"{stack} {count}\n")

    print(f"Extracted {len(stacks)} unique stack traces")
    if stacks:
        print("Top 5 hottest stacks:")
        for stack, count in ordered[:5]:
            # Show just the last few functions
            funcs = stack.split(';')
            short = ';'.join(funcs[-3:]) if len(funcs) > 3 else stack
            print(f" {count}: ...{short}")
    return len(stacks)


if __name__ == '__main__':
    parse_sample_output(sys.argv[1], sys.argv[2])
PYTHON_SCRIPT
# Generate flamegraph from the collapsed stacks (macOS `sample` path), then
# close the profiler selection with its two failure fallbacks.
    if [[ -s "$COLLAPSED_OUTPUT" ]]; then
      if [[ "$FLAMEGRAPH_DIR" == "PATH" ]]; then
        # "PATH" marks that flamegraph.pl is callable by bare name.
        flamegraph.pl "$COLLAPSED_OUTPUT" > "$FLAMEGRAPH_SVG"
      else
        "$FLAMEGRAPH_DIR/flamegraph.pl" "$COLLAPSED_OUTPUT" > "$FLAMEGRAPH_SVG"
      fi
      # The path is quoted so an output directory containing whitespace is
      # passed to realpath as a single argument.
      echo -e "${GREEN}Flamegraph saved to: $(realpath "${FLAMEGRAPH_SVG}")${NC}"
      # Try to open in browser
      if command -v open &> /dev/null; then
        echo -e "${YELLOW}Opening flamegraph in browser...${NC}"
        open "$FLAMEGRAPH_SVG"
      fi
    else
      # The converter produced an empty (or no) collapsed file.
      echo -e "${YELLOW}Could not generate flamegraph from sample output.${NC}"
      echo -e "You can view the raw sample output at: $(realpath "${SAMPLE_OUTPUT}")"
      echo -e ""
      echo -e "${YELLOW}Tip: For better profiling results, consider using samply:${NC}"
      echo -e " cargo install samply"
      echo -e " ./profile.sh --data struct --serializer fory"
    fi
  else
    echo -e "${RED}Sample command failed to produce output${NC}"
    exit 1
  fi
else
  # Reached only when neither samply nor perf is installed and the OS is not
  # macOS (the Darwin case is handled by the branch above), so the hint list
  # needs no macOS entry.
  echo -e "${RED}No profiling tool found. Please install one of:${NC}"
  echo " - samply: cargo install samply (recommended, cross-platform)"
  echo " - perf (Linux): apt install linux-tools-generic"
  exit 1
fi
echo ""
echo -e "${GREEN}=== Profiling complete! ===${NC}"