blob: 32ac5a7df3b97cbb30d242a8343e8930919c6388 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "benchmark/benchmark.h"
#include "arrow/compute/api_scalar.h"
#include "arrow/compute/kernels/test_util.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/util/benchmark_util.h"
namespace arrow {
namespace compute {
// Fixed seed handed to the random array generator that builds the benchmark input.
constexpr auto kSeed = 0x94378165;
// Shared driver for every benchmark in this file.
//
// Builds one random string array, invokes the compute function named `func_name`
// on it once per benchmark iteration, and reports the number of items and bytes
// processed. `options` is forwarded unchanged to each CallFunction call and may
// be nullptr (the default).
static void UnaryStringBenchmark(benchmark::State& state, const std::string& func_name,
                                 const FunctionOptions* options = nullptr) {
  // Parameters of the generated input array.
  const int64_t num_values = 1 << 20;
  const int64_t min_value_length = 0;
  const int64_t max_value_length = 32;
  const double null_ratio = 0.01;

  // NOTE: the generator only emits ASCII data
  random::RandomArrayGenerator generator(kSeed);
  auto input =
      generator.String(num_values, min_value_length, max_value_length, null_ratio);

  // Untimed warm-up call, so that lookup tables are initialized before measuring
  ABORT_NOT_OK(CallFunction(func_name, {input}, options));

  for (auto _ : state) {
    ABORT_NOT_OK(CallFunction(func_name, {input}, options));
  }

  // NOTE(review): buffers[2] is presumably the character data buffer of the
  // string array -- confirm against the Arrow string layout.
  const auto data_bytes = input->data()->buffers[2]->size();
  state.SetItemsProcessed(state.iterations() * num_values);
  state.SetBytesProcessed(state.iterations() * data_bytes);
}
// Benchmark for the "ascii_lower" compute function; no FunctionOptions are passed.
static void AsciiLower(benchmark::State& state) {
  UnaryStringBenchmark(state, "ascii_lower", /*options=*/nullptr);
}
// Benchmark for the "ascii_upper" compute function; no FunctionOptions are passed.
static void AsciiUpper(benchmark::State& state) {
  UnaryStringBenchmark(state, "ascii_upper", /*options=*/nullptr);
}
// Benchmark for the "ascii_is_alnum" compute function; no FunctionOptions are passed.
static void IsAlphaNumericAscii(benchmark::State& state) {
  UnaryStringBenchmark(state, "ascii_is_alnum", /*options=*/nullptr);
}
// Benchmark for "match_substring", configured with MatchSubstringOptions("abac").
static void MatchSubstring(benchmark::State& state) {
  const MatchSubstringOptions substring_options("abac");
  UnaryStringBenchmark(state, "match_substring", &substring_options);
}
// Benchmark for "split_pattern", configured with SplitPatternOptions("a").
static void SplitPattern(benchmark::State& state) {
  const SplitPatternOptions split_options("a");
  UnaryStringBenchmark(state, "split_pattern", &split_options);
}
// Benchmark for "ascii_trim" with TrimOptions built from the one-character
// string "a".
static void TrimSingleAscii(benchmark::State& state) {
  const TrimOptions trim_options("a");
  UnaryStringBenchmark(state, "ascii_trim", &trim_options);
}
// Benchmark for "ascii_trim" with TrimOptions built from the multi-character
// string "abcdefgABCDEFG".
static void TrimManyAscii(benchmark::State& state) {
  const TrimOptions trim_options("abcdefgABCDEFG");
  UnaryStringBenchmark(state, "ascii_trim", &trim_options);
}
#ifdef ARROW_WITH_UTF8PROC
// Benchmark for the "utf8_upper" compute function; no FunctionOptions are passed.
static void Utf8Upper(benchmark::State& state) {
  UnaryStringBenchmark(state, "utf8_upper", /*options=*/nullptr);
}
// Benchmark for the "utf8_lower" compute function; no FunctionOptions are passed.
static void Utf8Lower(benchmark::State& state) {
  UnaryStringBenchmark(state, "utf8_lower", /*options=*/nullptr);
}
// Benchmark for the "utf8_is_alnum" compute function; no FunctionOptions are passed.
static void IsAlphaNumericUnicode(benchmark::State& state) {
  UnaryStringBenchmark(state, "utf8_is_alnum", /*options=*/nullptr);
}
// Benchmark for "utf8_trim" with TrimOptions built from the one-character
// string "a".
static void TrimSingleUtf8(benchmark::State& state) {
  const TrimOptions trim_options("a");
  UnaryStringBenchmark(state, "utf8_trim", &trim_options);
}
// Benchmark for "utf8_trim" with TrimOptions built from the multi-character
// string "abcdefgABCDEFG".
static void TrimManyUtf8(benchmark::State& state) {
  const TrimOptions trim_options("abcdefgABCDEFG");
  UnaryStringBenchmark(state, "utf8_trim", &trim_options);
}
#endif
// Benchmarks registered unconditionally.
BENCHMARK(AsciiLower);
BENCHMARK(AsciiUpper);
BENCHMARK(IsAlphaNumericAscii);
BENCHMARK(MatchSubstring);
BENCHMARK(SplitPattern);
BENCHMARK(TrimSingleAscii);
BENCHMARK(TrimManyAscii);
#ifdef ARROW_WITH_UTF8PROC
// The UTF-8 benchmark functions are only defined when ARROW_WITH_UTF8PROC is
// set, so their registration is guarded by the same macro.
BENCHMARK(Utf8Lower);
BENCHMARK(Utf8Upper);
BENCHMARK(IsAlphaNumericUnicode);
BENCHMARK(TrimSingleUtf8);
BENCHMARK(TrimManyUtf8);
#endif
} // namespace compute
} // namespace arrow