blob: 868accc37447791fb57643b818ed89e95266f89b [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "benchmark/benchmark.h"
#include <cstdint>
#include <string>
#include <vector>
#include "arrow/status.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/util/trie.h"
namespace arrow {
namespace internal {
std::vector<std::string> AllNulls() {
return {"#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND",
"1.#QNAN", "N/A", "NA", "NULL", "NaN", "n/a", "nan", "null"};
}
Trie MakeNullsTrie() {
auto nulls = AllNulls();
TrieBuilder builder;
for (const auto& str : AllNulls()) {
ABORT_NOT_OK(builder.Append(str));
}
return builder.Finish();
}
std::vector<std::string> Expand(const std::vector<std::string>& base, size_t n) {
std::vector<std::string> result;
result.reserve(n);
while (true) {
for (const auto& v : base) {
result.push_back(v);
if (result.size() == n) {
return result;
}
}
}
}
static void BenchmarkTrieLookups(benchmark::State& state, // NOLINT non-const reference
const std::vector<std::string>& strings) {
Trie trie = MakeNullsTrie();
int32_t total = 0;
auto lookups = Expand(strings, 100);
for (auto _ : state) {
for (const auto& s : lookups) {
total += trie.Find(s);
}
}
benchmark::DoNotOptimize(total);
state.SetItemsProcessed(state.iterations() * lookups.size());
}
static void TrieLookupFound(benchmark::State& state) { // NOLINT non-const reference
BenchmarkTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"});
}
static void TrieLookupNotFound(benchmark::State& state) { // NOLINT non-const reference
BenchmarkTrieLookups(state, {"None", "1.0", "", "abc"});
}
BENCHMARK(TrieLookupFound);
BENCHMARK(TrieLookupNotFound);
#ifdef ARROW_WITH_BENCHMARKS_REFERENCE
static inline bool InlinedNullLookup(util::string_view s) {
// An inlined version of trie lookup for a specific set of strings
// (see AllNulls())
auto size = s.length();
auto data = s.data();
if (size == 0) {
return false;
}
if (size == 1) {
return false;
}
auto chars = reinterpret_cast<const char*>(data);
auto first = chars[0];
auto second = chars[1];
switch (first) {
case 'N': {
// "NA", "N/A", "NaN", "NULL"
if (size == 2) {
return second == 'A';
}
auto third = chars[2];
if (size == 3) {
return (second == '/' && third == 'A') || (second == 'a' && third == 'N');
}
if (size == 4) {
return (second == 'U' && third == 'L' && chars[3] == 'L');
}
return false;
}
case 'n': {
// "n/a", "nan", "null"
if (size == 2) {
return false;
}
auto third = chars[2];
if (size == 3) {
return (second == '/' && third == 'a') || (second == 'a' && third == 'n');
}
if (size == 4) {
return (second == 'u' && third == 'l' && chars[3] == 'l');
}
return false;
}
case '1': {
// '1.#IND', '1.#QNAN'
if (size == 6) {
// '#' is the most unlikely char here, check it first
return (chars[2] == '#' && chars[1] == '.' && chars[3] == 'I' &&
chars[4] == 'N' && chars[5] == 'D');
}
if (size == 7) {
return (chars[2] == '#' && chars[1] == '.' && chars[3] == 'Q' &&
chars[4] == 'N' && chars[5] == 'A' && chars[6] == 'N');
}
return false;
}
case '-': {
switch (second) {
case 'N':
// "-NaN"
return (size == 4 && chars[2] == 'a' && chars[3] == 'N');
case 'n':
// "-nan"
return (size == 4 && chars[2] == 'a' && chars[3] == 'n');
case '1':
// "-1.#IND", "-1.#QNAN"
if (size == 7) {
return (chars[3] == '#' && chars[2] == '.' && chars[4] == 'I' &&
chars[5] == 'N' && chars[6] == 'D');
}
if (size == 8) {
return (chars[3] == '#' && chars[2] == '.' && chars[4] == 'Q' &&
chars[5] == 'N' && chars[6] == 'A' && chars[7] == 'N');
}
return false;
default:
return false;
}
}
case '#': {
// "#N/A", "#N/A N/A", "#NA"
if (size < 3 || chars[1] != 'N') {
return false;
}
auto third = chars[2];
if (size == 3) {
return third == 'A';
}
if (size == 4) {
return third == '/' && chars[3] == 'A';
}
if (size == 8) {
return std::memcmp(data + 2, "/A N/A", 5) == 0;
}
return false;
}
default:
return false;
}
}
static void BenchmarkInlinedTrieLookups(
benchmark::State& state, // NOLINT non-const reference
const std::vector<std::string>& strings) {
int32_t total = 0;
auto lookups = Expand(strings, 100);
for (auto _ : state) {
for (const auto& s : lookups) {
total += InlinedNullLookup(s);
}
}
benchmark::DoNotOptimize(total);
state.SetItemsProcessed(state.iterations() * lookups.size());
}
static void InlinedTrieLookupFound(
benchmark::State& state) { // NOLINT non-const reference
BenchmarkInlinedTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"});
}
static void InlinedTrieLookupNotFound(
benchmark::State& state) { // NOLINT non-const reference
BenchmarkInlinedTrieLookups(state, {"None", "1.0", "", "abc"});
}
BENCHMARK(InlinedTrieLookupFound);
BENCHMARK(InlinedTrieLookupNotFound);
#endif
} // namespace internal
} // namespace arrow