blob: c113c245fff3a9f9e0f3da0c46210445b44c9231 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "benchmark/benchmark.h"
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>
#include "arrow/status.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/random.h"
#include "arrow/type.h"
#include "arrow/util/formatting.h"
#include "arrow/util/string_view.h"
#include "arrow/util/value_parsing.h"
namespace arrow {
namespace internal {
// Build `num_items` decimal strings used as input for the integer parsing
// benchmarks of integer type `c_int`.
//
// The result cycles through eight base strings: a few small magnitudes (the
// negative ones are replaced by their positive counterparts when `c_int` is
// unsigned) followed by the minimum and maximum values of `c_int`.
// A non-positive `num_items` yields an empty vector.
template <typename c_int>
static std::vector<std::string> MakeIntStrings(int32_t num_items) {
  using c_int_limits = std::numeric_limits<c_int>;
  const std::vector<std::string> base_strings = {
      "0",
      "5",
      c_int_limits::is_signed ? "-12" : "12",
      "34",
      "99",
      c_int_limits::is_signed ? "-111" : "111",
      std::to_string(c_int_limits::min()),
      std::to_string(c_int_limits::max())};

  std::vector<std::string> strings;
  if (num_items > 0) {
    // The final size is known: allocate once instead of growing repeatedly.
    strings.reserve(static_cast<std::size_t>(num_items));
  }
  // Keep the modulo in signed arithmetic rather than mixing int32_t and size_t.
  const int32_t num_base = static_cast<int32_t>(base_strings.size());
  for (int32_t i = 0; i < num_items; ++i) {
    strings.push_back(base_strings[i % num_base]);
  }
  return strings;
}
// Build `num_items` strings used as input for the floating-point parsing
// benchmarks.
//
// The result cycles through eight base strings covering integral, fractional
// and exponent notations, both positive and negative.
// A non-positive `num_items` yields an empty vector.
static std::vector<std::string> MakeFloatStrings(int32_t num_items) {
  const std::vector<std::string> base_strings = {
      "0.0",      "5",         "-12.3",     "98765430000",
      "3456.789", "0.0012345", "2.34567e8", "-5.67e-8"};

  std::vector<std::string> strings;
  if (num_items > 0) {
    // The final size is known: allocate once instead of growing repeatedly.
    strings.reserve(static_cast<std::size_t>(num_items));
  }
  // Keep the modulo in signed arithmetic rather than mixing int32_t and size_t.
  const int32_t num_base = static_cast<int32_t>(base_strings.size());
  for (int32_t i = 0; i < num_items; ++i) {
    strings.push_back(base_strings[i % num_base]);
  }
  return strings;
}
// Build `num_items` timestamp strings used as input for the timestamp parsing
// benchmarks.
//
// The result cycles through three "YYYY-MM-DD hh:mm:ss" base strings, one of
// which falls on a February 29th.
// A non-positive `num_items` yields an empty vector.
static std::vector<std::string> MakeTimestampStrings(int32_t num_items) {
  const std::vector<std::string> base_strings = {
      "2018-11-13 17:11:10", "2018-11-13 11:22:33", "2016-02-29 11:22:33"};

  std::vector<std::string> strings;
  if (num_items > 0) {
    // The final size is known: allocate once instead of growing repeatedly.
    strings.reserve(static_cast<std::size_t>(num_items));
  }
  // Keep the modulo in signed arithmetic rather than mixing int32_t and size_t.
  const int32_t num_base = static_cast<int32_t>(base_strings.size());
  for (int32_t i = 0; i < num_items; ++i) {
    strings.push_back(base_strings[i % num_base]);
  }
  return strings;
}
// Produce `num_items` values of the signed integer type `c_int`, filled in by
// `randint` with bounds set to the minimum and maximum of `c_int`.
// This overload participates only when `c_int` is signed.
template <typename c_int, typename c_int_limits = std::numeric_limits<c_int>>
static typename std::enable_if<c_int_limits::is_signed, std::vector<c_int>>::type
MakeInts(int32_t num_items) {
  // std::uniform_int_distribution is not guaranteed by C++ to support every
  // integer type, so values are drawn as a known-good type (int64_t).
  using draw_type = int64_t;
  std::vector<c_int> values;
  randint<draw_type, c_int>(num_items, static_cast<draw_type>(c_int_limits::min()),
                            static_cast<draw_type>(c_int_limits::max()), &values);
  return values;
}
// Produce `num_items` values of the unsigned integer type `c_int`, filled in
// by `randint` with bounds set to the minimum and maximum of `c_int`.
// This overload participates only when `c_int` is unsigned.
template <typename c_int, typename c_int_limits = std::numeric_limits<c_int>>
static typename std::enable_if<!c_int_limits::is_signed, std::vector<c_int>>::type
MakeInts(int32_t num_items) {
  // Values are drawn as uint64_t rather than `c_int` itself, since not every
  // integer type is guaranteed to work with std::uniform_int_distribution.
  using draw_type = uint64_t;
  std::vector<c_int> values;
  randint<draw_type, c_int>(num_items, static_cast<draw_type>(c_int_limits::min()),
                            static_cast<draw_type>(c_int_limits::max()), &values);
  return values;
}
// Produce `num_items` values of floating-point type `c_float`, filled in by
// `random_real` using a fixed seed (42) so that runs are repeatable.
// NOTE(review): -1e10 and 1e10 are presumably the lower and upper bounds of
// the generated values -- confirm against random_real's declaration.
template <typename c_float>
static std::vector<c_float> MakeFloats(int32_t num_items) {
  std::vector<c_float> values;
  random_real<double, c_float>(num_items,
                               /*seed =*/42,
                               -1e10,
                               1e10,
                               &values);
  return values;
}
// Benchmark ParseValue<ARROW_TYPE> on 1000 integer strings per iteration.
//
// Every parsed value is folded into `total`, which is handed to
// benchmark::DoNotOptimize so the parsing work cannot be discarded.
// The process aborts if any string fails to parse.
template <typename ARROW_TYPE, typename C_TYPE = typename ARROW_TYPE::c_type>
static void IntegerParsing(benchmark::State& state) {  // NOLINT non-const reference
  const auto strings = MakeIntStrings<C_TYPE>(1000);

  // Range-based state loop, as used by BenchTimestampParsing in this file; it
  // has lower per-iteration overhead than `while (state.KeepRunning())`.
  for (auto _ : state) {
    C_TYPE total = 0;
    for (const auto& s : strings) {
      C_TYPE value;
      if (!ParseValue<ARROW_TYPE>(s.data(), s.length(), &value)) {
        std::cerr << "Conversion failed for '" << s << "'";
        std::abort();
      }
      // Narrow integer types are promoted for the addition; cast back explicitly.
      total = static_cast<C_TYPE>(total + value);
    }
    benchmark::DoNotOptimize(total);
  }
  state.SetItemsProcessed(state.iterations() * strings.size());
}
// Benchmark ParseValue<ARROW_TYPE> on 1000 floating-point strings per
// iteration.
//
// Every parsed value is added to `total`, which is handed to
// benchmark::DoNotOptimize so the parsing work cannot be discarded.
// The process aborts if any string fails to parse.
template <typename ARROW_TYPE, typename C_TYPE = typename ARROW_TYPE::c_type>
static void FloatParsing(benchmark::State& state) {  // NOLINT non-const reference
  const auto strings = MakeFloatStrings(1000);

  // Range-based state loop, as used by BenchTimestampParsing in this file; it
  // has lower per-iteration overhead than `while (state.KeepRunning())`.
  for (auto _ : state) {
    C_TYPE total = 0;
    for (const auto& s : strings) {
      C_TYPE value;
      if (!ParseValue<ARROW_TYPE>(s.data(), s.length(), &value)) {
        std::cerr << "Conversion failed for '" << s << "'";
        std::abort();
      }
      total += value;
    }
    benchmark::DoNotOptimize(total);
  }
  state.SetItemsProcessed(state.iterations() * strings.size());
}
// Shared driver for the timestamp parsing benchmarks: parse 1000 timestamp
// strings per iteration with `parser`, requesting values in `unit`.
//
// Every parsed value is folded into `total`, which is handed to
// benchmark::DoNotOptimize so the parsing work cannot be discarded.
// The process aborts if any string fails to parse.
static void BenchTimestampParsing(
    benchmark::State& state,  // NOLINT non-const reference
    TimeUnit::type unit, const TimestampParser& parser) {
  using c_type = TimestampType::c_type;
  // Accumulate in the unsigned counterpart of c_type.  Assuming parsed values
  // are counts since the Unix epoch, a 2018 timestamp in TimeUnit::NANO is on
  // the order of 1.5e18, so a signed 64-bit running sum would overflow
  // (undefined behavior) after a handful of values.  Unsigned arithmetic wraps
  // around instead, which is all a checksum needs.
  using total_type = std::make_unsigned<c_type>::type;

  const auto strings = MakeTimestampStrings(1000);
  for (auto _ : state) {
    total_type total = 0;
    for (const auto& s : strings) {
      c_type value;
      if (!parser(s.data(), s.length(), unit, &value)) {
        std::cerr << "Conversion failed for '" << s << "'";
        std::abort();
      }
      total += static_cast<total_type>(value);
    }
    benchmark::DoNotOptimize(total);
  }
  state.SetItemsProcessed(state.iterations() * strings.size());
}
// Timestamp parsing benchmark using the parser returned by
// TimestampParser::MakeISO8601(), with values requested in `UNIT`.
template <TimeUnit::type UNIT>
static void TimestampParsingISO8601(
    benchmark::State& state) {  // NOLINT non-const reference
  // The parser temporary outlives the call: it is destroyed only at the end
  // of the full expression.
  BenchTimestampParsing(state, UNIT, *TimestampParser::MakeISO8601());
}
// Timestamp parsing benchmark using the parser returned by
// TimestampParser::MakeStrptime() for the format "%Y-%m-%d %H:%M:%S", with
// values requested in `UNIT`.
template <TimeUnit::type UNIT>
static void TimestampParsingStrptime(
    benchmark::State& state) {  // NOLINT non-const reference
  // The parser temporary outlives the call: it is destroyed only at the end
  // of the full expression.
  BenchTimestampParsing(state, UNIT,
                        *TimestampParser::MakeStrptime("%Y-%m-%d %H:%M:%S"));
}
// Minimal sink for formatter output, used by the formatting benchmarks.
//
// Each call adds a single character of the formatted string to `total_`,
// reading from a position that advances by one per call and wraps back to the
// start once it reaches the end of the current string.  The formatting
// benchmarks pass `total_` to benchmark::DoNotOptimize.
struct DummyAppender {
  // Consume the formatted representation `v`.  Always returns Status::OK().
  Status operator()(util::string_view v) {
    const int32_t size = static_cast<int32_t>(v.size());
    if (pos_ >= size) {
      pos_ = 0;
      if (size == 0) {
        // Nothing to read: indexing an empty view would be out of bounds.
        // Checked inside the wrap-around branch so the common path is unchanged.
        return Status::OK();
      }
    }
    total_ += v[pos_++];
    return Status::OK();
  }

  // Running sum of the characters read so far.
  int64_t total_ = 0;
  // Index of the character to read on the next call.
  int32_t pos_ = 0;
};
// Benchmark StringFormatter<ARROW_TYPE> on 1000 integers (from MakeInts) per
// iteration.
//
// Formatter output goes to a fresh DummyAppender each iteration; its
// accumulated total is handed to benchmark::DoNotOptimize so the formatting
// work cannot be discarded.  A non-OK status from the formatter aborts.
template <typename ARROW_TYPE, typename C_TYPE = typename ARROW_TYPE::c_type>
static void IntegerFormatting(benchmark::State& state) {  // NOLINT non-const reference
  const std::vector<C_TYPE> values = MakeInts<C_TYPE>(1000);
  StringFormatter<ARROW_TYPE> formatter;

  // Range-based state loop, as used by BenchTimestampParsing in this file; it
  // has lower per-iteration overhead than `while (state.KeepRunning())`.
  for (auto _ : state) {
    DummyAppender appender;
    for (const auto value : values) {
      ABORT_NOT_OK(formatter(value, appender));
    }
    benchmark::DoNotOptimize(appender.total_);
  }
  state.SetItemsProcessed(state.iterations() * values.size());
}
// Benchmark StringFormatter<ARROW_TYPE> on 1000 floating-point values (from
// MakeFloats) per iteration.
//
// Formatter output goes to a fresh DummyAppender each iteration; its
// accumulated total is handed to benchmark::DoNotOptimize so the formatting
// work cannot be discarded.  A non-OK status from the formatter aborts.
template <typename ARROW_TYPE, typename C_TYPE = typename ARROW_TYPE::c_type>
static void FloatFormatting(benchmark::State& state) {  // NOLINT non-const reference
  const std::vector<C_TYPE> values = MakeFloats<C_TYPE>(1000);
  StringFormatter<ARROW_TYPE> formatter;

  // Range-based state loop, as used by BenchTimestampParsing in this file; it
  // has lower per-iteration overhead than `while (state.KeepRunning())`.
  for (auto _ : state) {
    DummyAppender appender;
    for (const auto value : values) {
      ABORT_NOT_OK(formatter(value, appender));
    }
    benchmark::DoNotOptimize(appender.total_);
  }
  state.SetItemsProcessed(state.iterations() * values.size());
}
// Benchmark registrations.

// Parsing: integer strings, one benchmark per signed and unsigned width.
BENCHMARK_TEMPLATE(IntegerParsing, Int8Type);
BENCHMARK_TEMPLATE(IntegerParsing, Int16Type);
BENCHMARK_TEMPLATE(IntegerParsing, Int32Type);
BENCHMARK_TEMPLATE(IntegerParsing, Int64Type);
BENCHMARK_TEMPLATE(IntegerParsing, UInt8Type);
BENCHMARK_TEMPLATE(IntegerParsing, UInt16Type);
BENCHMARK_TEMPLATE(IntegerParsing, UInt32Type);
BENCHMARK_TEMPLATE(IntegerParsing, UInt64Type);
// Parsing: floating-point strings.
BENCHMARK_TEMPLATE(FloatParsing, FloatType);
BENCHMARK_TEMPLATE(FloatParsing, DoubleType);
// Parsing: timestamp strings with the ISO8601 parser, for every time unit.
BENCHMARK_TEMPLATE(TimestampParsingISO8601, TimeUnit::SECOND);
BENCHMARK_TEMPLATE(TimestampParsingISO8601, TimeUnit::MILLI);
BENCHMARK_TEMPLATE(TimestampParsingISO8601, TimeUnit::MICRO);
BENCHMARK_TEMPLATE(TimestampParsingISO8601, TimeUnit::NANO);
// Parsing: timestamp strings with the strptime-based parser; only the
// millisecond unit is registered.
BENCHMARK_TEMPLATE(TimestampParsingStrptime, TimeUnit::MILLI);
// Formatting: integers, one benchmark per signed and unsigned width.
BENCHMARK_TEMPLATE(IntegerFormatting, Int8Type);
BENCHMARK_TEMPLATE(IntegerFormatting, Int16Type);
BENCHMARK_TEMPLATE(IntegerFormatting, Int32Type);
BENCHMARK_TEMPLATE(IntegerFormatting, Int64Type);
BENCHMARK_TEMPLATE(IntegerFormatting, UInt8Type);
BENCHMARK_TEMPLATE(IntegerFormatting, UInt16Type);
BENCHMARK_TEMPLATE(IntegerFormatting, UInt32Type);
BENCHMARK_TEMPLATE(IntegerFormatting, UInt64Type);
// Formatting: floating-point values.
BENCHMARK_TEMPLATE(FloatFormatting, FloatType);
BENCHMARK_TEMPLATE(FloatFormatting, DoubleType);
} // namespace internal
} // namespace arrow