// blob: bb84f8eb59c9df4a0e117b6a8189223a4da145d1 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "benchmark/benchmark.h"
#include <cstdint>
#include <cstring>
#include <sstream>
#include <string>
#include "arrow/csv/chunker.h"
#include "arrow/csv/options.h"
#include "arrow/csv/parser.h"
#include "arrow/testing/gtest_util.h"
namespace arrow {
namespace csv {
// Sample rows from which the benchmark inputs are assembled.
//
// Linter stipulates:
// >> For a static/global string constant, use a C style string instead

// A single row whose second field is quoted and contains the delimiter.
const char* one_row = "abc,\"d,f\",12.34,\n";
// The same row, with the embedded delimiter backslash-escaped instead of quoted.
const char* one_row_escaped = "abc,d\\,f,12.34,\n";
// Number of rows per generated input: as many copies of `one_row` as fit in
// 64 KiB. The same count is reused for inputs built from `one_row_escaped`,
// even though that row is one byte shorter.
const auto num_rows = static_cast<int32_t>((1024 * 64) / std::strlen(one_row));
// Builds a CSV payload consisting of `repeat` back-to-back copies of `row`.
//
// `row` is appended verbatim (no separator or terminator is added), so it
// should already end with its own line terminator. A non-positive `repeat`
// yields an empty string.
static std::string BuildCSVData(const std::string& row, int32_t repeat) {
  std::string data;
  if (repeat <= 0) {
    return data;
  }
  // The final size is known up front: allocate once instead of growing the
  // buffer (or going through a stream) on every append.
  data.reserve(row.size() * static_cast<std::string::size_type>(repeat));
  for (int32_t i = 0; i < repeat; ++i) {
    data += row;
  }
  return data;
}
// Shared driver for the chunking benchmarks.
//
// Constructs a Chunker from `options` and, on every benchmark iteration, asks
// it to process the entire `csv` buffer. The number of bytes reported as
// processed is the buffer size multiplied by the iteration count.
static void BenchmarkCSVChunking(benchmark::State& state,  // NOLINT non-const reference
                                 const std::string& csv, ParseOptions options) {
  Chunker chunker(options);
  const char* const input = csv.data();
  const auto input_size = static_cast<uint32_t>(csv.size());

  while (state.KeepRunning()) {
    uint32_t chunked_size = 0;
    ABORT_NOT_OK(chunker.Process(input, input_size, &chunked_size));
    // Keep the result observable so the call cannot be optimized away.
    benchmark::DoNotOptimize(chunked_size);
  }

  state.SetBytesProcessed(state.iterations() * input_size);
}
// Chunking benchmark over rows built from `one_row`, with quoting enabled,
// escaping disabled and newlines allowed inside values.
static void ChunkCSVQuotedBlock(benchmark::State& state) {  // NOLINT non-const reference
  auto parse_options = ParseOptions::Defaults();
  parse_options.newlines_in_values = true;
  parse_options.escaping = false;
  parse_options.quoting = true;

  const auto input = BuildCSVData(one_row, num_rows);
  BenchmarkCSVChunking(state, input, parse_options);
}
// Chunking benchmark over rows built from `one_row_escaped`, with escaping
// enabled, quoting disabled and newlines allowed inside values.
static void ChunkCSVEscapedBlock(benchmark::State& state) {  // NOLINT non-const reference
  auto parse_options = ParseOptions::Defaults();
  parse_options.newlines_in_values = true;
  parse_options.escaping = true;
  parse_options.quoting = false;

  const auto input = BuildCSVData(one_row_escaped, num_rows);
  BenchmarkCSVChunking(state, input, parse_options);
}
// Chunking benchmark with newlines disallowed inside values. The input is
// built from `one_row_escaped`; quoting is enabled and escaping disabled.
static void ChunkCSVNoNewlinesBlock(
    benchmark::State& state) {  // NOLINT non-const reference
  auto parse_options = ParseOptions::Defaults();
  parse_options.newlines_in_values = false;
  parse_options.escaping = false;
  parse_options.quoting = true;

  const auto input = BuildCSVData(one_row_escaped, num_rows);
  BenchmarkCSVChunking(state, input, parse_options);

  // Override the byte count set by the shared driver: reporting timings
  // rather than a bogus bandwidth gives better regression stability.
  state.SetBytesProcessed(0);
}
// Shared driver for the parsing benchmarks.
//
// Constructs a BlockParser from `options` (passing -1 and `rows + 1` as its
// remaining constructor arguments) and, on every benchmark iteration, parses
// the entire `csv` buffer and then visits every value of every parsed column.
// The number of bytes reported as processed is the buffer size multiplied by
// the iteration count.
static void BenchmarkCSVParsing(benchmark::State& state,  // NOLINT non-const reference
                                const std::string& csv, int32_t rows,
                                ParseOptions options) {
  // NOTE(review): -1 presumably asks the parser to infer the column count and
  // rows + 1 is presumably a row-capacity bound -- confirm against BlockParser.
  BlockParser parser(options, -1, rows + 1);
  const auto input_size = static_cast<uint32_t>(csv.size());

  while (state.KeepRunning()) {
    uint32_t consumed = 0;
    ABORT_NOT_OK(parser.Parse(csv.data(), input_size, &consumed));

    // Visiting the parsed values is measured too, since its cost might vary
    // with the parser's internal data structures.
    uint32_t size_sink = 0;
    bool quoted_sink = false;
    auto accumulate = [&size_sink, &quoted_sink](const uint8_t* /*data*/,
                                                 uint32_t size, bool quoted) {
      quoted_sink ^= quoted;
      size_sink += size;
      return Status::OK();
    };

    for (int32_t column = 0; column < parser.num_cols(); ++column) {
      ABORT_NOT_OK(parser.VisitColumn(column, accumulate));
      // Keep the accumulated values observable so the visit is not elided.
      benchmark::DoNotOptimize(size_sink);
      benchmark::DoNotOptimize(quoted_sink);
    }
  }

  state.SetBytesProcessed(state.iterations() * input_size);
}
// Parsing benchmark over rows built from `one_row`, with quoting enabled and
// escaping disabled.
static void ParseCSVQuotedBlock(benchmark::State& state) {  // NOLINT non-const reference
  auto parse_options = ParseOptions::Defaults();
  parse_options.escaping = false;
  parse_options.quoting = true;

  const auto input = BuildCSVData(one_row, num_rows);
  BenchmarkCSVParsing(state, input, num_rows, parse_options);
}
// Parsing benchmark over rows built from `one_row_escaped`, with escaping
// enabled and quoting disabled.
static void ParseCSVEscapedBlock(benchmark::State& state) {  // NOLINT non-const reference
  auto parse_options = ParseOptions::Defaults();
  parse_options.escaping = true;
  parse_options.quoting = false;

  const auto input = BuildCSVData(one_row_escaped, num_rows);
  BenchmarkCSVParsing(state, input, num_rows, parse_options);
}
// Register the chunking and parsing benchmarks defined in this file with the
// benchmark library.
BENCHMARK(ChunkCSVQuotedBlock);
BENCHMARK(ChunkCSVEscapedBlock);
BENCHMARK(ChunkCSVNoNewlinesBlock);
BENCHMARK(ParseCSVQuotedBlock);
BENCHMARK(ParseCSVEscapedBlock);
} // namespace csv
} // namespace arrow