blob: dbe6b1d4f20927bf8f7f96a2c2ca8cf8c75ae3b5 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <cstdint>
#include <string>
#include <thread>
#include <utility>
#include <vector>
#include <gtest/gtest.h>
#include "arrow/csv/options.h"
#include "arrow/csv/reader.h"
#include "arrow/csv/test_common.h"
#include "arrow/io/interfaces.h"
#include "arrow/io/memory.h"
#include "arrow/status.h"
#include "arrow/table.h"
#include "arrow/testing/future_util.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/util/future.h"
#include "arrow/util/thread_pool.h"
namespace arrow {
namespace csv {
// Allows the streaming reader to be used in tests that expect a table reader
class StreamingReaderAsTableReader : public TableReader {
public:
explicit StreamingReaderAsTableReader(std::shared_ptr<StreamingReader> reader)
: reader_(std::move(reader)) {}
virtual ~StreamingReaderAsTableReader() = default;
virtual Result<std::shared_ptr<Table>> Read() {
std::shared_ptr<Table> table;
RETURN_NOT_OK(reader_->ReadAll(&table));
return table;
}
virtual Future<std::shared_ptr<Table>> ReadAsync() {
return Future<std::shared_ptr<Table>>::MakeFinished(Read());
}
private:
std::shared_ptr<StreamingReader> reader_;
};
// Callable that takes an input stream and returns either a TableReader or an
// error Result. The shared test bodies below use it to obtain one reader per
// input stream.
using TableReaderFactory =
std::function<Result<std::shared_ptr<TableReader>>(std::shared_ptr<io::InputStream>)>;
// Starts NTASKS ReadAsync() calls, each on its own reader over the same sample
// CSV buffer, waits for all of them and checks that every resulting table has
// NROWS rows.
void StressTableReader(TableReaderFactory reader_factory) {
#ifdef ARROW_VALGRIND
  // Reduced workload when ARROW_VALGRIND is defined
  const int NTASKS = 10;
  const int NROWS = 100;
#else
  const int NTASKS = 100;
  const int NROWS = 1000;
#endif
  ASSERT_OK_AND_ASSIGN(auto csv_buffer, MakeSampleCsvBuffer(NROWS));

  // One slot per task; each slot is filled with the future of a fresh reader
  std::vector<Future<std::shared_ptr<Table>>> pending(NTASKS);
  for (auto& slot : pending) {
    auto stream = std::make_shared<io::BufferReader>(csv_buffer);
    ASSERT_OK_AND_ASSIGN(auto table_reader, reader_factory(stream));
    slot = table_reader->ReadAsync();
  }

  auto all_done = All(pending);
  all_done.Wait();
  ASSERT_OK_AND_ASSIGN(std::vector<Result<std::shared_ptr<Table>>> outcomes,
                       all_done.result());
  for (auto&& outcome : outcomes) {
    ASSERT_OK_AND_ASSIGN(auto table, outcome);
    ASSERT_EQ(NROWS, table->num_rows());
  }
}
// Same shape as the valid-data stress test, but the sample buffer is built with
// MakeSampleCsvBuffer(NROWS, false) and every read is expected to fail with an
// Invalid status.
void StressInvalidTableReader(TableReaderFactory reader_factory) {
#ifdef ARROW_VALGRIND
  // Reduced workload when ARROW_VALGRIND is defined
  const int NTASKS = 10;
  const int NROWS = 100;
#else
  const int NTASKS = 100;
  const int NROWS = 1000;
#endif
  ASSERT_OK_AND_ASSIGN(auto csv_buffer, MakeSampleCsvBuffer(NROWS, false));

  // One slot per task; each slot is filled with the future of a fresh reader
  std::vector<Future<std::shared_ptr<Table>>> pending(NTASKS);
  for (auto& slot : pending) {
    auto stream = std::make_shared<io::BufferReader>(csv_buffer);
    ASSERT_OK_AND_ASSIGN(auto table_reader, reader_factory(stream));
    slot = table_reader->ReadAsync();
  }

  auto all_done = All(pending);
  all_done.Wait();
  ASSERT_OK_AND_ASSIGN(std::vector<Result<std::shared_ptr<Table>>> outcomes,
                       all_done.result());
  for (auto&& result : outcomes) {
    ASSERT_RAISES(Invalid, result);
  }
}
// Submits a task to `thread_pool` that itself starts ReadAsync() on a reader
// built by `reader_factory`, then checks that both the submitted task and the
// read finish successfully and that the table has NROWS rows.
void TestNestedParallelism(std::shared_ptr<internal::ThreadPool> thread_pool,
                           TableReaderFactory reader_factory) {
  const int NROWS = 1000;
  ASSERT_OK_AND_ASSIGN(auto csv_buffer, MakeSampleCsvBuffer(NROWS));
  ASSERT_OK_AND_ASSIGN(auto table_reader,
                       reader_factory(std::make_shared<io::BufferReader>(csv_buffer)));

  Future<std::shared_ptr<Table>> table_future;
  // The task only starts the read; the future it stores is awaited below.
  // Both captures are by reference and are waited on before this function
  // returns normally.
  auto start_read = [&table_reader, &table_future]() {
    table_future = table_reader->ReadAsync();
    return Status::OK();
  };
  ASSERT_OK_AND_ASSIGN(auto future, thread_pool->Submit(start_read));
  ASSERT_FINISHES_OK(future);
  ASSERT_FINISHES_OK_AND_ASSIGN(auto table, table_future);
  ASSERT_EQ(table->num_rows(), NROWS);
}
// Returns a factory whose readers are created by TableReader::Make() on the
// default IO context, with use_threads disabled and block_size set to 1 << 10.
TableReaderFactory MakeSerialFactory() {
  return [](std::shared_ptr<io::InputStream> input_stream)
             -> Result<std::shared_ptr<TableReader>> {
    ReadOptions options = ReadOptions::Defaults();
    options.use_threads = false;
    options.block_size = 1 << 10;
    return TableReader::Make(io::default_io_context(), input_stream, options,
                             ParseOptions::Defaults(), ConvertOptions::Defaults());
  };
}
// Serial factory: concurrent reads of the default sample CSV buffer
TEST(SerialReaderTests, Stress) {
  auto factory = MakeSerialFactory();
  StressTableReader(factory);
}

// Serial factory: concurrent reads that must all fail with Invalid
TEST(SerialReaderTests, StressInvalid) {
  auto factory = MakeSerialFactory();
  StressInvalidTableReader(factory);
}

// Serial factory: a read started from inside a task on a ThreadPool::Make(1) pool
TEST(SerialReaderTests, NestedParallelism) {
  ASSERT_OK_AND_ASSIGN(auto pool, internal::ThreadPool::Make(1));
  TestNestedParallelism(pool, MakeSerialFactory());
}
// Returns a factory whose readers are created by TableReader::Make() with
// use_threads enabled, block_size set to 1 << 10, and an IOContext constructed
// from `thread_pool`. If no pool is given, one is created with
// ThreadPool::Make(1); an error from that call is propagated to the caller.
// The returned factory holds a shared_ptr copy of the pool.
Result<TableReaderFactory> MakeAsyncFactory(
    std::shared_ptr<internal::ThreadPool> thread_pool = nullptr) {
  if (thread_pool == nullptr) {
    ARROW_ASSIGN_OR_RAISE(thread_pool, internal::ThreadPool::Make(1));
  }
  return [thread_pool](std::shared_ptr<io::InputStream> input_stream)
             -> Result<std::shared_ptr<TableReader>> {
    auto options = ReadOptions::Defaults();
    options.block_size = 1 << 10;
    options.use_threads = true;
    return TableReader::Make(io::IOContext(thread_pool.get()), input_stream, options,
                             ParseOptions::Defaults(), ConvertOptions::Defaults());
  };
}
// Async factory with its own pool: concurrent reads of the default sample CSV buffer
TEST(AsyncReaderTests, Stress) {
  ASSERT_OK_AND_ASSIGN(auto factory, MakeAsyncFactory());
  StressTableReader(factory);
}

// Async factory with its own pool: concurrent reads that must all fail with Invalid
TEST(AsyncReaderTests, StressInvalid) {
  ASSERT_OK_AND_ASSIGN(auto factory, MakeAsyncFactory());
  StressInvalidTableReader(factory);
}

// The same ThreadPool::Make(1) pool is handed to the factory and used to run
// the task that starts the read.
TEST(AsyncReaderTests, NestedParallelism) {
  ASSERT_OK_AND_ASSIGN(auto pool, internal::ThreadPool::Make(1));
  ASSERT_OK_AND_ASSIGN(auto factory, MakeAsyncFactory(pool));
  TestNestedParallelism(pool, factory);
}
// Returns a factory that creates a StreamingReader on the default IO context
// (block_size set to 1 << 10) and wraps it in StreamingReaderAsTableReader.
// An error from StreamingReader::Make() is returned from the factory call.
Result<TableReaderFactory> MakeStreamingFactory() {
  return [](std::shared_ptr<io::InputStream> input_stream)
             -> Result<std::shared_ptr<TableReader>> {
    ReadOptions options = ReadOptions::Defaults();
    options.block_size = 1 << 10;
    ARROW_ASSIGN_OR_RAISE(
        auto stream_reader,
        StreamingReader::Make(io::default_io_context(), input_stream, options,
                              ParseOptions::Defaults(), ConvertOptions::Defaults()));
    return std::make_shared<StreamingReaderAsTableReader>(std::move(stream_reader));
  };
}
// Streaming factory: concurrent reads of the default sample CSV buffer
TEST(StreamingReaderTests, Stress) {
  ASSERT_OK_AND_ASSIGN(auto factory, MakeStreamingFactory());
  StressTableReader(factory);
}

// Streaming factory: concurrent reads that must all fail with Invalid
TEST(StreamingReaderTests, StressInvalid) {
  ASSERT_OK_AND_ASSIGN(auto factory, MakeStreamingFactory());
  StressInvalidTableReader(factory);
}

// Streaming factory: a read started from inside a task on a ThreadPool::Make(1) pool
TEST(StreamingReaderTests, NestedParallelism) {
  ASSERT_OK_AND_ASSIGN(auto pool, internal::ThreadPool::Make(1));
  ASSERT_OK_AND_ASSIGN(auto factory, MakeStreamingFactory());
  TestNestedParallelism(pool, factory);
}
} // namespace csv
} // namespace arrow