cpp/src/arrow/gpu/cuda_benchmark.cc - arrow - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include "benchmark/benchmark.h"

 #include <algorithm>
 #include <cstdint>
 #include <memory>
 #include <vector>

 #include "arrow/array.h"
 #include "arrow/memory_pool.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"

 #include "arrow/gpu/cuda_api.h"

 namespace arrow {
 namespace cuda {

 // CUDA device number handed to CudaDeviceManager::GetContext to obtain the
 // context on which the benchmarks allocate their device buffer.
 constexpr int64_t kGpuNumber = 0;

 // Measures the throughput of writing host data into a CUDA device buffer
 // through a CudaBufferWriter.
 //
 // Setup (untimed): obtains the context for device `kGpuNumber`, allocates a
 // device buffer of `total_bytes`, wraps it in a CudaBufferWriter, and fills a
 // host buffer of the same size via MakeRandomByteBuffer.
 //
 // Timed loop: on every benchmark iteration the writer is seeked back to
 // offset 0 and the host data is written in pieces of at most `chunksize`
 // bytes until `total_bytes` have been written.
 //
 // \param state        Google Benchmark state driving the timed loop
 // \param total_bytes  number of bytes written per benchmark iteration
 // \param chunksize    upper bound on the size of each Write() call; must be
 //                     positive (a value of 0 would never advance the inner
 //                     loop)
 // \param buffer_size  if > 0, passed to CudaBufferWriter::SetBufferSize before
 //                     the timed loop; otherwise SetBufferSize is not called
 //
 // Every Status-returning call is checked with ABORT_NOT_OK.
 static void CudaBufferWriterBenchmark(benchmark::State& state, const int64_t total_bytes,
                                       const int64_t chunksize,
                                       const int64_t buffer_size) {
   CudaDeviceManager* manager;
   ABORT_NOT_OK(CudaDeviceManager::Instance().Value(&manager));
   std::shared_ptr<CudaContext> context;
   ABORT_NOT_OK(manager->GetContext(kGpuNumber).Value(&context));

   std::shared_ptr<CudaBuffer> device_buffer;
   ABORT_NOT_OK(context->Allocate(total_bytes).Value(&device_buffer));
   CudaBufferWriter writer(device_buffer);

   if (buffer_size > 0) {
     ABORT_NOT_OK(writer.SetBufferSize(buffer_size));
   }

   // Use ABORT_NOT_OK like the rest of this function: a gtest assertion
   // (ASSERT_OK) is meant for test bodies and would merely return from this
   // function on failure, leaving the benchmark loop unexecuted.
   std::shared_ptr<ResizableBuffer> buffer;
   ABORT_NOT_OK(MakeRandomByteBuffer(total_bytes, default_memory_pool(), &buffer));

   const uint8_t* host_data = buffer->data();
   while (state.KeepRunning()) {
     int64_t bytes_written = 0;
     // Rewind so that every iteration overwrites the same device range.
     ABORT_NOT_OK(writer.Seek(0));
     while (bytes_written < total_bytes) {
       // The final chunk may be shorter than `chunksize`.
       const int64_t bytes_to_write = std::min(chunksize, total_bytes - bytes_written);
       ABORT_NOT_OK(writer.Write(host_data + bytes_written, bytes_to_write));
       bytes_written += bytes_to_write;
     }
   }
   state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * total_bytes);
 }

 // Benchmark entry point for writes with a writer-side buffer configured:
 // 128MB are written per iteration with an 8MB buffer size, and the size of
 // each individual write is taken from the benchmark's first range argument.
 static void Writer_Buffered(benchmark::State& state) {
   constexpr int64_t kTotalBytes = int64_t{1} << 27;  // 128MB
   constexpr int64_t kBufferSize = int64_t{1} << 23;  // 8MB

   const int64_t chunksize = state.range(0);
   CudaBufferWriterBenchmark(state, kTotalBytes, chunksize, kBufferSize);
 }

 // Benchmark entry point for writes without a configured writer buffer:
 // 128MB are written per iteration, and the size of each individual write is
 // taken from the benchmark's first range argument.
 static void Writer_Unbuffered(benchmark::State& state) {
   constexpr int64_t kTotalBytes = int64_t{1} << 27;  // 128MB
   // A buffer size of 0 makes the shared benchmark skip SetBufferSize().
   constexpr int64_t kNoBufferSize = 0;

   const int64_t chunksize = state.range(0);
   CudaBufferWriterBenchmark(state, kTotalBytes, chunksize, kNoBufferSize);
 }

 // Vary chunk write size from 256 bytes to 64K. With a range multiplier of 16
 // this yields the chunk sizes 256, 4K and 64K.
 BENCHMARK(Writer_Buffered)->RangeMultiplier(16)->Range(1 << 8, 1 << 16)->UseRealTime();

 // Same chunk sizes as the buffered benchmark. RangeMultiplier is set exactly
 // once: a preceding RangeMultiplier(4) would be overridden by the later call
 // before Range() consumes it, so it had no effect.
 BENCHMARK(Writer_Unbuffered)
     ->RangeMultiplier(16)
     ->Range(1 << 8, 1 << 16)
     ->UseRealTime();

 }  // namespace cuda
 }  // namespace arrow
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#include "benchmark/benchmark.h"

	#include <cstdint>
	#include <memory>
	#include <vector>

	#include "arrow/array.h"
	#include "arrow/memory_pool.h"
	#include "arrow/testing/gtest_util.h"
	#include "arrow/testing/util.h"

	#include "arrow/gpu/cuda_api.h"

	namespace arrow {
	namespace cuda {

	// CUDA device number handed to CudaDeviceManager::GetContext to obtain the
	// context on which the benchmarks allocate their device buffer.
	constexpr int64_t kGpuNumber = 0;

	// Measures the throughput of writing host data into a CUDA device buffer
	// through a CudaBufferWriter.
	//
	// Setup (untimed): obtains the context for device `kGpuNumber`, allocates a
	// device buffer of `total_bytes`, wraps it in a CudaBufferWriter, and fills a
	// host buffer of the same size via MakeRandomByteBuffer.
	//
	// Timed loop: on every benchmark iteration the writer is seeked back to
	// offset 0 and the host data is written in pieces of at most `chunksize`
	// bytes until `total_bytes` have been written.
	//
	// \param state        Google Benchmark state driving the timed loop
	// \param total_bytes  number of bytes written per benchmark iteration
	// \param chunksize    upper bound on the size of each Write() call; must be
	//                     positive (a value of 0 would never advance the inner
	//                     loop)
	// \param buffer_size  if > 0, passed to CudaBufferWriter::SetBufferSize before
	//                     the timed loop; otherwise SetBufferSize is not called
	//
	// Every Status-returning call is checked with ABORT_NOT_OK.
	static void CudaBufferWriterBenchmark(benchmark::State& state, const int64_t total_bytes,
	                                      const int64_t chunksize,
	                                      const int64_t buffer_size) {
	  CudaDeviceManager* manager;
	  ABORT_NOT_OK(CudaDeviceManager::Instance().Value(&manager));
	  std::shared_ptr<CudaContext> context;
	  ABORT_NOT_OK(manager->GetContext(kGpuNumber).Value(&context));

	  std::shared_ptr<CudaBuffer> device_buffer;
	  ABORT_NOT_OK(context->Allocate(total_bytes).Value(&device_buffer));
	  CudaBufferWriter writer(device_buffer);

	  if (buffer_size > 0) {
	    ABORT_NOT_OK(writer.SetBufferSize(buffer_size));
	  }

	  // Use ABORT_NOT_OK like the rest of this function: a gtest assertion
	  // (ASSERT_OK) is meant for test bodies and would merely return from this
	  // function on failure, leaving the benchmark loop unexecuted.
	  std::shared_ptr<ResizableBuffer> buffer;
	  ABORT_NOT_OK(MakeRandomByteBuffer(total_bytes, default_memory_pool(), &buffer));

	  const uint8_t* host_data = buffer->data();
	  while (state.KeepRunning()) {
	    int64_t bytes_written = 0;
	    // Rewind so that every iteration overwrites the same device range.
	    ABORT_NOT_OK(writer.Seek(0));
	    while (bytes_written < total_bytes) {
	      // The final chunk may be shorter than `chunksize`.
	      const int64_t bytes_to_write = std::min(chunksize, total_bytes - bytes_written);
	      ABORT_NOT_OK(writer.Write(host_data + bytes_written, bytes_to_write));
	      bytes_written += bytes_to_write;
	    }
	  }
	  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * total_bytes);
	}

	// Benchmark entry point for writes with a writer-side buffer configured:
	// 128MB are written per iteration with an 8MB buffer size, and the size of
	// each individual write is taken from the benchmark's first range argument.
	static void Writer_Buffered(benchmark::State& state) {
	  constexpr int64_t kTotalBytes = int64_t{1} << 27;  // 128MB
	  constexpr int64_t kBufferSize = int64_t{1} << 23;  // 8MB

	  const int64_t chunksize = state.range(0);
	  CudaBufferWriterBenchmark(state, kTotalBytes, chunksize, kBufferSize);
	}

	// Benchmark entry point for writes without a configured writer buffer:
	// 128MB are written per iteration, and the size of each individual write is
	// taken from the benchmark's first range argument.
	static void Writer_Unbuffered(benchmark::State& state) {
	  constexpr int64_t kTotalBytes = int64_t{1} << 27;  // 128MB
	  // A buffer size of 0 makes the shared benchmark skip SetBufferSize().
	  constexpr int64_t kNoBufferSize = 0;

	  const int64_t chunksize = state.range(0);
	  CudaBufferWriterBenchmark(state, kTotalBytes, chunksize, kNoBufferSize);
	}

	// Vary chunk write size from 256 bytes to 64K. With a range multiplier of 16
	// this yields the chunk sizes 256, 4K and 64K.
	BENCHMARK(Writer_Buffered)->RangeMultiplier(16)->Range(1 << 8, 1 << 16)->UseRealTime();

	// Same chunk sizes as the buffered benchmark. RangeMultiplier is set exactly
	// once: a preceding RangeMultiplier(4) would be overridden by the later call
	// before Range() consumes it, so it had no effect.
	BENCHMARK(Writer_Unbuffered)
	    ->RangeMultiplier(16)
	    ->Range(1 << 8, 1 << 16)
	    ->UseRealTime();

	} // namespace cuda
	} // namespace arrow