blob: dee1211a492aac18a65daa584ed71fe380070bb7 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include "arrow/buffer.h"
#include "arrow/io/concurrency.h"
#include "arrow/type_fwd.h"
namespace arrow {
namespace cuda {
// Forward declarations. CudaContext is only referenced through
// std::shared_ptr and friend declarations in this header; CudaIpcMemHandle
// is referenced by CudaBuffer::ExportForIpc() before its definition below.
class CudaContext;
class CudaIpcMemHandle;
/// \class CudaBuffer
/// \brief An Arrow buffer located on a GPU device
///
/// Be careful using this in any Arrow code which may not be GPU-aware
class ARROW_EXPORT CudaBuffer : public Buffer {
// XXX deprecate?
/// \brief Construct a CudaBuffer from a raw data pointer
/// \param[in] data start of the memory area, as a pointer
/// \param[in] size size of the memory area in bytes
/// \param[in] context the CudaContext the buffer is associated with
/// \param[in] own_data presumably whether this buffer is responsible for
/// freeing the memory -- TODO confirm in the implementation
/// \param[in] is_ipc presumably whether the memory was obtained through
/// CUDA IPC -- TODO confirm in the implementation
CudaBuffer(uint8_t* data, int64_t size, const std::shared_ptr<CudaContext>& context,
bool own_data = false, bool is_ipc = false);
/// \brief Construct a CudaBuffer from an integer address
///
/// Same parameters as the pointer-based constructor, except that the start
/// of the memory area is given as a uintptr_t.
CudaBuffer(uintptr_t address, int64_t size, const std::shared_ptr<CudaContext>& context,
bool own_data = false, bool is_ipc = false);
/// \brief Construct a CudaBuffer from a parent buffer, an offset and a size
/// \param[in] parent the buffer this one is derived from
/// \param[in] offset presumably the start position inside parent, in bytes
/// -- TODO confirm
/// \param[in] size size of the new buffer in bytes
CudaBuffer(const std::shared_ptr<CudaBuffer>& parent, const int64_t offset,
const int64_t size);
/// \brief Convert a generic Buffer back into a CudaBuffer
/// \param[in] buffer buffer to convert
/// \return CudaBuffer or Status
/// \note This function returns an error if the buffer isn't backed
/// by GPU memory
static Result<std::shared_ptr<CudaBuffer>> FromBuffer(std::shared_ptr<Buffer> buffer);
/// \brief Convert a generic Buffer back into a CudaBuffer
/// \param[in] buffer buffer to convert
/// \param[out] out conversion result
/// \return Status
/// \note This function returns an error if the buffer isn't backed
/// by GPU memory
/// \deprecated Use the Result-returning overload instead.
ARROW_DEPRECATED("Use Result-returning version")
static Status FromBuffer(std::shared_ptr<Buffer> buffer,
std::shared_ptr<CudaBuffer>* out);
/// \brief Copy memory from GPU device to CPU host
/// \param[in] position start position inside buffer to copy bytes from
/// \param[in] nbytes number of bytes to copy
/// \param[out] out start address of the host memory area to copy to
/// \return Status
Status CopyToHost(const int64_t position, const int64_t nbytes, void* out) const;
/// \brief Copy memory from CPU host to device at position
/// \param[in] position start position inside buffer to copy bytes to
/// \param[in] data the host data to copy
/// \param[in] nbytes number of bytes to copy
/// \return Status
Status CopyFromHost(const int64_t position, const void* data, int64_t nbytes);
/// \brief Copy memory from device to device at position
/// \param[in] position start position inside buffer to copy bytes to
/// \param[in] data start address of the device memory area to copy from
/// \param[in] nbytes number of bytes to copy
/// \return Status
/// \note It is assumed that both source and destination device
/// memories have been allocated within the same context.
Status CopyFromDevice(const int64_t position, const void* data, int64_t nbytes);
/// \brief Copy memory from another device to device at position
/// \param[in] src_ctx context of the source device memory
/// \param[in] position start position inside buffer to copy bytes to
/// \param[in] data start address of the memory area on the other device
/// to copy from
/// \param[in] nbytes number of bytes to copy
/// \return Status
Status CopyFromAnotherDevice(const std::shared_ptr<CudaContext>& src_ctx,
const int64_t position, const void* data, int64_t nbytes);
/// \brief Expose this device buffer as IPC memory which can be used in other processes
/// \return Handle or Status
/// \note After calling this function, this device memory will not be freed
/// when the CudaBuffer is destructed
virtual Result<std::shared_ptr<CudaIpcMemHandle>> ExportForIpc();
/// \brief Expose this device buffer as IPC memory which can be used in other processes
/// \param[out] handle the exported IPC handle
/// \return Status
/// \note After calling this function, this device memory will not be freed
/// when the CudaBuffer is destructed
/// \deprecated Use the Result-returning overload instead.
ARROW_DEPRECATED("Use Result-returning version")
virtual Status ExportForIpc(std::shared_ptr<CudaIpcMemHandle>* handle);
/// \brief Return the CudaContext stored in this buffer
const std::shared_ptr<CudaContext>& context() const { return context_; }
// Context returned by context().
std::shared_ptr<CudaContext> context_;
// NOTE(review): the meaning of the two flags below is defined by the
// implementation file; presumably they mirror the own_data / is_ipc
// constructor arguments -- confirm there.
bool own_data_;
bool is_ipc_;
// NOTE(review): the effect of Close() (e.g. whether it releases the device
// memory) is not visible in this header -- confirm in the implementation.
virtual Status Close();
/// \class CudaHostBuffer
/// \brief Device-accessible CPU memory created using cudaHostAlloc
class ARROW_EXPORT CudaHostBuffer : public MutableBuffer {
// Inherit the constructors of MutableBuffer unchanged.
using MutableBuffer::MutableBuffer;
/// \brief Return a device address the GPU can read this memory from.
/// \param[in] ctx the CudaContext to obtain the device address for
/// (exact lookup semantics live in the implementation -- TODO confirm)
/// \return Device address or Status
Result<uintptr_t> GetDeviceAddress(const std::shared_ptr<CudaContext>& ctx);
/// \class CudaIpcMemHandle
/// \brief A container for a CUDA IPC handle
class ARROW_EXPORT CudaIpcMemHandle {
/// \brief Create CudaIpcMemHandle from opaque buffer (e.g. from another process)
/// \param[in] opaque_handle a CUipcMemHandle as a const void*
/// \return Handle or Status
static Result<std::shared_ptr<CudaIpcMemHandle>> FromBuffer(const void* opaque_handle);
/// \brief Create CudaIpcMemHandle from opaque buffer (e.g. from another process)
/// \param[in] opaque_handle a CUipcMemHandle as a const void*
/// \param[out] handle the CudaIpcMemHandle instance
/// \return Status
/// \deprecated Use the Result-returning overload instead.
ARROW_DEPRECATED("Use Result-returning version")
static Status FromBuffer(const void* opaque_handle,
std::shared_ptr<CudaIpcMemHandle>* handle);
/// \brief Write CudaIpcMemHandle to a Buffer
/// \param[in] pool a MemoryPool to allocate memory from
/// \return Buffer or Status
Result<std::shared_ptr<Buffer>> Serialize(
MemoryPool* pool = default_memory_pool()) const;
/// \brief Write CudaIpcMemHandle to a Buffer
/// \param[in] pool a MemoryPool to allocate memory from
/// \param[out] out the serialized buffer
/// \return Status
/// \deprecated Use the Result-returning overload instead.
ARROW_DEPRECATED("Use Result-returning version")
Status Serialize(MemoryPool* pool, std::shared_ptr<Buffer>* out) const;
// Construct from an opaque handle pointer.
// NOTE(review): presumably the same CUipcMemHandle-as-const-void* that
// FromBuffer() accepts -- confirm in the implementation.
explicit CudaIpcMemHandle(const void* handle);
// Construct from a memory size and an opaque handle pointer.
CudaIpcMemHandle(int64_t memory_size, const void* cu_handle);
// The state is kept in a struct that is only forward-declared here and
// defined outside this header.
struct CudaIpcMemHandleImpl;
std::unique_ptr<CudaIpcMemHandleImpl> impl_;
// Accessors for the stored handle (as an opaque pointer) and memory size.
const void* handle() const;
int64_t memory_size() const;
// CudaBuffer and CudaContext are granted access to the members of this class.
friend CudaBuffer;
friend CudaContext;
/// \class CudaBufferReader
/// \brief File interface for zero-copy read from CUDA buffers
///
/// CAUTION: reading to a Buffer returns a Buffer pointing to device memory.
/// It will generally not be compatible with Arrow code expecting a buffer
/// pointing to CPU memory.
/// Reading to a raw pointer, though, copies device memory into the host
/// memory pointed to.
class ARROW_EXPORT CudaBufferReader
: public ::arrow::io::internal::RandomAccessFileConcurrencyWrapper<CudaBufferReader> {
/// \brief Create a reader over the given buffer
/// \param[in] buffer the buffer to read from
/// \note NOTE(review): the parameter is a generic Buffer while buffer()
/// returns a CudaBuffer; presumably the argument must be GPU-backed --
/// confirm in the implementation.
explicit CudaBufferReader(const std::shared_ptr<Buffer>& buffer);
/// \brief Whether the reader has been closed
bool closed() const override;
/// \brief Whether reads can be served without copying
bool supports_zero_copy() const override;
/// \brief Return the CudaBuffer held by this reader
std::shared_ptr<CudaBuffer> buffer() const { return buffer_; }
// The wrapper base class is a friend so it can reach the Do*() methods
// below; presumably it forwards the public file API to them -- see
// arrow/io/concurrency.h.
friend ::arrow::io::internal::RandomAccessFileConcurrencyWrapper<CudaBufferReader>;
Status DoClose();
// Raw-pointer overload: per the class comment, device memory is copied into
// the host memory pointed to by `buffer`.
Result<int64_t> DoRead(int64_t nbytes, void* buffer);
// Buffer overload: per the class comment, the returned Buffer points to
// device memory.
Result<std::shared_ptr<Buffer>> DoRead(int64_t nbytes);
// Positional variants of the two read overloads above.
Result<int64_t> DoReadAt(int64_t position, int64_t nbytes, void* out);
Result<std::shared_ptr<Buffer>> DoReadAt(int64_t position, int64_t nbytes);
Result<int64_t> DoTell() const;
Status DoSeek(int64_t position);
Result<int64_t> DoGetSize();
/// \brief Report whether the reader may still be used
/// \return Status::Invalid if is_open_ is false (the reader was closed),
/// Status::OK otherwise
Status CheckClosed() const {
  if (!is_open_) {
    return Status::Invalid("Operation forbidden on closed CudaBufferReader");
  }
  // Reached only when the reader is still open.
  return Status::OK();
}
// Reader state. NOTE(review): initialization happens in the constructor,
// which is not visible in this header.
std::shared_ptr<CudaBuffer> buffer_;  // returned by buffer()
std::shared_ptr<CudaContext> context_;
// Presumably the device address of buffer_'s data -- TODO confirm.
const uintptr_t address_;
int64_t size_;
// Presumably the current read offset reported by DoTell() -- TODO confirm.
int64_t position_;
bool is_open_;  // tested by CheckClosed(); false means the reader is closed
/// \class CudaBufferWriter
/// \brief File interface for writing to CUDA buffers, with optional buffering
class ARROW_EXPORT CudaBufferWriter : public io::WritableFile {
/// \brief Create a writer targeting the given CUDA buffer
/// \param[in] buffer the device buffer to write into
explicit CudaBufferWriter(const std::shared_ptr<CudaBuffer>& buffer);
~CudaBufferWriter() override;
/// \brief Close writer and flush buffered bytes to GPU
Status Close() override;
/// \brief Whether the writer has been closed
bool closed() const override;
/// \brief Flush buffered bytes to GPU
Status Flush() override;
/// \brief Move the write position
/// \param[in] position the new position
/// \return Status
Status Seek(int64_t position) override;
/// \brief Write nbytes bytes starting at data
/// \param[in] data the data to write
/// \param[in] nbytes number of bytes to write
/// \return Status
Status Write(const void* data, int64_t nbytes) override;
/// \brief Write nbytes bytes starting at data, at the given position
/// \param[in] position position to write at
/// \param[in] data the data to write
/// \param[in] nbytes number of bytes to write
/// \return Status
Status WriteAt(int64_t position, const void* data, int64_t nbytes) override;
/// \brief Return the current position
Result<int64_t> Tell() const override;
/// \brief Set CPU buffer size to limit calls to cudaMemcpy
///
/// By default writes are unbuffered
/// \param[in] buffer_size the size of CPU buffer to allocate
/// \return Status
Status SetBufferSize(const int64_t buffer_size);
/// \brief Returns size of host (CPU) buffer, 0 for unbuffered
int64_t buffer_size() const;
/// \brief Returns number of bytes buffered on host
int64_t num_bytes_buffered() const;
// The writer's state is kept in a class that is only forward-declared here
// and defined outside this header.
class CudaBufferWriterImpl;
std::unique_ptr<CudaBufferWriterImpl> impl_;
/// \brief Allocate CUDA-accessible memory on CPU host
///
/// The GPU will benefit from fast access to this CPU-located buffer,
/// including fast memory copy.
/// \param[in] device_number number of the device the host memory is exposed to
/// \param[in] size number of bytes
/// \return Host buffer or Status
Result<std::shared_ptr<CudaHostBuffer>> AllocateCudaHostBuffer(int device_number,
const int64_t size);
/// \brief Allocate CUDA-accessible memory on CPU host
///
/// The GPU will benefit from fast access to this CPU-located buffer,
/// including fast memory copy.
/// \param[in] device_number number of the device the host memory is exposed to
/// \param[in] size number of bytes
/// \param[out] out the allocated buffer
/// \return Status
/// \deprecated Use the Result-returning overload instead.
ARROW_DEPRECATED("Use Result-returning version")
Status AllocateCudaHostBuffer(int device_number, const int64_t size,
std::shared_ptr<CudaHostBuffer>* out);
/// \brief Low-level: get a device address through which the CPU data can be
/// accessed.
/// \param[in] cpu_data address of the CPU data
/// \param[in] ctx the CudaContext to obtain the device address for
/// \return Device address or Status
Result<uintptr_t> GetDeviceAddress(const uint8_t* cpu_data,
const std::shared_ptr<CudaContext>& ctx);
/// \brief Low-level: get a CPU address through which the device data can be
/// accessed.
/// \param[in] device_ptr address of the device data
/// \return CPU address or Status
Result<uint8_t*> GetHostAddress(uintptr_t device_ptr);
} // namespace cuda
} // namespace arrow