blob: c6a2ce3d28d09afbb3e4dbce9fccc0811f57dc88 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file tvm/runtime/device_api.h
* \brief Abstract device memory management API
*/
#ifndef TVM_RUNTIME_DEVICE_API_H_
#define TVM_RUNTIME_DEVICE_API_H_
#include <tvm/runtime/c_runtime_api.h>
#include <tvm/runtime/packed_func.h>
#include <string>
namespace tvm {
namespace runtime {
/*!
* \brief the query type into GetAttr
*/
enum DeviceAttrKind : int {
kExist = 0,
kMaxThreadsPerBlock = 1,
kWarpSize = 2,
kMaxSharedMemoryPerBlock = 3,
kComputeVersion = 4,
kDeviceName = 5,
kMaxClockRate = 6,
kMultiProcessorCount = 7,
kMaxThreadDimensions = 8,
kMaxRegistersPerBlock = 9,
kGcnArch = 10,
kApiVersion = 11
};
/*! \brief Number of bytes each allocation must align to */
constexpr int kAllocAlignment = 128;
/*! \brief Number of bytes each allocation must align to in temporary allocation */
constexpr int kTempAllocaAlignment = 128;
/*! \brief Maximum size that can be allocated on stack */
constexpr int kMaxStackAlloca = 1024;
/*!
* \brief TVM Runtime Device API, abstracts the device
* specific interface for memory management.
*/
class TVM_DLL DeviceAPI {
public:
/*! \brief virtual destructor */
virtual ~DeviceAPI() {}
/*!
* \brief Set the environment device id to ctx
* \param ctx The context to be set.
*/
virtual void SetDevice(TVMContext ctx) = 0;
/*!
* \brief Get attribute of specified device.
* \param ctx The device context
* \param kind The result kind
* \param rv The return value.
* \sa DeviceAttrKind
*/
virtual void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) = 0;
/*!
* \brief Allocate a data space on device.
* \param ctx The device context to perform operation.
* \param nbytes The number of bytes in memory.
* \param alignment The alignment of the memory.
* \param type_hint The type of elements. Only needed by certain backends such
* as OpenGL, as nbytes & alignment are sufficient for most backends.
* \return The allocated device pointer.
*/
virtual void* AllocDataSpace(TVMContext ctx, size_t nbytes, size_t alignment,
DLDataType type_hint) = 0;
/*!
* \brief Free a data space on device.
* \param ctx The device context to perform operation.
* \param ptr The data space.
*/
virtual void FreeDataSpace(TVMContext ctx, void* ptr) = 0;
/*!
* \brief copy data from one place to another
* \param from The source array.
* \param from_offset The byte offeset in the from.
* \param to The target array.
* \param to_offset The byte offset in the to.
* \param num_bytes The size of the memory in bytes
* \param ctx_from The source context
* \param ctx_to The target context
* \param type_hint The type of elements, only neded by certain backends.
* can be useful for cross device endian converison.
* \param stream Optional stream object.
*/
virtual void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset,
size_t num_bytes, TVMContext ctx_from, TVMContext ctx_to,
DLDataType type_hint, TVMStreamHandle stream) = 0;
/*!
* \brief Create a new stream of execution.
*
* \param ctx The context of allocation.
*/
virtual TVMStreamHandle CreateStream(TVMContext ctx);
/*!
* \brief Free a stream of execution
*
* \param ctx The context of the stream
* \param stream The pointer to be freed.
*/
virtual void FreeStream(TVMContext ctx, TVMStreamHandle stream);
/*!
* \brief Synchronize the stream
* \param ctx The context to perform operation.
* \param stream The stream to be sync.
*/
virtual void StreamSync(TVMContext ctx, TVMStreamHandle stream) = 0;
/*!
* \brief Set the stream
* \param ctx The context to set stream.
* \param stream The stream to be set.
*/
virtual void SetStream(TVMContext ctx, TVMStreamHandle stream) {}
/*!
* \brief Synchronize 2 streams of execution.
*
* An event is created in event_src stream that the second then
* stream waits on. Neither event_src or event_dst need to be of
* the same device ID as the context, but they must be of the same
* device type.
*
* \param ctx The context of the streams.
* \param event_src The source stream to synchronize.
* \param event_dst The destination stream to synchronize.
*/
virtual void SyncStreamFromTo(TVMContext ctx, TVMStreamHandle event_src,
TVMStreamHandle event_dst);
/*!
* \brief Allocate temporal workspace for backend execution.
*
* \note We have the following assumption about backend temporal
* workspace allocation, and backend will optimize for such assumption:
*
* - Only a few allocation will happen, and space will be released after use.
* - The release order is usually in reverse order of allocate (stack style).
* - Repeative pattern of same allocations over different runs.
* - Workspace should not overlap between different threads(i.e. be threadlocal)
*
* \param ctx The context of allocation.
* \param nbytes The size to be allocated.
* \param type_hint The type of elements. Only needed by certain backends such
* as OpenGL, as nbytes is sufficient for most backends.
*/
virtual void* AllocWorkspace(TVMContext ctx, size_t nbytes, DLDataType type_hint = {});
/*!
* \brief Free temporal workspace in backend execution.
*
* \param ctx The context of allocation.
* \param ptr The pointer to be freed.
*/
virtual void FreeWorkspace(TVMContext ctx, void* ptr);
/*!
* \brief Get device API based on context.
* \param ctx The context
* \param allow_missing Whether allow missing
* \return The corresponding device API.
*/
static DeviceAPI* Get(TVMContext ctx, bool allow_missing = false);
/*!
* \brief Whether a certian device type requires set device context
* before launching the kernel function.
* \param device_type The device type.
*/
static bool NeedSetDeviceContext(int device_type) {
return device_type != kDLCPU && device_type != kDLMicroDev;
}
};
/*! \brief The device type bigger than this is RPC device */
constexpr int kRPCSessMask = 128;
/*!
* \brief The name of Device API factory.
* \param type The device type.
* \return the device name.
*/
inline const char* DeviceName(int type) {
switch (type) {
case kDLCPU:
return "cpu";
case kDLGPU:
return "gpu";
case kDLCPUPinned:
return "cpu_pinned";
case kDLOpenCL:
return "opencl";
case kDLSDAccel:
return "sdaccel";
case kDLAOCL:
return "aocl";
case kDLVulkan:
return "vulkan";
case kDLMetal:
return "metal";
case kDLVPI:
return "vpi";
case kDLROCM:
return "rocm";
case kDLExtDev:
return "ext_dev";
case kDLWebGPU:
return "webgpu";
case kDLMicroDev:
return "micro_dev";
case kDLHexagon:
return "hexagon";
default:
LOG(FATAL) << "unknown type =" << type;
return "Unknown";
}
}
inline std::ostream& operator<<(std::ostream& os, DLContext ctx) { // NOLINT(*)
int device_type = static_cast<int>(ctx.device_type);
if (device_type > kRPCSessMask) {
os << "remote[" << (device_type / kRPCSessMask) << "]-";
device_type = device_type % kRPCSessMask;
}
os << runtime::DeviceName(device_type) << "(" << ctx.device_id << ")";
return os;
}
} // namespace runtime
} // namespace tvm
#endif // TVM_RUNTIME_DEVICE_API_H_