// include/tvm/runtime/device_api.h - tvm - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 /*!
  * \file tvm/runtime/device_api.h
  * \brief Abstract device memory management API
  */
 #ifndef TVM_RUNTIME_DEVICE_API_H_
 #define TVM_RUNTIME_DEVICE_API_H_

 #include <tvm/runtime/c_runtime_api.h>
 #include <tvm/runtime/packed_func.h>

 #include <string>

 namespace tvm {
 namespace runtime {
 /*!
  * \brief Selector passed to DeviceAPI::GetAttr to choose the queried attribute.
  *
  * Enumerators are numbered consecutively from zero in declaration order;
  * the resulting value of each one is written next to it.
  */
 enum DeviceAttrKind : int {
   kExist = 0,
   kMaxThreadsPerBlock,       // 1
   kWarpSize,                 // 2
   kMaxSharedMemoryPerBlock,  // 3
   kComputeVersion,           // 4
   kDeviceName,               // 5
   kMaxClockRate,             // 6
   kMultiProcessorCount,      // 7
   kMaxThreadDimensions,      // 8
   kMaxRegistersPerBlock,     // 9
   kGcnArch,                  // 10
   kApiVersion                // 11
 };
 // Compile-time guard: fails if an enumerator is inserted or removed above.
 static_assert(kApiVersion == 11, "DeviceAttrKind enumerator values changed unexpectedly");

 /*! \brief Alignment, in bytes, required of every allocation */
 constexpr int kAllocAlignment{128};

 /*! \brief Alignment, in bytes, required of every temporary allocation */
 constexpr int kTempAllocaAlignment{128};

 /*! \brief Upper bound on the size of an allocation placed on the stack */
 constexpr int kMaxStackAlloca{1024};

 /*!
  *  \brief TVM Runtime Device API, abstracts the device
  *  specific interface for memory management.
  *
  *  Members declared `= 0` have to be implemented by every concrete device
  *  API. The other virtual members are not pure, so a subclass may either
  *  keep them as they are or override them.
  */
 class TVM_DLL DeviceAPI {
  public:
   /*! \brief virtual destructor */
   virtual ~DeviceAPI() {}
   /*!
    * \brief Set the environment device id to ctx
    * \param ctx The context to be set.
    */
   virtual void SetDevice(TVMContext ctx) = 0;
   /*!
    * \brief Get attribute of specified device.
    * \param ctx The device context
    * \param kind The result kind
    * \param rv The return value; the queried attribute is delivered through
    *           this pointer rather than as the function result.
    * \sa DeviceAttrKind
    */
   virtual void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) = 0;
   /*!
    * \brief Allocate a data space on device.
    * \param ctx The device context to perform operation.
    * \param nbytes The number of bytes in memory.
    * \param alignment The alignment of the memory.
    * \param type_hint The type of elements. Only needed by certain backends such
    * as OpenGL, as nbytes & alignment are sufficient for most backends.
    * \return The allocated device pointer.
    */
   virtual void* AllocDataSpace(TVMContext ctx, size_t nbytes, size_t alignment,
                                DLDataType type_hint) = 0;
   /*!
    * \brief Free a data space on device.
    * \param ctx The device context to perform operation.
    * \param ptr The data space.
    */
   virtual void FreeDataSpace(TVMContext ctx, void* ptr) = 0;
   /*!
    * \brief Copy data from one place to another.
    * \param from The source array.
    * \param from_offset The byte offset in the from.
    * \param to The target array.
    * \param to_offset The byte offset in the to.
    * \param num_bytes The size of the memory in bytes
    * \param ctx_from The source context
    * \param ctx_to The target context
    * \param type_hint The type of elements, only needed by certain backends.
    *                  Can be useful for cross device endian conversion.
    * \param stream Optional stream object.
    */
   virtual void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset,
                               size_t num_bytes, TVMContext ctx_from, TVMContext ctx_to,
                               DLDataType type_hint, TVMStreamHandle stream) = 0;
   /*!
    * \brief Create a new stream of execution.
    *
    * \param ctx The context of allocation.
    * \return Handle of the newly created stream.
    */
   virtual TVMStreamHandle CreateStream(TVMContext ctx);

   /*!
    * \brief Free a stream of execution
    *
    * \param ctx The context of the stream
    * \param stream The pointer to be freed.
    */
   virtual void FreeStream(TVMContext ctx, TVMStreamHandle stream);

   /*!
    * \brief Synchronize the stream
    * \param ctx The context to perform operation.
    * \param stream The stream to be sync.
    */
   virtual void StreamSync(TVMContext ctx, TVMStreamHandle stream) = 0;
   /*!
    * \brief Set the stream
    *
    * The implementation given here has an empty body, i.e. it does nothing.
    *
    * \param ctx The context to set stream.
    * \param stream The stream to be set.
    */
   virtual void SetStream(TVMContext ctx, TVMStreamHandle stream) {}
   /*!
    * \brief Synchronize 2 streams of execution.
    *
    * An event is created in the event_src stream that the event_dst
    * stream then waits on.  Neither event_src nor event_dst need to be of
    * the same device ID as the context, but they must be of the same
    * device type.
    *
    * \param ctx The context of the streams.
    * \param event_src The source stream to synchronize.
    * \param event_dst The destination stream to synchronize.
    */
   virtual void SyncStreamFromTo(TVMContext ctx, TVMStreamHandle event_src,
                                 TVMStreamHandle event_dst);
   /*!
    * \brief Allocate temporal workspace for backend execution.
    *
    *  \note We have the following assumption about backend temporal
    *   workspace allocation, and backend will optimize for such assumption:
    *
    *  - Only a few allocations will happen, and space will be released after use.
    *  - The release order is usually in reverse order of allocate (stack style).
    *  - Repetitive pattern of same allocations over different runs.
    *  - Workspace should not overlap between different threads (i.e. be thread-local)
    *
    * \param ctx The context of allocation.
    * \param nbytes The size to be allocated.
    * \param type_hint The type of elements. Only needed by certain backends such
    * as OpenGL, as nbytes is sufficient for most backends. Defaults to a
    * value-initialized DLDataType.
    * \return Pointer to the allocated workspace.
    */
   virtual void* AllocWorkspace(TVMContext ctx, size_t nbytes, DLDataType type_hint = {});
   /*!
    * \brief Free temporal workspace in backend execution.
    *
    * \param ctx The context of allocation.
    * \param ptr The pointer to be freed.
    */
   virtual void FreeWorkspace(TVMContext ctx, void* ptr);

   /*!
    * \brief Get device API based on context.
    * \param ctx The context
    * \param allow_missing Whether allow missing
    * \return The corresponding device API.
    */
   static DeviceAPI* Get(TVMContext ctx, bool allow_missing = false);

   /*!
    * \brief Whether a certain device type requires set device context
    *        before launching the kernel function.
    * \param device_type The device type.
    * \return false for kDLCPU and kDLMicroDev, true for every other value.
    */
   static bool NeedSetDeviceContext(int device_type) {
     return device_type != kDLCPU && device_type != kDLMicroDev;
   }
 };

 /*! \brief Device types greater than this value denote an RPC device */
 constexpr int kRPCSessMask{128};

 /*!
  * \brief Translate a device type code into the name of its Device API factory.
  * \param type The device type.
  * \return The device name. An unrecognized type is reported through
  *         LOG(FATAL); "Unknown" is the value returned after that statement.
  */
 inline const char* DeviceName(int type) {
   // One line per known device type keeps the code/name pairs easy to scan.
   // clang-format off
   switch (type) {
     case kDLCPU:       return "cpu";
     case kDLGPU:       return "gpu";
     case kDLCPUPinned: return "cpu_pinned";
     case kDLOpenCL:    return "opencl";
     case kDLSDAccel:   return "sdaccel";
     case kDLAOCL:      return "aocl";
     case kDLVulkan:    return "vulkan";
     case kDLMetal:     return "metal";
     case kDLVPI:       return "vpi";
     case kDLROCM:      return "rocm";
     case kDLExtDev:    return "ext_dev";
     case kDLWebGPU:    return "webgpu";
     case kDLMicroDev:  return "micro_dev";
     case kDLHexagon:   return "hexagon";
     default:           break;
   }
   // clang-format on
   // Only reached when no case above matched.
   LOG(FATAL) << "unknown type =" << type;
   return "Unknown";
 }

 /*!
  * \brief Print a context as `<device name>(<device id>)`.
  *
  * When the device type is greater than kRPCSessMask the text is prefixed
  * with `remote[<device_type / kRPCSessMask>]-` and the device name is looked
  * up from `device_type % kRPCSessMask`.
  *
  * \param os The output stream.
  * \param ctx The context to print.
  * \return The same stream, to allow chaining.
  */
 inline std::ostream& operator<<(std::ostream& os, DLContext ctx) {  // NOLINT(*)
   const int raw_type = static_cast<int>(ctx.device_type);
   const bool is_remote = raw_type > kRPCSessMask;
   if (is_remote) {
     os << "remote[" << (raw_type / kRPCSessMask) << "]-";
   }
   // Strip the session part so that only the plain device type is named.
   const int local_type = is_remote ? raw_type % kRPCSessMask : raw_type;
   os << runtime::DeviceName(local_type) << "(" << ctx.device_id << ")";
   return os;
 }
 }  // namespace runtime
 }  // namespace tvm
 #endif  // TVM_RUNTIME_DEVICE_API_H_
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	/*!
	* \file tvm/runtime/device_api.h
	* \brief Abstract device memory management API
	*/
	#ifndef TVM_RUNTIME_DEVICE_API_H_
	#define TVM_RUNTIME_DEVICE_API_H_

	#include <tvm/runtime/c_runtime_api.h>
	#include <tvm/runtime/packed_func.h>

	#include <string>

	namespace tvm {
	namespace runtime {
	/*!
	 * \brief Selector passed to DeviceAPI::GetAttr to choose the queried attribute.
	 *
	 * Enumerators are numbered consecutively from zero in declaration order;
	 * the resulting value of each one is written next to it.
	 */
	enum DeviceAttrKind : int {
	  kExist = 0,
	  kMaxThreadsPerBlock,       // 1
	  kWarpSize,                 // 2
	  kMaxSharedMemoryPerBlock,  // 3
	  kComputeVersion,           // 4
	  kDeviceName,               // 5
	  kMaxClockRate,             // 6
	  kMultiProcessorCount,      // 7
	  kMaxThreadDimensions,      // 8
	  kMaxRegistersPerBlock,     // 9
	  kGcnArch,                  // 10
	  kApiVersion                // 11
	};
	// Compile-time guard: fails if an enumerator is inserted or removed above.
	static_assert(kApiVersion == 11, "DeviceAttrKind enumerator values changed unexpectedly");

	/*! \brief Number of bytes each allocation must align to */
	constexpr int kAllocAlignment = 128;

	/*! \brief Number of bytes each allocation must align to in temporary allocation */
	constexpr int kTempAllocaAlignment = 128;

	/*! \brief Maximum size that can be allocated on stack */
	constexpr int kMaxStackAlloca = 1024;

	/*!
	* \brief TVM Runtime Device API, abstracts the device
	* specific interface for memory management.
	*/
	class TVM_DLL DeviceAPI {
	public:
	/! \brief virtual destructor /
	virtual ~DeviceAPI() {}
	/*!
	* \brief Set the environment device id to ctx
	* \param ctx The context to be set.
	*/
	virtual void SetDevice(TVMContext ctx) = 0;
	/*!
	* \brief Get attribute of specified device.
	* \param ctx The device context
	* \param kind The result kind
	* \param rv The return value.
	* \sa DeviceAttrKind
	*/
	virtual void GetAttr(TVMContext ctx, DeviceAttrKind kind, TVMRetValue* rv) = 0;
	/*!
	* \brief Allocate a data space on device.
	* \param ctx The device context to perform operation.
	* \param nbytes The number of bytes in memory.
	* \param alignment The alignment of the memory.
	* \param type_hint The type of elements. Only needed by certain backends such
	* as OpenGL, as nbytes & alignment are sufficient for most backends.
	* \return The allocated device pointer.
	*/
	virtual void* AllocDataSpace(TVMContext ctx, size_t nbytes, size_t alignment,
	DLDataType type_hint) = 0;
	/*!
	* \brief Free a data space on device.
	* \param ctx The device context to perform operation.
	* \param ptr The data space.
	*/
	virtual void FreeDataSpace(TVMContext ctx, void* ptr) = 0;
	/*!
	* \brief copy data from one place to another
	* \param from The source array.
	* \param from_offset The byte offeset in the from.
	* \param to The target array.
	* \param to_offset The byte offset in the to.
	* \param num_bytes The size of the memory in bytes
	* \param ctx_from The source context
	* \param ctx_to The target context
	* \param type_hint The type of elements, only neded by certain backends.
	* can be useful for cross device endian converison.
	* \param stream Optional stream object.
	*/
	virtual void CopyDataFromTo(const void* from, size_t from_offset, void* to, size_t to_offset,
	size_t num_bytes, TVMContext ctx_from, TVMContext ctx_to,
	DLDataType type_hint, TVMStreamHandle stream) = 0;
	/*!
	* \brief Create a new stream of execution.
	*
	* \param ctx The context of allocation.
	*/
	virtual TVMStreamHandle CreateStream(TVMContext ctx);

	/*!
	* \brief Free a stream of execution
	*
	* \param ctx The context of the stream
	* \param stream The pointer to be freed.
	*/
	virtual void FreeStream(TVMContext ctx, TVMStreamHandle stream);

	/*!
	* \brief Synchronize the stream
	* \param ctx The context to perform operation.
	* \param stream The stream to be sync.
	*/
	virtual void StreamSync(TVMContext ctx, TVMStreamHandle stream) = 0;
	/*!
	* \brief Set the stream
	* \param ctx The context to set stream.
	* \param stream The stream to be set.
	*/
	virtual void SetStream(TVMContext ctx, TVMStreamHandle stream) {}
	/*!
	* \brief Synchronize 2 streams of execution.
	*
	* An event is created in event_src stream that the second then
	* stream waits on. Neither event_src or event_dst need to be of
	* the same device ID as the context, but they must be of the same
	* device type.
	*
	* \param ctx The context of the streams.
	* \param event_src The source stream to synchronize.
	* \param event_dst The destination stream to synchronize.
	*/
	virtual void SyncStreamFromTo(TVMContext ctx, TVMStreamHandle event_src,
	TVMStreamHandle event_dst);
	/*!
	* \brief Allocate temporal workspace for backend execution.
	*
	* \note We have the following assumption about backend temporal
	* workspace allocation, and backend will optimize for such assumption:
	*
	* - Only a few allocation will happen, and space will be released after use.
	* - The release order is usually in reverse order of allocate (stack style).
	* - Repeative pattern of same allocations over different runs.
	* - Workspace should not overlap between different threads(i.e. be threadlocal)
	*
	* \param ctx The context of allocation.
	* \param nbytes The size to be allocated.
	* \param type_hint The type of elements. Only needed by certain backends such
	* as OpenGL, as nbytes is sufficient for most backends.
	*/
	virtual void* AllocWorkspace(TVMContext ctx, size_t nbytes, DLDataType type_hint = {});
	/*!
	* \brief Free temporal workspace in backend execution.
	*
	* \param ctx The context of allocation.
	* \param ptr The pointer to be freed.
	*/
	virtual void FreeWorkspace(TVMContext ctx, void* ptr);

	/*!
	* \brief Get device API based on context.
	* \param ctx The context
	* \param allow_missing Whether allow missing
	* \return The corresponding device API.
	*/
	static DeviceAPI* Get(TVMContext ctx, bool allow_missing = false);

	/*!
	* \brief Whether a certian device type requires set device context
	* before launching the kernel function.
	* \param device_type The device type.
	*/
	static bool NeedSetDeviceContext(int device_type) {
	return device_type != kDLCPU && device_type != kDLMicroDev;
	}
	};

	/*! \brief The device type bigger than this is RPC device */
	constexpr int kRPCSessMask = 128;

	/*!
	 * \brief Translate a device type code into the name of its Device API factory.
	 * \param type The device type.
	 * \return The device name. An unrecognized type is reported through
	 *         LOG(FATAL); "Unknown" is the value returned after that statement.
	 */
	inline const char* DeviceName(int type) {
	  // One line per known device type keeps the code/name pairs easy to scan.
	  // clang-format off
	  switch (type) {
	    case kDLCPU:       return "cpu";
	    case kDLGPU:       return "gpu";
	    case kDLCPUPinned: return "cpu_pinned";
	    case kDLOpenCL:    return "opencl";
	    case kDLSDAccel:   return "sdaccel";
	    case kDLAOCL:      return "aocl";
	    case kDLVulkan:    return "vulkan";
	    case kDLMetal:     return "metal";
	    case kDLVPI:       return "vpi";
	    case kDLROCM:      return "rocm";
	    case kDLExtDev:    return "ext_dev";
	    case kDLWebGPU:    return "webgpu";
	    case kDLMicroDev:  return "micro_dev";
	    case kDLHexagon:   return "hexagon";
	    default:           break;
	  }
	  // clang-format on
	  // Only reached when no case above matched.
	  LOG(FATAL) << "unknown type =" << type;
	  return "Unknown";
	}

	/*!
	 * \brief Print a context as `<device name>(<device id>)`.
	 *
	 * When the device type is greater than kRPCSessMask the text is prefixed
	 * with `remote[<device_type / kRPCSessMask>]-` and the device name is looked
	 * up from `device_type % kRPCSessMask`.
	 *
	 * \param os The output stream.
	 * \param ctx The context to print.
	 * \return The same stream, to allow chaining.
	 */
	inline std::ostream& operator<<(std::ostream& os, DLContext ctx) {  // NOLINT(*)
	  const int raw_type = static_cast<int>(ctx.device_type);
	  const bool is_remote = raw_type > kRPCSessMask;
	  if (is_remote) {
	    os << "remote[" << (raw_type / kRPCSessMask) << "]-";
	  }
	  // Strip the session part so that only the plain device type is named.
	  const int local_type = is_remote ? raw_type % kRPCSessMask : raw_type;
	  os << runtime::DeviceName(local_type) << "(" << ctx.device_id << ")";
	  return os;
	}
	} // namespace runtime
	} // namespace tvm
	#endif // TVM_RUNTIME_DEVICE_API_H_