| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*! |
| * \file hexagon_device_api.cc |
| */ |
| |
| #include "hexagon_device_api.h" |
| |
| #include <tvm/ffi/function.h> |
| #include <tvm/ffi/reflection/registry.h> |
| #include <tvm/runtime/logging.h> |
| #include <tvm/runtime/tensor.h> |
| |
| #include <cstdlib> |
| #include <cstring> |
| |
| #include "../workspace_pool.h" |
| #include "hexagon_common.h" |
| |
| namespace tvm { |
| namespace runtime { |
| namespace hexagon { |
| |
| HexagonDeviceAPI* HexagonDeviceAPI::Global() { |
| static auto* inst = new HexagonDeviceAPI(); |
| return inst; |
| } |
| |
| void HexagonDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, ffi::Any* rv) { |
| if (kind == kExist) { |
| *rv = 1; |
| } |
| } |
| |
| // DataSpace: static allocations for Hexagon |
| void* HexagonDeviceAPI::AllocDataSpace(Device dev, int ndim, const int64_t* shape, DLDataType dtype, |
| ffi::Optional<ffi::String> mem_scope) { |
| TVM_FFI_ICHECK(shape || ndim == 0) |
| << "shape array is null for a non-scalar tensor, ndim = " << ndim; |
| TVM_FFI_ICHECK(IsValidDevice(dev)) << "dev.device_type: " << dev.device_type; |
| |
| // IMPORTANT NOTE! |
| // Hexagon treats "global" memory scope VERY DIFFERENTLY from all the others. |
| // |
| // With "global": |
| // - As with "global.ddr", this uses the target device's DDR memory. |
| // - The memory allocation must be a single, contiguous region of |
| // (virtual) memory addresses. |
| // - 'ndim' and 'shape' give the dimensions of the tensor to be stored |
| // in this allocation. There's no (practical) limit on the maximum |
| // rank (ndim) of the tensor. |
| // |
| // All other supported memory-scope names: |
| // - 'ndim' must be exactly 1 or 2: |
| // 1: A single, contiguous region of memory is requested. |
| // 2: A two-level memory allocation is required, suitable for storing a tensor |
| // in Hexagon's "indirect tensor" format: |
| // - shape[0] indicates the number of tensor-content memory allocations. |
| // - shape[1] indicates the size of each tensor-content memory allocation. |
| if (!mem_scope.has_value() || mem_scope.value().empty() || mem_scope.value() == "global") { |
| return DeviceAPI::AllocDataSpace(dev, ndim, shape, dtype, mem_scope); |
| } |
| |
| // NOTE: This check should be superfluous, but it's probably a good idea to leave it in |
| // until the AoT executor's multi-device dispatch code is mature. --cconvey 2022-08-26 |
| TVM_FFI_ICHECK(dev.device_type == kDLHexagon) |
| << "dev.device_type: " << dev.device_type << " DeviceName(" << dev.device_type |
| << "): " << DLDeviceType2Str(dev.device_type) << ""; |
| |
| TVM_FFI_ICHECK(ndim >= 0 && ndim <= 2) |
| << "Hexagon Device API supports only 1d and 2d allocations, but received ndim = " << ndim; |
| |
| const size_t typesize = (dtype.bits / 8) * dtype.lanes; |
| |
| TVM_FFI_ICHECK(runtime_hexbuffs) |
| << "Attempted to allocate Hexagon data with " |
| << "HexagonDeviceAPI::AllocDataSpace before initializing resources. " |
| << "Please call HexagonDeviceAPI::AcquireResources"; |
| |
| if (ndim == 0) { |
| // Allocate storage for a single scalar value. |
| return runtime_hexbuffs->AllocateHexagonBuffer(typesize, kHexagonAllocAlignment, mem_scope); |
| } else if (ndim == 1) { |
| // Allocate a single, contiguous memory region. |
| size_t nbytes = shape[0] * typesize; |
| return runtime_hexbuffs->AllocateHexagonBuffer(nbytes, kHexagonAllocAlignment, mem_scope); |
| } else if (ndim == 2) { |
| // Allocate the region(s) needed for Hexagon's indirect-tensor format. |
| size_t nallocs = shape[0]; |
| size_t nbytes = shape[1] * typesize; |
| return runtime_hexbuffs->AllocateHexagonBuffer(nallocs, nbytes, kHexagonAllocAlignment, |
| mem_scope); |
| } else { |
| return nullptr; // unreachable |
| } |
| } |
| |
| void* HexagonDeviceAPI::AllocDataSpace(Device dev, size_t nbytes, size_t alignment, |
| DLDataType type_hint) { |
| TVM_FFI_ICHECK(nbytes) << "number of bytes is zero"; |
| TVM_FFI_ICHECK(alignment) << "alignment is zero"; |
| TVM_FFI_ICHECK(IsValidDevice(dev)) << "dev.device_type: " << dev.device_type; |
| if (alignment < kHexagonAllocAlignment) { |
| alignment = kHexagonAllocAlignment; |
| } |
| TVM_FFI_ICHECK(runtime_hexbuffs) |
| << "Attempted to allocate Hexagon data with " |
| << "HexagonDeviceAPI::AllocDataSpace before initializing resources. " |
| << "Please call HexagonDeviceAPI::AcquireResources"; |
| return runtime_hexbuffs->AllocateHexagonBuffer(nbytes, alignment, ffi::String("global")); |
| } |
| |
| void HexagonDeviceAPI::FreeDataSpace(Device dev, void* ptr) { |
| TVM_FFI_ICHECK(ptr) << "buffer pointer is null"; |
| TVM_FFI_ICHECK(IsValidDevice(dev)) << "dev.device_type: " << dev.device_type; |
| if (runtime_hexbuffs) { |
| runtime_hexbuffs->FreeHexagonBuffer(ptr); |
| } else { |
| // Either AcquireResources was never called, or ReleaseResources was called. Since this can |
| // occur in the normal course of shutdown, log a message and continue. |
| DLOG(INFO) << "FreeDataSpace called outside a session for " << ptr; |
| } |
| } |
| |
| // WorkSpace: runtime allocations for Hexagon |
| struct HexagonWorkspacePool : public WorkspacePool { |
| HexagonWorkspacePool() |
| : WorkspacePool(static_cast<DLDeviceType>(kDLHexagon), HexagonDeviceAPI::Global()) {} |
| }; |
| |
| static HexagonWorkspacePool* HexagonWorkspacePoolThreadLocal() { |
| static thread_local HexagonWorkspacePool inst; |
| return &inst; |
| } |
| |
| void* HexagonDeviceAPI::AllocWorkspace(Device dev, size_t size, DLDataType type_hint) { |
| TVM_FFI_ICHECK(IsValidDevice(dev)) << "dev.device_type: " << dev.device_type; |
| return HexagonWorkspacePoolThreadLocal()->AllocWorkspace(dev, size); |
| } |
| |
| void HexagonDeviceAPI::FreeWorkspace(Device dev, void* data) { |
| TVM_FFI_ICHECK(IsValidDevice(dev)) << "dev.device_type: " << dev.device_type; |
| TVM_FFI_ICHECK(runtime_hexbuffs) << "Attempted to free Hexagon workspace with " |
| << "HexagonDeviceAPI::FreeWorkspace outside of a session. " |
| << "Please call HexagonDeviceAPI::AcquireResources"; |
| TVM_FFI_ICHECK(runtime_hexbuffs->FindHexagonBuffer(data) != nullptr) |
| << "Attempt made to free unknown or already freed workspace allocation"; |
| HexagonWorkspacePoolThreadLocal()->FreeWorkspace(dev, data); |
| } |
| |
| void HexagonDeviceAPI::CopyDataFromTo(DLTensor* from, DLTensor* to, TVMStreamHandle stream) { |
| TVM_FFI_ICHECK_EQ(from->byte_offset, 0); |
| TVM_FFI_ICHECK_EQ(to->byte_offset, 0); |
| TVM_FFI_ICHECK_EQ(GetDataSize(*from), GetDataSize(*to)); |
| TVM_FFI_ICHECK(runtime_hexbuffs) |
| << "Attempted to copy Hexagon data with " |
| << "HexagonDeviceAPI::CopyDataFromTo before initializing resources. " |
| << "Please call HexagonDeviceAPI::AcquireResources"; |
| |
| auto lookup_hexagon_buffer = [this](void* ptr) -> HexagonBuffer* { |
| return runtime_hexbuffs->FindHexagonBuffer(ptr); |
| }; |
| |
| HexagonBuffer* hex_from_buf = lookup_hexagon_buffer(from->data); |
| HexagonBuffer* hex_to_buf = lookup_hexagon_buffer(to->data); |
| |
| if (hex_from_buf && hex_to_buf) { |
| hex_to_buf->CopyFrom(*hex_from_buf, GetDataSize(*from)); |
| } else if (hex_to_buf) { |
| hex_to_buf->CopyFrom(from->data, GetDataSize(*from)); |
| } else if (hex_from_buf) { |
| hex_from_buf->CopyTo(to->data, GetDataSize(*to)); |
| } else { |
| TVM_FFI_ICHECK(false) |
| << "CopyDataFromTo requested between src and dst which are not managed by the " |
| "hexagon device api."; |
| } |
| } |
| |
| void HexagonDeviceAPI::CopyDataFromTo(const void* from, size_t from_offset, void* to, |
| size_t to_offset, size_t size, Device dev_from, Device dev_to, |
| DLDataType type_hint, TVMStreamHandle stream) { |
| memcpy(static_cast<char*>(to) + to_offset, static_cast<const char*>(from) + from_offset, size); |
| } |
| |
| TVM_FFI_STATIC_INIT_BLOCK() { |
| namespace refl = tvm::ffi::reflection; |
| refl::GlobalDef() |
| .def_packed("device_api.hexagon.dma_copy_dltensor", |
| [](ffi::PackedArgs args, ffi::Any* rv) { |
| auto dst = args[0].cast<DLTensor*>(); |
| auto src = args[1].cast<DLTensor*>(); |
| int size = args[2].cast<int>(); |
| TVM_FFI_ICHECK(size > 0); |
| bool bypass_cache = args[3].cast<bool>(); |
| |
| int ret = DMA_RETRY; |
| do { |
| ret = HexagonDeviceAPI::Global()->UserDMA()->Copy( |
| SYNC_DMA_QUEUE, dst->data, src->data, size, bypass_cache); |
| } while (ret == DMA_RETRY); |
| TVM_FFI_ICHECK(ret == DMA_SUCCESS); |
| HexagonDeviceAPI::Global()->UserDMA()->Wait(SYNC_DMA_QUEUE, 0); |
| |
| *rv = static_cast<int32_t>(0); |
| }) |
| .def_packed("device_api.hexagon.dma_copy", |
| [](ffi::PackedArgs args, ffi::Any* rv) { |
| uint32_t queue_id = args[0].cast<uint32_t>(); |
| void* dst = args[1].cast<void*>(); |
| void* src = args[2].cast<void*>(); |
| uint32_t size = args[3].cast<uint32_t>(); |
| TVM_FFI_ICHECK(size > 0); |
| bool bypass_cache = args[4].cast<bool>(); |
| |
| int ret = DMA_RETRY; |
| do { |
| ret = HexagonDeviceAPI::Global()->UserDMA()->Copy(queue_id, dst, src, size, |
| bypass_cache); |
| } while (ret == DMA_RETRY); |
| TVM_FFI_ICHECK(ret == DMA_SUCCESS); |
| *rv = static_cast<int32_t>(ret); |
| }) |
| .def_packed("device_api.hexagon.dma_wait", |
| [](ffi::PackedArgs args, ffi::Any* rv) { |
| uint32_t queue_id = args[0].cast<uint32_t>(); |
| int inflight = args[1].cast<int>(); |
| TVM_FFI_ICHECK(inflight >= 0); |
| HexagonDeviceAPI::Global()->UserDMA()->Wait(queue_id, inflight); |
| *rv = static_cast<int32_t>(0); |
| }) |
| .def_packed("device_api.hexagon.dma_start_group", |
| [](ffi::PackedArgs args, ffi::Any* rv) { |
| uint32_t queue_id = args[0].cast<uint32_t>(); |
| HexagonDeviceAPI::Global()->UserDMA()->StartGroup(queue_id); |
| *rv = static_cast<int32_t>(0); |
| }) |
| .def_packed("device_api.hexagon.dma_end_group", |
| [](ffi::PackedArgs args, ffi::Any* rv) { |
| uint32_t queue_id = args[0].cast<uint32_t>(); |
| HexagonDeviceAPI::Global()->UserDMA()->EndGroup(queue_id); |
| *rv = static_cast<int32_t>(0); |
| }) |
| .def_packed("device_api.hexagon.alloc_nd", |
| [](ffi::PackedArgs args, ffi::Any* rv) { |
| int32_t device_type = args[0].cast<int32_t>(); |
| int32_t device_id = args[1].cast<int32_t>(); |
| int32_t dtype_code_hint = args[2].cast<int32_t>(); |
| int32_t dtype_bits_hint = args[3].cast<int32_t>(); |
| auto scope = args[4].cast<std::string>(); |
| TVM_FFI_ICHECK(scope.find("global.vtcm") != std::string::npos); |
| int64_t ndim = args[5].cast<int64_t>(); |
| TVM_FFI_ICHECK((ndim == 1 || ndim == 2) && |
| "Hexagon Device API supports only 1d and 2d allocations"); |
| int64_t* shape = static_cast<int64_t*>(args[6].cast<void*>()); |
| |
| Device dev; |
| dev.device_type = static_cast<DLDeviceType>(device_type); |
| dev.device_id = device_id; |
| |
| DLDataType type_hint; |
| type_hint.code = static_cast<decltype(type_hint.code)>(dtype_code_hint); |
| type_hint.bits = static_cast<decltype(type_hint.bits)>(dtype_bits_hint); |
| type_hint.lanes = 1; |
| |
| HexagonDeviceAPI* hexapi = HexagonDeviceAPI::Global(); |
| *rv = hexapi->AllocDataSpace(dev, ndim, shape, type_hint, ffi::String(scope)); |
| }) |
| .def_packed("device_api.hexagon.free_nd", |
| [](ffi::PackedArgs args, ffi::Any* rv) { |
| int32_t device_type = args[0].cast<int32_t>(); |
| int32_t device_id = args[1].cast<int32_t>(); |
| auto scope = args[2].cast<std::string>(); |
| TVM_FFI_ICHECK(scope.find("global.vtcm") != std::string::npos); |
| void* ptr = args[3].cast<void*>(); |
| |
| Device dev; |
| dev.device_type = static_cast<DLDeviceType>(device_type); |
| dev.device_id = device_id; |
| |
| HexagonDeviceAPI* hexapi = HexagonDeviceAPI::Global(); |
| hexapi->FreeDataSpace(dev, ptr); |
| *rv = static_cast<int32_t>(0); |
| }) |
| .def_packed("device_api.hexagon.acquire_resources", |
| [](ffi::PackedArgs args, ffi::Any* rv) { |
| HexagonDeviceAPI* api = HexagonDeviceAPI::Global(); |
| api->AcquireResources(); |
| }) |
| .def_packed("device_api.hexagon.release_resources", |
| [](ffi::PackedArgs args, ffi::Any* rv) { |
| HexagonDeviceAPI* api = HexagonDeviceAPI::Global(); |
| api->ReleaseResources(); |
| }) |
| .def_packed("device_api.hexagon.vtcm_device_bytes", |
| [](ffi::PackedArgs args, ffi::Any* rv) { |
| HexagonDeviceAPI* api = HexagonDeviceAPI::Global(); |
| *rv = static_cast<int32_t>(api->VtcmPool()->VtcmDeviceBytes()); |
| }) |
| .def_packed("device_api.hexagon", [](ffi::PackedArgs args, ffi::Any* rv) { |
| DeviceAPI* ptr = HexagonDeviceAPI::Global(); |
| *rv = static_cast<void*>(ptr); |
| }); |
| } |
| |
| } // namespace hexagon |
| } // namespace runtime |
| } // namespace tvm |