| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*! |
| * \file tensor.cc |
 * \brief Tensor container infrastructure.
| */ |
| #include <tvm/ffi/function.h> |
| #include <tvm/ffi/reflection/registry.h> |
| #include <tvm/runtime/base.h> |
| #include <tvm/runtime/device_api.h> |
| #include <tvm/runtime/logging.h> |
| #include <tvm/runtime/tensor.h> |
| |
| #include "tvm/runtime/data_type.h" |
| |
| namespace tvm { |
| namespace runtime { |
| |
| inline void VerifyDataType(DLDataType dtype) { |
| TVM_FFI_ICHECK_GE(dtype.lanes, 1); |
| if (dtype.code == kDLFloat) { |
| TVM_FFI_ICHECK_EQ(dtype.bits % 8, 0); |
| } else { |
| // allow uint1 as a special flag for bool. |
| if (dtype.bits == 1 && dtype.code == kDLUInt) return; |
| // allow int1/uint4/int4 |
| else if (dtype.bits == 1 && dtype.code == kDLInt) |
| return; |
| else if (dtype.bits == 4 && dtype.code == kDLUInt) |
| return; |
| else if (dtype.bits == 4 && dtype.code == kDLInt) |
| return; |
| else if (dtype.bits == 6 && dtype.code == DataType::kFloat6_e2m3fn) |
| return; |
| else if (dtype.bits == 6 && dtype.code == DataType::kFloat6_e3m2fn) |
| return; |
| else if (dtype.bits == 4 && dtype.code == DataType::kFloat4_e2m1fn) |
| return; |
| else |
| TVM_FFI_ICHECK_EQ(dtype.bits % 8, 0); |
| } |
| TVM_FFI_ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0); |
| } |
| |
| void TensorCopyFromBytes(DLTensor* handle, const void* data, size_t nbytes) { |
| size_t arr_size = GetDataSize(*handle); |
| TVM_FFI_ICHECK_EQ(arr_size, nbytes) << "TensorCopyFromBytes: size mismatch"; |
| TVM_FFI_ICHECK(IsContiguous(*handle)) |
| << "TensorCopyFromBytes only support contiguous array for now"; |
| |
| DLTensor from; |
| from.data = const_cast<void*>(data); |
| from.device = Device{kDLCPU, 0}; |
| from.ndim = handle->ndim; |
| from.dtype = handle->dtype; |
| from.shape = handle->shape; |
| from.strides = nullptr; |
| from.byte_offset = 0; |
| DeviceAPI::Get(handle->device)->CopyDataFromTo(&from, handle, nullptr); |
| // Synchronize in case data become unavailable later. |
| DeviceAPI::Get(handle->device)->StreamSync(handle->device, nullptr); |
| } |
| |
| void Tensor::CopyToBytes(const DLTensor* handle, void* data, size_t nbytes, |
| TVMStreamHandle stream) { |
| size_t arr_size = GetDataSize(*handle); |
| TVM_FFI_ICHECK_EQ(arr_size, nbytes) << "ArrayCopyToBytes: size mismatch"; |
| TVM_FFI_ICHECK(ffi::IsContiguous(*handle)) |
| << "ArrayCopyToBytes only support contiguous array for now"; |
| |
| DLTensor to; |
| to.data = const_cast<void*>(data); |
| to.device = Device{kDLCPU, 0}; |
| to.ndim = handle->ndim; |
| to.dtype = handle->dtype; |
| to.shape = handle->shape; |
| to.strides = nullptr; |
| to.byte_offset = 0; |
| |
| DeviceAPI::Get(handle->device)->CopyDataFromTo(const_cast<DLTensor*>(handle), &to, stream); |
| // Synchronize in case data become unavailable later. |
| DeviceAPI::Get(handle->device)->StreamSync(handle->device, stream); |
| } |
| |
| void Tensor::CopyFromBytes(const DLTensor* handle, void* data, size_t nbytes, |
| TVMStreamHandle stream) { |
| size_t arr_size = GetDataSize(*handle); |
| TVM_FFI_ICHECK_EQ(arr_size, nbytes) << "ArrayCopyToBytes: size mismatch"; |
| TVM_FFI_ICHECK(ffi::IsContiguous(*handle)) |
| << "ArrayCopyToBytes only support contiguous array for now"; |
| |
| DLTensor from; |
| from.data = const_cast<void*>(data); |
| from.device = Device{kDLCPU, 0}; |
| from.ndim = handle->ndim; |
| from.dtype = handle->dtype; |
| from.shape = handle->shape; |
| from.strides = nullptr; |
| from.byte_offset = 0; |
| |
| DeviceAPI::Get(handle->device)->CopyDataFromTo(&from, const_cast<DLTensor*>(handle), stream); |
| // Synchronize in case data become unavailable later. |
| DeviceAPI::Get(handle->device)->StreamSync(handle->device, stream); |
| } |
| |
| Tensor Tensor::Empty(ffi::Shape shape, DLDataType dtype, Device dev, |
| ffi::Optional<ffi::String> mem_scope) { |
| struct DeviceAPIAlloc { |
| void AllocData(DLTensor* tensor, ffi::Optional<ffi::String> mem_scope) { |
| tensor->data = DeviceAPI::Get(tensor->device) |
| ->AllocDataSpace(tensor->device, tensor->ndim, tensor->shape, |
| tensor->dtype, mem_scope); |
| } |
| void FreeData(DLTensor* tensor) { |
| DeviceAPI::Get(tensor->device)->FreeDataSpace(tensor->device, tensor->data); |
| } |
| }; |
| return ffi::Tensor::FromNDAlloc(DeviceAPIAlloc(), shape, dtype, dev, mem_scope); |
| } |
| |
/*!
 * \brief Create a zero-copy view over this tensor's backing storage.
 *
 * The view shares the underlying data (and keeps the source tensor alive via
 * ref counting); it has its own shape/dtype and starts `relative_byte_offset`
 * bytes past the source tensor's own byte offset.
 *
 * \param shape Shape of the view.
 * \param dtype Data type of the view.
 * \param relative_byte_offset Offset of the view within the source tensor's
 *        data, in bytes, relative to the source's byte_offset.
 * \return A new Tensor aliasing this tensor's storage.
 */
Tensor Tensor::CreateView(ffi::Shape shape, DLDataType dtype, uint64_t relative_byte_offset) const {
  TVM_FFI_ICHECK(data_ != nullptr);

  const DLTensor& orig = *get_mutable();
  // Views require a compact layout. The immediately-invoked lambda builds the
  // error message lazily, so the string work happens only when the check fails.
  TVM_FFI_ICHECK(IsContiguous()) << [&orig]() {
    std::stringstream ss;
    ss << "Can only create view for compact tensor, but found strides ";

    ss << "[";
    for (int i = 0; i < orig.ndim; i++) {
      if (i) ss << ", ";
      ss << orig.strides[i];
    }
    ss << "]";

    ss << ", for shape ";
    ss << "[";
    for (int i = 0; i < orig.ndim; i++) {
      if (i) ss << ", ";
      ss << orig.shape[i];
    }
    ss << "]";
    return ss.str();
  }();
  const auto& curr_dl_tensor = *get_mutable();
  size_t curr_size = GetDataSize(curr_dl_tensor);
  size_t view_size = ffi::GetDataSize(shape.Product(), dtype);
  // The view must fit entirely inside the source tensor's byte range.
  TVM_FFI_CHECK_LE(relative_byte_offset + view_size, curr_size, ValueError)
      << "View with shape " << shape << " and datatype " << dtype << " would have a size of "
      << view_size << " bytes. "
      << "This would occupy bytes " << relative_byte_offset << " <= i_byte < "
      << (relative_byte_offset + view_size) << " within the backing array. "
      << "However, the Tensor being viewed only contains " << curr_size << " bytes (shape = "
      << ffi::Shape(curr_dl_tensor.shape, curr_dl_tensor.shape + curr_dl_tensor.ndim)
      << ", dtype= " << curr_dl_tensor.dtype << ").";

  // helper allocator class that retains ref count of original Tensor
  class ViewBasedAlloc {
   public:
    explicit ViewBasedAlloc(Tensor source) : source_(source) {}
    // Point the view at the source's data; the byte offset passed here is
    // already absolute (source offset + relative offset).
    void AllocData(DLTensor* tensor, int64_t byte_offset) {
      tensor->data = source_.get_mutable()->data;
      tensor->byte_offset = byte_offset;
    }

    // Nothing to free: storage is owned by `source_`, released when the
    // last reference (including this view) goes away.
    void FreeData(DLTensor* tensor) {}

   private:
    Tensor source_;  // keeps the viewed tensor alive
  };

  Tensor ret = Tensor::FromNDAlloc(ViewBasedAlloc(Tensor(*this)), shape, dtype, (*this)->device,
                                   curr_dl_tensor.byte_offset + relative_byte_offset);
  return ret;
}
| |
| void Tensor::CopyToBytes(void* data, size_t nbytes) const { |
| TVM_FFI_ICHECK(data != nullptr); |
| TVM_FFI_ICHECK(data_ != nullptr); |
| Tensor::CopyToBytes(get_mutable(), data, nbytes); |
| } |
| |
| void Tensor::CopyFromBytes(const void* data, size_t nbytes) { |
| TVM_FFI_ICHECK(data != nullptr); |
| TVM_FFI_ICHECK(data_ != nullptr); |
| TensorCopyFromBytes(get_mutable(), data, nbytes); |
| } |
| |
| Tensor Tensor::CopyTo(const Device& dev, ffi::Optional<ffi::String> mem_scope) const { |
| TVM_FFI_ICHECK(data_ != nullptr); |
| const DLTensor* dptr = operator->(); |
| Tensor ret = |
| Empty(ffi::Shape(dptr->shape, dptr->shape + dptr->ndim), dptr->dtype, dev, mem_scope); |
| this->CopyTo(ret); |
| Device copy_gpu_dev = dptr->device.device_type != kDLCPU ? dptr->device : dev; |
| DeviceAPI::Get(copy_gpu_dev)->StreamSync(copy_gpu_dev, nullptr); |
| return ret; |
| } |
| |
| void Tensor::CopyFromTo(const DLTensor* from, DLTensor* to, TVMStreamHandle stream) { |
| size_t from_size = GetDataSize(*from); |
| size_t to_size = GetDataSize(*to); |
| TVM_FFI_ICHECK_EQ(from_size, to_size) |
| << "TVMTensorCopyFromTo: The size in bytes must exactly match."; |
| |
| TVM_FFI_ICHECK(from->device.device_type == to->device.device_type || |
| from->device.device_type == kDLCPU || to->device.device_type == kDLCPU || |
| from->device.device_type == kDLCUDAHost || to->device.device_type == kDLCUDAHost || |
| from->device.device_type == kDLROCMHost || to->device.device_type == kDLROCMHost) |
| << "Can not copy across different device types directly. From device type: " |
| << from->device.device_type << " to device type: " << to->device.device_type; |
| |
| // Use the device that is *not* a cpu device to get the correct device |
| // api manager. |
| Device dev = from->device.device_type != kDLCPU ? from->device : to->device; |
| |
| DeviceAPI::Get(dev)->CopyDataFromTo(const_cast<DLTensor*>(from), to, stream); |
| } |
| |
| } // namespace runtime |
| } // namespace tvm |
| |
| using namespace tvm::runtime; |
| |
// Register the tensor helpers with the FFI global function table at static
// initialization time, making them callable by name through the FFI.
TVM_FFI_STATIC_INIT_BLOCK() {
  namespace refl = tvm::ffi::reflection;
  refl::GlobalDef()
      // Allocate an empty tensor with an optional memory scope.
      .def("runtime.TVMTensorAllocWithScope", Tensor::Empty)
      // Zero-copy view at a new shape/dtype/byte offset.
      .def_method("runtime.TVMTensorCreateView", &Tensor::CreateView)
      // Host-buffer <-> tensor copies on the default (null) stream.
      .def("runtime.TVMTensorCopyFromBytes",
           [](DLTensor* arr, void* data, size_t nbytes) { TensorCopyFromBytes(arr, data, nbytes); })
      .def("runtime.TVMTensorCopyToBytes",
           [](DLTensor* arr, void* data, size_t nbytes) { Tensor::CopyToBytes(arr, data, nbytes); })
      // Tensor-to-tensor copy on the default (null) stream.
      .def("runtime.TVMTensorCopyFromTo",
           [](DLTensor* from, DLTensor* to) { Tensor::CopyFromTo(from, to); });
}