blob: d4fe1772b97896fea59b6bfdf1e326dad77253d1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file tensor.cc
 * \brief Tensor container infrastructure.
*/
#include <tvm/ffi/function.h>
#include <tvm/ffi/reflection/registry.h>
#include <tvm/runtime/base.h>
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/tensor.h>
#include "tvm/runtime/data_type.h"
namespace tvm {
namespace runtime {
// Validate that `dtype` is a data type the Tensor container can host.
// Floating point codes must be byte-aligned; a small set of sub-byte
// types (bool-as-uint1, int1, int4/uint4, fp4/fp6 variants) is allowed
// explicitly; everything else must be byte-aligned AND power-of-two sized.
inline void VerifyDataType(DLDataType dtype) {
  TVM_FFI_ICHECK_GE(dtype.lanes, 1);
  if (dtype.code == kDLFloat) {
    TVM_FFI_ICHECK_EQ(dtype.bits % 8, 0);
  } else {
    // Explicitly allowed sub-byte types:
    //  - uint1 acts as a special flag for bool
    //  - int1, int4, uint4
    //  - fp6 (e2m3fn / e3m2fn) and fp4 (e2m1fn)
    const bool allowed_sub_byte =
        (dtype.bits == 1 && (dtype.code == kDLUInt || dtype.code == kDLInt)) ||
        (dtype.bits == 4 &&
         (dtype.code == kDLUInt || dtype.code == kDLInt ||
          dtype.code == DataType::kFloat4_e2m1fn)) ||
        (dtype.bits == 6 && (dtype.code == DataType::kFloat6_e2m3fn ||
                             dtype.code == DataType::kFloat6_e3m2fn));
    if (allowed_sub_byte) return;
    TVM_FFI_ICHECK_EQ(dtype.bits % 8, 0);
  }
  // Byte-aligned types must also have a power-of-two bit width.
  TVM_FFI_ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
}
// Copy `nbytes` raw bytes from host memory `data` into the tensor `handle`.
// The byte count must exactly match the tensor size and the destination
// must be contiguous. Blocks until the copy has completed.
void TensorCopyFromBytes(DLTensor* handle, const void* data, size_t nbytes) {
  size_t expected_size = GetDataSize(*handle);
  TVM_FFI_ICHECK_EQ(expected_size, nbytes) << "TensorCopyFromBytes: size mismatch";
  TVM_FFI_ICHECK(IsContiguous(*handle))
      << "TensorCopyFromBytes only support contiguous array for now";
  // Wrap the source bytes as a CPU-resident tensor sharing the
  // destination's shape and dtype so DeviceAPI can perform the copy.
  DLTensor src;
  src.data = const_cast<void*>(data);
  src.device = Device{kDLCPU, 0};
  src.ndim = handle->ndim;
  src.dtype = handle->dtype;
  src.shape = handle->shape;
  src.strides = nullptr;
  src.byte_offset = 0;
  DeviceAPI* api = DeviceAPI::Get(handle->device);
  api->CopyDataFromTo(&src, handle, nullptr);
  // Synchronize in case data become unavailable later.
  api->StreamSync(handle->device, nullptr);
}
void Tensor::CopyToBytes(const DLTensor* handle, void* data, size_t nbytes,
TVMStreamHandle stream) {
size_t arr_size = GetDataSize(*handle);
TVM_FFI_ICHECK_EQ(arr_size, nbytes) << "ArrayCopyToBytes: size mismatch";
TVM_FFI_ICHECK(ffi::IsContiguous(*handle))
<< "ArrayCopyToBytes only support contiguous array for now";
DLTensor to;
to.data = const_cast<void*>(data);
to.device = Device{kDLCPU, 0};
to.ndim = handle->ndim;
to.dtype = handle->dtype;
to.shape = handle->shape;
to.strides = nullptr;
to.byte_offset = 0;
DeviceAPI::Get(handle->device)->CopyDataFromTo(const_cast<DLTensor*>(handle), &to, stream);
// Synchronize in case data become unavailable later.
DeviceAPI::Get(handle->device)->StreamSync(handle->device, stream);
}
/*!
 * \brief Copy `nbytes` of host bytes from `data` into the tensor `handle`.
 * \param handle Destination tensor; must be contiguous and exactly `nbytes` in size.
 * \param data Source host buffer.
 * \param nbytes Number of bytes to copy; must equal GetDataSize(*handle).
 * \param stream Optional device stream used for the copy and sync.
 *
 * Synchronizes on the destination device's stream before returning so the
 * caller may release `data` immediately.
 */
void Tensor::CopyFromBytes(const DLTensor* handle, void* data, size_t nbytes,
                           TVMStreamHandle stream) {
  size_t arr_size = GetDataSize(*handle);
  // Fixed: the messages previously said "ArrayCopyToBytes" (copy-pasted from
  // CopyToBytes), which misreported both the function name and the direction.
  TVM_FFI_ICHECK_EQ(arr_size, nbytes) << "TensorCopyFromBytes: size mismatch";
  TVM_FFI_ICHECK(ffi::IsContiguous(*handle))
      << "TensorCopyFromBytes only support contiguous array for now";
  // Wrap the source bytes as a CPU-resident tensor with the destination's
  // shape and dtype so DeviceAPI can route the transfer.
  DLTensor from;
  from.data = const_cast<void*>(data);
  from.device = Device{kDLCPU, 0};
  from.ndim = handle->ndim;
  from.dtype = handle->dtype;
  from.shape = handle->shape;
  from.strides = nullptr;
  from.byte_offset = 0;
  DeviceAPI::Get(handle->device)->CopyDataFromTo(&from, const_cast<DLTensor*>(handle), stream);
  // Synchronize in case data become unavailable later.
  DeviceAPI::Get(handle->device)->StreamSync(handle->device, stream);
}
// Allocate an uninitialized tensor of the given shape/dtype on `dev`,
// optionally within a named memory scope.
Tensor Tensor::Empty(ffi::Shape shape, DLDataType dtype, Device dev,
                     ffi::Optional<ffi::String> mem_scope) {
  // Allocator policy that defers data space management to the DeviceAPI
  // registered for the tensor's device.
  struct DeviceAPIAlloc {
    void AllocData(DLTensor* tensor, ffi::Optional<ffi::String> scope) {
      DeviceAPI* api = DeviceAPI::Get(tensor->device);
      tensor->data = api->AllocDataSpace(tensor->device, tensor->ndim, tensor->shape,
                                         tensor->dtype, scope);
    }
    void FreeData(DLTensor* tensor) {
      DeviceAPI* api = DeviceAPI::Get(tensor->device);
      api->FreeDataSpace(tensor->device, tensor->data);
    }
  };
  return ffi::Tensor::FromNDAlloc(DeviceAPIAlloc(), shape, dtype, dev, mem_scope);
}
// Create a zero-copy view into this tensor's backing storage with a new
// shape/dtype, starting `relative_byte_offset` bytes past this tensor's own
// byte_offset. The view retains a reference to the source tensor so the
// backing storage outlives the view. Only compact (contiguous) tensors can
// be viewed. Raises ValueError if the requested view extends past the end
// of the source tensor's data.
Tensor Tensor::CreateView(ffi::Shape shape, DLDataType dtype, uint64_t relative_byte_offset) const {
  TVM_FFI_ICHECK(data_ != nullptr);
  const DLTensor& orig = *get_mutable();
  // The lambda builds the failure message lazily; it may safely read
  // orig.strides because a non-contiguous tensor always carries explicit
  // strides (a null-strides tensor is contiguous by definition).
  TVM_FFI_ICHECK(IsContiguous()) << [&orig]() {
    std::stringstream ss;
    ss << "Can only create view for compact tensor, but found strides ";
    ss << "[";
    for (int i = 0; i < orig.ndim; i++) {
      if (i) ss << ", ";
      ss << orig.strides[i];
    }
    ss << "]";
    ss << ", for shape ";
    ss << "[";
    for (int i = 0; i < orig.ndim; i++) {
      if (i) ss << ", ";
      ss << orig.shape[i];
    }
    ss << "]";
    return ss.str();
  }();
  const auto& curr_dl_tensor = *get_mutable();
  // Bounds check: the view's byte range must fit inside the source tensor.
  size_t curr_size = GetDataSize(curr_dl_tensor);
  size_t view_size = ffi::GetDataSize(shape.Product(), dtype);
  TVM_FFI_CHECK_LE(relative_byte_offset + view_size, curr_size, ValueError)
      << "View with shape " << shape << " and datatype " << dtype << " would have a size of "
      << view_size << " bytes. "
      << "This would occupy bytes " << relative_byte_offset << " <= i_byte < "
      << (relative_byte_offset + view_size) << " within the backing array. "
      << "However, the Tensor being viewed only contains " << curr_size << " bytes (shape = "
      << ffi::Shape(curr_dl_tensor.shape, curr_dl_tensor.shape + curr_dl_tensor.ndim)
      << ", dtype= " << curr_dl_tensor.dtype << ").";
  // helper allocator class that retains ref count of original Tensor
  class ViewBasedAlloc {
   public:
    explicit ViewBasedAlloc(Tensor source) : source_(source) {}
    // Shares the source's data pointer; the view's offset is absolute
    // (source byte_offset + relative offset), set below by the caller.
    void AllocData(DLTensor* tensor, int64_t byte_offset) {
      tensor->data = source_.get_mutable()->data;
      tensor->byte_offset = byte_offset;
    }
    // Nothing to free: storage is owned by source_, released when the
    // last reference (including this view) goes away.
    void FreeData(DLTensor* tensor) {}
   private:
    Tensor source_;  // keeps the viewed tensor alive for the view's lifetime
  };
  Tensor ret = Tensor::FromNDAlloc(ViewBasedAlloc(Tensor(*this)), shape, dtype, (*this)->device,
                                   curr_dl_tensor.byte_offset + relative_byte_offset);
  return ret;
}
// Copy this tensor's full contents into the host buffer `data` of exactly
// `nbytes` bytes. Delegates to the static overload with the default (null)
// stream, which synchronizes before returning.
void Tensor::CopyToBytes(void* data, size_t nbytes) const {
  TVM_FFI_ICHECK(data != nullptr);
  TVM_FFI_ICHECK(data_ != nullptr);  // tensor must hold valid storage
  Tensor::CopyToBytes(get_mutable(), data, nbytes);
}
// Fill this tensor from the host buffer `data` of exactly `nbytes` bytes.
// Delegates to the free function, which checks size/contiguity and
// synchronizes before returning.
void Tensor::CopyFromBytes(const void* data, size_t nbytes) {
  TVM_FFI_ICHECK(data != nullptr);
  TVM_FFI_ICHECK(data_ != nullptr);  // tensor must hold valid storage
  TensorCopyFromBytes(get_mutable(), data, nbytes);
}
// Produce a copy of this tensor on device `dev` (optionally in a named
// memory scope), blocking until the data transfer is complete.
Tensor Tensor::CopyTo(const Device& dev, ffi::Optional<ffi::String> mem_scope) const {
  TVM_FFI_ICHECK(data_ != nullptr);
  const DLTensor* src = operator->();
  ffi::Shape src_shape(src->shape, src->shape + src->ndim);
  Tensor result = Empty(src_shape, src->dtype, dev, mem_scope);
  this->CopyTo(result);
  // Synchronize on whichever side is not the CPU so the copy has finished
  // before the new tensor is handed back.
  Device sync_dev = (src->device.device_type != kDLCPU) ? src->device : dev;
  DeviceAPI::Get(sync_dev)->StreamSync(sync_dev, nullptr);
  return result;
}
void Tensor::CopyFromTo(const DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
size_t from_size = GetDataSize(*from);
size_t to_size = GetDataSize(*to);
TVM_FFI_ICHECK_EQ(from_size, to_size)
<< "TVMTensorCopyFromTo: The size in bytes must exactly match.";
TVM_FFI_ICHECK(from->device.device_type == to->device.device_type ||
from->device.device_type == kDLCPU || to->device.device_type == kDLCPU ||
from->device.device_type == kDLCUDAHost || to->device.device_type == kDLCUDAHost ||
from->device.device_type == kDLROCMHost || to->device.device_type == kDLROCMHost)
<< "Can not copy across different device types directly. From device type: "
<< from->device.device_type << " to device type: " << to->device.device_type;
// Use the device that is *not* a cpu device to get the correct device
// api manager.
Device dev = from->device.device_type != kDLCPU ? from->device : to->device;
DeviceAPI::Get(dev)->CopyDataFromTo(const_cast<DLTensor*>(from), to, stream);
}
} // namespace runtime
} // namespace tvm
using namespace tvm::runtime;
// Register the tensor operations with the FFI so they are reachable by the
// global names "runtime.TVMTensor*" from other language frontends.
TVM_FFI_STATIC_INIT_BLOCK() {
  namespace refl = tvm::ffi::reflection;
  refl::GlobalDef()
      .def("runtime.TVMTensorAllocWithScope", Tensor::Empty)
      .def_method("runtime.TVMTensorCreateView", &Tensor::CreateView)
      .def("runtime.TVMTensorCopyFromBytes",
           [](DLTensor* tensor, void* bytes, size_t num_bytes) {
             TensorCopyFromBytes(tensor, bytes, num_bytes);
           })
      .def("runtime.TVMTensorCopyToBytes",
           [](DLTensor* tensor, void* bytes, size_t num_bytes) {
             Tensor::CopyToBytes(tensor, bytes, num_bytes);
           })
      .def("runtime.TVMTensorCopyFromTo",
           [](DLTensor* src, DLTensor* dst) { Tensor::CopyFromTo(src, dst); });
}