blob: d4fe1772b97896fea59b6bfdf1e326dad77253d1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file tensor.cc
 * \brief Tensor container infrastructure.
*/
#include <tvm/ffi/function.h>
#include <tvm/ffi/reflection/registry.h>
#include <tvm/runtime/base.h>
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/logging.h>
#include <tvm/runtime/tensor.h>
#include "tvm/runtime/data_type.h"
namespace tvm {
namespace runtime {
// Validate that `dtype` is a data type the Tensor container can host.
// Floating point codes must be byte-aligned; a small set of sub-byte
// types (bool-as-uint1, int1, int4/uint4, fp4/fp6 variants) is allowed
// explicitly; everything else must be byte-aligned AND power-of-two sized.
inline void VerifyDataType(DLDataType dtype) {
  TVM_FFI_ICHECK_GE(dtype.lanes, 1);
  if (dtype.code == kDLFloat) {
    TVM_FFI_ICHECK_EQ(dtype.bits % 8, 0);
  } else {
    // Explicitly allowed sub-byte types:
    //  - uint1 acts as a special flag for bool
    //  - int1, int4, uint4
    //  - fp6 (e2m3fn / e3m2fn) and fp4 (e2m1fn)
    const bool allowed_sub_byte =
        (dtype.bits == 1 && (dtype.code == kDLUInt || dtype.code == kDLInt)) ||
        (dtype.bits == 4 &&
         (dtype.code == kDLUInt || dtype.code == kDLInt ||
          dtype.code == DataType::kFloat4_e2m1fn)) ||
        (dtype.bits == 6 && (dtype.code == DataType::kFloat6_e2m3fn ||
                             dtype.code == DataType::kFloat6_e3m2fn));
    if (allowed_sub_byte) return;
    TVM_FFI_ICHECK_EQ(dtype.bits % 8, 0);
  }
  // Byte-aligned types must also have a power-of-two bit width.
  TVM_FFI_ICHECK_EQ(dtype.bits & (dtype.bits - 1), 0);
}
// Copy `nbytes` raw bytes from host memory `data` into the tensor `handle`.
// The byte count must exactly match the tensor size and the destination
// must be contiguous. Blocks until the copy has completed.
void TensorCopyFromBytes(DLTensor* handle, const void* data, size_t nbytes) {
  size_t expected_size = GetDataSize(*handle);
  TVM_FFI_ICHECK_EQ(expected_size, nbytes) << "TensorCopyFromBytes: size mismatch";
  TVM_FFI_ICHECK(IsContiguous(*handle))
      << "TensorCopyFromBytes only support contiguous array for now";
  // Wrap the source bytes as a CPU-resident tensor sharing the
  // destination's shape and dtype so DeviceAPI can perform the copy.
  DLTensor src;
  src.data = const_cast<void*>(data);
  src.device = Device{kDLCPU, 0};
  src.ndim = handle->ndim;
  src.dtype = handle->dtype;
  src.shape = handle->shape;
  src.strides = nullptr;
  src.byte_offset = 0;
  DeviceAPI* api = DeviceAPI::Get(handle->device);
  api->CopyDataFromTo(&src, handle, nullptr);
  // Synchronize in case data become unavailable later.
  api->StreamSync(handle->device, nullptr);
}
void Tensor::CopyToBytes(const DLTensor* handle, void* data, size_t nbytes,
TVMStreamHandle stream) {
size_t arr_size = GetDataSize(*handle);
TVM_FFI_ICHECK_EQ(arr_size, nbytes) << "ArrayCopyToBytes: size mismatch";
TVM_FFI_ICHECK(ffi::IsContiguous(*handle))
<< "ArrayCopyToBytes only support contiguous array for now";
DLTensor to;
to.data = const_cast<void*>(data);
to.device = Device{kDLCPU, 0};
to.ndim = handle->ndim;
to.dtype = handle->dtype;
to.shape = handle->shape;
to.strides = nullptr;
to.byte_offset = 0;
DeviceAPI::Get(handle->device)->CopyDataFromTo(const_cast<DLTensor*>(handle), &to, stream);
// Synchronize in case data become unavailable later.
DeviceAPI::Get(handle->device)->StreamSync(handle->device, stream);
}
/*!
 * \brief Copy `nbytes` of host bytes from `data` into the tensor `handle`.
 * \param handle Destination tensor; must be contiguous and exactly `nbytes` in size.
 * \param data Source host buffer.
 * \param nbytes Number of bytes to copy; must equal GetDataSize(*handle).
 * \param stream Optional device stream used for the copy and sync.
 *
 * Synchronizes on the destination device's stream before returning so the
 * caller may release `data` immediately.
 */
void Tensor::CopyFromBytes(const DLTensor* handle, void* data, size_t nbytes,
                           TVMStreamHandle stream) {
  size_t arr_size = GetDataSize(*handle);
  // Fixed: the messages previously said "ArrayCopyToBytes" (copy-pasted from
  // CopyToBytes), which misreported both the function name and the direction.
  TVM_FFI_ICHECK_EQ(arr_size, nbytes) << "TensorCopyFromBytes: size mismatch";
  TVM_FFI_ICHECK(ffi::IsContiguous(*handle))
      << "TensorCopyFromBytes only support contiguous array for now";
  // Wrap the source bytes as a CPU-resident tensor with the destination's
  // shape and dtype so DeviceAPI can route the transfer.
  DLTensor from;
  from.data = const_cast<void*>(data);
  from.device = Device{kDLCPU, 0};
  from.ndim = handle->ndim;
  from.dtype = handle->dtype;
  from.shape = handle->shape;
  from.strides = nullptr;
  from.byte_offset = 0;
  DeviceAPI::Get(handle->device)->CopyDataFromTo(&from, const_cast<DLTensor*>(handle), stream);
  // Synchronize in case data become unavailable later.
  DeviceAPI::Get(handle->device)->StreamSync(handle->device, stream);
}
// Allocate an uninitialized tensor of the given shape/dtype on `dev`,
// optionally within a named memory scope.
Tensor Tensor::Empty(ffi::Shape shape, DLDataType dtype, Device dev,
                     ffi::Optional<ffi::String> mem_scope) {
  // Allocator policy that defers data space management to the DeviceAPI
  // registered for the tensor's device.
  struct DeviceAPIAlloc {
    void AllocData(DLTensor* tensor, ffi::Optional<ffi::String> scope) {
      DeviceAPI* api = DeviceAPI::Get(tensor->device);
      tensor->data = api->AllocDataSpace(tensor->device, tensor->ndim, tensor->shape,
                                         tensor->dtype, scope);
    }
    void FreeData(DLTensor* tensor) {
      DeviceAPI* api = DeviceAPI::Get(tensor->device);
      api->FreeDataSpace(tensor->device, tensor->data);
    }
  };
  return ffi::Tensor::FromNDAlloc(DeviceAPIAlloc(), shape, dtype, dev, mem_scope);
}
// Create a zero-copy view into this tensor's backing storage with a new
// shape/dtype, starting `relative_byte_offset` bytes past this tensor's own
// byte_offset. The view retains a reference to the source tensor so the
// backing storage outlives the view. Only compact (contiguous) tensors can
// be viewed. Raises ValueError if the requested view extends past the end
// of the source tensor's data.
Tensor Tensor::CreateView(ffi::Shape shape, DLDataType dtype, uint64_t relative_byte_offset) const {
  TVM_FFI_ICHECK(data_ != nullptr);
  const DLTensor& orig = *get_mutable();
  // The lambda builds the failure message lazily; it may safely read
  // orig.strides because a non-contiguous tensor always carries explicit
  // strides (a null-strides tensor is contiguous by definition).
  TVM_FFI_ICHECK(IsContiguous()) << [&orig]() {
    std::stringstream ss;
    ss << "Can only create view for compact tensor, but found strides ";
    ss << "[";
    for (int i = 0; i < orig.ndim; i++) {
      if (i) ss << ", ";
      ss << orig.strides[i];
    }
    ss << "]";
    ss << ", for shape ";
    ss << "[";
    for (int i = 0; i < orig.ndim; i++) {
      if (i) ss << ", ";
      ss << orig.shape[i];
    }
    ss << "]";
    return ss.str();
  }();
  const auto& curr_dl_tensor = *get_mutable();
  // Bounds check: the view's byte range must fit inside the source tensor.
  size_t curr_size = GetDataSize(curr_dl_tensor);
  size_t view_size = ffi::GetDataSize(shape.Product(), dtype);
  TVM_FFI_CHECK_LE(relative_byte_offset + view_size, curr_size, ValueError)
      << "View with shape " << shape << " and datatype " << dtype << " would have a size of "
      << view_size << " bytes. "
      << "This would occupy bytes " << relative_byte_offset << " <= i_byte < "
      << (relative_byte_offset + view_size) << " within the backing array. "
      << "However, the Tensor being viewed only contains " << curr_size << " bytes (shape = "
      << ffi::Shape(curr_dl_tensor.shape, curr_dl_tensor.shape + curr_dl_tensor.ndim)
      << ", dtype= " << curr_dl_tensor.dtype << ").";
  // helper allocator class that retains ref count of original Tensor
  class ViewBasedAlloc {
   public:
    explicit ViewBasedAlloc(Tensor source) : source_(source) {}
    // Shares the source's data pointer; the view's offset is absolute
    // (source byte_offset + relative offset), set below by the caller.
    void AllocData(DLTensor* tensor, int64_t byte_offset) {
      tensor->data = source_.get_mutable()->data;
      tensor->byte_offset = byte_offset;
    }
    // Nothing to free: storage is owned by source_, released when the
    // last reference (including this view) goes away.
    void FreeData(DLTensor* tensor) {}
   private:
    Tensor source_;  // keeps the viewed tensor alive for the view's lifetime
  };
  Tensor ret = Tensor::FromNDAlloc(ViewBasedAlloc(Tensor(*this)), shape, dtype, (*this)->device,
                                   curr_dl_tensor.byte_offset + relative_byte_offset);
  return ret;
}
// Copy this tensor's full contents into the host buffer `data` of exactly
// `nbytes` bytes. Delegates to the static overload with the default (null)
// stream, which synchronizes before returning.
void Tensor::CopyToBytes(void* data, size_t nbytes) const {
  TVM_FFI_ICHECK(data != nullptr);
  TVM_FFI_ICHECK(data_ != nullptr);  // tensor must hold valid storage
  Tensor::CopyToBytes(get_mutable(), data, nbytes);
}
// Fill this tensor from the host buffer `data` of exactly `nbytes` bytes.
// Delegates to the free function, which checks size/contiguity and
// synchronizes before returning.
void Tensor::CopyFromBytes(const void* data, size_t nbytes) {
  TVM_FFI_ICHECK(data != nullptr);
  TVM_FFI_ICHECK(data_ != nullptr);  // tensor must hold valid storage
  TensorCopyFromBytes(get_mutable(), data, nbytes);
}
// Produce a copy of this tensor on device `dev` (optionally in a named
// memory scope), blocking until the data transfer is complete.
Tensor Tensor::CopyTo(const Device& dev, ffi::Optional<ffi::String> mem_scope) const {
  TVM_FFI_ICHECK(data_ != nullptr);
  const DLTensor* src = operator->();
  ffi::Shape src_shape(src->shape, src->shape + src->ndim);
  Tensor result = Empty(src_shape, src->dtype, dev, mem_scope);
  this->CopyTo(result);
  // Synchronize on whichever side is not the CPU so the copy has finished
  // before the new tensor is handed back.
  Device sync_dev = (src->device.device_type != kDLCPU) ? src->device : dev;
  DeviceAPI::Get(sync_dev)->StreamSync(sync_dev, nullptr);
  return result;
}
void Tensor::CopyFromTo(const DLTensor* from, DLTensor* to, TVMStreamHandle stream) {
size_t from_size = GetDataSize(*from);
size_t to_size = GetDataSize(*to);
TVM_FFI_ICHECK_EQ(from_size, to_size)
<< "TVMTensorCopyFromTo: The size in bytes must exactly match.";
TVM_FFI_ICHECK(from->device.device_type == to->device.device_type ||
from->device.device_type == kDLCPU || to->device.device_type == kDLCPU ||
from->device.device_type == kDLCUDAHost || to->device.device_type == kDLCUDAHost ||
from->device.device_type == kDLROCMHost || to->device.device_type == kDLROCMHost)
<< "Can not copy across different device types directly. From device type: "
<< from->device.device_type << " to device type: " << to->device.device_type;
// Use the device that is *not* a cpu device to get the correct device
// api manager.
Device dev = from->device.device_type != kDLCPU ? from->device : to->device;
DeviceAPI::Get(dev)->CopyDataFromTo(const_cast<DLTensor*>(from), to, stream);
}
} // namespace runtime
} // namespace tvm
using namespace tvm::runtime;
// Register the tensor operations with the FFI so they are reachable by the
// global names "runtime.TVMTensor*" from other language frontends.
TVM_FFI_STATIC_INIT_BLOCK() {
  namespace refl = tvm::ffi::reflection;
  refl::GlobalDef()
      .def("runtime.TVMTensorAllocWithScope", Tensor::Empty)
      .def_method("runtime.TVMTensorCreateView", &Tensor::CreateView)
      .def("runtime.TVMTensorCopyFromBytes",
           [](DLTensor* tensor, void* bytes, size_t num_bytes) {
             TensorCopyFromBytes(tensor, bytes, num_bytes);
           })
      .def("runtime.TVMTensorCopyToBytes",
           [](DLTensor* tensor, void* bytes, size_t num_bytes) {
             Tensor::CopyToBytes(tensor, bytes, num_bytes);
           })
      .def("runtime.TVMTensorCopyFromTo",
           [](DLTensor* src, DLTensor* dst) { Tensor::CopyFromTo(src, dst); });
}