// examples/quickstart/load/load_cuda.cc - tvm-ffi - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 // [main.begin]
 // File: load/load_cuda.cc
 #include <tvm/ffi/extra/module.h>
 #include <tvm/ffi/tvm_ffi.h>

 namespace {
 namespace ffi = tvm::ffi;
 /*!
  * \brief Load the `add_one_cuda` shared library and call its exported function on CUDA tensors.
  * \param x The input tensor on CUDA device; forwarded as the first argument.
  * \param y The output tensor on CUDA device; forwarded as the second argument.
  */
 void Run(tvm::ffi::TensorView x, tvm::ffi::TensorView y) {
   // Open the shared library found at the relative path `build/add_one_cuda.so`.
   ffi::Module library = ffi::Module::LoadFromFile("build/add_one_cuda.so");
   // Query the module for the `add_one_cuda` symbol, then unwrap the lookup result.
   auto lookup = library->GetFunction("add_one_cuda");
   ffi::Function add_one = lookup.value();
   // Invoke the loaded function, handing it both tensor views.
   add_one(x, y);
 }
 }  // namespace
 // [main.end]
 /************* Auxiliary Logic *************/
 // [aux.begin]
 #include <cuda_runtime.h>
 #include <tvm/ffi/error.h>

 #include <iostream>
 #include <vector>

 /*!
  * \brief Allocator handed to `ffi::Tensor::FromNDAlloc` that backs tensor data with
  *        memory obtained from `cudaMalloc` and released with `cudaFree`.
  */
 struct CUDANDAlloc {
   /*!
    * \brief Allocate a buffer sized by `ffi::GetDataSize` and store it in `tensor->data`.
    * \param tensor The tensor descriptor whose `data` field receives the new pointer.
    */
   void AllocData(DLTensor* tensor) {
     void* device_buffer = nullptr;
     cudaError_t err = cudaMalloc(&device_buffer, ffi::GetDataSize(*tensor));
     TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaMalloc failed: " << cudaGetErrorString(err);
     tensor->data = device_buffer;
   }

   /*!
    * \brief Release the buffer referenced by `tensor->data`, if any, and reset the field.
    * \param tensor The tensor descriptor whose `data` pointer is freed.
    */
   void FreeData(DLTensor* tensor) {
     // Nothing was allocated (or it was already released): nothing to do.
     if (tensor->data == nullptr) {
       return;
     }
     cudaError_t err = cudaFree(tensor->data);
     TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaFree failed: " << cudaGetErrorString(err);
     // Clear the pointer so a repeated call takes the early-return path above.
     tensor->data = nullptr;
   }
 };

 /*!
  * \brief Create a tensor whose storage is provided by `CUDANDAlloc`.
  * \param shape The shape of the tensor.
  * \param dtype The data type of the tensor.
  * \param device The device descriptor recorded on the tensor (a CUDA device is expected).
  * \return The tensor produced by `ffi::Tensor::FromNDAlloc`.
  */
 inline ffi::Tensor Empty(ffi::Shape shape, DLDataType dtype, DLDevice device) {
   CUDANDAlloc allocator;
   return ffi::Tensor::FromNDAlloc(allocator, shape, dtype, device);
 }

 int main() {
   DLDataType f32_dtype{kDLFloat, 32, 1};
   DLDevice cuda_device{kDLCUDA, 0};

   constexpr int ARRAY_SIZE = 5;

   ffi::Tensor x = Empty({ARRAY_SIZE}, f32_dtype, cuda_device);
   ffi::Tensor y = Empty({ARRAY_SIZE}, f32_dtype, cuda_device);

   std::vector<float> host_x(ARRAY_SIZE);
   for (int i = 0; i < ARRAY_SIZE; ++i) {
     host_x[i] = static_cast<float>(i + 1);
   }

   size_t nbytes = host_x.size() * sizeof(float);
   cudaError_t err = cudaMemcpy(x.data_ptr(), host_x.data(), nbytes, cudaMemcpyHostToDevice);
   TVM_FFI_ICHECK_EQ(err, cudaSuccess)
       << "cudaMemcpy host to device failed: " << cudaGetErrorString(err);

   Run(x, y);

   std::vector<float> host_y(host_x.size());
   err = cudaMemcpy(host_y.data(), y.data_ptr(), nbytes, cudaMemcpyDeviceToHost);
   TVM_FFI_ICHECK_EQ(err, cudaSuccess)
       << "cudaMemcpy device to host failed: " << cudaGetErrorString(err);

   std::cout << "[ ";
   for (float value : host_y) {
     std::cout << value << " ";
   }
   std::cout << "]" << std::endl;

   return 0;
 }
 // [aux.end]
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	// [main.begin]
	// File: load/load_cuda.cc
	#include <tvm/ffi/extra/module.h>
	#include <tvm/ffi/tvm_ffi.h>

	namespace {
	namespace ffi = tvm::ffi;
	/*!
	 * \brief Load the `add_one_cuda` shared library and call its exported function on CUDA tensors.
	 * \param x The input tensor on CUDA device; forwarded as the first argument.
	 * \param y The output tensor on CUDA device; forwarded as the second argument.
	 */
	void Run(tvm::ffi::TensorView x, tvm::ffi::TensorView y) {
	  // Open the shared library found at the relative path `build/add_one_cuda.so`.
	  ffi::Module library = ffi::Module::LoadFromFile("build/add_one_cuda.so");
	  // Query the module for the `add_one_cuda` symbol, then unwrap the lookup result.
	  auto lookup = library->GetFunction("add_one_cuda");
	  ffi::Function add_one = lookup.value();
	  // Invoke the loaded function, handing it both tensor views.
	  add_one(x, y);
	}
	} // namespace
	// [main.end]
	/*********** Auxiliary Logic ***********/
	// [aux.begin]
	#include <cuda_runtime.h>
	#include <tvm/ffi/error.h>

	#include <iostream>
	#include <vector>

	/*!
	 * \brief Allocator handed to `ffi::Tensor::FromNDAlloc` that backs tensor data with
	 *        memory obtained from `cudaMalloc` and released with `cudaFree`.
	 */
	struct CUDANDAlloc {
	  /*!
	   * \brief Allocate a buffer sized by `ffi::GetDataSize` and store it in `tensor->data`.
	   * \param tensor The tensor descriptor whose `data` field receives the new pointer.
	   */
	  void AllocData(DLTensor* tensor) {
	    void* device_buffer = nullptr;
	    cudaError_t err = cudaMalloc(&device_buffer, ffi::GetDataSize(*tensor));
	    TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaMalloc failed: " << cudaGetErrorString(err);
	    tensor->data = device_buffer;
	  }

	  /*!
	   * \brief Release the buffer referenced by `tensor->data`, if any, and reset the field.
	   * \param tensor The tensor descriptor whose `data` pointer is freed.
	   */
	  void FreeData(DLTensor* tensor) {
	    // Nothing was allocated (or it was already released): nothing to do.
	    if (tensor->data == nullptr) {
	      return;
	    }
	    cudaError_t err = cudaFree(tensor->data);
	    TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaFree failed: " << cudaGetErrorString(err);
	    // Clear the pointer so a repeated call takes the early-return path above.
	    tensor->data = nullptr;
	  }
	};

	/*!
	 * \brief Create a tensor whose storage is provided by `CUDANDAlloc`.
	 * \param shape The shape of the tensor.
	 * \param dtype The data type of the tensor.
	 * \param device The device descriptor recorded on the tensor (a CUDA device is expected).
	 * \return The tensor produced by `ffi::Tensor::FromNDAlloc`.
	 */
	inline ffi::Tensor Empty(ffi::Shape shape, DLDataType dtype, DLDevice device) {
	  CUDANDAlloc allocator;
	  return ffi::Tensor::FromNDAlloc(allocator, shape, dtype, device);
	}

	int main() {
	DLDataType f32_dtype{kDLFloat, 32, 1};
	DLDevice cuda_device{kDLCUDA, 0};

	constexpr int ARRAY_SIZE = 5;

	ffi::Tensor x = Empty({ARRAY_SIZE}, f32_dtype, cuda_device);
	ffi::Tensor y = Empty({ARRAY_SIZE}, f32_dtype, cuda_device);

	std::vector<float> host_x(ARRAY_SIZE);
	for (int i = 0; i < ARRAY_SIZE; ++i) {
	host_x[i] = static_cast<float>(i + 1);
	}

	size_t nbytes = host_x.size() * sizeof(float);
	cudaError_t err = cudaMemcpy(x.data_ptr(), host_x.data(), nbytes, cudaMemcpyHostToDevice);
	TVM_FFI_ICHECK_EQ(err, cudaSuccess)
	<< "cudaMemcpy host to device failed: " << cudaGetErrorString(err);

	Run(x, y);

	std::vector<float> host_y(host_x.size());
	err = cudaMemcpy(host_y.data(), y.data_ptr(), nbytes, cudaMemcpyDeviceToHost);
	TVM_FFI_ICHECK_EQ(err, cudaSuccess)
	<< "cudaMemcpy device to host failed: " << cudaGetErrorString(err);

	std::cout << "[ ";
	for (float value : host_y) {
	std::cout << value << " ";
	}
	std::cout << "]" << std::endl;

	return 0;
	}
	// [aux.end]