tests/cpp-runtime/opencl/texture_copy_test.cc - tvm - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 #include <gtest/gtest.h>
 #include <tvm/ffi/function.h>
 #include <tvm/runtime/logging.h>

 #include <cmath>
 #include <random>

 #include "../src/runtime/opencl/opencl_common.h"

 using tvm::runtime::kAllocAlignment;
 using tvm::runtime::memory::AllocatorType;
 using tvm::runtime::memory::Buffer;
 using tvm::runtime::memory::MemoryManager;
 using tvm::runtime::memory::Storage;

 class TextureCopyTest : public ::testing::Test {
  protected:
   void SetUp() override {
     bool enabled = tvm::runtime::RuntimeEnabled("opencl");
     if (!enabled) {
       GTEST_SKIP() << "Skip texture copy test because opencl runtime is disabled.\n";
     }
     // Check hardware support
     tvm::runtime::cl::OpenCLWorkspace* workspace = tvm::runtime::cl::OpenCLWorkspace::Global();
     tvm::runtime::cl::OpenCLThreadEntry* thr = workspace->GetThreadEntry();
     if (!workspace->IsBufferToImageSupported(thr->device.device_id)) {
       GTEST_SKIP() << "Skip test case as BufferToImage is not supported \n";
     }
     (void)tvm::runtime::memory::MemoryManager::GetOrCreateAllocator(
         thr->device, tvm::runtime::memory::AllocatorType::kPooled);
   }
 };

 TEST(TextureCopy, HostDeviceRT) {
   using namespace tvm;
   bool enabled = tvm::runtime::RuntimeEnabled("opencl");
   if (!enabled) {
     GTEST_SKIP() << "Skip texture copy test because opencl runtime is disabled.\n";
   }
   tvm::runtime::cl::OpenCLWorkspace* workspace = tvm::runtime::cl::OpenCLWorkspace::Global();
   tvm::runtime::cl::OpenCLThreadEntry* thr = workspace->GetThreadEntry();
   (void)tvm::runtime::memory::MemoryManager::GetOrCreateAllocator(
       thr->device, tvm::runtime::memory::AllocatorType::kPooled);
   std::vector<int64_t> shape{16, 16, 4};
   auto cpu_arr0 = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
   auto cpu_arr1 = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
   ffi::String mem_scope = "global.texture";
   auto opencl_txarr0 = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLOpenCL, 0}, mem_scope);

   size_t size = 1;
   for (size_t i = 0; i < shape.size(); ++i) {
     size *= static_cast<size_t>(shape[i]);
   }

   std::random_device dev;
   std::mt19937 mt(dev());
   std::uniform_real_distribution<> random(-10.0, 10.0);

   // Random initialize host ndarray
   for (size_t i = 0; i < size; i++) {
     static_cast<float*>(cpu_arr0->data)[i] = random(mt);
   }

   // Do a roundtrip from host storage to opencl texture storage and back
   cpu_arr0.CopyTo(opencl_txarr0);
   opencl_txarr0.CopyTo(cpu_arr1);
   for (size_t i = 0; i < size; ++i) {
     ICHECK_LT(
         std::fabs(static_cast<float*>(cpu_arr1->data)[i] - static_cast<float*>(cpu_arr0->data)[i]),
         1e-5);
   }
 }

 TEST_F(TextureCopyTest, ViewBufferAsBuffer) {
   using namespace tvm;
   std::vector<int64_t> shape{1, 16, 16, 8};
   std::vector<int64_t> same_shape{1, 8, 16, 16};
   auto cpu_arr = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
   auto cpu_arr_ret = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});

   ffi::String mem_scope = "global";

   DLDevice cl_dev = {kDLOpenCL, 0};
   auto allocator = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
   auto buffer = allocator->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
   auto stor = Storage(buffer, allocator);

   auto opencl_memobj = stor->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, mem_scope);
   auto opencl_memview =
       stor->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, mem_scope);

   std::random_device dev;
   std::mt19937 mt(dev());
   std::uniform_real_distribution<> random(-10.0, 10.0);

   size_t size = 1;
   for (size_t i = 0; i < shape.size(); ++i) {
     size *= static_cast<size_t>(shape[i]);
   }

   /* Check original object round trip */
   // Random initialize host pool storage
   for (size_t i = 0; i < size; i++) {
     static_cast<float*>(cpu_arr->data)[i] = random(mt);
   }
   // Copy to OpenCLBuffer
   cpu_arr.CopyTo(opencl_memobj);
   // Copy from OpenCLBuffer
   opencl_memobj.CopyTo(cpu_arr_ret);
   for (size_t i = 0; i < size; i++) {
     ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
                         static_cast<float*>(cpu_arr_ret->data)[i]),
               1e-5);
   }

   /* Check view object round trip */
   // Random initialize host pool storage
   for (size_t i = 0; i < size; i++) {
     static_cast<float*>(cpu_arr->data)[i] = random(mt);
   }
   // Copy to OpenCLBuffer
   cpu_arr.CopyTo(opencl_memview);
   // Copy from OpenCLBuffer
   opencl_memview.CopyTo(cpu_arr_ret);
   for (size_t i = 0; i < size; i++) {
     ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
                         static_cast<float*>(cpu_arr_ret->data)[i]),
               1e-5);
   }
 }

 TEST_F(TextureCopyTest, ViewBufferAsImage) {
   using namespace tvm;
   // Shape that doesn't cause padding for image row
   std::vector<int64_t> shape{1, 16, 16, 8, 4};
   std::vector<int64_t> same_shape{1, 8, 16, 16, 4};
   auto cpu_arr = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
   auto cpu_arr_ret = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});

   DLDevice cl_dev = {kDLOpenCL, 0};
   auto allocator = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
   auto buffer = allocator->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
   auto stor = Storage(buffer, allocator);

   auto opencl_buf_obj = stor->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, "global");
   auto opencl_img_obj =
       stor->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, "global.texture");

   std::random_device dev;
   std::mt19937 mt(dev());
   std::uniform_real_distribution<> random(-10.0, 10.0);

   size_t size = 1;
   for (size_t i = 0; i < shape.size(); ++i) {
     size *= static_cast<size_t>(shape[i]);
   }

   /* Check original object round trip */
   // Random initialize host pool storage
   for (size_t i = 0; i < size; i++) {
     static_cast<float*>(cpu_arr->data)[i] = random(mt);
   }
   // Copy to OpenCLBuffer
   cpu_arr.CopyTo(opencl_buf_obj);
   // Copy from OpenCLBuffer
   opencl_buf_obj.CopyTo(cpu_arr_ret);
   for (size_t i = 0; i < size; i++) {
     ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
                         static_cast<float*>(cpu_arr_ret->data)[i]),
               1e-5);
   }

   /* Check view object round trip */
   // Random initialize host pool storage
   for (size_t i = 0; i < size; i++) {
     static_cast<float*>(cpu_arr->data)[i] = random(mt);
   }
   // Copy to OpenCLBuffer
   cpu_arr.CopyTo(opencl_img_obj);
   // Copy from OpenCLBuffer
   opencl_img_obj.CopyTo(cpu_arr_ret);
   for (size_t i = 0; i < size; i++) {
     ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
                         static_cast<float*>(cpu_arr_ret->data)[i]),
               1e-5);
   }
 }

 TEST_F(TextureCopyTest, ViewImageAsBuffer) {
   using namespace tvm;
   // Shape that doesn't cause padding for image row
   std::vector<int64_t> shape{1, 16, 16, 8, 4};
   std::vector<int64_t> same_shape{1, 8, 16, 16, 4};
   auto cpu_arr = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
   auto cpu_arr_ret = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});

   DLDevice cl_dev = {kDLOpenCL, 0};
   auto allocator = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
   auto buffer = allocator->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
   auto stor = Storage(buffer, allocator);

   auto opencl_img_obj =
       stor->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, "global.texture");
   auto opencl_buf_obj =
       stor->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, "global");

   std::random_device dev;
   std::mt19937 mt(dev());
   std::uniform_real_distribution<> random(-10.0, 10.0);

   size_t size = 1;
   for (size_t i = 0; i < shape.size(); ++i) {
     size *= static_cast<size_t>(shape[i]);
   }

   /* Check original object round trip */
   // Random initialize host pool storage
   for (size_t i = 0; i < size; i++) {
     static_cast<float*>(cpu_arr->data)[i] = random(mt);
   }
   // Copy to OpenCLBuffer
   cpu_arr.CopyTo(opencl_buf_obj);
   // Copy from OpenCLBuffer
   opencl_buf_obj.CopyTo(cpu_arr_ret);
   for (size_t i = 0; i < size; i++) {
     ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
                         static_cast<float*>(cpu_arr_ret->data)[i]),
               1e-5);
   }

   /* Check view object round trip */
   // Random initialize host pool storage
   for (size_t i = 0; i < size; i++) {
     static_cast<float*>(cpu_arr->data)[i] = random(mt);
   }
   // Copy to OpenCLBuffer
   cpu_arr.CopyTo(opencl_img_obj);
   // Copy from OpenCLBuffer
   opencl_img_obj.CopyTo(cpu_arr_ret);
   for (size_t i = 0; i < size; i++) {
     ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
                         static_cast<float*>(cpu_arr_ret->data)[i]),
               1e-5);
   }
 }

 TEST_F(TextureCopyTest, ViewImageAsImage) {
   using namespace tvm;
   // Shape that doesn't cause padding for image row
   std::vector<int64_t> shape{1, 16, 16, 8, 4};
   std::vector<int64_t> same_shape{1, 8, 16, 16, 4};
   auto cpu_arr = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
   auto cpu_arr_ret = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});

   DLDevice cl_dev = {kDLOpenCL, 0};
   auto allocator = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
   auto buffer = allocator->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
   auto stor = Storage(buffer, allocator);

   auto opencl_img_obj_1 =
       stor->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, "global.texture");
   auto opencl_img_obj_2 =
       stor->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, "global.texture");

   std::random_device dev;
   std::mt19937 mt(dev());
   std::uniform_real_distribution<> random(-10.0, 10.0);

   size_t size = 1;
   for (size_t i = 0; i < shape.size(); ++i) {
     size *= static_cast<size_t>(shape[i]);
   }

   /* Check original object round trip */
   // Random initialize host pool storage
   for (size_t i = 0; i < size; i++) {
     static_cast<float*>(cpu_arr->data)[i] = random(mt);
   }
   // Copy to OpenCLBuffer
   cpu_arr.CopyTo(opencl_img_obj_1);
   // Copy from OpenCLBuffer
   opencl_img_obj_1.CopyTo(cpu_arr_ret);
   for (size_t i = 0; i < size; i++) {
     ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
                         static_cast<float*>(cpu_arr_ret->data)[i]),
               1e-5);
   }

   /* Check view object round trip */
   // Random initialize host pool storage
   for (size_t i = 0; i < size; i++) {
     static_cast<float*>(cpu_arr->data)[i] = random(mt);
   }
   // Copy to OpenCLBuffer
   cpu_arr.CopyTo(opencl_img_obj_2);
   // Copy from OpenCLBuffer
   opencl_img_obj_2.CopyTo(cpu_arr_ret);
   for (size_t i = 0; i < size; i++) {
     ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
                         static_cast<float*>(cpu_arr_ret->data)[i]),
               1e-5);
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	#include <gtest/gtest.h>
	#include <tvm/ffi/function.h>
	#include <tvm/runtime/logging.h>

	#include <cmath>
	#include <random>

	#include "../src/runtime/opencl/opencl_common.h"

	using tvm::runtime::kAllocAlignment;
	using tvm::runtime::memory::AllocatorType;
	using tvm::runtime::memory::Buffer;
	using tvm::runtime::memory::MemoryManager;
	using tvm::runtime::memory::Storage;

	class TextureCopyTest : public ::testing::Test {
	protected:
	void SetUp() override {
	bool enabled = tvm::runtime::RuntimeEnabled("opencl");
	if (!enabled) {
	GTEST_SKIP() << "Skip texture copy test because opencl runtime is disabled.\n";
	}
	// Check hardware support
	tvm::runtime::cl::OpenCLWorkspace* workspace = tvm::runtime::cl::OpenCLWorkspace::Global();
	tvm::runtime::cl::OpenCLThreadEntry* thr = workspace->GetThreadEntry();
	if (!workspace->IsBufferToImageSupported(thr->device.device_id)) {
	GTEST_SKIP() << "Skip test case as BufferToImage is not supported \n";
	}
	(void)tvm::runtime::memory::MemoryManager::GetOrCreateAllocator(
	thr->device, tvm::runtime::memory::AllocatorType::kPooled);
	}
	};

	TEST(TextureCopy, HostDeviceRT) {
	using namespace tvm;
	bool enabled = tvm::runtime::RuntimeEnabled("opencl");
	if (!enabled) {
	GTEST_SKIP() << "Skip texture copy test because opencl runtime is disabled.\n";
	}
	tvm::runtime::cl::OpenCLWorkspace* workspace = tvm::runtime::cl::OpenCLWorkspace::Global();
	tvm::runtime::cl::OpenCLThreadEntry* thr = workspace->GetThreadEntry();
	(void)tvm::runtime::memory::MemoryManager::GetOrCreateAllocator(
	thr->device, tvm::runtime::memory::AllocatorType::kPooled);
	std::vector<int64_t> shape{16, 16, 4};
	auto cpu_arr0 = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
	auto cpu_arr1 = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
	ffi::String mem_scope = "global.texture";
	auto opencl_txarr0 = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLOpenCL, 0}, mem_scope);

	size_t size = 1;
	for (size_t i = 0; i < shape.size(); ++i) {
	size *= static_cast<size_t>(shape[i]);
	}

	std::random_device dev;
	std::mt19937 mt(dev());
	std::uniform_real_distribution<> random(-10.0, 10.0);

	// Random initialize host ndarray
	for (size_t i = 0; i < size; i++) {
	static_cast<float*>(cpu_arr0->data)[i] = random(mt);
	}

	// Do a roundtrip from host storage to opencl texture storage and back
	cpu_arr0.CopyTo(opencl_txarr0);
	opencl_txarr0.CopyTo(cpu_arr1);
	for (size_t i = 0; i < size; ++i) {
	ICHECK_LT(
	std::fabs(static_cast<float>(cpu_arr1->data)[i] - static_cast<float>(cpu_arr0->data)[i]),
	1e-5);
	}
	}

	TEST_F(TextureCopyTest, ViewBufferAsBuffer) {
	using namespace tvm;
	std::vector<int64_t> shape{1, 16, 16, 8};
	std::vector<int64_t> same_shape{1, 8, 16, 16};
	auto cpu_arr = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
	auto cpu_arr_ret = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});

	ffi::String mem_scope = "global";

	DLDevice cl_dev = {kDLOpenCL, 0};
	auto allocator = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
	auto buffer = allocator->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
	auto stor = Storage(buffer, allocator);

	auto opencl_memobj = stor->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, mem_scope);
	auto opencl_memview =
	stor->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, mem_scope);

	std::random_device dev;
	std::mt19937 mt(dev());
	std::uniform_real_distribution<> random(-10.0, 10.0);

	size_t size = 1;
	for (size_t i = 0; i < shape.size(); ++i) {
	size *= static_cast<size_t>(shape[i]);
	}

	/* Check original object round trip */
	// Random initialize host pool storage
	for (size_t i = 0; i < size; i++) {
	static_cast<float*>(cpu_arr->data)[i] = random(mt);
	}
	// Copy to OpenCLBuffer
	cpu_arr.CopyTo(opencl_memobj);
	// Copy from OpenCLBuffer
	opencl_memobj.CopyTo(cpu_arr_ret);
	for (size_t i = 0; i < size; i++) {
	ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
	static_cast<float*>(cpu_arr_ret->data)[i]),
	1e-5);
	}

	/* Check view object round trip */
	// Random initialize host pool storage
	for (size_t i = 0; i < size; i++) {
	static_cast<float*>(cpu_arr->data)[i] = random(mt);
	}
	// Copy to OpenCLBuffer
	cpu_arr.CopyTo(opencl_memview);
	// Copy from OpenCLBuffer
	opencl_memview.CopyTo(cpu_arr_ret);
	for (size_t i = 0; i < size; i++) {
	ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
	static_cast<float*>(cpu_arr_ret->data)[i]),
	1e-5);
	}
	}

	TEST_F(TextureCopyTest, ViewBufferAsImage) {
	using namespace tvm;
	// Shape that doesn't cause padding for image row
	std::vector<int64_t> shape{1, 16, 16, 8, 4};
	std::vector<int64_t> same_shape{1, 8, 16, 16, 4};
	auto cpu_arr = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
	auto cpu_arr_ret = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});

	DLDevice cl_dev = {kDLOpenCL, 0};
	auto allocator = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
	auto buffer = allocator->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
	auto stor = Storage(buffer, allocator);

	auto opencl_buf_obj = stor->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, "global");
	auto opencl_img_obj =
	stor->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, "global.texture");

	std::random_device dev;
	std::mt19937 mt(dev());
	std::uniform_real_distribution<> random(-10.0, 10.0);

	size_t size = 1;
	for (size_t i = 0; i < shape.size(); ++i) {
	size *= static_cast<size_t>(shape[i]);
	}

	/* Check original object round trip */
	// Random initialize host pool storage
	for (size_t i = 0; i < size; i++) {
	static_cast<float*>(cpu_arr->data)[i] = random(mt);
	}
	// Copy to OpenCLBuffer
	cpu_arr.CopyTo(opencl_buf_obj);
	// Copy from OpenCLBuffer
	opencl_buf_obj.CopyTo(cpu_arr_ret);
	for (size_t i = 0; i < size; i++) {
	ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
	static_cast<float*>(cpu_arr_ret->data)[i]),
	1e-5);
	}

	/* Check view object round trip */
	// Random initialize host pool storage
	for (size_t i = 0; i < size; i++) {
	static_cast<float*>(cpu_arr->data)[i] = random(mt);
	}
	// Copy to OpenCLBuffer
	cpu_arr.CopyTo(opencl_img_obj);
	// Copy from OpenCLBuffer
	opencl_img_obj.CopyTo(cpu_arr_ret);
	for (size_t i = 0; i < size; i++) {
	ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
	static_cast<float*>(cpu_arr_ret->data)[i]),
	1e-5);
	}
	}

	TEST_F(TextureCopyTest, ViewImageAsBuffer) {
	using namespace tvm;
	// Shape that doesn't cause padding for image row
	std::vector<int64_t> shape{1, 16, 16, 8, 4};
	std::vector<int64_t> same_shape{1, 8, 16, 16, 4};
	auto cpu_arr = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
	auto cpu_arr_ret = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});

	DLDevice cl_dev = {kDLOpenCL, 0};
	auto allocator = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
	auto buffer = allocator->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
	auto stor = Storage(buffer, allocator);

	auto opencl_img_obj =
	stor->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, "global.texture");
	auto opencl_buf_obj =
	stor->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, "global");

	std::random_device dev;
	std::mt19937 mt(dev());
	std::uniform_real_distribution<> random(-10.0, 10.0);

	size_t size = 1;
	for (size_t i = 0; i < shape.size(); ++i) {
	size *= static_cast<size_t>(shape[i]);
	}

	/* Check original object round trip */
	// Random initialize host pool storage
	for (size_t i = 0; i < size; i++) {
	static_cast<float*>(cpu_arr->data)[i] = random(mt);
	}
	// Copy to OpenCLBuffer
	cpu_arr.CopyTo(opencl_buf_obj);
	// Copy from OpenCLBuffer
	opencl_buf_obj.CopyTo(cpu_arr_ret);
	for (size_t i = 0; i < size; i++) {
	ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
	static_cast<float*>(cpu_arr_ret->data)[i]),
	1e-5);
	}

	/* Check view object round trip */
	// Random initialize host pool storage
	for (size_t i = 0; i < size; i++) {
	static_cast<float*>(cpu_arr->data)[i] = random(mt);
	}
	// Copy to OpenCLBuffer
	cpu_arr.CopyTo(opencl_img_obj);
	// Copy from OpenCLBuffer
	opencl_img_obj.CopyTo(cpu_arr_ret);
	for (size_t i = 0; i < size; i++) {
	ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
	static_cast<float*>(cpu_arr_ret->data)[i]),
	1e-5);
	}
	}

	TEST_F(TextureCopyTest, ViewImageAsImage) {
	using namespace tvm;
	// Shape that doesn't cause padding for image row
	std::vector<int64_t> shape{1, 16, 16, 8, 4};
	std::vector<int64_t> same_shape{1, 8, 16, 16, 4};
	auto cpu_arr = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
	auto cpu_arr_ret = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});

	DLDevice cl_dev = {kDLOpenCL, 0};
	auto allocator = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
	auto buffer = allocator->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
	auto stor = Storage(buffer, allocator);

	auto opencl_img_obj_1 =
	stor->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, "global.texture");
	auto opencl_img_obj_2 =
	stor->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, "global.texture");

	std::random_device dev;
	std::mt19937 mt(dev());
	std::uniform_real_distribution<> random(-10.0, 10.0);

	size_t size = 1;
	for (size_t i = 0; i < shape.size(); ++i) {
	size *= static_cast<size_t>(shape[i]);
	}

	/* Check original object round trip */
	// Random initialize host pool storage
	for (size_t i = 0; i < size; i++) {
	static_cast<float*>(cpu_arr->data)[i] = random(mt);
	}
	// Copy to OpenCLBuffer
	cpu_arr.CopyTo(opencl_img_obj_1);
	// Copy from OpenCLBuffer
	opencl_img_obj_1.CopyTo(cpu_arr_ret);
	for (size_t i = 0; i < size; i++) {
	ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
	static_cast<float*>(cpu_arr_ret->data)[i]),
	1e-5);
	}

	/* Check view object round trip */
	// Random initialize host pool storage
	for (size_t i = 0; i < size; i++) {
	static_cast<float*>(cpu_arr->data)[i] = random(mt);
	}
	// Copy to OpenCLBuffer
	cpu_arr.CopyTo(opencl_img_obj_2);
	// Copy from OpenCLBuffer
	opencl_img_obj_2.CopyTo(cpu_arr_ret);
	for (size_t i = 0; i < size; i++) {
	ICHECK_LT(std::fabs(static_cast<float*>(cpu_arr->data)[i] -
	static_cast<float*>(cpu_arr_ret->data)[i]),
	1e-5);
	}
	}