blob: 001e65b901268f5f531486267c353d7b529d6598 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <gtest/gtest.h>
#include <tvm/ffi/function.h>
#include <tvm/runtime/logging.h>
#include <cmath>
#include <random>
#include "../src/runtime/opencl/opencl_common.h"
using tvm::runtime::kAllocAlignment;
using tvm::runtime::memory::AllocatorType;
using tvm::runtime::memory::Buffer;
using tvm::runtime::memory::MemoryManager;
using tvm::runtime::memory::Storage;
/*!
 * \brief Fixture shared by the TEST_F cases in this file.
 *
 * SetUp requests a skip when the "opencl" runtime is not enabled, and again
 * when the OpenCL workspace reports that BufferToImage is unsupported for the
 * current thread entry's device. Otherwise it asks the memory manager for the
 * pooled allocator of that device (the returned handle is discarded).
 */
class TextureCopyTest : public ::testing::Test {
 protected:
  void SetUp() override {
    using tvm::runtime::cl::OpenCLThreadEntry;
    using tvm::runtime::cl::OpenCLWorkspace;

    if (!tvm::runtime::RuntimeEnabled("opencl")) {
      GTEST_SKIP() << "Skip texture copy test because opencl runtime is disabled.\n";
    }

    // Check hardware support
    OpenCLWorkspace* cl_workspace = OpenCLWorkspace::Global();
    OpenCLThreadEntry* thread_entry = cl_workspace->GetThreadEntry();
    if (!cl_workspace->IsBufferToImageSupported(thread_entry->device.device_id)) {
      GTEST_SKIP() << "Skip test case as BufferToImage is not supported \n";
    }

    // Only the get-or-create call matters here; the allocator itself is not kept.
    (void)tvm::runtime::memory::MemoryManager::GetOrCreateAllocator(
        thread_entry->device, tvm::runtime::memory::AllocatorType::kPooled);
  }
};
/*!
 * \brief Host -> "global.texture" OpenCL tensor -> host round trip.
 *
 * Fills a 16x16x4 float32 CPU tensor with random values in [-10, 10), copies
 * it into an OpenCL tensor allocated with the "global.texture" scope, copies
 * that back into a second CPU tensor and checks every element matches the
 * source within 1e-5.
 */
TEST(TextureCopy, HostDeviceRT) {
  using namespace tvm;
  if (!tvm::runtime::RuntimeEnabled("opencl")) {
    GTEST_SKIP() << "Skip texture copy test because opencl runtime is disabled.\n";
  }
  tvm::runtime::cl::OpenCLWorkspace* cl_workspace = tvm::runtime::cl::OpenCLWorkspace::Global();
  tvm::runtime::cl::OpenCLThreadEntry* thread_entry = cl_workspace->GetThreadEntry();
  (void)tvm::runtime::memory::MemoryManager::GetOrCreateAllocator(
      thread_entry->device, tvm::runtime::memory::AllocatorType::kPooled);

  std::vector<int64_t> shape{16, 16, 4};
  // Total number of float elements described by `shape`.
  size_t num_elems = 1;
  for (int64_t extent : shape) {
    num_elems *= static_cast<size_t>(extent);
  }

  auto host_src = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
  auto host_dst = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
  ffi::String mem_scope = "global.texture";
  auto device_tex = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLOpenCL, 0}, mem_scope);

  float* src_vals = static_cast<float*>(host_src->data);
  float* dst_vals = static_cast<float*>(host_dst->data);

  // Randomly initialize the source host tensor.
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<> dist(-10.0, 10.0);
  for (size_t k = 0; k != num_elems; ++k) {
    src_vals[k] = dist(engine);
  }

  // Host storage -> OpenCL texture storage -> host storage.
  host_src.CopyTo(device_tex);
  device_tex.CopyTo(host_dst);

  for (size_t k = 0; k != num_elems; ++k) {
    ICHECK_LT(std::fabs(dst_vals[k] - src_vals[k]), 1e-5);
  }
}
/*!
 * \brief Round trips through two "global"-scope tensors sharing one Storage.
 *
 * A single pooled allocation is wrapped in a Storage, from which two tensors
 * are created via AllocTensorScoped(0, ...): one with `shape` and one with
 * `same_shape` (same element count, different dimension order). Each tensor in
 * turn receives freshly randomized host data and is read back; the result must
 * match the host source within 1e-5.
 */
TEST_F(TextureCopyTest, ViewBufferAsBuffer) {
  using namespace tvm;
  std::vector<int64_t> shape{1, 16, 16, 8};
  std::vector<int64_t> same_shape{1, 8, 16, 16};

  // Total number of float elements described by `shape`.
  size_t num_elems = 1;
  for (int64_t extent : shape) {
    num_elems *= static_cast<size_t>(extent);
  }

  auto host_src = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
  auto host_dst = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
  ffi::String mem_scope = "global";
  DLDevice cl_dev = {kDLOpenCL, 0};
  auto pool = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
  auto backing = pool->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
  auto storage = Storage(backing, pool);
  auto buf_primary =
      storage->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, mem_scope);
  auto buf_alias =
      storage->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, mem_scope);

  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<> dist(-10.0, 10.0);

  float* src_vals = static_cast<float*>(host_src->data);
  float* dst_vals = static_cast<float*>(host_dst->data);

  // Overwrite the host source tensor with new random values.
  auto randomize_source = [&]() {
    for (size_t k = 0; k != num_elems; ++k) {
      src_vals[k] = dist(engine);
    }
  };
  // Element-wise comparison of the read-back data against the host source.
  auto check_round_trip = [&]() {
    for (size_t k = 0; k != num_elems; ++k) {
      ICHECK_LT(std::fabs(src_vals[k] - dst_vals[k]), 1e-5);
    }
  };

  /* Round trip through the tensor created with `shape` */
  randomize_source();
  host_src.CopyTo(buf_primary);
  buf_primary.CopyTo(host_dst);
  check_round_trip();

  /* Round trip through the tensor created with `same_shape` */
  randomize_source();
  host_src.CopyTo(buf_alias);
  buf_alias.CopyTo(host_dst);
  check_round_trip();
}
/*!
 * \brief Round trips through a "global" tensor and a "global.texture" tensor
 *        that share one Storage.
 *
 * The "global" tensor is created with `shape`, the "global.texture" tensor
 * with `same_shape`, both via AllocTensorScoped(0, ...). The buffer-scope
 * tensor is exercised first, then the texture-scope one; each gets freshly
 * randomized host data and must read back within 1e-5 of it.
 */
TEST_F(TextureCopyTest, ViewBufferAsImage) {
  using namespace tvm;
  // Shape that doesn't cause padding for image row
  std::vector<int64_t> shape{1, 16, 16, 8, 4};
  std::vector<int64_t> same_shape{1, 8, 16, 16, 4};

  // Total number of float elements described by `shape`.
  size_t num_elems = 1;
  for (int64_t extent : shape) {
    num_elems *= static_cast<size_t>(extent);
  }

  auto host_src = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
  auto host_dst = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
  DLDevice cl_dev = {kDLOpenCL, 0};
  auto pool = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
  auto backing = pool->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
  auto storage = Storage(backing, pool);
  auto as_buffer = storage->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, "global");
  auto as_image =
      storage->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, "global.texture");

  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<> dist(-10.0, 10.0);

  float* src_vals = static_cast<float*>(host_src->data);
  float* dst_vals = static_cast<float*>(host_dst->data);

  // Overwrite the host source tensor with new random values.
  auto randomize_source = [&]() {
    for (size_t k = 0; k != num_elems; ++k) {
      src_vals[k] = dist(engine);
    }
  };
  // Element-wise comparison of the read-back data against the host source.
  auto check_round_trip = [&]() {
    for (size_t k = 0; k != num_elems; ++k) {
      ICHECK_LT(std::fabs(src_vals[k] - dst_vals[k]), 1e-5);
    }
  };

  /* Round trip through the "global" (buffer) tensor */
  randomize_source();
  host_src.CopyTo(as_buffer);
  as_buffer.CopyTo(host_dst);
  check_round_trip();

  /* Round trip through the "global.texture" (image) tensor */
  randomize_source();
  host_src.CopyTo(as_image);
  as_image.CopyTo(host_dst);
  check_round_trip();
}
/*!
 * \brief Round trips through a "global.texture" tensor and a "global" tensor
 *        that share one Storage.
 *
 * Here the "global.texture" tensor is created with `shape` and the "global"
 * tensor with `same_shape`, both via AllocTensorScoped(0, ...). Note the
 * exercise order: the buffer-scope tensor is round-tripped first and the
 * texture-scope tensor second. Each round trip uses freshly randomized host
 * data and must read back within 1e-5 of it.
 */
TEST_F(TextureCopyTest, ViewImageAsBuffer) {
  using namespace tvm;
  // Shape that doesn't cause padding for image row
  std::vector<int64_t> shape{1, 16, 16, 8, 4};
  std::vector<int64_t> same_shape{1, 8, 16, 16, 4};

  // Total number of float elements described by `shape`.
  size_t num_elems = 1;
  for (int64_t extent : shape) {
    num_elems *= static_cast<size_t>(extent);
  }

  auto host_src = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
  auto host_dst = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
  DLDevice cl_dev = {kDLOpenCL, 0};
  auto pool = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
  auto backing = pool->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
  auto storage = Storage(backing, pool);
  auto as_image =
      storage->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, "global.texture");
  auto as_buffer =
      storage->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, "global");

  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<> dist(-10.0, 10.0);

  float* src_vals = static_cast<float*>(host_src->data);
  float* dst_vals = static_cast<float*>(host_dst->data);

  // Overwrite the host source tensor with new random values.
  auto randomize_source = [&]() {
    for (size_t k = 0; k != num_elems; ++k) {
      src_vals[k] = dist(engine);
    }
  };
  // Element-wise comparison of the read-back data against the host source.
  auto check_round_trip = [&]() {
    for (size_t k = 0; k != num_elems; ++k) {
      ICHECK_LT(std::fabs(src_vals[k] - dst_vals[k]), 1e-5);
    }
  };

  /* Round trip through the "global" (buffer) tensor */
  randomize_source();
  host_src.CopyTo(as_buffer);
  as_buffer.CopyTo(host_dst);
  check_round_trip();

  /* Round trip through the "global.texture" (image) tensor */
  randomize_source();
  host_src.CopyTo(as_image);
  as_image.CopyTo(host_dst);
  check_round_trip();
}
/*!
 * \brief Round trips through two "global.texture" tensors sharing one Storage.
 *
 * Both tensors come from AllocTensorScoped(0, ...) on the same Storage: the
 * first with `shape`, the second with `same_shape`. Each in turn receives
 * freshly randomized host data and is read back; the result must match the
 * host source within 1e-5.
 */
TEST_F(TextureCopyTest, ViewImageAsImage) {
  using namespace tvm;
  // Shape that doesn't cause padding for image row
  std::vector<int64_t> shape{1, 16, 16, 8, 4};
  std::vector<int64_t> same_shape{1, 8, 16, 16, 4};

  // Total number of float elements described by `shape`.
  size_t num_elems = 1;
  for (int64_t extent : shape) {
    num_elems *= static_cast<size_t>(extent);
  }

  auto host_src = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
  auto host_dst = runtime::Tensor::Empty(shape, {kDLFloat, 32, 1}, {kDLCPU, 0});
  DLDevice cl_dev = {kDLOpenCL, 0};
  auto pool = MemoryManager::GetOrCreateAllocator(cl_dev, AllocatorType::kPooled);
  auto backing = pool->Alloc(cl_dev, ffi::Shape(shape), {kDLFloat, 32, 1});
  auto storage = Storage(backing, pool);
  auto image_first =
      storage->AllocTensorScoped(0, ffi::Shape(shape), {kDLFloat, 32, 1}, "global.texture");
  auto image_second =
      storage->AllocTensorScoped(0, ffi::Shape(same_shape), {kDLFloat, 32, 1}, "global.texture");

  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<> dist(-10.0, 10.0);

  float* src_vals = static_cast<float*>(host_src->data);
  float* dst_vals = static_cast<float*>(host_dst->data);

  // Overwrite the host source tensor with new random values.
  auto randomize_source = [&]() {
    for (size_t k = 0; k != num_elems; ++k) {
      src_vals[k] = dist(engine);
    }
  };
  // Element-wise comparison of the read-back data against the host source.
  auto check_round_trip = [&]() {
    for (size_t k = 0; k != num_elems; ++k) {
      ICHECK_LT(std::fabs(src_vals[k] - dst_vals[k]), 1e-5);
    }
  };

  /* Round trip through the image tensor created with `shape` */
  randomize_source();
  host_src.CopyTo(image_first);
  image_first.CopyTo(host_dst);
  check_round_trip();

  /* Round trip through the image tensor created with `same_shape` */
  randomize_source();
  host_src.CopyTo(image_second);
  image_second.CopyTo(host_dst);
  check_round_trip();
}