// blob: 449c6973605077ddb7208803fe33ae6c00da90e4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include <dlpack/dlpack.h>
#include <gtest/gtest.h>
#include <cstdint>
#include <ctime>
#include <functional>
#include <string>
#include <tuple>
#include "conv2d.h"
#include "hexagon_conv_utils_test.h"
using namespace tvm::runtime::hexagon::conv_utils;
// Parameterized test fixture for the uint8 activation blockize/deblockize tests.
// Each test parameter is a pair of 4-tuples:
//   - std::get<0>: the tensor shape, unpacked as (n, h, w, c) by the tests below
//   - std::get<1>: the (n, h, w, c) index of the single element a test compares
class HexagonUtilsQuantActivationsBlockizeTest
: public HexagonUtilsTest<uint8_t>,
public ::testing::WithParamInterface<std::tuple<
std::tuple<int64_t, int64_t, int64_t, int64_t>, std::tuple<int, int, int, int>>> {};
// TODO (quic-sanirudh): See if we can test with random generated indices
// Instantiates the activation TEST_P cases for a single 1x14x7x60 tensor shape combined with a
// fixed list of (n, h, w, c) indices; each combination probes exactly one element.
INSTANTIATE_TEST_SUITE_P(
    BlockizeDeblockizeTestFixtures, HexagonUtilsQuantActivationsBlockizeTest,
    ::testing::Combine(
        // Tensor shape (n, h, w, c)
        ::testing::Values(std::make_tuple(1, 14, 7, 60)),
        // Element indices (n, h, w, c) to compare
        ::testing::Values(
            std::make_tuple(0, 0, 0, 0),    // first element of the tensor
            std::make_tuple(0, 7, 3, 31),   // h = 7 and c = 31: last row/channel before the
                                            // index crosses an 8 (h) / 32 (c) block boundary
            std::make_tuple(0, 13, 6, 59),  // last element of the 1x14x7x60 tensor
            // Remaining entries are arbitrary element tests
            std::make_tuple(0, 0, 0, 32), std::make_tuple(0, 0, 4, 32),
            std::make_tuple(0, 2, 3, 4), std::make_tuple(0, 5, 6, 7),
            std::make_tuple(0, 10, 4, 12))),
    [](const ::testing::TestParamInfo<HexagonUtilsQuantActivationsBlockizeTest::ParamType>& info) {
      // The test suffix is built from the probed index only (second tuple member); the n index
      // is not part of the name. Indices (h, w, c) = (0, 0, 0) yield "hwc0x0x0".
      const auto& probe = std::get<1>(info.param);
      std::string suffix = "hwc";
      suffix += std::to_string(std::get<1>(probe));
      suffix += "x";
      suffix += std::to_string(std::get<2>(probe));
      suffix += "x";
      suffix += std::to_string(std::get<3>(probe));
      return suffix;
    });
// Checks the shape reported by prepare_nhwc<uint8_t, 8, 8, 32> for a 1x14x7x60 input:
// n is kept as-is, while h, w and c become the number of 8/8/32-sized blocks after rounding up.
TEST_F(HexagonUtilsQuantActivationsBlockizeTest, prepare_nhwc) {
  auto dims = std::make_tuple(1, 14, 7, 60);
  auto [batch, height, width, channels] = dims;
  setupTensor(dims, uint8);

  // copy_data is false: only the returned shape is inspected here; the element contents are
  // checked by the separate blockize tests.
  auto blocked =
      prepare_nhwc<uint8_t, 8, 8, 32>(device_api, &flat_tensor, /*copy_data=*/false);

  EXPECT_EQ(blocked.shape[0], batch);
  EXPECT_EQ(blocked.shape[1], round_up(height, 8) / 8);
  EXPECT_EQ(blocked.shape[2], round_up(width, 8) / 8);
  EXPECT_EQ(blocked.shape[3], round_up(channels, 32) / 32);

  TearDownTensor();
  release(device_api, blocked);
}
// Runs blockize_hwc<uint8_t, 8, 8, 32> on the flat tensor and checks that the element at the
// parameterized (n, h, w, c) index holds the same value in the blocked layout as in the flat one.
TEST_P(HexagonUtilsQuantActivationsBlockizeTest, blockize_hwc_8b) {
  auto dims = std::get<0>(GetParam());
  setupTensor(dims, uint8);
  auto [n, h, w, c] = dims;
  int64_t flat_shape[] = {n, h, w, c};

  // Round every blocked dimension up to a whole number of 8x8x32 blocks.
  int padded_h = round_up(h, 8);
  int padded_w = round_up(w, 8);
  int padded_c = round_up(c, 32);

  // 2-d allocation: {number of blocks, elements per block}.
  int64_t alloc_shape[2] = {(n * padded_h * padded_w * padded_c) / (8 * 8 * 32), 8 * 8 * 32};
  void* blocked_mem =
      device_api->AllocDataSpace(hexagon_device, 2, alloc_shape, uint8, vtcm_scope);
  int64_t blocked_shape[] = {n, padded_h / 8, padded_w / 8, padded_c / 32};

  blockize_hwc<uint8_t, 8, 8, 32>(blocked_mem, flat_mem, h, w, c);

  auto [nn, hh, ww, cc] = std::get<1>(GetParam());

  // blocked_mem is read as a table of block addresses: pick the block containing the element,
  // then index inside that block with yxc_to_sm_8b.
  auto* block_table = static_cast<uintptr_t*>(blocked_mem);
  int block_idx = flattened_idx(nn, hh / 8, ww / 8, cc / 32, blocked_shape);
  uint8_t* block = reinterpret_cast<uint8_t*>(block_table[block_idx]);
  uint8_t blocked_value = block[yxc_to_sm_8b(hh % 8, ww % 8, cc % 32)];

  EXPECT_EQ(flat_mem_data[flattened_idx(nn, hh, ww, cc, flat_shape)], blocked_value);

  TearDownTensor();
  device_api->FreeDataSpace(hexagon_device, blocked_mem);
}
// Round-trip check: blockize the flat tensor, deblockize it into a fresh flat buffer, and verify
// that the element at the parameterized (n, h, w, c) index matches the original flat data.
TEST_P(HexagonUtilsQuantActivationsBlockizeTest, deblockize_hwc_8b) {
  auto dims = std::get<0>(GetParam());
  setupTensor(dims, uint8);
  auto [n, h, w, c] = dims;
  int64_t flat_shape[] = {n, h, w, c};
  int64_t flat_alloc_shape[1] = {n * h * w * c};

  // Round every blocked dimension up to a whole number of 8x8x32 blocks.
  int padded_h = round_up(h, 8);
  int padded_w = round_up(w, 8);
  int padded_c = round_up(c, 32);

  // 2-d allocation: {number of blocks, elements per block}.
  int64_t blocked_alloc_shape[2] = {(n * padded_h * padded_w * padded_c) / (8 * 8 * 32),
                                    8 * 8 * 32};
  void* blocked_mem =
      device_api->AllocDataSpace(hexagon_device, 2, blocked_alloc_shape, uint8, vtcm_scope);
  blockize_hwc<uint8_t, 8, 8, 32>(blocked_mem, flat_mem, h, w, c);

  // Destination for the deblockized data, sized like the original (unpadded) tensor.
  void* roundtrip_mem =
      device_api->AllocDataSpace(hexagon_device, 1, flat_alloc_shape, uint8, vtcm_scope);
  deblockize_hwc<uint8_t, 8, 8, 32>(roundtrip_mem, blocked_mem, h, w, c);
  auto* roundtrip_data = static_cast<uint8_t*>(roundtrip_mem);

  auto [nn, hh, ww, cc] = std::get<1>(GetParam());
  auto flat_idx = flattened_idx(nn, hh, ww, cc, flat_shape);
  EXPECT_EQ(flat_mem_data[flat_idx], roundtrip_data[flat_idx]);

  TearDownTensor();
  device_api->FreeDataSpace(hexagon_device, blocked_mem);
  device_api->FreeDataSpace(hexagon_device, roundtrip_mem);
}
// Parameterized test fixture for the int8 weight chunkify tests.
// Each test parameter is a pair of 4-tuples:
//   - std::get<0>: the weight tensor shape, unpacked as (h, w, i, o) by the tests below
//   - std::get<1>: the (h, w, i, o) index of the single element a test compares
class HexagonUtilsQuantWeightsChunkifyTest
: public HexagonUtilsTest<int8_t>,
public ::testing::WithParamInterface<std::tuple<
std::tuple<int64_t, int64_t, int64_t, int64_t>, std::tuple<int, int, int, int>>> {};
// Instantiates the weight TEST_P cases for a single 3x3x40x40 (h, w, i, o) shape combined with a
// fixed list of (h, w, i, o) indices; each combination probes exactly one element.
INSTANTIATE_TEST_SUITE_P(
    ChunkifyDechunkifyTests, HexagonUtilsQuantWeightsChunkifyTest,
    ::testing::Combine(::testing::Values(std::make_tuple(3, 3, 40, 40)),
                       ::testing::Values(std::make_tuple(0, 0, 0, 0),    // first element
                                         std::make_tuple(2, 2, 39, 39),  // Last element
                                         // Remaining are random element tests
                                         std::make_tuple(1, 1, 28, 33),
                                         std::make_tuple(1, 2, 8, 38),
                                         std::make_tuple(1, 0, 12, 15),
                                         std::make_tuple(2, 1, 9, 22), std::make_tuple(0, 2, 6, 7),
                                         std::make_tuple(1, 2, 3, 4))),
    [](const ::testing::TestParamInfo<HexagonUtilsQuantWeightsChunkifyTest::ParamType>& info) {
      // Only the probed index (second tuple member) is encoded in the test suffix.
      auto indices = std::get<1>(info.param);
      int h = std::get<0>(indices);
      int w = std::get<1>(indices);
      int i = std::get<2>(indices);
      int o = std::get<3>(indices);
      // Generate test name as "hwio0x0x0x0" if the indices of hwio are 0,0,0,0.
      // Every component is separated by "x" so that multi-digit indices cannot produce
      // ambiguous (and potentially duplicate) test names.
      std::string name = "hwio" + std::to_string(h) + "x" + std::to_string(w) + "x" +
                         std::to_string(i) + "x" + std::to_string(o);
      return name;
    });
// Checks that a 3x3x40x40 weight shape, with the first two dims passed as the chunk height/width
// and 32 for both channel chunk sizes, is reported as 4 chunks.
// NOTE(review): presumably ceil(40 / 32) * ceil(40 / 32) = 4 - confirm against the helper.
TEST_F(HexagonUtilsQuantWeightsChunkifyTest, calculate_num_weight_chunks) {
  int64_t hwio_shape[] = {3, 3, 40, 40};
  int64_t kernel_h = hwio_shape[0];
  int64_t kernel_w = hwio_shape[1];
  int chunk_count = calculate_num_weight_chunks(hwio_shape, kernel_h, kernel_w, 32, 32);
  EXPECT_EQ(chunk_count, 4);
}
// Checks the shape reported by prepare_hwio_8b for a 3x3x40x40 (h, w, i, o) weight tensor:
// the first two dims are expected to be 1 and the last two the number of 32-sized chunks
// (after rounding up) along i and o.
TEST_F(HexagonUtilsQuantWeightsChunkifyTest, prepare_hwio) {
  int64_t hwio_shape[] = {3, 3, 40, 40};
  int64_t in_channels = hwio_shape[2];
  int64_t out_channels = hwio_shape[3];
  auto dims = std::make_tuple(hwio_shape[0], hwio_shape[1], in_channels, out_channels);
  setupTensor(dims, int8);

  // Only the shape of the returned tensor is verified here; the chunked element contents are
  // covered by the separate chunkify test.
  auto chunk_count =
      calculate_num_weight_chunks(hwio_shape, hwio_shape[0], hwio_shape[1], 32, 32);
  // Stack storage for one pointer-sized slot per chunk, handed to prepare_hwio_8b.
  auto chunk_ptr_table =
      reinterpret_cast<void**>(__builtin_alloca(chunk_count * sizeof(uintptr_t)));
  auto chunked = prepare_hwio_8b(device_api, &flat_tensor, chunk_count, chunk_ptr_table);

  EXPECT_EQ(chunked.shape[0], 1);
  EXPECT_EQ(chunked.shape[1], 1);
  EXPECT_EQ(chunked.shape[2], round_up(in_channels, 32) / 32);
  EXPECT_EQ(chunked.shape[3], round_up(out_channels, 32) / 32);

  release(device_api, chunked);
  TearDownTensor();
}
// Runs prepare_hwio_8b on the flat weight tensor and checks that the element at the
// parameterized (h, w, i, o) index holds the same value in the chunked layout as in the flat one.
TEST_P(HexagonUtilsQuantWeightsChunkifyTest, chunkify_hwio_8b) {
  auto [shape_tuple, indices] = GetParam();
  auto [h, w, i, o] = shape_tuple;
  setupTensor(shape_tuple, int8);
  int64_t shape[] = {h, w, i, o};

  auto num_wgt_chunks = calculate_num_weight_chunks(shape, shape[0], shape[1], 32, 32);
  // Stack storage for one pointer-sized slot per chunk, handed to prepare_hwio_8b.
  auto wgt_ptr_table =
      reinterpret_cast<void**>(__builtin_alloca(num_wgt_chunks * sizeof(uintptr_t)));
  auto chunked_tensor = prepare_hwio_8b(device_api, &flat_tensor, num_wgt_chunks, wgt_ptr_table);

  // chunked_tensor.data is read as a table of chunk addresses indexed by (i / 32, o / 32);
  // hwio_to_sm_8b gives the offset of the element inside the selected chunk.
  // `w` is a structured binding, so it is copied via an init-capture (C++17 cannot capture it
  // directly).
  auto getChunkedElem = [width = w, chunked_tensor](int hh, int ww, int ii, int oo) {
    auto data = static_cast<intptr_t*>(chunked_tensor.data);
    auto chunk = data[ii / 32 * chunked_tensor.shape[3] + oo / 32];
    auto chunk_int8 = reinterpret_cast<int8_t*>(chunk);
    return chunk_int8[hwio_to_sm_8b(width, hh, ww, ii % 32, oo % 32)];
  };

  auto [hh, ww, ii, oo] = indices;
  EXPECT_EQ(flat_mem_data[flattened_idx(hh, ww, ii, oo, shape)], getChunkedElem(hh, ww, ii, oo));

  release(device_api, chunked_tensor);
  // Release the flat tensor created by setupTensor(); every other test in this file pairs
  // setupTensor() with TearDownTensor(), and omitting it here leaked the allocation.
  TearDownTensor();
}