| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| #include <dlpack/dlpack.h> |
| #include <gtest/gtest.h> |
| |
| #include <cstdint> |
| #include <ctime> |
| #include <functional> |
| #include <string> |
| #include <tuple> |
| |
| #include "conv2d.h" |
| #include "hexagon_conv_utils_test.h" |
| |
| using namespace tvm::runtime::hexagon::conv_utils; |
| |
| // Parameterized test fixture with 4 params representing n, h, w, c |
| class HexagonUtilsQuantActivationsBlockizeTest |
| : public HexagonUtilsTest<uint8_t>, |
| public ::testing::WithParamInterface<std::tuple< |
| std::tuple<int64_t, int64_t, int64_t, int64_t>, std::tuple<int, int, int, int>>> {}; |
| |
| // TODO (quic-sanirudh): See if we can test with random generated indices |
| INSTANTIATE_TEST_SUITE_P( |
| BlockizeDeblockizeTestFixtures, HexagonUtilsQuantActivationsBlockizeTest, |
| ::testing::Combine(::testing::Values(std::make_tuple(1, 14, 7, 60)), |
| ::testing::Values(std::make_tuple(0, 0, 0, 0), // first element |
| std::make_tuple(0, 7, 3, 31), // last element |
| // Remaining are random element tests |
| std::make_tuple(0, 13, 6, 59), |
| std::make_tuple(0, 0, 0, 32), std::make_tuple(0, 0, 4, 32), |
| std::make_tuple(0, 2, 3, 4), std::make_tuple(0, 5, 6, 7), |
| std::make_tuple(0, 10, 4, 12))), |
| [](const ::testing::TestParamInfo<HexagonUtilsQuantActivationsBlockizeTest::ParamType>& info) { |
| // Can use info.param here to generate the test suffix |
| auto indices = std::get<1>(info.param); |
| int h = std::get<1>(indices); |
| int w = std::get<2>(indices); |
| int c = std::get<3>(indices); |
| // Generate test name as "hwc0x0x0" if the indices of hwc are 0,0,0 |
| std::string name = |
| "hwc" + std::to_string(h) + "x" + std::to_string(w) + "x" + std::to_string(c); |
| return name; |
| }); |
| |
| TEST_F(HexagonUtilsQuantActivationsBlockizeTest, prepare_nhwc) { |
| auto shape = std::make_tuple(1, 14, 7, 60); |
| auto [n, h, w, c] = shape; |
| setupTensor(shape, uint8); |
| |
| // // copy_data is set to false here as there's a separate test for blockize when copy_data |
| // becomes true |
| auto blocked_tensor = |
| prepare_nhwc<uint8_t, 8, 8, 32>(device_api, &flat_tensor, /*copy_data=*/false); |
| |
| EXPECT_EQ(blocked_tensor.shape[0], n); |
| EXPECT_EQ(blocked_tensor.shape[1], round_up(h, 8) / 8); |
| EXPECT_EQ(blocked_tensor.shape[2], round_up(w, 8) / 8); |
| EXPECT_EQ(blocked_tensor.shape[3], round_up(c, 32) / 32); |
| |
| TearDownTensor(); |
| release(device_api, blocked_tensor); |
| } |
| |
| TEST_P(HexagonUtilsQuantActivationsBlockizeTest, blockize_hwc_8b) { |
| auto shape_tuple = std::get<0>(GetParam()); |
| setupTensor(shape_tuple, uint8); |
| auto [n, h, w, c] = shape_tuple; |
| int64_t shape[] = {n, h, w, c}; |
| |
| int h_rounded = round_up(h, 8); |
| int w_rounded = round_up(w, 8); |
| int c_rounded = round_up(c, 32); |
| int64_t shape_2d[2] = {(n * h_rounded * w_rounded * c_rounded) / (8 * 8 * 32), 8 * 8 * 32}; |
| |
| void* blocked_mem = device_api->AllocDataSpace(hexagon_device, 2, shape_2d, uint8, vtcm_scope); |
| int64_t blocked_shape[] = {n, h_rounded / 8, w_rounded / 8, c_rounded / 32}; |
| blockize_hwc<uint8_t, 8, 8, 32>(blocked_mem, flat_mem, h, w, c); |
| |
| std::function<int(int, int, int, int, int64_t*)> flatten = |
| HexagonUtilsQuantActivationsBlockizeTest::flattened_idx; |
| |
| auto getBlockedElem = [&blocked_shape, blocked_mem, flatten](int nn, int hh, int ww, int cc) { |
| auto* blocks = static_cast<uintptr_t*>(blocked_mem); |
| int blockIdx = flatten(nn, hh / 8, ww / 8, cc / 32, blocked_shape); |
| uint8_t* block = reinterpret_cast<uint8_t*>(blocks[blockIdx]); |
| return block[yxc_to_sm_8b(hh % 8, ww % 8, cc % 32)]; |
| }; |
| |
| auto [nn, hh, ww, cc] = std::get<1>(GetParam()); |
| |
| EXPECT_EQ(flat_mem_data[flattened_idx(nn, hh, ww, cc, shape)], getBlockedElem(nn, hh, ww, cc)); |
| |
| TearDownTensor(); |
| device_api->FreeDataSpace(hexagon_device, blocked_mem); |
| } |
| |
| TEST_P(HexagonUtilsQuantActivationsBlockizeTest, deblockize_hwc_8b) { |
| auto shape_tuple = std::get<0>(GetParam()); |
| setupTensor(shape_tuple, uint8); |
| auto [n, h, w, c] = shape_tuple; |
| int64_t shape[] = {n, h, w, c}; |
| int64_t shape_1d[1] = {n * h * w * c}; |
| |
| int h_rounded = round_up(h, 8); |
| int w_rounded = round_up(w, 8); |
| int c_rounded = round_up(c, 32); |
| int64_t shape_2d[2] = {(n * h_rounded * w_rounded * c_rounded) / (8 * 8 * 32), 8 * 8 * 32}; |
| |
| void* blocked_mem = device_api->AllocDataSpace(hexagon_device, 2, shape_2d, uint8, vtcm_scope); |
| blockize_hwc<uint8_t, 8, 8, 32>(blocked_mem, flat_mem, h, w, c); |
| |
| void* deblocked_flat_mem = |
| device_api->AllocDataSpace(hexagon_device, 1, shape_1d, uint8, vtcm_scope); |
| deblockize_hwc<uint8_t, 8, 8, 32>(deblocked_flat_mem, blocked_mem, h, w, c); |
| auto* deblocked_flat_mem_data = static_cast<uint8_t*>(deblocked_flat_mem); |
| |
| auto [nn, hh, ww, cc] = std::get<1>(GetParam()); |
| |
| auto idx = flattened_idx(nn, hh, ww, cc, shape); |
| EXPECT_EQ(flat_mem_data[idx], deblocked_flat_mem_data[idx]); |
| |
| TearDownTensor(); |
| device_api->FreeDataSpace(hexagon_device, blocked_mem); |
| device_api->FreeDataSpace(hexagon_device, deblocked_flat_mem); |
| } |
| |
| class HexagonUtilsQuantWeightsChunkifyTest |
| : public HexagonUtilsTest<int8_t>, |
| public ::testing::WithParamInterface<std::tuple< |
| std::tuple<int64_t, int64_t, int64_t, int64_t>, std::tuple<int, int, int, int>>> {}; |
| |
| INSTANTIATE_TEST_SUITE_P( |
| ChunkifyDechunkifyTests, HexagonUtilsQuantWeightsChunkifyTest, |
| ::testing::Combine(::testing::Values(std::make_tuple(3, 3, 40, 40)), |
| ::testing::Values(std::make_tuple(0, 0, 0, 0), // first element |
| std::make_tuple(2, 2, 39, 39), // Last element |
| // Remaining are random element tests |
| std::make_tuple(1, 1, 28, 33), |
| std::make_tuple(1, 2, 8, 38), |
| std::make_tuple(1, 0, 12, 15), |
| std::make_tuple(2, 1, 9, 22), std::make_tuple(0, 2, 6, 7), |
| std::make_tuple(1, 2, 3, 4))), |
| [](const ::testing::TestParamInfo<HexagonUtilsQuantWeightsChunkifyTest::ParamType>& info) { |
| // Can use info.param here to generate the test suffix |
| auto indices = std::get<1>(info.param); |
| int h = std::get<0>(indices); |
| int w = std::get<1>(indices); |
| int i = std::get<2>(indices); |
| int o = std::get<3>(indices); |
| // Generate test name as "hwc0x0x0" if the indices of hwc are 0,0,0 |
| std::string name = "hwio" + std::to_string(h) + std::to_string(w) + "x" + std::to_string(i) + |
| "x" + std::to_string(o); |
| return name; |
| }); |
| |
| TEST_F(HexagonUtilsQuantWeightsChunkifyTest, calculate_num_weight_chunks) { |
| int64_t shape[] = {3, 3, 40, 40}; |
| int num_wgt_chunks = calculate_num_weight_chunks(shape, shape[0], shape[1], 32, 32); |
| EXPECT_EQ(num_wgt_chunks, 4); |
| } |
| |
| TEST_F(HexagonUtilsQuantWeightsChunkifyTest, prepare_hwio) { |
| int64_t shape[] = {3, 3, 40, 40}; |
| auto [h, w, i, o] = shape; |
| auto shape_tuple = std::make_tuple(h, w, i, o); |
| setupTensor(shape_tuple, int8); |
| |
| // copy_data is set to false here as there's a separate test for blockize when copy_data becomes |
| // true |
| auto num_wgt_chunks = calculate_num_weight_chunks(shape, shape[0], shape[1], 32, 32); |
| auto wgt_ptr_table = |
| reinterpret_cast<void**>(__builtin_alloca(num_wgt_chunks * sizeof(uintptr_t))); |
| auto chunked_tensor = prepare_hwio_8b(device_api, &flat_tensor, num_wgt_chunks, wgt_ptr_table); |
| |
| EXPECT_EQ(chunked_tensor.shape[0], 1); |
| EXPECT_EQ(chunked_tensor.shape[1], 1); |
| EXPECT_EQ(chunked_tensor.shape[2], round_up(i, 32) / 32); |
| EXPECT_EQ(chunked_tensor.shape[3], round_up(o, 32) / 32); |
| |
| release(device_api, chunked_tensor); |
| TearDownTensor(); |
| } |
| |
| TEST_P(HexagonUtilsQuantWeightsChunkifyTest, chunkify_hwio_8b) { |
| auto [shape_tuple, indices] = GetParam(); |
| auto [h, w, i, o] = shape_tuple; |
| setupTensor(shape_tuple, int8); |
| int64_t shape[] = {h, w, i, o}; |
| |
| auto num_wgt_chunks = calculate_num_weight_chunks(shape, shape[0], shape[1], 32, 32); |
| auto wgt_ptr_table = |
| reinterpret_cast<void**>(__builtin_alloca(num_wgt_chunks * sizeof(uintptr_t))); |
| auto chunked_tensor = prepare_hwio_8b(device_api, &flat_tensor, num_wgt_chunks, wgt_ptr_table); |
| |
| auto getChunkedElem = [width = w, chunked_tensor](int hh, int ww, int ii, int oo) { |
| auto data = static_cast<intptr_t*>(chunked_tensor.data); |
| auto chunk = data[ii / 32 * chunked_tensor.shape[3] + oo / 32]; |
| auto chunk_int8 = reinterpret_cast<int8_t*>(chunk); |
| return chunk_int8[hwio_to_sm_8b(width, hh, ww, ii % 32, oo % 32)]; |
| }; |
| |
| auto [hh, ww, ii, oo] = indices; |
| |
| EXPECT_EQ(flat_mem_data[flattened_idx(hh, ww, ii, oo, shape)], getChunkedElem(hh, ww, ii, oo)); |
| release(device_api, chunked_tensor); |
| } |