cpp/core/tests/FForCodecTest.cc - gluten - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "utils/tac/FForCodec.h"
 #include "utils/tac/TypeAwareCompressCodec.h"
 #include "utils/tac/ffor.hpp"

 #include <gtest/gtest.h>
 #include <cstring>
 #include <random>
 #include <vector>

 using namespace gluten::ffor;
 using namespace gluten;

 namespace {

 // Some non-TAC type values for negative testing.
 static constexpr int8_t kSomeUnsupportedType = 99;

 std::vector<uint64_t> genData(size_t n, uint64_t base, uint64_t range, uint64_t seed = 42) {
   std::mt19937_64 rng(seed);
   std::uniform_int_distribution<uint64_t> dist(0, range);
   std::vector<uint64_t> data(n);
   for (size_t i = 0; i < n; ++i) {
     data[i] = base + dist(rng);
   }
   return data;
 }

 std::vector<uint64_t> padToLanes(const std::vector<uint64_t>& data) {
   size_t padded = (data.size() + kLanes - 1) / kLanes * kLanes;
   auto result = data;
   result.resize(padded, data.empty() ? 0 : data.back());
   return result;
 }

 template <unsigned BW>
 void roundtripTest(const uint64_t* data, size_t n, uint64_t base) {
   size_t nPadded = (n + kLanes - 1) / kLanes * kLanes;
   size_t compN = compressedWords(nPadded, BW);

   std::vector<uint64_t> encoded(compN + kLanes, 0xDEADBEEFDEADBEEF);
   std::vector<uint64_t> decoded(nPadded, 0xDEADBEEFDEADBEEF);

   encode<BW>(data, encoded.data(), base, nPadded);
   decode<BW>(encoded.data(), decoded.data(), base, nPadded);

   for (size_t i = 0; i < n; ++i) {
     ASSERT_EQ(decoded[i], data[i]) << "Mismatch at index " << i;
   }
 }

 void roundtripTestRt(const uint64_t* data, size_t n, uint64_t base, unsigned bw) {
   size_t nPadded = (n + kLanes - 1) / kLanes * kLanes;
   size_t compN = compressedWords(nPadded, bw);

   std::vector<uint64_t> encoded(compN + kLanes, 0);
   std::vector<uint64_t> decoded(nPadded, 0);

   encodeRt(data, encoded.data(), base, nPadded, bw);
   decodeRt(encoded.data(), decoded.data(), base, nPadded, bw);

   for (size_t i = 0; i < n; ++i) {
     ASSERT_EQ(decoded[i], data[i]) << "Mismatch at index " << i;
   }
 }

 void compressRoundtrip(const uint64_t* data, size_t num) {
   std::vector<uint8_t> buf(compress64Bound(num));
   size_t written = compress64(data, num, buf.data());

   std::vector<uint64_t> decoded(num);
   size_t nDecoded = decompress64(buf.data(), written, decoded.data());

   ASSERT_EQ(nDecoded, num);
   for (size_t i = 0; i < num; ++i) {
     ASSERT_EQ(decoded[i], data[i]) << "Mismatch at index " << i;
   }
 }

 } // namespace

 // Low-level encode/decode tests

 TEST(FForTest, Bw0Constant) {
   std::vector<uint64_t> data(256, 12345);
   roundtripTest<0>(data.data(), data.size(), 12345);
 }

 TEST(FForTest, Bw1Binary) {
   auto data = padToLanes(genData(256, 100, 1));
   uint64_t base;
   unsigned bw;
   analyze(data.data(), data.size(), base, bw);
   ASSERT_EQ(bw, 1u);
   roundtripTest<1>(data.data(), data.size(), base);
 }

 TEST(FForTest, Bw6Narrow) {
   auto data = padToLanes(genData(1024, 1000, 63));
   roundtripTest<6>(data.data(), data.size(), 1000);
 }

 TEST(FForTest, Bw16Medium) {
   auto data = padToLanes(genData(1024, 50000, 65535));
   roundtripTest<16>(data.data(), data.size(), 50000);
 }

 TEST(FForTest, Bw32Wide) {
   auto data = padToLanes(genData(512, 1000000, (1ULL << 32) - 1));
   roundtripTest<32>(data.data(), data.size(), 1000000);
 }

 TEST(FForTest, Bw64FullRange) {
   auto data = padToLanes(genData(256, 0, UINT64_MAX));
   roundtripTest<64>(data.data(), data.size(), 0);
 }

 TEST(FForTest, AllBitwidthsSmall) {
   for (unsigned bw = 0; bw <= 64; ++bw) {
     uint64_t range = (bw == 0) ? 0 : (bw == 64) ? UINT64_MAX : ((1ULL << bw) - 1);
     auto data = padToLanes(genData(64, 42, range, 100 + bw));
     roundtripTestRt(data.data(), data.size(), 42, bw);
   }
 }

 TEST(FForTest, AllBitwidthsLarge) {
   for (unsigned bw = 0; bw <= 64; ++bw) {
     uint64_t range = (bw == 0) ? 0 : (bw == 64) ? UINT64_MAX : ((1ULL << bw) - 1);
     auto data = padToLanes(genData(4096, 42, range, 200 + bw));
     roundtripTestRt(data.data(), data.size(), 42, bw);
   }
 }

 TEST(FForTest, VariousSizes) {
   for (size_t n : {4, 8, 12, 16, 20, 28, 32, 60, 64, 100, 128, 255, 256, 500, 1000, 1024, 4096}) {
     auto data = padToLanes(genData(n, 100, 255, n));
     roundtripTest<8>(data.data(), (n + kLanes - 1) / kLanes * kLanes, 100);
   }
 }

 TEST(FForTest, MinSize) {
   uint64_t data[4] = {10, 11, 12, 13};
   roundtripTest<4>(data, 4, 10);
 }

 TEST(FForTest, AllSame) {
   std::vector<uint64_t> data(1024, 999999);
   roundtripTest<0>(data.data(), data.size(), 999999);
 }

 TEST(FForTest, Sequential) {
   std::vector<uint64_t> data(1024);
   for (size_t i = 0; i < 1024; ++i)
     data[i] = 1000 + i;
   uint64_t base;
   unsigned bw;
   analyze(data.data(), data.size(), base, bw);
   ASSERT_EQ(base, uint64_t(1000));
   ASSERT_EQ(bw, 10u);
   roundtripTest<10>(data.data(), data.size(), base);
 }

 TEST(FForTest, LargeBase) {
   uint64_t largeBase = UINT64_MAX - 1000;
   auto data = padToLanes(genData(256, largeBase, 100));
   roundtripTest<7>(data.data(), data.size(), largeBase);
 }

 TEST(FForTest, AnalyzeCorrectness) {
   uint64_t data1[] = {5, 5, 5, 5};
   uint64_t b;
   unsigned w;
   analyze(data1, 4, b, w);
   ASSERT_EQ(b, uint64_t(5));
   ASSERT_EQ(w, 0u);

   uint64_t data2[] = {10, 11, 10, 11};
   analyze(data2, 4, b, w);
   ASSERT_EQ(b, uint64_t(10));
   ASSERT_EQ(w, 1u);

   uint64_t data3[] = {0, 255, 128, 64};
   analyze(data3, 4, b, w);
   ASSERT_EQ(b, uint64_t(0));
   ASSERT_EQ(w, 8u);
 }

 TEST(FForTest, CompressedSize) {
   ASSERT_EQ(compressedWords(256, 6), size_t(24));
   ASSERT_EQ(compressedWords(256, 1), size_t(4));
   ASSERT_EQ(compressedWords(256, 64), size_t(256));
   ASSERT_EQ(compressedWords(256, 0), size_t(0));
 }

 // compress64 / decompress64 tests

 TEST(FForTest, Compress64Basic) {
   auto data = genData(256, 1000, 99);
   compressRoundtrip(data.data(), data.size());
 }

 TEST(FForTest, Compress64WithTail1) {
   auto data = genData(5, 100, 50);
   compressRoundtrip(data.data(), data.size());
 }

 TEST(FForTest, Compress64WithTail2) {
   auto data = genData(6, 100, 50);
   compressRoundtrip(data.data(), data.size());
 }

 TEST(FForTest, Compress64WithTail3) {
   auto data = genData(7, 100, 50);
   compressRoundtrip(data.data(), data.size());
 }

 TEST(FForTest, Compress64ExactLanes) {
   auto data = genData(4, 100, 50);
   compressRoundtrip(data.data(), data.size());
 }

 TEST(FForTest, Compress64OnlyTail) {
   for (size_t n = 1; n <= 3; ++n) {
     auto data = genData(n, 42, 10);
     compressRoundtrip(data.data(), data.size());
   }
 }

 TEST(FForTest, Compress64Large) {
   auto data = genData(10000, 5000, 255);
   compressRoundtrip(data.data(), data.size());
 }

 TEST(FForTest, Compress64LargeWithTail) {
   auto data = genData(10001, 5000, 255);
   compressRoundtrip(data.data(), data.size());
 }

 TEST(FForTest, Compress64AllSame) {
   std::vector<uint64_t> data(128, 42);
   compressRoundtrip(data.data(), data.size());
 }

 TEST(FForTest, Compress64FullRange) {
   auto data = genData(256, 0, UINT64_MAX);
   compressRoundtrip(data.data(), data.size());
 }

 TEST(FForTest, Compress64SizeCheck) {
   auto narrow = genData(256, 1000, 63); // bw=6
   std::vector<uint8_t> buf(compress64Bound(256));
   size_t written = compress64(narrow.data(), narrow.size(), buf.data());

   // block header(16) + packed data + tail header(16)
   size_t expected = kHeaderSize + compressedWords(256, 6) * sizeof(uint64_t) + kHeaderSize;
   ASSERT_EQ(written, expected);

   size_t raw = 256 * sizeof(uint64_t);
   double ratio = double(raw) / double(written);
   ASSERT_GT(ratio, 9.0) << "Ratio too low: " << ratio;
 }

 TEST(FForTest, Compress64AllSizes1To20) {
   for (size_t n = 1; n <= 20; ++n) {
     auto data = genData(n, 100, 200, n * 7);
     compressRoundtrip(data.data(), data.size());
   }
 }

 // OOB read test — decode() reads past the end of the compressed buffer on the
 // last group when newBitPos hits a 64-bit boundary. To detect this, we place
 // the compressed buffer at the end of an mmap'd page with a PROT_NONE guard
 // page immediately after, so any OOB read causes a SIGSEGV.
 //
 // Example: BW=32, 8 values (2 groups of 4). compressedWords = 4.
 // decode pre-loads in[0..3]. After group 1: newBitPos=64, overflow=0,
 // the else branch loads in[4..7] — 4 words past end of 4-word buffer.
 #if defined(__linux__) || defined(__APPLE__)
 #include <sys/mman.h>
 #include <unistd.h>

 // Allocate `size` bytes at the END of a page, with a guard page after.
 // Returns {base_ptr (to munmap), usable_ptr, total_mmap_size}.
 static std::tuple<void*, uint8_t*, size_t> allocAtPageEnd(size_t size) {
   long pageSize = sysconf(_SC_PAGESIZE);
   // Round up to cover `size` bytes + 1 guard page.
   size_t dataPages = (size + pageSize - 1) / pageSize;
   size_t totalSize = (dataPages + 1) * pageSize; // +1 for guard
   void* base = mmap(nullptr, totalSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   EXPECT_NE(base, MAP_FAILED);
   // Make the last page a guard (no access).
   void* guardPage = static_cast<uint8_t*>(base) + dataPages * pageSize;
   mprotect(guardPage, pageSize, PROT_NONE);
   // Return pointer to the last `size` bytes before the guard page.
   auto* usable = static_cast<uint8_t*>(guardPage) - size;
   return {base, usable, totalSize};
 }

 static void freePageEnd(void* base, size_t totalSize) {
   munmap(base, totalSize);
 }

 // BW=32, nValues=8: clean 64-bit boundary on last group, triggers OOB pre-load.
 TEST(FForTest, DecodeBw32OobGuardPage) {
   constexpr unsigned BW = 32;
   constexpr size_t N = 8; // 2 groups of 4
   uint64_t data[N];
   for (size_t i = 0; i < N; ++i) {
     data[i] = 1000 + i;
   }

   // Encode into exact-size buffer (no padding).
   size_t compN = compressedWords(N, BW);
   size_t compBytes = compN * sizeof(uint64_t);
   auto [encBase, encBuf, encTotalSize] = allocAtPageEnd(compBytes);
   auto* encPtr = reinterpret_cast<uint64_t*>(encBuf);
   encode<BW>(data, encPtr, 1000, N);

   // Decode from the exact-size buffer at page end — OOB read hits guard page.
   uint64_t decoded[N] = {};
   decode<BW>(encPtr, decoded, 1000, N);

   for (size_t i = 0; i < N; ++i) {
     ASSERT_EQ(decoded[i], data[i]) << "Mismatch at i=" << i;
   }
   freePageEnd(encBase, encTotalSize);
 }

 // BW=16, nValues=16: newBitPos=64 with overflow=0 on groups 3,7,11,15.
 // Last group (g=3) triggers OOB.
 TEST(FForTest, DecodeBw16OobGuardPage) {
   constexpr unsigned BW = 16;
   constexpr size_t N = 16; // 4 groups of 4
   uint64_t data[N];
   for (size_t i = 0; i < N; ++i) {
     data[i] = 50000 + i;
   }

   size_t compN = compressedWords(N, BW);
   size_t compBytes = compN * sizeof(uint64_t);
   auto [encBase, encBuf, encTotalSize] = allocAtPageEnd(compBytes);
   auto* encPtr = reinterpret_cast<uint64_t*>(encBuf);
   encode<BW>(data, encPtr, 50000, N);

   uint64_t decoded[N] = {};
   decode<BW>(encPtr, decoded, 50000, N);

   for (size_t i = 0; i < N; ++i) {
     ASSERT_EQ(decoded[i], data[i]) << "Mismatch at i=" << i;
   }
   freePageEnd(encBase, encTotalSize);
 }

 // BW=7, nValues=256: overflow > 0 on last group, triggers OOB in the
 // "load next words" branch (lines 177-180).
 TEST(FForTest, DecodeBw7OobGuardPage) {
   constexpr unsigned BW = 7;
   constexpr size_t N = 256;
   auto data = padToLanes(genData(N, 1000, 99));

   size_t compN = compressedWords(N, BW);
   size_t compBytes = compN * sizeof(uint64_t);
   auto [encBase, encBuf, encTotalSize] = allocAtPageEnd(compBytes);
   auto* encPtr = reinterpret_cast<uint64_t*>(encBuf);
   encode<BW>(data.data(), encPtr, 1000, N);

   uint64_t decoded[N] = {};
   decode<BW>(encPtr, decoded, 1000, N);

   for (size_t i = 0; i < N; ++i) {
     ASSERT_EQ(decoded[i], data[i]) << "Mismatch at i=" << i;
   }
   freePageEnd(encBase, encTotalSize);
 }

 #endif // __linux__ || __APPLE__

 // Misalignment tests — verify compress64/decompress64 handle unaligned pointers.

 TEST(FForTest, Compress64MisalignedOutput) {
   auto data = genData(256, 1000, 99);
   std::vector<uint8_t> buf(compress64Bound(256) + 16);

   for (size_t offset = 0; offset < 8; ++offset) {
     uint8_t* out = buf.data() + offset;
     size_t written = compress64(data.data(), data.size(), out);

     std::vector<uint64_t> decoded(256);
     size_t n = decompress64(out, written, decoded.data());
     ASSERT_EQ(n, size_t(256));
     for (size_t i = 0; i < 256; ++i) {
       ASSERT_EQ(decoded[i], data[i]) << "offset=" << offset << " i=" << i;
     }
   }
 }

 TEST(FForTest, Compress64MisalignedInput) {
   auto raw = genData(256, 1000, 99);
   std::vector<uint8_t> inputBuf(256 * sizeof(uint64_t) + 16);

   for (size_t offset = 0; offset < 8; ++offset) {
     std::memcpy(inputBuf.data() + offset, raw.data(), 256 * sizeof(uint64_t));
     const auto* misalignedInput = reinterpret_cast<const uint64_t*>(inputBuf.data() + offset);

     std::vector<uint8_t> comp(compress64Bound(256));
     size_t written = compress64(misalignedInput, 256, comp.data());

     std::vector<uint64_t> decoded(256);
     size_t n = decompress64(comp.data(), written, decoded.data());
     ASSERT_EQ(n, size_t(256));
     for (size_t i = 0; i < 256; ++i) {
       ASSERT_EQ(decoded[i], raw[i]) << "offset=" << offset << " i=" << i;
     }
   }
 }

 TEST(FForTest, Decompress64MisalignedOutput) {
   auto data = genData(256, 1000, 99);
   std::vector<uint8_t> comp(compress64Bound(256));
   size_t written = compress64(data.data(), data.size(), comp.data());

   std::vector<uint8_t> outBuf(256 * sizeof(uint64_t) + 16);
   for (size_t offset = 0; offset < 8; ++offset) {
     auto* misalignedOutput = reinterpret_cast<uint64_t*>(outBuf.data() + offset);
     size_t n = decompress64(comp.data(), written, misalignedOutput);
     ASSERT_EQ(n, size_t(256));
     for (size_t i = 0; i < 256; ++i) {
       uint64_t val;
       std::memcpy(&val, reinterpret_cast<uint8_t*>(misalignedOutput) + i * sizeof(uint64_t), sizeof(val));
       ASSERT_EQ(val, data[i]) << "offset=" << offset << " i=" << i;
     }
   }
 }

 TEST(FForTest, Compress64AllMisaligned) {
   auto raw = genData(256, 1000, 99);
   std::vector<uint8_t> inputBuf(256 * sizeof(uint64_t) + 16);
   std::vector<uint8_t> compBuf(compress64Bound(256) + 16);
   std::vector<uint8_t> outBuf(256 * sizeof(uint64_t) + 16);

   for (size_t inOff = 1; inOff < 8; inOff += 3) {
     for (size_t compOff = 1; compOff < 8; compOff += 3) {
       for (size_t outOff = 1; outOff < 8; outOff += 3) {
         std::memcpy(inputBuf.data() + inOff, raw.data(), 256 * sizeof(uint64_t));
         const auto* inPtr = reinterpret_cast<const uint64_t*>(inputBuf.data() + inOff);

         size_t written = compress64(inPtr, 256, compBuf.data() + compOff);

         auto* outPtr = reinterpret_cast<uint64_t*>(outBuf.data() + outOff);
         size_t n = decompress64(compBuf.data() + compOff, written, outPtr);
         ASSERT_EQ(n, size_t(256));
         for (size_t i = 0; i < 256; ++i) {
           uint64_t val;
           std::memcpy(&val, reinterpret_cast<uint8_t*>(outPtr) + i * sizeof(uint64_t), sizeof(val));
           ASSERT_EQ(val, raw[i]) << "inOff=" << inOff << " compOff=" << compOff << " outOff=" << outOff << " i=" << i;
         }
       }
     }
   }
 }

 // FForCodec wrapper tests

 TEST(FForCodecTest, CompressDecompressRoundtrip) {
   auto data = genData(1024, 5000, 255);
   int64_t inputSize = data.size() * sizeof(uint64_t);

   auto maxLen = FForCodec::maxCompressedLength(inputSize);
   std::vector<uint8_t> compressed(maxLen);

   auto compResult =
       FForCodec::compress(reinterpret_cast<const uint8_t*>(data.data()), inputSize, compressed.data(), maxLen);
   ASSERT_TRUE(compResult.ok()) << compResult.status().ToString();
   auto compressedSize = *compResult;
   ASSERT_GT(compressedSize, 0);
   ASSERT_LT(compressedSize, inputSize);

   std::vector<uint64_t> decoded(data.size());
   auto decResult =
       FForCodec::decompress(compressed.data(), compressedSize, reinterpret_cast<uint8_t*>(decoded.data()), inputSize);
   ASSERT_TRUE(decResult.ok()) << decResult.status().ToString();

   for (size_t i = 0; i < data.size(); ++i) {
     ASSERT_EQ(decoded[i], data[i]) << "Mismatch at index " << i;
   }
 }

 TEST(FForCodecTest, EmptyInput) {
   auto result = FForCodec::compress(nullptr, 0, nullptr, 0);
   ASSERT_TRUE(result.ok());
   ASSERT_EQ(*result, 0);
 }

 TEST(FForCodecTest, InvalidInputSize) {
   uint8_t dummy[7] = {};
   auto result = FForCodec::compress(dummy, 7, dummy, 100);
   ASSERT_FALSE(result.ok());
 }

 // Full-range random data: bw=64, FFOR can't compress below raw size.
 // This exercises the fallback path in compressTypeAwareBuffer where
 // compressed size >= uncompressed size and kUncompressedBuffer is used.
 TEST(FForCodecTest, FullRangeDataRoundtrip) {
   auto data = genData(256, 0, UINT64_MAX);
   int64_t inputSize = data.size() * sizeof(uint64_t);

   auto maxLen = FForCodec::maxCompressedLength(inputSize);
   std::vector<uint8_t> compressed(maxLen);

   auto compResult =
       FForCodec::compress(reinterpret_cast<const uint8_t*>(data.data()), inputSize, compressed.data(), maxLen);
   ASSERT_TRUE(compResult.ok()) << compResult.status().ToString();
   auto compressedSize = *compResult;
   // Full-range data: compressed >= raw (FFOR adds overhead at bw=64).
   ASSERT_GE(compressedSize, inputSize);

   std::vector<uint64_t> decoded(data.size());
   auto decResult =
       FForCodec::decompress(compressed.data(), compressedSize, reinterpret_cast<uint8_t*>(decoded.data()), inputSize);
   ASSERT_TRUE(decResult.ok()) << decResult.status().ToString();

   for (size_t i = 0; i < data.size(); ++i) {
     ASSERT_EQ(decoded[i], data[i]) << "Mismatch at index " << i;
   }
 }

 // TypeAwareCompressCodec roundtrip tests

 TEST(TypeAwareCompressCodecTest, SupportedTypes) {
   // Supported TAC types.
   ASSERT_TRUE(TypeAwareCompressCodec::support(tac::kUInt64));

   // Not supported.
   ASSERT_FALSE(TypeAwareCompressCodec::support(tac::kUnsupported));
   ASSERT_FALSE(TypeAwareCompressCodec::support(kSomeUnsupportedType));
 }

 TEST(TypeAwareCompressCodecTest, NarrowDataRoundtrip) {
   // Narrow range data: compresses well.
   auto data = genData(1024, 5000, 255);
   int64_t inputSize = data.size() * sizeof(uint64_t);

   auto maxLen = TypeAwareCompressCodec::maxCompressedLen(inputSize, tac::kUInt64);
   std::vector<uint8_t> compressed(maxLen);

   auto compResult = TypeAwareCompressCodec::compress(
       reinterpret_cast<const uint8_t*>(data.data()), inputSize, compressed.data(), maxLen, tac::kUInt64);
   ASSERT_TRUE(compResult.ok()) << compResult.status().ToString();
   auto compressedSize = *compResult;
   ASSERT_GT(compressedSize, 0);
   ASSERT_LT(compressedSize, inputSize);

   std::vector<uint64_t> decoded(data.size());
   auto decResult = TypeAwareCompressCodec::decompress(
       compressed.data(), compressedSize, reinterpret_cast<uint8_t*>(decoded.data()), inputSize);
   ASSERT_TRUE(decResult.ok()) << decResult.status().ToString();

   for (size_t i = 0; i < data.size(); ++i) {
     ASSERT_EQ(decoded[i], data[i]) << "Mismatch at index " << i;
   }
 }

 // Full-range random data through TypeAwareCompressCodec.
 // FFOR produces output >= input size. The caller (compressTypeAwareBuffer) would
 // fall back to kUncompressedBuffer, but TypeAwareCompressCodec itself still
 // produces valid (just large) output that roundtrips correctly.
 TEST(TypeAwareCompressCodecTest, FullRangeDataRoundtrip) {
   auto data = genData(256, 0, UINT64_MAX);
   int64_t inputSize = data.size() * sizeof(uint64_t);

   auto maxLen = TypeAwareCompressCodec::maxCompressedLen(inputSize, tac::kUInt64);
   std::vector<uint8_t> compressed(maxLen);

   auto compResult = TypeAwareCompressCodec::compress(
       reinterpret_cast<const uint8_t*>(data.data()), inputSize, compressed.data(), maxLen, tac::kUInt64);
   ASSERT_TRUE(compResult.ok()) << compResult.status().ToString();
   auto compressedSize = *compResult;
   // Compressed size >= input because full-range data can't be compressed.
   ASSERT_GE(compressedSize, inputSize);

   std::vector<uint64_t> decoded(data.size());
   auto decResult = TypeAwareCompressCodec::decompress(
       compressed.data(), compressedSize, reinterpret_cast<uint8_t*>(decoded.data()), inputSize);
   ASSERT_TRUE(decResult.ok()) << decResult.status().ToString();

   for (size_t i = 0; i < data.size(); ++i) {
     ASSERT_EQ(decoded[i], data[i]) << "Mismatch at index " << i;
   }
 }

 TEST(TypeAwareCompressCodecTest, DoubleTypeRoundtrip) {
   // Doubles reinterpreted as uint64 — exercises the codec with DOUBLE type.
   std::vector<double> doubles(512);
   std::mt19937_64 rng(99);
   std::uniform_real_distribution<double> dist(1000.0, 1001.0); // narrow range
   for (auto& d : doubles) {
     d = dist(rng);
   }

   int64_t inputSize = doubles.size() * sizeof(double);
   auto maxLen = TypeAwareCompressCodec::maxCompressedLen(inputSize, tac::kUInt64);
   std::vector<uint8_t> compressed(maxLen);

   auto compResult = TypeAwareCompressCodec::compress(
       reinterpret_cast<const uint8_t*>(doubles.data()), inputSize, compressed.data(), maxLen, tac::kUInt64);
   ASSERT_TRUE(compResult.ok()) << compResult.status().ToString();

   std::vector<double> decoded(doubles.size());
   auto decResult = TypeAwareCompressCodec::decompress(
       compressed.data(), *compResult, reinterpret_cast<uint8_t*>(decoded.data()), inputSize);
   ASSERT_TRUE(decResult.ok()) << decResult.status().ToString();

   for (size_t i = 0; i < doubles.size(); ++i) {
     ASSERT_EQ(*reinterpret_cast<const uint64_t*>(&decoded[i]), *reinterpret_cast<const uint64_t*>(&doubles[i]))
         << "Mismatch at index " << i;
   }
 }

 TEST(TypeAwareCompressCodecTest, EmptyInput) {
   auto result = TypeAwareCompressCodec::compress(nullptr, 0, nullptr, 0, tac::kUInt64);
   ASSERT_TRUE(result.ok());
   ASSERT_EQ(*result, 0);
 }

 TEST(TypeAwareCompressCodecTest, UnsupportedType) {
   uint8_t dummy[8] = {};
   auto result = TypeAwareCompressCodec::compress(dummy, 8, dummy, 100, kSomeUnsupportedType);
   ASSERT_FALSE(result.ok());
 }