blob: 82c21e765cca7deb6752f4e74ba40b5594a42a4d [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <benchmark/benchmark.h>
#include <random>
#include <vector>
#include "util/frame_of_reference_coding.h"
namespace doris {
// Reference (unoptimized) bit unpacker, kept as the baseline for the benchmarks in this file.
//
// Reads `in_num` values of `bit_width` bits each from `input` and stores them, one per
// element, into `output`. Bits are consumed most-significant-first within each input byte,
// and the first bit read for a value becomes that value's highest (bit_width - 1) bit.
//
// The bit-at-a-time loop is intentional: this function exists to be measured, so it must
// not be made faster.
template <typename T>
void bit_unpack(const uint8_t* input, uint8_t in_num, int bit_width, T* output) {
    // Position of the next bit to read inside *input; 0 is the most significant bit.
    int bit_pos = 0;
    for (uint8_t remaining = in_num; remaining > 0; --remaining, ++output) {
        *output = 0;
        // `shift` is the destination position of the bit about to be read.
        for (int shift = bit_width - 1; shift >= 0; --shift) {
            // Advance lazily, only when another bit is actually needed, so the byte
            // following the last packed bit is never dereferenced.
            if (bit_pos == 8) {
                ++input;
                bit_pos = 0;
            }
            const T bit = static_cast<T>((*input >> (7 - bit_pos)) & 1);
            *output |= bit << shift;
            ++bit_pos;
        }
    }
}
// Benchmarks the reference bit-at-a-time bit_unpack() defined in this file.
//
// state.range(0) supplies the bit width `w`. 255 random values, each masked to its low `w`
// bits, are packed once with ForEncoder::bit_pack; every benchmark iteration then unpacks
// that same buffer. Throughput is reported in packed input bytes.
static void BM_BitUnpack(benchmark::State& state) {
    int w = static_cast<int>(state.range(0));
    int n = 255;
    std::default_random_engine e;
    std::uniform_int_distribution<__int128_t> u;
    ForEncoder<__int128_t> encoder(nullptr);
    std::vector<__int128_t> test_data(n);
    // Build the low-`w`-bits mask in unsigned arithmetic. With the signed form
    // ((__int128_t)1 << w) - 1, w == 127 yields INT128_MIN - 1, which is signed overflow
    // (undefined behaviour). The unsigned computation gives the same mask for every width.
    const __int128_t in_mask = static_cast<__int128_t>((static_cast<__uint128_t>(1) << w) - 1);
    for (auto& value : test_data) {
        value = u(e) & in_mask;
    }
    // Packed buffer size: n values of w bits each, rounded up to whole bytes.
    std::vector<uint8_t> o((n * w + 7) / 8);
    encoder.bit_pack(test_data.data(), n, w, o.data());
    std::vector<__int128_t> output(n);
    for (auto _ : state) {
        // Keep the optimizer from hoisting or eliding the unpack work.
        benchmark::DoNotOptimize(o.data());
        benchmark::DoNotOptimize(output.data());
        bit_unpack(o.data(), n, w, output.data());
        benchmark::ClobberMemory();
    }
    int64_t size = o.size();
    state.SetBytesProcessed(int64_t(state.iterations()) * size);
}
// Benchmarks ForDecoder::bit_unpack on the same workload shape as BM_BitUnpack.
//
// state.range(0) supplies the bit width `w`. 255 random values, each masked to its low `w`
// bits, are packed once with ForEncoder::bit_pack; every benchmark iteration then unpacks
// that same buffer through the decoder. Throughput is reported in packed input bytes.
static void BM_BitUnpackOptimized(benchmark::State& state) {
    int w = static_cast<int>(state.range(0));
    int n = 255;
    std::default_random_engine e;
    std::uniform_int_distribution<__int128_t> u;
    ForEncoder<__int128_t> encoder(nullptr);
    ForDecoder<__int128_t> decoder(nullptr, 0);
    std::vector<__int128_t> test_data(n);
    // Build the low-`w`-bits mask in unsigned arithmetic. With the signed form
    // ((__int128_t)1 << w) - 1, w == 127 yields INT128_MIN - 1, which is signed overflow
    // (undefined behaviour). The unsigned computation gives the same mask for every width.
    const __int128_t in_mask = static_cast<__int128_t>((static_cast<__uint128_t>(1) << w) - 1);
    for (auto& value : test_data) {
        value = u(e) & in_mask;
    }
    // Packed buffer size: n values of w bits each, rounded up to whole bytes.
    std::vector<uint8_t> o((n * w + 7) / 8);
    encoder.bit_pack(test_data.data(), n, w, o.data());
    std::vector<__int128_t> output(n);
    for (auto _ : state) {
        // Keep the optimizer from hoisting or eliding the unpack work.
        benchmark::DoNotOptimize(o.data());
        benchmark::DoNotOptimize(output.data());
        decoder.bit_unpack(o.data(), n, w, output.data());
        benchmark::ClobberMemory();
    }
    int64_t size = o.size();
    state.SetBytesProcessed(int64_t(state.iterations()) * size);
}
// Register both benchmarks over every bit width from 1 to 127 (passed to each function as
// state.range(0)), with timings reported in nanoseconds.
BENCHMARK(BM_BitUnpack)->DenseRange(1, 127)->Unit(benchmark::kNanosecond);
BENCHMARK(BM_BitUnpackOptimized)->DenseRange(1, 127)->Unit(benchmark::kNanosecond);
} // namespace doris