blob: a4f269dd4710cd2e4f35e406add4bc5bfa6481c0 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <benchmark/benchmark.h>
#include <random>
#include <vector>
#include "util/frame_of_reference_coding.h"
namespace doris {
// original bit_pack function
template <typename T>
void bit_pack(const T* input, uint8_t in_num, int bit_width, uint8_t* output) {
if (in_num == 0 || bit_width == 0) {
return;
}
T in_mask = 0;
int bit_index = 0;
*output = 0;
for (int i = 0; i < in_num; i++) {
in_mask = ((T)1) << (bit_width - 1);
for (int k = 0; k < bit_width; k++) {
if (bit_index > 7) {
bit_index = 0;
output++;
*output = 0;
}
*output |= (((input[i] & in_mask) >> (bit_width - k - 1)) << (7 - bit_index));
in_mask >>= 1;
bit_index++;
}
}
}
static void BM_BitPack(benchmark::State& state) {
int w = state.range(0);
int n = 255;
std::default_random_engine e;
std::uniform_int_distribution<int64_t> u;
std::vector<__int128_t> test_data(n);
__int128_t in_mask = ((__int128_t(1)) << w) - 1;
for (int i = 0; i < n; i++) {
test_data[i] = u(e) & in_mask;
}
int size = (n * w + 7) / 8;
std::vector<uint8_t> output(size);
for (auto _ : state) {
benchmark::DoNotOptimize(test_data.data());
benchmark::DoNotOptimize(output.data());
bit_pack(test_data.data(), n, w, output.data());
benchmark::ClobberMemory();
}
state.SetBytesProcessed(int64_t(state.iterations()) * size);
}
static void BM_BitPackOptimized(benchmark::State& state) {
int w = state.range(0);
int n = 255;
std::default_random_engine e;
std::uniform_int_distribution<int64_t> u;
std::vector<__int128_t> test_data(n);
__int128_t in_mask = ((__int128_t(1)) << w) - 1;
for (int i = 0; i < n; i++) {
test_data[i] = u(e) & in_mask;
}
int size = (n * w + 7) / 8;
std::vector<uint8_t> output(size);
ForEncoder<__int128_t> forEncoder(nullptr);
for (auto _ : state) {
benchmark::DoNotOptimize(test_data.data());
benchmark::DoNotOptimize(output.data());
forEncoder.bit_pack(test_data.data(), n, w, output.data());
benchmark::ClobberMemory();
}
state.SetBytesProcessed(int64_t(state.iterations()) * size);
}
BENCHMARK(BM_BitPack)->DenseRange(1, 127, 16)->Unit(benchmark::kNanosecond);
BENCHMARK(BM_BitPackOptimized)->DenseRange(1, 127, 16)->Unit(benchmark::kNanosecond);
} // namespace doris