blob: 8265e1d22ae0e78d7343b2fce6a0de4bc669ccc8 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cassert>
#include <cstdint>
#include <cstring>
#include "arrow/util/bit_run_reader.h"
namespace arrow {
namespace util {
namespace internal {
/// \brief Compress the buffer to spaced, excluding the null entries.
///
/// \param[in] src the source buffer
/// \param[in] num_values the size of source buffer
/// \param[in] valid_bits bitmap data indicating position of valid slots
/// \param[in] valid_bits_offset offset into valid_bits
/// \param[out] output the output buffer spaced
/// \return The size of spaced buffer.
template <typename T>
inline int SpacedCompress(const T* src, int num_values, const uint8_t* valid_bits,
int64_t valid_bits_offset, T* output) {
int num_valid_values = 0;
arrow::internal::SetBitRunReader reader(valid_bits, valid_bits_offset, num_values);
while (true) {
const auto run = reader.NextRun();
if (run.length == 0) {
break;
}
std::memcpy(output + num_valid_values, src + run.position, run.length * sizeof(T));
num_valid_values += static_cast<int32_t>(run.length);
}
return num_valid_values;
}
/// \brief Relocate values in buffer into positions of non-null values as indicated by
/// a validity bitmap.
///
/// \param[in, out] buffer the in-place buffer
/// \param[in] num_values total size of buffer including null slots
/// \param[in] null_count number of null slots
/// \param[in] valid_bits bitmap data indicating position of valid slots
/// \param[in] valid_bits_offset offset into valid_bits
/// \return The number of values expanded, including nulls.
template <typename T>
inline int SpacedExpand(T* buffer, int num_values, int null_count,
const uint8_t* valid_bits, int64_t valid_bits_offset) {
// Point to end as we add the spacing from the back.
int idx_decode = num_values - null_count;
// Depending on the number of nulls, some of the value slots in buffer may
// be uninitialized, and this will cause valgrind warnings / potentially UB
std::memset(static_cast<void*>(buffer + idx_decode), 0, null_count * sizeof(T));
if (idx_decode == 0) {
// All nulls, nothing more to do
return num_values;
}
arrow::internal::ReverseSetBitRunReader reader(valid_bits, valid_bits_offset,
num_values);
while (true) {
const auto run = reader.NextRun();
if (run.length == 0) {
break;
}
idx_decode -= static_cast<int32_t>(run.length);
assert(idx_decode >= 0);
std::memmove(buffer + run.position, buffer + idx_decode, run.length * sizeof(T));
}
// Otherwise caller gave an incorrect null_count
assert(idx_decode == 0);
return num_values;
}
} // namespace internal
} // namespace util
} // namespace arrow