blob: 059fbf1696a26a9e10e0c535e984db7d0fc3c4fa [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "kudu/common/rowblock.h"
#include <limits>
#include <numeric>
#include <vector>
#include <glog/logging.h>
#include "kudu/gutil/bits.h"
#include "kudu/gutil/cpu.h"
#include "kudu/gutil/port.h"
#include "kudu/util/bitmap.h"
using std::vector;
namespace kudu {
SelectionVector::SelectionVector(size_t row_capacity)
: bytes_capacity_(BitmapSize(row_capacity)),
n_rows_(row_capacity),
n_bytes_(bytes_capacity_),
bitmap_(new uint8_t[n_bytes_]) {
CHECK_GT(n_bytes_, 0);
PadExtraBitsWithZeroes();
}
void SelectionVector::Resize(size_t n_rows) {
if (PREDICT_FALSE(n_rows == n_rows_)) {
return;
}
size_t new_bytes = BitmapSize(n_rows);
CHECK_LE(new_bytes, bytes_capacity_);
n_rows_ = n_rows;
n_bytes_ = new_bytes;
PadExtraBitsWithZeroes();
}
void SelectionVector::ClearToSelectAtMost(size_t max_rows) {
if (max_rows < n_rows_) {
BitmapIterator iter(&bitmap_[0], n_rows_);
bool selected;
size_t run_size;
size_t end_idx = 0;
// Adjust the end index until we have selected 'max_rows' rows.
while ((run_size = iter.Next(&selected)) && max_rows > 0) {
if (selected) {
if (run_size >= max_rows) {
end_idx += max_rows;
break;
}
max_rows -= run_size;
}
end_idx += run_size;
}
// If the limit is reached, zero out the rest of the selection vector.
if (n_rows_ > end_idx) {
BitmapChangeBits(&bitmap_[0], end_idx, n_rows_ - end_idx, false);
}
}
}
template<bool BMI>
static void GetSelectedRowsInternal(const uint8_t* __restrict__ bitmap,
int n_bytes,
uint16_t* __restrict__ dst) {
ForEachSetBit(bitmap, n_bytes * 8,
[&](int bit) {
*dst++ = bit;
});
}
static const bool kHasBmi = base::CPU().has_bmi();
#ifdef __x86_64__
// Explicit instantiation with the BMI instruction set enabled, which
// makes this slightly faster.
template
__attribute__((target("bmi")))
void GetSelectedRowsInternal<true>(const uint8_t* __restrict__ bitmap,
int n_bytes,
uint16_t* __restrict__ dst);
#endif
SelectedRows SelectionVector::GetSelectedRows() const {
CHECK_LE(n_rows_, std::numeric_limits<uint16_t>::max());
int n_selected = CountSelected();
if (n_selected == n_rows_) {
return SelectedRows(this);
}
vector<uint16_t> selected;
if (n_selected > 0) {
selected.resize(n_selected);
if (kHasBmi) {
GetSelectedRowsInternal<true>(&bitmap_[0], n_bytes_, selected.data());
} else {
GetSelectedRowsInternal<false>(&bitmap_[0], n_bytes_, selected.data());
}
}
return SelectedRows(this, std::move(selected));
}
size_t SelectionVector::CountSelected() const {
return Bits::Count(&bitmap_[0], n_bytes_);
}
bool SelectionVector::AnySelected() const {
size_t rem = n_bytes_;
const uint32_t *p32 = reinterpret_cast<const uint32_t *>(
&bitmap_[0]);
while (rem >= 4) {
if (*p32 != 0) {
return true;
}
p32++;
rem -= 4;
}
const uint8_t *p8 = reinterpret_cast<const uint8_t *>(p32);
while (rem > 0) {
if (*p8 != 0) {
return true;
}
p8++;
rem--;
}
return false;
}
bool operator==(const SelectionVector& a, const SelectionVector& b) {
if (a.nrows() != b.nrows()) {
return false;
}
return BitmapEquals(a.bitmap(), b.bitmap(), a.nrows());
}
bool operator!=(const SelectionVector& a, const SelectionVector& b) {
return !(a == b);
}
std::vector<uint16_t> SelectedRows::CreateRowIndexes() {
std::vector<uint16_t> ret(num_selected());
std::iota(ret.begin(), ret.end(), 0);
return ret;
}
//////////////////////////////
// RowBlock
//////////////////////////////
RowBlock::RowBlock(const Schema* schema,
size_t nrows,
RowBlockMemory* memory)
: schema_(schema),
columns_data_(schema->num_columns()),
column_non_null_bitmaps_(schema->num_columns()),
row_capacity_(nrows),
nrows_(nrows),
memory_(memory),
sel_vec_(nrows) {
CHECK_GT(row_capacity_, 0);
size_t bitmap_size = BitmapSize(row_capacity_);
for (size_t i = 0; i < schema->num_columns(); ++i) {
const ColumnSchema& col_schema = schema->column(i);
size_t col_size = row_capacity_ * col_schema.type_info()->size();
columns_data_[i] = new uint8_t[col_size];
if (col_schema.is_nullable()) {
column_non_null_bitmaps_[i] = new uint8_t[bitmap_size];
}
}
}
RowBlock::~RowBlock() {
for (uint8_t* column_data : columns_data_) {
delete[] column_data;
}
for (uint8_t* bitmap_data : column_non_null_bitmaps_) {
delete[] bitmap_data;
}
}
void RowBlock::Resize(size_t n_rows) {
if (PREDICT_FALSE(n_rows == nrows_)) {
return;
}
CHECK_LE(n_rows, row_capacity_);
nrows_ = n_rows;
sel_vec_.Resize(n_rows);
}
} // namespace kudu