be/src/olap/selection_vector.h - doris - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #pragma once

 #include <memory>

 #include "util/bitmap.h"

 namespace doris {

 // Bit-vector representing the selection status of each row in a row block.
 //
 // It is used to scan data with column predicate.
 // the bit in the selection vector is used to indicate whether the given row is live.
 class SelectionVector {
 public:
     // Construct a new vector. The bits are initially in an indeterminate state.
     // Call set_all_true() if you require all rows to be initially selected.
     explicit SelectionVector(size_t row_capacity)
             : _n_rows(row_capacity),
               _n_bytes(BitmapSize(row_capacity)),
               _bitmap(new uint8_t[_n_bytes]) {
         CHECK_GT(_n_bytes, 0);
         set_all_false();
     }

     // return the number of selected rows.
     size_t count_selected() const { return Bits::Count(_bitmap.get(), _n_bytes); }

     // Return true if any rows are selected, or false
     // This is equivalent to (count_selected() > 0), but faster.
     bool any_selected() const;

     bool is_row_selected(size_t row) const {
         DCHECK_LT(row, _n_rows);
         return BitmapTest(_bitmap.get(), row);
     }

     void set_row_selected(size_t row) {
         DCHECK_LT(row, _n_rows);
         BitmapSet(_bitmap.get(), row);
     }

     void set_all_true() {
         memset(_bitmap.get(), 0xff, _n_bytes);
         pad_extra_bits_wit_zeroes();
     }

     void set_all_false() { memset(_bitmap.get(), 0, _n_bytes); }

     void clear_bit(size_t row) {
         DCHECK_LT(row, _n_rows);
         return BitmapClear(_bitmap.get(), row);
     }

     uint8_t* mutable_bitmap() { return _bitmap.get(); }

     const uint8_t* bitmap() const { return _bitmap.get(); }

     size_t nrows() const { return _n_rows; }

     std::string to_string() const { return BitmapToString(_bitmap.get(), _n_rows); }

 private:
     // Pads any non-byte-aligned bits at the end of the SelectionVector with
     // zeroes.
     //
     // To improve performance, CountSelected() and AnySelected() evaluate the
     // SelectionVector's bitmap in terms of bytes. As such, they consider all of
     // the trailing bits, even if the bitmap's bit length is not byte-aligned and
     // some trailing bits aren't part of the bitmap.
     //
     // To address this without sacrificing performance, we need to zero out all
     // trailing bits at construction time, or after any operation that sets all
     // bytes in bulk.

     void pad_extra_bits_wit_zeroes() {
         size_t bits_in_last_byte = _n_rows & 7;
         if (bits_in_last_byte > 0) {
             BitmapChangeBits(_bitmap.get(), _n_rows, 8 - bits_in_last_byte, false);
         }
     }

 private:
     // row capacity
     size_t _n_rows;
     size_t _n_bytes;
     std::unique_ptr<uint8_t[]> _bitmap;
     DISALLOW_COPY_AND_ASSIGN(SelectionVector);
 };

 inline bool SelectionVector::any_selected() const {
     size_t rem = _n_bytes;
     const uint32_t* p32 = reinterpret_cast<const uint32_t*>(_bitmap.get());
     while (rem >= 4) {
         if (*p32 != 0) {
             return true;
         }
         ++p32;
         rem -= 4;
     }

     const uint8_t* p8 = reinterpret_cast<const uint8_t*>(p32);
     while (rem > 0) {
         if (*p8 != 0) {
             return true;
         }
         ++p8;
         --rem;
     }
     return false;
 }

 // A SelectionVectorView keeps track of where in the selection vector a given
 // batch will start from. After processing a batch, Advance() should be called
 // and the view will move forward by the appropriate amount. In this way, the
 // underlying selection vector can easily be updated batch-by-batch.
 class SelectionVectorView {
 public:
     // Constructs a new SelectionVectorView.
     //
     // The 'sel_vec' object must outlive this SelectionVectorView.
     explicit SelectionVectorView(SelectionVector* sel_vec) : _sel_vec(sel_vec), _row_offset(0) {}
     void advance(size_t skip) {
         DCHECK_LE(skip, _sel_vec->nrows() - _row_offset);
         _row_offset += skip;
     }
     void set_bit(size_t row_idx) {
         DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
         BitmapSet(_sel_vec->mutable_bitmap(), _row_offset + row_idx);
     }
     void clear_bit(size_t row_idx) {
         DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
         BitmapClear(_sel_vec->mutable_bitmap(), _row_offset + row_idx);
     }
     bool test_bit(size_t row_idx) {
         DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
         return BitmapTest(_sel_vec->bitmap(), _row_offset + row_idx);
     }
     void clear_bits(size_t nrows) {
         DCHECK_LE(nrows, _sel_vec->nrows() - _row_offset);
         BitmapChangeBits(_sel_vec->mutable_bitmap(), _row_offset, nrows, false);
     }

 private:
     SelectionVector* _sel_vec = nullptr;
     size_t _row_offset;
 };

 } // namespace doris
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#pragma once

	#include <memory>

	#include "util/bitmap.h"

	namespace doris {

	// Bit-vector representing the selection status of each row in a row block.
	//
	// It is used to scan data with column predicate.
	// the bit in the selection vector is used to indicate whether the given row is live.
	class SelectionVector {
	public:
	// Construct a new vector. The bits are initially in an indeterminate state.
	// Call set_all_true() if you require all rows to be initially selected.
	explicit SelectionVector(size_t row_capacity)
	: _n_rows(row_capacity),
	_n_bytes(BitmapSize(row_capacity)),
	_bitmap(new uint8_t[_n_bytes]) {
	CHECK_GT(_n_bytes, 0);
	set_all_false();
	}

	// return the number of selected rows.
	size_t count_selected() const { return Bits::Count(_bitmap.get(), _n_bytes); }

	// Return true if any rows are selected, or false
	// This is equivalent to (count_selected() > 0), but faster.
	bool any_selected() const;

	bool is_row_selected(size_t row) const {
	DCHECK_LT(row, _n_rows);
	return BitmapTest(_bitmap.get(), row);
	}

	void set_row_selected(size_t row) {
	DCHECK_LT(row, _n_rows);
	BitmapSet(_bitmap.get(), row);
	}

	void set_all_true() {
	memset(_bitmap.get(), 0xff, _n_bytes);
	pad_extra_bits_wit_zeroes();
	}

	void set_all_false() { memset(_bitmap.get(), 0, _n_bytes); }

	void clear_bit(size_t row) {
	DCHECK_LT(row, _n_rows);
	return BitmapClear(_bitmap.get(), row);
	}

	uint8_t* mutable_bitmap() { return _bitmap.get(); }

	const uint8_t* bitmap() const { return _bitmap.get(); }

	size_t nrows() const { return _n_rows; }

	std::string to_string() const { return BitmapToString(_bitmap.get(), _n_rows); }

	private:
	// Pads any non-byte-aligned bits at the end of the SelectionVector with
	// zeroes.
	//
	// To improve performance, CountSelected() and AnySelected() evaluate the
	// SelectionVector's bitmap in terms of bytes. As such, they consider all of
	// the trailing bits, even if the bitmap's bit length is not byte-aligned and
	// some trailing bits aren't part of the bitmap.
	//
	// To address this without sacrificing performance, we need to zero out all
	// trailing bits at construction time, or after any operation that sets all
	// bytes in bulk.

	void pad_extra_bits_wit_zeroes() {
	size_t bits_in_last_byte = _n_rows & 7;
	if (bits_in_last_byte > 0) {
	BitmapChangeBits(_bitmap.get(), _n_rows, 8 - bits_in_last_byte, false);
	}
	}

	private:
	// row capacity
	size_t _n_rows;
	size_t _n_bytes;
	std::unique_ptr<uint8_t[]> _bitmap;
	DISALLOW_COPY_AND_ASSIGN(SelectionVector);
	};

	inline bool SelectionVector::any_selected() const {
	size_t rem = _n_bytes;
	const uint32_t* p32 = reinterpret_cast<const uint32_t*>(_bitmap.get());
	while (rem >= 4) {
	if (*p32 != 0) {
	return true;
	}
	++p32;
	rem -= 4;
	}

	const uint8_t* p8 = reinterpret_cast<const uint8_t*>(p32);
	while (rem > 0) {
	if (*p8 != 0) {
	return true;
	}
	++p8;
	--rem;
	}
	return false;
	}

	// A SelectionVectorView keeps track of where in the selection vector a given
	// batch will start from. After processing a batch, Advance() should be called
	// and the view will move forward by the appropriate amount. In this way, the
	// underlying selection vector can easily be updated batch-by-batch.
	class SelectionVectorView {
	public:
	// Constructs a new SelectionVectorView.
	//
	// The 'sel_vec' object must outlive this SelectionVectorView.
	explicit SelectionVectorView(SelectionVector* sel_vec) : _sel_vec(sel_vec), _row_offset(0) {}
	void advance(size_t skip) {
	DCHECK_LE(skip, _sel_vec->nrows() - _row_offset);
	_row_offset += skip;
	}
	void set_bit(size_t row_idx) {
	DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
	BitmapSet(_sel_vec->mutable_bitmap(), _row_offset + row_idx);
	}
	void clear_bit(size_t row_idx) {
	DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
	BitmapClear(_sel_vec->mutable_bitmap(), _row_offset + row_idx);
	}
	bool test_bit(size_t row_idx) {
	DCHECK_LE(row_idx, _sel_vec->nrows() - _row_offset);
	return BitmapTest(_sel_vec->bitmap(), _row_offset + row_idx);
	}
	void clear_bits(size_t nrows) {
	DCHECK_LE(nrows, _sel_vec->nrows() - _row_offset);
	BitmapChangeBits(_sel_vec->mutable_bitmap(), _row_offset, nrows, false);
	}

	private:
	SelectionVector* _sel_vec = nullptr;
	size_t _row_offset;
	};

	} // namespace doris