blob: a5c04615893474236abfd3fae4a88e6fe964ac78 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <ostream>
#include <string>
#include <glog/logging.h>
#include "kudu/common/rowblock_memory.h"
#include "kudu/common/types.h"
#include "kudu/gutil/strings/fastmem.h"
#include "kudu/gutil/strings/stringpiece.h"
#include "kudu/util/bitmap.h"
#include "kudu/util/memory/overwrite.h"
#include "kudu/util/status.h"
namespace kudu {
class Arena;
class ColumnBlockCell;
class SelectionVector;
// A block of data all belonging to a single column.
// This is simply a view into a buffer - it does not have any associated
// storage in and of itself. It does, however, maintain its type
// information, which can be used for extra type safety in debug mode.
class ColumnBlock {
public:
typedef ColumnBlockCell Cell;
ColumnBlock(const TypeInfo* type,
uint8_t* non_null_bitmap,
void* data,
size_t nrows,
RowBlockMemory* memory)
: type_(type),
non_null_bitmap_(non_null_bitmap),
data_(reinterpret_cast<uint8_t*>(data)),
nrows_(nrows),
memory_(memory) {
DCHECK(data_) << "null data";
}
void SetCellIsNull(size_t idx, bool is_null) {
DCHECK(is_nullable());
BitmapChange(non_null_bitmap_, idx, !is_null);
}
void SetCellValue(size_t idx, const void *new_val) {
strings::memcpy_inlined(mutable_cell_ptr(idx), new_val, type_->size());
}
#ifndef NDEBUG
void OverwriteWithPattern(size_t idx, StringPiece pattern) {
char *col_data = reinterpret_cast<char *>(mutable_cell_ptr(idx));
kudu::OverwriteWithPattern(col_data, type_->size(), pattern);
}
#endif
// Return a pointer to the given cell.
const uint8_t *cell_ptr(size_t idx) const {
DCHECK_LT(idx, nrows_);
return data_ + type_->size() * idx;
}
// Returns a pointer to the given cell or NULL.
const uint8_t *nullable_cell_ptr(size_t idx) const {
return is_null(idx) ? NULL : cell_ptr(idx);
}
Cell cell(size_t idx) const;
// Return the bitmap indicating whether each cell is non-NULL.
// A set bit indicates non-NULL.
//
// This returns nullptr for a non-nullable column.
uint8_t *non_null_bitmap() const {
return non_null_bitmap_;
}
bool is_nullable() const {
return non_null_bitmap_ != nullptr;
}
bool is_null(size_t idx) const {
DCHECK(is_nullable());
DCHECK_LT(idx, nrows_);
return !BitmapTest(non_null_bitmap_, idx);
}
size_t stride() const { return type_->size(); }
const uint8_t* data() const { return data_; }
uint8_t* data() { return data_; }
size_t nrows() const { return nrows_; }
RowBlockMemory* memory() { return memory_; }
Arena* arena() { return &memory_->arena; }
const TypeInfo* type_info() const {
return type_;
}
std::string ToString() const {
std::string s;
for (int i = 0; i < nrows(); i++) {
if (i > 0) {
s.append(" ");
}
if (is_nullable() && is_null(i)) {
s.append("NULL");
} else {
type_->AppendDebugStringForValue(cell_ptr(i), &s);
}
}
return s;
}
// Copies a range of cells between two ColumnBlocks.
//
// The extent of the range is designated by 'src_cell_off' and 'num_cells'. It
// is copied to 'dst' at 'dst_cell_off'.
//
// Note: The inclusion of 'sel_vec' in this function is an admission that
// ColumnBlocks are always used via RowBlocks, and a requirement for safe
// handling of types with indirect data (i.e. deselected cells are not
// relocated because doing so would be unsafe).
//
// TODO(adar): for columns with indirect data, existing arena allocations
// belonging to cells in 'dst' that are overwritten will NOT be deallocated.
Status CopyTo(const SelectionVector& sel_vec,
ColumnBlock* dst, size_t src_cell_off,
size_t dst_cell_off, size_t num_cells) const;
private:
friend class ColumnBlockCell;
friend class ColumnDataView;
// Return a pointer to the given cell.
uint8_t *mutable_cell_ptr(size_t idx) {
DCHECK_LT(idx, nrows_);
return data_ + type_->size() * idx;
}
const TypeInfo *type_;
uint8_t *non_null_bitmap_;
uint8_t *data_;
size_t nrows_;
RowBlockMemory* memory_;
};
inline bool operator==(const ColumnBlock& a, const ColumnBlock& b) {
// 1. Same number of rows.
if (a.nrows() != b.nrows()) {
return false;
}
// 2. Same nullability.
if (a.is_nullable() != b.is_nullable()) {
return false;
}
// 3. If nullable, same null bitmap contents.
if (a.is_nullable() &&
!BitmapEquals(a.non_null_bitmap(), b.non_null_bitmap(), a.nrows())) {
return false;
}
// 4. Same data. We can't just compare the raw data because some entries may
// be pointers to the actual data elsewhere.
for (int i = 0; i < a.nrows(); i++) {
if (a.is_nullable() && a.is_null(i)) {
continue;
}
if (a.type_info()->Compare(a.cell_ptr(i), b.cell_ptr(i)) != 0) {
return false;
}
}
return true;
}
inline bool operator!=(const ColumnBlock& a, const ColumnBlock& b) {
return !(a == b);
}
// One of the cells in a ColumnBlock.
class ColumnBlockCell {
public:
ColumnBlockCell(ColumnBlock block, size_t row_idx)
: block_(block), row_idx_(row_idx) {}
const TypeInfo* typeinfo() const { return block_.type_info(); }
size_t size() const { return block_.type_info()->size(); }
const void* ptr() const {
return is_nullable() ? block_.nullable_cell_ptr(row_idx_)
: block_.cell_ptr(row_idx_);
}
void* mutable_ptr() { return block_.mutable_cell_ptr(row_idx_); }
bool is_nullable() const { return block_.is_nullable(); }
bool is_null() const { return block_.is_null(row_idx_); }
void set_null(bool is_null) { block_.SetCellIsNull(row_idx_, is_null); }
protected:
ColumnBlock block_;
size_t row_idx_;
};
inline ColumnBlockCell ColumnBlock::cell(size_t idx) const {
return ColumnBlockCell(*this, idx);
}
// Wrap the ColumnBlock to expose a directly raw block at the specified offset.
// Used by the reader and block encoders to read/write raw data.
class ColumnDataView {
public:
explicit ColumnDataView(ColumnBlock *column_block, size_t first_row_idx = 0)
: column_block_(column_block), row_offset_(0) {
Advance(first_row_idx);
}
void Advance(size_t skip) {
// Check <= here, not <, since you can skip to
// the very end of the data (leaving an empty block)
DCHECK_LE(skip, column_block_->nrows());
row_offset_ += skip;
}
size_t first_row_index() const {
return row_offset_;
}
// Set 'nrows' bits of the the null-bitmap to "value"
// true if not null, false if null.
void SetNullBits(size_t nrows, bool value) {
BitmapChangeBits(column_block_->non_null_bitmap(), row_offset_, nrows, value);
}
uint8_t *data() {
return column_block_->mutable_cell_ptr(row_offset_);
}
const uint8_t *data() const {
return column_block_->cell_ptr(row_offset_);
}
RowBlockMemory* memory() { return column_block_->memory(); }
Arena* arena() { return &memory()->arena; }
size_t nrows() const {
return column_block_->nrows() - row_offset_;
}
const size_t stride() const {
return column_block_->stride();
}
const TypeInfo* type_info() const {
return column_block_->type_info();
}
private:
ColumnBlock *column_block_;
size_t row_offset_;
};
} // namespace kudu