| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "storage/row_cursor.h" |
| |
| #include <glog/logging.h> |
| |
| #include <algorithm> |
| #include <numeric> |
| #include <ostream> |
| |
| #include "common/cast_set.h" |
| #include "common/consts.h" |
| #include "core/data_type/primitive_type.h" |
| #include "core/field.h" |
| #include "storage/key_coder.h" |
| #include "storage/olap_common.h" |
| #include "storage/olap_define.h" |
| #include "storage/tablet/tablet_schema.h" |
| #include "storage/types.h" |
| #include "util/slice.h" |
| |
| namespace doris { |
| using namespace ErrorCode; |
| |
| RowCursor::RowCursor() = default; |
| RowCursor::~RowCursor() = default; |
| RowCursor::RowCursor(RowCursor&&) noexcept = default; |
| RowCursor& RowCursor::operator=(RowCursor&&) noexcept = default; |
| |
| void RowCursor::_init_schema(TabletSchemaSPtr schema, uint32_t column_count) { |
| std::vector<uint32_t> columns(column_count); |
| std::iota(columns.begin(), columns.end(), 0); |
| _schema.reset(new Schema(schema->columns(), columns)); |
| } |
| |
| void RowCursor::_init_schema(const std::shared_ptr<Schema>& shared_schema, uint32_t column_count) { |
| _schema.reset(new Schema(*shared_schema)); |
| } |
| |
| Status RowCursor::init(TabletSchemaSPtr schema, size_t num_columns) { |
| if (num_columns > schema->num_columns()) { |
| return Status::Error<INVALID_ARGUMENT>( |
| "Input param are invalid. Column count is bigger than num_columns of schema. " |
| "column_count={}, schema.num_columns={}", |
| num_columns, schema->num_columns()); |
| } |
| _init_schema(schema, cast_set<uint32_t>(num_columns)); |
| // Initialize all fields as null (TYPE_NULL). |
| _fields.resize(num_columns); |
| return Status::OK(); |
| } |
| |
| Status RowCursor::init(TabletSchemaSPtr schema, const OlapTuple& tuple) { |
| size_t key_size = tuple.size(); |
| if (key_size > schema->num_columns()) { |
| return Status::Error<INVALID_ARGUMENT>( |
| "Input param are invalid. Column count is bigger than num_columns of schema. " |
| "column_count={}, schema.num_columns={}", |
| key_size, schema->num_columns()); |
| } |
| _init_schema(schema, cast_set<uint32_t>(key_size)); |
| return from_tuple(tuple); |
| } |
| |
| Status RowCursor::init(TabletSchemaSPtr schema, const OlapTuple& tuple, |
| const std::shared_ptr<Schema>& shared_schema) { |
| size_t key_size = tuple.size(); |
| if (key_size > schema->num_columns()) { |
| return Status::Error<INVALID_ARGUMENT>( |
| "Input param are invalid. Column count is bigger than num_columns of schema. " |
| "column_count={}, schema.num_columns={}", |
| key_size, schema->num_columns()); |
| } |
| _init_schema(shared_schema, cast_set<uint32_t>(key_size)); |
| return from_tuple(tuple); |
| } |
| |
| Status RowCursor::init_scan_key(TabletSchemaSPtr schema, std::vector<Field> fields) { |
| size_t key_size = fields.size(); |
| if (key_size > schema->num_columns()) { |
| return Status::Error<INVALID_ARGUMENT>( |
| "Input param are invalid. Column count is bigger than num_columns of schema. " |
| "column_count={}, schema.num_columns={}", |
| key_size, schema->num_columns()); |
| } |
| _init_schema(schema, cast_set<uint32_t>(key_size)); |
| _fields = std::move(fields); |
| return Status::OK(); |
| } |
| |
| Status RowCursor::from_tuple(const OlapTuple& tuple) { |
| if (tuple.size() != _schema->num_column_ids()) { |
| return Status::Error<INVALID_ARGUMENT>( |
| "column count does not match. tuple_size={}, field_count={}", tuple.size(), |
| _schema->num_column_ids()); |
| } |
| _fields.resize(tuple.size()); |
| for (size_t i = 0; i < tuple.size(); ++i) { |
| _fields[i] = tuple.get_field(i); |
| } |
| return Status::OK(); |
| } |
| |
| RowCursor RowCursor::clone() const { |
| RowCursor result; |
| result._schema = std::make_unique<Schema>(*_schema); |
| result._fields = _fields; |
| return result; |
| } |
| |
| void RowCursor::pad_char_fields() { |
| for (size_t i = 0; i < _fields.size(); ++i) { |
| const TabletColumn* col = _schema->column(cast_set<uint32_t>(i)); |
| if (col->type() == FieldType::OLAP_FIELD_TYPE_CHAR && !_fields[i].is_null()) { |
| String padded = _fields[i].get<TYPE_CHAR>(); |
| padded.resize(col->length(), '\0'); |
| _fields[i] = Field::create_field<TYPE_CHAR>(std::move(padded)); |
| } |
| } |
| } |
| |
| std::string RowCursor::to_string() const { |
| std::string result; |
| for (size_t i = 0; i < _fields.size(); ++i) { |
| if (i > 0) { |
| result.append("|"); |
| } |
| if (_fields[i].is_null()) { |
| result.append("1&NULL"); |
| } else { |
| result.append("0&"); |
| result.append( |
| _fields[i].to_debug_string(_schema->column(cast_set<uint32_t>(i))->frac())); |
| } |
| } |
| return result; |
| } |
| |
| void RowCursor::_encode_column_value(const TabletColumn* column, const Field& value, |
| bool full_encode, std::string* buf) const { |
| FieldType ft = column->type(); |
| const KeyCoder* coder = get_key_coder(ft); |
| |
| if (field_is_slice_type(ft)) { |
| // String types: CHAR, VARCHAR, STRING — all stored as String in Field. |
| const String& str = value.get<TYPE_STRING>(); |
| |
| if (ft == FieldType::OLAP_FIELD_TYPE_CHAR) { |
| // CHAR type: must pad with \0 to the declared column length |
| size_t col_len = column->length(); |
| String padded(col_len, '\0'); |
| memcpy(padded.data(), str.data(), std::min(str.size(), col_len)); |
| |
| Slice slice(padded.data(), col_len); |
| if (full_encode) { |
| coder->full_encode_ascending(&slice, buf); |
| } else { |
| coder->encode_ascending(&slice, column->index_length(), buf); |
| } |
| } else { |
| // VARCHAR / STRING: use actual length |
| Slice slice(str.data(), str.size()); |
| if (full_encode) { |
| coder->full_encode_ascending(&slice, buf); |
| } else { |
| coder->encode_ascending(&slice, column->index_length(), buf); |
| } |
| } |
| return; |
| } |
| |
| // Non-string scalar keys are fixed-width; their KeyCoder::encode_ascending |
| // ignores `index_size` and delegates to full_encode_ascending, so the |
| // `full_encode` flag here is a no-op and we always call the full helper. |
| switch (ft) { |
| #define CASE(FT, PT) \ |
| case FieldType::FT: \ |
| full_encode_field_as_key<PrimitiveType::PT>(value, coder, buf); \ |
| break; |
| DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) |
| #undef CASE |
| default: |
| LOG(FATAL) << "unsupported field type for encoding: " << int(ft); |
| break; |
| } |
| } |
| |
| template <bool is_mow> |
| void RowCursor::encode_key_with_padding(std::string* buf, size_t num_keys, |
| bool padding_minimal) const { |
| for (uint32_t cid = 0; cid < num_keys; cid++) { |
| auto* column = _schema->column(cid); |
| if (column == nullptr) { |
| if (padding_minimal) { |
| buf->push_back(KeyConsts::KEY_MINIMAL_MARKER); |
| } else { |
| if (is_mow) { |
| buf->push_back(KeyConsts::KEY_NORMAL_NEXT_MARKER); |
| } else { |
| buf->push_back(KeyConsts::KEY_MAXIMAL_MARKER); |
| } |
| } |
| break; |
| } |
| |
| if (cid >= _fields.size() || _fields[cid].is_null()) { |
| buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); |
| continue; |
| } |
| |
| buf->push_back(KeyConsts::KEY_NORMAL_MARKER); |
| _encode_column_value(column, _fields[cid], is_mow, buf); |
| } |
| } |
| |
| // Explicit template instantiations |
| template void RowCursor::encode_key_with_padding<false>(std::string*, size_t, bool) const; |
| template void RowCursor::encode_key_with_padding<true>(std::string*, size_t, bool) const; |
| |
| template <bool full_encode> |
| void RowCursor::encode_key(std::string* buf, size_t num_keys) const { |
| for (uint32_t cid = 0; cid < num_keys; cid++) { |
| if (cid >= _fields.size() || _fields[cid].is_null()) { |
| buf->push_back(KeyConsts::KEY_NULL_FIRST_MARKER); |
| continue; |
| } |
| buf->push_back(KeyConsts::KEY_NORMAL_MARKER); |
| _encode_column_value(_schema->column(cid), _fields[cid], full_encode, buf); |
| } |
| } |
| |
| template void RowCursor::encode_key<false>(std::string*, size_t) const; |
| template void RowCursor::encode_key<true>(std::string*, size_t) const; |
| |
| } // namespace doris |