| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #pragma once |
| |
| #include <cstddef> |
| #include <sstream> |
| #include <string> |
| |
| #include "core/arena.h" |
| #include "core/value/map_value.h" |
| #include "runtime/collection_value.h" |
| #include "storage/key_coder.h" |
| #include "storage/olap_common.h" |
| #include "storage/olap_define.h" |
| #include "storage/tablet/tablet_schema.h" |
| #include "storage/types.h" |
| #include "storage/utils.h" |
| #include "util/hash_util.hpp" |
| #include "util/json/path_in_data.h" |
| #include "util/slice.h" |
| |
| namespace doris { |
| // A Field is used to represent a column in memory format. |
| // User can use this class to access or deal with column data in memory. |
| class StorageField { |
| public: |
| StorageField(const TabletColumn& column) |
| : _type(column.type()), |
| _desc(column), |
| _length(column.length()), |
| _key_coder(get_key_coder(column.type())), |
| _name(column.name()), |
| _index_size(column.index_length()), |
| _is_nullable(column.is_nullable()), |
| _unique_id(column.unique_id()), |
| _parent_unique_id(column.parent_unique_id()), |
| _is_extracted_column(column.is_extracted_column()), |
| _path(column.path_info_ptr()) {} |
| |
| virtual ~StorageField() = default; |
| |
| size_t size() const { return field_type_size(_type); } |
| size_t length() const { return _length; } |
| size_t field_size() const { return size() + 1; } |
| size_t index_size() const { return _index_size; } |
| int32_t unique_id() const { return _unique_id; } |
| int32_t parent_unique_id() const { return _parent_unique_id; } |
| bool is_extracted_column() const { return _is_extracted_column; } |
| const std::string& name() const { return _name; } |
| const PathInDataPtr& path() const { return _path; } |
| |
| virtual StorageField* clone() const { |
| auto* local = new StorageField(_desc); |
| this->clone(local); |
| return local; |
| } |
| |
| FieldType type() const { return _type; } |
| bool is_nullable() const { return _is_nullable; } |
| |
| // similar to `full_encode_ascending`, but only encode part (the first `index_size` bytes) of the value. |
| // only applicable to string type |
| void encode_ascending(const void* value, std::string* buf) const { |
| _key_coder->encode_ascending(value, _index_size, buf); |
| } |
| |
| // encode the provided `value` into `buf`. |
| void full_encode_ascending(const void* value, std::string* buf) const { |
| _key_coder->full_encode_ascending(value, buf); |
| } |
| |
| const KeyCoder* key_coder() const { return _key_coder; } |
| void add_sub_field(std::unique_ptr<StorageField> sub_field) { |
| _sub_fields.emplace_back(std::move(sub_field)); |
| } |
| StorageField* get_sub_field(size_t i) const { return _sub_fields[i].get(); } |
| size_t get_sub_field_count() const { return _sub_fields.size(); } |
| |
| void set_precision(int32_t precision) { _precision = precision; } |
| void set_scale(int32_t scale) { _scale = scale; } |
| int32_t get_precision() const { return _precision; } |
| int32_t get_scale() const { return _scale; } |
| const TabletColumn& get_desc() const { return _desc; } |
| |
| int32_t get_unique_id() const { |
| return is_extracted_column() ? parent_unique_id() : unique_id(); |
| } |
| |
| protected: |
| FieldType _type; |
| TabletColumn _desc; |
| // unit : byte |
| // except for strings, other types have fixed lengths |
| // Note that, the struct type itself has fixed length, but due to |
| // its number of subfields is a variable, so the actual length of |
| // a struct field is not fixed. |
| size_t _length; |
| |
| void clone(StorageField* other) const { |
| other->_type = this->_type; |
| other->_key_coder = this->_key_coder; |
| other->_name = this->_name; |
| other->_index_size = this->_index_size; |
| other->_is_nullable = this->_is_nullable; |
| other->_sub_fields.clear(); |
| other->_precision = this->_precision; |
| other->_scale = this->_scale; |
| other->_unique_id = this->_unique_id; |
| other->_parent_unique_id = this->_parent_unique_id; |
| other->_is_extracted_column = this->_is_extracted_column; |
| for (const auto& f : _sub_fields) { |
| StorageField* item = f->clone(); |
| other->add_sub_field(std::unique_ptr<StorageField>(item)); |
| } |
| } |
| |
| private: |
| // maximum length of Field, unit : bytes |
| // usually equal to length, except for variable-length strings |
| const KeyCoder* _key_coder; |
| std::string _name; |
| size_t _index_size; |
| bool _is_nullable; |
| std::vector<std::unique_ptr<StorageField>> _sub_fields; |
| int32_t _precision; |
| int32_t _scale; |
| int32_t _unique_id; |
| int32_t _parent_unique_id; |
| bool _is_extracted_column = false; |
| PathInDataPtr _path; |
| }; |
| |
| class MapField : public StorageField { |
| public: |
| MapField(const TabletColumn& column) : StorageField(column) {} |
| }; |
| |
| class StructField : public StorageField { |
| public: |
| StructField(const TabletColumn& column) : StorageField(column) {} |
| }; |
| |
| class ArrayField : public StorageField { |
| public: |
| ArrayField(const TabletColumn& column) : StorageField(column) {} |
| }; |
| |
| class CharField : public StorageField { |
| public: |
| CharField(const TabletColumn& column) : StorageField(column) {} |
| |
| CharField* clone() const override { |
| auto* local = new CharField(_desc); |
| StorageField::clone(local); |
| return local; |
| } |
| }; |
| |
| class VarcharField : public StorageField { |
| public: |
| VarcharField(const TabletColumn& column) : StorageField(column) {} |
| |
| VarcharField* clone() const override { |
| auto* local = new VarcharField(_desc); |
| StorageField::clone(local); |
| return local; |
| } |
| }; |
| class StringField : public StorageField { |
| public: |
| StringField(const TabletColumn& column) : StorageField(column) {} |
| |
| StringField* clone() const override { |
| auto* local = new StringField(_desc); |
| StorageField::clone(local); |
| return local; |
| } |
| }; |
| |
| class BitmapAggField : public StorageField { |
| public: |
| BitmapAggField(const TabletColumn& column) : StorageField(column) {} |
| |
| BitmapAggField* clone() const override { |
| auto* local = new BitmapAggField(_desc); |
| StorageField::clone(local); |
| return local; |
| } |
| }; |
| |
| class QuantileStateAggField : public StorageField { |
| public: |
| QuantileStateAggField(const TabletColumn& column) : StorageField(column) {} |
| |
| QuantileStateAggField* clone() const override { |
| auto* local = new QuantileStateAggField(_desc); |
| StorageField::clone(local); |
| return local; |
| } |
| }; |
| |
| class AggStateField : public StorageField { |
| public: |
| AggStateField(const TabletColumn& column) : StorageField(column) {} |
| |
| AggStateField* clone() const override { |
| auto* local = new AggStateField(_desc); |
| StorageField::clone(local); |
| return local; |
| } |
| }; |
| |
| class HllAggField : public StorageField { |
| public: |
| HllAggField(const TabletColumn& column) : StorageField(column) {} |
| |
| HllAggField* clone() const override { |
| auto* local = new HllAggField(_desc); |
| StorageField::clone(local); |
| return local; |
| } |
| }; |
| |
| class StorageFieldFactory { |
| public: |
| static StorageField* create(const TabletColumn& column) { |
| // for key column |
| if (column.is_key()) { |
| switch (column.type()) { |
| case FieldType::OLAP_FIELD_TYPE_CHAR: |
| return new CharField(column); |
| case FieldType::OLAP_FIELD_TYPE_VARCHAR: |
| case FieldType::OLAP_FIELD_TYPE_STRING: |
| return new StringField(column); |
| case FieldType::OLAP_FIELD_TYPE_STRUCT: { |
| auto* local = new StructField(column); |
| for (uint32_t i = 0; i < column.get_subtype_count(); i++) { |
| std::unique_ptr<StorageField> sub_field( |
| StorageFieldFactory::create(column.get_sub_column(i))); |
| local->add_sub_field(std::move(sub_field)); |
| } |
| return local; |
| } |
| case FieldType::OLAP_FIELD_TYPE_ARRAY: { |
| std::unique_ptr<StorageField> item_field( |
| StorageFieldFactory::create(column.get_sub_column(0))); |
| auto* local = new ArrayField(column); |
| local->add_sub_field(std::move(item_field)); |
| return local; |
| } |
| case FieldType::OLAP_FIELD_TYPE_MAP: { |
| std::unique_ptr<StorageField> key_field( |
| StorageFieldFactory::create(column.get_sub_column(0))); |
| std::unique_ptr<StorageField> val_field( |
| StorageFieldFactory::create(column.get_sub_column(1))); |
| auto* local = new MapField(column); |
| local->add_sub_field(std::move(key_field)); |
| local->add_sub_field(std::move(val_field)); |
| return local; |
| } |
| case FieldType::OLAP_FIELD_TYPE_DECIMAL: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_DECIMAL32: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_DECIMAL64: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_DECIMAL256: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: { |
| StorageField* field = new StorageField(column); |
| field->set_precision(column.precision()); |
| field->set_scale(column.frac()); |
| return field; |
| } |
| default: |
| return new StorageField(column); |
| } |
| } |
| |
| // for value column |
| switch (column.aggregation()) { |
| case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE: |
| case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM: |
| case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MIN: |
| case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_MAX: |
| case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE: |
| case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL: |
| switch (column.type()) { |
| case FieldType::OLAP_FIELD_TYPE_CHAR: |
| return new CharField(column); |
| case FieldType::OLAP_FIELD_TYPE_VARCHAR: |
| return new VarcharField(column); |
| case FieldType::OLAP_FIELD_TYPE_STRING: |
| return new StringField(column); |
| case FieldType::OLAP_FIELD_TYPE_STRUCT: { |
| auto* local = new StructField(column); |
| for (uint32_t i = 0; i < column.get_subtype_count(); i++) { |
| std::unique_ptr<StorageField> sub_field( |
| StorageFieldFactory::create(column.get_sub_column(i))); |
| local->add_sub_field(std::move(sub_field)); |
| } |
| return local; |
| } |
| case FieldType::OLAP_FIELD_TYPE_ARRAY: { |
| std::unique_ptr<StorageField> item_field( |
| StorageFieldFactory::create(column.get_sub_column(0))); |
| auto* local = new ArrayField(column); |
| local->add_sub_field(std::move(item_field)); |
| return local; |
| } |
| case FieldType::OLAP_FIELD_TYPE_MAP: { |
| DCHECK(column.get_subtype_count() == 2); |
| auto* local = new MapField(column); |
| std::unique_ptr<StorageField> key_field( |
| StorageFieldFactory::create(column.get_sub_column(0))); |
| std::unique_ptr<StorageField> value_field( |
| StorageFieldFactory::create(column.get_sub_column(1))); |
| local->add_sub_field(std::move(key_field)); |
| local->add_sub_field(std::move(value_field)); |
| return local; |
| } |
| case FieldType::OLAP_FIELD_TYPE_DECIMAL: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_DECIMAL32: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_DECIMAL64: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_DECIMAL256: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ: |
| [[fallthrough]]; |
| case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: { |
| StorageField* field = new StorageField(column); |
| field->set_precision(column.precision()); |
| field->set_scale(column.frac()); |
| return field; |
| } |
| default: |
| return new StorageField(column); |
| } |
| case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_HLL_UNION: |
| return new HllAggField(column); |
| case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_BITMAP_UNION: |
| return new BitmapAggField(column); |
| case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_QUANTILE_UNION: |
| return new QuantileStateAggField(column); |
| case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_GENERIC: |
| return new AggStateField(column); |
| case FieldAggregationMethod::OLAP_FIELD_AGGREGATION_UNKNOWN: |
| CHECK(false) << ", value column no agg type"; |
| return nullptr; |
| } |
| return nullptr; |
| } |
| |
| static StorageField* create_by_type(const FieldType& type) { |
| TabletColumn column(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE, type); |
| return create(column); |
| } |
| }; |
| } // namespace doris |