| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "runtime/descriptors.h" |
| |
| #include <boost/algorithm/string/join.hpp> |
| #include <gutil/strings/substitute.h> |
| #include <ios> |
| #include <sstream> |
| |
| #include <llvm/ExecutionEngine/ExecutionEngine.h> |
| #include <llvm/IR/DataLayout.h> |
| |
| #include "codegen/codegen-anyval.h" |
| #include "codegen/llvm-codegen.h" |
| #include "common/object-pool.h" |
| #include "common/status.h" |
| #include "exprs/scalar-expr.h" |
| #include "exprs/scalar-expr-evaluator.h" |
| #include "gen-cpp/Descriptors_types.h" |
| #include "gen-cpp/PlanNodes_types.h" |
| #include "rpc/thrift-util.h" |
| #include "runtime/runtime-state.h" |
| |
| #include "common/names.h" |
| |
| using boost::algorithm::join; |
| using namespace strings; |
| |
| // In 'thrift_partition', the location is stored in a compressed format that references |
| // the 'partition_prefixes' of 'thrift_table'. This function decompresses that format into |
| // a string and stores it in 'result'. If 'location' is not set in the THdfsPartition, |
| // 'result' is set to the empty string. |
| static void DecompressLocation(const impala::THdfsTable& thrift_table, |
| const impala::THdfsPartition& thrift_partition, string* result) { |
| if (!thrift_partition.__isset.location) { |
| result->clear(); |
| return; |
| } |
| *result = thrift_partition.location.suffix; |
| if (thrift_partition.location.prefix_index != -1) { |
| // -1 means an uncompressed location |
| DCHECK_GE(thrift_partition.location.prefix_index, 0); |
| DCHECK_LT( |
| thrift_partition.location.prefix_index, thrift_table.partition_prefixes.size()); |
| *result = |
| thrift_table.partition_prefixes[thrift_partition.location.prefix_index] + *result; |
| } |
| } |
| |
| namespace impala { |
| |
| const int SchemaPathConstants::ARRAY_ITEM; |
| const int SchemaPathConstants::ARRAY_POS; |
| const int SchemaPathConstants::MAP_KEY; |
| const int SchemaPathConstants::MAP_VALUE; |
| |
| const int RowDescriptor::INVALID_IDX; |
| |
| const char* TupleDescriptor::LLVM_CLASS_NAME = "class.impala::TupleDescriptor"; |
| const char* NullIndicatorOffset::LLVM_CLASS_NAME = "struct.impala::NullIndicatorOffset"; |
| |
| string NullIndicatorOffset::DebugString() const { |
| stringstream out; |
| out << "(offset=" << byte_offset |
| << " mask=" << hex << static_cast<int>(bit_mask) << dec << ")"; |
| return out.str(); |
| } |
| |
| llvm::Constant* NullIndicatorOffset::ToIR(LlvmCodeGen* codegen) const { |
| llvm::StructType* null_indicator_offset_type = |
| codegen->GetStructType<NullIndicatorOffset>(); |
| // Populate padding at end of struct with zeroes. |
| llvm::ConstantAggregateZero* zeroes = |
| llvm::ConstantAggregateZero::get(null_indicator_offset_type); |
| return llvm::ConstantStruct::get(null_indicator_offset_type, |
| {codegen->GetI32Constant(byte_offset), |
| codegen->GetI8Constant(bit_mask), |
| zeroes->getStructElement(2)}); |
| } |
| |
| ostream& operator<<(ostream& os, const NullIndicatorOffset& null_indicator) { |
| os << null_indicator.DebugString(); |
| return os; |
| } |
| |
| SlotDescriptor::SlotDescriptor(const TSlotDescriptor& tdesc, |
| const TupleDescriptor* parent, const TupleDescriptor* children_tuple_descriptor) |
| : id_(tdesc.id), |
| type_(ColumnType::FromThrift(tdesc.slotType)), |
| parent_(parent), |
| children_tuple_descriptor_(children_tuple_descriptor), |
| col_path_(tdesc.materializedPath), |
| tuple_offset_(tdesc.byteOffset), |
| null_indicator_offset_(tdesc.nullIndicatorByte, tdesc.nullIndicatorBit), |
| slot_idx_(tdesc.slotIdx), |
| slot_size_(type_.GetSlotSize()), |
| virtual_column_type_(tdesc.virtual_col_type) { |
| DCHECK(parent_ != nullptr) << tdesc.parent; |
| if (type_.IsComplexType()) { |
| DCHECK(tdesc.__isset.itemTupleId); |
| DCHECK(children_tuple_descriptor_ != nullptr) << tdesc.itemTupleId; |
| } else { |
| DCHECK(!tdesc.__isset.itemTupleId); |
| DCHECK(children_tuple_descriptor == nullptr); |
| } |
| } |
| |
| bool SlotDescriptor::ColPathLessThan(const SlotDescriptor* a, const SlotDescriptor* b) { |
| int common_levels = min(a->col_path().size(), b->col_path().size()); |
| for (int i = 0; i < common_levels; ++i) { |
| if (a->col_path()[i] == b->col_path()[i]) continue; |
| return a->col_path()[i] < b->col_path()[i]; |
| } |
| return a->col_path().size() < b->col_path().size(); |
| } |
| |
| string SlotDescriptor::DebugString() const { |
| stringstream out; |
| out << "Slot(id=" << id_ << " type=" << type_.DebugString() |
| << " col_path=["; |
| if (col_path_.size() > 0) out << col_path_[0]; |
| for (int i = 1; i < col_path_.size(); ++i) { |
| out << ","; |
| out << col_path_[i]; |
| } |
| out << "]"; |
| if (children_tuple_descriptor_ != nullptr) { |
| out << " children_tuple_id=" << children_tuple_descriptor_->id(); |
| } |
| out << " offset=" << tuple_offset_ << " null=" << null_indicator_offset_.DebugString() |
| << " slot_idx=" << slot_idx_ << " field_idx=" << slot_idx_; |
| if (IsVirtual()) { |
| out << " virtual_column_type=" << virtual_column_type_; |
| } |
| out << ")"; |
| return out.str(); |
| } |
| |
| bool SlotDescriptor::LayoutEquals(const SlotDescriptor& other_desc) const { |
| if (type() != other_desc.type()) return false; |
| if (is_nullable() != other_desc.is_nullable()) return false; |
| if (slot_size() != other_desc.slot_size()) return false; |
| if (tuple_offset() != other_desc.tuple_offset()) return false; |
| if (!null_indicator_offset().Equals(other_desc.null_indicator_offset())) return false; |
| return true; |
| } |
| |
| inline bool SlotDescriptor::IsChildOfStruct() const { |
| return parent_->isTupleOfStructSlot(); |
| } |
| |
| ColumnDescriptor::ColumnDescriptor(const TColumnDescriptor& tdesc) |
| : name_(tdesc.name), |
| type_(ColumnType::FromThrift(tdesc.type)), |
| aux_type_(tdesc.type) { |
| if (tdesc.__isset.icebergFieldId) { |
| field_id_ = tdesc.icebergFieldId; |
| // Get key and value field_id for Iceberg table column with Map type. |
| field_map_key_id_ = tdesc.icebergFieldMapKeyId; |
| field_map_value_id_ = tdesc.icebergFieldMapValueId; |
| } |
| } |
| |
| string ColumnDescriptor::DebugString() const { |
| return Substitute("$0: $1$2", name_, type_.DebugString(), |
| field_id_ != -1 ? " field_id: " + std::to_string(field_id_): ""); |
| } |
| |
| TableDescriptor::TableDescriptor(const TTableDescriptor& tdesc) |
| : name_(tdesc.tableName), |
| database_(tdesc.dbName), |
| id_(tdesc.id), |
| type_(tdesc.tableType), |
| num_clustering_cols_(tdesc.numClusteringCols) { |
| for (int i = 0; i < tdesc.columnDescriptors.size(); ++i) { |
| col_descs_.push_back(ColumnDescriptor(tdesc.columnDescriptors[i])); |
| } |
| } |
| |
| string TableDescriptor::fully_qualified_name() const { |
| return Substitute("$0.$1", database_, name_); |
| } |
| |
| string TableDescriptor::DebugString() const { |
| vector<string> cols; |
| for (const ColumnDescriptor& col_desc: col_descs_) { |
| cols.push_back(col_desc.DebugString()); |
| } |
| stringstream out; |
| out << "#cols=" << num_cols() << " #clustering_cols=" << num_clustering_cols_; |
| out << " cols=["; |
| out << join(cols, ", "); |
| out << "]"; |
| return out.str(); |
| } |
| |
| HdfsPartitionDescriptor::HdfsPartitionDescriptor( |
| const THdfsTable& thrift_table, const THdfsPartition& thrift_partition) |
| : id_(thrift_partition.id), |
| thrift_partition_key_exprs_(thrift_partition.partitionKeyExprs) { |
| THdfsStorageDescriptor sd = thrift_partition.hdfs_storage_descriptor; |
| line_delim_ = sd.lineDelim; |
| field_delim_ = sd.fieldDelim; |
| collection_delim_ = sd.collectionDelim; |
| escape_char_ = sd.escapeChar; |
| block_size_ = sd.blockSize; |
| file_format_ = sd.fileFormat; |
| DecompressLocation(thrift_table, thrift_partition, &location_); |
| } |
| |
| string HdfsPartitionDescriptor::DebugString() const { |
| stringstream out; |
| out << " file_format=" << file_format_ << "'" |
| << " line_delim='" << line_delim_ << "'" |
| << " field_delim='" << field_delim_ << "'" |
| << " coll_delim='" << collection_delim_ << "'" |
| << " escape_char='" << escape_char_ << "')"; |
| return out.str(); |
| } |
| |
| string DataSourceTableDescriptor::DebugString() const { |
| stringstream out; |
| out << "DataSourceTable(" << TableDescriptor::DebugString() << ")"; |
| return out.str(); |
| } |
| |
| HdfsTableDescriptor::HdfsTableDescriptor(const TTableDescriptor& tdesc, ObjectPool* pool) |
| : TableDescriptor(tdesc), |
| hdfs_base_dir_(tdesc.hdfsTable.hdfsBaseDir), |
| null_partition_key_value_(tdesc.hdfsTable.nullPartitionKeyValue), |
| null_column_value_(tdesc.hdfsTable.nullColumnValue) { |
| for (const auto& entry : tdesc.hdfsTable.partitions) { |
| HdfsPartitionDescriptor* partition = |
| pool->Add(new HdfsPartitionDescriptor(tdesc.hdfsTable, entry.second)); |
| partition_descriptors_[entry.first] = partition; |
| } |
| prototype_partition_descriptor_ = pool->Add(new HdfsPartitionDescriptor( |
| tdesc.hdfsTable, tdesc.hdfsTable.prototype_partition)); |
| avro_schema_ = tdesc.hdfsTable.__isset.avroSchema ? tdesc.hdfsTable.avroSchema : ""; |
| is_full_acid_ = tdesc.hdfsTable.is_full_acid; |
| valid_write_id_list_ = tdesc.hdfsTable.valid_write_ids; |
| if (tdesc.__isset.icebergTable) { |
| is_iceberg_ = true; |
| iceberg_table_location_ = tdesc.icebergTable.table_location; |
| iceberg_spec_id_ = tdesc.icebergTable.default_partition_spec_id; |
| iceberg_partition_specs_ = tdesc.icebergTable.partition_spec; |
| const TIcebergPartitionSpec& spec = iceberg_partition_specs_[iceberg_spec_id_]; |
| DCHECK_EQ(spec.spec_id, iceberg_spec_id_); |
| for (const TIcebergPartitionField& spec_field : spec.partition_fields) { |
| auto transform_type = spec_field.transform.transform_type; |
| if (transform_type == TIcebergPartitionTransformType::VOID) continue; |
| iceberg_non_void_partition_fields_.push_back(spec_field); |
| } |
| iceberg_parquet_compression_codec_ = tdesc.icebergTable.parquet_compression_codec; |
| iceberg_parquet_row_group_size_ = tdesc.icebergTable.parquet_row_group_size; |
| iceberg_parquet_plain_page_size_ = tdesc.icebergTable.parquet_plain_page_size; |
| iceberg_parquet_dict_page_size_ = tdesc.icebergTable.parquet_dict_page_size; |
| } |
| } |
| |
| void HdfsTableDescriptor::ReleaseResources() { |
| for (const auto& part_entry: partition_descriptors_) { |
| for (ScalarExprEvaluator* eval : |
| part_entry.second->partition_key_value_evals()) { |
| eval->Close(nullptr); |
| const_cast<ScalarExpr&>(eval->root()).Close(); |
| } |
| } |
| } |
| |
| string HdfsTableDescriptor::DebugString() const { |
| stringstream out; |
| out << "HdfsTable(" << TableDescriptor::DebugString() |
| << " hdfs_base_dir='" << hdfs_base_dir_ << "'"; |
| out << " partitions=["; |
| vector<string> partition_strings; |
| map<int64_t, HdfsPartitionDescriptor*>::const_iterator it; |
| for (it = partition_descriptors_.begin(); it != partition_descriptors_.end(); ++it) { |
| stringstream s; |
| s << " (id: " << it->first << ", partition: " << it->second->DebugString() << ")"; |
| partition_strings.push_back(s.str()); |
| } |
| out << join(partition_strings, ",") << "]"; |
| |
| out << " null_partition_key_value='" << null_partition_key_value_ << "'"; |
| out << " null_column_value='" << null_column_value_ << "'"; |
| out << " is_full_acid=" << std::boolalpha << is_full_acid_; |
| return out.str(); |
| } |
| |
| HBaseTableDescriptor::HBaseTableDescriptor(const TTableDescriptor& tdesc) |
| : TableDescriptor(tdesc), |
| table_name_(tdesc.hbaseTable.tableName) { |
| for (int i = 0; i < tdesc.hbaseTable.families.size(); ++i) { |
| bool is_binary_encoded = tdesc.hbaseTable.__isset.binary_encoded && |
| tdesc.hbaseTable.binary_encoded[i]; |
| cols_.push_back(HBaseTableDescriptor::HBaseColumnDescriptor( |
| tdesc.hbaseTable.families[i], tdesc.hbaseTable.qualifiers[i], is_binary_encoded)); |
| } |
| } |
| |
| string HBaseTableDescriptor::DebugString() const { |
| stringstream out; |
| out << "HBaseTable(" << TableDescriptor::DebugString() << " table=" << table_name_; |
| out << " cols=["; |
| for (int i = 0; i < cols_.size(); ++i) { |
| out << (i > 0 ? " " : "") << cols_[i].family << ":" << cols_[i].qualifier << ":" |
| << cols_[i].binary_encoded; |
| } |
| out << "])"; |
| return out.str(); |
| } |
| |
| KuduTableDescriptor::KuduTableDescriptor(const TTableDescriptor& tdesc) |
| : TableDescriptor(tdesc), |
| table_name_(tdesc.kuduTable.table_name), |
| key_columns_(tdesc.kuduTable.key_columns), |
| master_addresses_(tdesc.kuduTable.master_addresses) { |
| } |
| |
| string KuduTableDescriptor::DebugString() const { |
| stringstream out; |
| out << "KuduTable(" << TableDescriptor::DebugString() << " table=" << table_name_; |
| out << " master_addrs=[" << join(master_addresses_, ",") << "]"; |
| out << " key_columns=["; |
| out << join(key_columns_, ":"); |
| out << "])"; |
| return out.str(); |
| } |
| |
| TupleDescriptor::TupleDescriptor(const TTupleDescriptor& tdesc) |
| : id_(tdesc.id), |
| byte_size_(tdesc.byteSize), |
| num_null_bytes_(tdesc.numNullBytes), |
| null_bytes_offset_(tdesc.byteSize - tdesc.numNullBytes), |
| has_varlen_slots_(false), |
| tuple_path_(tdesc.tuplePath) { |
| } |
| |
| void TupleDescriptor::AddSlot(SlotDescriptor* slot) { |
| slots_.push_back(slot); |
| // If this is a tuple for struct children then we populate the 'string_slots_' field (in |
| // case of a var len string type) or the 'collection_slots_' field (in case of a |
| // collection type) of the topmost tuple and not this one. |
| TupleDescriptor* const target_tuple = isTupleOfStructSlot() ? master_tuple_ : this; |
| if (slot->type().IsVarLenStringType()) { |
| target_tuple->string_slots_.push_back(slot); |
| target_tuple->has_varlen_slots_ = true; |
| } |
| if (slot->type().IsCollectionType()) { |
| target_tuple->collection_slots_.push_back(slot); |
| target_tuple->has_varlen_slots_ = true; |
| } |
| } |
| |
| string TupleDescriptor::DebugString() const { |
| stringstream out; |
| out << "Tuple(id=" << id_ << " size=" << byte_size_; |
| if (table_desc_ != NULL) { |
| //out << " " << table_desc_->DebugString(); |
| } |
| out << " slots=["; |
| for (size_t i = 0; i < slots_.size(); ++i) { |
| if (i > 0) out << ", "; |
| out << slots_[i]->DebugString(); |
| } |
| out << "]"; |
| out << " tuple_path=["; |
| for (size_t i = 0; i < tuple_path_.size(); ++i) { |
| if (i > 0) out << ", "; |
| out << tuple_path_[i]; |
| } |
| out << "]"; |
| out << ")"; |
| return out.str(); |
| } |
| |
| bool TupleDescriptor::LayoutEquals(const TupleDescriptor& other_desc) const { |
| if (byte_size() != other_desc.byte_size()) return false; |
| if (slots().size() != other_desc.slots().size()) return false; |
| |
| vector<SlotDescriptor*> slots = SlotsOrderedByIdx(); |
| vector<SlotDescriptor*> other_slots = other_desc.SlotsOrderedByIdx(); |
| for (int i = 0; i < slots.size(); ++i) { |
| if (!slots[i]->LayoutEquals(*other_slots[i])) return false; |
| } |
| return true; |
| } |
| |
| RowDescriptor::RowDescriptor(const DescriptorTbl& desc_tbl, |
| const vector<TTupleId>& row_tuples, |
| const vector<bool>& nullable_tuples) |
| : tuple_idx_nullable_map_(nullable_tuples) { |
| DCHECK_EQ(nullable_tuples.size(), row_tuples.size()); |
| DCHECK_GT(row_tuples.size(), 0); |
| for (int i = 0; i < row_tuples.size(); ++i) { |
| tuple_desc_map_.push_back(desc_tbl.GetTupleDescriptor(row_tuples[i])); |
| DCHECK(tuple_desc_map_.back() != NULL); |
| } |
| InitTupleIdxMap(); |
| InitHasVarlenSlots(); |
| } |
| |
| RowDescriptor::RowDescriptor(const RowDescriptor& lhs_row_desc, |
| const RowDescriptor& rhs_row_desc) { |
| tuple_desc_map_.insert(tuple_desc_map_.end(), lhs_row_desc.tuple_desc_map_.begin(), |
| lhs_row_desc.tuple_desc_map_.end()); |
| tuple_desc_map_.insert(tuple_desc_map_.end(), rhs_row_desc.tuple_desc_map_.begin(), |
| rhs_row_desc.tuple_desc_map_.end()); |
| tuple_idx_nullable_map_.insert(tuple_idx_nullable_map_.end(), |
| lhs_row_desc.tuple_idx_nullable_map_.begin(), |
| lhs_row_desc.tuple_idx_nullable_map_.end()); |
| tuple_idx_nullable_map_.insert(tuple_idx_nullable_map_.end(), |
| rhs_row_desc.tuple_idx_nullable_map_.begin(), |
| rhs_row_desc.tuple_idx_nullable_map_.end()); |
| InitTupleIdxMap(); |
| InitHasVarlenSlots(); |
| } |
| |
| RowDescriptor::RowDescriptor(TupleDescriptor* tuple_desc, bool is_nullable) |
| : tuple_desc_map_(1, tuple_desc), |
| tuple_idx_nullable_map_(1, is_nullable) { |
| InitTupleIdxMap(); |
| InitHasVarlenSlots(); |
| } |
| |
| void RowDescriptor::InitTupleIdxMap() { |
| // find max id |
| TupleId max_id = 0; |
| for (int i = 0; i < tuple_desc_map_.size(); ++i) { |
| max_id = max(tuple_desc_map_[i]->id(), max_id); |
| } |
| |
| tuple_idx_map_.resize(max_id + 1, INVALID_IDX); |
| for (int i = 0; i < tuple_desc_map_.size(); ++i) { |
| tuple_idx_map_[tuple_desc_map_[i]->id()] = i; |
| } |
| } |
| |
| void RowDescriptor::InitHasVarlenSlots() { |
| has_varlen_slots_ = false; |
| for (int i = 0; i < tuple_desc_map_.size(); ++i) { |
| if (tuple_desc_map_[i]->HasVarlenSlots()) { |
| has_varlen_slots_ = true; |
| break; |
| } |
| } |
| } |
| |
| int RowDescriptor::GetRowSize() const { |
| int size = 0; |
| for (int i = 0; i < tuple_desc_map_.size(); ++i) { |
| size += tuple_desc_map_[i]->byte_size(); |
| } |
| return size; |
| } |
| |
| int RowDescriptor::GetTupleIdx(TupleId id) const { |
| DCHECK_LT(id, tuple_idx_map_.size()) << "RowDescriptor: " << DebugString(); |
| return tuple_idx_map_[id]; |
| } |
| |
| bool RowDescriptor::TupleIsNullable(int tuple_idx) const { |
| DCHECK_LT(tuple_idx, tuple_idx_nullable_map_.size()); |
| return tuple_idx_nullable_map_[tuple_idx]; |
| } |
| |
| bool RowDescriptor::IsAnyTupleNullable() const { |
| for (int i = 0; i < tuple_idx_nullable_map_.size(); ++i) { |
| if (tuple_idx_nullable_map_[i]) return true; |
| } |
| return false; |
| } |
| |
| void RowDescriptor::ToThrift(vector<TTupleId>* row_tuple_ids) const { |
| row_tuple_ids->clear(); |
| for (int i = 0; i < tuple_desc_map_.size(); ++i) { |
| row_tuple_ids->push_back(tuple_desc_map_[i]->id()); |
| } |
| } |
| |
| bool RowDescriptor::IsPrefixOf(const RowDescriptor& other_desc) const { |
| if (tuple_desc_map_.size() > other_desc.tuple_desc_map_.size()) return false; |
| for (int i = 0; i < tuple_desc_map_.size(); ++i) { |
| // pointer comparison okay, descriptors are unique |
| if (tuple_desc_map_[i] != other_desc.tuple_desc_map_[i]) return false; |
| } |
| return true; |
| } |
| |
| bool RowDescriptor::Equals(const RowDescriptor& other_desc) const { |
| if (tuple_desc_map_.size() != other_desc.tuple_desc_map_.size()) return false; |
| return IsPrefixOf(other_desc); |
| } |
| |
| bool RowDescriptor::LayoutIsPrefixOf(const RowDescriptor& other_desc) const { |
| if (tuple_desc_map_.size() > other_desc.tuple_desc_map_.size()) return false; |
| for (int i = 0; i < tuple_desc_map_.size(); ++i) { |
| if (!tuple_desc_map_[i]->LayoutEquals(*other_desc.tuple_desc_map_[i])) return false; |
| } |
| return true; |
| } |
| |
| bool RowDescriptor::LayoutEquals(const RowDescriptor& other_desc) const { |
| if (tuple_desc_map_.size() != other_desc.tuple_desc_map_.size()) return false; |
| return LayoutIsPrefixOf(other_desc); |
| } |
| |
| string RowDescriptor::DebugString() const { |
| stringstream ss; |
| for (int i = 0; i < tuple_desc_map_.size(); ++i) { |
| ss << tuple_desc_map_[i]->DebugString() << endl; |
| } |
| return ss.str(); |
| } |
| |
| Status DescriptorTbl::CreatePartKeyExprs( |
| const HdfsTableDescriptor& hdfs_tbl, ObjectPool* pool) { |
| // Prepare and open partition exprs |
| for (const auto& part_entry : hdfs_tbl.partition_descriptors()) { |
| HdfsPartitionDescriptor* part_desc = part_entry.second; |
| vector<ScalarExpr*> partition_key_value_exprs; |
| RETURN_IF_ERROR(ScalarExpr::Create(part_desc->thrift_partition_key_exprs_, |
| RowDescriptor(), nullptr, pool, &partition_key_value_exprs)); |
| for (const ScalarExpr* partition_expr : partition_key_value_exprs) { |
| DCHECK(partition_expr->IsLiteral()); |
| DCHECK(!partition_expr->HasFnCtx()); |
| DCHECK_EQ(partition_expr->GetNumChildren(), 0); |
| } |
| // TODO: RowDescriptor should arguably be optional in Prepare for known literals. |
| // Partition exprs are not used in the codegen case. Don't codegen them. |
| RETURN_IF_ERROR(ScalarExprEvaluator::Create(partition_key_value_exprs, nullptr, |
| pool, nullptr, nullptr, &part_desc->partition_key_value_evals_)); |
| RETURN_IF_ERROR(ScalarExprEvaluator::Open( |
| part_desc->partition_key_value_evals_, nullptr)); |
| } |
| return Status::OK(); |
| } |
| |
| Status DescriptorTbl::DeserializeThrift(const TDescriptorTableSerialized& serial_tbl, |
| TDescriptorTable* desc_tbl) { |
| uint32_t serial_tbl_len = serial_tbl.thrift_desc_tbl.length(); |
| return DeserializeThriftMsg( |
| reinterpret_cast<const uint8_t*>(serial_tbl.thrift_desc_tbl.data()), |
| &serial_tbl_len, false, desc_tbl); |
| } |
| |
| Status DescriptorTbl::CreateHdfsTblDescriptor( |
| const TDescriptorTableSerialized& serialized_thrift_tbl, |
| TableId tbl_id, ObjectPool* pool, HdfsTableDescriptor** desc) { |
| TDescriptorTable thrift_tbl; |
| RETURN_IF_ERROR(DeserializeThrift(serialized_thrift_tbl, &thrift_tbl)); |
| for (const TTableDescriptor& tdesc: thrift_tbl.tableDescriptors) { |
| if (tdesc.id == tbl_id) { |
| DCHECK(tdesc.__isset.hdfsTable); |
| RETURN_IF_ERROR(CreateTblDescriptorInternal( |
| tdesc, pool, reinterpret_cast<TableDescriptor**>(desc))); |
| return Status::OK(); |
| } |
| } |
| string error = Substitute("table $0 not found in descriptor table", tbl_id); |
| DCHECK(false) << error; |
| return Status(error); |
| } |
| |
| Status DescriptorTbl::CreateTblDescriptorInternal(const TTableDescriptor& tdesc, |
| ObjectPool* pool, TableDescriptor** desc) { |
| *desc = nullptr; |
| switch (tdesc.tableType) { |
| case TTableType::ICEBERG_TABLE: |
| case TTableType::HDFS_TABLE: { |
| HdfsTableDescriptor* hdfs_tbl = pool->Add(new HdfsTableDescriptor(tdesc, pool)); |
| *desc = hdfs_tbl; |
| RETURN_IF_ERROR(CreatePartKeyExprs(*hdfs_tbl, pool)); |
| break; |
| } |
| case TTableType::HBASE_TABLE: |
| *desc = pool->Add(new HBaseTableDescriptor(tdesc)); |
| break; |
| case TTableType::DATA_SOURCE_TABLE: |
| *desc = pool->Add(new DataSourceTableDescriptor(tdesc)); |
| break; |
| case TTableType::KUDU_TABLE: |
| *desc = pool->Add(new KuduTableDescriptor(tdesc)); |
| break; |
| default: |
| DCHECK(false) << "invalid table type: " << tdesc.tableType; |
| } |
| return Status::OK(); |
| } |
| |
| Status DescriptorTbl::Create(ObjectPool* pool, |
| const TDescriptorTableSerialized& serialized_thrift_tbl, DescriptorTbl** tbl) { |
| TDescriptorTable thrift_tbl; |
| RETURN_IF_ERROR(DeserializeThrift(serialized_thrift_tbl, &thrift_tbl)); |
| return CreateInternal(pool, thrift_tbl, tbl); |
| } |
| |
| Status DescriptorTbl::CreateInternal(ObjectPool* pool, const TDescriptorTable& thrift_tbl, |
| DescriptorTbl** tbl) { |
| *tbl = pool->Add(new DescriptorTbl()); |
| // deserialize table descriptors first, they are being referenced by tuple descriptors |
| for (const TTableDescriptor& tdesc: thrift_tbl.tableDescriptors) { |
| TableDescriptor* desc; |
| RETURN_IF_ERROR(CreateTblDescriptorInternal(tdesc, pool, &desc)); |
| (*tbl)->tbl_desc_map_[tdesc.id] = desc; |
| } |
| |
| for (const TTupleDescriptor& tdesc : thrift_tbl.tupleDescriptors) { |
| TupleDescriptor* desc = pool->Add(new TupleDescriptor(tdesc)); |
| // fix up table pointer |
| if (tdesc.__isset.tableId) { |
| desc->table_desc_ = (*tbl)->GetTableDescriptor(tdesc.tableId); |
| } |
| (*tbl)->tuple_desc_map_[tdesc.id] = desc; |
| } |
| |
| for (const TSlotDescriptor& tdesc : thrift_tbl.slotDescriptors) { |
| // Tuple descriptors are already populated in tbl |
| TupleDescriptor* parent = (*tbl)->GetTupleDescriptor(tdesc.parent); |
| DCHECK(parent != nullptr); |
| TupleDescriptor* children_tuple_descriptor = tdesc.__isset.itemTupleId ? |
| (*tbl)->GetTupleDescriptor(tdesc.itemTupleId) : nullptr; |
| SlotDescriptor* slot_d = pool->Add( |
| new SlotDescriptor(tdesc, parent, children_tuple_descriptor)); |
| if (slot_d->type().IsStructType() && children_tuple_descriptor != nullptr && |
| children_tuple_descriptor->getMasterTuple() == nullptr) { |
| TupleDescriptor* master_tuple = parent; |
| // If this struct is nested into other struct(s) then get the topmost tuple for the |
| // master. |
| if (parent->getMasterTuple() != nullptr) master_tuple = parent->getMasterTuple(); |
| children_tuple_descriptor->setMasterTuple(master_tuple); |
| } |
| (*tbl)->slot_desc_map_[tdesc.id] = slot_d; |
| parent->AddSlot(slot_d); |
| } |
| return Status::OK(); |
| } |
| |
| void DescriptorTbl::ReleaseResources() { |
| // close partition exprs of hdfs tables |
| for (auto entry: tbl_desc_map_) { |
| if (entry.second->type() != TTableType::HDFS_TABLE) continue; |
| static_cast<HdfsTableDescriptor*>(entry.second)->ReleaseResources(); |
| } |
| } |
| |
| TableDescriptor* DescriptorTbl::GetTableDescriptor(TableId id) const { |
| TableDescriptorMap::const_iterator i = tbl_desc_map_.find(id); |
| return i == tbl_desc_map_.end() ? nullptr : i->second; |
| } |
| |
| TupleDescriptor* DescriptorTbl::GetTupleDescriptor(TupleId id) const { |
| TupleDescriptorMap::const_iterator i = tuple_desc_map_.find(id); |
| return i == tuple_desc_map_.end() ? nullptr : i->second; |
| } |
| |
| SlotDescriptor* DescriptorTbl::GetSlotDescriptor(SlotId id) const { |
| SlotDescriptorMap::const_iterator i = slot_desc_map_.find(id); |
| return i == slot_desc_map_.end() ? nullptr : i->second; |
| } |
| |
| void DescriptorTbl::GetTupleDescs(vector<TupleDescriptor*>* descs) const { |
| descs->clear(); |
| for (TupleDescriptorMap::const_iterator i = tuple_desc_map_.begin(); |
| i != tuple_desc_map_.end(); ++i) { |
| descs->push_back(i->second); |
| } |
| } |
| |
| void SlotDescriptor::CodegenLoadAnyVal(CodegenAnyVal* any_val, llvm::Value* raw_val_ptr) { |
| DCHECK(raw_val_ptr->getType()->isPointerTy()); |
| llvm::Type* raw_val_type = raw_val_ptr->getType()->getPointerElementType(); |
| LlvmCodeGen* const codegen = any_val->codegen(); |
| LlvmBuilder* const builder = any_val->builder(); |
| const ColumnType& type = any_val->type(); |
| DCHECK_EQ(raw_val_type, codegen->GetSlotType(type)) |
| << endl |
| << LlvmCodeGen::Print(raw_val_ptr) << endl |
| << type << " => " << LlvmCodeGen::Print( |
| codegen->GetSlotType(type)); |
| switch (type.type) { |
| case TYPE_STRING: |
| case TYPE_VARCHAR: { |
| // Convert StringValue to StringVal |
| llvm::Function* str_ptr_fn = codegen->GetFunction( |
| IRFunction::STRING_VALUE_PTR, false); |
| llvm::Function* str_len_fn = codegen->GetFunction( |
| IRFunction::STRING_VALUE_LEN, false); |
| |
| llvm::Value* ptr = builder->CreateCall(str_ptr_fn, |
| llvm::ArrayRef<llvm::Value*>({raw_val_ptr}), "ptr"); |
| llvm::Value* len = builder->CreateCall(str_len_fn, |
| llvm::ArrayRef<llvm::Value*>({raw_val_ptr}), "len"); |
| |
| any_val->SetPtr(ptr); |
| any_val->SetLen(len); |
| break; |
| } |
| case TYPE_CHAR: |
| case TYPE_FIXED_UDA_INTERMEDIATE: { |
| // Convert fixed-size slot to StringVal. |
| any_val->SetPtr(builder->CreateBitCast(raw_val_ptr, codegen->ptr_type())); |
| any_val->SetLen(codegen->GetI32Constant(type.len)); |
| break; |
| } |
| case TYPE_TIMESTAMP: { |
| // Convert TimestampValue to TimestampVal |
| // TimestampValue has type |
| // { boost::posix_time::time_duration, boost::gregorian::date } |
| // = { {{{i64}}}, {{i32}} } |
| |
| llvm::Value* ts_value = builder->CreateLoad(raw_val_ptr, "ts_value"); |
| // Extract time_of_day i64 from boost::posix_time::time_duration. |
| uint32_t time_of_day_idxs[] = {0, 0, 0, 0}; |
| llvm::Value* time_of_day = |
| builder->CreateExtractValue(ts_value, time_of_day_idxs, "time_of_day"); |
| DCHECK(time_of_day->getType()->isIntegerTy(64)); |
| any_val->SetTimeOfDay(time_of_day); |
| // Extract i32 from boost::gregorian::date |
| uint32_t date_idxs[] = {1, 0, 0}; |
| llvm::Value* date = builder->CreateExtractValue(ts_value, date_idxs, "date"); |
| DCHECK(date->getType()->isIntegerTy(32)); |
| any_val->SetDate(date); |
| break; |
| } |
| case TYPE_BOOLEAN: |
| case TYPE_TINYINT: |
| case TYPE_SMALLINT: |
| case TYPE_INT: |
| case TYPE_BIGINT: |
| case TYPE_FLOAT: |
| case TYPE_DOUBLE: |
| case TYPE_DECIMAL: |
| case TYPE_DATE: |
| any_val->SetVal(builder->CreateLoad(raw_val_ptr, "raw_val")); |
| break; |
| default: |
| DCHECK(false) << "NYI: " << type.DebugString(); |
| break; |
| } |
| } |
| |
| llvm::Value* SlotDescriptor::CodegenIsNull( |
| LlvmCodeGen* codegen, LlvmBuilder* builder, llvm::Value* tuple) const { |
| return CodegenIsNull(codegen, builder, null_indicator_offset_, tuple); |
| } |
| |
| // Example IR for getting the first null bit: |
| // %0 = bitcast { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple to i8* |
| // %null_byte_ptr = getelementptr i8, i8* %0, i32 0 |
| // %null_byte = load i8, i8* %null_byte_ptr |
| // %null_mask = and i8 %null_byte, 1 |
| // %is_null = icmp ne i8 %null_mask, 0 |
| llvm::Value* SlotDescriptor::CodegenIsNull(LlvmCodeGen* codegen, LlvmBuilder* builder, |
| const NullIndicatorOffset& null_indicator_offset, llvm::Value* tuple) { |
| llvm::Value* null_byte = |
| CodegenGetNullByte(codegen, builder, null_indicator_offset, tuple, nullptr); |
| llvm::Constant* mask = codegen->GetI8Constant(null_indicator_offset.bit_mask); |
| llvm::Value* null_mask = builder->CreateAnd(null_byte, mask, "null_mask"); |
| llvm::Constant* zero = codegen->GetI8Constant(0); |
| return builder->CreateICmpNE(null_mask, zero, "is_null"); |
| } |
| |
| // Example IR for setting the first null bit to a non-constant 'is_null' value: |
| // %14 = bitcast { i8, [7 x i8], %"class.impala::TimestampValue" }* %agg_tuple to i8* |
| // %null_byte_ptr3 = getelementptr i8, i8* %14, i32 0 |
| // %null_byte4 = load i8, i8* %null_byte_ptr3 |
| // %null_bit_cleared = and i8 %null_byte4, -2 |
| // %15 = sext i1 %result_is_null to i8 |
| // %null_bit = and i8 %15, 1 |
| // %null_bit_set = or i8 %null_bit_cleared, %null_bit |
| // store i8 %null_bit_set, i8* %null_byte_ptr3 |
| void SlotDescriptor::CodegenSetNullIndicator( |
| LlvmCodeGen* codegen, LlvmBuilder* builder, llvm::Value* tuple, llvm::Value* is_null) |
| const { |
| DCHECK_EQ(is_null->getType(), codegen->bool_type()); |
| llvm::Value* null_byte_ptr; |
| llvm::Value* null_byte = |
| CodegenGetNullByte(codegen, builder, null_indicator_offset_, tuple, &null_byte_ptr); |
| llvm::Constant* mask = codegen->GetI8Constant(null_indicator_offset_.bit_mask); |
| llvm::Constant* not_mask = codegen->GetI8Constant(~null_indicator_offset_.bit_mask); |
| |
| llvm::ConstantInt* constant_is_null = llvm::dyn_cast<llvm::ConstantInt>(is_null); |
| llvm::Value* result = nullptr; |
| if (constant_is_null != nullptr) { |
| if (constant_is_null->isOne()) { |
| result = builder->CreateOr(null_byte, mask, "null_bit_set"); |
| } else { |
| DCHECK(constant_is_null->isZero()); |
| result = builder->CreateAnd(null_byte, not_mask, "null_bit_cleared"); |
| } |
| } else { |
| // Avoid branching by computing the new byte as: |
| // (null_byte & ~mask) | (-null & mask); |
| llvm::Value* byte_with_cleared_bit = |
| builder->CreateAnd(null_byte, not_mask, "null_bit_cleared"); |
| llvm::Value* sign_extended_null = |
| builder->CreateSExt(is_null, codegen->i8_type()); |
| llvm::Value* bit_only = builder->CreateAnd(sign_extended_null, mask, "null_bit"); |
| result = builder->CreateOr(byte_with_cleared_bit, bit_only, "null_bit_set"); |
| } |
| |
| builder->CreateStore(result, null_byte_ptr); |
| } |
| |
| // Example IR for materializing a string column with non-NULL 'pool'. Includes the part |
| // that is generated by CodegenAnyVal::ToReadWriteInfo(). |
| // |
| // Produced for the following query as part of the @MaterializeExprs() function. |
| // select string_col from functional_orc_def.alltypes order by string_col limit 2; |
| // |
| // ; [insert point starts here] |
| // br label %entry1 |
| // |
| // entry1: ; preds = %entry |
| // %1 = extractvalue { i64, i8* } %src, 0 |
| // %is_null = trunc i64 %1 to i1 |
| // br i1 %is_null, label %null, label %non_null |
| // |
| // non_null: ; preds = %entry1 |
| // %src2 = extractvalue { i64, i8* } %src, 1 |
| // %2 = extractvalue { i64, i8* } %src, 0 |
| // %3 = ashr i64 %2, 32 |
| // %4 = trunc i64 %3 to i32 |
| // %slot = getelementptr inbounds <{ %"struct.impala::StringValue", i8 }>, |
| // <{ %"struct.impala::StringValue", i8 }>* %tuple, |
| // i32 0, |
| // i32 0 |
| // %5 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %4, 1 |
| // %6 = sext i32 %4 to i64 |
| // %new_ptr = call i8* @_ZN6impala7MemPool8AllocateILb0EEEPhli( |
| // %"class.impala::MemPool"* %pool, |
| // i64 %6, |
| // i32 8) |
| // call void @llvm.memcpy.p0i8.p0i8.i32( |
| // i8* %new_ptr, |
| // i8* %src2, |
| // i32 %4, |
| // i32 0, |
| // i1 false) |
| // %7 = insertvalue %"struct.impala::StringValue" %5, i8* %new_ptr, 0 |
| // store %"struct.impala::StringValue" %7, %"struct.impala::StringValue"* %slot |
| // br label %end_write |
| // |
| // null: ; preds = %entry1 |
| // %8 = bitcast <{ %"struct.impala::StringValue", i8 }>* %tuple to i8* |
| // %null_byte_ptr = getelementptr inbounds i8, i8* %8, i32 12 |
| // %null_byte = load i8, i8* %null_byte_ptr |
| // %null_bit_set = or i8 %null_byte, 1 |
| // store i8 %null_bit_set, i8* %null_byte_ptr |
| // br label %end_write |
| // |
| // end_write: ; preds = %null, %non_null |
| // ; [insert point ends here] |
| void SlotDescriptor::CodegenWriteToSlot(const CodegenAnyValReadWriteInfo& read_write_info, |
| llvm::Value* tuple_llvm_struct_ptr, llvm::Value* pool_val, |
| llvm::BasicBlock* insert_before) const { |
| DCHECK(tuple_llvm_struct_ptr->getType()->isPointerTy()); |
| DCHECK(tuple_llvm_struct_ptr->getType()->getPointerElementType()->isStructTy()); |
| LlvmBuilder* builder = read_write_info.builder(); |
| llvm::LLVMContext& context = read_write_info.codegen()->context(); |
| llvm::Function* fn = builder->GetInsertBlock()->getParent(); |
| |
| // Create new block that will come after conditional blocks if necessary |
| if (insert_before == nullptr) { |
| insert_before = llvm::BasicBlock::Create(context, "end_write", fn); |
| } |
| |
| read_write_info.entry_block().BranchTo(builder); |
| |
| CodegenWriteToSlotHelper(read_write_info, tuple_llvm_struct_ptr, |
| tuple_llvm_struct_ptr, pool_val, NonWritableBasicBlock(insert_before)); |
| |
| // Leave builder_ after conditional blocks |
| builder->SetInsertPoint(insert_before); |
| } |
| |
| llvm::Value* SlotDescriptor::CodegenGetNullByte( |
| LlvmCodeGen* codegen, LlvmBuilder* builder, |
| const NullIndicatorOffset& null_indicator_offset, llvm::Value* tuple, |
| llvm::Value** null_byte_ptr) { |
| llvm::Constant* byte_offset = |
| codegen->GetI32Constant(null_indicator_offset.byte_offset); |
| llvm::Value* tuple_bytes = builder->CreateBitCast(tuple, codegen->ptr_type()); |
| llvm::Value* byte_ptr = |
| builder->CreateInBoundsGEP(tuple_bytes, byte_offset, "null_byte_ptr"); |
| if (null_byte_ptr != nullptr) *null_byte_ptr = byte_ptr; |
| return builder->CreateLoad(byte_ptr, "null_byte"); |
| } |
| |
| // TODO: Maybe separate null handling and non-null-handling so that it is easier to insert |
| // a different null handling mechanism (for example in hash tables when structs are |
| // supported there). |
| void SlotDescriptor::CodegenWriteToSlotHelper( |
| const CodegenAnyValReadWriteInfo& read_write_info, |
| llvm::Value* main_tuple_llvm_struct_ptr, llvm::Value* tuple_llvm_struct_ptr, |
| llvm::Value* pool_val, |
| const NonWritableBasicBlock& insert_before) const { |
| DCHECK(main_tuple_llvm_struct_ptr->getType()->isPointerTy()); |
| DCHECK(main_tuple_llvm_struct_ptr->getType()->getPointerElementType()->isStructTy()); |
| DCHECK(tuple_llvm_struct_ptr->getType()->isPointerTy()); |
| DCHECK(tuple_llvm_struct_ptr->getType()->getPointerElementType()->isStructTy()); |
| LlvmBuilder* builder = read_write_info.builder(); |
| |
| // Non-null block: write slot |
| builder->SetInsertPoint(read_write_info.non_null_block()); |
| llvm::Value* slot = builder->CreateStructGEP(nullptr, tuple_llvm_struct_ptr, |
| llvm_field_idx(), "slot"); |
| if (read_write_info.type().IsStructType()) { |
| CodegenStoreStructToNativePtr(read_write_info, main_tuple_llvm_struct_ptr, |
| slot, pool_val, insert_before); |
| } else { |
| CodegenStoreNonNullAnyVal(read_write_info, slot, pool_val, this, insert_before); |
| |
| // We only need this branch if we are not a struct, because for structs, the last leaf |
| // (non-struct) field will add this branch. |
| insert_before.BranchTo(builder); |
| } |
| |
| // Null block: set null bit |
| builder->SetInsertPoint(read_write_info.null_block()); |
| CodegenSetToNull(read_write_info, main_tuple_llvm_struct_ptr); |
| insert_before.BranchTo(builder); |
| } |
| |
| void SlotDescriptor::CodegenStoreStructToNativePtr( |
| const CodegenAnyValReadWriteInfo& read_write_info, llvm::Value* main_tuple_ptr, |
| llvm::Value* struct_slot_ptr, llvm::Value* pool_val, |
| const NonWritableBasicBlock& insert_before) const { |
| DCHECK(type_.IsStructType()); |
| DCHECK(children_tuple_descriptor_ != nullptr); |
| DCHECK(read_write_info.type().IsStructType()); |
| DCHECK(main_tuple_ptr->getType()->isPointerTy()); |
| DCHECK(main_tuple_ptr->getType()->getPointerElementType()->isStructTy()); |
| DCHECK(struct_slot_ptr->getType()->isPointerTy()); |
| DCHECK(struct_slot_ptr->getType()->getPointerElementType()->isStructTy()); |
| |
| LlvmBuilder* builder = read_write_info.builder(); |
| const std::vector<SlotDescriptor*>& slots = children_tuple_descriptor_->slots(); |
| DCHECK_GE(slots.size(), 1); |
| DCHECK_EQ(slots.size(), read_write_info.children().size()); |
| |
| read_write_info.children()[0].entry_block().BranchTo(builder); |
| for (int i = 0; i < slots.size(); ++i) { |
| const SlotDescriptor* const child_slot_desc = slots[i]; |
| const CodegenAnyValReadWriteInfo& child_read_write_info = |
| read_write_info.children()[i]; |
| |
| NonWritableBasicBlock next_block = i == slots.size() - 1 |
| ? insert_before : read_write_info.children()[i+1].entry_block(); |
| child_slot_desc->CodegenWriteToSlotHelper(child_read_write_info, main_tuple_ptr, |
| struct_slot_ptr, pool_val, next_block); |
| } |
| } |
| |
| // Create a 'CodegenAnyValReadWriteInfo' but without creating basic blocks for null |
| // handling as this function should only be called if we assume that the value is not |
| // null. |
| CodegenAnyValReadWriteInfo CodegenAnyValToReadWriteInfo(CodegenAnyVal& any_val, |
| llvm::Value* pool_val) { |
| CodegenAnyValReadWriteInfo rwi(any_val.codegen(), any_val.builder(), any_val.type()); |
| |
| switch (rwi.type().type) { |
| case TYPE_STRING: |
| case TYPE_VARCHAR: |
| case TYPE_ARRAY: // CollectionVal has same memory layout as StringVal. |
| case TYPE_MAP: { // CollectionVal has same memory layout as StringVal. |
| rwi.SetPtrAndLen(any_val.GetPtr(), any_val.GetLen()); |
| break; |
| } |
| case TYPE_CHAR: |
| rwi.SetPtrAndLen(any_val.GetPtr(), rwi.codegen()->GetI32Constant(rwi.type().len)); |
| break; |
| case TYPE_FIXED_UDA_INTERMEDIATE: |
| DCHECK(false) << "FIXED_UDA_INTERMEDIATE does not need to be copied: the " |
| << "StringVal must be set up to point to the output slot"; |
| break; |
| case TYPE_TIMESTAMP: { |
| rwi.SetTimeAndDate(any_val.GetTimeOfDay(), any_val.GetDate()); |
| break; |
| } |
| case TYPE_BOOLEAN: |
| case TYPE_TINYINT: |
| case TYPE_SMALLINT: |
| case TYPE_INT: |
| case TYPE_BIGINT: |
| case TYPE_FLOAT: |
| case TYPE_DOUBLE: |
| case TYPE_DECIMAL: |
| case TYPE_DATE: |
| // The representations of the types match - just store the value. |
| rwi.SetSimpleVal(any_val.GetVal()); |
| break; |
| case TYPE_STRUCT: |
| DCHECK(false) << "Invalid type for this function. " |
| << "Call 'StoreStructToNativePtr()' instead."; |
| break; |
| default: |
| DCHECK(false) << "NYI: " << rwi.type().DebugString(); |
| break; |
| } |
| |
| return rwi; |
| } |
| |
| void SlotDescriptor::CodegenStoreNonNullAnyVal(CodegenAnyVal& any_val, |
| llvm::Value* raw_val_ptr, llvm::Value* pool_val, |
| const SlotDescriptor* slot_desc, const NonWritableBasicBlock& insert_before) { |
| CodegenAnyValReadWriteInfo rwi = CodegenAnyValToReadWriteInfo(any_val, pool_val); |
| CodegenStoreNonNullAnyVal(rwi, raw_val_ptr, pool_val, slot_desc, insert_before); |
| } |
| |
| void SlotDescriptor::CodegenStoreNonNullAnyVal( |
| const CodegenAnyValReadWriteInfo& read_write_info, llvm::Value* raw_val_ptr, |
| llvm::Value* pool_val, const SlotDescriptor* slot_desc, |
| const NonWritableBasicBlock& insert_before) { |
| LlvmBuilder* builder = read_write_info.builder(); |
| const ColumnType& type = read_write_info.type(); |
| switch (type.type) { |
| case TYPE_STRING: |
| case TYPE_VARCHAR: |
| case TYPE_ARRAY: |
| case TYPE_MAP: { |
| CodegenWriteStringOrCollectionToSlot(read_write_info, raw_val_ptr, |
| pool_val, slot_desc, insert_before); |
| break; |
| } |
| case TYPE_CHAR: |
| read_write_info.codegen()->CodegenMemcpy( |
| builder, raw_val_ptr, read_write_info.GetPtrAndLen().ptr, type.len); |
| break; |
| case TYPE_FIXED_UDA_INTERMEDIATE: |
| DCHECK(false) << "FIXED_UDA_INTERMEDIATE does not need to be copied: the " |
| << "StringVal must be set up to point to the output slot"; |
| break; |
| case TYPE_TIMESTAMP: { |
| llvm::Value* timestamp_value = CodegenToTimestampValue(read_write_info); |
| builder->CreateStore(timestamp_value, raw_val_ptr); |
| break; |
| } |
| case TYPE_BOOLEAN: { |
| llvm::Value* bool_as_i1 = builder->CreateTrunc( |
| read_write_info.GetSimpleVal(), builder->getInt1Ty(), "bool_as_i1"); |
| builder->CreateStore(bool_as_i1, raw_val_ptr); |
| break; |
| } |
| case TYPE_TINYINT: |
| case TYPE_SMALLINT: |
| case TYPE_INT: |
| case TYPE_BIGINT: |
| case TYPE_FLOAT: |
| case TYPE_DOUBLE: |
| case TYPE_DECIMAL: |
| case TYPE_DATE: |
| // The representations of the types match - just store the value. |
| builder->CreateStore(read_write_info.GetSimpleVal(), raw_val_ptr); |
| break; |
| case TYPE_STRUCT: |
| DCHECK(false) << "Invalid type for this function. " |
| << "Call 'StoreStructToNativePtr()' instead."; |
| break; |
| default: |
| DCHECK(false) << "NYI: " << type.DebugString(); |
| break; |
| } |
| } |
| |
| llvm::Value* SlotDescriptor::CodegenStoreNonNullAnyValToNewAlloca( |
| const CodegenAnyValReadWriteInfo& read_write_info, llvm::Value* pool_val) { |
| LlvmCodeGen* codegen = read_write_info.codegen(); |
| llvm::Value* native_ptr = codegen->CreateEntryBlockAlloca(*read_write_info.builder(), |
| codegen->GetSlotType(read_write_info.type())); |
| SlotDescriptor::CodegenStoreNonNullAnyVal(read_write_info, native_ptr, pool_val); |
| return native_ptr; |
| } |
| |
| llvm::Value* SlotDescriptor::CodegenStoreNonNullAnyValToNewAlloca( |
| CodegenAnyVal& any_val, llvm::Value* pool_val) { |
| CodegenAnyValReadWriteInfo rwi = CodegenAnyValToReadWriteInfo(any_val, pool_val); |
| return CodegenStoreNonNullAnyValToNewAlloca(rwi, pool_val); |
| } |
| |
| void SlotDescriptor::CodegenSetToNull(const CodegenAnyValReadWriteInfo& read_write_info, |
| llvm::Value* tuple) const { |
| LlvmCodeGen* codegen = read_write_info.codegen(); |
| LlvmBuilder* builder = read_write_info.builder(); |
| CodegenSetNullIndicator( |
| codegen, builder, tuple, codegen->true_value()); |
| if (type_.IsStructType()) { |
| DCHECK(children_tuple_descriptor_ != nullptr); |
| for (SlotDescriptor* child_slot_desc : children_tuple_descriptor_->slots()) { |
| child_slot_desc->CodegenSetToNull(read_write_info, tuple); |
| } |
| } |
| } |
| |
| // Example IR for materializing an int and an array<string> column with non-NULL 'pool'. |
| // Includes the part that is generated by CodegenAnyVal::ToReadWriteInfo(). |
| // |
| // Produced for the following query as part of the @MaterializeExprs() function. |
| // select id, arr_string_1d from functional_parquet.collection_tbl order by id limit 2; |
| // |
| // ; [insert point starts here] |
| // br label %entry1 |
| // |
| // entry1: ; preds = %entry |
| // %is_null = trunc i64 %src to i1 |
| // br i1 %is_null, label %null, label %non_null |
| // |
| // non_null: ; preds = %entry1 |
| // %1 = ashr i64 %src, 32 |
| // %2 = trunc i64 %1 to i32 |
| // %slot = getelementptr inbounds <{ %"struct.impala::CollectionValue", i32, i8 }>, |
| // <{ %"struct.impala::CollectionValue", i32, i8 }>* %tuple, i32 0, i32 1 |
| // store i32 %2, i32* %slot |
| // br label %end_write |
| // |
| // null: ; preds = %entry1 |
| // %3 = bitcast <{ %"struct.impala::CollectionValue", i32, i8 }>* %tuple to i8* |
| // %null_byte_ptr = getelementptr inbounds i8, i8* %3, i32 16 |
| // %null_byte = load i8, i8* %null_byte_ptr |
| // %null_bit_set = or i8 %null_byte, 2 |
| // store i8 %null_bit_set, i8* %null_byte_ptr |
| // br label %end_write |
| // |
| // end_write: ; preds = %null, %non_null |
| // %4 = getelementptr %"class.impala::ScalarExprEvaluator"*, |
| // %"class.impala::ScalarExprEvaluator"** %slot_materialize_expr_evals, i32 1 |
| // %expr_eval2 = load %"class.impala::ScalarExprEvaluator"*, |
| // %"class.impala::ScalarExprEvaluator"** %4 |
| // %src3 = call { i64, i8* } @GetSlotRef.4( |
| // %"class.impala::ScalarExprEvaluator"* %expr_eval2, |
| // %"class.impala::TupleRow"* %row) |
| // br label %entry4 |
| // |
| // entry4: ; preds = %end_write |
| // %5 = extractvalue { i64, i8* } %src3, 0 |
| // %is_null7 = trunc i64 %5 to i1 |
| // br i1 %is_null7, label %null6, label %non_null5 |
| // |
| // non_null5: ; preds = %entry4 |
| // %src8 = extractvalue { i64, i8* } %src3, 1 |
| // %6 = extractvalue { i64, i8* } %src3, 0 |
| // %7 = ashr i64 %6, 32 |
| // %8 = trunc i64 %7 to i32 |
| // %slot10 = getelementptr inbounds <{ %"struct.impala::CollectionValue", i32, i8 }>, |
| // <{ %"struct.impala::CollectionValue", i32, i8 }>* %tuple, i32 0, i32 0 |
| // %9 = insertvalue %"struct.impala::CollectionValue" zeroinitializer, i32 %8, 1 |
| // %coll_tuple_byte_len = mul i32 %8, 13 |
| // %10 = sext i32 %coll_tuple_byte_len to i64 |
| // %new_coll_val_ptr = call i8* @_ZN6impala7MemPool8AllocateILb0EEEPhli( |
| // %"class.impala::MemPool"* %pool, i64 %10, i32 8) |
| // call void @llvm.memcpy.p0i8.p0i8.i32(i8* %new_coll_val_ptr, i8* %src8, |
| // i32 %coll_tuple_byte_len, i32 0, i1 false) |
| // %11 = insertvalue %"struct.impala::CollectionValue" %9, i8* %new_coll_val_ptr, 0 |
| // store i32 0, i32* %item_index_addr |
| // br label %loop_condition_block |
| // |
| // null6: ; preds = %entry4 |
| // %12 = bitcast <{ %"struct.impala::CollectionValue", i32, i8 }>* %tuple to i8* |
| // %null_byte_ptr14 = getelementptr inbounds i8, i8* %12, i32 16 |
| // %null_byte15 = load i8, i8* %null_byte_ptr14 |
| // %null_bit_set16 = or i8 %null_byte15, 1 |
| // store i8 %null_bit_set16, i8* %null_byte_ptr14 |
| // br label %end_write9 |
| // |
| // loop_condition_block: ; preds = %loop_increment_block, %non_null5 |
| // %item_index = load i32, i32* %item_index_addr |
| // %continue_loop = icmp slt i32 %item_index, %8 |
| // br i1 %continue_loop, label %loop_body_block, label %loop_exit_block |
| // |
| // loop_body_block: ; preds = %loop_condition_block |
| // %children_tuple_array = bitcast i8* %new_coll_val_ptr |
| // to <{ %"class.impala::StringValue", i8 }>* |
| // %children_tuple = getelementptr inbounds <{ %"class.impala::StringValue", i8 }>, |
| // <{ %"class.impala::StringValue", i8 }>* %children_tuple_array, i32 %item_index |
| // %13 = bitcast <{ %"class.impala::StringValue", i8 }>* %children_tuple to i8* |
| // %null_byte_ptr11 = getelementptr inbounds i8, i8* %13, i32 12 |
| // %null_byte12 = load i8, i8* %null_byte_ptr11 |
| // %null_mask = and i8 %null_byte12, 1 |
| // %is_null13 = icmp ne i8 %null_mask, 0 |
| // br i1 %is_null13, label %next_block_after_child_is_written, |
| // label %child_non_null_block |
| // |
| // loop_increment_block: ; preds = %next_block_after_child_is_written |
| // %item_index_incremented = add i32 %item_index, 1 |
| // store i32 %item_index_incremented, i32* %item_index_addr |
| // br label %loop_condition_block |
| // |
| // loop_exit_block: ; preds = %loop_condition_block |
| // store %"struct.impala::CollectionValue" %11, |
| // %"struct.impala::CollectionValue"* %slot10 |
| // br label %end_write9 |
| // |
| // child_non_null_block: ; preds = %loop_body_block |
| // %child_str_or_coll_value_addr = getelementptr inbounds |
| // <{ %"class.impala::StringValue", i8 }>, |
| // <{ %"class.impala::StringValue", i8 }>* %children_tuple, i32 0, i32 0 |
| // %child_str_or_coll_value_ptr = call i8* @_ZNK6impala11StringValue5IrPtrEv( |
| // %"class.impala::StringValue"* %child_str_or_coll_value_addr) |
| // %child_str_or_coll_value_len = call i32 @_ZNK6impala11StringValue5IrLenEv( |
| // %"class.impala::StringValue"* %child_str_or_coll_value_addr) |
| // %14 = sext i32 %child_str_or_coll_value_len to i64 |
| // %new_ptr = call i8* @_ZN6impala7MemPool8AllocateILb0EEEPhli( |
| // %"class.impala::MemPool"* %pool, i64 %14, i32 8) |
| // call void @llvm.memcpy.p0i8.p0i8.i32(i8* %new_ptr, i8* %child_str_or_coll_value_ptr, |
| // i32 %child_str_or_coll_value_len, i32 0, i1 false) |
| // call void @_ZN6impala11StringValue8IrAssignEPci( |
| // %"class.impala::StringValue"* %child_str_or_coll_value_addr, i8* %new_ptr, |
| // i32 %child_str_or_coll_value_len) |
| // br label %next_block_after_child_is_written |
| // |
| // next_block_after_child_is_written: ; preds = %child_non_null_block, %loop_body_block |
| // br label %loop_increment_block |
| // |
| // end_write9: ; preds = %null6, %loop_exit_block |
| // ; [insert point ends here] |
| void SlotDescriptor::CodegenWriteStringOrCollectionToSlot( |
| const CodegenAnyValReadWriteInfo& read_write_info, |
| llvm::Value* slot_ptr, llvm::Value* pool_val, const SlotDescriptor* slot_desc, |
| const NonWritableBasicBlock& insert_before) { |
| const ColumnType& type = read_write_info.type(); |
| if (type.IsStringType()) { |
| CodegenWriteStringToSlot(read_write_info, slot_ptr, pool_val, slot_desc); |
| } else { |
| DCHECK(type.IsCollectionType()); |
| CodegenWriteCollectionToSlot(read_write_info, slot_ptr, pool_val, slot_desc, |
| insert_before); |
| } |
| } |
| |
| namespace { |
| constexpr int COLL_VALUE_PTR_IDX = 0; |
| constexpr int COLL_VALUE_LEN_IDX = 1; |
| |
| llvm::Value* CodegenStrOrCollValueGetPtr(LlvmCodeGen* codegen, LlvmBuilder* builder, |
| llvm::Value* str_or_coll_value_addr, const string& name = "") { |
| if (str_or_coll_value_addr->getType() == |
| codegen->GetStructType<StringValue>()->getPointerTo()) { |
| llvm::Function* str_ptr_fn = codegen->GetFunction( |
| IRFunction::STRING_VALUE_PTR, false); |
| return builder->CreateCall(str_ptr_fn, |
| llvm::ArrayRef<llvm::Value*>({str_or_coll_value_addr}), name); |
| } else { |
| DCHECK(str_or_coll_value_addr->getType() == |
| codegen->GetStructType<CollectionValue>()->getPointerTo()); |
| llvm::Value* ptr_addr = builder->CreateStructGEP(nullptr, str_or_coll_value_addr, |
| COLL_VALUE_PTR_IDX, name + "_addr"); |
| return builder->CreateLoad(ptr_addr, name); |
| } |
| } |
| |
| llvm::Value* CodegenStrOrCollValueGetLen(LlvmCodeGen* codegen, LlvmBuilder* builder, |
| llvm::Value* str_or_coll_value_addr, const string& name = "") { |
| if (str_or_coll_value_addr->getType() == |
| codegen->GetStructType<StringValue>()->getPointerTo()) { |
| llvm::Function* str_len_fn = codegen->GetFunction( |
| IRFunction::STRING_VALUE_LEN, false); |
| return builder->CreateCall(str_len_fn, |
| llvm::ArrayRef<llvm::Value*>({str_or_coll_value_addr}), name); |
| } else { |
| DCHECK(str_or_coll_value_addr->getType() == |
| codegen->GetStructType<CollectionValue>()->getPointerTo()); |
| llvm::Value* len_addr = builder->CreateStructGEP(nullptr, str_or_coll_value_addr, |
| COLL_VALUE_LEN_IDX, name + "_addr"); |
| return builder->CreateLoad(len_addr, name); |
| } |
| } |
| |
| llvm::Value* CodegenCollValueSetPtr(LlvmCodeGen* codegen, LlvmBuilder* builder, |
| llvm::Value* str_or_coll_value, llvm::Value* ptr, const string& name = "") { |
| DCHECK(str_or_coll_value->getType() == codegen->GetStructType<CollectionValue>()); |
| return builder->CreateInsertValue(str_or_coll_value, ptr, COLL_VALUE_PTR_IDX, |
| name); |
| } |
| |
| llvm::Value* CodegenCollValueSetLen(LlvmCodeGen* codegen, LlvmBuilder* builder, |
| llvm::Value* str_or_coll_value, llvm::Value* len, const string& name = "") { |
| DCHECK(str_or_coll_value->getType() == codegen->GetStructType<CollectionValue>()); |
| return builder->CreateInsertValue(str_or_coll_value, len, COLL_VALUE_LEN_IDX, |
| name); |
| } |
| } /* anonymous namespace */ |
| |
| void SlotDescriptor::CodegenWriteCollectionToSlot( |
| const CodegenAnyValReadWriteInfo& read_write_info, |
| llvm::Value* slot_ptr, llvm::Value* pool_val, const SlotDescriptor* slot_desc, |
| const NonWritableBasicBlock& insert_before) { |
| LlvmCodeGen* codegen = read_write_info.codegen(); |
| LlvmBuilder* builder = read_write_info.builder(); |
| const ColumnType& type = read_write_info.type(); |
| DCHECK(type.IsCollectionType()); |
| |
| // Convert to 'CollectionValue'. |
| llvm::Type* raw_type = codegen->GetSlotType(type); |
| llvm::Value* coll_value = llvm::Constant::getNullValue(raw_type); |
| coll_value = CodegenCollValueSetLen(codegen, builder, coll_value, |
| read_write_info.GetPtrAndLen().len); |
| if (pool_val != nullptr) { |
| llvm::Value* num_tuples = read_write_info.GetPtrAndLen().len; |
| DCHECK(slot_desc != nullptr) << "SlotDescriptor needed to calculate the size of " |
| << "the collection for copying."; |
| // For a 'CollectionValue', 'len' is not the byte size of the whole data but the |
| // number of items, so we have to multiply it with the byte size of the item tuple |
| // to get the data size. |
| int item_tuple_byte_size = slot_desc->children_tuple_descriptor()->byte_size(); |
| llvm::Value* byte_len = builder->CreateMul(num_tuples, |
| codegen->GetI32Constant(item_tuple_byte_size), "coll_tuple_byte_len"); |
| |
| // Allocate a 'new_ptr' from 'pool_val' and copy the data from 'read_write_info->ptr'. |
| llvm::Value* new_ptr = codegen->CodegenMemPoolAllocate( |
| builder, pool_val, byte_len, "new_coll_val_ptr"); |
| codegen->CodegenMemcpy(builder, new_ptr, read_write_info.GetPtrAndLen().ptr, |
| byte_len); |
| coll_value = CodegenCollValueSetPtr(codegen, builder, coll_value, new_ptr); |
| |
| slot_desc->CodegenWriteCollectionItemsToSlot(codegen, builder, new_ptr, |
| read_write_info.GetPtrAndLen().len, pool_val, insert_before); |
| } else { |
| coll_value = CodegenCollValueSetPtr(codegen, builder, coll_value, |
| read_write_info.GetPtrAndLen().ptr); |
| } |
| builder->CreateStore(coll_value, slot_ptr); |
| } |
| |
| void SlotDescriptor::CodegenWriteStringToSlot( |
| const CodegenAnyValReadWriteInfo& read_write_info, |
| llvm::Value* slot_ptr, llvm::Value* pool_val, const SlotDescriptor* slot_desc) { |
| LlvmCodeGen* codegen = read_write_info.codegen(); |
| LlvmBuilder* builder = read_write_info.builder(); |
| const ColumnType& type = read_write_info.type(); |
| DCHECK(type.IsStringType()); |
| |
| llvm::Value* ptr = read_write_info.GetPtrAndLen().ptr; |
| llvm::Value* len = read_write_info.GetPtrAndLen().len; |
| if (pool_val != nullptr) { |
| // Allocate a 'new_ptr' from 'pool_val' and copy the data from 'read_write_info->ptr'. |
| llvm::Value* new_ptr = codegen->CodegenMemPoolAllocate( |
| builder, pool_val, len, "new_ptr"); |
| codegen->CodegenMemcpy(builder, new_ptr, ptr, len); |
| ptr = new_ptr; |
| } |
| llvm::Function* str_assign_fn = codegen->GetFunction( |
| IRFunction::STRING_VALUE_ASSIGN, false); |
| builder->CreateCall(str_assign_fn, |
| llvm::ArrayRef<llvm::Value*>({slot_ptr, ptr, len})); |
| } |
| |
| void SlotDescriptor::CodegenWriteCollectionItemsToSlot(LlvmCodeGen* codegen, |
| LlvmBuilder* builder, llvm::Value* collection_value_ptr, llvm::Value* num_tuples, |
| llvm::Value* pool_val, const NonWritableBasicBlock& insert_before) const { |
| DCHECK(pool_val != nullptr); |
| // We construct a while-like loop using basic blocks and conditional branches to iterate |
| // through the items of the collection, recursively. |
| llvm::Function* fn = builder->GetInsertBlock()->getParent(); |
| llvm::BasicBlock* loop_condition_block = insert_before.CreateBasicBlockBefore( |
| codegen->context(), "loop_condition_block", fn); |
| llvm::BasicBlock* loop_body_block = insert_before.CreateBasicBlockBefore( |
| codegen->context(), "loop_body_block", fn); |
| llvm::BasicBlock* loop_increment_block = insert_before.CreateBasicBlockBefore( |
| codegen->context(), "loop_increment_block", fn); |
| llvm::BasicBlock* loop_exit_block = insert_before.CreateBasicBlockBefore( |
| codegen->context(), "loop_exit_block", fn); |
| |
| // Initialise the loop counter. |
| llvm::Value* item_index_addr = codegen->CreateEntryBlockAlloca( |
| *builder, codegen->i32_type(), "item_index_addr"); |
| builder->CreateStore(codegen->GetI32Constant(0), item_index_addr); |
| |
| builder->CreateBr(loop_condition_block); |
| |
| // Loop condition block |
| builder->SetInsertPoint(loop_condition_block); |
| llvm::Value* item_index = builder->CreateLoad(item_index_addr, "item_index"); |
| llvm::Value* continue_loop = builder->CreateICmpSLT( |
| item_index, num_tuples, "continue_loop"); |
| builder->CreateCondBr(continue_loop, loop_body_block, loop_exit_block); |
| |
| // Loop body |
| builder->SetInsertPoint(loop_body_block); |
| CodegenWriteCollectionItemLoopBody(codegen, builder, collection_value_ptr, num_tuples, |
| item_index, fn, insert_before, pool_val); |
| builder->CreateBr(loop_increment_block); |
| |
| // Loop increment |
| builder->SetInsertPoint(loop_increment_block); |
| llvm::Value* item_index_incremented = builder->CreateAdd( |
| item_index, codegen->GetI32Constant(1), "item_index_incremented"); |
| builder->CreateStore(item_index_incremented, item_index_addr); |
| builder->CreateBr(loop_condition_block); |
| |
| // Loop exit |
| builder->SetInsertPoint(loop_exit_block); |
| } |
| |
| void SlotDescriptor::CodegenWriteCollectionItemLoopBody(LlvmCodeGen* codegen, |
| LlvmBuilder* builder, llvm::Value* collection_value_ptr, llvm::Value* num_tuples, |
| llvm::Value* item_index, llvm::Function* fn, |
| const NonWritableBasicBlock& insert_before, llvm::Value* pool_val) const { |
| DCHECK(pool_val != nullptr); |
| const TupleDescriptor* children_tuple_desc = children_tuple_descriptor(); |
| DCHECK(children_tuple_desc != nullptr); |
| |
| llvm::Type* children_tuple_struct_type = children_tuple_desc->GetLlvmStruct(codegen); |
| DCHECK(children_tuple_struct_type != nullptr); |
| llvm::PointerType* children_tuple_type = codegen->GetPtrType( |
| children_tuple_struct_type); |
| |
| llvm::Value* children_tuple_array = builder->CreateBitCast(collection_value_ptr, |
| children_tuple_type, "children_tuple_array"); |
| llvm::Value* children_tuple = builder->CreateInBoundsGEP(children_tuple_array, |
| item_index, "children_tuple"); |
| |
| CodegenWriteCollectionIterateOverChildren(codegen, builder, children_tuple, |
| children_tuple, fn, insert_before, pool_val); |
| } |
| |
| void SlotDescriptor::CodegenWriteCollectionIterateOverChildren(LlvmCodeGen* codegen, |
| LlvmBuilder* builder, llvm::Value* master_tuple, llvm::Value* children_tuple, |
| llvm::Function* fn, const NonWritableBasicBlock& insert_before, |
| llvm::Value* pool_val) const { |
| DCHECK(pool_val != nullptr); |
| const TupleDescriptor* children_tuple_desc = children_tuple_descriptor(); |
| DCHECK(children_tuple_desc != nullptr); |
| |
| for (const SlotDescriptor* child_slot_desc : children_tuple_desc->slots()) { |
| DCHECK(child_slot_desc != nullptr); |
| |
| const ColumnType& child_type = child_slot_desc->type(); |
| if (child_type.IsVarLenStringType() || child_type.IsCollectionType()) { |
| child_slot_desc->CodegenWriteCollectionVarlenChild(codegen, builder, master_tuple, |
| children_tuple, fn, insert_before, pool_val); |
| } else if (child_type.IsStructType()) { |
| child_slot_desc->CodegenWriteCollectionStructChild(codegen, builder, |
| master_tuple, children_tuple, fn, insert_before, pool_val); |
| } |
| } |
| } |
| |
| void SlotDescriptor::CodegenWriteCollectionStructChild(LlvmCodeGen* codegen, |
| LlvmBuilder* builder, llvm::Value* master_tuple, llvm::Value* tuple, |
| llvm::Function* fn, const NonWritableBasicBlock& insert_before, |
| llvm::Value* pool_val) const { |
| DCHECK(type().IsStructType()); |
| |
| const TupleDescriptor* children_tuple_desc = children_tuple_descriptor(); |
| DCHECK(children_tuple_desc != nullptr); |
| |
| llvm::Value* children_tuple = builder->CreateStructGEP(nullptr, tuple, |
| llvm_field_idx(), "struct_children_tuple"); |
| |
| // TODO IMPALA-12775: Check whether the struct itself is NULL. |
| CodegenWriteCollectionIterateOverChildren(codegen, builder, master_tuple, |
| children_tuple, fn, insert_before, pool_val); |
| } |
| |
| void SlotDescriptor::CodegenWriteCollectionVarlenChild(LlvmCodeGen* codegen, |
| LlvmBuilder* builder, llvm::Value* master_tuple, llvm::Value* children_tuple, |
| llvm::Function* fn, const NonWritableBasicBlock& insert_before, |
| llvm::Value* pool_val) const { |
| DCHECK(pool_val != nullptr); |
| DCHECK(type_.IsVarLenStringType() || type_.IsCollectionType()); |
| |
| llvm::BasicBlock* child_non_null_block = insert_before.CreateBasicBlockBefore( |
| codegen->context(), "child_non_null_block", fn); |
| llvm::BasicBlock* child_written_block = insert_before.CreateBasicBlockBefore( |
| codegen->context(), "next_block_after_child_is_written", fn); |
| |
| llvm::Value* child_is_null = CodegenIsNull(codegen, builder, master_tuple); |
| builder->CreateCondBr(child_is_null, child_written_block, child_non_null_block); |
| |
| // Note: Although the input of CodegenWriteStringOrCollectionToSlot() is a '*Val', not a |
| // '*Value', the items of a collection are still '*Value' objects, because the pointer |
| // of the collection points to an array of tuples (the items). 'StringValue' has Small |
| // String Optimisation, but smallness is not preserved here: even if the 'StringValue' |
| // was originally small, the new copy will be a long string. |
| builder->SetInsertPoint(child_non_null_block); |
| llvm::Value* child_str_or_coll_value_slot = builder->CreateStructGEP(nullptr, |
| children_tuple, llvm_field_idx(), "child_str_or_coll_value_addr"); |
| llvm::Value* child_str_or_coll_value_ptr = CodegenStrOrCollValueGetPtr(codegen, builder, |
| child_str_or_coll_value_slot, "child_str_or_coll_value_ptr"); |
| llvm::Value* child_str_or_coll_value_len = CodegenStrOrCollValueGetLen(codegen, builder, |
| child_str_or_coll_value_slot, "child_str_or_coll_value_len"); |
| |
| CodegenAnyValReadWriteInfo child_rwi(codegen, builder, type()); |
| child_rwi.SetPtrAndLen(child_str_or_coll_value_ptr, child_str_or_coll_value_len); |
| |
| CodegenWriteStringOrCollectionToSlot(child_rwi, child_str_or_coll_value_slot, pool_val, |
| this, insert_before); |
| builder->CreateBr(child_written_block); |
| builder->SetInsertPoint(child_written_block); |
| } |
| |
| llvm::Value* SlotDescriptor::CodegenToTimestampValue( |
| const CodegenAnyValReadWriteInfo& read_write_info) { |
| const ColumnType& type = read_write_info.type(); |
| DCHECK_EQ(type.type, TYPE_TIMESTAMP); |
| // Convert TimestampVal to TimestampValue |
| // TimestampValue has type |
| // { boost::posix_time::time_duration, boost::gregorian::date } |
| // = { {{{i64}}}, {{i32}} } |
| llvm::Type* raw_type = read_write_info.codegen()->GetSlotType(type); |
| llvm::Value* timestamp_value = llvm::Constant::getNullValue(raw_type); |
| uint32_t time_of_day_idxs[] = {0, 0, 0, 0}; |
| |
| LlvmBuilder* builder = read_write_info.builder(); |
| timestamp_value = builder->CreateInsertValue( |
| timestamp_value, read_write_info.GetTimeAndDate().time_of_day, time_of_day_idxs); |
| uint32_t date_idxs[] = {1, 0, 0}; |
| timestamp_value = builder->CreateInsertValue( |
| timestamp_value, read_write_info.GetTimeAndDate().date, date_idxs); |
| return timestamp_value; |
| } |
| |
| vector<SlotDescriptor*> TupleDescriptor::SlotsOrderedByIdx() const { |
| vector<SlotDescriptor*> sorted_slots(slots().size()); |
| for (SlotDescriptor* slot: slots()) sorted_slots[slot->slot_idx_] = slot; |
| // Check that the size of sorted_slots has not changed. This ensures that the series |
| // of slot indexes starts at 0 and increases by 1 for each slot. This also ensures that |
| // the returned vector has no nullptr elements. |
| DCHECK_EQ(slots().size(), sorted_slots.size()); |
| return sorted_slots; |
| } |
| |
| llvm::StructType* TupleDescriptor::GetLlvmStruct(LlvmCodeGen* codegen) const { |
| int curr_struct_offset = 0; |
| auto struct_fields_and_offset = GetLlvmTypesAndOffset(codegen, curr_struct_offset); |
| vector<llvm::Type*> struct_fields = struct_fields_and_offset.first; |
| curr_struct_offset = struct_fields_and_offset.second; |
| |
| // For each null byte, add a byte to the struct |
| for (int i = 0; i < num_null_bytes_; ++i) { |
| struct_fields.push_back(codegen->i8_type()); |
| ++curr_struct_offset; |
| } |
| |
| DCHECK_LE(curr_struct_offset, byte_size_); |
| if (curr_struct_offset < byte_size_) { |
| struct_fields.push_back(llvm::ArrayType::get(codegen->i8_type(), |
| byte_size_ - curr_struct_offset)); |
| } |
| |
| return CreateLlvmStructTypeFromFieldTypes(codegen, struct_fields, 0); |
| } |
| |
| pair<vector<llvm::Type*>, int> TupleDescriptor::GetLlvmTypesAndOffset( |
| LlvmCodeGen* codegen, int curr_struct_offset) const { |
| // Get slots in the order they will appear in LLVM struct. |
| vector<SlotDescriptor*> sorted_slots = SlotsOrderedByIdx(); |
| |
| // Add the slot types to the struct description. |
| vector<llvm::Type*> struct_fields; |
| for (SlotDescriptor* slot: sorted_slots) { |
| DCHECK_EQ(curr_struct_offset, slot->tuple_offset()); |
| if (slot->type().IsStructType()) { |
| const int slot_offset = slot->tuple_offset(); |
| const TupleDescriptor* children_tuple = slot->children_tuple_descriptor(); |
| DCHECK(children_tuple != nullptr); |
| vector<llvm::Type*> child_field_types = children_tuple->GetLlvmTypesAndOffset( |
| codegen, curr_struct_offset).first; |
| llvm::StructType* struct_type = children_tuple->CreateLlvmStructTypeFromFieldTypes( |
| codegen, child_field_types, slot_offset); |
| struct_fields.push_back(struct_type); |
| } else { |
| struct_fields.push_back(codegen->GetSlotType(slot->type())); |
| } |
| curr_struct_offset = slot->tuple_offset() + slot->slot_size(); |
| } |
| return make_pair(struct_fields, curr_struct_offset); |
| } |
| |
| llvm::StructType* TupleDescriptor::CreateLlvmStructTypeFromFieldTypes( |
| LlvmCodeGen* codegen, const vector<llvm::Type*>& field_types, |
| int parent_slot_offset) const { |
| // Construct the struct type. Use the packed layout although not strictly necessary |
| // because the fields are already aligned, so LLVM should not add any padding. The |
| // fields are already aligned because we order the slots by descending size and only |
| // have powers-of-two slot sizes. Note that STRING and TIMESTAMP slots both occupy |
| // 16 bytes although their useful payload is only 12 bytes. |
| llvm::StructType* tuple_struct = llvm::StructType::get(codegen->context(), |
| llvm::ArrayRef<llvm::Type*>(field_types), true); |
| DCHECK(tuple_struct != nullptr); |
| const llvm::DataLayout& data_layout = codegen->execution_engine()->getDataLayout(); |
| const llvm::StructLayout* layout = data_layout.getStructLayout(tuple_struct); |
| for (SlotDescriptor* slot: slots()) { |
| // Verify that the byte offset in the llvm struct matches the tuple offset |
| // computed in the FE. |
| DCHECK_EQ(layout->getElementOffset(slot->llvm_field_idx()) + parent_slot_offset, |
| slot->tuple_offset()) << id_; |
| } |
| return tuple_struct; |
| } |
| |
| string DescriptorTbl::DebugString() const { |
| stringstream out; |
| out << "tuples:\n"; |
| for (TupleDescriptorMap::const_iterator i = tuple_desc_map_.begin(); |
| i != tuple_desc_map_.end(); ++i) { |
| out << i->second->DebugString() << '\n'; |
| } |
| return out.str(); |
| } |
| |
| std::ostream& operator<<(std::ostream& out, |
| const TDescriptorTableSerialized& serial_tbl) { |
| out << "TDescriptorTableSerialized("; |
| TDescriptorTable desc_tbl; |
| if (DescriptorTbl::DeserializeThrift(serial_tbl, &desc_tbl).ok()) { |
| out << desc_tbl; |
| } else { |
| const uint8_t* p = |
| reinterpret_cast<const uint8_t*>(serial_tbl.thrift_desc_tbl.data()); |
| const uint8_t* const end = p + serial_tbl.thrift_desc_tbl.length(); |
| while (p != end) { |
| out << ios::hex << (int)*p++; |
| } |
| } |
| out << ")"; |
| return out; |
| } |
| |
| } |
| |