// blob: d0888d4622c43ee7778dfa68fa1fe404318781c7 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <type_traits>
#include "runtime/primitive_type.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_decimal.h"
#include "vec/columns/column_string.h"
#include "vec/data_types/data_type_array.h"
#include "vec/functions/array/function_array_utils.h"
#include "vec/functions/function_helpers.h"
namespace doris::vectorized {
// Kinds of array set operations that can be selected at compile time.
enum class SetOperation {
    UNION,
    EXCEPT,
};

// Per-operation action policies. They are only declared here; their
// definitions are not part of this header. OpenSetImpl uses the selected
// action through reset(), apply_null<is_left>() and
// apply<is_left>(set, result_set, element), and ArraySetImpl reads its
// static member `execute_left_column_first`.
template <typename SetType, typename ElementType>
struct UnionAction;

template <typename SetType, typename ElementType>
struct ExceptAction;

// Compile-time mapping from a SetOperation value to its action policy,
// exposed as the nested alias `Action`. Only specializations are usable;
// the primary template is intentionally left undefined.
template <typename SetType, typename ElementType, SetOperation operation>
struct ActionImpl;

// SetOperation::EXCEPT is handled by ExceptAction.
template <typename SetType, typename ElementType>
struct ActionImpl<SetType, ElementType, SetOperation::EXCEPT> {
    using Action = ExceptAction<SetType, ElementType>;
};
// Working state for evaluating one set operation over array rows whose nested
// column is `ColumnType` (a column exposing `value_type` and `get_data()`).
//
// The two hash sets are handed to the action on every non-null element; what
// the action stores in each of them is decided by the action itself, which is
// defined outside this header.
template <SetOperation operation, typename ColumnType>
struct OpenSetImpl {
    using Element = typename ColumnType::value_type;
    using ElementNativeType = typename NativeType<Element>::Type;
    using Set = phmap::flat_hash_set<ElementNativeType>;
    using Action = typename ActionImpl<Set, Element, operation>::Action;

    Action action;
    Set set;
    Set result_set;

    // Clears both sets and resets the action so the next row starts fresh.
    void reset() {
        set.clear();
        result_set.clear();
        action.reset();
    }

    // Feeds the nested elements [off, off + len) of `src` to the action and
    // appends every element the action accepts to `dst`.
    //
    // is_left: forwarded to the action; tells it which operand `src` is.
    // count:   incremented once per element appended to `dst`.
    template <bool is_left>
    void apply(const ColumnArrayExecutionData& src, size_t off, size_t len,
               ColumnArrayMutableData& dst, size_t* count) {
        const auto& input = assert_cast<const ColumnType&>(*src.nested_col).get_data();
        auto& output = assert_cast<ColumnType&>(*dst.nested_col).get_data();

        const size_t end = off + len;
        for (size_t pos = off; pos < end; ++pos) {
            const bool is_null = src.nested_nullmap_data && src.nested_nullmap_data[pos];

            if (is_null) {
                if (!action.template apply_null<is_left>()) {
                    continue;
                }
                // A NULL is emitted as a default value flagged in the null map.
                // The destination null map is dereferenced unconditionally here;
                // ArraySetImpl::execute requests a nullable destination whenever
                // either input carries a null map.
                output.push_back(Element());
                dst.nested_nullmap_data->push_back(1);
                *count += 1;
                continue;
            }

            const auto& value = input[pos];
            if (!action.template apply<is_left>(set, result_set, value)) {
                continue;
            }
            output.push_back(value);
            if (dst.nested_nullmap_data) {
                dst.nested_nullmap_data->push_back(0);
            }
            *count += 1;
        }
    }
};
// String flavour of OpenSetImpl: elements are looked up as StringRef values
// obtained from the source column via get_data_at(), and accepted elements are
// copied into the destination with insert_from().
template <SetOperation operation>
struct OpenSetImpl<operation, ColumnString> {
    using Set = phmap::flat_hash_set<StringRef>;
    using Action = typename ActionImpl<Set, StringRef, operation>::Action;

    Action action;
    Set set;
    Set result_set;

    // Clears both sets and resets the action so the next row starts fresh.
    void reset() {
        set.clear();
        result_set.clear();
        action.reset();
    }

    // Feeds the nested strings [off, off + len) of `src` to the action and
    // appends every string the action accepts to `dst`.
    //
    // is_left: forwarded to the action; tells it which operand `src` is.
    // count:   incremented once per element appended to `dst`.
    template <bool is_left>
    void apply(const ColumnArrayExecutionData& src, size_t off, size_t len,
               ColumnArrayMutableData& dst, size_t* count) {
        const auto& input = assert_cast<const ColumnString&>(*src.nested_col);
        auto& output = assert_cast<ColumnString&>(*dst.nested_col);

        const size_t end = off + len;
        for (size_t pos = off; pos < end; ++pos) {
            const bool is_null = src.nested_nullmap_data && src.nested_nullmap_data[pos];

            if (is_null) {
                if (!action.template apply_null<is_left>()) {
                    continue;
                }
                // A NULL is emitted as a default string flagged in the null map.
                // The destination null map is dereferenced unconditionally here;
                // ArraySetImpl::execute requests a nullable destination whenever
                // either input carries a null map.
                output.insert_default();
                dst.nested_nullmap_data->push_back(1);
                *count += 1;
                continue;
            }

            if (!action.template apply<is_left>(set, result_set, input.get_data_at(pos))) {
                continue;
            }
            output.insert_from(input, pos);
            if (dst.nested_nullmap_data) {
                dst.nested_nullmap_data->push_back(0);
            }
            *count += 1;
        }
    }
};
// Entry points for a binary array set function parameterised by `operation`.
template <SetOperation operation>
struct ArraySetImpl {
public:
    // Picks the result type: the first argument whose nested type is nullable,
    // or the first argument when none is. Every argument is treated as an
    // array type without a null check on the cast result.
    static DataTypePtr get_return_type(const DataTypes& arguments) {
        for (const auto& arg : arguments) {
            const auto* array_type = check_and_get_data_type<DataTypeArray>(arg.get());
            if (array_type->get_nested_type()->is_nullable()) {
                return arg;
            }
        }
        return arguments[0];
    }

    // Runs the set operation row by row and stores the assembled array column
    // in `res_ptr`.
    //
    // left_const / right_const: whether the corresponding operand is a constant
    //     column. When both are set, only the left one is treated as constant.
    // Returns Status::OK() on success, or a RuntimeError naming both nested
    // columns when no supported column type matched (or no column was built).
    static Status execute(ColumnPtr& res_ptr, const ColumnArrayExecutionData& left_data,
                          const ColumnArrayExecutionData& right_data, bool left_const,
                          bool right_const) {
        // The destination is nullable as soon as either input has a null map.
        const bool nullable_result =
                left_data.nested_nullmap_data || right_data.nested_nullmap_data;
        ColumnArrayMutableData dst =
                create_mutable_data(left_data.nested_col.get(), nullable_result);

        bool handled = false;
        if (left_const) {
            handled = _execute_internal<true, false, ALL_COLUMNS_SIMPLE>(dst, left_data,
                                                                         right_data);
        } else if (right_const) {
            handled = _execute_internal<false, true, ALL_COLUMNS_SIMPLE>(dst, left_data,
                                                                         right_data);
        } else {
            handled = _execute_internal<false, false, ALL_COLUMNS_SIMPLE>(dst, left_data,
                                                                          right_data);
        }

        ColumnPtr res_column;
        if (handled) {
            res_column = assemble_column_array(dst);
        }
        if (!res_column) {
            return Status::RuntimeError("Unexpected columns: {}, {}",
                                        left_data.nested_col->get_name(),
                                        right_data.nested_col->get_name());
        }
        res_ptr = std::move(res_column);
        return Status::OK();
    }

private:
    // Attempts the operation for one concrete nested column type.
    // Returns false without touching `dst` when the left nested column is not a
    // `ColumnType`; the right nested column's type is not checked here.
    template <bool LCONST, bool RCONST, typename ColumnType>
    static bool _execute_internal(ColumnArrayMutableData& dst,
                                  const ColumnArrayExecutionData& left_data,
                                  const ColumnArrayExecutionData& right_data) {
        if (!is_column<ColumnType>(*left_data.nested_col)) {
            return false;
        }

        using Impl = OpenSetImpl<operation, ColumnType>;
        // The action decides which operand must be scanned first.
        constexpr auto execute_left_column_first = Impl::Action::execute_left_column_first;

        const auto& left_offsets = *left_data.offsets_ptr;
        const auto& right_offsets = *right_data.offsets_ptr;

        // With a constant left operand the row count comes from the right side.
        size_t row_size = left_offsets.size();
        if constexpr (LCONST) {
            row_size = right_offsets.size();
        }

        Impl impl;
        size_t total = 0; // running end offset of the rows emitted so far
        for (size_t row = 0; row < row_size; ++row) {
            // NOTE(review): index_check_const presumably maps every row of a
            // constant operand to row 0 - confirm against its definition.
            const auto left_row = index_check_const(row, LCONST);
            const auto right_row = index_check_const(row, RCONST);

            // NOTE(review): for row 0 the offsets are read at index `row - 1`;
            // this relies on the offsets container tolerating that access -
            // confirm.
            const size_t left_off = left_offsets[left_row - 1];
            const size_t left_len = left_offsets[left_row] - left_off;
            const size_t right_off = right_offsets[right_row - 1];
            const size_t right_len = right_offsets[right_row] - right_off;

            size_t emitted = 0;
            if constexpr (execute_left_column_first) {
                impl.template apply<true>(left_data, left_off, left_len, dst, &emitted);
                impl.template apply<false>(right_data, right_off, right_len, dst, &emitted);
            } else {
                impl.template apply<false>(right_data, right_off, right_len, dst, &emitted);
                impl.template apply<true>(left_data, left_off, left_len, dst, &emitted);
            }

            total += emitted;
            dst.offsets_ptr->push_back(total);
            impl.reset();
        }
        return true;
    }

    // Tries each candidate column type in order and stops at the first match.
    template <bool LCONST, bool RCONST, typename T, typename... Ts>
        requires(sizeof...(Ts) > 0)
    static bool _execute_internal(ColumnArrayMutableData& dst,
                                  const ColumnArrayExecutionData& left_data,
                                  const ColumnArrayExecutionData& right_data) {
        if (_execute_internal<LCONST, RCONST, T>(dst, left_data, right_data)) {
            return true;
        }
        return _execute_internal<LCONST, RCONST, Ts...>(dst, left_data, right_data);
    }
};
} // namespace doris::vectorized