// blob: 95521b0ce07d8f0b07cdb9404ba740335dcfdb79 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <fmt/format.h>
#include <glog/logging.h>
#include <stdint.h>
#include <time.h>
#include <algorithm>
#include <memory>
#include <ostream>
#include <random>
#include <string>
#include <utility>
#include "common/status.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/common/assert_cast.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/functions/function.h"
#include "vec/functions/simple_function_factory.h"
namespace doris {
// Forward declaration only: in this file FunctionContext appears solely as a
// pointer parameter of FunctionArrayShuffle::execute_impl.
class FunctionContext;
} // namespace doris
namespace doris::vectorized {
#include "common/compile_check_begin.h"
/// Implements the `array_shuffle` function: for every row, the elements of the
/// input array are returned in a pseudo-random order.
///
/// Call shapes handled by execute_impl:
///   - array_shuffle(arr)       : the generator is seeded with time(nullptr);
///   - array_shuffle(arr, seed) : the generator is seeded with the value stored
///                                in row 0 of the seed column.
///
/// A single std::mt19937 generator is created per execute_impl call and is
/// shared by all rows of the processed block.
class FunctionArrayShuffle : public IFunction {
public:
    static constexpr auto name = "array_shuffle";

    /// Factory hook: builds a shared instance of this function.
    static FunctionPtr create() { return std::make_shared<FunctionArrayShuffle>(); }

    /// Returns the function name ("array_shuffle").
    String get_name() const override { return name; }

    /// Reported as variadic; execute_impl accepts an extra seed argument.
    bool is_variadic() const override { return true; }

    /// Declared argument count (the array argument).
    size_t get_number_of_arguments() const override { return 1; }

    /// The result type is the type of the first argument, unchanged.
    /// Debug builds additionally check that this argument is an array type.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        const auto& array_type = arguments[0];
        DCHECK(array_type->get_primitive_type() == TYPE_ARRAY)
                << "first argument for function: " << name << " should be DataTypeArray"
                << " and arguments[0] is " << array_type->get_name();
        return array_type;
    }

    /// Shuffles every array of the first argument column and stores the new
    /// column at position `result` of `block`.
    ///
    /// @param context           not used by this implementation.
    /// @param block             block holding the argument and result columns.
    /// @param arguments         positions of the arguments inside `block`;
    ///                          a second entry, if present, is the seed column.
    /// @param result            position in `block` that receives the output.
    /// @param input_rows_count  not used by this implementation.
    /// @return Status::OK() on success, Status::RuntimeError if the helper
    ///         produced no column.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        // Materialize a possibly-const argument so it can be viewed as a ColumnArray.
        ColumnPtr array_holder =
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
        const auto& arrays = assert_cast<const ColumnArray&>(*array_holder);

        size_t seed = 0;
        if (arguments.size() == 2) {
            // Explicit seed: only the first row of the seed column is consulted.
            ColumnPtr seed_holder =
                    block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
            seed = assert_cast<const ColumnInt64*>(seed_holder.get())->get_element(0);
        } else {
            // No seed supplied: fall back to the current time.
            seed = time(nullptr);
        }

        // The generator is seeded with a 32-bit value.
        // NOTE(review): the original comment assumed that neither time() nor the
        // supplied seed exceeds the uint32 range; confirm how cast_set treats
        // negative or larger user-provided seeds.
        std::mt19937 generator(cast_set<uint32_t>(seed));

        ColumnPtr shuffled = _execute(arrays, generator);
        if (!shuffled) {
            return Status::RuntimeError(
                    fmt::format("execute failed or unsupported types for function {}({})",
                                get_name(), block.get_by_position(arguments[0]).type->get_name()));
        }

        block.replace_by_position(result, std::move(shuffled));
        return Status::OK();
    }

private:
    /// Builds a new array column whose nested elements are permuted row by row.
    ///
    /// An identity permutation over all nested elements is created first; then
    /// the slice belonging to each row is shuffled in place with `g`, so
    /// elements never move between rows. The offsets column is reused as is.
    ///
    /// @param src_column_array  array column to shuffle.
    /// @param g                 random generator; advanced by the shuffles.
    /// @return the shuffled array column.
    ColumnPtr _execute(const ColumnArray& src_column_array, std::mt19937& g) const {
        const auto& row_ends = src_column_array.get_offsets();
        const auto elements = src_column_array.get_data_ptr();
        const size_t element_count = elements->size();

        // Start from the identity mapping: position p refers to element p.
        IColumn::Permutation order(element_count);
        for (size_t pos = 0; pos < element_count; ++pos) {
            order[pos] = pos;
        }

        const ColumnArray::Offset64 row_count = row_ends.size();
        for (size_t row = 0; row < row_count; ++row) {
            // For row == 0 the index `row - 1` wraps around and addresses the
            // slot in front of the first offset.
            // NOTE(review): presumably the offsets container guarantees a
            // readable zero at index -1 — confirm against its definition.
            auto* slice_begin = &order[row_ends[row - 1]];
            auto* slice_end = &order[row_ends[row]];
            std::shuffle(slice_begin, slice_end, g);
        }

        // A limit of 0 is passed to permute together with the full permutation.
        return ColumnArray::create(elements->permute(order, 0),
                                   src_column_array.get_offsets_ptr());
    }
};
/// Registers FunctionArrayShuffle with the given factory and then registers
/// "shuffle" as an alias of "array_shuffle".
void register_function_array_shuffle(SimpleFunctionFactory& factory) {
    factory.register_function<FunctionArrayShuffle>();
    // FunctionArrayShuffle::name is the "array_shuffle" literal.
    factory.register_alias(FunctionArrayShuffle::name, "shuffle");
}
} // namespace doris::vectorized