blob: 5e2f2858c0f452a10b98b3ea722ed0152977a3a6 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <stddef.h>
#include <stdint.h>
#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
#include <memory>
#include <type_traits>
#include <utility>
#include "common/status.h"
#include "runtime/primitive_type.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column.h"
#include "vec/columns/column_vector.h"
#include "vec/common/assert_cast.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/functions/function.h"
#include "vec/functions/simple_function_factory.h"
namespace doris {
class FunctionContext;
} // namespace doris
namespace doris::vectorized {
class FunctionWidthBucket : public IFunction {
public:
static constexpr auto name = "width_bucket";
static FunctionPtr create() { return std::make_shared<FunctionWidthBucket>(); }
/// Get function name.
String get_name() const override { return name; }
bool is_variadic() const override { return false; }
size_t get_number_of_arguments() const override { return 4; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return std::make_shared<DataTypeInt64>();
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const override {
ColumnPtr expr_ptr =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
ColumnPtr min_value_ptr =
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
ColumnPtr max_value_ptr =
block.get_by_position(arguments[2]).column->convert_to_full_column_if_const();
ColumnPtr num_buckets_ptr =
block.get_by_position(arguments[3]).column->convert_to_full_column_if_const();
int64_t num_buckets = num_buckets_ptr->get_int(0);
if (num_buckets <= 0) {
return Status::InternalError(
"The desired number({}) of buckets must be a positive integer value.",
num_buckets);
}
auto nested_column_ptr = ColumnInt64::create(input_rows_count, 0);
DataTypePtr expr_type = block.get_by_position(arguments[0]).type;
if (!_execute_by_type(*expr_ptr, *min_value_ptr, *max_value_ptr, num_buckets,
*nested_column_ptr, expr_type)) {
return Status::InvalidArgument("Unsupported type for width_bucket: {}",
expr_type->get_name());
}
block.replace_by_position(result, std::move(nested_column_ptr));
return Status::OK();
}
private:
template <typename ColumnType>
void _execute(const IColumn& expr_column, const IColumn& min_value_column,
const IColumn& max_value_column, const int64_t num_buckets,
IColumn& nested_column) const {
const auto& expr_column_concrete = assert_cast<const ColumnType&>(expr_column);
const auto& min_value_column_concrete = assert_cast<const ColumnType&>(min_value_column);
const auto& max_value_column_concrete = assert_cast<const ColumnType&>(max_value_column);
auto& nested_column_concrete = assert_cast<ColumnInt64&>(nested_column);
size_t input_rows_count = expr_column.size();
for (size_t i = 0; i < input_rows_count; ++i) {
auto min_value = min_value_column_concrete.get_data()[i];
auto max_value = max_value_column_concrete.get_data()[i];
auto average_value = (max_value - min_value) / (1.0 * num_buckets);
if (expr_column_concrete.get_data()[i] < min_value) {
continue;
} else if (expr_column_concrete.get_data()[i] >= max_value) {
nested_column_concrete.get_data()[i] = num_buckets + 1;
} else {
if ((max_value - min_value) / num_buckets == 0) {
continue;
}
nested_column_concrete.get_data()[i] =
(int64_t)(1 +
(expr_column_concrete.get_data()[i] - min_value) / average_value);
}
}
}
bool _execute_by_type(const IColumn& expr_column, const IColumn& min_value_column,
const IColumn& max_value_column, const int64_t num_buckets,
IColumn& nested_column_column, DataTypePtr& expr_type) const {
switch (expr_type->get_primitive_type()) {
case PrimitiveType::TYPE_TINYINT:
_execute<ColumnInt8>(expr_column, min_value_column, max_value_column, num_buckets,
nested_column_column);
break;
case PrimitiveType::TYPE_SMALLINT:
_execute<ColumnInt16>(expr_column, min_value_column, max_value_column, num_buckets,
nested_column_column);
break;
case PrimitiveType::TYPE_INT:
_execute<ColumnInt32>(expr_column, min_value_column, max_value_column, num_buckets,
nested_column_column);
break;
case PrimitiveType::TYPE_BIGINT:
_execute<ColumnInt64>(expr_column, min_value_column, max_value_column, num_buckets,
nested_column_column);
break;
case PrimitiveType::TYPE_FLOAT:
_execute<ColumnFloat32>(expr_column, min_value_column, max_value_column, num_buckets,
nested_column_column);
break;
case PrimitiveType::TYPE_DOUBLE:
_execute<ColumnFloat64>(expr_column, min_value_column, max_value_column, num_buckets,
nested_column_column);
break;
default:
return false;
break;
}
return true;
}
};
/// Adds the `width_bucket` function (FunctionWidthBucket) to `factory`.
void register_function_width_bucket(SimpleFunctionFactory& factory) {
    using WidthBucket = FunctionWidthBucket;
    factory.register_function<WidthBucket>();
}
} // namespace doris::vectorized