blob: e56d845c38b522f04a7cadf2525c613258e42c05 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "cast_to_decimal.h"
#include "runtime/define_primitive_type.h"
#include "runtime/primitive_type.h"
#include "util/string_parser.hpp"
#include "vec/columns/column_vector.h"
#include "vec/columns/common_column_test.h"
#include "vec/core/field.h"
namespace doris::vectorized {
struct FunctionCastToDecimalPerfTest : public FunctionCastTest {
template <PrimitiveType FromPT, PrimitiveType ToPT>
void perf_test_func(const std::string& test_data_file, size_t orig_line_count,
int from_precision, int from_scale, int to_precision, int to_scale,
bool nullable) {
DataTypePtr dt_from = DataTypeFactory::instance().create_data_type(
FromPT, false, from_precision, from_scale);
auto column_orig = dt_from->create_column();
{
MutableColumns columns;
columns.push_back(column_orig->get_ptr());
DataTypeSerDeSPtrs serde = {dt_from->get_serde()};
load_columns_data_from_file(columns, serde, ';', {0}, test_data_file);
EXPECT_EQ(column_orig->size(), orig_line_count);
}
auto column_from = dt_from->create_column();
for (int i = 0; i < 10; ++i) {
column_from->insert_range_from(*column_orig, 0, column_orig->size());
}
EXPECT_EQ(column_from->size(), orig_line_count * 10);
std::cout << "column_from size: " << column_from->size() << std::endl;
DataTypePtr dt_to =
DataTypeFactory::instance().create_data_type(ToPT, true, to_precision, to_scale);
bool is_strict_mode = false;
auto ctx = create_context(is_strict_mode);
auto fn = get_cast_wrapper(ctx.get(), dt_from, dt_to);
ASSERT_TRUE(fn != nullptr);
Block block = {
{std::move(column_from), dt_from, "from"},
{nullptr, dt_to, "to"},
};
int64_t duration_ns = 0;
{
SCOPED_RAW_TIMER(&duration_ns);
EXPECT_TRUE(fn(ctx.get(), block, {0}, 1, block.rows(), nullptr));
}
auto result = block.get_by_position(1).column;
EXPECT_EQ(result->size(), orig_line_count * 10);
std::cout << block.dump_data(0, 10) << "\n";
std::cout << fmt::format("cast {} to {} time {}\n", type_to_string(FromPT),
type_to_string(ToPT),
PrettyPrinter::print(duration_ns, TUnit::TIME_NS));
}
};
// Optimized version with reserve for better performance
void read_file_to_vector_optimized(const std::string& filename, std::vector<std::string>& lines) {
std::ifstream file(filename);
if (!file.is_open()) {
std::cerr << "Error: Could not open file " << filename << std::endl;
return;
}
// Get file size to estimate number of lines for better memory allocation
file.seekg(0, std::ios::end);
std::streamsize file_size = file.tellg();
file.seekg(0, std::ios::beg);
// Rough estimate: average line length of 50 characters
lines.reserve(file_size / 50);
std::string line;
while (std::getline(file, line)) {
lines.push_back(std::move(line)); // Move instead of copy
}
}
/*
TEST_F(FunctionCastToDecimalPerfTest, test_to_decimal128v3_from_string_perf) {
std::vector<std::string> file_contents;
read_file_to_vector_optimized("/mnt/disk2/tengjianping/cast_perf/decimal_test_data_38_19.txt",
file_contents);
size_t line_count = file_contents.size();
std::cout << "Read " << line_count << " lines" << std::endl;
// Show first few lines
for (size_t i = 0; i < std::min(size_t(5), file_contents.size()); ++i) {
std::cout << "Line " << i + 1 << ": " << file_contents[i] << std::endl;
}
int64_t duration_ns = 0;
StringParser::ParseResult result;
{
SCOPED_RAW_TIMER(&duration_ns);
for (size_t i = 0; i != line_count; ++i) {
auto decimal_value = StringParser::string_to_decimal<PrimitiveType::TYPE_DECIMAL128I>(
file_contents[i].c_str(), file_contents[i].size(), 38, 19, &result);
(void)decimal_value; // Use the value to avoid unused variable warning
}
}
std::cout << "parse time: " << PrettyPrinter::print(duration_ns, TUnit::TIME_NS) << "\n";
}
TEST_F(FunctionCastToDecimalPerfTest, test_to_decimal128v3_from_decimal128v3_perf) {
std::string test_data_file = "/mnt/disk2/tengjianping/cast_perf/decimal_test_data_38_19.txt";
size_t orig_line_count = 10000000; // 10 million lines
perf_test_func<PrimitiveType::TYPE_DECIMAL128I, PrimitiveType::TYPE_DECIMAL128I>(
test_data_file, orig_line_count, 38, 19, 38, 18, false);
}
TEST_F(FunctionCastToDecimalPerfTest, test_to_decimal128v3_from_decimalv2_perf) {
std::string test_data_file = "/mnt/disk2/tengjianping/cast_perf/decimalv2_test_data_27_9.txt";
size_t orig_line_count = 10000000; // 10 million lines
perf_test_func<PrimitiveType::TYPE_DECIMAL, PrimitiveType::TYPE_DECIMAL128I>(
test_data_file, orig_line_count, 27, 9, 38, 18, false);
}
TEST_F(FunctionCastToDecimalPerfTest, test_to_decimal128v3_from_int_perf) {
std::string test_data_file = "/mnt/disk2/tengjianping/cast_perf/decimal_test_data_38_19.txt";
size_t orig_line_count = 10000000; // 10 million lines
perf_test_func<PrimitiveType::TYPE_LARGEINT, PrimitiveType::TYPE_DECIMAL128I>(
test_data_file, orig_line_count, 0, 0, 38, 19, false);
}
TEST_F(FunctionCastToDecimalPerfTest, test_to_decimal128v3_from_float_perf) {
std::string test_data_file = "/mnt/disk2/tengjianping/cast_perf/decimal_test_data_38_19.txt";
size_t orig_line_count = 10000000; // 10 million lines
perf_test_func<PrimitiveType::TYPE_FLOAT, PrimitiveType::TYPE_DECIMAL128I>(
test_data_file, orig_line_count, 0, 0, 38, 18, true);
}
TEST_F(FunctionCastToDecimalPerfTest, test_to_largeint_from_decimal128v3_perf) {
std::string test_data_file = "/mnt/disk2/tengjianping/cast_perf/decimal_test_data_38_19.txt";
size_t orig_line_count = 10000000; // 10 million lines
perf_test_func<PrimitiveType::TYPE_DECIMAL128I, PrimitiveType::TYPE_LARGEINT>(
test_data_file, orig_line_count, 38, 19, 0, 0, true);
}
*/
} // namespace doris::vectorized