// blob: 0ef1014efa60e2f1eb16e85f16382c7b6d9ab6ab [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include <string>
#include <vector>
#include "olap/key_coder.h"
#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/segment_writer.h"
#include "util/key_util.h"
#include "util/slice.h"
#include "vec/common/string_view.h" // hex_dump
#include "vec/olap/olap_data_convertor.h"
namespace doris {
namespace segment_v2 {
using namespace doris::vectorized;
auto create_string_accessor(const std::vector<std::string>& str) {
ColumnString::MutablePtr column = ColumnString::create();
// ASSERT_TRUE(!str.empty());
for (auto& s : str) column->insert_value(s);
DataTypePtr data_type =
DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_VARCHAR, 0, 0);
ColumnWithTypeAndName typed_column(column->get_ptr(), data_type, "test_string_column");
// Create a VARCHAR convertor, a convertor is an accessor
auto convertor =
std::make_shared<OlapBlockDataConvertor::OlapColumnDataConvertorVarChar>(false);
convertor->set_source_column(typed_column, 0, str.size()); // row_pos=0, num_rows=str.size()
// Convert to OLAP format
auto status = convertor->convert_to_olap();
EXPECT_TRUE(status.ok());
if (status.ok()) {
// Get the converted data
const void* data = convertor->get_data_at(0);
// const UInt8* nullmap = convertor->get_nullmap();
std::cout << ((StringRef*)data)->to_string() << std::endl;
std::cout << column->get_data_at(0) << std::endl;
// Use the converted data as needed
}
return convertor;
}
// Builds a 64-bit integer column accessor over `values` for use as a key
// column in tests.
//
// The values are copied into a ColumnInt64, paired with a data type in a
// ColumnWithTypeAndName, and handed to an
// OlapColumnDataConvertorSimple<TYPE_BIGINT> that is asked to convert rows
// [0, values.size()) to OLAP format. The conversion status is checked with
// EXPECT_TRUE (a failure is recorded but does not abort the caller).
//
// @param values  values of the column, one per row; may be empty.
// @return        shared_ptr to the convertor (returned even if conversion failed).
auto create_int_accessor(const std::vector<PrimitiveTypeTraits<TYPE_BIGINT>::CppType>& values) {
    auto column = ColumnInt64::create();
    for (const auto value : values) {
        column->insert_value(value);
    }

    // The declared data type must describe what the column actually holds:
    // the column is ColumnInt64 and the convertor is instantiated for
    // TYPE_BIGINT, so the data type is TYPE_BIGINT as well (it was TYPE_INT).
    DataTypePtr data_type = DataTypeFactory::instance().create_data_type(TYPE_BIGINT, 0, 0);
    ColumnWithTypeAndName typed_column(column->get_ptr(), data_type, "test_int_column");

    auto convertor =
            std::make_shared<OlapBlockDataConvertor::OlapColumnDataConvertorSimple<TYPE_BIGINT>>();
    convertor->set_source_column(typed_column, 0,
                                 values.size()); // row_pos=0, num_rows=values.size()

    auto status = convertor->convert_to_olap();
    EXPECT_TRUE(status.ok());
    return convertor;
}
// Records a key-ordering problem in SegmentWriter::_full_encode_keys.
//
// Two rows of key columns (int, string, string) are encoded:
//   row 0: 0x05050505, "a",     "bb"
//   row 1: 0x05050505, "a\x01", "cc"
// The encoded bytes of row 0 are expected to compare less than those of row 1.
// However, the first string of row 1 ends with the byte \x01, which is smaller
// than KEY_NORMAL_MARKER, so after encoding the two rows end up in reversed
// order. The ordering expectation at the bottom is therefore left disabled and
// the test only prints both encodings.
//
// NOTE(review): the integer accessor is built over 64-bit values while the
// coder requested here is the OLAP_FIELD_TYPE_INT one -- confirm this pairing
// is intended.
TEST(SegmentWriterFullEncodeKeysTest, TestSegmentWriterKeyEncoding) {
    // Key column accessors, created in column order (the string accessors
    // print their first row as a side effect).
    auto int_keys = create_int_accessor({0x05050505, 0x05050505});
    auto first_str_keys = create_string_accessor({"a", "a\x01"});
    auto second_str_keys = create_string_accessor({"bb", "cc"});

    std::vector<vectorized::IOlapColumnDataAccessor*> key_columns;
    key_columns.push_back(int_keys.get());
    key_columns.push_back(first_str_keys.get());
    key_columns.push_back(second_str_keys.get());

    // One coder per key column; both string columns share the VARCHAR coder.
    const KeyCoder* int_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_INT);
    const KeyCoder* varchar_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_VARCHAR);
    std::vector<const KeyCoder*> key_coders;
    key_coders.push_back(int_coder);
    key_coders.push_back(varchar_coder);
    key_coders.push_back(varchar_coder);

    // Code under test: encode the full key of row 0 and of row 1.
    std::string row0_key = SegmentWriter::_full_encode_keys(key_coders, key_columns, 0);
    std::string row1_key = SegmentWriter::_full_encode_keys(key_coders, key_columns, 1);

    std::cout << StringView(row0_key).dump_hex() << std::endl; // X'02850505050261026262'
    std::cout << StringView(row1_key).dump_hex() << std::endl; // X'0285050505026101026363'

    // Disabled: this is the ordering that does NOT hold (see the header comment).
    // EXPECT_LT(row0_key, row1_key);
}
} // namespace segment_v2
} // namespace doris