| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| #include "olap/schema_change.h" |
| |
| #include <gtest/gtest.h> |
| |
| #include "olap/byte_buffer.h" |
| #include "olap/field.h" |
| #include "olap/olap_common.h" |
| #include "olap/olap_define.h" |
| #include "olap/row_block.h" |
| #include "olap/row_cursor.h" |
| #include "olap/rowset/column_reader.h" |
| #include "olap/rowset/column_writer.h" |
| #include "olap/stream_name.h" |
| #include "runtime/mem_pool.h" |
| #include "runtime/vectorized_row_batch.h" |
| #include "util/logging.h" |
| |
| using std::string; |
| |
| namespace doris { |
| |
| class TestColumn : public testing::Test { |
| public: |
| TestColumn() : _column_writer(NULL), _column_reader(NULL), _stream_factory(NULL) { |
| _offsets.clear(); |
| _map_in_streams.clear(); |
| _present_buffers.clear(); |
| _data_buffers.clear(); |
| _second_buffers.clear(); |
| _dictionary_buffers.clear(); |
| _length_buffers.clear(); |
| _mem_tracker.reset(new MemTracker(-1)); |
| _mem_pool.reset(new MemPool(_mem_tracker.get())); |
| } |
| |
| virtual ~TestColumn() { |
| SAFE_DELETE(_column_writer); |
| SAFE_DELETE(_column_reader); |
| SAFE_DELETE(_stream_factory); |
| } |
| |
| virtual void SetUp() { |
| _offsets.push_back(0); |
| _stream_factory = new (std::nothrow) |
| OutStreamFactory(COMPRESS_LZ4, OLAP_DEFAULT_COLUMN_STREAM_BUFFER_SIZE); |
| ASSERT_TRUE(_stream_factory != NULL); |
| config::column_dictionary_key_ratio_threshold = 30; |
| config::column_dictionary_key_size_threshold = 1000; |
| } |
| |
| virtual void TearDown() { |
| SAFE_DELETE(_column_writer); |
| SAFE_DELETE(_column_reader); |
| SAFE_DELETE(_stream_factory); |
| SAFE_DELETE(_shared_buffer); |
| |
| _offsets.clear(); |
| for (auto in_stream : _map_in_streams) { |
| delete in_stream.second; |
| } |
| _map_in_streams.clear(); |
| _present_buffers.clear(); |
| _data_buffers.clear(); |
| _second_buffers.clear(); |
| _dictionary_buffers.clear(); |
| _length_buffers.clear(); |
| } |
| |
| void CreateColumnWriter(const TabletSchema& tablet_schema) { |
| _column_writer = ColumnWriter::create(0, tablet_schema, _stream_factory, 1024, |
| BLOOM_FILTER_DEFAULT_FPP); |
| ASSERT_TRUE(_column_writer != NULL); |
| ASSERT_EQ(_column_writer->init(), OLAP_SUCCESS); |
| } |
| |
| void CreateColumnReader(const TabletSchema& tablet_schema) { |
| UniqueIdEncodingMap encodings; |
| encodings[0] = ColumnEncodingMessage(); |
| encodings[0].set_kind(ColumnEncodingMessage::DIRECT); |
| encodings[0].set_dictionary_size(1); |
| CreateColumnReader(tablet_schema, encodings); |
| } |
| |
| void CreateColumnReader(const TabletSchema& tablet_schema, UniqueIdEncodingMap& encodings) { |
| UniqueIdToColumnIdMap included; |
| included[0] = 0; |
| UniqueIdToColumnIdMap segment_included; |
| segment_included[0] = 0; |
| |
| SAFE_DELETE(_column_reader); |
| _column_reader = |
| ColumnReader::create(0, tablet_schema, included, segment_included, encodings); |
| |
| ASSERT_TRUE(_column_reader != NULL); |
| |
| system("mkdir -p ./ut_dir"); |
| system("rm ./ut_dir/tmp_file"); |
| |
| ASSERT_EQ(OLAP_SUCCESS, |
| helper.open_with_mode("./ut_dir/tmp_file", O_CREAT | O_EXCL | O_WRONLY, |
| S_IRUSR | S_IWUSR)); |
| std::vector<int> off; |
| std::vector<int> length; |
| std::vector<int> buffer_size; |
| std::vector<StreamName> name; |
| |
| std::map<StreamName, OutStream*>::const_iterator it = _stream_factory->streams().begin(); |
| for (; it != _stream_factory->streams().end(); ++it) { |
| StreamName stream_name = it->first; |
| OutStream* out_stream = it->second; |
| std::vector<StorageByteBuffer*>* buffers; |
| |
| if (out_stream->is_suppressed()) { |
| continue; |
| } |
| if (stream_name.kind() == StreamInfoMessage::ROW_INDEX) { |
| continue; |
| } else if (stream_name.kind() == StreamInfoMessage::PRESENT) { |
| buffers = &_present_buffers; |
| } else if (stream_name.kind() == StreamInfoMessage::DATA) { |
| buffers = &_data_buffers; |
| } else if (stream_name.kind() == StreamInfoMessage::SECONDARY) { |
| buffers = &_second_buffers; |
| } else if (stream_name.kind() == StreamInfoMessage::DICTIONARY_DATA) { |
| buffers = &_dictionary_buffers; |
| } else if (stream_name.kind() == StreamInfoMessage::LENGTH) { |
| buffers = &_length_buffers; |
| } else { |
| ASSERT_TRUE(false); |
| } |
| |
| ASSERT_TRUE(buffers != NULL); |
| off.push_back(helper.tell()); |
| out_stream->write_to_file(&helper, 0); |
| length.push_back(out_stream->get_stream_length()); |
| buffer_size.push_back(out_stream->get_total_buffer_size()); |
| name.push_back(stream_name); |
| } |
| helper.close(); |
| |
| ASSERT_EQ(OLAP_SUCCESS, |
| helper.open_with_mode("./ut_dir/tmp_file", O_RDONLY, S_IRUSR | S_IWUSR)); |
| |
| SAFE_DELETE(_shared_buffer); |
| _shared_buffer = StorageByteBuffer::create(OLAP_DEFAULT_COLUMN_STREAM_BUFFER_SIZE + |
| sizeof(StreamHead)); |
| ASSERT_TRUE(_shared_buffer != NULL); |
| |
| for (auto in_stream : _map_in_streams) { |
| delete in_stream.second; |
| } |
| _map_in_streams.clear(); |
| |
| for (int i = 0; i < off.size(); ++i) { |
| ReadOnlyFileStream* in_stream = new (std::nothrow) |
| ReadOnlyFileStream(&helper, &_shared_buffer, off[i], length[i], lz4_decompress, |
| buffer_size[i], &_stats); |
| ASSERT_EQ(OLAP_SUCCESS, in_stream->init()); |
| _map_in_streams[name[i]] = in_stream; |
| } |
| |
| ASSERT_EQ(_column_reader->init(&_map_in_streams, 1024, _mem_pool.get(), &_stats), |
| OLAP_SUCCESS); |
| } |
| |
| void SetTabletSchema(const std::string& name, const std::string& type, |
| const std::string& aggregation, uint32_t length, bool is_allow_null, |
| bool is_key, TabletSchema* tablet_schema) { |
| TabletSchemaPB tablet_schema_pb; |
| ColumnPB* column = tablet_schema_pb.add_column(); |
| column->set_unique_id(0); |
| column->set_name(name); |
| column->set_type(type); |
| column->set_is_key(is_key); |
| column->set_is_nullable(is_allow_null); |
| column->set_length(length); |
| column->set_aggregation(aggregation); |
| tablet_schema->init_from_pb(tablet_schema_pb); |
| } |
| |
| void create_and_save_last_position() { |
| ASSERT_EQ(_column_writer->create_row_index_entry(), OLAP_SUCCESS); |
| } |
| |
| template <typename T> |
| void test_convert_to_varchar(const std::string& type_name, int type_size, T val, |
| const std::string& expected_val, OLAPStatus expected_st) { |
| TabletSchema src_tablet_schema; |
| SetTabletSchema("ConvertColumn", type_name, "REPLACE", type_size, false, false, |
| &src_tablet_schema); |
| CreateColumnWriter(src_tablet_schema); |
| |
| RowCursor write_row; |
| write_row.init(src_tablet_schema); |
| RowBlock block(&src_tablet_schema); |
| RowBlockInfo block_info; |
| block_info.row_num = 10000; |
| block.init(block_info); |
| write_row.set_field_content(0, reinterpret_cast<char*>(&val), _mem_pool.get()); |
| block.set_row(0, write_row); |
| block.finalize(1); |
| ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); |
| ColumnDataHeaderMessage header; |
| ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); |
| helper.close(); |
| |
| TabletSchema dst_tablet_schema; |
| SetTabletSchema("VarcharColumn", "VARCHAR", "REPLACE", 255, false, false, |
| &dst_tablet_schema); |
| CreateColumnReader(src_tablet_schema); |
| RowCursor read_row; |
| read_row.init(dst_tablet_schema); |
| |
| _col_vector.reset(new ColumnVector()); |
| ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS); |
| char* data = reinterpret_cast<char*>(_col_vector->col_data()); |
| auto st = read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), |
| _mem_pool.get()); |
| ASSERT_EQ(st, expected_st); |
| if (st == OLAP_SUCCESS) { |
| std::string dst_str = read_row.column_schema(0)->to_string(read_row.cell_ptr(0)); |
| ASSERT_TRUE(dst_str.compare(0, expected_val.size(), expected_val) == 0); |
| } |
| |
| TypeInfo* tp = get_type_info(OLAP_FIELD_TYPE_HLL); |
| st = read_row.convert_from(0, read_row.cell_ptr(0), tp, _mem_pool.get()); |
| ASSERT_EQ(st, OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| void test_convert_from_varchar(const std::string& type_name, int type_size, |
| const std::string& value, OLAPStatus expected_st) { |
| TabletSchema tablet_schema; |
| SetTabletSchema("VarcharColumn", "VARCHAR", "REPLACE", 255, false, false, &tablet_schema); |
| CreateColumnWriter(tablet_schema); |
| |
| RowCursor write_row; |
| write_row.init(tablet_schema); |
| RowBlock block(&tablet_schema); |
| RowBlockInfo block_info; |
| block_info.row_num = 10000; |
| block.init(block_info); |
| Slice normal_str(value); |
| write_row.set_field_content(0, reinterpret_cast<char*>(&normal_str), _mem_pool.get()); |
| block.set_row(0, write_row); |
| block.finalize(1); |
| ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); |
| ColumnDataHeaderMessage header; |
| ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); |
| helper.close(); |
| |
| TabletSchema converted_tablet_schema; |
| SetTabletSchema("ConvertColumn", type_name, "REPLACE", type_size, false, false, |
| &converted_tablet_schema); |
| CreateColumnReader(tablet_schema); |
| RowCursor read_row; |
| read_row.init(converted_tablet_schema); |
| |
| _col_vector.reset(new ColumnVector()); |
| ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS); |
| char* data = reinterpret_cast<char*>(_col_vector->col_data()); |
| auto st = read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), |
| _mem_pool.get()); |
| ASSERT_EQ(st, expected_st); |
| if (st == OLAP_SUCCESS) { |
| std::string dst_str = read_row.column_schema(0)->to_string(read_row.cell_ptr(0)); |
| ASSERT_TRUE(dst_str.compare(0, value.size(), value) == 0); |
| } |
| |
| TypeInfo* tp = get_scalar_type_info(OLAP_FIELD_TYPE_HLL); |
| st = read_row.convert_from(0, read_row.cell_ptr(0), tp, _mem_pool.get()); |
| ASSERT_EQ(st, OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| ColumnWriter* _column_writer; |
| |
| ColumnReader* _column_reader; |
| std::shared_ptr<MemTracker> _mem_tracker; |
| std::unique_ptr<MemPool> _mem_pool; |
| std::unique_ptr<ColumnVector> _col_vector; |
| |
| OutStreamFactory* _stream_factory; |
| |
| std::vector<size_t> _offsets; |
| std::vector<StorageByteBuffer*> _present_buffers; |
| std::vector<StorageByteBuffer*> _data_buffers; |
| std::vector<StorageByteBuffer*> _second_buffers; |
| std::vector<StorageByteBuffer*> _dictionary_buffers; |
| std::vector<StorageByteBuffer*> _length_buffers; |
| StorageByteBuffer* _shared_buffer = nullptr; |
| std::map<StreamName, ReadOnlyFileStream*> _map_in_streams; |
| FileHandler helper; |
| OlapReaderStatistics _stats; |
| }; |
| |
| TEST_F(TestColumn, ConvertFloatToDouble) { |
| TabletSchema tablet_schema; |
| SetTabletSchema("FloatColumn", "FLOAT", "REPLACE", 4, false, false, &tablet_schema); |
| CreateColumnWriter(tablet_schema); |
| |
| RowCursor write_row; |
| write_row.init(tablet_schema); |
| |
| RowBlock block(&tablet_schema); |
| RowBlockInfo block_info; |
| block_info.row_num = 10000; |
| block.init(block_info); |
| |
| float value = 1.234; |
| write_row.set_field_content(0, reinterpret_cast<char*>(&value), _mem_pool.get()); |
| block.set_row(0, write_row); |
| block.finalize(1); |
| ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); |
| |
| value = 3.234; |
| write_row.set_field_content(0, reinterpret_cast<char*>(&value), _mem_pool.get()); |
| block.set_row(0, write_row); |
| block.finalize(1); |
| ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); |
| |
| ColumnDataHeaderMessage header; |
| ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); |
| |
| // read data |
| TabletSchema convert_tablet_schema; |
| SetTabletSchema("DoubleColumn", "DOUBLE", "REPLACE", 4, false, false, &convert_tablet_schema); |
| CreateColumnReader(tablet_schema); |
| RowCursor read_row; |
| read_row.init(convert_tablet_schema); |
| _col_vector.reset(new ColumnVector()); |
| ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 2, _mem_pool.get()), OLAP_SUCCESS); |
| char* data = reinterpret_cast<char*>(_col_vector->col_data()); |
| read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), _mem_pool.get()); |
| //float val1 = *reinterpret_cast<float*>(read_row.cell_ptr(0)); |
| double val2 = *reinterpret_cast<double*>(read_row.cell_ptr(0)); |
| |
| char buf[64]; |
| memset(buf, 0, sizeof(buf)); |
| sprintf(buf, "%f", val2); |
| char* tg; |
| double v2 = strtod(buf, &tg); |
| ASSERT_TRUE(v2 == 1.234); |
| |
| //test not support type |
| TypeInfo* tp = get_scalar_type_info(OLAP_FIELD_TYPE_HLL); |
| OLAPStatus st = read_row.convert_from(0, data, tp, _mem_pool.get()); |
| ASSERT_TRUE(st == OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertDatetimeToDate) { |
| TabletSchema tablet_schema; |
| SetTabletSchema("DatetimeColumn", "DATETIME", "REPLACE", 8, false, false, &tablet_schema); |
| CreateColumnWriter(tablet_schema); |
| |
| RowCursor write_row; |
| write_row.init(tablet_schema); |
| RowBlock block(&tablet_schema); |
| RowBlockInfo block_info; |
| block_info.row_num = 10000; |
| block.init(block_info); |
| |
| std::vector<std::string> val_string_array; |
| std::string origin_val = "2019-11-25 19:07:00"; |
| val_string_array.emplace_back(origin_val); |
| OlapTuple tuple(val_string_array); |
| write_row.from_tuple(tuple); |
| block.set_row(0, write_row); |
| block.finalize(1); |
| ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); |
| |
| ColumnDataHeaderMessage header; |
| ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); |
| |
| // read data |
| TabletSchema convert_tablet_schema; |
| SetTabletSchema("DateColumn", "DATE", "REPLACE", 3, false, false, &convert_tablet_schema); |
| CreateColumnReader(tablet_schema); |
| RowCursor read_row; |
| read_row.init(convert_tablet_schema); |
| |
| _col_vector.reset(new ColumnVector()); |
| ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS); |
| char* data = reinterpret_cast<char*>(_col_vector->col_data()); |
| read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), _mem_pool.get()); |
| std::string dest_string = read_row.column_schema(0)->to_string(read_row.cell_ptr(0)); |
| ASSERT_TRUE(strncmp(dest_string.c_str(), "2019-11-25", strlen("2019-11-25")) == 0); |
| |
| //test not support type |
| TypeInfo* tp = get_type_info(OLAP_FIELD_TYPE_HLL); |
| OLAPStatus st = read_row.convert_from(0, data, tp, _mem_pool.get()); |
| ASSERT_TRUE(st == OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertDateToDatetime) { |
| TabletSchema tablet_schema; |
| SetTabletSchema("DateColumn", "DATE", "REPLACE", 3, false, false, &tablet_schema); |
| CreateColumnWriter(tablet_schema); |
| |
| RowCursor write_row; |
| write_row.init(tablet_schema); |
| |
| RowBlock block(&tablet_schema); |
| RowBlockInfo block_info; |
| block_info.row_num = 10000; |
| block.init(block_info); |
| |
| std::vector<std::string> val_string_array; |
| std::string origin_val = "2019-12-04"; |
| val_string_array.emplace_back(origin_val); |
| OlapTuple tuple(val_string_array); |
| write_row.from_tuple(tuple); |
| block.set_row(0, write_row); |
| block.finalize(1); |
| ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); |
| |
| ColumnDataHeaderMessage header_message; |
| ASSERT_EQ(_column_writer->finalize(&header_message), OLAP_SUCCESS); |
| |
| TabletSchema convert_tablet_schema; |
| SetTabletSchema("DateTimeColumn", "DATETIME", "REPLACE", 8, false, false, |
| &convert_tablet_schema); |
| CreateColumnReader(tablet_schema); |
| RowCursor read_row; |
| read_row.init(convert_tablet_schema); |
| _col_vector.reset(new ColumnVector()); |
| ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS); |
| char* data = reinterpret_cast<char*>(_col_vector->col_data()); |
| read_row.set_field_content(0, data, _mem_pool.get()); |
| read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), _mem_pool.get()); |
| std::string dest_string = read_row.column_schema(0)->to_string(read_row.cell_ptr(0)); |
| ASSERT_TRUE(dest_string.compare("2019-12-04 00:00:00") == 0); |
| |
| //test not support type |
| TypeInfo* tp = get_type_info(OLAP_FIELD_TYPE_HLL); |
| OLAPStatus st = read_row.convert_from(0, data, tp, _mem_pool.get()); |
| ASSERT_TRUE(st == OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertIntToDate) { |
| TabletSchema tablet_schema; |
| SetTabletSchema("IntColumn", "INT", "REPLACE", 4, false, false, &tablet_schema); |
| CreateColumnWriter(tablet_schema); |
| |
| RowCursor write_row; |
| write_row.init(tablet_schema); |
| |
| RowBlock block(&tablet_schema); |
| RowBlockInfo block_info; |
| block_info.row_num = 10000; |
| block.init(block_info); |
| |
| int time_val = 20191205; |
| write_row.set_field_content(0, reinterpret_cast<char*>(&time_val), _mem_pool.get()); |
| block.set_row(0, write_row); |
| block.finalize(1); |
| ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); |
| |
| ColumnDataHeaderMessage header; |
| ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); |
| |
| TabletSchema convert_tablet_schema; |
| SetTabletSchema("DateColumn", "DATE", "REPLACE", 3, false, false, &convert_tablet_schema); |
| CreateColumnReader(tablet_schema); |
| |
| RowCursor read_row; |
| read_row.init(convert_tablet_schema); |
| |
| _col_vector.reset(new ColumnVector()); |
| ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS); |
| char* data = reinterpret_cast<char*>(_col_vector->col_data()); |
| read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), _mem_pool.get()); |
| std::string dest_string = read_row.column_schema(0)->to_string(read_row.cell_ptr(0)); |
| ASSERT_TRUE(strncmp(dest_string.c_str(), "2019-12-05", strlen("2019-12-05")) == 0); |
| |
| //test not support type |
| TypeInfo* tp = get_type_info(OLAP_FIELD_TYPE_HLL); |
| OLAPStatus st = read_row.convert_from(0, read_row.cell_ptr(0), tp, _mem_pool.get()); |
| ASSERT_TRUE(st == OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToDate) { |
| TabletSchema tablet_schema; |
| SetTabletSchema("VarcharColumn", "VARCHAR", "REPLACE", 255, false, false, &tablet_schema); |
| CreateColumnWriter(tablet_schema); |
| |
| RowCursor write_row; |
| write_row.init(tablet_schema); |
| |
| RowBlock block(&tablet_schema); |
| RowBlockInfo block_info; |
| block_info.row_num = 10000; |
| block.init(block_info); |
| |
| // test valid format convert |
| std::vector<Slice> valid_src_strs = { |
| "2019-12-17", "19-12-17", "20191217", "191217", "2019/12/17", "19/12/17", |
| }; |
| std::string expected_val("2019-12-17"); |
| for (auto src_str : valid_src_strs) { |
| write_row.set_field_content(0, reinterpret_cast<char*>(&src_str), _mem_pool.get()); |
| block.set_row(0, write_row); |
| block.finalize(1); |
| ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); |
| |
| ColumnDataHeaderMessage header; |
| ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); |
| |
| // because file_helper is reused in this case, we should close it. |
| helper.close(); |
| TabletSchema convert_tablet_schema; |
| SetTabletSchema("DateColumn", "DATE", "REPLACE", 3, false, false, &convert_tablet_schema); |
| CreateColumnReader(tablet_schema); |
| RowCursor read_row; |
| read_row.init(convert_tablet_schema); |
| |
| _col_vector.reset(new ColumnVector()); |
| ASSERT_EQ(_column_reader->next_vector(_col_vector.get(), 1, _mem_pool.get()), OLAP_SUCCESS); |
| char* data = reinterpret_cast<char*>(_col_vector->col_data()); |
| read_row.convert_from(0, data, write_row.column_schema(0)->type_info(), _mem_pool.get()); |
| std::string dst_str = read_row.column_schema(0)->to_string(read_row.cell_ptr(0)); |
| ASSERT_EQ(expected_val, dst_str); |
| } |
| helper.close(); |
| TabletSchema convert_tablet_schema; |
| SetTabletSchema("DateColumn", "DATE", "REPLACE", 3, false, false, &convert_tablet_schema); |
| CreateColumnReader(tablet_schema); |
| RowCursor read_row; |
| read_row.init(convert_tablet_schema); |
| |
| //test not support type |
| TypeInfo* tp = get_type_info(OLAP_FIELD_TYPE_HLL); |
| OLAPStatus st = read_row.convert_from(0, read_row.cell_ptr(0), tp, _mem_pool.get()); |
| ASSERT_EQ(st, OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToTinyInt1) { |
| test_convert_from_varchar("TINYINT", 1, "127", OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToTinyInt2) { |
| test_convert_from_varchar("TINYINT", 1, "128", OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToSmallInt1) { |
| test_convert_from_varchar("SMALLINT", 2, "32767", OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToSmallInt2) { |
| test_convert_from_varchar("SMALLINT", 2, "32768", OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToInt1) { |
| test_convert_from_varchar("INT", 4, "2147483647", OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToInt2) { |
| test_convert_from_varchar("INT", 4, "2147483648", OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToBigInt1) { |
| test_convert_from_varchar("BIGINT", 8, "9223372036854775807", OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToBigInt2) { |
| test_convert_from_varchar("BIGINT", 8, "9223372036854775808", OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToLargeInt1) { |
| test_convert_from_varchar("LARGEINT", 16, "170141183460469000000000000000000000000", |
| OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToLargeInt2) { |
| test_convert_from_varchar("LARGEINT", 16, "1701411834604690000000000000000000000000", |
| OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToFloat1) { |
| test_convert_from_varchar("FLOAT", 4, "3.40282e+38", OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToFloat2) { |
| test_convert_from_varchar( |
| "FLOAT", 4, |
| "17976900000000000632303049213894264349303303643368533621541098328912643414890628994061" |
| "52996321966094455338163203127744334848599000464911410516510916727344709727599413825823" |
| "04802812882753059262973637182942535982636884444611376868582636745405553206881859340916" |
| "3400929532301499014067384276511218551077374242324480.999", |
| OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToDouble1) { |
| test_convert_from_varchar("DOUBLE", 8, "123.456", OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertVarcharToDouble2) { |
| test_convert_from_varchar( |
| "DOUBLE", 8, |
| "17976900000000000632303049213894264349303303643368533621541098328912643414890628994061" |
| "52996321966094455338163203127744334848599000464911410516510916727344709727599413825823" |
| "04802812882753059262973637182942535982636884444611376868582636745405553206881859340916" |
| "3400929532301499014067384276511218551077374242324480.0000000000", |
| OLAP_ERR_INVALID_SCHEMA); |
| } |
| |
| TEST_F(TestColumn, ConvertTinyIntToVarchar) { |
| test_convert_to_varchar<int8_t>("TINYINT", 1, 127, "127", OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertSmallIntToVarchar) { |
| test_convert_to_varchar<int16_t>("SMALLINT", 2, 32767, "32767", OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertIntToVarchar) { |
| test_convert_to_varchar<int32_t>("INT", 4, 2147483647, "2147483647", OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertBigIntToVarchar) { |
| test_convert_to_varchar<int64_t>("BIGINT", 8, 9223372036854775807, "9223372036854775807", |
| OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertLargeIntToVarchar) { |
| test_convert_to_varchar<int128_t>("LARGEINT", 16, 1701411834604690, "1701411834604690", |
| OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertFloatToVarchar) { |
| test_convert_to_varchar<float>("FLOAT", 4, 3.40282e+38, "3.40282e+38", OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertDoubleToVarchar) { |
| test_convert_to_varchar<double>("DOUBLE", 8, 123.456, "123.456", OLAP_SUCCESS); |
| } |
| |
| TEST_F(TestColumn, ConvertDecimalToVarchar) { |
| decimal12_t val(456, 789000000); |
| test_convert_to_varchar<decimal12_t>("Decimal", 12, val, "456.789000000", OLAP_SUCCESS); |
| } |
| |
| void CreateTabletSchema(TabletSchema& tablet_schema) { |
| TabletSchemaPB tablet_schema_pb; |
| tablet_schema_pb.set_keys_type(KeysType::AGG_KEYS); |
| tablet_schema_pb.set_num_short_key_columns(2); |
| tablet_schema_pb.set_num_rows_per_row_block(1024); |
| tablet_schema_pb.set_compress_kind(COMPRESS_NONE); |
| tablet_schema_pb.set_next_column_unique_id(4); |
| |
| ColumnPB* column_1 = tablet_schema_pb.add_column(); |
| column_1->set_unique_id(1); |
| column_1->set_name("k1"); |
| column_1->set_type("INT"); |
| column_1->set_is_key(true); |
| column_1->set_length(4); |
| column_1->set_index_length(4); |
| column_1->set_is_nullable(false); |
| column_1->set_is_bf_column(false); |
| |
| ColumnPB* column_2 = tablet_schema_pb.add_column(); |
| column_2->set_unique_id(2); |
| column_2->set_name("k2"); |
| column_2->set_type("VARCHAR"); |
| column_2->set_length(20); |
| column_2->set_index_length(20); |
| column_2->set_is_key(true); |
| column_2->set_is_nullable(false); |
| column_2->set_is_bf_column(false); |
| |
| ColumnPB* column_3 = tablet_schema_pb.add_column(); |
| column_3->set_unique_id(3); |
| column_3->set_name("k3"); |
| column_3->set_type("INT"); |
| column_3->set_is_key(true); |
| column_3->set_length(4); |
| column_3->set_index_length(4); |
| column_3->set_is_nullable(false); |
| column_3->set_is_bf_column(false); |
| |
| ColumnPB* column_4 = tablet_schema_pb.add_column(); |
| column_4->set_unique_id(4); |
| column_4->set_name("v1"); |
| column_4->set_type("INT"); |
| column_4->set_length(4); |
| column_4->set_is_key(false); |
| column_4->set_is_nullable(false); |
| column_4->set_is_bf_column(false); |
| column_4->set_aggregation("SUM"); |
| |
| tablet_schema.init_from_pb(tablet_schema_pb); |
| } |
| |
| TEST_F(TestColumn, ConvertIntToBitmap) { |
| //Base Tablet |
| TabletSchema tablet_schema; |
| CreateTabletSchema(tablet_schema); |
| //Base row block |
| CreateColumnWriter(tablet_schema); |
| |
| RowCursor write_row; |
| write_row.init(tablet_schema); |
| write_row.allocate_memory_for_string_type(tablet_schema); |
| RowBlock block(&tablet_schema); |
| RowBlockInfo block_info; |
| block_info.row_num = 10000; |
| block.init(block_info); |
| |
| std::vector<std::string> val_string_array; |
| val_string_array.emplace_back("5"); |
| val_string_array.emplace_back("4"); |
| val_string_array.emplace_back("2"); |
| val_string_array.emplace_back("3"); |
| OlapTuple tuple(val_string_array); |
| write_row.from_tuple(tuple); |
| block.set_row(0, write_row); |
| block.finalize(1); |
| ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); |
| ColumnDataHeaderMessage header; |
| ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); |
| |
| //Materialized View tablet schema |
| TabletSchemaPB mv_tablet_schema_pb; |
| mv_tablet_schema_pb.set_keys_type(KeysType::AGG_KEYS); |
| mv_tablet_schema_pb.set_num_short_key_columns(2); |
| mv_tablet_schema_pb.set_num_rows_per_row_block(1024); |
| mv_tablet_schema_pb.set_compress_kind(COMPRESS_NONE); |
| mv_tablet_schema_pb.set_next_column_unique_id(3); |
| |
| ColumnPB* mv_column_1 = mv_tablet_schema_pb.add_column(); |
| mv_column_1->set_unique_id(1); |
| mv_column_1->set_name("k1"); |
| mv_column_1->set_type("INT"); |
| mv_column_1->set_is_key(true); |
| mv_column_1->set_length(4); |
| mv_column_1->set_index_length(4); |
| mv_column_1->set_is_nullable(false); |
| mv_column_1->set_is_bf_column(false); |
| |
| ColumnPB* mv_column_2 = mv_tablet_schema_pb.add_column(); |
| mv_column_2->set_unique_id(2); |
| mv_column_2->set_name("v1"); |
| mv_column_2->set_type("OBJECT"); |
| mv_column_2->set_length(8); |
| mv_column_2->set_is_key(false); |
| mv_column_2->set_is_nullable(false); |
| mv_column_2->set_is_bf_column(false); |
| mv_column_2->set_aggregation("BITMAP_UNION"); |
| |
| TabletSchema mv_tablet_schema; |
| mv_tablet_schema.init_from_pb(mv_tablet_schema_pb); |
| |
| RowBlockChanger row_block_changer(mv_tablet_schema); |
| ColumnMapping* column_mapping = row_block_changer.get_mutable_column_mapping(0); |
| column_mapping->ref_column = 0; |
| column_mapping = row_block_changer.get_mutable_column_mapping(1); |
| column_mapping->ref_column = 2; |
| column_mapping->materialized_function = "to_bitmap"; |
| |
| RowBlock mutable_block(&mv_tablet_schema); |
| mutable_block.init(block_info); |
| uint64_t filtered_rows = 0; |
| row_block_changer.change_row_block(&block, 0, &mutable_block, &filtered_rows); |
| |
| RowCursor mv_row_cursor; |
| mv_row_cursor.init(mv_tablet_schema); |
| mutable_block.get_row(0, &mv_row_cursor); |
| |
| auto dst_slice = reinterpret_cast<Slice*>(mv_row_cursor.cell_ptr(1)); |
| BitmapValue bitmapValue(dst_slice->data); |
| ASSERT_EQ(bitmapValue.cardinality(), 1); |
| } |
| |
| TEST_F(TestColumn, ConvertCharToHLL) { |
| //Base Tablet |
| TabletSchema tablet_schema; |
| CreateTabletSchema(tablet_schema); |
| |
| //Base row block |
| CreateColumnWriter(tablet_schema); |
| |
| RowCursor write_row; |
| write_row.init(tablet_schema); |
| write_row.allocate_memory_for_string_type(tablet_schema); |
| RowBlock block(&tablet_schema); |
| RowBlockInfo block_info; |
| block_info.row_num = 10000; |
| block.init(block_info); |
| |
| std::vector<std::string> val_string_array; |
| //std::string origin_val = "2019-11-25 19:07:00"; |
| //val_string_array.emplace_back(origin_val); |
| val_string_array.emplace_back("1"); |
| val_string_array.emplace_back("1"); |
| val_string_array.emplace_back("2"); |
| val_string_array.emplace_back("3"); |
| OlapTuple tuple(val_string_array); |
| write_row.from_tuple(tuple); |
| block.set_row(0, write_row); |
| block.finalize(1); |
| ASSERT_EQ(_column_writer->write_batch(&block, &write_row), OLAP_SUCCESS); |
| ColumnDataHeaderMessage header; |
| ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); |
| |
| //Materialized View tablet schema |
| TabletSchemaPB mv_tablet_schema_pb; |
| mv_tablet_schema_pb.set_keys_type(KeysType::AGG_KEYS); |
| mv_tablet_schema_pb.set_num_short_key_columns(2); |
| mv_tablet_schema_pb.set_num_rows_per_row_block(1024); |
| mv_tablet_schema_pb.set_compress_kind(COMPRESS_NONE); |
| mv_tablet_schema_pb.set_next_column_unique_id(3); |
| |
| ColumnPB* mv_column_1 = mv_tablet_schema_pb.add_column(); |
| mv_column_1->set_unique_id(1); |
| mv_column_1->set_name("k1"); |
| mv_column_1->set_type("INT"); |
| mv_column_1->set_is_key(true); |
| mv_column_1->set_length(4); |
| mv_column_1->set_index_length(4); |
| mv_column_1->set_is_nullable(false); |
| mv_column_1->set_is_bf_column(false); |
| |
| ColumnPB* mv_column_2 = mv_tablet_schema_pb.add_column(); |
| mv_column_2->set_unique_id(2); |
| mv_column_2->set_name("v1"); |
| mv_column_2->set_type("HLL"); |
| mv_column_2->set_length(4); |
| mv_column_2->set_is_key(false); |
| mv_column_2->set_is_nullable(false); |
| mv_column_2->set_is_bf_column(false); |
| mv_column_2->set_aggregation("HLL_UNION"); |
| |
| TabletSchema mv_tablet_schema; |
| mv_tablet_schema.init_from_pb(mv_tablet_schema_pb); |
| |
| RowBlockChanger row_block_changer(mv_tablet_schema); |
| ColumnMapping* column_mapping = row_block_changer.get_mutable_column_mapping(0); |
| column_mapping->ref_column = 0; |
| column_mapping = row_block_changer.get_mutable_column_mapping(1); |
| column_mapping->ref_column = 1; |
| column_mapping->materialized_function = "hll_hash"; |
| |
| RowBlock mutable_block(&mv_tablet_schema); |
| mutable_block.init(block_info); |
| uint64_t filtered_rows = 0; |
| row_block_changer.change_row_block(&block, 0, &mutable_block, &filtered_rows); |
| |
| RowCursor mv_row_cursor; |
| mv_row_cursor.init(mv_tablet_schema); |
| mutable_block.get_row(0, &mv_row_cursor); |
| |
| auto dst_slice = reinterpret_cast<Slice*>(mv_row_cursor.cell_ptr(1)); |
| HyperLogLog hll(*dst_slice); |
| ASSERT_EQ(hll.estimate_cardinality(), 1); |
| } |
| |
| TEST_F(TestColumn, ConvertCharToCount) { |
| //Base Tablet |
| TabletSchema tablet_schema; |
| CreateTabletSchema(tablet_schema); |
| |
| //Base row block |
| CreateColumnWriter(tablet_schema); |
| |
| RowBlock block(&tablet_schema); |
| RowBlockInfo block_info; |
| block_info.row_num = 10000; |
| block.init(block_info); |
| |
| RowCursor write_row; |
| write_row.init(tablet_schema); |
| write_row.allocate_memory_for_string_type(tablet_schema); |
| std::vector<std::string> val_string_array; |
| val_string_array.emplace_back("1"); |
| val_string_array.emplace_back("1"); |
| val_string_array.emplace_back("2"); |
| val_string_array.emplace_back("3"); |
| OlapTuple tuple(val_string_array); |
| write_row.from_tuple(tuple); |
| block.set_row(0, write_row); |
| |
| block.finalize(1); |
| ColumnDataHeaderMessage header; |
| ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS); |
| |
| //Materialized View tablet schema |
| TabletSchemaPB mv_tablet_schema_pb; |
| mv_tablet_schema_pb.set_keys_type(KeysType::AGG_KEYS); |
| mv_tablet_schema_pb.set_num_short_key_columns(2); |
| mv_tablet_schema_pb.set_num_rows_per_row_block(1024); |
| mv_tablet_schema_pb.set_compress_kind(COMPRESS_NONE); |
| mv_tablet_schema_pb.set_next_column_unique_id(3); |
| |
| ColumnPB* mv_column_1 = mv_tablet_schema_pb.add_column(); |
| mv_column_1->set_unique_id(1); |
| mv_column_1->set_name("k1"); |
| mv_column_1->set_type("INT"); |
| mv_column_1->set_is_key(true); |
| mv_column_1->set_length(4); |
| mv_column_1->set_index_length(4); |
| mv_column_1->set_is_nullable(false); |
| mv_column_1->set_is_bf_column(false); |
| |
| ColumnPB* mv_column_2 = mv_tablet_schema_pb.add_column(); |
| mv_column_2->set_unique_id(2); |
| mv_column_2->set_name("v1"); |
| mv_column_2->set_type("BIGINT"); |
| mv_column_2->set_length(4); |
| mv_column_2->set_is_key(false); |
| mv_column_2->set_is_nullable(false); |
| mv_column_2->set_is_bf_column(false); |
| mv_column_2->set_aggregation("SUM"); |
| |
| TabletSchema mv_tablet_schema; |
| mv_tablet_schema.init_from_pb(mv_tablet_schema_pb); |
| |
| RowBlockChanger row_block_changer(mv_tablet_schema); |
| ColumnMapping* column_mapping = row_block_changer.get_mutable_column_mapping(0); |
| column_mapping->ref_column = 0; |
| column_mapping = row_block_changer.get_mutable_column_mapping(1); |
| column_mapping->ref_column = 1; |
| column_mapping->materialized_function = "count_field"; |
| |
| RowBlock mutable_block(&mv_tablet_schema); |
| mutable_block.init(block_info); |
| uint64_t filtered_rows = 0; |
| row_block_changer.change_row_block(&block, 0, &mutable_block, &filtered_rows); |
| |
| RowCursor mv_row_cursor; |
| mv_row_cursor.init(mv_tablet_schema); |
| mutable_block.get_row(0, &mv_row_cursor); |
| |
| auto dst = mv_row_cursor.cell_ptr(1); |
| ASSERT_EQ(*(int64_t*)dst, 1); |
| } |
| } // namespace doris |
| |
| int main(int argc, char** argv) { |
| std::string conf_file = std::string(getenv("DORIS_HOME")) + "/conf/be.conf"; |
| if (!doris::config::init(conf_file.c_str(), false)) { |
| fprintf(stderr, "error read config file. \n"); |
| return -1; |
| } |
| doris::init_glog("be-test"); |
| int ret = doris::OLAP_SUCCESS; |
| testing::InitGoogleTest(&argc, argv); |
| ret = RUN_ALL_TESTS(); |
| return ret; |
| } |