// blob: 3e6ac1ba77208a619fe0c25c0fb15e9115278329 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "vec/common/string_buffer.hpp"
#include <gtest/gtest.h>
#include "vec/columns/column_string.h"
#include "vec/common/arena.h"
namespace doris::vectorized {
// Drives the plain write entry points of BufferWritable (pointer + length,
// single char, C string) and checks that every write/commit pair shows up as
// one more row in the backing ColumnString.
TEST(StringBufferTest, TestWrite) {
    auto column = ColumnString::create();
    BufferWritable buf(*column);

    // Materialises the given row of the column as a std::string.
    auto row_text = [&column](size_t row) { return column->get_data_at(row).to_string(); };

    // Row 0: pointer + explicit length.
    buf.write("hello", 5);
    buf.commit();
    ASSERT_EQ(column->size(), 1);
    ASSERT_EQ(row_text(0), "hello");

    // Row 1: a single character.
    buf.write(' ');
    buf.commit();
    ASSERT_EQ(column->size(), 2);
    ASSERT_EQ(row_text(1), " ");

    // Row 2: a NUL-terminated C string.
    buf.write_c_string("world");
    buf.commit();
    ASSERT_EQ(column->size(), 3);
    ASSERT_EQ(row_text(2), "world");

    // Row 3: bytes taken from a std::string.
    std::string bang = "!";
    buf.write(bang.data(), bang.size());
    buf.commit();
    ASSERT_EQ(column->size(), 4);
    ASSERT_EQ(row_text(3), "!");

    // Re-check every row once all writes have been committed.
    ASSERT_EQ(row_text(0), "hello");
    ASSERT_EQ(row_text(1), " ");
    ASSERT_EQ(row_text(2), "world");
    ASSERT_EQ(row_text(3), "!");
}
// Checks that write_number() stores the textual form of positive, negative and
// floating-point values, one committed row per number.
TEST(StringBufferTest, TestWriteNumber) {
    auto column = ColumnString::create();
    BufferWritable buf(*column);

    // Materialises the given row of the column as a std::string.
    auto row_text = [&column](size_t row) { return column->get_data_at(row).to_string(); };

    // Positive integer.
    buf.write_number(123);
    buf.commit();
    ASSERT_EQ(column->size(), 1);
    ASSERT_EQ(row_text(0), "123");

    // Negative integer; the first row is checked again alongside the new one.
    buf.write_number(-456);
    buf.commit();
    ASSERT_EQ(column->size(), 2);
    ASSERT_EQ(row_text(0), "123");
    ASSERT_EQ(row_text(1), "-456");

    // Floating-point value.
    buf.write_number(78.9);
    buf.commit();
    ASSERT_EQ(column->size(), 3);
    ASSERT_EQ(row_text(2), "78.9");
}
// Round-trips three kinds of payload (an int, a std::string and a StringRef)
// through write_binary() / read_binary(), using one column row per payload.
TEST(StringBufferTest, TestWriteReadBinary) {
    auto column = ColumnString::create();
    BufferWritable buf(*column);

    // --- POD -----------------------------------------------------------
    {
        int written = 123;
        buf.write_binary(written);
        buf.commit();
        ASSERT_EQ(column->size(), 1);

        // The stored row is expected to be exactly sizeof(int) bytes long.
        StringRef pod_cell = column->get_data_at(0);
        ASSERT_EQ(pod_cell.size, sizeof(int));

        BufferReadable pod_reader(pod_cell);
        int restored = 0;
        pod_reader.read_binary(restored);
        ASSERT_EQ(written, restored);
    }

    // --- std::string ---------------------------------------------------
    {
        std::string written = "hello world";
        buf.write_binary(written);
        buf.commit();
        ASSERT_EQ(column->size(), 2);

        StringRef string_cell = column->get_data_at(1);
        BufferReadable string_reader(string_cell);
        std::string restored;
        string_reader.read_binary(restored);
        ASSERT_EQ(written, restored);
    }

    // --- StringRef -----------------------------------------------------
    {
        StringRef written("doris", 5);
        buf.write_binary(written);
        buf.commit();
        ASSERT_EQ(column->size(), 3);

        StringRef ref_cell = column->get_data_at(2);
        BufferReadable ref_reader(ref_cell);
        StringRef restored;
        ref_reader.read_binary(restored);
        // Compare by content, via owning std::string copies.
        ASSERT_EQ(written.to_string(), restored.to_string());
    }
}
// NOTE: TestVarUInt below is intentionally commented out. It may fail due to a bug in
// read_var_uint, which can read out of bounds: the loop condition `i < 9` should
// probably be `i < len`.
//TEST(StringBufferTest, TestVarUInt) {
// auto column = ColumnString::create();
// BufferWritable buf(*column);
//
// std::vector<UInt64> values = {123, 12345, 1234567, 0, (1UL << 35) - 1, (1UL << 63) - 1};
//
// for (const auto& v : values) {
// buf.write_var_uint(v);
// }
// buf.commit();
//
// ASSERT_EQ(column->size(), 1);
// StringRef sr = column->get_data_at(0);
// BufferReadable reader(sr);
//
// for (const auto& v : values) {
// UInt64 read_val;
// reader.read_var_uint(read_val);
// ASSERT_EQ(v, read_val);
// }
//}
// Verifies the escaping performed by write_json_string() for three input
// classes: the short backslash escapes, other control characters, and the
// UTF-8 encoded U+2028 / U+2029 separators. Each case lands in its own row.
TEST(StringBufferTest, TestWriteJsonString) {
    auto column = ColumnString::create();
    BufferWritable buf(*column);

    // Short escapes: \b \f \n \r \t, backslash and double quote; '/' is
    // expected to pass through unescaped.
    {
        std::string raw = "ab\b\f\n\r\t\\\"/c";
        buf.write_json_string(raw);
        buf.commit();
        ASSERT_EQ(column->size(), 1);

        const std::string escaped = "\"ab\\b\\f\\n\\r\\t\\\\\\\"/c\"";
        ASSERT_EQ(column->get_data_at(0).to_string(), escaped);
    }

    // Control characters without a short escape are expected as \uXXXX.
    {
        char control_bytes[] = {0x01, 0x1f};
        buf.write_json_string(control_bytes, 2);
        buf.commit();
        ASSERT_EQ(column->size(), 2);

        const std::string escaped = "\"\\u0001\\u001F\"";
        ASSERT_EQ(column->get_data_at(1).to_string(), escaped);
    }

    // UTF-8 line separator (E2 80 A8) and paragraph separator (E2 80 A9).
    {
        std::string separators =
                "\xE2\x80\xA8"
                " and "
                "\xE2\x80\xA9";
        buf.write_json_string(separators);
        buf.commit();
        ASSERT_EQ(column->size(), 3);

        const std::string escaped = "\"\\u2028 and \\u2029\"";
        ASSERT_EQ(column->get_data_at(2).to_string(), escaped);
    }
}
// Writes a StringRef with write_binary(), reads it back with
// read_binary_into(arena), and checks both the content and that the returned
// data pointer falls inside the address range derived from the arena.
TEST(StringBufferTest, ReadWriteStringRefWithArena) {
    auto column = ColumnString::create();
    BufferWritable buf(*column);
    Arena arena;

    // Serialise the source string into row 0.
    StringRef source("hello from arena", 16);
    buf.write_binary(source);
    buf.commit();
    ASSERT_EQ(column->size(), 1);

    // Deserialise it again, asking the reader to place the bytes in `arena`.
    StringRef stored_cell = column->get_data_at(0);
    BufferReadable reader(stored_cell);
    StringRef restored = reader.read_binary_into(arena);

    ASSERT_EQ(source.size, restored.size);
    ASSERT_EQ(source.to_string(), restored.to_string());

    // The restored StringRef should have its data in the arena.
    // NOTE(review): this treats the arena as the arena.size() bytes ending at
    // the pointer returned by alloc(0) -- confirm against Arena's implementation.
    const char* arena_end = arena.alloc(0);
    const char* arena_start = arena_end - arena.size();
    const bool at_or_after_start = restored.data >= arena_start;
    const bool before_end = restored.data < arena_end;
    ASSERT_TRUE(at_or_after_start && before_end);
}
} // namespace doris::vectorized