| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "kudu/common/schema.h" |
| |
| #include <glog/logging.h> |
| #include <gtest/gtest.h> |
| #include <unordered_map> |
| #include <vector> |
| |
| #include "kudu/common/key_encoder.h" |
| #include "kudu/common/row.h" |
| #include "kudu/gutil/strings/substitute.h" |
| #include "kudu/util/hexdump.h" |
| #include "kudu/util/stopwatch.h" |
| #include "kudu/util/test_macros.h" |
| |
| namespace kudu { |
| namespace tablet { |
| |
| using std::unordered_map; |
| using std::vector; |
| using strings::Substitute; |
| |
| // Copy a row and its referenced data into the given Arena. |
| static Status CopyRowToArena(const Slice &row, |
| const Schema &schema, |
| Arena *dst_arena, |
| ContiguousRow *copied) { |
| Slice row_data; |
| |
| // Copy the direct row data to arena |
| if (!dst_arena->RelocateSlice(row, &row_data)) { |
| return Status::IOError("no space for row data in arena"); |
| } |
| |
| copied->Reset(row_data.mutable_data()); |
| RETURN_NOT_OK(RelocateIndirectDataToArena(copied, dst_arena)); |
| return Status::OK(); |
| } |
| |
| |
| |
| // Test basic functionality of Schema definition |
| TEST(TestSchema, TestSchema) { |
| Schema empty_schema; |
| ASSERT_GT(empty_schema.memory_footprint_excluding_this(), 0); |
| |
| ColumnSchema col1("key", STRING); |
| ColumnSchema col2("uint32val", UINT32, true); |
| ColumnSchema col3("int32val", INT32); |
| |
| vector<ColumnSchema> cols = { col1, col2, col3 }; |
| Schema schema(cols, 1); |
| |
| ASSERT_EQ(sizeof(Slice) + sizeof(uint32_t) + sizeof(int32_t), |
| schema.byte_size()); |
| ASSERT_EQ(3, schema.num_columns()); |
| ASSERT_EQ(0, schema.column_offset(0)); |
| ASSERT_EQ(sizeof(Slice), schema.column_offset(1)); |
| ASSERT_GT(schema.memory_footprint_excluding_this(), |
| empty_schema.memory_footprint_excluding_this()); |
| |
| EXPECT_EQ("Schema [\n" |
| "\tkey[string NOT NULL],\n" |
| "\tuint32val[uint32 NULLABLE],\n" |
| "\tint32val[int32 NOT NULL]\n" |
| "]", |
| schema.ToString()); |
| EXPECT_EQ("key[string NOT NULL]", schema.column(0).ToString()); |
| EXPECT_EQ("uint32 NULLABLE", schema.column(1).TypeToString()); |
| } |
| |
| TEST(TestSchema, TestSwap) { |
| Schema schema1({ ColumnSchema("col1", STRING), |
| ColumnSchema("col2", STRING), |
| ColumnSchema("col3", UINT32) }, |
| 2); |
| Schema schema2({ ColumnSchema("col3", UINT32), |
| ColumnSchema("col2", STRING) }, |
| 1); |
| schema1.swap(schema2); |
| ASSERT_EQ(2, schema1.num_columns()); |
| ASSERT_EQ(1, schema1.num_key_columns()); |
| ASSERT_EQ(3, schema2.num_columns()); |
| ASSERT_EQ(2, schema2.num_key_columns()); |
| } |
| |
| TEST(TestSchema, TestReset) { |
| Schema schema; |
| ASSERT_FALSE(schema.initialized()); |
| |
| ASSERT_OK(schema.Reset({ ColumnSchema("col3", UINT32), |
| ColumnSchema("col2", STRING) }, |
| 1)); |
| ASSERT_TRUE(schema.initialized()); |
| |
| // Swap the initialized schema with an uninitialized one. |
| Schema schema2; |
| schema2.swap(schema); |
| ASSERT_FALSE(schema.initialized()); |
| ASSERT_TRUE(schema2.initialized()); |
| } |
| |
| // Test for KUDU-943, a bug where we suspected that Variant didn't behave |
| // correctly with empty strings. |
| TEST(TestSchema, TestEmptyVariant) { |
| Slice empty_val(""); |
| Slice nonempty_val("test"); |
| |
| Variant v(STRING, &nonempty_val); |
| ASSERT_EQ("test", (static_cast<const Slice*>(v.value()))->ToString()); |
| v.Reset(STRING, &empty_val); |
| ASSERT_EQ("", (static_cast<const Slice*>(v.value()))->ToString()); |
| v.Reset(STRING, &nonempty_val); |
| ASSERT_EQ("test", (static_cast<const Slice*>(v.value()))->ToString()); |
| } |
| |
| TEST(TestSchema, TestProjectSubset) { |
| Schema schema1({ ColumnSchema("col1", STRING), |
| ColumnSchema("col2", STRING), |
| ColumnSchema("col3", UINT32) }, |
| 1); |
| |
| Schema schema2({ ColumnSchema("col3", UINT32), |
| ColumnSchema("col2", STRING) }, |
| 0); |
| |
| RowProjector row_projector(&schema1, &schema2); |
| ASSERT_OK(row_projector.Init()); |
| |
| // Verify the mapping |
| ASSERT_EQ(2, row_projector.base_cols_mapping().size()); |
| ASSERT_EQ(0, row_projector.projection_defaults().size()); |
| |
| const vector<RowProjector::ProjectionIdxMapping>& mapping = row_projector.base_cols_mapping(); |
| ASSERT_EQ(mapping[0].first, 0); // col3 schema2 |
| ASSERT_EQ(mapping[0].second, 2); // col3 schema1 |
| ASSERT_EQ(mapping[1].first, 1); // col2 schema2 |
| ASSERT_EQ(mapping[1].second, 1); // col2 schema1 |
| } |
| |
| // Test projection when the type of the projected column |
| // doesn't match the original type. |
| TEST(TestSchema, TestProjectTypeMismatch) { |
| Schema schema1({ ColumnSchema("key", STRING), |
| ColumnSchema("val", UINT32) }, |
| 1); |
| Schema schema2({ ColumnSchema("val", STRING) }, 0); |
| |
| RowProjector row_projector(&schema1, &schema2); |
| Status s = row_projector.Init(); |
| ASSERT_TRUE(s.IsInvalidArgument()); |
| ASSERT_STR_CONTAINS(s.message().ToString(), "must have type"); |
| } |
| |
| // Test projection when the some columns in the projection |
| // are not present in the base schema |
| TEST(TestSchema, TestProjectMissingColumn) { |
| Schema schema1({ ColumnSchema("key", STRING), ColumnSchema("val", UINT32) }, 1); |
| Schema schema2({ ColumnSchema("val", UINT32), ColumnSchema("non_present", STRING) }, 0); |
| Schema schema3({ ColumnSchema("val", UINT32), ColumnSchema("non_present", UINT32, true) }, 0); |
| uint32_t default_value = 15; |
| Schema schema4({ ColumnSchema("val", UINT32), |
| ColumnSchema("non_present", UINT32, false, &default_value) }, |
| 0); |
| |
| RowProjector row_projector(&schema1, &schema2); |
| Status s = row_projector.Init(); |
| ASSERT_TRUE(s.IsInvalidArgument()); |
| ASSERT_STR_CONTAINS(s.message().ToString(), |
| "does not exist in the projection, and it does not have a default value or a nullable type"); |
| |
| // Verify Default nullable column with no default value |
| ASSERT_OK(row_projector.Reset(&schema1, &schema3)); |
| |
| ASSERT_EQ(1, row_projector.base_cols_mapping().size()); |
| ASSERT_EQ(1, row_projector.projection_defaults().size()); |
| |
| ASSERT_EQ(row_projector.base_cols_mapping()[0].first, 0); // val schema2 |
| ASSERT_EQ(row_projector.base_cols_mapping()[0].second, 1); // val schema1 |
| ASSERT_EQ(row_projector.projection_defaults()[0], 1); // non_present schema3 |
| |
| // Verify Default non nullable column with default value |
| ASSERT_OK(row_projector.Reset(&schema1, &schema4)); |
| |
| ASSERT_EQ(1, row_projector.base_cols_mapping().size()); |
| ASSERT_EQ(1, row_projector.projection_defaults().size()); |
| |
| ASSERT_EQ(row_projector.base_cols_mapping()[0].first, 0); // val schema4 |
| ASSERT_EQ(row_projector.base_cols_mapping()[0].second, 1); // val schema1 |
| ASSERT_EQ(row_projector.projection_defaults()[0], 1); // non_present schema4 |
| } |
| |
| // Test projection mapping using IDs. |
| // This simulate a column rename ('val' -> 'val_renamed') |
| // and a new column added ('non_present') |
| TEST(TestSchema, TestProjectRename) { |
| SchemaBuilder builder; |
| ASSERT_OK(builder.AddKeyColumn("key", STRING)); |
| ASSERT_OK(builder.AddColumn("val", UINT32)); |
| Schema schema1 = builder.Build(); |
| |
| builder.Reset(schema1); |
| ASSERT_OK(builder.AddNullableColumn("non_present", UINT32)); |
| ASSERT_OK(builder.RenameColumn("val", "val_renamed")); |
| Schema schema2 = builder.Build(); |
| |
| RowProjector row_projector(&schema1, &schema2); |
| ASSERT_OK(row_projector.Init()); |
| |
| ASSERT_EQ(2, row_projector.base_cols_mapping().size()); |
| ASSERT_EQ(1, row_projector.projection_defaults().size()); |
| |
| ASSERT_EQ(row_projector.base_cols_mapping()[0].first, 0); // key schema2 |
| ASSERT_EQ(row_projector.base_cols_mapping()[0].second, 0); // key schema1 |
| |
| ASSERT_EQ(row_projector.base_cols_mapping()[1].first, 1); // val_renamed schema2 |
| ASSERT_EQ(row_projector.base_cols_mapping()[1].second, 1); // val schema1 |
| |
| ASSERT_EQ(row_projector.projection_defaults()[0], 2); // non_present schema2 |
| } |
| |
| |
| // Test that the schema can be used to compare and stringify rows. |
| TEST(TestSchema, TestRowOperations) { |
| Schema schema({ ColumnSchema("col1", STRING), |
| ColumnSchema("col2", STRING), |
| ColumnSchema("col3", UINT32), |
| ColumnSchema("col4", INT32) }, |
| 1); |
| |
| Arena arena(1024, 256*1024); |
| |
| RowBuilder rb(schema); |
| rb.AddString(string("row_a_1")); |
| rb.AddString(string("row_a_2")); |
| rb.AddUint32(3); |
| rb.AddInt32(-3); |
| ContiguousRow row_a(&schema); |
| ASSERT_OK(CopyRowToArena(rb.data(), schema, &arena, &row_a)); |
| |
| rb.Reset(); |
| rb.AddString(string("row_b_1")); |
| rb.AddString(string("row_b_2")); |
| rb.AddUint32(3); |
| rb.AddInt32(-3); |
| ContiguousRow row_b(&schema); |
| ASSERT_OK(CopyRowToArena(rb.data(), schema, &arena, &row_b)); |
| |
| ASSERT_GT(schema.Compare(row_b, row_a), 0); |
| ASSERT_LT(schema.Compare(row_a, row_b), 0); |
| |
| ASSERT_EQ(string("(string col1=row_a_1, string col2=row_a_2, uint32 col3=3, int32 col4=-3)"), |
| schema.DebugRow(row_a)); |
| } |
| |
| TEST(TestKeyEncoder, TestKeyEncoder) { |
| faststring fs; |
| const KeyEncoder<faststring>& encoder = GetKeyEncoder<faststring>(GetTypeInfo(STRING)); |
| |
| typedef std::tuple<vector<Slice>, Slice> test_pair; |
| |
| vector<test_pair> pairs; |
| |
| // Simple key |
| pairs.push_back(test_pair({ Slice("foo", 3) }, Slice("foo", 3))); |
| |
| // Simple compound key |
| pairs.push_back(test_pair({ Slice("foo", 3), Slice("bar", 3) }, |
| Slice("foo" "\x00\x00" "bar", 8))); |
| |
| // Compound key with a \x00 in it |
| pairs.push_back(test_pair({ Slice("xxx\x00yyy", 7), Slice("bar", 3) }, |
| Slice("xxx" "\x00\x01" "yyy" "\x00\x00" "bar", 13))); |
| |
| int i = 0; |
| for (const test_pair &t : pairs) { |
| const vector<Slice> &in = std::get<0>(t); |
| Slice expected = std::get<1>(t); |
| |
| fs.clear(); |
| for (int col = 0; col < in.size(); col++) { |
| encoder.Encode(&in[col], col == in.size() - 1, &fs); |
| } |
| |
| ASSERT_EQ(0, expected.compare(Slice(fs))) |
| << "Failed encoding example " << i << ".\n" |
| << "Expected: " << HexDump(expected) << "\n" |
| << "Got: " << HexDump(Slice(fs)); |
| i++; |
| } |
| } |
| |
| TEST(TestSchema, TestDecodeKeys_CompoundStringKey) { |
| Schema schema({ ColumnSchema("col1", STRING), |
| ColumnSchema("col2", STRING), |
| ColumnSchema("col3", STRING) }, |
| 2); |
| |
| EXPECT_EQ("(string col1=foo, string col2=bar)", |
| schema.DebugEncodedRowKey(Slice("foo\0\0bar", 8), Schema::START_KEY)); |
| EXPECT_EQ("(string col1=fo\\000o, string col2=bar)", |
| schema.DebugEncodedRowKey(Slice("fo\x00\x01o\0\0""bar", 10), Schema::START_KEY)); |
| EXPECT_EQ("(string col1=fo\\000o, string col2=bar\\000xy)", |
| schema.DebugEncodedRowKey(Slice("fo\x00\x01o\0\0""bar\0xy", 13), Schema::START_KEY)); |
| |
| EXPECT_EQ("<start of table>", |
| schema.DebugEncodedRowKey("", Schema::START_KEY)); |
| EXPECT_EQ("<end of table>", |
| schema.DebugEncodedRowKey("", Schema::END_KEY)); |
| } |
| |
| // Test that appropriate statuses are returned when trying to decode an invalid |
| // encoded key. |
| TEST(TestSchema, TestDecodeKeys_InvalidKeys) { |
| Schema schema({ ColumnSchema("col1", STRING), |
| ColumnSchema("col2", UINT32), |
| ColumnSchema("col3", STRING) }, |
| 2); |
| |
| EXPECT_EQ("<invalid key: Invalid argument: Error decoding composite key component" |
| " 'col1': Missing separator after composite key string component: foo>", |
| schema.DebugEncodedRowKey(Slice("foo"), Schema::START_KEY)); |
| EXPECT_EQ("<invalid key: Invalid argument: Error decoding composite key component 'col2': " |
| "key too short>", |
| schema.DebugEncodedRowKey(Slice("foo\x00\x00", 5), Schema::START_KEY)); |
| EXPECT_EQ("<invalid key: Invalid argument: Error decoding composite key component 'col2': " |
| "key too short: \\xff\\xff>", |
| schema.DebugEncodedRowKey(Slice("foo\x00\x00\xff\xff", 7), Schema::START_KEY)); |
| } |
| |
| TEST(TestSchema, TestCreateProjection) { |
| Schema schema({ ColumnSchema("col1", STRING), |
| ColumnSchema("col2", STRING), |
| ColumnSchema("col3", STRING), |
| ColumnSchema("col4", STRING), |
| ColumnSchema("col5", STRING) }, |
| 2); |
| Schema schema_with_ids = SchemaBuilder(schema).Build(); |
| Schema partial_schema; |
| |
| // By names, without IDs |
| ASSERT_OK(schema.CreateProjectionByNames({ "col1", "col2", "col4" }, &partial_schema)); |
| EXPECT_EQ("Schema [\n" |
| "\tcol1[string NOT NULL],\n" |
| "\tcol2[string NOT NULL],\n" |
| "\tcol4[string NOT NULL]\n" |
| "]", |
| partial_schema.ToString()); |
| |
| // By names, with IDS |
| ASSERT_OK(schema_with_ids.CreateProjectionByNames({ "col1", "col2", "col4" }, &partial_schema)); |
| EXPECT_EQ(Substitute("Schema [\n" |
| "\t$0:col1[string NOT NULL],\n" |
| "\t$1:col2[string NOT NULL],\n" |
| "\t$2:col4[string NOT NULL]\n" |
| "]", |
| schema_with_ids.column_id(0), |
| schema_with_ids.column_id(1), |
| schema_with_ids.column_id(3)), |
| partial_schema.ToString()); |
| |
| // By names, with missing names. |
| Status s = schema.CreateProjectionByNames({ "foobar" }, &partial_schema); |
| EXPECT_EQ("Not found: column not found: foobar", s.ToString()); |
| |
| // By IDs |
| ASSERT_OK(schema_with_ids.CreateProjectionByIdsIgnoreMissing({ schema_with_ids.column_id(0), |
| schema_with_ids.column_id(1), |
| ColumnId(1000), // missing column |
| schema_with_ids.column_id(3) }, |
| &partial_schema)); |
| EXPECT_EQ(Substitute("Schema [\n" |
| "\t$0:col1[string NOT NULL],\n" |
| "\t$1:col2[string NOT NULL],\n" |
| "\t$2:col4[string NOT NULL]\n" |
| "]", |
| schema_with_ids.column_id(0), |
| schema_with_ids.column_id(1), |
| schema_with_ids.column_id(3)), |
| partial_schema.ToString()); |
| } |
| |
| #ifdef NDEBUG |
| TEST(TestKeyEncoder, BenchmarkSimpleKey) { |
| faststring fs; |
| Schema schema({ ColumnSchema("col1", STRING) }, 1); |
| |
| RowBuilder rb(schema); |
| rb.AddString(Slice("hello world")); |
| ConstContiguousRow row(&rb.schema(), rb.data()); |
| |
| LOG_TIMING(INFO, "Encoding") { |
| for (int i = 0; i < 10000000; i++) { |
| schema.EncodeComparableKey(row, &fs); |
| } |
| } |
| } |
| #endif |
| |
| } // namespace tablet |
| } // namespace kudu |