| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include <gmock/gmock-actions.h> |
| #include <gmock/gmock-matchers.h> |
| #include <gmock/gmock-spec-builders.h> |
| #include <gtest/gtest-matchers.h> |
| |
| #include <memory> |
| #include <string> |
| #include <vector> |
| |
| #include "core/types.h" |
| #include "exprs/function/function_test_util.h" |
| #include "exprs/mock_vexpr.h" |
| #include "exprs/table_function/vexplode.h" |
| #include "exprs/table_function/vexplode_numbers.h" |
| #include "exprs/table_function/vexplode_v2.h" |
| #include "exprs/table_function/vjson_each.h" |
| #include "testutil/any_type.h" |
| #include "util/jsonb_parser_simd.h" |
| #include "util/jsonb_utils.h" |
| #include "util/jsonb_writer.h" |
| |
| namespace doris { |
| |
| using ::testing::_; |
| using ::testing::DoAll; |
| using ::testing::Invoke; |
| using ::testing::Return; |
| using ::testing::SetArgPointee; |
| |
| class TableFunctionTest : public testing::Test { |
| protected: |
| void SetUp() override {} |
| void TearDown() override {} |
| |
| void clear() { |
| _ctx = nullptr; |
| _root = nullptr; |
| _children.clear(); |
| _column_ids.clear(); |
| } |
| |
| void init_expr_context(int child_num) { |
| clear(); |
| |
| _root = std::make_shared<MockVExpr>(); |
| for (int i = 0; i < child_num; ++i) { |
| _column_ids.push_back(i); |
| _children.push_back(std::make_shared<MockVExpr>()); |
| EXPECT_CALL(*_children[i], execute(_, _, _)) |
| .WillRepeatedly(DoAll(SetArgPointee<2>(_column_ids[i]), Return(Status::OK()))); |
| const int col_id = _column_ids[i]; |
| EXPECT_CALL(*_children[i], execute_column_impl(_, _, _, _, _)) |
| .WillRepeatedly(Invoke([col_id](VExprContext*, const Block* block, |
| const Selector*, size_t, ColumnPtr& result) { |
| result = block->get_by_position(col_id).column; |
| return Status::OK(); |
| })); |
| _root->add_child(_children[i]); |
| } |
| _ctx = std::make_shared<VExprContext>(_root); |
| } |
| |
| private: |
| VExprContextSPtr _ctx; |
| std::shared_ptr<MockVExpr> _root; |
| std::vector<std::shared_ptr<MockVExpr>> _children; |
| std::vector<int> _column_ids; |
| }; |
| |
| TEST_F(TableFunctionTest, vexplode_outer) { |
| init_expr_context(1); |
| VExplodeTableFunction explode_outer; |
| explode_outer.set_outer(); |
| explode_outer.set_expr_context(_ctx); |
| |
| // explode_outer(Array<Int32>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_INT}; |
| TestArray vec = {Int32(1), Null(), Int32(2), Int32(3)}; |
| InputDataSet input_set = {{AnyType {vec}}, {Null()}, {AnyType {TestArray {}}}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_INT}; |
| InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)}, |
| {Int32(3)}, {Null()}, {Null()}}; |
| |
| check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); |
| } |
| |
| // explode_outer(Array<String>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_VARCHAR}; |
| TestArray vec = {std::string("abc"), std::string(""), std::string("def")}; |
| InputDataSet input_set = {{Null()}, {AnyType {TestArray {}}}, {AnyType {vec}}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_VARCHAR}; |
| InputDataSet output_set = { |
| {Null()}, {Null()}, {std::string("abc")}, {std::string("")}, {std::string("def")}}; |
| |
| check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); |
| } |
| |
| // explode_outer(Array<Decimal>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_DECIMALV2}; |
| TestArray vec = {ut_type::DECIMALV2VALUEFROMDOUBLE(17014116.67), |
| ut_type::DECIMALV2VALUEFROMDOUBLE(-17014116.67)}; |
| InputDataSet input_set = {{Null()}, {AnyType {TestArray {}}}, {AnyType {vec}}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_DECIMALV2}; |
| InputDataSet output_set = {{Null()}, |
| {Null()}, |
| {ut_type::DECIMALV2VALUEFROMDOUBLE(17014116.67)}, |
| {ut_type::DECIMALV2VALUEFROMDOUBLE(-17014116.67)}}; |
| |
| check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); |
| } |
| } |
| |
| TEST_F(TableFunctionTest, vexplode_outer_v2) { |
| init_expr_context(1); |
| VExplodeV2TableFunction explode_outer; |
| explode_outer.set_outer(); |
| explode_outer.set_expr_context(_ctx); |
| |
| // explode_outer(Array<Int32>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_INT}; |
| TestArray vec = {Int32(1), Null(), Int32(2), Int32(3)}; |
| InputDataSet input_set = {{AnyType {vec}}, {Null()}, {AnyType {TestArray {}}}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_INT}; |
| |
| InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)}, |
| {Int32(3)}, {Null()}, {Null()}}; |
| |
| check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); |
| } |
| |
| // explode_outer(Array<String>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_VARCHAR}; |
| TestArray vec = {std::string("abc"), std::string(""), std::string("def")}; |
| InputDataSet input_set = {{Null()}, {AnyType {TestArray {}}}, {AnyType {vec}}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_VARCHAR}; |
| |
| InputDataSet output_set = { |
| {Null()}, {Null()}, {std::string("abc")}, {std::string("")}, {std::string("def")}}; |
| |
| check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); |
| } |
| |
| // // explode_outer(Array<Decimal>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_DECIMALV2}; |
| TestArray vec = {ut_type::DECIMALV2VALUEFROMDOUBLE(17014116.67), |
| ut_type::DECIMALV2VALUEFROMDOUBLE(-17014116.67)}; |
| InputDataSet input_set = {{Null()}, {AnyType {TestArray {}}}, {AnyType {vec}}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_DECIMALV2}; |
| |
| InputDataSet output_set = {{Null()}, |
| {Null()}, |
| {ut_type::DECIMALV2VALUEFROMDOUBLE(17014116.67)}, |
| {ut_type::DECIMALV2VALUEFROMDOUBLE(-17014116.67)}}; |
| |
| check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); |
| } |
| } |
| |
| TEST_F(TableFunctionTest, vexplode) { |
| init_expr_context(1); |
| VExplodeTableFunction explode; |
| explode.set_expr_context(_ctx); |
| |
| // explode(Array<Int32>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_INT}; |
| |
| TestArray vec = {Int32(1), Null(), Int32(2), Int32(3)}; |
| InputDataSet input_set = {{AnyType {vec}}, {Null()}, {AnyType {TestArray {}}}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_INT}; |
| InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)}, {Int32(3)}}; |
| |
| check_vec_table_function(&explode, input_types, input_set, output_types, output_set); |
| } |
| |
| // explode(Array<String>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_VARCHAR}; |
| TestArray vec = {std::string("abc"), std::string(""), std::string("def")}; |
| InputDataSet input_set = {{Null()}, {AnyType {TestArray {}}}, {AnyType {vec}}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_VARCHAR}; |
| InputDataSet output_set = {{std::string("abc")}, {std::string("")}, {std::string("def")}}; |
| |
| check_vec_table_function(&explode, input_types, input_set, output_types, output_set); |
| } |
| } |
| |
| TEST_F(TableFunctionTest, vexplode_v2) { |
| init_expr_context(1); |
| VExplodeV2TableFunction explode; |
| explode.set_expr_context(_ctx); |
| |
| // explode(Array<Int32>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_INT}; |
| |
| TestArray vec = {Int32(1), Null(), Int32(2), Int32(3)}; |
| InputDataSet input_set = {{AnyType {vec}}, {Null()}, {AnyType {TestArray {}}}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_INT}; |
| InputDataSet output_set = {{Int32(1)}, {Null()}, {Int32(2)}, {Int32(3)}}; |
| |
| check_vec_table_function(&explode, input_types, input_set, output_types, output_set); |
| } |
| |
| // explode(Array<String>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_VARCHAR}; |
| TestArray vec = {std::string("abc"), std::string(""), std::string("def")}; |
| InputDataSet input_set = {{Null()}, {AnyType {TestArray {}}}, {AnyType {vec}}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_VARCHAR}; |
| |
| InputDataSet output_set = {{std::string("abc")}, {std::string("")}, {std::string("def")}}; |
| check_vec_table_function(&explode, input_types, input_set, output_types, output_set); |
| } |
| } |
| |
| TEST_F(TableFunctionTest, vexplode_v2_two_param) { |
| init_expr_context(2); |
| VExplodeV2TableFunction explode; |
| explode.set_expr_context(_ctx); |
| // explode(Array<String>, Array<String>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_VARCHAR}; |
| TestArray vec = {std::string("one"), std::string("two"), std::string("three")}; |
| TestArray vec1 = {std::string("1"), std::string("2"), std::string("3")}; |
| InputDataSet input_set = {{vec, vec1}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_STRUCT, PrimitiveType::TYPE_VARCHAR, |
| PrimitiveType::TYPE_VARCHAR}; |
| |
| InputDataSet output_set = {{{TestArray {std::string("one"), std::string("1")}}}, |
| {{TestArray {std::string("two"), std::string("2")}}}, |
| {{TestArray {std::string("three"), std::string("3")}}}}; |
| check_vec_table_function(&explode, input_types, input_set, output_types, output_set, false); |
| check_vec_table_function(&explode, input_types, input_set, output_types, output_set, true); |
| } |
| |
| // explode(null, Array<String>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_VARCHAR}; |
| TestArray vec = {std::string("one"), std::string("two"), std::string("three")}; |
| InputDataSet input_set = {{Null(), vec}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_STRUCT, PrimitiveType::TYPE_VARCHAR, |
| PrimitiveType::TYPE_VARCHAR}; |
| |
| InputDataSet output_set = {{{TestArray {Null(), std::string("one")}}}, |
| {{TestArray {Null(), std::string("two")}}}, |
| {{TestArray {Null(), std::string("three")}}}}; |
| |
| check_vec_table_function(&explode, input_types, input_set, output_types, output_set, false); |
| check_vec_table_function(&explode, input_types, input_set, output_types, output_set, true); |
| } |
| |
| // explode(Array<Null>, Array<String>) |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_ARRAY, PrimitiveType::TYPE_VARCHAR}; |
| TestArray vec = {std::string("one"), std::string("two"), std::string("three")}; |
| TestArray vec1 = {std::string("1"), Null(), std::string("3")}; |
| InputDataSet input_set = {{vec, vec1}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_STRUCT, PrimitiveType::TYPE_VARCHAR, |
| PrimitiveType::TYPE_VARCHAR}; |
| |
| InputDataSet output_set = {{{TestArray {std::string("one"), std::string("1")}}}, |
| {{TestArray {std::string("two"), Null()}}}, |
| {{TestArray {std::string("three"), std::string("3")}}}}; |
| |
| check_vec_table_function(&explode, input_types, input_set, output_types, output_set, false); |
| check_vec_table_function(&explode, input_types, input_set, output_types, output_set, true); |
| } |
| } |
| |
| TEST_F(TableFunctionTest, vexplode_numbers) { |
| init_expr_context(1); |
| VExplodeNumbersTableFunction tfn; |
| tfn.set_expr_context(_ctx); |
| |
| { |
| InputTypeSet input_types = {PrimitiveType::TYPE_INT}; |
| InputDataSet input_set = {{Int32(2)}, {Int32(3)}, {Null()}, {Int32(0)}, {Int32(-2)}}; |
| |
| InputTypeSet output_types = {PrimitiveType::TYPE_INT}; |
| InputDataSet output_set = {{Int32(0)}, {Int32(1)}, {Int32(0)}, {Int32(1)}, {Int32(2)}}; |
| |
| check_vec_table_function(&tfn, input_types, input_set, output_types, output_set); |
| } |
| } |
| |
| // --------------------------------------------------------------------------- |
| // Direct-API helpers for json_each / json_each_text tests. |
| // The test framework's check_vec_table_function does not properly support |
| // TYPE_STRUCT output (insert_cell always expects ColumnNullable wrapping the |
| // struct column), so we drive the table function API directly. |
| // --------------------------------------------------------------------------- |
| |
| // Build a one-column JSONB input block. An empty string means SQL NULL. |
| static std::unique_ptr<Block> build_jsonb_input_block(const std::vector<std::string>& json_rows) { |
| auto str_col = ColumnString::create(); |
| auto null_col = ColumnUInt8::create(); |
| for (const auto& json : json_rows) { |
| if (json.empty()) { |
| str_col->insert_default(); |
| null_col->insert_value(1); |
| } else { |
| JsonbWriter writer; |
| if (JsonbParser::parse(json.c_str(), json.size(), writer).ok()) { |
| str_col->insert_data(writer.getOutput()->getBuffer(), |
| writer.getOutput()->getSize()); |
| null_col->insert_value(0); |
| } else { |
| str_col->insert_default(); |
| null_col->insert_value(1); |
| } |
| } |
| } |
| auto col = ColumnNullable::create(std::move(str_col), std::move(null_col)); |
| auto block = Block::create_unique(); |
| block->insert({std::move(col), |
| make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_JSONB, false)), |
| "jval"}); |
| return block; |
| } |
| |
| // Run the given table function over all rows in block. |
| // Returns list of (key, value) pairs where value == "__NULL__" means SQL NULL. |
| // val_is_jsonb controls whether the value column is decoded as JSONB→JSON text or plain text. |
| static std::vector<std::pair<std::string, std::string>> run_json_each_fn(TableFunction* fn, |
| Block* block, |
| bool val_is_jsonb) { |
| // Output type: Nullable(Struct(Nullable(VARCHAR key), Nullable(VARCHAR/JSONB value))) |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| val_is_jsonb ? doris::PrimitiveType::TYPE_JSONB : doris::PrimitiveType::TYPE_VARCHAR, |
| false)); |
| DataTypePtr struct_dt = |
| make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt})); |
| |
| auto out_col = struct_dt->create_column(); |
| fn->set_nullable(); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| EXPECT_TRUE(fn->process_init(block, &rs).ok()); |
| |
| for (size_t row = 0; row < block->rows(); ++row) { |
| fn->process_row(row); |
| if (!fn->current_empty()) { |
| do { |
| fn->get_value(out_col, 1); |
| } while (!fn->eos()); |
| } |
| } |
| fn->process_close(); |
| |
| std::vector<std::pair<std::string, std::string>> result; |
| const auto& nullable_out = assert_cast<const ColumnNullable&>(*out_col); |
| const auto& struct_col = assert_cast<const ColumnStruct&>(nullable_out.get_nested_column()); |
| const auto& key_col = assert_cast<const ColumnNullable&>(struct_col.get_column(0)); |
| const auto& val_col = assert_cast<const ColumnNullable&>(struct_col.get_column(1)); |
| |
| for (size_t i = 0; i < struct_col.size(); ++i) { |
| if (nullable_out.is_null_at(i)) { |
| result.emplace_back("__NULL_ROW__", "__NULL_ROW__"); |
| continue; |
| } |
| std::string key; |
| if (!key_col.is_null_at(i)) { |
| StringRef sr = key_col.get_nested_column().get_data_at(i); |
| key.assign(sr.data, sr.size); |
| } |
| std::string val; |
| if (val_col.is_null_at(i)) { |
| val = "__NULL__"; |
| } else { |
| StringRef sr = val_col.get_nested_column().get_data_at(i); |
| if (val_is_jsonb) { |
| // JSONB binary → JSON text for comparison |
| const JsonbDocument* doc = nullptr; |
| if (JsonbDocument::checkAndCreateDocument(sr.data, sr.size, &doc).ok() && doc && |
| doc->getValue()) { |
| JsonbToJson converter; |
| val = converter.to_json_string(doc->getValue()); |
| } else { |
| val = "__BAD_JSONB__"; |
| } |
| } else { |
| val.assign(sr.data, sr.size); |
| } |
| } |
| result.emplace_back(std::move(key), std::move(val)); |
| } |
| return result; |
| } |
| |
| TEST_F(TableFunctionTest, vjson_each) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| |
| // basic: string and numeric values; JSONB value column shows JSON text with quotes |
| { |
| auto block = build_jsonb_input_block({{R"({"a":"foo","b":123})"}}); |
| auto rows = run_json_each_fn(&fn, block.get(), true); |
| ASSERT_EQ(2u, rows.size()); |
| EXPECT_EQ("a", rows[0].first); |
| EXPECT_EQ("\"foo\"", rows[0].second); // JSONB string → JSON text includes quotes |
| EXPECT_EQ("b", rows[1].first); |
| EXPECT_EQ("123", rows[1].second); |
| } |
| |
| // JSON null value → SQL NULL |
| { |
| auto block = build_jsonb_input_block({{R"({"x":null})"}}); |
| auto rows = run_json_each_fn(&fn, block.get(), true); |
| ASSERT_EQ(1u, rows.size()); |
| EXPECT_EQ("x", rows[0].first); |
| EXPECT_EQ("__NULL__", rows[0].second); |
| } |
| |
| // boolean and negative int |
| { |
| auto block = build_jsonb_input_block({{R"({"flag":true,"neg":-1})"}}); |
| auto rows = run_json_each_fn(&fn, block.get(), true); |
| ASSERT_EQ(2u, rows.size()); |
| bool ok_flag = false, ok_neg = false; |
| for (auto& kv : rows) { |
| if (kv.first == "flag") { |
| EXPECT_EQ("true", kv.second); |
| ok_flag = true; |
| } |
| if (kv.first == "neg") { |
| EXPECT_EQ("-1", kv.second); |
| ok_neg = true; |
| } |
| } |
| EXPECT_TRUE(ok_flag) << "key 'flag' not found"; |
| EXPECT_TRUE(ok_neg) << "key 'neg' not found"; |
| } |
| |
| // SQL NULL input → 0 rows |
| { |
| auto block = build_jsonb_input_block({{""}}); // empty string → SQL NULL |
| auto rows = run_json_each_fn(&fn, block.get(), true); |
| EXPECT_EQ(0u, rows.size()); |
| } |
| |
| // empty object → 0 rows |
| { |
| auto block = build_jsonb_input_block({{"{}"}}); |
| auto rows = run_json_each_fn(&fn, block.get(), true); |
| EXPECT_EQ(0u, rows.size()); |
| } |
| |
| // non-object input → 0 rows |
| { |
| auto block = build_jsonb_input_block({{"[1,2,3]"}}); |
| auto rows = run_json_each_fn(&fn, block.get(), true); |
| EXPECT_EQ(0u, rows.size()); |
| } |
| } |
| |
| TEST_F(TableFunctionTest, vjson_each_text) { |
| init_expr_context(1); |
| VJsonEachTextTableFn fn; |
| fn.set_expr_context(_ctx); |
| |
| // basic: strings unquoted (text mode), numbers as plain text |
| { |
| auto block = build_jsonb_input_block({{R"({"a":"foo","b":123})"}}); |
| auto rows = run_json_each_fn(&fn, block.get(), false); |
| ASSERT_EQ(2u, rows.size()); |
| EXPECT_EQ("a", rows[0].first); |
| EXPECT_EQ("foo", rows[0].second); // string unquoted in text mode |
| EXPECT_EQ("b", rows[1].first); |
| EXPECT_EQ("123", rows[1].second); |
| } |
| |
| // booleans |
| { |
| auto block = build_jsonb_input_block({{R"({"t":true,"f":false})"}}); |
| auto rows = run_json_each_fn(&fn, block.get(), false); |
| ASSERT_EQ(2u, rows.size()); |
| bool ok_t = false, ok_f = false; |
| for (auto& kv : rows) { |
| if (kv.first == "t") { |
| EXPECT_EQ("true", kv.second); |
| ok_t = true; |
| } |
| if (kv.first == "f") { |
| EXPECT_EQ("false", kv.second); |
| ok_f = true; |
| } |
| } |
| EXPECT_TRUE(ok_t) << "key 't' not found"; |
| EXPECT_TRUE(ok_f) << "key 'f' not found"; |
| } |
| |
| // JSON null → SQL NULL |
| { |
| auto block = build_jsonb_input_block({{R"({"x":null})"}}); |
| auto rows = run_json_each_fn(&fn, block.get(), false); |
| ASSERT_EQ(1u, rows.size()); |
| EXPECT_EQ("x", rows[0].first); |
| EXPECT_EQ("__NULL__", rows[0].second); |
| } |
| |
| // SQL NULL input → 0 rows |
| { |
| auto block = build_jsonb_input_block({{""}}); |
| auto rows = run_json_each_fn(&fn, block.get(), false); |
| EXPECT_EQ(0u, rows.size()); |
| } |
| |
| // empty object → 0 rows |
| { |
| auto block = build_jsonb_input_block({{"{}"}}); |
| auto rows = run_json_each_fn(&fn, block.get(), false); |
| EXPECT_EQ(0u, rows.size()); |
| } |
| } |
| TEST_F(TableFunctionTest, vjson_each_get_same_many_values) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| fn.set_nullable(); |
| |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable( |
| DataTypeFactory::instance().create_data_type(doris::PrimitiveType::TYPE_JSONB, false)); |
| DataTypePtr struct_dt = |
| make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt})); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| // Case 1: normal object — get_same_many_values replicates the entry at _cur_offset. |
| // Simulates a non-last table function being asked to copy its current value 3 times |
| // to match 3 rows emitted by the driving (last) function in the same pass. |
| { |
| auto block = build_jsonb_input_block({{R"({"k0":"v0","k1":"v1"})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| ASSERT_FALSE(fn.current_empty()); |
| |
| auto out_col = struct_dt->create_column(); |
| fn.get_same_many_values(out_col, 3); |
| |
| const auto& nullable_out = assert_cast<const ColumnNullable&>(*out_col); |
| ASSERT_EQ(3u, nullable_out.size()); |
| const auto& struct_col = assert_cast<const ColumnStruct&>(nullable_out.get_nested_column()); |
| const auto& key_col = assert_cast<const ColumnNullable&>(struct_col.get_column(0)); |
| // All 3 output rows should carry the entry at _cur_offset=0 ("k0") |
| for (size_t i = 0; i < 3; ++i) { |
| EXPECT_FALSE(nullable_out.is_null_at(i)); |
| ASSERT_FALSE(key_col.is_null_at(i)); |
| StringRef k = key_col.get_nested_column().get_data_at(i); |
| EXPECT_EQ("k0", std::string(k.data, k.size)); |
| } |
| fn.process_close(); |
| } |
| |
| // Case 2: SQL NULL input — current_empty() is true → insert_many_defaults. |
| { |
| auto block = build_jsonb_input_block({{""}}); // empty string → SQL NULL |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| ASSERT_TRUE(fn.current_empty()); |
| |
| auto out_col = struct_dt->create_column(); |
| fn.get_same_many_values(out_col, 2); |
| |
| ASSERT_EQ(2u, out_col->size()); |
| const auto& nullable_out = assert_cast<const ColumnNullable&>(*out_col); |
| EXPECT_TRUE(nullable_out.is_null_at(0)); |
| EXPECT_TRUE(nullable_out.is_null_at(1)); |
| fn.process_close(); |
| } |
| } |
| |
| TEST_F(TableFunctionTest, vjson_each_outer) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| |
| // set_outer() correctly sets the is_outer flag |
| EXPECT_FALSE(fn.is_outer()); |
| fn.set_outer(); |
| EXPECT_TRUE(fn.is_outer()); |
| |
| // Normal object: outer flag does not affect KV expansion |
| { |
| auto block = build_jsonb_input_block({{R"({"a":"foo","b":123})"}}); |
| auto rows = run_json_each_fn(&fn, block.get(), true); |
| ASSERT_EQ(2u, rows.size()); |
| EXPECT_EQ("a", rows[0].first); |
| EXPECT_EQ("\"foo\"", rows[0].second); |
| EXPECT_EQ("b", rows[1].first); |
| EXPECT_EQ("123", rows[1].second); |
| } |
| |
| // For NULL / empty-object / non-object inputs: current_empty() is true. |
| // The operator calls get_value() unconditionally when is_outer() — verify that |
| // get_value() inserts exactly one default (NULL) struct row in each case. |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable( |
| DataTypeFactory::instance().create_data_type(doris::PrimitiveType::TYPE_JSONB, false)); |
| DataTypePtr struct_dt = |
| make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt})); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| for (const char* input : {"", "{}", "[1,2,3]"}) { |
| auto block = build_jsonb_input_block({{input}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()) << "input: " << input; |
| fn.process_row(0); |
| EXPECT_TRUE(fn.current_empty()) << "input: " << input; |
| |
| auto out_col = struct_dt->create_column(); |
| fn.get_value(out_col, 1); |
| ASSERT_EQ(1u, out_col->size()) << "input: " << input; |
| EXPECT_TRUE(out_col->is_null_at(0)) << "input: " << input; |
| fn.process_close(); |
| } |
| } |
| |
| TEST_F(TableFunctionTest, vjson_each_text_outer) { |
| init_expr_context(1); |
| VJsonEachTextTableFn fn; |
| fn.set_expr_context(_ctx); |
| |
| EXPECT_FALSE(fn.is_outer()); |
| fn.set_outer(); |
| EXPECT_TRUE(fn.is_outer()); |
| |
| // Normal object: text mode (strings unquoted), outer flag does not affect expansion |
| { |
| auto block = build_jsonb_input_block({{R"({"a":"foo","b":123})"}}); |
| auto rows = run_json_each_fn(&fn, block.get(), false); |
| ASSERT_EQ(2u, rows.size()); |
| EXPECT_EQ("a", rows[0].first); |
| EXPECT_EQ("foo", rows[0].second); |
| EXPECT_EQ("b", rows[1].first); |
| EXPECT_EQ("123", rows[1].second); |
| } |
| |
| // NULL / empty-object / non-object → current_empty(), get_value() inserts one default row |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr struct_dt = |
| make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt})); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| for (const char* input : {"", "{}", "[1,2,3]"}) { |
| auto block = build_jsonb_input_block({{input}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()) << "input: " << input; |
| fn.process_row(0); |
| EXPECT_TRUE(fn.current_empty()) << "input: " << input; |
| |
| auto out_col = struct_dt->create_column(); |
| fn.get_value(out_col, 1); |
| ASSERT_EQ(1u, out_col->size()) << "input: " << input; |
| EXPECT_TRUE(out_col->is_null_at(0)) << "input: " << input; |
| fn.process_close(); |
| } |
| } |
| |
| // Helper: build a one-column JSONB block with raw bytes, bypassing JSON parse. |
| static std::unique_ptr<Block> build_raw_jsonb_block( |
| const std::vector<std::pair<std::string, bool>>& entries) { |
| auto str_col = ColumnString::create(); |
| auto null_col = ColumnUInt8::create(); |
| for (const auto& [data, is_null] : entries) { |
| if (is_null) { |
| str_col->insert_default(); |
| null_col->insert_value(1); |
| } else { |
| str_col->insert_data(data.data(), data.size()); |
| null_col->insert_value(0); |
| } |
| } |
| auto col = ColumnNullable::create(std::move(str_col), std::move(null_col)); |
| auto block = Block::create_unique(); |
| block->insert({std::move(col), |
| make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_JSONB, false)), |
| "jval"}); |
| return block; |
| } |
| |
| // Corrupt JSONB binary input — hits checkAndCreateDocument failure branch in process_row. |
| TEST_F(TableFunctionTest, vjson_each_corrupt_jsonb_binary) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| fn.set_nullable(); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| // Garbage bytes marked as non-null — checkAndCreateDocument should fail, |
| // process_row should leave _cur_size=0 (current_empty() == true). |
| std::string garbage = "\xDE\xAD\xBE\xEF"; |
| auto block = build_raw_jsonb_block({{garbage, false}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| |
| // Directly verify internal state via BE_TEST-exposed members |
| EXPECT_TRUE(fn.current_empty()); |
| EXPECT_TRUE(!fn.test_kv_pairs_first()); |
| EXPECT_TRUE(!fn.test_kv_pairs_second()); |
| |
| fn.process_close(); |
| } |
| |
| // Corrupt JSONB + outer mode → get_value should insert exactly one default (NULL) row. |
| TEST_F(TableFunctionTest, vjson_each_corrupt_jsonb_outer) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| fn.set_outer(); |
| fn.set_nullable(); |
| |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable( |
| DataTypeFactory::instance().create_data_type(doris::PrimitiveType::TYPE_JSONB, false)); |
| DataTypePtr struct_dt = |
| make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt})); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| std::string garbage = "\x00\x01\x02\x03"; |
| auto block = build_raw_jsonb_block({{garbage, false}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| EXPECT_TRUE(fn.current_empty()); |
| |
| auto out_col = struct_dt->create_column(); |
| fn.get_value(out_col, 1); |
| ASSERT_EQ(1U, out_col->size()); |
| EXPECT_TRUE(out_col->is_null_at(0)); // outer: one default NULL row |
| |
| fn.process_close(); |
| } |
| |
| // Corrupt JSONB in json_each_text mode — same behaviour. |
| TEST_F(TableFunctionTest, vjson_each_text_corrupt_jsonb_binary) { |
| init_expr_context(1); |
| VJsonEachTextTableFn fn; |
| fn.set_expr_context(_ctx); |
| fn.set_nullable(); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| std::string garbage = "\xFF\xFE\xFD"; |
| auto block = build_raw_jsonb_block({{garbage, false}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| |
| EXPECT_TRUE(fn.current_empty()); |
| EXPECT_TRUE(!fn.test_kv_pairs_first()); |
| EXPECT_TRUE(!fn.test_kv_pairs_second()); |
| |
| fn.process_close(); |
| } |
| |
| // Corrupt JSONB in json_each_text_outer mode should still emit one default row. |
| TEST_F(TableFunctionTest, vjson_each_text_corrupt_jsonb_outer) { |
| init_expr_context(1); |
| VJsonEachTextTableFn fn; |
| fn.set_expr_context(_ctx); |
| fn.set_outer(); |
| fn.set_nullable(); |
| |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr struct_dt = |
| make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt})); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| std::string garbage = "\xFA\xFB\xFC"; |
| auto block = build_raw_jsonb_block({{garbage, false}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| EXPECT_TRUE(fn.current_empty()); |
| |
| auto out_col = struct_dt->create_column(); |
| fn.get_value(out_col, 1); |
| ASSERT_EQ(1U, out_col->size()); |
| EXPECT_TRUE(out_col->is_null_at(0)); |
| |
| fn.process_close(); |
| } |
| |
| // get_same_many_values at non-zero offset — verify it copies current offset entry, |
| // not the first entry. |
| TEST_F(TableFunctionTest, vjson_each_get_same_many_values_nonzero_offset) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| fn.set_nullable(); |
| |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable( |
| DataTypeFactory::instance().create_data_type(doris::PrimitiveType::TYPE_JSONB, false)); |
| DataTypePtr struct_dt = |
| make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt})); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| // Object with 3 keys: k0, k1, k2 |
| auto block = build_jsonb_input_block({{R"({"k0":"v0","k1":"v1","k2":"v2"})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| ASSERT_FALSE(fn.current_empty()); |
| |
| // Consume the first entry via get_value(max_step=1) to advance _cur_offset to 1 |
| { |
| auto tmp_col = struct_dt->create_column(); |
| int step = fn.get_value(tmp_col, 1); |
| EXPECT_EQ(1, step); |
| EXPECT_FALSE(fn.eos()); |
| } |
| |
| // Now _cur_offset == 1. get_same_many_values should replicate entry at offset 1 ("k1"). |
| auto out_col = struct_dt->create_column(); |
| fn.get_same_many_values(out_col, 3); |
| |
| const auto& nullable_out = assert_cast<const ColumnNullable&>(*out_col); |
| ASSERT_EQ(3U, nullable_out.size()); |
| const auto& struct_col = assert_cast<const ColumnStruct&>(nullable_out.get_nested_column()); |
| const auto& key_col = assert_cast<const ColumnNullable&>(struct_col.get_column(0)); |
| for (size_t i = 0; i < 3; ++i) { |
| EXPECT_FALSE(nullable_out.is_null_at(i)); |
| ASSERT_FALSE(key_col.is_null_at(i)); |
| StringRef k = key_col.get_nested_column().get_data_at(i); |
| EXPECT_EQ("k1", std::string(k.data, k.size)) |
| << "Expected entry at offset 1, got '" << std::string(k.data, k.size) << "'"; |
| } |
| fn.process_close(); |
| } |
| |
| // Same test for json_each_text mode. |
| TEST_F(TableFunctionTest, vjson_each_text_get_same_many_values_nonzero_offset) { |
| init_expr_context(1); |
| VJsonEachTextTableFn fn; |
| fn.set_expr_context(_ctx); |
| fn.set_nullable(); |
| |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr struct_dt = |
| make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt})); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| auto block = build_jsonb_input_block({{R"({"a":"A","b":"B","c":"C"})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| ASSERT_FALSE(fn.current_empty()); |
| |
| // Advance offset past first entry |
| { |
| auto tmp_col = struct_dt->create_column(); |
| fn.get_value(tmp_col, 1); |
| } |
| |
| auto out_col = struct_dt->create_column(); |
| fn.get_same_many_values(out_col, 2); |
| |
| const auto& nullable_out = assert_cast<const ColumnNullable&>(*out_col); |
| ASSERT_EQ(2U, nullable_out.size()); |
| const auto& struct_col = assert_cast<const ColumnStruct&>(nullable_out.get_nested_column()); |
| const auto& key_col = assert_cast<const ColumnNullable&>(struct_col.get_column(0)); |
| const auto& val_col = assert_cast<const ColumnNullable&>(struct_col.get_column(1)); |
| for (size_t i = 0; i < 2; ++i) { |
| StringRef k = key_col.get_nested_column().get_data_at(i); |
| EXPECT_EQ("b", std::string(k.data, k.size)); // offset 1 = "b" |
| StringRef v = val_col.get_nested_column().get_data_at(i); |
| EXPECT_EQ("B", std::string(v.data, v.size)); // text mode: unquoted |
| } |
| fn.process_close(); |
| } |
| |
| // process_close — directly verify private members are reset. |
| TEST_F(TableFunctionTest, vjson_each_process_close_internal_state) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| auto block = build_jsonb_input_block({{R"({"a":1,"b":2})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| |
| // Before close: members should be populated |
| EXPECT_TRUE(fn.test_json_column()); |
| EXPECT_TRUE(fn.test_kv_pairs_first()); |
| EXPECT_TRUE(fn.test_kv_pairs_second()); |
| EXPECT_FALSE(fn.current_empty()); |
| |
| fn.process_close(); |
| |
| // After close: all pointers null, _cur_size reset |
| EXPECT_TRUE(!fn.test_json_column()); |
| EXPECT_TRUE(!fn.test_kv_pairs_first()); |
| EXPECT_TRUE(!fn.test_kv_pairs_second()); |
| EXPECT_TRUE(fn.current_empty()); |
| } |
| |
| // process_row with _is_const — second call should skip re-parsing when _cur_size > 0. |
| // Verify by inspecting _kv_pairs: they should remain from the first call. |
| TEST_F(TableFunctionTest, vjson_each_process_row_const_column) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| // Build a const column (ColumnConst wrapping a single JSONB value) |
| JsonbWriter writer; |
| const std::string json_const_obj = R"({"x":10,"y":20})"; |
| ASSERT_TRUE(JsonbParser::parse(json_const_obj.data(), json_const_obj.size(), writer).ok()); |
| auto inner_str_col = ColumnString::create(); |
| auto inner_null_col = ColumnUInt8::create(); |
| inner_str_col->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
| inner_null_col->insert_value(0); |
| auto inner_nullable = |
| ColumnNullable::create(std::move(inner_str_col), std::move(inner_null_col)); |
| // Wrap as ColumnConst with 3 logical rows |
| auto const_col = ColumnConst::create(std::move(inner_nullable), 3); |
| |
| auto block = Block::create_unique(); |
| block->insert({std::move(const_col), |
| make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_JSONB, false)), |
| "jval"}); |
| |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| |
| // First process_row: parses and populates _kv_pairs |
| fn.process_row(0); |
| ASSERT_FALSE(fn.current_empty()); |
| auto* kv_first_ptr = fn.test_kv_pairs_first().get(); |
| ASSERT_NE(nullptr, kv_first_ptr); |
| |
| // Reset offset to simulate next iteration (the operator resets between rows) |
| fn.reset(); |
| |
| // Second process_row on a different logical row: should skip reparsing (_is_const && _cur_size>0) |
| fn.process_row(1); |
| EXPECT_FALSE(fn.current_empty()); |
| // _kv_pairs.first pointer should be identical — no re-allocation |
| EXPECT_EQ(kv_first_ptr, fn.test_kv_pairs_first().get()); |
| |
| fn.process_close(); |
| } |
| |
| TEST_F(TableFunctionTest, vjson_each_process_row_const_empty_object_column) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| JsonbWriter writer; |
| const std::string json_empty_obj = R"({})"; |
| ASSERT_TRUE(JsonbParser::parse(json_empty_obj.data(), json_empty_obj.size(), writer).ok()); |
| auto inner_str_col = ColumnString::create(); |
| auto inner_null_col = ColumnUInt8::create(); |
| inner_str_col->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
| inner_null_col->insert_value(0); |
| auto inner_nullable = |
| ColumnNullable::create(std::move(inner_str_col), std::move(inner_null_col)); |
| auto const_col = ColumnConst::create(std::move(inner_nullable), 2); |
| |
| auto block = Block::create_unique(); |
| block->insert({std::move(const_col), |
| make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_JSONB, false)), |
| "jval"}); |
| |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| |
| fn.process_row(0); |
| EXPECT_TRUE(fn.current_empty()); |
| EXPECT_TRUE(!fn.test_kv_pairs_first()); |
| EXPECT_TRUE(!fn.test_kv_pairs_second()); |
| |
| fn.reset(); |
| fn.process_row(1); |
| EXPECT_TRUE(fn.current_empty()); |
| EXPECT_TRUE(!fn.test_kv_pairs_first()); |
| EXPECT_TRUE(!fn.test_kv_pairs_second()); |
| |
| fn.process_close(); |
| } |
| |
| // get_value on current_empty — inserts exactly one default and returns 1. |
| TEST_F(TableFunctionTest, vjson_each_get_value_current_empty) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| fn.set_nullable(); |
| |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable( |
| DataTypeFactory::instance().create_data_type(doris::PrimitiveType::TYPE_JSONB, false)); |
| DataTypePtr struct_dt = |
| make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt})); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| // Empty object → current_empty |
| auto block = build_jsonb_input_block({{"{}"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| EXPECT_TRUE(fn.current_empty()); |
| |
| auto out_col = struct_dt->create_column(); |
| int step = fn.get_value(out_col, 5); // max_step ignored when empty |
| EXPECT_EQ(1, step); |
| ASSERT_EQ(1U, out_col->size()); |
| EXPECT_TRUE(out_col->is_null_at(0)); // default row is NULL struct |
| EXPECT_TRUE(fn.eos()); |
| |
| fn.process_close(); |
| } |
| |
| // get_value with max_step > _cur_size — clamped to actual size. |
| TEST_F(TableFunctionTest, vjson_each_get_value_max_step_clamped) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| fn.set_nullable(); |
| |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable( |
| DataTypeFactory::instance().create_data_type(doris::PrimitiveType::TYPE_JSONB, false)); |
| DataTypePtr struct_dt = |
| make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt})); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| auto block = build_jsonb_input_block({{R"({"a":1,"b":2})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| ASSERT_FALSE(fn.current_empty()); |
| |
| auto out_col = struct_dt->create_column(); |
| int step = fn.get_value(out_col, 100); // request 100, only 2 available |
| EXPECT_EQ(2, step); |
| ASSERT_EQ(2U, out_col->size()); |
| EXPECT_TRUE(fn.eos()); |
| |
| fn.process_close(); |
| } |
| |
| TEST_F(TableFunctionTest, vjson_each_get_value_zero_max_step) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| fn.set_nullable(); |
| |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable( |
| DataTypeFactory::instance().create_data_type(doris::PrimitiveType::TYPE_JSONB, false)); |
| DataTypePtr struct_dt = |
| make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt})); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| auto block = build_jsonb_input_block({{R"({"a":1,"b":2})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| ASSERT_FALSE(fn.current_empty()); |
| |
| auto out_col = struct_dt->create_column(); |
| int step = fn.get_value(out_col, 0); |
| EXPECT_EQ(0, step); |
| EXPECT_EQ(0U, out_col->size()); |
| EXPECT_FALSE(fn.eos()); |
| |
| fn.process_close(); |
| } |
| |
| // Verify _kv_pairs content directly after process_row (json_each mode) — |
| // value column should contain JSONB binary, not JSON text. |
| TEST_F(TableFunctionTest, vjson_each_kv_pairs_jsonb_binary) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| auto block = build_jsonb_input_block({{R"({"k":"hello"})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| ASSERT_FALSE(fn.current_empty()); |
| |
| // Inspect key column |
| const auto& key_col = assert_cast<const ColumnNullable&>(*fn.test_kv_pairs_first()); |
| ASSERT_EQ(1U, key_col.size()); |
| ASSERT_FALSE(key_col.is_null_at(0)); |
| StringRef key = key_col.get_nested_column().get_data_at(0); |
| EXPECT_EQ("k", std::string(key.data, key.size)); |
| |
| // Inspect value column — should be valid JSONB binary, not plain text |
| const auto& val_col = assert_cast<const ColumnNullable&>(*fn.test_kv_pairs_second()); |
| ASSERT_EQ(1U, val_col.size()); |
| ASSERT_FALSE(val_col.is_null_at(0)); |
| StringRef val_raw = val_col.get_nested_column().get_data_at(0); |
| // Verify it's valid JSONB by parsing it back |
| const JsonbDocument* doc = nullptr; |
| ASSERT_TRUE(JsonbDocument::checkAndCreateDocument(val_raw.data, val_raw.size, &doc).ok()); |
| ASSERT_NE(nullptr, doc); |
| ASSERT_NE(nullptr, doc->getValue()); |
| EXPECT_TRUE(doc->getValue()->isString()); |
| |
| fn.process_close(); |
| } |
| |
| // Verify _kv_pairs content directly after process_row (json_each_text mode) — |
| // string values should be raw blob content (unquoted), not JSONB binary. |
| TEST_F(TableFunctionTest, vjson_each_text_kv_pairs_plain_text) { |
| init_expr_context(1); |
| VJsonEachTextTableFn fn; |
| fn.set_expr_context(_ctx); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| auto block = build_jsonb_input_block({{R"({"k":"hello","n":42})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| ASSERT_FALSE(fn.current_empty()); |
| |
| const auto& val_col = assert_cast<const ColumnNullable&>(*fn.test_kv_pairs_second()); |
| ASSERT_EQ(2U, val_col.size()); |
| |
| // Find the entries (order depends on JSONB iteration) |
| std::map<std::string, std::string> kv; |
| const auto& key_col = assert_cast<const ColumnNullable&>(*fn.test_kv_pairs_first()); |
| for (size_t i = 0; i < 2; ++i) { |
| StringRef kr = key_col.get_nested_column().get_data_at(i); |
| StringRef vr = val_col.get_nested_column().get_data_at(i); |
| kv[std::string(kr.data, kr.size)] = std::string(vr.data, vr.size); |
| } |
| // Text mode: string "hello" unquoted, number "42" as plain text |
| EXPECT_EQ("hello", kv["k"]); |
| EXPECT_EQ("42", kv["n"]); |
| |
| fn.process_close(); |
| } |
| |
| // Verify _kv_pairs for JSON null value — should produce SQL NULL (is_null_at == true). |
| TEST_F(TableFunctionTest, vjson_each_kv_pairs_null_value) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| auto block = build_jsonb_input_block({{R"({"k":null})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| ASSERT_FALSE(fn.current_empty()); |
| |
| const auto& val_col = assert_cast<const ColumnNullable&>(*fn.test_kv_pairs_second()); |
| ASSERT_EQ(1U, val_col.size()); |
| EXPECT_TRUE(val_col.is_null_at(0)); // JSON null → SQL NULL via insert_default |
| |
| fn.process_close(); |
| } |
| |
| // forward() and eos() interaction — test the base class forward logic |
| // through the json_each function. |
| TEST_F(TableFunctionTest, vjson_each_forward_eos) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| auto block = build_jsonb_input_block({{R"({"a":1,"b":2})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| ASSERT_FALSE(fn.current_empty()); |
| |
| EXPECT_FALSE(fn.eos()); |
| fn.forward(1); // offset 0 → 1 |
| EXPECT_FALSE(fn.eos()); |
| fn.forward(1); // offset 1 → 2, == _cur_size → eos |
| EXPECT_TRUE(fn.eos()); |
| |
| fn.process_close(); |
| } |
| |
| // Non-nullable get_value path (without set_nullable) — struct_col directly, no ColumnNullable wrapper. |
| TEST_F(TableFunctionTest, vjson_each_get_value_non_nullable) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| // Intentionally NOT calling fn.set_nullable() |
| |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable( |
| DataTypeFactory::instance().create_data_type(doris::PrimitiveType::TYPE_JSONB, false)); |
| // Non-nullable struct type — no wrapping Nullable |
| DataTypePtr struct_dt = std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt}); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| auto block = build_jsonb_input_block({{R"({"a":1})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| |
| auto out_col = struct_dt->create_column(); |
| int step = fn.get_value(out_col, 10); |
| EXPECT_EQ(1, step); |
| ASSERT_EQ(1U, out_col->size()); |
| |
| // Directly a ColumnStruct, not wrapped in ColumnNullable |
| const auto& struct_col = assert_cast<const ColumnStruct&>(*out_col); |
| const auto& key_col = assert_cast<const ColumnNullable&>(struct_col.get_column(0)); |
| StringRef k = key_col.get_nested_column().get_data_at(0); |
| EXPECT_EQ("a", std::string(k.data, k.size)); |
| |
| fn.process_close(); |
| } |
| |
| // Non-nullable get_same_many_values path. |
| TEST_F(TableFunctionTest, vjson_each_get_same_many_values_non_nullable) { |
| init_expr_context(1); |
| VJsonEachTableFn fn; |
| fn.set_expr_context(_ctx); |
| // NOT calling fn.set_nullable() |
| |
| DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type( |
| doris::PrimitiveType::TYPE_VARCHAR, false)); |
| DataTypePtr val_dt = make_nullable( |
| DataTypeFactory::instance().create_data_type(doris::PrimitiveType::TYPE_JSONB, false)); |
| DataTypePtr struct_dt = std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt}); |
| |
| TQueryOptions q_opts; |
| TQueryGlobals q_globals; |
| RuntimeState rs(q_opts, q_globals); |
| |
| auto block = build_jsonb_input_block({{R"({"x":1})"}}); |
| ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()); |
| fn.process_row(0); |
| |
| auto out_col = struct_dt->create_column(); |
| fn.get_same_many_values(out_col, 2); |
| ASSERT_EQ(2U, out_col->size()); |
| |
| const auto& struct_col = assert_cast<const ColumnStruct&>(*out_col); |
| const auto& key_col = assert_cast<const ColumnNullable&>(struct_col.get_column(0)); |
| for (size_t i = 0; i < 2; ++i) { |
| StringRef k = key_col.get_nested_column().get_data_at(i); |
| EXPECT_EQ("x", std::string(k.data, k.size)); |
| } |
| |
| fn.process_close(); |
| } |
| |
| } // namespace doris |