blob: 12dae95f6fc79f5a123547ac6f9e3c1ed2dbee70 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "vec/json/json_parser.h"
#include <gtest/gtest.h>
#include <vector>
#include "vec/common/string_ref.h"
using doris::vectorized::JSONDataParser;
using doris::vectorized::SimdJSONParser;
using doris::vectorized::ParseConfig;
TEST(JsonParserTest, ParseSimpleTypes) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
// int
auto result = parser.parse("123", 3, config);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(result->values.size(), 1);
// double
result = parser.parse("1.23", 4, config);
ASSERT_TRUE(result.has_value());
// bool
result = parser.parse("true", 4, config);
ASSERT_TRUE(result.has_value());
// null
result = parser.parse("null", 4, config);
ASSERT_TRUE(result.has_value());
// string
result = parser.parse("\"abc\"", 5, config);
ASSERT_TRUE(result.has_value());
}
TEST(JsonParserTest, ParseObjectAndArray) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
// Object
auto result = parser.parse(R"({"a":1,"b":2})", 13, config);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(result->values.size(), 2);
// Array
result = parser.parse("[1,2,3]", 7, config);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(result->values.size(), 1);
}
TEST(JsonParserTest, ParseMultiLevelNestedArray) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
auto result = parser.parse("[[1,2],[3,4]]", 13, config);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(result->values.size(), 1);
EXPECT_EQ(result->paths.size(), 1);
EXPECT_EQ(result->values[0].get_type(), doris::PrimitiveType::TYPE_ARRAY);
result = parser.parse("[[[1],[2]],[[3],[4]]]", 21, config);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(result->values.size(), 1);
EXPECT_EQ(result->paths.size(), 1);
EXPECT_EQ(result->values[0].get_type(), doris::PrimitiveType::TYPE_ARRAY);
result = parser.parse("[[1,2],[3],[4,5,6]]", 19, config);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(result->values.size(), 1);
EXPECT_EQ(result->paths.size(), 1);
// Test complex nested structure
config.enable_flatten_nested = false;
std::string json1 = R"({"a":[[1,2],[3],[4,5,6]]})";
// multi level nested array in object
result = parser.parse(json1.c_str(), json1.size(), config);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(result->values.size(), 1);
EXPECT_EQ(result->paths.size(), 1);
EXPECT_EQ(result->values[0].get_type(), doris::PrimitiveType::TYPE_ARRAY);
std::string json = R"({"nested": [{"a": [1,2,3]}]})";
// result should be jsonbField
result = parser.parse(json.c_str(), json.size(), config);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(result->values.size(), 1);
EXPECT_EQ(result->paths.size(), 1);
EXPECT_EQ(result->values[0].get_type(), doris::PrimitiveType::TYPE_JSONB);
// multi level nested array in nested array object
std::string json2 = R"({"a":[{"b":[[1,2,3]]}]})";
result = parser.parse(json2.c_str(), json2.size(), config);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(result->values.size(), 1);
EXPECT_EQ(result->paths.size(), 1);
EXPECT_EQ(result->values[0].get_type(), doris::PrimitiveType::TYPE_JSONB);
// test flatten nested
config.enable_flatten_nested = true;
EXPECT_ANY_THROW(parser.parse(json.c_str(), json.size(), config));
// test flatten nested with multi level nested array
// no throw because it is not nested object array
result = parser.parse(json1.c_str(), json1.size(), config);
ASSERT_TRUE(result.has_value());
EXPECT_EQ(result->values.size(), 1);
EXPECT_EQ(result->paths.size(), 1);
EXPECT_EQ(result->values[0].get_type(), doris::PrimitiveType::TYPE_ARRAY);
EXPECT_ANY_THROW(parser.parse(json2.c_str(), json2.size(), config));
}
TEST(JsonParserTest, ParseNestedAndFlatten) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
config.enable_flatten_nested = true;
std::string json = R"({"a":[{"b":1},{"b":2}]})";
auto result = parser.parse(json.c_str(), json.size(), config);
ASSERT_TRUE(result.has_value());
EXPECT_GT(result->values.size(), 0);
config.enable_flatten_nested = false;
std::string json2 = R"({"a":[{"b":1},{"b":2}]})";
result = parser.parse(json2.c_str(), json2.size(), config);
ASSERT_TRUE(result.has_value());
}
TEST(JsonParserTest, ParseInvalidJson) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
auto result = parser.parse("{a:1}", 5, config);
ASSERT_FALSE(result.has_value());
result = parser.parse("", 0, config);
ASSERT_FALSE(result.has_value());
}
TEST(JsonParserTest, ParseCornerCases) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
auto result = parser.parse("{}", 2, config);
ASSERT_TRUE(result.has_value());
result = parser.parse("[]", 2, config);
ASSERT_TRUE(result.has_value());
result = parser.parse(R"({"a":"\n\t"})", 12, config);
ASSERT_TRUE(result.has_value());
}
// Test cases for the selected code functionality
TEST(JsonParserTest, TestIsPrefixFunction) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
// Test is_prefix functionality through nested path parsing
// This tests the is_prefix function used in checkAmbiguousStructure
// Test case 1: Simple nested paths that should not be ambiguous
std::string json1 = R"({"a": [{"b": 1}, {"b": 2}]})";
auto result1 = parser.parse(json1.c_str(), json1.size(), config);
ASSERT_TRUE(result1.has_value());
// Test case 2: More complex nested paths
std::string json2 = R"({"a": [{"b": {"c": 1}}, {"b": {"c": 2}}]})";
auto result2 = parser.parse(json2.c_str(), json2.size(), config);
ASSERT_TRUE(result2.has_value());
// Test case 3: Deep nested structure
std::string json3 = R"({"level1": {"level2": [{"level3": {"level4": 1}}]}})";
auto result3 = parser.parse(json3.c_str(), json3.size(), config);
ASSERT_TRUE(result3.has_value());
}
TEST(JsonParserTest, TestAmbiguousStructureDetection) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
config.enable_flatten_nested = true;
// Test case 1: Arrays with different sizes in nested structure
// This should trigger the array size mismatch exception
std::string json1 = R"([{"b": [1, 2]}, {"b": [1, 2, 3]}])";
EXPECT_ANY_THROW(parser.parse(json1.c_str(), json1.size(), config));
// Test case 2: Arrays with same sizes should not throw
std::string json2 = R"([{"b": [1, 2]}, {"b": [3, 4]}])";
EXPECT_ANY_THROW(parser.parse(json2.c_str(), json2.size(), config));
// Test case 3: More complex nested array size mismatch
std::string json3 = R"({"nested": [{"arr": [[1, 2], [3]]}, {"arr": [[1, 2], [3, 4]]}]})";
EXPECT_ANY_THROW(parser.parse(json3.c_str(), json3.size(), config));
// Test case 4: Ambiguous structure with prefix paths
// This should trigger the ambiguous structure exception
std::string json4 = R"([{"a": {"c": 1}}, {"a": 2}])";
EXPECT_ANY_THROW(parser.parse(json4.c_str(), json4.size(), config));
}
TEST(JsonParserTest, TestNestedArrayHandling) {
JSONDataParser<SimdJSONParser> parser;
ParseConfig config;
config.enable_flatten_nested = true;
// Test case 1: Simple nested array handling
std::string json1 = R"([{"b": 1}, {"c": 2}])";
auto result1 = parser.parse(json1.c_str(), json1.size(), config);
ASSERT_TRUE(result1.has_value());
EXPECT_GT(result1->values.size(), 0);
// Test case 2: Multi-level nested array
std::string json2 = R"([{"a": {"b": 1}}, {"a": {"b": 2}}])";
auto result2 = parser.parse(json2.c_str(), json2.size(), config);
ASSERT_TRUE(result2.has_value());
EXPECT_GT(result2->values.size(), 0);
}
TEST(JsonParserTest, TestNestedArrayWithDifferentConfigs) {
JSONDataParser<SimdJSONParser> parser;
// Test with flatten_nested = false
ParseConfig config1;
config1.enable_flatten_nested = false;
std::string json1 = R"([{"b": [1, 2]}, {"b": [3, 4]}])";
auto result1 = parser.parse(json1.c_str(), json1.size(), config1);
ASSERT_TRUE(result1.has_value());
EXPECT_EQ(result1->values.size(), 1);
EXPECT_EQ(result1->values[0].get_type(), doris::PrimitiveType::TYPE_JSONB);
// Test with flatten_nested = true
ParseConfig config2;
config2.enable_flatten_nested = true;
EXPECT_ANY_THROW(parser.parse(json1.c_str(), json1.size(), config2));
}
// Test case for directly calling handleNewPath to cover the if (!nested_key.empty()) branch
TEST(JsonParserTest, TestHandleNewPathDirectCall) {
JSONDataParser<SimdJSONParser> parser;
// Create a ParseArrayContext
JSONDataParser<SimdJSONParser>::ParseArrayContext ctx;
ctx.current_size = 1;
ctx.total_size = 2;
ctx.has_nested_in_flatten = true;
ctx.is_top_array = true;
// Create a path with nested parts
doris::vectorized::PathInData::Parts path;
// Create a nested part (is_nested = true)
path.emplace_back("nested_key", true, 0); // is_nested = true
path.emplace_back("inner_key", false, 0); // is_nested = false
// Create a Field with array type (required for getNameOfNested to return non-empty)
doris::vectorized::Array array_data;
array_data.push_back(doris::vectorized::Field::create_field<doris::TYPE_INT>(1));
array_data.push_back(doris::vectorized::Field::create_field<doris::TYPE_INT>(2));
doris::vectorized::Field value =
doris::vectorized::Field::create_field<doris::TYPE_ARRAY>(std::move(array_data));
// Create hash for the path
UInt128 hash = doris::vectorized::PathInData::get_parts_hash(path);
// Call handleNewPath directly
// This should trigger the if (!nested_key.empty()) branch
parser.handleNewPath(hash, path, value, ctx);
// Verify that the nested_sizes_by_key was populated
EXPECT_EQ(ctx.nested_sizes_by_key.size(), 1);
// Verify that the arrays_by_path was populated
EXPECT_EQ(ctx.arrays_by_path.size(), 1);
EXPECT_TRUE(ctx.arrays_by_path.find(hash) != ctx.arrays_by_path.end());
}
// Test case for testing the else branch in handleNewPath (when nested_sizes is not empty)
TEST(JsonParserTest, TestHandleNewPathElseBranch) {
JSONDataParser<SimdJSONParser> parser;
// Create a ParseArrayContext
JSONDataParser<SimdJSONParser>::ParseArrayContext ctx;
ctx.current_size = 2; // Start with size 2
ctx.total_size = 3;
ctx.has_nested_in_flatten = true;
ctx.is_top_array = true;
// Create a path with nested parts
doris::vectorized::PathInData::Parts path;
path.emplace_back("nested_key", true, 0);
path.emplace_back("inner_key", false, 0);
// Create a Field with array type
doris::vectorized::Array array_data;
array_data.push_back(doris::vectorized::Field::create_field<doris::TYPE_INT>(1));
array_data.push_back(doris::vectorized::Field::create_field<doris::TYPE_INT>(2));
doris::vectorized::Field value =
doris::vectorized::Field::create_field<doris::TYPE_ARRAY>(std::move(array_data));
// Create hash for the path
UInt128 hash = doris::vectorized::PathInData::get_parts_hash(path);
// First call to populate nested_sizes_by_key
parser.handleNewPath(hash, path, value, ctx);
// Verify nested_sizes_by_key was populated
EXPECT_EQ(ctx.nested_sizes_by_key.at(doris::StringRef("nested_key")).size(), 3);
EXPECT_EQ(ctx.nested_sizes_by_key.at(doris::StringRef("nested_key"))[0], 0);
// Create another array with same size
doris::vectorized::Array array_data2;
array_data2.push_back(doris::vectorized::Field::create_field<doris::TYPE_INT>(3));
array_data2.push_back(doris::vectorized::Field::create_field<doris::TYPE_INT>(4));
doris::vectorized::Field value2 =
doris::vectorized::Field::create_field<doris::TYPE_ARRAY>(std::move(array_data2));
// Second call should trigger the else branch (nested_sizes is not empty)
ctx.is_top_array = false;
ctx.has_nested_in_flatten = false;
parser.handleNewPath(hash, path, value2, ctx);
// Verify nested_sizes_by_key was updated
EXPECT_EQ(ctx.nested_sizes_by_key.at(doris::StringRef("nested_key")).size(), 3);
EXPECT_EQ(ctx.nested_sizes_by_key.at(doris::StringRef("nested_key"))[1], 0);
}