|  | // Licensed to the Apache Software Foundation (ASF) under one | 
|  | // or more contributor license agreements.  See the NOTICE file | 
|  | // distributed with this work for additional information | 
|  | // regarding copyright ownership.  The ASF licenses this file | 
|  | // to you under the Apache License, Version 2.0 (the | 
|  | // "License"); you may not use this file except in compliance | 
|  | // with the License.  You may obtain a copy of the License at | 
|  | // | 
|  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, | 
|  | // software distributed under the License is distributed on an | 
|  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | // KIND, either express or implied.  See the License for the | 
|  | // specific language governing permissions and limitations | 
|  | // under the License. | 
|  |  | 
|  | #include "olap/inverted_index_parser.h" | 
|  |  | 
|  | #include <gtest/gtest.h> | 
|  |  | 
|  | #include <map> | 
|  | #include <string> | 
|  |  | 
|  | namespace doris { | 
|  |  | 
|  | class InvertedIndexParserTest : public testing::Test { | 
|  | public: | 
|  | void SetUp() override {} | 
|  | void TearDown() override {} | 
|  | }; | 
|  |  | 
|  | // Test inverted_index_parser_type_to_string function | 
|  | TEST_F(InvertedIndexParserTest, TestParserTypeToString) { | 
|  | EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_NONE), | 
|  | INVERTED_INDEX_PARSER_NONE); | 
|  | EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_STANDARD), | 
|  | INVERTED_INDEX_PARSER_STANDARD); | 
|  | EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_UNICODE), | 
|  | INVERTED_INDEX_PARSER_UNICODE); | 
|  | EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_ENGLISH), | 
|  | INVERTED_INDEX_PARSER_ENGLISH); | 
|  | EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_CHINESE), | 
|  | INVERTED_INDEX_PARSER_CHINESE); | 
|  | EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_ICU), | 
|  | INVERTED_INDEX_PARSER_ICU); | 
|  | EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_BASIC), | 
|  | INVERTED_INDEX_PARSER_BASIC); | 
|  | EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_IK), | 
|  | INVERTED_INDEX_PARSER_IK); | 
|  | EXPECT_EQ(inverted_index_parser_type_to_string(InvertedIndexParserType::PARSER_UNKNOWN), | 
|  | INVERTED_INDEX_PARSER_UNKNOWN); | 
|  | } | 
|  |  | 
|  | // Test get_inverted_index_parser_type_from_string function | 
|  | TEST_F(InvertedIndexParserTest, TestGetParserTypeFromString) { | 
|  | // Test all valid parser types (case insensitive) | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string("none"), | 
|  | InvertedIndexParserType::PARSER_NONE); | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string("NONE"), | 
|  | InvertedIndexParserType::PARSER_NONE); | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string("standard"), | 
|  | InvertedIndexParserType::PARSER_STANDARD); | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string("Standard"), | 
|  | InvertedIndexParserType::PARSER_STANDARD); | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string("unicode"), | 
|  | InvertedIndexParserType::PARSER_UNICODE); | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string("english"), | 
|  | InvertedIndexParserType::PARSER_ENGLISH); | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string("chinese"), | 
|  | InvertedIndexParserType::PARSER_CHINESE); | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string("icu"), | 
|  | InvertedIndexParserType::PARSER_ICU); | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string("basic"), | 
|  | InvertedIndexParserType::PARSER_BASIC); | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string("ik"), InvertedIndexParserType::PARSER_IK); | 
|  |  | 
|  | // Test unknown parser type | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string("invalid"), | 
|  | InvertedIndexParserType::PARSER_UNKNOWN); | 
|  | EXPECT_EQ(get_inverted_index_parser_type_from_string(""), | 
|  | InvertedIndexParserType::PARSER_UNKNOWN); | 
|  | } | 
|  |  | 
|  | // Test get_parser_string_from_properties function | 
|  | TEST_F(InvertedIndexParserTest, TestGetParserStringFromProperties) { | 
|  | std::map<std::string, std::string> properties; | 
|  |  | 
|  | // Test with empty properties | 
|  | EXPECT_EQ(get_parser_string_from_properties(properties), INVERTED_INDEX_PARSER_NONE); | 
|  |  | 
|  | // Test with parser key present | 
|  | properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_ENGLISH; | 
|  | EXPECT_EQ(get_parser_string_from_properties(properties), INVERTED_INDEX_PARSER_ENGLISH); | 
|  |  | 
|  | // Test with different parser value | 
|  | properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_CHINESE; | 
|  | EXPECT_EQ(get_parser_string_from_properties(properties), INVERTED_INDEX_PARSER_CHINESE); | 
|  | } | 
|  |  | 
|  | // Test get_parser_mode_string_from_properties function | 
|  | TEST_F(InvertedIndexParserTest, TestGetParserModeStringFromProperties) { | 
|  | std::map<std::string, std::string> properties; | 
|  |  | 
|  | // Test with empty properties | 
|  | EXPECT_EQ(get_parser_mode_string_from_properties(properties), | 
|  | INVERTED_INDEX_PARSER_COARSE_GRANULARITY); | 
|  |  | 
|  | // Test with parser_mode key present | 
|  | properties[INVERTED_INDEX_PARSER_MODE_KEY] = INVERTED_INDEX_PARSER_FINE_GRANULARITY; | 
|  | EXPECT_EQ(get_parser_mode_string_from_properties(properties), | 
|  | INVERTED_INDEX_PARSER_FINE_GRANULARITY); | 
|  |  | 
|  | // Test with IK parser (should return smart mode when no mode specified) | 
|  | properties.clear(); | 
|  | properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_IK; | 
|  | EXPECT_EQ(get_parser_mode_string_from_properties(properties), INVERTED_INDEX_PARSER_SMART); | 
|  |  | 
|  | // Test with non-IK parser (should return coarse granularity) | 
|  | properties[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_ENGLISH; | 
|  | EXPECT_EQ(get_parser_mode_string_from_properties(properties), | 
|  | INVERTED_INDEX_PARSER_COARSE_GRANULARITY); | 
|  | } | 
|  |  | 
|  | // Test get_parser_phrase_support_string_from_properties function | 
|  | TEST_F(InvertedIndexParserTest, TestGetParserPhraseSupportStringFromProperties) { | 
|  | std::map<std::string, std::string> properties; | 
|  |  | 
|  | // Test with empty properties | 
|  | EXPECT_EQ(get_parser_phrase_support_string_from_properties(properties), | 
|  | INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO); | 
|  |  | 
|  | // Test with phrase support key present | 
|  | properties[INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY] = INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES; | 
|  | EXPECT_EQ(get_parser_phrase_support_string_from_properties(properties), | 
|  | INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES); | 
|  |  | 
|  | properties[INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY] = INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO; | 
|  | EXPECT_EQ(get_parser_phrase_support_string_from_properties(properties), | 
|  | INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO); | 
|  | } | 
|  |  | 
|  | // Test get_parser_char_filter_map_from_properties function | 
|  | TEST_F(InvertedIndexParserTest, TestGetParserCharFilterMapFromProperties) { | 
|  | std::map<std::string, std::string> properties; | 
|  |  | 
|  | // Test with empty properties | 
|  | CharFilterMap result = get_parser_char_filter_map_from_properties(properties); | 
|  | EXPECT_TRUE(result.empty()); | 
|  |  | 
|  | // Test with missing char_filter_type | 
|  | properties["some_key"] = "some_value"; | 
|  | result = get_parser_char_filter_map_from_properties(properties); | 
|  | EXPECT_TRUE(result.empty()); | 
|  |  | 
|  | // Test with valid char_replace filter but missing pattern | 
|  | properties.clear(); | 
|  | properties[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE] = "char_replace"; | 
|  | result = get_parser_char_filter_map_from_properties(properties); | 
|  | EXPECT_TRUE(result.empty()); | 
|  |  | 
|  | // Test with valid char_replace filter and pattern | 
|  | properties[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN] = "._"; | 
|  | result = get_parser_char_filter_map_from_properties(properties); | 
|  | EXPECT_EQ(result.size(), 3); | 
|  | EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE], "char_replace"); | 
|  | EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN], "._"); | 
|  | EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT], " "); // default replacement | 
|  |  | 
|  | // Test with custom replacement | 
|  | properties[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT] = "-"; | 
|  | result = get_parser_char_filter_map_from_properties(properties); | 
|  | EXPECT_EQ(result.size(), 3); | 
|  | EXPECT_EQ(result[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT], "-"); | 
|  |  | 
|  | // Test with invalid filter type | 
|  | properties.clear(); | 
|  | properties[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE] = "invalid_type"; | 
|  | result = get_parser_char_filter_map_from_properties(properties); | 
|  | EXPECT_TRUE(result.empty()); | 
|  | } | 
|  |  | 
|  | // Test get_parser_ignore_above_value_from_properties function | 
|  | TEST_F(InvertedIndexParserTest, TestGetParserIgnoreAboveValueFromProperties) { | 
|  | std::map<std::string, std::string> properties; | 
|  |  | 
|  | // Test with empty properties | 
|  | EXPECT_EQ(get_parser_ignore_above_value_from_properties(properties), | 
|  | INVERTED_INDEX_PARSER_IGNORE_ABOVE_VALUE); | 
|  |  | 
|  | // Test with ignore_above key present | 
|  | properties[INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY] = "512"; | 
|  | EXPECT_EQ(get_parser_ignore_above_value_from_properties(properties), "512"); | 
|  |  | 
|  | properties[INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY] = "1024"; | 
|  | EXPECT_EQ(get_parser_ignore_above_value_from_properties(properties), "1024"); | 
|  | } | 
|  |  | 
|  | // Test get_parser_lowercase_from_properties function (template function) | 
|  | TEST_F(InvertedIndexParserTest, TestGetParserLowercaseFromProperties) { | 
|  | std::map<std::string, std::string> properties; | 
|  |  | 
|  | // Test with empty properties (default template parameter false) | 
|  | EXPECT_EQ(get_parser_lowercase_from_properties(properties), ""); | 
|  |  | 
|  | // Test with empty properties (template parameter true) | 
|  | EXPECT_EQ(get_parser_lowercase_from_properties<true>(properties), INVERTED_INDEX_PARSER_TRUE); | 
|  |  | 
|  | // Test with lower_case key present | 
|  | properties[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = INVERTED_INDEX_PARSER_TRUE; | 
|  | EXPECT_EQ(get_parser_lowercase_from_properties(properties), INVERTED_INDEX_PARSER_TRUE); | 
|  | EXPECT_EQ(get_parser_lowercase_from_properties<true>(properties), INVERTED_INDEX_PARSER_TRUE); | 
|  |  | 
|  | properties[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = INVERTED_INDEX_PARSER_FALSE; | 
|  | EXPECT_EQ(get_parser_lowercase_from_properties(properties), INVERTED_INDEX_PARSER_FALSE); | 
|  | EXPECT_EQ(get_parser_lowercase_from_properties<true>(properties), INVERTED_INDEX_PARSER_FALSE); | 
|  | } | 
|  |  | 
|  | // Test get_parser_stopwords_from_properties function | 
|  | TEST_F(InvertedIndexParserTest, TestGetParserStopwordsFromProperties) { | 
|  | std::map<std::string, std::string> properties; | 
|  |  | 
|  | // Test with empty properties | 
|  | EXPECT_EQ(get_parser_stopwords_from_properties(properties), ""); | 
|  |  | 
|  | // Test with stopwords key present | 
|  | properties[INVERTED_INDEX_PARSER_STOPWORDS_KEY] = "a,an,the"; | 
|  | EXPECT_EQ(get_parser_stopwords_from_properties(properties), "a,an,the"); | 
|  |  | 
|  | properties[INVERTED_INDEX_PARSER_STOPWORDS_KEY] = ""; | 
|  | EXPECT_EQ(get_parser_stopwords_from_properties(properties), ""); | 
|  | } | 
|  |  | 
|  | // Test get_parser_dict_compression_from_properties function | 
|  | TEST_F(InvertedIndexParserTest, TestGetParserDictCompressionFromProperties) { | 
|  | std::map<std::string, std::string> properties; | 
|  |  | 
|  | // Test with empty properties | 
|  | EXPECT_EQ(get_parser_dict_compression_from_properties(properties), ""); | 
|  |  | 
|  | // Test with dict_compression key present | 
|  | properties[INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY] = "true"; | 
|  | EXPECT_EQ(get_parser_dict_compression_from_properties(properties), "true"); | 
|  |  | 
|  | properties[INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY] = "false"; | 
|  | EXPECT_EQ(get_parser_dict_compression_from_properties(properties), "false"); | 
|  | } | 
|  |  | 
|  | // Test InvertedIndexCtx structure | 
|  | TEST_F(InvertedIndexParserTest, TestInvertedIndexCtxStructure) { | 
|  | InvertedIndexCtx ctx; | 
|  |  | 
|  | // Test default initialization | 
|  | ctx.parser_type = InvertedIndexParserType::PARSER_ENGLISH; | 
|  | ctx.parser_mode = INVERTED_INDEX_PARSER_FINE_GRANULARITY; | 
|  | ctx.lower_case = INVERTED_INDEX_PARSER_TRUE; | 
|  | ctx.stop_words = "a,an,the"; | 
|  | ctx.analyzer = nullptr; | 
|  |  | 
|  | EXPECT_EQ(ctx.parser_type, InvertedIndexParserType::PARSER_ENGLISH); | 
|  | EXPECT_EQ(ctx.parser_mode, INVERTED_INDEX_PARSER_FINE_GRANULARITY); | 
|  | EXPECT_EQ(ctx.lower_case, INVERTED_INDEX_PARSER_TRUE); | 
|  | EXPECT_EQ(ctx.stop_words, "a,an,the"); | 
|  | EXPECT_EQ(ctx.analyzer, nullptr); | 
|  |  | 
|  | // Test char_filter_map | 
|  | ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE] = "char_replace"; | 
|  | ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN] = "._"; | 
|  | ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT] = " "; | 
|  |  | 
|  | EXPECT_EQ(ctx.char_filter_map.size(), 3); | 
|  | EXPECT_EQ(ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE], "char_replace"); | 
|  | EXPECT_EQ(ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN], "._"); | 
|  | EXPECT_EQ(ctx.char_filter_map[INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT], " "); | 
|  | } | 
|  |  | 
|  | // Test constants | 
|  | TEST_F(InvertedIndexParserTest, TestConstants) { | 
|  | // Test parser constants | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_UNKNOWN, "unknown"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_NONE, "none"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_STANDARD, "standard"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_UNICODE, "unicode"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_ENGLISH, "english"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_CHINESE, "chinese"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_ICU, "icu"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_BASIC, "basic"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_IK, "ik"); | 
|  |  | 
|  | // Test mode constants | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_FINE_GRANULARITY, "fine_grained"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_COARSE_GRANULARITY, "coarse_grained"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_MAX_WORD, "ik_max_word"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_SMART, "ik_smart"); | 
|  |  | 
|  | // Test boolean constants | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_TRUE, "true"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_FALSE, "false"); | 
|  |  | 
|  | // Test phrase support constants | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES, "true"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO, "false"); | 
|  |  | 
|  | // Test key constants | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_KEY, "parser"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_MODE_KEY, "parser_mode"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY, "support_phrase"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_LOWERCASE_KEY, "lower_case"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_STOPWORDS_KEY, "stopwords"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY, "dict_compression"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY, "ignore_above"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_IGNORE_ABOVE_VALUE, "256"); | 
|  |  | 
|  | // Test char filter constants | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE, "char_filter_type"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN, "char_filter_pattern"); | 
|  | EXPECT_EQ(INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT, "char_filter_replacement"); | 
|  | } | 
|  |  | 
|  | } // namespace doris |