|  | // Licensed to the Apache Software Foundation (ASF) under one | 
|  | // or more contributor license agreements.  See the NOTICE file | 
|  | // distributed with this work for additional information | 
|  | // regarding copyright ownership.  The ASF licenses this file | 
|  | // to you under the Apache License, Version 2.0 (the | 
|  | // "License"); you may not use this file except in compliance | 
|  | // with the License.  You may obtain a copy of the License at | 
|  | // | 
|  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, | 
|  | // software distributed under the License is distributed on an | 
|  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | // KIND, either express or implied.  See the License for the | 
|  | // specific language governing permissions and limitations | 
|  | // under the License. | 
|  |  | 
|  | #pragma once | 
|  |  | 
|  | #include <map> | 
|  | #include <memory> | 
|  | #include <string> | 
|  |  | 
|  | #include "util/debug_points.h" | 
|  |  | 
// Forward declaration only: this header refers to lucene::analysis::Analyzer
// solely through a pointer, so the full class definition is not needed here.
namespace lucene::analysis {
class Analyzer;
} // namespace lucene::analysis
|  |  | 
|  | namespace doris { | 
|  |  | 
// Parser types for the inverted index. The enumerator names mirror the
// INVERTED_INDEX_PARSER_* string constants declared later in this header.
// Every enumerator has an explicit numeric value (0..8).
// NOTE(review): the explicit values look like stable identifiers — confirm
// whether they are persisted or exchanged before ever renumbering them.
enum class InvertedIndexParserType : int {
    PARSER_UNKNOWN = 0,
    PARSER_NONE = 1,
    PARSER_STANDARD = 2,
    PARSER_ENGLISH = 3,
    PARSER_CHINESE = 4,
    PARSER_UNICODE = 5,
    PARSER_ICU = 6,
    PARSER_BASIC = 7,
    PARSER_IK = 8,
};
|  |  | 
|  | using CharFilterMap = std::map<std::string, std::string>; | 
|  |  | 
|  | struct InvertedIndexCtx { | 
|  | std::string custom_analyzer; | 
|  | InvertedIndexParserType parser_type; | 
|  | std::string parser_mode; | 
|  | std::string support_phrase; | 
|  | CharFilterMap char_filter_map; | 
|  | std::string lower_case; | 
|  | std::string stop_words; | 
|  | lucene::analysis::Analyzer* analyzer = nullptr; | 
|  | }; | 
|  |  | 
|  | using InvertedIndexCtxSPtr = std::shared_ptr<InvertedIndexCtx>; | 
|  |  | 
// String constants for inverted index properties.
//
// They are declared `inline` (C++17) so that every translation unit including
// this header shares one object per constant. A plain namespace-scope
// `const std::string` has internal linkage: each translation unit would get
// its own dynamically initialized copy, and the inline template function in
// this header would refer to a different object in each of them.

// Boolean spellings.
inline const std::string INVERTED_INDEX_PARSER_TRUE = "true";
inline const std::string INVERTED_INDEX_PARSER_FALSE = "false";

// "parser_mode" property key and mode value strings.
inline const std::string INVERTED_INDEX_PARSER_MODE_KEY = "parser_mode";
inline const std::string INVERTED_INDEX_PARSER_FINE_GRANULARITY = "fine_grained";
inline const std::string INVERTED_INDEX_PARSER_COARSE_GRANULARITY = "coarse_grained";
inline const std::string INVERTED_INDEX_PARSER_MAX_WORD = "ik_max_word";
inline const std::string INVERTED_INDEX_PARSER_SMART = "ik_smart";

// "parser" property key, its alias, and the parser name strings
// (one per InvertedIndexParserType enumerator).
inline const std::string INVERTED_INDEX_PARSER_KEY = "parser";
inline const std::string INVERTED_INDEX_PARSER_KEY_ALIAS = "built_in_analyzer";
inline const std::string INVERTED_INDEX_PARSER_UNKNOWN = "unknown";
inline const std::string INVERTED_INDEX_PARSER_NONE = "none";
inline const std::string INVERTED_INDEX_PARSER_STANDARD = "standard";
inline const std::string INVERTED_INDEX_PARSER_UNICODE = "unicode";
inline const std::string INVERTED_INDEX_PARSER_ENGLISH = "english";
inline const std::string INVERTED_INDEX_PARSER_CHINESE = "chinese";
inline const std::string INVERTED_INDEX_PARSER_ICU = "icu";
inline const std::string INVERTED_INDEX_PARSER_BASIC = "basic";
inline const std::string INVERTED_INDEX_PARSER_IK = "ik";

// "support_phrase" property key and its yes/no value strings.
inline const std::string INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY = "support_phrase";
inline const std::string INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES = "true";
inline const std::string INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO = "false";

// Char filter property keys and the "char_replace" filter name.
inline const std::string INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE = "char_filter_type";
inline const std::string INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN = "char_filter_pattern";
inline const std::string INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT = "char_filter_replacement";
inline const std::string INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE = "char_replace";

// "ignore_above" property key.
// NOTE(review): "256" is presumably the default applied when the key is
// missing — confirm in the implementation.
inline const std::string INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY = "ignore_above";
inline const std::string INVERTED_INDEX_PARSER_IGNORE_ABOVE_VALUE = "256";

inline const std::string INVERTED_INDEX_PARSER_LOWERCASE_KEY = "lower_case";

inline const std::string INVERTED_INDEX_PARSER_STOPWORDS_KEY = "stopwords";

inline const std::string INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY = "dict_compression";

inline const std::string INVERTED_INDEX_CUSTOM_ANALYZER_KEY = "analyzer";
|  |  | 
// Prototypes only: the definitions are not part of this header.
//
// The first two functions convert between InvertedIndexParserType and
// std::string, one in each direction.
// NOTE(review): presumably they use the INVERTED_INDEX_PARSER_* parser name
// strings declared in this header — confirm against the definitions.
|  | std::string inverted_index_parser_type_to_string(InvertedIndexParserType parser_type); | 
|  |  | 
|  | InvertedIndexParserType get_inverted_index_parser_type_from_string(const std::string& parser_str); | 
|  |  | 
// Each function below takes a string-to-string properties map by const
// reference and returns its result by value.
// NOTE(review): which key each one reads, and what it returns when that key
// is absent, is decided by the definitions — verify there.
|  | std::string get_parser_string_from_properties(const std::map<std::string, std::string>& properties); | 
|  | std::string get_parser_mode_string_from_properties( | 
|  | const std::map<std::string, std::string>& properties); | 
|  | std::string get_parser_phrase_support_string_from_properties( | 
|  | const std::map<std::string, std::string>& properties); | 
|  |  | 
// Returns a CharFilterMap (string-to-string map) built from the properties.
|  | CharFilterMap get_parser_char_filter_map_from_properties( | 
|  | const std::map<std::string, std::string>& properties); | 
|  |  | 
|  | // get parser ignore_above value from properties | 
|  | std::string get_parser_ignore_above_value_from_properties( | 
|  | const std::map<std::string, std::string>& properties); | 
|  |  | 
|  | template <bool ReturnTrue = false> | 
|  | std::string get_parser_lowercase_from_properties( | 
|  | const std::map<std::string, std::string>& properties) { | 
|  | DBUG_EXECUTE_IF("inverted_index_parser.get_parser_lowercase_from_properties", { return ""; }) | 
|  |  | 
|  | if (properties.find(INVERTED_INDEX_PARSER_LOWERCASE_KEY) != properties.end()) { | 
|  | return properties.at(INVERTED_INDEX_PARSER_LOWERCASE_KEY); | 
|  | } else { | 
|  | if constexpr (ReturnTrue) { | 
|  | return INVERTED_INDEX_PARSER_TRUE; | 
|  | } else { | 
|  | return ""; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
// Prototypes only: the definitions are not part of this header. Each function
// takes a string-to-string properties map by const reference and returns a
// std::string by value.
// NOTE(review): presumably they read the "stopwords", "dict_compression" and
// "analyzer" keys respectively (see the *_KEY constants in this header);
// the behaviour for a missing key is decided by the definitions — verify there.
|  | std::string get_parser_stopwords_from_properties( | 
|  | const std::map<std::string, std::string>& properties); | 
|  |  | 
|  | std::string get_parser_dict_compression_from_properties( | 
|  | const std::map<std::string, std::string>& properties); | 
|  |  | 
|  | std::string get_custom_analyzer_string_from_properties( | 
|  | const std::map<std::string, std::string>& properties); | 
|  |  | 
|  | } // namespace doris |