blob: 075a4f8e543089ce4f3943b408ed2c6273e76f37 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include "util/debug_points.h"
// Forward declaration only: lets this header name lucene::analysis::Analyzer
// (used below as a pointer type) without requiring its full definition.
namespace lucene::analysis {
class Analyzer;
} // namespace lucene::analysis
namespace doris {
// Kinds of parser that can be selected for an inverted index.
// Every enumerator is given an explicit numeric value.
// NOTE(review): presumably these values are stored or exchanged outside this
// header, so renumbering may be unsafe — confirm before changing them.
enum class InvertedIndexParserType {
PARSER_UNKNOWN = 0,
PARSER_NONE = 1,
PARSER_STANDARD = 2,
PARSER_ENGLISH = 3,
PARSER_CHINESE = 4,
PARSER_UNICODE = 5,
PARSER_ICU = 6,
PARSER_BASIC = 7,
PARSER_IK = 8
};
// String-to-string map used for char filter settings; it is the return type of
// get_parser_char_filter_map_from_properties() and a member of InvertedIndexCtx.
using CharFilterMap = std::map<std::string, std::string>;
// Bundle of analysis settings for one inverted index.
// NOTE(review): the string members presumably mirror the index property values
// read by the get_*_from_properties() helpers in this header — confirm against
// the code that fills this struct.
struct InvertedIndexCtx {
    // Name of a custom analyzer (presumably the "analyzer" property — confirm).
    std::string custom_analyzer;
    // Selected parser kind. Defaults to PARSER_UNKNOWN so that a
    // default-initialized context never carries an indeterminate enum value.
    InvertedIndexParserType parser_type = InvertedIndexParserType::PARSER_UNKNOWN;
    // Parser mode string (presumably the "parser_mode" property — confirm).
    std::string parser_mode;
    // Phrase support flag kept as a string (presumably "true"/"false" — confirm).
    std::string support_phrase;
    // Char filter settings as key/value pairs.
    CharFilterMap char_filter_map;
    // Lower-casing flag kept as a string (presumably "true"/"false"/"" — confirm).
    std::string lower_case;
    // Stop words setting kept as a string.
    std::string stop_words;
    // Analyzer instance to use; null by default.
    // NOTE(review): ownership/lifetime is not visible from this header — confirm.
    lucene::analysis::Analyzer* analyzer = nullptr;
};
// Shared-ownership handle to an InvertedIndexCtx.
using InvertedIndexCtxSPtr = std::shared_ptr<InvertedIndexCtx>;
// String constants for inverted index properties.
//
// They are declared `inline` so that all translation units including this
// header share a single object per constant. A plain namespace-scope
// `const std::string` has internal linkage: each translation unit would get its
// own dynamically initialized copy, and header-defined functions that use the
// constants (e.g. get_parser_lowercase_from_properties) would refer to a
// different object in every translation unit.

// Generic boolean values.
inline const std::string INVERTED_INDEX_PARSER_TRUE = "true";
inline const std::string INVERTED_INDEX_PARSER_FALSE = "false";

// Parser mode key and mode values.
inline const std::string INVERTED_INDEX_PARSER_MODE_KEY = "parser_mode";
inline const std::string INVERTED_INDEX_PARSER_FINE_GRANULARITY = "fine_grained";
inline const std::string INVERTED_INDEX_PARSER_COARSE_GRANULARITY = "coarse_grained";
inline const std::string INVERTED_INDEX_PARSER_MAX_WORD = "ik_max_word";
inline const std::string INVERTED_INDEX_PARSER_SMART = "ik_smart";

// Parser key, its alias, and parser names (one per InvertedIndexParserType
// enumerator name — NOTE(review): the actual mapping lives in the out-of-view
// conversion functions; confirm there).
inline const std::string INVERTED_INDEX_PARSER_KEY = "parser";
inline const std::string INVERTED_INDEX_PARSER_KEY_ALIAS = "built_in_analyzer";
inline const std::string INVERTED_INDEX_PARSER_UNKNOWN = "unknown";
inline const std::string INVERTED_INDEX_PARSER_NONE = "none";
inline const std::string INVERTED_INDEX_PARSER_STANDARD = "standard";
inline const std::string INVERTED_INDEX_PARSER_UNICODE = "unicode";
inline const std::string INVERTED_INDEX_PARSER_ENGLISH = "english";
inline const std::string INVERTED_INDEX_PARSER_CHINESE = "chinese";
inline const std::string INVERTED_INDEX_PARSER_ICU = "icu";
inline const std::string INVERTED_INDEX_PARSER_BASIC = "basic";
inline const std::string INVERTED_INDEX_PARSER_IK = "ik";

// Phrase support key and values.
inline const std::string INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY = "support_phrase";
inline const std::string INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES = "true";
inline const std::string INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO = "false";

// Char filter keys and the "char_replace" filter name.
inline const std::string INVERTED_INDEX_PARSER_CHAR_FILTER_TYPE = "char_filter_type";
inline const std::string INVERTED_INDEX_PARSER_CHAR_FILTER_PATTERN = "char_filter_pattern";
inline const std::string INVERTED_INDEX_PARSER_CHAR_FILTER_REPLACEMENT = "char_filter_replacement";
inline const std::string INVERTED_INDEX_CHAR_FILTER_CHAR_REPLACE = "char_replace";

// ignore_above key and value (presumably the default used when the property is
// absent — confirm against get_parser_ignore_above_value_from_properties).
inline const std::string INVERTED_INDEX_PARSER_IGNORE_ABOVE_KEY = "ignore_above";
inline const std::string INVERTED_INDEX_PARSER_IGNORE_ABOVE_VALUE = "256";

// Remaining property keys.
inline const std::string INVERTED_INDEX_PARSER_LOWERCASE_KEY = "lower_case";
inline const std::string INVERTED_INDEX_PARSER_STOPWORDS_KEY = "stopwords";
inline const std::string INVERTED_INDEX_PARSER_DICT_COMPRESSION_KEY = "dict_compression";
inline const std::string INVERTED_INDEX_CUSTOM_ANALYZER_KEY = "analyzer";
// The functions below are only declared here; their definitions are not part of
// this header, so the notes describe the signatures and hedge everything else.

// Converts a parser type to a string.
// NOTE(review): presumably yields one of the INVERTED_INDEX_PARSER_* parser
// names — confirm in the definition.
std::string inverted_index_parser_type_to_string(InvertedIndexParserType parser_type);
// Converts a parser string to a parser type.
// NOTE(review): handling of unrecognized strings is not visible here — confirm.
InvertedIndexParserType get_inverted_index_parser_type_from_string(const std::string& parser_str);
// Returns the parser string derived from an index property map.
std::string get_parser_string_from_properties(const std::map<std::string, std::string>& properties);
// Returns the parser mode string derived from an index property map.
std::string get_parser_mode_string_from_properties(
const std::map<std::string, std::string>& properties);
// Returns the phrase support string derived from an index property map.
std::string get_parser_phrase_support_string_from_properties(
const std::map<std::string, std::string>& properties);
// Returns the char filter settings derived from an index property map.
CharFilterMap get_parser_char_filter_map_from_properties(
const std::map<std::string, std::string>& properties);
// get parser ignore_above value from properties
std::string get_parser_ignore_above_value_from_properties(
const std::map<std::string, std::string>& properties);
template <bool ReturnTrue = false>
std::string get_parser_lowercase_from_properties(
const std::map<std::string, std::string>& properties) {
DBUG_EXECUTE_IF("inverted_index_parser.get_parser_lowercase_from_properties", { return ""; })
if (properties.find(INVERTED_INDEX_PARSER_LOWERCASE_KEY) != properties.end()) {
return properties.at(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
} else {
if constexpr (ReturnTrue) {
return INVERTED_INDEX_PARSER_TRUE;
} else {
return "";
}
}
}
// Declarations only; the definitions are not part of this header.

// Returns the stop words setting derived from an index property map.
std::string get_parser_stopwords_from_properties(
const std::map<std::string, std::string>& properties);
// Returns the dict compression setting derived from an index property map.
std::string get_parser_dict_compression_from_properties(
const std::map<std::string, std::string>& properties);
// Returns the custom analyzer name derived from an index property map.
std::string get_custom_analyzer_string_from_properties(
const std::map<std::string, std::string>& properties);
} // namespace doris