| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.update.processor; |
| |
| public interface LangIdParams { |
| |
| String LANGUAGE_ID = "langid"; |
| String DOCID_PARAM = LANGUAGE_ID + ".idField"; |
| |
| String FIELDS_PARAM = LANGUAGE_ID + ".fl"; // Field list to detect from |
| String LANG_FIELD = LANGUAGE_ID + ".langField"; // Main language detected |
| String LANGS_FIELD = LANGUAGE_ID + ".langsField"; // All languages detected (multiValued) |
| String FALLBACK = LANGUAGE_ID + ".fallback"; // Fallback lang code |
| String FALLBACK_FIELDS = LANGUAGE_ID + ".fallbackFields"; // Comma-sep list of fallback fields |
| String OVERWRITE = LANGUAGE_ID + ".overwrite"; // Overwrite if existing language value in LANG_FIELD |
| String THRESHOLD = LANGUAGE_ID + ".threshold"; // Detection threshold |
| String ENFORCE_SCHEMA = LANGUAGE_ID + ".enforceSchema"; // Enforces that output fields exist in schema |
| String LANG_WHITELIST = LANGUAGE_ID + ".whitelist"; // Allowed languages |
| String LCMAP = LANGUAGE_ID + ".lcmap"; // Maps detected langcode to other value |
| String MAP_ENABLE = LANGUAGE_ID + ".map"; // Turns on or off the field mapping |
| String MAP_FL = LANGUAGE_ID + ".map.fl"; // Field list for mapping |
| String MAP_OVERWRITE = LANGUAGE_ID + ".map.overwrite"; // Whether to overwrite existing fields |
| String MAP_KEEP_ORIG = LANGUAGE_ID + ".map.keepOrig"; // Keep original field after mapping |
| String MAP_INDIVIDUAL = LANGUAGE_ID + ".map.individual"; // Detect language per individual field |
| String MAP_INDIVIDUAL_FL = LANGUAGE_ID + ".map.individual.fl";// Field list of fields to redetect language for |
| String MAP_LCMAP = LANGUAGE_ID + ".map.lcmap"; // Enables mapping multiple langs to same output field |
| String MAP_PATTERN = LANGUAGE_ID + ".map.pattern"; // RegEx pattern to match field name |
| String MAP_REPLACE = LANGUAGE_ID + ".map.replace"; // Replace pattern |
| String MAX_FIELD_VALUE_CHARS = LANGUAGE_ID + ".maxFieldValueChars"; // Maximum number of characters to use per field for language detection |
| String MAX_TOTAL_CHARS = LANGUAGE_ID + ".maxTotalChars"; // Maximum number of characters to use per all concatenated fields for language detection |
| |
| String DOCID_FIELD_DEFAULT = "id"; |
| String DOCID_LANGFIELD_DEFAULT = null; |
| String DOCID_LANGSFIELD_DEFAULT = null; |
| String MAP_PATTERN_DEFAULT = "(.*)"; |
| String MAP_REPLACE_DEFAULT = "$1_{lang}"; |
| int MAX_FIELD_VALUE_CHARS_DEFAULT = 10000; |
| int MAX_TOTAL_CHARS_DEFAULT = 20000; |
| |
| // TODO: This default threshold accepts even "uncertain" detections. |
| // Increase &langid.threshold above 0.5 to return only certain detections |
| Double DOCID_THRESHOLD_DEFAULT = 0.5; |
| } |