blob: 967762a2e99f9031f51b18e6bedfa71a2cb56742 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.plugins.index.search;
import java.util.HashMap;
import java.util.Map;
/*
Helper class to assist with mlt query formation for elastic and lucene
*/
public class MoreLikeThisHelperUtil {
/*
A list of fields to fetch and analyze the text from.
Default analyzes all the indexed fields.
*/
public static final String MLT_FILED = "mlt.fl";
/*
The minimum document frequency for a term below which the terms will be ignored from the input document.
Defaults to 5
*/
public static final String MLT_MIN_DOC_FREQ = "mlt.mindf";
/*
The maximum document frequency above which the terms will be ignored from the input document.
This could be useful in order to ignore highly frequent words such as stop words. Defaults to INTEGER.MAX
*/
public static final String MLT_MAX_DOC_FREQ = "mlt.maxdf";
/*
The minimum term frequency (Number of times the term occurs in the input doc)
below which the terms will be ignored from the input document.
Defaults to 2
*/
public static final String MLT_MIN_TERM_FREQ = "mlt.mintf";
/*
Bool value if boost should be supported or not. Only valid for lucene
Not available in elastic.
*/
public static final String MLT_BOOST = "mlt.boost";
/*
Sets the boost value of the whole query. Defaults to 1.0.
*/
public static final String MLT_BOOST_FACTOR = "mlt.qf";
/*
Only For Lucene
*/
public static final String MLT_MAX_DOC_FREQ_PCT = "mlt.maxdfp";
/*
Only For Lucene
*/
public static final String MLT_MAX_NUM_TOKENS_PARSED = "mlt.maxntp";
/*
The maximum number of query terms that will be selected.
Increasing this value gives greater accuracy at the expense of query execution speed. Defaults to 25.
*/
public static final String MLT_MAX_QUERY_TERMS = "mlt.maxqt";
/*
The maximum word length above which the terms will be ignored. The old name max_word_len is deprecated.
Defaults to unbounded
*/
public static final String MLT_MAX_WORD_LENGTH = "mlt.maxwl";
/*
The minimum word length below which the terms will be ignored.
Defaults to 0
*/
public static final String MLT_MIN_WORD_LENGTH = "mlt.minwl";
/*
An array of stop words.
Any word in this set is considered "uninteresting" and ignored.
Only applicable for ELASTIC
*/
public static final String MLT_STOP_WORDS = "mlt.stopwords";
/*
This should have either the id to the doc whose similar docs need to be searched or the complete body of the doc.
Defautls to ID (via the rep:similar query).
*/
public static final String MLT_STREAM_BODY = "stream.body";
/*
After the disjunctive query has been formed,
this parameter controls the number of terms that must match.
(Defaults to "30%").
Only applicable for ELASTIC
*/
public static final String MLT_MIN_SHOULD_MATCH = "mlt.minshouldmatch";
/*
Returns param map for a query string of type mlt.fl=:path&mlt.mindf=0&stream.body=/test/a
*/
public static Map<String, String> getParamMapFromMltQuery(String mltQueryString) {
Map<String, String> paramMap = new HashMap();
try {
for (String param : mltQueryString.split("&")) {
String[] keyValuePair = param.split("=");
if (keyValuePair.length != 2 || keyValuePair[0] == null || keyValuePair[1] == null) {
throw new RuntimeException("Unparsable native MLT query: " + mltQueryString);
} else {
paramMap.put(keyValuePair[0], keyValuePair[1]);
}
}
} catch (Exception e) {
throw new RuntimeException("Error while parsing native MLT query: " + mltQueryString);
}
if (paramMap.size() == 0) {
throw new RuntimeException("No params found while parsing the MLT query : " + mltQueryString);
}
return paramMap;
}
}