Merged /lucene/dev/trunk:r1523396-1523455
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5207@1523456 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/dev-tools/scripts/diffSources.py b/dev-tools/scripts/diffSources.py
index a3c6acc..d8f3b59 100644
--- a/dev-tools/scripts/diffSources.py
+++ b/dev-tools/scripts/diffSources.py
@@ -50,7 +50,7 @@
elif l.endswith('\n'):
l = l[:-1]
if l.startswith('diff ') or l.startswith('Binary files '):
- keep = not l.endswith('timehints.txt') and l.lower().find('/build/') == -1 and (l.lower().startswith('Only in') or ((l.lower().endswith('.java') or l.lower().endswith('.txt') or l.lower().endswith('.xml') or l.lower().endswith('.iml')) and l.find('/.svn/') == -1))
+ keep = not l.endswith('timehints.txt') and l.lower().find('/build/') == -1 and (l.lower().startswith('Only in') or ((l.lower().endswith('.java') or l.lower().endswith('.txt') or l.lower().endswith('.xml') or l.lower().endswith('.iml') or l.lower().endswith('.html') or l.lower().endswith('.template') or l.lower().endswith('.py') or l.lower().endswith('.g') or l.lower().endswith('.properties')) and l.find('/.svn/') == -1))
if keep:
print
print
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 20a0bd2..f0779b2 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -62,6 +62,22 @@
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.5
+Detailed Change List
+----------------------
+
+New Features
+----------------------
+
+* SOLR-5167: Add support for AnalyzingInfixSuggester (AnalyzingInfixLookupFactory).
+ (Areek Zillur, Varun Thacker via Robert Muir)
+
+Other Changes
+----------------------
+
+* SOLR-5237: Add indexHeapUsageBytes to LukeRequestHandler, indicating how much
+ heap memory is being used by the underlying Lucene index structures.
+ (Areek Zillur via Robert Muir)
+
================== 4.5.0 ==================
Versions of Major Components
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
index 6bc4e8c..d521cb2 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
@@ -555,6 +555,7 @@
indexInfo.add("numDocs", reader.numDocs());
indexInfo.add("maxDoc", reader.maxDoc());
indexInfo.add("deletedDocs", reader.maxDoc() - reader.numDocs());
+ indexInfo.add("indexHeapUsageBytes", getIndexHeapUsed(reader));
indexInfo.add("version", reader.getVersion()); // TODO? Is this different then: IndexReader.getCurrentVersion( dir )?
indexInfo.add("segmentCount", reader.leaves().size());
@@ -569,6 +570,21 @@
return indexInfo;
}
+  /**
+   * Returns the summed {@code ramBytesUsed()} of all segments, or -1 if any leaf is not a SegmentReader.
+   */
+  private static long getIndexHeapUsed(DirectoryReader reader) {
+    long totalBytes = 0;
+    for (AtomicReaderContext leaf : reader.leaves()) {
+      AtomicReader segment = leaf.reader();
+      if (!(segment instanceof SegmentReader)) {
+        return -1; // unsupported leaf type: report "unknown" instead of a partial sum
+      }
+      totalBytes += ((SegmentReader) segment).ramBytesUsed();
+    }
+    return totalBytes;
+  }
+
// Get terribly detailed information about a particular field. This is a very expensive call, use it with caution
// especially on large indexes!
@SuppressWarnings("unchecked")
diff --git a/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
new file mode 100644
index 0000000..e32859e
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/spelling/suggest/fst/AnalyzingInfixLookupFactory.java
@@ -0,0 +1,97 @@
+package org.apache.solr.spelling.suggest.fst;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
+import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.spelling.suggest.LookupFactory;
+
+/**
+ * Factory for {@link AnalyzingInfixSuggester}
+ * @lucene.experimental
+ */
+public class AnalyzingInfixLookupFactory extends LookupFactory {
+  /** The analyzer used at "query-time" and "build-time" to analyze suggestions. */
+  public static final String QUERY_ANALYZER = "suggestAnalyzerFieldType";
+
+  /**
+   * The path where the underlying index is stored; if no index is found,
+   * it will be generated by the AnalyzingInfixSuggester
+   */
+  public static final String INDEX_PATH = "indexPath";
+
+  /**
+   * Minimum number of leading characters before PrefixQuery is used (default 4).
+   * Prefixes shorter than this are indexed as character ngrams
+   * (increasing index size but making lookups faster)
+   */
+  private static final String MIN_PREFIX_CHARS = "minPrefixChars";
+
+  private static final String DEFAULT_INDEX_PATH = "analyzingInfixSuggesterIndexDir";
+
+  /** File name for the automaton. */
+  private static final String FILENAME = "iwfsta.bin";
+
+  /**
+   * Creates the suggester from {@code params}; {@link #QUERY_ANALYZER} is mandatory.
+   * @throws IllegalArgumentException if the field type is not configured or not in the schema
+   * @throws RuntimeException wrapping (as its cause) any IOException from the suggester constructor
+   */
+  @Override
+  public Lookup create(NamedList params, SolrCore core) {
+    // mandatory parameter
+    Object fieldTypeName = params.get(QUERY_ANALYZER);
+    if (fieldTypeName == null) {
+      throw new IllegalArgumentException("Error in configuration: " + QUERY_ANALYZER + " parameter is mandatory");
+    }
+    FieldType ft = core.getLatestSchema().getFieldTypeByName(fieldTypeName.toString());
+    if (ft == null) {
+      throw new IllegalArgumentException("Error in configuration: " + fieldTypeName + " is not defined in the schema");
+    }
+    Analyzer indexAnalyzer = ft.getAnalyzer();
+    Analyzer queryAnalyzer = ft.getQueryAnalyzer();
+
+    // optional parameters
+    String indexPath = params.get(INDEX_PATH) != null
+        ? params.get(INDEX_PATH).toString()
+        : DEFAULT_INDEX_PATH;
+    int minPrefixChars = params.get(MIN_PREFIX_CHARS) != null
+        ? Integer.parseInt(params.get(MIN_PREFIX_CHARS).toString())
+        : AnalyzingInfixSuggester.DEFAULT_MIN_PREFIX_CHARS;
+
+    try {
+      return new AnalyzingInfixSuggester(core.getSolrConfig().luceneMatchVersion,
+          new File(indexPath), indexAnalyzer, queryAnalyzer, minPrefixChars);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  @Override
+  public String storeFileName() {
+    return FILENAME;
+  }
+}
diff --git a/solr/core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt b/solr/core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt
new file mode 100644
index 0000000..6d276c3
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/analyzingInfixSuggest.txt
@@ -0,0 +1,5 @@
+# simple AnalyzingInfix suggest phrase dictionary for testing
+Japanese Autocomplete and Japanese Highlighter broken
+Add Japanese Kanji number normalization to Kuromoji
+Add decompose compound Japanese Katakana token capability to Kuromoji
+This is just another entry!
\ No newline at end of file
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml
index 96b4f7b..b4f560e 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-phrasesuggest.xml
@@ -65,6 +65,24 @@
<str name="queryAnalyzerFieldType">phrase_suggest</str>
</searchComponent>
+ <!-- AnalyzingInfixLookup suggest component (default)-->
+ <searchComponent class="solr.SpellCheckComponent" name="infix_suggest_analyzing">
+ <lst name="spellchecker">
+ <str name="name">infix_suggest_analyzing</str>
+ <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
+ <str name="lookupImpl">org.apache.solr.spelling.suggest.fst.AnalyzingInfixLookupFactory</str>
+ <str name="buildOnCommit">false</str>
+
+ <!-- Suggester properties -->
+ <str name="suggestAnalyzerFieldType">text</str>
+
+ <str name="sourceLocation">analyzingInfixSuggest.txt</str>
+ </lst>
+
+ <!-- specify a fieldtype using keywordtokenizer + lowercase + cleanup -->
+ <str name="queryAnalyzerFieldType">phrase_suggest</str>
+ </searchComponent>
+
<!-- FuzzyLookup suggest component (default)-->
<searchComponent class="solr.SpellCheckComponent" name="fuzzy_suggest_analyzing">
<lst name="spellchecker">
@@ -183,7 +201,20 @@
</arr>
</requestHandler>
- <!-- Fuzzy analyzing handler with 1 max edit -->
+ <!-- Infix analyzing handler (default) -->
+ <requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/infix_suggest_analyzing">
+ <lst name="defaults">
+ <str name="spellcheck">true</str>
+ <str name="spellcheck.dictionary">infix_suggest_analyzing</str>
+ <str name="spellcheck.collate">false</str>
+ <!-- NOTE: if this is false, results are alpha-ordered, not by weight! -->
+ <str name="spellcheck.onlyMorePopular">true</str>
+ </lst>
+ <arr name="components">
+ <str>infix_suggest_analyzing</str>
+ </arr>
+ </requestHandler>
+
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/fuzzy_suggest_analyzing">
<lst name="defaults">
<str name="spellcheck">true</str>
diff --git a/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java b/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java
new file mode 100644
index 0000000..0ee3e58
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/spelling/suggest/TestAnalyzeInfixSuggestions.java
@@ -0,0 +1,66 @@
+package org.apache.solr.spelling.suggest;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.SpellingParams;
+import org.junit.BeforeClass;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestAnalyzeInfixSuggestions extends SolrTestCaseJ4 {
+  // Request handler under test; solrconfig-phrasesuggest.xml wires it to the infix_suggest_analyzing spellchecker.
+  static final String URI_DEFAULT = "/infix_suggest_analyzing";
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig-phrasesuggest.xml","schema-phrasesuggest.xml");
+    assertQ(req("qt", URI_DEFAULT, "q", "", SpellingParams.SPELLCHECK_BUILD, "true")); // build the suggester once up front
+  }
+
+  public void testSingle() throws Exception {
+    // With count=1 exactly one suggestion is expected, with each matched prefix wrapped in <b> tags.
+    assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "1"),
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='1']",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']"
+    );
+
+    assertQ(req("qt", URI_DEFAULT, "q", "high", SpellingParams.SPELLCHECK_COUNT, "1"),
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='high']/int[@name='numFound'][.='1']",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='high']/arr[@name='suggestion']/str[1][.='Japanese Autocomplete and Japanese <b>High</b>lighter broken']"
+    );
+  }
+
+  public void testMultiple() throws Exception {
+    // Only three dictionary entries contain "japan", so numFound stays at 3 even when 4 are requested.
+    assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "2"),
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='2']",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add <b>Japan</b>ese Kanji number normalization to Kuromoji']"
+    );
+    assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "3"),
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='3']",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add <b>Japan</b>ese Kanji number normalization to Kuromoji']",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[3][.='Add decompose compound <b>Japan</b>ese Katakana token capability to Kuromoji']"
+    );
+    assertQ(req("qt", URI_DEFAULT, "q", "japan", SpellingParams.SPELLCHECK_COUNT, "4"),
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/int[@name='numFound'][.='3']",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[1][.='<b>Japan</b>ese Autocomplete and <b>Japan</b>ese Highlighter broken']",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[2][.='Add <b>Japan</b>ese Kanji number normalization to Kuromoji']",
+        "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='japan']/arr[@name='suggestion']/str[3][.='Add decompose compound <b>Japan</b>ese Katakana token capability to Kuromoji']"
+    );
+  }
+}
\ No newline at end of file