blob: 6b4c44658085f3c1e36e65fd1d6a951f2f3c0f66 [file] [log] [blame]
Index: solr/CHANGES.txt
===================================================================
--- solr/CHANGES.txt (revision 987287)
+++ solr/CHANGES.txt (working copy)
@@ -62,6 +62,12 @@
* SOLR-1876: All Analyzers and TokenStreams are now final to enforce
the decorator pattern. (rmuir, uschindler)
+* LUCENE-2608: Added the ability to specify the accuracy on a per-request basis.
+ Implementations of SolrSpellChecker should change over to the new
+ SolrSpellChecker.getSuggestions(SpellingOptions) method. This change is backward
+ compatible for now, but the older getSuggestions methods are deprecated; the SpellingOptions
+ class encapsulates the parameters that were passed in to those methods before the change. (gsingers)
+
Detailed Change List
----------------------
Index: solr/src/test/test-files/solr/conf/solrconfig.xml
===================================================================
--- solr/src/test/test-files/solr/conf/solrconfig.xml (revision 987287)
+++ solr/src/test/test-files/solr/conf/solrconfig.xml (working copy)
@@ -377,7 +377,11 @@
<str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str>
<str name="buildOnCommit">true</str>
</lst>
-
+ <lst name="spellchecker">
+ <str name="name">perDict</str>
+ <str name="classname">org.apache.solr.handler.component.DummyCustomParamSpellChecker</str>
+ <str name="field">lowerfilt</str>
+ </lst>
</searchComponent>
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
Index: solr/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java
===================================================================
--- solr/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java (revision 987287)
+++ solr/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java (working copy)
@@ -80,15 +80,16 @@
IndexReader reader = core.getSearcher().get().getReader();
Collection<Token> tokens = queryConverter.convert("fob");
- SpellingResult result = checker.getSuggestions(tokens, reader);
+ SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
+ SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
assertTrue(entry.getKey() + " is not equal to " + "foo", entry.getKey().equals("foo") == true);
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
- tokens = queryConverter.convert("super");
- result = checker.getSuggestions(tokens, reader);
+ spellOpts.tokens = queryConverter.convert("super");
+ result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
@@ -118,7 +119,9 @@
IndexReader reader = core.getSearcher().get().getReader();
Collection<Token> tokens = queryConverter.convert("Solar");
- SpellingResult result = checker.getSuggestions(tokens, reader);
+
+ SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
+ SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
//should be lowercased, b/c we are using a lowercasing analyzer
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
@@ -128,8 +131,8 @@
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
//test something not in the spell checker
- tokens = queryConverter.convert("super");
- result = checker.getSuggestions(tokens, reader);
+ spellOpts.tokens = queryConverter.convert("super");
+ result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
suggestions = result.get(tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
@@ -160,7 +163,8 @@
IndexReader reader = core.getSearcher().get().getReader();
Collection<Token> tokens = queryConverter.convert("solar");
- SpellingResult result = checker.getSuggestions(tokens, reader);
+ SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
+ SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
//should be lowercased, b/c we are using a lowercasing analyzer
Map<String, Integer> suggestions = result.get(tokens.iterator().next());
@@ -170,10 +174,10 @@
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
- tokens = queryConverter.convert("super");
- result = checker.getSuggestions(tokens, reader);
+ spellOpts.tokens = queryConverter.convert("super");
+ result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
- suggestions = result.get(tokens.iterator().next());
+ suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
}
}
Index: solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java
===================================================================
--- solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java (revision 987287)
+++ solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java (working copy)
@@ -125,10 +125,11 @@
IndexReader reader = searcher.getReader();
Collection<Token> tokens = queryConverter.convert("documemt");
- SpellingResult result = checker.getSuggestions(tokens, reader);
+ SpellingOptions spellOpts = new SpellingOptions(tokens, reader);
+ SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
//should be lowercased, b/c we are using a lowercasing analyzer
- Map<String, Integer> suggestions = result.get(tokens.iterator().next());
+ Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("documemt is null and it shouldn't be", suggestions != null);
assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
@@ -136,32 +137,33 @@
assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO);
//test something not in the spell checker
- tokens = queryConverter.convert("super");
- result = checker.getSuggestions(tokens, reader);
+ spellOpts.tokens = queryConverter.convert("super");
+ result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
- suggestions = result.get(tokens.iterator().next());
+ suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
//test something that is spelled correctly
- tokens = queryConverter.convert("document");
- result = checker.getSuggestions(tokens, reader);
+ spellOpts.tokens = queryConverter.convert("document");
+ result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
- suggestions = result.get(tokens.iterator().next());
+ suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is null and it shouldn't be", suggestions == null);
//Has multiple possibilities, but the exact exists, so that should be returned
- tokens = queryConverter.convert("red");
- result = checker.getSuggestions(tokens, reader, 2);
- assertTrue("result is null and it shouldn't be", result != null);
- suggestions = result.get(tokens.iterator().next());
+ spellOpts.tokens = queryConverter.convert("red");
+ spellOpts.count = 2;
+ result = checker.getSuggestions(spellOpts);
+ assertNotNull(result);
+ suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
//Try out something which should have multiple suggestions
- tokens = queryConverter.convert("bug");
- result = checker.getSuggestions(tokens, reader, 2);
- assertTrue("result is null and it shouldn't be", result != null);
- suggestions = result.get(tokens.iterator().next());
- assertTrue("suggestions is null and it shouldn't be", suggestions != null);
+ spellOpts.tokens = queryConverter.convert("bug");
+ result = checker.getSuggestions(spellOpts);
+ assertNotNull(result);
+ suggestions = result.get(spellOpts.tokens.iterator().next());
+ assertNotNull(suggestions);
assertTrue("suggestions Size: " + suggestions.size() + " is not: " + 2, suggestions.size() == 2);
entry = suggestions.entrySet().iterator().next();
@@ -198,10 +200,11 @@
IndexReader reader = searcher.getReader();
Collection<Token> tokens = queryConverter.convert("documemt");
- SpellingResult result = checker.getSuggestions(tokens, reader, 1, false, true);
+ SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null);
+ SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
//should be lowercased, b/c we are using a lowercasing analyzer
- Map<String, Integer> suggestions = result.get(tokens.iterator().next());
+ Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("documemt is null and it shouldn't be", suggestions != null);
assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
@@ -209,16 +212,16 @@
assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2);
//test something not in the spell checker
- tokens = queryConverter.convert("super");
- result = checker.getSuggestions(tokens, reader, 1, false, true);
+ spellOpts.tokens = queryConverter.convert("super");
+ result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
- suggestions = result.get(tokens.iterator().next());
+ suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
- tokens = queryConverter.convert("document");
- result = checker.getSuggestions(tokens, reader, 1, false, true);
+ spellOpts.tokens = queryConverter.convert("document");
+ result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
- suggestions = result.get(tokens.iterator().next());
+ suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
} finally {
holder.decref();
@@ -304,10 +307,11 @@
IndexReader reader = searcher.getReader();
Collection<Token> tokens = queryConverter.convert("flesh");
- SpellingResult result = checker.getSuggestions(tokens, reader, 1, false, true);
+ SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null);
+ SpellingResult result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
//should be lowercased, b/c we are using a lowercasing analyzer
- Map<String, Integer> suggestions = result.get(tokens.iterator().next());
+ Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("flesh is null and it shouldn't be", suggestions != null);
assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
@@ -315,16 +319,16 @@
assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);
//test something not in the spell checker
- tokens = queryConverter.convert("super");
- result = checker.getSuggestions(tokens, reader, 1, false, true);
+ spellOpts.tokens = queryConverter.convert("super");
+ result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
- suggestions = result.get(tokens.iterator().next());
+ suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
- tokens = queryConverter.convert("Caroline");
- result = checker.getSuggestions(tokens, reader, 1, false, true);
+ spellOpts.tokens = queryConverter.convert("Caroline");
+ result = checker.getSuggestions(spellOpts);
assertTrue("result is null and it shouldn't be", result != null);
- suggestions = result.get(tokens.iterator().next());
+ suggestions = result.get(spellOpts.tokens.iterator().next());
assertTrue("suggestions is not null and it should be", suggestions == null);
} finally {
holder.decref();
Index: solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
===================================================================
--- solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java (revision 987287)
+++ solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java (working copy)
@@ -24,6 +24,7 @@
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
@@ -33,7 +34,6 @@
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.spelling.AbstractLuceneSpellChecker;
import org.apache.solr.spelling.IndexBasedSpellChecker;
-import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -133,9 +133,9 @@
assertTrue(cmdExec + " is not equal to " + "build",
cmdExec.equals("build") == true);
NamedList spellCheck = (NamedList) values.get("spellcheck");
- assertTrue("spellCheck is null and it shouldn't be", spellCheck != null);
+ assertNotNull(spellCheck);
NamedList suggestions = (NamedList) spellCheck.get("suggestions");
- assertTrue("suggestions is null and it shouldn't be", suggestions != null);
+ assertNotNull(suggestions);
NamedList document = (NamedList) suggestions.get("documemt");
assertEquals(1, document.get("numFound"));
assertEquals(0, document.get("startOffset"));
@@ -145,7 +145,51 @@
assertEquals("document", theSuggestion.iterator().next());
}
+
@Test
+ public void testPerDictionary() throws Exception {
+ SolrCore core = h.getCore();
+ SearchComponent speller = core.getSearchComponent("spellcheck");
+ assertTrue("speller is null and it shouldn't be", speller != null);
+
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.add(CommonParams.QT, "spellCheckCompRH");
+ params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
+ params.add(CommonParams.Q, "documemt");
+ params.add(SpellCheckComponent.COMPONENT_NAME, "true");
+ params.add(SpellingParams.SPELLCHECK_DICT, "perDict");
+
+ params.add(SpellingParams.SPELLCHECK_PREFIX + ".perDict.foo", "bar");
+ params.add(SpellingParams.SPELLCHECK_PREFIX + ".perDict.bar", "foo");
+
+ SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
+ SolrQueryResponse rsp = new SolrQueryResponse();
+ handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+ NamedList values = rsp.getValues();
+
+ NamedList spellCheck = (NamedList) values.get("spellcheck");
+ NamedList suggestions = (NamedList) spellCheck.get("suggestions");
+ assertNotNull("suggestions", suggestions);
+ NamedList suggestion;
+ Collection<String> theSuggestion;
+ suggestion = (NamedList) suggestions.get("foo");
+ assertEquals(1, suggestion.get("numFound"));
+ assertEquals(0, suggestion.get("startOffset"));
+ assertEquals(suggestion.get("endOffset"), 1);
+ theSuggestion = (Collection<String>) suggestion.get("suggestion");
+ assertEquals(1, theSuggestion.size());
+ assertEquals("bar", theSuggestion.iterator().next());
+
+ suggestion = (NamedList) suggestions.get("bar");
+ assertEquals(1, suggestion.get("numFound"));
+ assertEquals(2, suggestion.get("startOffset"));
+ assertEquals(3, suggestion.get("endOffset"));
+ theSuggestion = (Collection<String>) suggestion.get("suggestion");
+ assertEquals(1, theSuggestion.size());
+ assertEquals("foo", theSuggestion.iterator().next());
+ }
+
+ @Test
public void testCollate() throws Exception {
SolrCore core = h.getCore();
SearchComponent speller = core.getSearchComponent("spellcheck");
Index: solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java
===================================================================
--- solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java (revision 0)
+++ solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java (revision 0)
@@ -0,0 +1,68 @@
+package org.apache.solr.handler.component;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.IndexReader;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.spelling.SolrSpellChecker;
+import org.apache.solr.spelling.SpellingOptions;
+import org.apache.solr.spelling.SpellingResult;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * A Dummy SpellChecker for testing purposes
+ *
+ **/
+public class DummyCustomParamSpellChecker extends SolrSpellChecker {
+
+ @Override
+ public void reload() throws IOException {
+
+ }
+
+ @Override
+ public void build(SolrCore core, SolrIndexSearcher searcher) {
+
+ }
+
+ @Override
+ public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count, boolean onlyMorePopular, boolean extendedResults) throws IOException {
+ return getSuggestions(new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults, 0, null));
+ }
+
+ @Override
+ public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
+
+ SpellingResult result = new SpellingResult();
+ //just spit back out the results
+ Iterator<String> iterator = options.customParams.getParameterNamesIterator();
+ int i = 0;
+ while (iterator.hasNext()){
+ String name = iterator.next();
+ String value = options.customParams.get(name);
+ result.add(new Token(name, i++, i++), Collections.singletonList(value));
+ }
+ return result;
+ }
+}
Property changes on: solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
===================================================================
--- solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (revision 987287)
+++ solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (working copy)
@@ -23,11 +23,13 @@
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.search.spell.LevensteinDistance;
+import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.spell.SuggestWordQueue;
import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.client.solrj.response.SpellCheckResponse;
+import org.apache.solr.common.params.ModifiableSolrParams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -144,8 +146,12 @@
NamedList response = new SimpleOrderedMap();
IndexReader reader = rb.req.getSearcher().getReader();
boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
- SpellingResult spellingResult = spellChecker.getSuggestions(tokens,
- reader, count, onlyMorePopular, extendedResults);
+ float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
+ SolrParams customParams = getCustomParams(getDictionaryName(params), params);
+ SpellingOptions options = new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults,
+ accuracy, customParams);
+
+ SpellingResult spellingResult = spellChecker.getSuggestions(options);
if (spellingResult != null) {
response.add("suggestions", toNamedList(spellingResult, q,
extendedResults, collate));
@@ -159,6 +165,24 @@
}
}
+ /**
+ * For every param of the form "spellcheck.[dictionary name].XXXX=YYYY", add XXXX=YYYY
+ * as a param to the custom param list.
+ * @param dictionary The name of the dictionary whose custom params are collected
+ * @param params The original SolrParams
+ */
+ protected SolrParams getCustomParams(String dictionary, SolrParams params) {
+ ModifiableSolrParams result = new ModifiableSolrParams();
+ Iterator<String> iter = params.getParameterNamesIterator();
+ String prefix = SpellingParams.SPELLCHECK_PREFIX + "." + dictionary + ".";
+ while (iter.hasNext()){
+ String nxt = iter.next();
+ if (nxt.startsWith(prefix)){
+ result.add(nxt.substring(prefix.length()), params.getParams(nxt));
+ }
+ }
+ return result;
+ }
@Override
@@ -341,13 +365,17 @@
}
protected SolrSpellChecker getSpellChecker(SolrParams params) {
+ return spellCheckers.get(getDictionaryName(params));
+ }
+
+ private String getDictionaryName(SolrParams params) {
String dictName = params.get(SPELLCHECK_DICT);
if (dictName == null) {
dictName = SolrSpellChecker.DEFAULT_DICTIONARY_NAME;
}
- return spellCheckers.get(dictName);
+ return dictName;
}
-
+
/**
* @return the spellchecker registered to a given name
*/
Index: solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
===================================================================
--- solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java (revision 987287)
+++ solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java (working copy)
@@ -149,30 +149,47 @@
}
return name;
}
-
- @SuppressWarnings("unchecked")
- public SpellingResult getSuggestions(Collection<Token> tokens,
- IndexReader reader, int count, boolean onlyMorePopular,
- boolean extendedResults)
- throws IOException {
- SpellingResult result = new SpellingResult(tokens);
- reader = determineReader(reader);
+
+ /**
+ * Kept around for back compatibility purposes.
+ *
+ * @param tokens The Tokens to be spell checked.
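+ * @param reader The (optional) IndexReader. If there is no IndexReader, then extendedResults are not possible
+ * @param count The maximum number of suggestions to return
+ * @param onlyMorePopular Return only those results that are more popular, as defined by the implementation
+ * @param extendedResults Provide additional, per implementation, information about the results
+ * @return The {@link SpellingResult} suggestions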
+ * @throws IOException
+ */
+ @Override
+ public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count, boolean onlyMorePopular, boolean extendedResults) throws IOException {
+ return getSuggestions(new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults, spellChecker.getAccuracy(), null));
+ }
+
+ @Override
+ public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
+ SpellingResult result = new SpellingResult(options.tokens);
+ IndexReader reader = determineReader(options.reader);
Term term = field != null ? new Term(field, "") : null;
- for (Token token : tokens) {
+ float theAccuracy = (options.accuracy == Float.MIN_VALUE) ? spellChecker.getAccuracy() : options.accuracy;
+
+ int count = (int) Math.max(options.count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
+ for (Token token : options.tokens) {
String tokenText = new String(token.buffer(), 0, token.length());
- String[] suggestions = spellChecker.suggestSimilar(tokenText, (int) Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT),
+ String[] suggestions = spellChecker.suggestSimilar(tokenText,
+ count,
field != null ? reader : null, //workaround LUCENE-1295
field,
- onlyMorePopular);
+ options.onlyMorePopular, theAccuracy);
if (suggestions.length == 1 && suggestions[0].equals(tokenText)) {
//These are spelled the same, continue on
continue;
}
- if (extendedResults == true && reader != null && field != null) {
+ if (options.extendedResults == true && reader != null && field != null) {
term = term.createTerm(tokenText);
result.add(token, reader.docFreq(term));
- int countLimit = Math.min(count, suggestions.length);
+ int countLimit = Math.min(options.count, suggestions.length);
for (int i = 0; i < countLimit; i++) {
term = term.createTerm(suggestions[i]);
result.add(token, suggestions[i], reader.docFreq(term));
@@ -180,8 +197,8 @@
} else {
if (suggestions.length > 0) {
List<String> suggList = Arrays.asList(suggestions);
- if (suggestions.length > count) {
- suggList = suggList.subList(0, count);
+ if (suggestions.length > options.count) {
+ suggList = suggList.subList(0, options.count);
}
result.add(token, suggList);
}
Index: solr/src/java/org/apache/solr/spelling/SolrSpellChecker.java
===================================================================
--- solr/src/java/org/apache/solr/spelling/SolrSpellChecker.java (revision 987287)
+++ solr/src/java/org/apache/solr/spelling/SolrSpellChecker.java (working copy)
@@ -74,6 +74,8 @@
* Assumes count = 1, onlyMorePopular = false, extendedResults = false
*
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
+ *
+ * @deprecated This method will be removed in 4.x in favor of {@link #getSuggestions(org.apache.solr.spelling.SpellingOptions)}
*/
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader) throws IOException {
return getSuggestions(tokens, reader, 1, false, false);
@@ -83,6 +85,8 @@
* Assumes onlyMorePopular = false, extendedResults = false
*
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
+ *
+ * @deprecated This method will be removed in 4.x in favor of {@link #getSuggestions(org.apache.solr.spelling.SpellingOptions)}
*/
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count) throws IOException {
return getSuggestions(tokens, reader, count, false, false);
@@ -93,6 +97,8 @@
* Assumes count = 1.
*
* @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)
+ *
+ * @deprecated This method will be removed in 4.x in favor of {@link #getSuggestions(org.apache.solr.spelling.SpellingOptions)}
*/
public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, boolean onlyMorePopular, boolean extendedResults) throws IOException {
return getSuggestions(tokens, reader, 1, onlyMorePopular, extendedResults);
@@ -108,8 +114,27 @@
* @param onlyMorePopular TODO
* @param extendedResults TODO
* @throws IOException
+ *
+ * @deprecated This method will be removed in 4.x in favor of {@link #getSuggestions(org.apache.solr.spelling.SpellingOptions)}
*/
public abstract SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count,
boolean onlyMorePopular, boolean extendedResults)
throws IOException;
+
+ /**
+ * Get suggestions for the given query. Tokenizes the query using a field appropriate Analyzer.
+ * The {@link SpellingResult#getSuggestions()} suggestions must be ordered by best suggestion first.
+ * <p/>
+ * Note: This method will be abstract in Solr 4.0 and beyond and is the recommended way of implementing the spell checker. For now,
+ * it calls {@link #getSuggestions(java.util.Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)}.
+ *
+ *
+ * @param options The {@link SpellingOptions} to use
+ * @return The {@link SpellingResult} suggestions
+ * @throws IOException if there is an error producing suggestions
+ */
+ public SpellingResult getSuggestions(SpellingOptions options) throws IOException{
+ return getSuggestions(options.tokens, options.reader, options.count, options.onlyMorePopular, options.extendedResults);
+ }
+
}
Index: solr/src/java/org/apache/solr/spelling/SpellingOptions.java
===================================================================
--- solr/src/java/org/apache/solr/spelling/SpellingOptions.java (revision 0)
+++ solr/src/java/org/apache/solr/spelling/SpellingOptions.java (revision 0)
@@ -0,0 +1,94 @@
+package org.apache.solr.spelling;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.IndexReader;
+import org.apache.solr.common.params.SolrParams;
+
+import java.util.Collection;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
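+ * Encapsulates the parameters of a spell check request: the tokens, an optional IndexReader,
+ * the suggestion count, the onlyMorePopular/extendedResults flags, the accuracy and any custom params.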
+ **/
+public class SpellingOptions {
+
+ /**
+ * The tokens to spell check
+ */
+ public Collection<Token> tokens;
+ /**
+ * An optional {@link org.apache.lucene.index.IndexReader}
+ */
+ public IndexReader reader;
+ /**
+ * The number of suggestions to return, if there are any. Defaults to 1.
+ */
+ public int count = 1;
+ /**
+ * Return only those results that are more popular, as defined by the implementation
+ */
+ public boolean onlyMorePopular;
+ /**
+ * Provide additional, per implementation, information about the results
+ */
+ public boolean extendedResults;
+
+ /**
+ * Optionally restrict the results to have a minimum accuracy level. Per Implementation.
+ * By default set to Float.MIN_VALUE.
+ */
+ public float accuracy = Float.MIN_VALUE;
+
+ /**
+ * Any other custom params can be passed through. May be null and is null by default.
+ */
+ public SolrParams customParams;
+
+ public SpellingOptions() {
+ }
+
+ //A couple of convenience ones
+ public SpellingOptions(Collection<Token> tokens, int count) {
+ this.tokens = tokens;
+ this.count = count;
+ }
+
+ public SpellingOptions(Collection<Token> tokens, IndexReader reader) {
+ this.tokens = tokens;
+ this.reader = reader;
+ }
+
+ public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count) {
+ this.tokens = tokens;
+ this.reader = reader;
+ this.count = count;
+ }
+
+
+ public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count, boolean onlyMorePopular, boolean extendedResults, float accuracy, SolrParams customParams) {
+ this.tokens = tokens;
+ this.reader = reader;
+ this.count = count;
+ this.onlyMorePopular = onlyMorePopular;
+ this.extendedResults = extendedResults;
+ this.accuracy = accuracy;
+ this.customParams = customParams;
+ }
+}
Property changes on: solr/src/java/org/apache/solr/spelling/SpellingOptions.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: solr/src/common/org/apache/solr/common/params/SpellingParams.java
===================================================================
--- solr/src/common/org/apache/solr/common/params/SpellingParams.java (revision 987287)
+++ solr/src/common/org/apache/solr/common/params/SpellingParams.java (working copy)
@@ -81,4 +81,9 @@
* Take the top suggestion for each token and create a new query from it
*/
public static final String SPELLCHECK_COLLATE = SPELLCHECK_PREFIX + "collate";
+
+ /**
+ * Certain spelling implementations may allow for an accuracy setting.
+ */
+ public static final String SPELLCHECK_ACCURACY = SPELLCHECK_PREFIX + "accuracy";
}
Index: lucene/contrib/CHANGES.txt
===================================================================
--- lucene/contrib/CHANGES.txt (revision 987287)
+++ lucene/contrib/CHANGES.txt (working copy)
@@ -11,6 +11,9 @@
* LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along
with two implementations. The existing comparator (score, then frequency) is the default (Grant Ingersoll)
+ * LUCENE-2608: Added the ability to specify the accuracy on a per-call basis in the SpellChecker. The per-instance
+ setAccuracy method is also still available. (Grant Ingersoll)
+
======================= Lucene 3.x (not yet released) =======================
Changes in backwards compatibility policy
Index: lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java
===================================================================
--- lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (revision 987287)
+++ lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (working copy)
@@ -104,11 +104,21 @@
spellChecker.setAccuracy(0.8f);
checkCommonSuggestions(r);
checkJaroWinklerSuggestions();
+ // the instance accuracy was set to 0.8 above, but the best result has a score of 0.925
+ String[] similar = spellChecker.suggestSimilar("fvie", 2, 0.93f);
+ assertTrue(similar.length == 0);
+ similar = spellChecker.suggestSimilar("fvie", 2, 0.92f);
+ assertTrue(similar.length == 1);
+
+ similar = spellChecker.suggestSimilar("fiv", 2);
+ assertTrue(similar.length > 0);
+ assertEquals(similar[0], "five");
spellChecker.setStringDistance(new NGramDistance(2));
spellChecker.setAccuracy(0.5f);
checkCommonSuggestions(r);
checkNGramSuggestions();
+
r.close();
}
@@ -127,8 +137,6 @@
if (!compareSP.isClosed())
compareSP.close();
compIdx.close();
-
-
}
private void checkCommonSuggestions(IndexReader r) throws IOException {
Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
===================================================================
--- lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (revision 987287)
+++ lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (working copy)
@@ -63,10 +63,15 @@
public class SpellChecker implements java.io.Closeable {
/**
+ * The default minimum score to use, if not specified by calling {@link #setAccuracy(float)}.
+ */
+ public static final float DEFAULT_ACCURACY = 0.5f;
+
+ /**
* Field name for each word in the ngram index.
*/
public static final String F_WORD = "word";
-
+
private static final Term F_WORD_TERM = new Term(F_WORD);
/**
@@ -75,35 +80,34 @@
// don't modify the directory directly - see #swapSearcher()
// TODO: why is this package private?
Directory spellIndex;
-
/**
* Boost value for start and end grams
*/
private float bStart = 2.0f;
+
private float bEnd = 1.0f;
+ // don't use this searcher directly - see #swapSearcher()
- // don't use this searcher directly - see #swapSearcher()
private IndexSearcher searcher;
-
/*
- * this locks all modifications to the current searcher.
+ * this locks all modifications to the current searcher.
*/
+
private final Object searcherLock = new Object();
-
/*
- * this lock synchronizes all possible modifications to the
+ * this lock synchronizes all possible modifications to the
* current index directory. It should not be possible to try modifying
* the same index concurrently. Note: Do not acquire the searcher lock
- * before acquiring this lock!
+ * before acquiring this lock!
*/
private final Object modifyCurrentIndexLock = new Object();
+
private volatile boolean closed = false;
+ // minimum score for hits generated by the spell checker query
- // minimum score for hits generated by the spell checker query
- private float minScore = 0.5f;
-
+ private float accuracy = DEFAULT_ACCURACY;
+
private StringDistance sd;
-
private Comparator<SuggestWord> comparator;
/**
@@ -202,13 +206,23 @@
}
/**
- * Sets the accuracy 0 &lt; minScore &lt; 1; default 0.5
+ * Sets the accuracy (minimum score), 0 &lt; acc &lt; 1; default {@link #DEFAULT_ACCURACY}
+ * @param acc The new accuracy
*/
- public void setAccuracy(float minScore) {
- this.minScore = minScore;
+ public void setAccuracy(float acc) {
+ this.accuracy = acc;
}
/**
+ * The accuracy (minimum score) to be used, unless overridden in {@link #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)}, to
+ * decide whether a suggestion is included or not.
+ * @return The current accuracy setting
+ */
+ public float getAccuracy() {
+ return accuracy;
+ }
+
+ /**
* Suggest similar words.
*
* <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
@@ -224,12 +238,38 @@
* @throws IOException if the underlying index throws an {@link IOException}
* @throws AlreadyClosedException if the Spellchecker is already closed
* @return String[]
+ *
+ * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
*/
public String[] suggestSimilar(String word, int numSug) throws IOException {
return this.suggestSimilar(word, numSug, null, null, false);
}
/**
+ * Suggest similar words.
+ *
+ * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
+ * is not the same as the edit distance strategy used to calculate the best
+ * matching spell-checked word from the hits that Lucene found, one usually has
+ * to retrieve a couple of numSug's in order to get the true best match.
+ *
+ * <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
+ * Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
+ *
+ * @param word the word you want a spell check done on
+ * @param numSug the number of suggested words
+ * @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
+ * @throws IOException if the underlying index throws an {@link IOException}
+ * @throws AlreadyClosedException if the Spellchecker is already closed
+ * @return String[]
+ *
+ * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
+ */
+ public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException {
+ return this.suggestSimilar(word, numSug, null, null, false, accuracy);
+ }
+
+ /**
* Suggest similar words (optionally restricted to a field of an index).
*
* <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
@@ -240,6 +280,8 @@
* <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
* Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
*
+ * <p>Uses the {@link #getAccuracy()} value, as set via {@link #setAccuracy(float)}, as the accuracy.
+ *
* @param word the word you want a spell check done on
* @param numSug the number of suggested words
* @param ir the indexReader of the user index (can be null see field param)
@@ -252,74 +294,107 @@
* @return String[] the sorted list of the suggest words with these 2 criteria:
* first criteria: the edit distance, second criteria (only if restricted mode): the popularity
* of the suggest words in the field of the user index
+ *
+ * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)
*/
public String[] suggestSimilar(String word, int numSug, IndexReader ir,
String field, boolean morePopular) throws IOException {
+ return suggestSimilar(word, numSug, ir, field, morePopular, accuracy);
+ }
+
+
+ /**
+ * Suggest similar words (optionally restricted to a field of an index).
+ *
+ * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms
+ * is not the same as the edit distance strategy used to calculate the best
+ * matching spell-checked word from the hits that Lucene found, one usually has
+ * to retrieve a couple of numSug's in order to get the true best match.
+ *
+ * <p>I.e. if numSug == 1, don't count on that suggestion being the best one.
+ * Thus, you should set this value to <b>at least</b> 5 for a good suggestion.
+ *
+ * @param word the word you want a spell check done on
+ * @param numSug the number of suggested words
+ * @param ir the indexReader of the user index (can be null see field param)
+ * @param field the field of the user index: if field is not null, the suggested
+ * words are restricted to the words present in this field.
+ * @param morePopular return only the suggest words that are as frequent or more frequent than the searched word
+ * (only if restricted mode = (indexReader!=null and field!=null))
+ * @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results
+ * @throws IOException if the underlying index throws an {@link IOException}
+ * @throws AlreadyClosedException if the Spellchecker is already closed
+ * @return String[] the sorted list of the suggest words with these 2 criteria:
+ * first criteria: the edit distance, second criteria (only if restricted mode): the popularity
+ * of the suggest words in the field of the user index
+ */
+ public String[] suggestSimilar(String word, int numSug, IndexReader ir,
+ String field, boolean morePopular, float accuracy) throws IOException {
// obtainSearcher calls ensureOpen
final IndexSearcher indexSearcher = obtainSearcher();
try{
- float min = this.minScore;
+
final int lengthWord = word.length();
-
+
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
final int goalFreq = (morePopular && ir != null && field != null) ? freq : 0;
// if the word exists in the real index and we don't care for word frequency, return the word itself
if (!morePopular && freq > 0) {
return new String[] { word };
}
-
+
BooleanQuery query = new BooleanQuery();
String[] grams;
String key;
-
+
for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) {
-
+
key = "gram" + ng; // form key
-
+
grams = formGrams(word, ng); // form word into ngrams (allow dups too)
-
+
if (grams.length == 0) {
continue; // hmm
}
-
+
if (bStart > 0) { // should we boost prefixes?
add(query, "start" + ng, grams[0], bStart); // matches start of word
-
+
}
if (bEnd > 0) { // should we boost suffixes
add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word
-
+
}
for (int i = 0; i < grams.length; i++) {
add(query, key, grams[i]);
}
}
-
+
int maxHits = 10 * numSug;
-
+
// System.out.println("Q: " + query);
ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs;
// System.out.println("HITS: " + hits.length());
SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator);
-
+
// go thru more than 'maxr' matches in case the distance filter triggers
int stop = Math.min(hits.length, maxHits);
SuggestWord sugWord = new SuggestWord();
for (int i = 0; i < stop; i++) {
-
+
sugWord.string = indexSearcher.doc(hits[i].doc).get(F_WORD); // get orig word
-
+
// don't suggest a word for itself, that would be silly
if (sugWord.string.equals(word)) {
continue;
}
-
+
// edit distance
sugWord.score = sd.getDistance(word,sugWord.string);
- if (sugWord.score < min) {
+ if (sugWord.score < accuracy) {
continue;
}
-
+
if (ir != null && field != null) { // use the user index
sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index
// don't suggest a word that is not present in the field
@@ -330,23 +405,22 @@
sugQueue.insertWithOverflow(sugWord);
if (sugQueue.size() == numSug) {
// if queue full, maintain the minScore score
- min = sugQueue.top().score;
+ accuracy = sugQueue.top().score;
}
sugWord = new SuggestWord();
}
-
+
// convert to array string
String[] list = new String[sugQueue.size()];
for (int i = sugQueue.size() - 1; i >= 0; i--) {
list[i] = sugQueue.pop().string;
}
-
+
return list;
} finally {
releaseSearcher(indexSearcher);
}
}
-
/**
* Add a clause to a boolean query.
*/