| Index: solr/CHANGES.txt |
| =================================================================== |
| --- solr/CHANGES.txt (revision 987287) |
| +++ solr/CHANGES.txt (working copy) |
| @@ -62,6 +62,12 @@ |
| * SOLR-1876: All Analyzers and TokenStreams are now final to enforce |
| the decorator pattern. (rmuir, uschindler) |
| |
| +* LUCENE-2608: Added the ability to specify the accuracy on a per request basis. |
| + Implementations of SolrSpellChecker must change over to the new SolrSpellChecker |
| + abstract methods using the new SpellingOptions class. While this change is |
| + backward compatible, implementations should migrate to the SpellingOptions class which |
| + encapsulates the parameters that were passed in to the methods before the change. (gsingers) |
| + |
| Detailed Change List |
| ---------------------- |
| |
| Index: solr/src/test/test-files/solr/conf/solrconfig.xml |
| =================================================================== |
| --- solr/src/test/test-files/solr/conf/solrconfig.xml (revision 987287) |
| +++ solr/src/test/test-files/solr/conf/solrconfig.xml (working copy) |
| @@ -377,7 +377,11 @@ |
| <str name="comparatorClass">org.apache.solr.spelling.SampleComparator</str> |
| <str name="buildOnCommit">true</str> |
| </lst> |
| - |
| + <lst name="spellchecker"> |
| + <str name="name">perDict</str> |
| + <str name="classname">org.apache.solr.handler.component.DummyCustomParamSpellChecker</str> |
| + <str name="field">lowerfilt</str> |
| + </lst> |
| </searchComponent> |
| |
| <searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/> |
| Index: solr/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java |
| =================================================================== |
| --- solr/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java (revision 987287) |
| +++ solr/src/test/org/apache/solr/spelling/FileBasedSpellCheckerTest.java (working copy) |
| @@ -80,15 +80,16 @@ |
| |
| IndexReader reader = core.getSearcher().get().getReader(); |
| Collection<Token> tokens = queryConverter.convert("fob"); |
| - SpellingResult result = checker.getSuggestions(tokens, reader); |
| + SpellingOptions spellOpts = new SpellingOptions(tokens, reader); |
| + SpellingResult result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| Map<String, Integer> suggestions = result.get(tokens.iterator().next()); |
| Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next(); |
| assertTrue(entry.getKey() + " is not equal to " + "foo", entry.getKey().equals("foo") == true); |
| assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO); |
| |
| - tokens = queryConverter.convert("super"); |
| - result = checker.getSuggestions(tokens, reader); |
| + spellOpts.tokens = queryConverter.convert("super"); |
| + result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| suggestions = result.get(tokens.iterator().next()); |
| assertTrue("suggestions is not null and it should be", suggestions == null); |
| @@ -118,7 +119,9 @@ |
| |
| IndexReader reader = core.getSearcher().get().getReader(); |
| Collection<Token> tokens = queryConverter.convert("Solar"); |
| - SpellingResult result = checker.getSuggestions(tokens, reader); |
| + |
| + SpellingOptions spellOpts = new SpellingOptions(tokens, reader); |
| + SpellingResult result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| //should be lowercased, b/c we are using a lowercasing analyzer |
| Map<String, Integer> suggestions = result.get(tokens.iterator().next()); |
| @@ -128,8 +131,8 @@ |
| assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO); |
| |
| //test something not in the spell checker |
| - tokens = queryConverter.convert("super"); |
| - result = checker.getSuggestions(tokens, reader); |
| + spellOpts.tokens = queryConverter.convert("super"); |
| + result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| suggestions = result.get(tokens.iterator().next()); |
| assertTrue("suggestions is not null and it should be", suggestions == null); |
| @@ -160,7 +163,8 @@ |
| |
| IndexReader reader = core.getSearcher().get().getReader(); |
| Collection<Token> tokens = queryConverter.convert("solar"); |
| - SpellingResult result = checker.getSuggestions(tokens, reader); |
| + SpellingOptions spellOpts = new SpellingOptions(tokens, reader); |
| + SpellingResult result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| //should be lowercased, b/c we are using a lowercasing analyzer |
| Map<String, Integer> suggestions = result.get(tokens.iterator().next()); |
| @@ -170,10 +174,10 @@ |
| assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO); |
| |
| |
| - tokens = queryConverter.convert("super"); |
| - result = checker.getSuggestions(tokens, reader); |
| + spellOpts.tokens = queryConverter.convert("super"); |
| + result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| - suggestions = result.get(tokens.iterator().next()); |
| + suggestions = result.get(spellOpts.tokens.iterator().next()); |
| assertTrue("suggestions is not null and it should be", suggestions == null); |
| } |
| } |
| Index: solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java |
| =================================================================== |
| --- solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java (revision 987287) |
| +++ solr/src/test/org/apache/solr/spelling/IndexBasedSpellCheckerTest.java (working copy) |
| @@ -125,10 +125,11 @@ |
| |
| IndexReader reader = searcher.getReader(); |
| Collection<Token> tokens = queryConverter.convert("documemt"); |
| - SpellingResult result = checker.getSuggestions(tokens, reader); |
| + SpellingOptions spellOpts = new SpellingOptions(tokens, reader); |
| + SpellingResult result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| //should be lowercased, b/c we are using a lowercasing analyzer |
| - Map<String, Integer> suggestions = result.get(tokens.iterator().next()); |
| + Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next()); |
| assertTrue("documemt is null and it shouldn't be", suggestions != null); |
| assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1); |
| Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next(); |
| @@ -136,32 +137,33 @@ |
| assertTrue(entry.getValue() + " does not equal: " + SpellingResult.NO_FREQUENCY_INFO, entry.getValue() == SpellingResult.NO_FREQUENCY_INFO); |
| |
| //test something not in the spell checker |
| - tokens = queryConverter.convert("super"); |
| - result = checker.getSuggestions(tokens, reader); |
| + spellOpts.tokens = queryConverter.convert("super"); |
| + result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| - suggestions = result.get(tokens.iterator().next()); |
| + suggestions = result.get(spellOpts.tokens.iterator().next()); |
| assertTrue("suggestions is not null and it should be", suggestions == null); |
| |
| //test something that is spelled correctly |
| - tokens = queryConverter.convert("document"); |
| - result = checker.getSuggestions(tokens, reader); |
| + spellOpts.tokens = queryConverter.convert("document"); |
| + result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| - suggestions = result.get(tokens.iterator().next()); |
| + suggestions = result.get(spellOpts.tokens.iterator().next()); |
| assertTrue("suggestions is null and it shouldn't be", suggestions == null); |
| |
| //Has multiple possibilities, but the exact exists, so that should be returned |
| - tokens = queryConverter.convert("red"); |
| - result = checker.getSuggestions(tokens, reader, 2); |
| - assertTrue("result is null and it shouldn't be", result != null); |
| - suggestions = result.get(tokens.iterator().next()); |
| + spellOpts.tokens = queryConverter.convert("red"); |
| + spellOpts.count = 2; |
| + result = checker.getSuggestions(spellOpts); |
| + assertNotNull(result); |
| + suggestions = result.get(spellOpts.tokens.iterator().next()); |
| assertTrue("suggestions is not null and it should be", suggestions == null); |
| |
| //Try out something which should have multiple suggestions |
| - tokens = queryConverter.convert("bug"); |
| - result = checker.getSuggestions(tokens, reader, 2); |
| - assertTrue("result is null and it shouldn't be", result != null); |
| - suggestions = result.get(tokens.iterator().next()); |
| - assertTrue("suggestions is null and it shouldn't be", suggestions != null); |
| + spellOpts.tokens = queryConverter.convert("bug"); |
| + result = checker.getSuggestions(spellOpts); |
| + assertNotNull(result); |
| + suggestions = result.get(spellOpts.tokens.iterator().next()); |
| + assertNotNull(suggestions); |
| assertTrue("suggestions Size: " + suggestions.size() + " is not: " + 2, suggestions.size() == 2); |
| |
| entry = suggestions.entrySet().iterator().next(); |
| @@ -198,10 +200,11 @@ |
| |
| IndexReader reader = searcher.getReader(); |
| Collection<Token> tokens = queryConverter.convert("documemt"); |
| - SpellingResult result = checker.getSuggestions(tokens, reader, 1, false, true); |
| + SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null); |
| + SpellingResult result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| //should be lowercased, b/c we are using a lowercasing analyzer |
| - Map<String, Integer> suggestions = result.get(tokens.iterator().next()); |
| + Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next()); |
| assertTrue("documemt is null and it shouldn't be", suggestions != null); |
| assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1); |
| Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next(); |
| @@ -209,16 +212,16 @@ |
| assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2); |
| |
| //test something not in the spell checker |
| - tokens = queryConverter.convert("super"); |
| - result = checker.getSuggestions(tokens, reader, 1, false, true); |
| + spellOpts.tokens = queryConverter.convert("super"); |
| + result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| - suggestions = result.get(tokens.iterator().next()); |
| + suggestions = result.get(spellOpts.tokens.iterator().next()); |
| assertTrue("suggestions is not null and it should be", suggestions == null); |
| |
| - tokens = queryConverter.convert("document"); |
| - result = checker.getSuggestions(tokens, reader, 1, false, true); |
| + spellOpts.tokens = queryConverter.convert("document"); |
| + result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| - suggestions = result.get(tokens.iterator().next()); |
| + suggestions = result.get(spellOpts.tokens.iterator().next()); |
| assertTrue("suggestions is not null and it should be", suggestions == null); |
| } finally { |
| holder.decref(); |
| @@ -304,10 +307,11 @@ |
| |
| IndexReader reader = searcher.getReader(); |
| Collection<Token> tokens = queryConverter.convert("flesh"); |
| - SpellingResult result = checker.getSuggestions(tokens, reader, 1, false, true); |
| + SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, false, true, 0.5f, null); |
| + SpellingResult result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| //should be lowercased, b/c we are using a lowercasing analyzer |
| - Map<String, Integer> suggestions = result.get(tokens.iterator().next()); |
| + Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next()); |
| assertTrue("flesh is null and it shouldn't be", suggestions != null); |
| assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1); |
| Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next(); |
| @@ -315,16 +319,16 @@ |
| assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1); |
| |
| //test something not in the spell checker |
| - tokens = queryConverter.convert("super"); |
| - result = checker.getSuggestions(tokens, reader, 1, false, true); |
| + spellOpts.tokens = queryConverter.convert("super"); |
| + result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| - suggestions = result.get(tokens.iterator().next()); |
| + suggestions = result.get(spellOpts.tokens.iterator().next()); |
| assertTrue("suggestions is not null and it should be", suggestions == null); |
| |
| - tokens = queryConverter.convert("Caroline"); |
| - result = checker.getSuggestions(tokens, reader, 1, false, true); |
| + spellOpts.tokens = queryConverter.convert("Caroline"); |
| + result = checker.getSuggestions(spellOpts); |
| assertTrue("result is null and it shouldn't be", result != null); |
| - suggestions = result.get(tokens.iterator().next()); |
| + suggestions = result.get(spellOpts.tokens.iterator().next()); |
| assertTrue("suggestions is not null and it should be", suggestions == null); |
| } finally { |
| holder.decref(); |
| Index: solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java |
| =================================================================== |
| --- solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java (revision 987287) |
| +++ solr/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java (working copy) |
| @@ -24,6 +24,7 @@ |
| import org.apache.solr.common.params.CommonParams; |
| import org.apache.solr.common.params.MapSolrParams; |
| import org.apache.solr.common.params.ModifiableSolrParams; |
| +import org.apache.solr.common.params.SpellingParams; |
| import org.apache.solr.common.util.NamedList; |
| import org.apache.solr.common.util.SimpleOrderedMap; |
| import org.apache.solr.core.SolrCore; |
| @@ -33,7 +34,6 @@ |
| import org.apache.solr.response.SolrQueryResponse; |
| import org.apache.solr.spelling.AbstractLuceneSpellChecker; |
| import org.apache.solr.spelling.IndexBasedSpellChecker; |
| -import org.apache.solr.util.AbstractSolrTestCase; |
| import org.junit.BeforeClass; |
| import org.junit.Test; |
| |
| @@ -133,9 +133,9 @@ |
| assertTrue(cmdExec + " is not equal to " + "build", |
| cmdExec.equals("build") == true); |
| NamedList spellCheck = (NamedList) values.get("spellcheck"); |
| - assertTrue("spellCheck is null and it shouldn't be", spellCheck != null); |
| + assertNotNull(spellCheck); |
| NamedList suggestions = (NamedList) spellCheck.get("suggestions"); |
| - assertTrue("suggestions is null and it shouldn't be", suggestions != null); |
| + assertNotNull(suggestions); |
| NamedList document = (NamedList) suggestions.get("documemt"); |
| assertEquals(1, document.get("numFound")); |
| assertEquals(0, document.get("startOffset")); |
| @@ -145,7 +145,51 @@ |
| assertEquals("document", theSuggestion.iterator().next()); |
| } |
| |
| + |
| @Test |
| + public void testPerDictionary() throws Exception { |
| + SolrCore core = h.getCore(); |
| + SearchComponent speller = core.getSearchComponent("spellcheck"); |
| + assertTrue("speller is null and it shouldn't be", speller != null); |
| + |
| + ModifiableSolrParams params = new ModifiableSolrParams(); |
| + params.add(CommonParams.QT, "spellCheckCompRH"); |
| + params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true"); |
| + params.add(CommonParams.Q, "documemt"); |
| + params.add(SpellCheckComponent.COMPONENT_NAME, "true"); |
| + params.add(SpellingParams.SPELLCHECK_DICT, "perDict"); |
| + |
| + params.add(SpellingParams.SPELLCHECK_PREFIX + ".perDict.foo", "bar"); |
| + params.add(SpellingParams.SPELLCHECK_PREFIX + ".perDict.bar", "foo"); |
| + |
| + SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH"); |
| + SolrQueryResponse rsp = new SolrQueryResponse(); |
| + handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); |
| + NamedList values = rsp.getValues(); |
| + |
| + NamedList spellCheck = (NamedList) values.get("spellcheck"); |
| + NamedList suggestions = (NamedList) spellCheck.get("suggestions"); |
| + assertNotNull("suggestions", suggestions); |
| + NamedList suggestion; |
| + Collection<String> theSuggestion; |
| + suggestion = (NamedList) suggestions.get("foo"); |
| + assertEquals(1, suggestion.get("numFound")); |
| + assertEquals(0, suggestion.get("startOffset")); |
| + assertEquals(suggestion.get("endOffset"), 1); |
| + theSuggestion = (Collection<String>) suggestion.get("suggestion"); |
| + assertEquals(1, theSuggestion.size()); |
| + assertEquals("bar", theSuggestion.iterator().next()); |
| + |
| + suggestion = (NamedList) suggestions.get("bar"); |
| + assertEquals(1, suggestion.get("numFound")); |
| + assertEquals(2, suggestion.get("startOffset")); |
| + assertEquals(3, suggestion.get("endOffset")); |
| + theSuggestion = (Collection<String>) suggestion.get("suggestion"); |
| + assertEquals(1, theSuggestion.size()); |
| + assertEquals("foo", theSuggestion.iterator().next()); |
| + } |
| + |
| + @Test |
| public void testCollate() throws Exception { |
| SolrCore core = h.getCore(); |
| SearchComponent speller = core.getSearchComponent("spellcheck"); |
| Index: solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java |
| =================================================================== |
| --- solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java (revision 0) |
| +++ solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java (revision 0) |
| @@ -0,0 +1,68 @@ |
| +package org.apache.solr.handler.component; |
| + |
| +import org.apache.lucene.analysis.Token; |
| +import org.apache.lucene.index.IndexReader; |
| +import org.apache.solr.core.SolrCore; |
| +import org.apache.solr.search.SolrIndexSearcher; |
| +import org.apache.solr.spelling.SolrSpellChecker; |
| +import org.apache.solr.spelling.SpellingOptions; |
| +import org.apache.solr.spelling.SpellingResult; |
| + |
| +import java.io.IOException; |
| +import java.util.Collection; |
| +import java.util.Collections; |
| +import java.util.Iterator; |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| + |
| +/** |
| + * A Dummy SpellChecker for testing purposes |
| + * |
| + **/ |
| +public class DummyCustomParamSpellChecker extends SolrSpellChecker { |
| + |
| + @Override |
| + public void reload() throws IOException { |
| + |
| + } |
| + |
| + @Override |
| + public void build(SolrCore core, SolrIndexSearcher searcher) { |
| + |
| + } |
| + |
| + @Override |
| + public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count, boolean onlyMorePopular, boolean extendedResults) throws IOException { |
| + return getSuggestions(new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults, 0, null)); |
| + } |
| + |
| + @Override |
| + public SpellingResult getSuggestions(SpellingOptions options) throws IOException { |
| + |
| + SpellingResult result = new SpellingResult(); |
| + //just spit back out the results |
| + Iterator<String> iterator = options.customParams.getParameterNamesIterator(); |
| + int i = 0; |
| + while (iterator.hasNext()){ |
| + String name = iterator.next(); |
| + String value = options.customParams.get(name); |
| + result.add(new Token(name, i++, i++), Collections.singletonList(value)); |
| + } |
| + return result; |
| + } |
| +} |
| |
| Property changes on: solr/src/test/org/apache/solr/handler/component/DummyCustomParamSpellChecker.java |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| + native |
| |
| Index: solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java |
| =================================================================== |
| --- solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (revision 987287) |
| +++ solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (working copy) |
| @@ -23,11 +23,13 @@ |
| import java.util.concurrent.ConcurrentHashMap; |
| |
| import org.apache.lucene.search.spell.LevensteinDistance; |
| +import org.apache.lucene.search.spell.SpellChecker; |
| import org.apache.lucene.search.spell.StringDistance; |
| import org.apache.lucene.search.spell.SuggestWord; |
| import org.apache.lucene.search.spell.SuggestWordQueue; |
| import org.apache.lucene.util.PriorityQueue; |
| import org.apache.solr.client.solrj.response.SpellCheckResponse; |
| +import org.apache.solr.common.params.ModifiableSolrParams; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| @@ -144,8 +146,12 @@ |
| NamedList response = new SimpleOrderedMap(); |
| IndexReader reader = rb.req.getSearcher().getReader(); |
| boolean collate = params.getBool(SPELLCHECK_COLLATE, false); |
| - SpellingResult spellingResult = spellChecker.getSuggestions(tokens, |
| - reader, count, onlyMorePopular, extendedResults); |
| + float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE); |
| + SolrParams customParams = getCustomParams(getDictionaryName(params), params); |
| + SpellingOptions options = new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults, |
| + accuracy, customParams); |
| + |
| + SpellingResult spellingResult = spellChecker.getSuggestions(options); |
| if (spellingResult != null) { |
| response.add("suggestions", toNamedList(spellingResult, q, |
| extendedResults, collate)); |
| @@ -159,6 +165,24 @@ |
| } |
| } |
| |
| + /** |
| + * For every param that is of the form "spellcheck.[dictionary name].XXXX=YYYY", add |
| + * XXXX=YYYY as a param to the custom param list |
| + * @param params The original SolrParams |
| + * @return The new Params |
| + */ |
| + protected SolrParams getCustomParams(String dictionary, SolrParams params) { |
| + ModifiableSolrParams result = new ModifiableSolrParams(); |
| + Iterator<String> iter = params.getParameterNamesIterator(); |
| + String prefix = SpellingParams.SPELLCHECK_PREFIX + "." + dictionary + "."; |
| + while (iter.hasNext()){ |
| + String nxt = iter.next(); |
| + if (nxt.startsWith(prefix)){ |
| + result.add(nxt.substring(prefix.length()), params.getParams(nxt)); |
| + } |
| + } |
| + return result; |
| + } |
| |
| |
| @Override |
| @@ -341,13 +365,17 @@ |
| } |
| |
| protected SolrSpellChecker getSpellChecker(SolrParams params) { |
| + return spellCheckers.get(getDictionaryName(params)); |
| + } |
| + |
| + private String getDictionaryName(SolrParams params) { |
| String dictName = params.get(SPELLCHECK_DICT); |
| if (dictName == null) { |
| dictName = SolrSpellChecker.DEFAULT_DICTIONARY_NAME; |
| } |
| - return spellCheckers.get(dictName); |
| + return dictName; |
| } |
| - |
| + |
| /** |
| * @return the spellchecker registered to a given name |
| */ |
| Index: solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java |
| =================================================================== |
| --- solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java (revision 987287) |
| +++ solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java (working copy) |
| @@ -149,30 +149,47 @@ |
| } |
| return name; |
| } |
| - |
| - @SuppressWarnings("unchecked") |
| - public SpellingResult getSuggestions(Collection<Token> tokens, |
| - IndexReader reader, int count, boolean onlyMorePopular, |
| - boolean extendedResults) |
| - throws IOException { |
| - SpellingResult result = new SpellingResult(tokens); |
| - reader = determineReader(reader); |
| + |
| + /** |
| + * Kept around for back compatibility purposes. |
| + * |
| + * @param tokens The Tokens to be spell checked. |
| + * @param reader The (optional) IndexReader. If there is no IndexReader, then extendedResults are not possible |
| + * @param count The maximum number of suggestions to return |
| + * @param onlyMorePopular TODO |
| + * @param extendedResults TODO |
| + * @return The {@link SpellingResult} suggestions |
| + * @throws IOException |
| + */ |
| + @Override |
| + public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count, boolean onlyMorePopular, boolean extendedResults) throws IOException { |
| + return getSuggestions(new SpellingOptions(tokens, reader, count, onlyMorePopular, extendedResults, spellChecker.getAccuracy(), null)); |
| + } |
| + |
| + @Override |
| + public SpellingResult getSuggestions(SpellingOptions options) throws IOException { |
| + SpellingResult result = new SpellingResult(options.tokens); |
| + IndexReader reader = determineReader(options.reader); |
| Term term = field != null ? new Term(field, "") : null; |
| - for (Token token : tokens) { |
| + float theAccuracy = (options.accuracy == Float.MIN_VALUE) ? spellChecker.getAccuracy() : options.accuracy; |
| + |
| + int count = (int) Math.max(options.count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT); |
| + for (Token token : options.tokens) { |
| String tokenText = new String(token.buffer(), 0, token.length()); |
| - String[] suggestions = spellChecker.suggestSimilar(tokenText, (int) Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT), |
| + String[] suggestions = spellChecker.suggestSimilar(tokenText, |
| + count, |
| field != null ? reader : null, //workaround LUCENE-1295 |
| field, |
| - onlyMorePopular); |
| + options.onlyMorePopular, theAccuracy); |
| if (suggestions.length == 1 && suggestions[0].equals(tokenText)) { |
| //These are spelled the same, continue on |
| continue; |
| } |
| |
| - if (extendedResults == true && reader != null && field != null) { |
| + if (options.extendedResults == true && reader != null && field != null) { |
| term = term.createTerm(tokenText); |
| result.add(token, reader.docFreq(term)); |
| - int countLimit = Math.min(count, suggestions.length); |
| + int countLimit = Math.min(options.count, suggestions.length); |
| for (int i = 0; i < countLimit; i++) { |
| term = term.createTerm(suggestions[i]); |
| result.add(token, suggestions[i], reader.docFreq(term)); |
| @@ -180,8 +197,8 @@ |
| } else { |
| if (suggestions.length > 0) { |
| List<String> suggList = Arrays.asList(suggestions); |
| - if (suggestions.length > count) { |
| - suggList = suggList.subList(0, count); |
| + if (suggestions.length > options.count) { |
| + suggList = suggList.subList(0, options.count); |
| } |
| result.add(token, suggList); |
| } |
| Index: solr/src/java/org/apache/solr/spelling/SolrSpellChecker.java |
| =================================================================== |
| --- solr/src/java/org/apache/solr/spelling/SolrSpellChecker.java (revision 987287) |
| +++ solr/src/java/org/apache/solr/spelling/SolrSpellChecker.java (working copy) |
| @@ -74,6 +74,8 @@ |
| * Assumes count = 1, onlyMorePopular = false, extendedResults = false |
| * |
| * @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean) |
| + * |
| + * @deprecated This method will be removed in 4.x in favor of {@link #getSuggestions(org.apache.solr.spelling.SpellingOptions)} |
| */ |
| public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader) throws IOException { |
| return getSuggestions(tokens, reader, 1, false, false); |
| @@ -83,6 +85,8 @@ |
| * Assumes onlyMorePopular = false, extendedResults = false |
| * |
| * @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean) |
| + * |
| + * @deprecated This method will be removed in 4.x in favor of {@link #getSuggestions(org.apache.solr.spelling.SpellingOptions)} |
| */ |
| public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count) throws IOException { |
| return getSuggestions(tokens, reader, count, false, false); |
| @@ -93,6 +97,8 @@ |
| * Assumes count = 1. |
| * |
| * @see #getSuggestions(Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean) |
| + * |
| + * @deprecated This method will be removed in 4.x in favor of {@link #getSuggestions(org.apache.solr.spelling.SpellingOptions)} |
| */ |
| public SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, boolean onlyMorePopular, boolean extendedResults) throws IOException { |
| return getSuggestions(tokens, reader, 1, onlyMorePopular, extendedResults); |
| @@ -108,8 +114,27 @@ |
| * @param onlyMorePopular TODO |
| * @param extendedResults TODO |
| * @throws IOException |
| + * |
| + * @deprecated This method will be removed in 4.x in favor of {@link #getSuggestions(org.apache.solr.spelling.SpellingOptions)} |
| */ |
| public abstract SpellingResult getSuggestions(Collection<Token> tokens, IndexReader reader, int count, |
| boolean onlyMorePopular, boolean extendedResults) |
| throws IOException; |
| + |
| + /** |
| + * Get suggestions for the given query. Tokenizes the query using a field-appropriate Analyzer. |
| + * The {@link SpellingResult#getSuggestions()} suggestions must be ordered by best suggestion first. |
| + * <p/> |
| + * Note: This method is abstract in Solr 4.0 and beyond and is the recommended way of implementing the spell checker. For now, |
| + * it calls {@link #getSuggestions(java.util.Collection, org.apache.lucene.index.IndexReader, int, boolean, boolean)}. |
| + * |
| + * |
| + * @param options The {@link SpellingOptions} to use |
| + * @return The {@link SpellingResult} suggestions |
| + * @throws IOException if there is an error producing suggestions |
| + */ |
| + public SpellingResult getSuggestions(SpellingOptions options) throws IOException{ |
| + return getSuggestions(options.tokens, options.reader, options.count, options.onlyMorePopular, options.extendedResults); |
| + } |
| + |
| } |
| Index: solr/src/java/org/apache/solr/spelling/SpellingOptions.java |
| =================================================================== |
| --- solr/src/java/org/apache/solr/spelling/SpellingOptions.java (revision 0) |
| +++ solr/src/java/org/apache/solr/spelling/SpellingOptions.java (revision 0) |
| @@ -0,0 +1,94 @@ |
| +package org.apache.solr.spelling; |
| + |
| +import org.apache.lucene.analysis.Token; |
| +import org.apache.lucene.index.IndexReader; |
| +import org.apache.solr.common.params.SolrParams; |
| + |
| +import java.util.Collection; |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| + |
| +/** |
| + * Encapsulates the parameters passed in to a {@link SolrSpellChecker} when requesting |
| + * spelling suggestions, including a per-request accuracy and any custom parameters. |
| + **/ |
| +public class SpellingOptions { |
| + |
| + /** |
| + * The tokens to spell check |
| + */ |
| + public Collection<Token> tokens; |
| + /** |
| + * An optional {@link org.apache.lucene.index.IndexReader} |
| + */ |
| + public IndexReader reader; |
| + /** |
| + * The number of suggestions to return, if there are any. Defaults to 1. |
| + */ |
| + public int count = 1; |
| + /** |
| + * Return only those results that are more popular, as defined by the implementation |
| + */ |
| + public boolean onlyMorePopular; |
| + /** |
| + * Provide additional, per implementation, information about the results |
| + */ |
| + public boolean extendedResults; |
| + |
| + /** |
| + * Optionally restrict the results to have a minimum accuracy level. Per Implementation. |
| + * By default set to Float.MIN_VALUE. |
| + */ |
| + public float accuracy = Float.MIN_VALUE; |
| + |
| + /** |
| + * Any other custom params can be passed through. May be null and is null by default. |
| + */ |
| + public SolrParams customParams; |
| + |
| + public SpellingOptions() { |
| + } |
| + |
| + //A couple of convenience ones |
| + public SpellingOptions(Collection<Token> tokens, int count) { |
| + this.tokens = tokens; |
| + this.count = count; |
| + } |
| + |
| + public SpellingOptions(Collection<Token> tokens, IndexReader reader) { |
| + this.tokens = tokens; |
| + this.reader = reader; |
| + } |
| + |
| + public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count) { |
| + this.tokens = tokens; |
| + this.reader = reader; |
| + this.count = count; |
| + } |
| + |
| + |
| + public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count, boolean onlyMorePopular, boolean extendedResults, float accuracy, SolrParams customParams) { |
| + this.tokens = tokens; |
| + this.reader = reader; |
| + this.count = count; |
| + this.onlyMorePopular = onlyMorePopular; |
| + this.extendedResults = extendedResults; |
| + this.accuracy = accuracy; |
| + this.customParams = customParams; |
| + } |
| +} |
| |
| Property changes on: solr/src/java/org/apache/solr/spelling/SpellingOptions.java |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| + native |
| |
| Index: solr/src/common/org/apache/solr/common/params/SpellingParams.java |
| =================================================================== |
| --- solr/src/common/org/apache/solr/common/params/SpellingParams.java (revision 987287) |
| +++ solr/src/common/org/apache/solr/common/params/SpellingParams.java (working copy) |
| @@ -81,4 +81,9 @@ |
| * Take the top suggestion for each token and create a new query from it |
| */ |
| public static final String SPELLCHECK_COLLATE = SPELLCHECK_PREFIX + "collate"; |
| + |
| + /** |
| + * Certain spelling implementations may allow for an accuracy setting. |
| + */ |
| + public static final String SPELLCHECK_ACCURACY = SPELLCHECK_PREFIX + "accuracy"; |
| } |
| Index: lucene/contrib/CHANGES.txt |
| =================================================================== |
| --- lucene/contrib/CHANGES.txt (revision 987287) |
| +++ lucene/contrib/CHANGES.txt (working copy) |
| @@ -11,6 +11,9 @@ |
| * LUCENE-2479: Added ability to provide a sort comparator for spelling suggestions along |
| with two implementations. The existing comparator (score, then frequency) is the default (Grant Ingersoll) |
| |
| + * LUCENE-2608: Added the ability to specify the accuracy on a per-method-call basis in the SpellChecker. The |
| + per-instance setting via setAccuracy() is also still available. (Grant Ingersoll) |
| + |
| ======================= Lucene 3.x (not yet released) ======================= |
| |
| Changes in backwards compatibility policy |
| Index: lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java |
| =================================================================== |
| --- lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (revision 987287) |
| +++ lucene/contrib/spellchecker/src/test/org/apache/lucene/search/spell/TestSpellChecker.java (working copy) |
| @@ -104,11 +104,21 @@ |
| spellChecker.setAccuracy(0.8f); |
| checkCommonSuggestions(r); |
| checkJaroWinklerSuggestions(); |
| + // the accuracy was set to 0.8 above, but the best result has a score of 0.925 |
| + String[] similar = spellChecker.suggestSimilar("fvie", 2, 0.93f); |
| + assertTrue(similar.length == 0); |
| + similar = spellChecker.suggestSimilar("fvie", 2, 0.92f); |
| + assertTrue(similar.length == 1); |
| + |
| + similar = spellChecker.suggestSimilar("fiv", 2); |
| + assertTrue(similar.length > 0); |
| + assertEquals(similar[0], "five"); |
| |
| spellChecker.setStringDistance(new NGramDistance(2)); |
| spellChecker.setAccuracy(0.5f); |
| checkCommonSuggestions(r); |
| checkNGramSuggestions(); |
| + |
| r.close(); |
| } |
| |
| @@ -127,8 +137,6 @@ |
| if (!compareSP.isClosed()) |
| compareSP.close(); |
| compIdx.close(); |
| - |
| - |
| } |
| |
| private void checkCommonSuggestions(IndexReader r) throws IOException { |
| Index: lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java |
| =================================================================== |
| --- lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (revision 987287) |
| +++ lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (working copy) |
| @@ -63,10 +63,15 @@ |
| public class SpellChecker implements java.io.Closeable { |
| |
| /** |
| + * The default minimum score to use, if not specified by calling {@link #setAccuracy(float)} . |
| + */ |
| + public static final float DEFAULT_ACCURACY = 0.5f; |
| + |
| + /** |
| * Field name for each word in the ngram index. |
| */ |
| public static final String F_WORD = "word"; |
| - |
| + |
| private static final Term F_WORD_TERM = new Term(F_WORD); |
| |
| /** |
| @@ -75,35 +80,34 @@ |
| // don't modify the directory directly - see #swapSearcher() |
| // TODO: why is this package private? |
| Directory spellIndex; |
| - |
| /** |
| * Boost value for start and end grams |
| */ |
| private float bStart = 2.0f; |
| + |
| private float bEnd = 1.0f; |
| + // don't use this searcher directly - see #swapSearcher() |
| |
| - // don't use this searcher directly - see #swapSearcher() |
| private IndexSearcher searcher; |
| - |
| /* |
| - * this locks all modifications to the current searcher. |
| + * this locks all modifications to the current searcher. |
| */ |
| + |
| private final Object searcherLock = new Object(); |
| - |
| /* |
| - * this lock synchronizes all possible modifications to the |
| + * this lock synchronizes all possible modifications to the |
| * current index directory. It should not be possible to try modifying |
| * the same index concurrently. Note: Do not acquire the searcher lock |
| - * before acquiring this lock! |
| + * before acquiring this lock! |
| */ |
| private final Object modifyCurrentIndexLock = new Object(); |
| + |
| private volatile boolean closed = false; |
| + // minimum score for hits generated by the spell checker query |
| |
| - // minimum score for hits generated by the spell checker query |
| - private float minScore = 0.5f; |
| - |
| + private float accuracy = DEFAULT_ACCURACY; |
| + |
| private StringDistance sd; |
| - |
| private Comparator<SuggestWord> comparator; |
| |
| /** |
| @@ -202,13 +206,23 @@ |
| } |
| |
| /** |
| - * Sets the accuracy 0 < minScore < 1; default 0.5 |
| + * Sets the accuracy 0 < minScore < 1; default {@link #DEFAULT_ACCURACY} |
| + * @param acc The new accuracy |
| */ |
| - public void setAccuracy(float minScore) { |
| - this.minScore = minScore; |
| + public void setAccuracy(float acc) { |
| + this.accuracy = acc; |
| } |
| |
| /** |
| + * The accuracy (minimum score) to be used, unless overridden in {@link #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float)}, to |
| + * decide whether a suggestion is included or not. |
| + * @return The current accuracy setting |
| + */ |
| + public float getAccuracy() { |
| + return accuracy; |
| + } |
| + |
| + /** |
| * Suggest similar words. |
| * |
| * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms |
| @@ -224,12 +238,38 @@ |
| * @throws IOException if the underlying index throws an {@link IOException} |
| * @throws AlreadyClosedException if the Spellchecker is already closed |
| * @return String[] |
| + * |
| + * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float) |
| */ |
| public String[] suggestSimilar(String word, int numSug) throws IOException { |
| return this.suggestSimilar(word, numSug, null, null, false); |
| } |
| |
| /** |
| + * Suggest similar words. |
| + * |
| + * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms |
| + * is not the same as the edit distance strategy used to calculate the best |
| + * matching spell-checked word from the hits that Lucene found, one usually has |
| + * to retrieve a couple of numSug's in order to get the true best match. |
| + * |
| + * <p>I.e. if numSug == 1, don't count on that suggestion being the best one. |
| + * Thus, you should set this value to <b>at least</b> 5 for a good suggestion. |
| + * |
| + * @param word the word you want a spell check done on |
| + * @param numSug the number of suggested words |
| + * @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results |
| + * @throws IOException if the underlying index throws an {@link IOException} |
| + * @throws AlreadyClosedException if the Spellchecker is already closed |
| + * @return String[] |
| + * |
| + * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float) |
| + */ |
| + public String[] suggestSimilar(String word, int numSug, float accuracy) throws IOException { |
| + return this.suggestSimilar(word, numSug, null, null, false, accuracy); |
| + } |
| + |
| + /** |
| * Suggest similar words (optionally restricted to a field of an index). |
| * |
| * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms |
| @@ -240,6 +280,8 @@ |
| * <p>I.e. if numSug == 1, don't count on that suggestion being the best one. |
| * Thus, you should set this value to <b>at least</b> 5 for a good suggestion. |
| * |
| + * <p>Uses the {@link #getAccuracy()} value (set via {@link #setAccuracy(float)}, default {@link #DEFAULT_ACCURACY}) as the accuracy. |
| + * |
| * @param word the word you want a spell check done on |
| * @param numSug the number of suggested words |
| * @param ir the indexReader of the user index (can be null see field param) |
| @@ -252,74 +294,107 @@ |
| * @return String[] the sorted list of the suggest words with these 2 criteria: |
| * first criteria: the edit distance, second criteria (only if restricted mode): the popularity |
| * of the suggest words in the field of the user index |
| + * |
| + * @see #suggestSimilar(String, int, org.apache.lucene.index.IndexReader, String, boolean, float) |
| */ |
| public String[] suggestSimilar(String word, int numSug, IndexReader ir, |
| String field, boolean morePopular) throws IOException { |
| + return suggestSimilar(word, numSug, ir, field, morePopular, accuracy); |
| + } |
| + |
| + |
| + /** |
| + * Suggest similar words (optionally restricted to a field of an index). |
| + * |
| + * <p>As the Lucene similarity that is used to fetch the most relevant n-grammed terms |
| + * is not the same as the edit distance strategy used to calculate the best |
| + * matching spell-checked word from the hits that Lucene found, one usually has |
| + * to retrieve a couple of numSug's in order to get the true best match. |
| + * |
| + * <p>I.e. if numSug == 1, don't count on that suggestion being the best one. |
| + * Thus, you should set this value to <b>at least</b> 5 for a good suggestion. |
| + * |
| + * @param word the word you want a spell check done on |
| + * @param numSug the number of suggested words |
| + * @param ir the indexReader of the user index (can be null see field param) |
| + * @param field the field of the user index: if field is not null, the suggested |
| + * words are restricted to the words present in this field. |
| + * @param morePopular return only the suggest words that are as frequent or more frequent than the searched word |
| + * (only if restricted mode = (indexReader!=null and field!=null) |
| + * @param accuracy The minimum score a suggestion must have in order to qualify for inclusion in the results |
| + * @throws IOException if the underlying index throws an {@link IOException} |
| + * @throws AlreadyClosedException if the Spellchecker is already closed |
| + * @return String[] the sorted list of the suggest words with these 2 criteria: |
| + * first criteria: the edit distance, second criteria (only if restricted mode): the popularity |
| + * of the suggest words in the field of the user index |
| + */ |
| + public String[] suggestSimilar(String word, int numSug, IndexReader ir, |
| + String field, boolean morePopular, float accuracy) throws IOException { |
| // obtainSearcher calls ensureOpen |
| final IndexSearcher indexSearcher = obtainSearcher(); |
| try{ |
| - float min = this.minScore; |
| + |
| final int lengthWord = word.length(); |
| - |
| + |
| final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0; |
| final int goalFreq = (morePopular && ir != null && field != null) ? freq : 0; |
| // if the word exists in the real index and we don't care for word frequency, return the word itself |
| if (!morePopular && freq > 0) { |
| return new String[] { word }; |
| } |
| - |
| + |
| BooleanQuery query = new BooleanQuery(); |
| String[] grams; |
| String key; |
| - |
| + |
| for (int ng = getMin(lengthWord); ng <= getMax(lengthWord); ng++) { |
| - |
| + |
| key = "gram" + ng; // form key |
| - |
| + |
| grams = formGrams(word, ng); // form word into ngrams (allow dups too) |
| - |
| + |
| if (grams.length == 0) { |
| continue; // hmm |
| } |
| - |
| + |
| if (bStart > 0) { // should we boost prefixes? |
| add(query, "start" + ng, grams[0], bStart); // matches start of word |
| - |
| + |
| } |
| if (bEnd > 0) { // should we boost suffixes |
| add(query, "end" + ng, grams[grams.length - 1], bEnd); // matches end of word |
| - |
| + |
| } |
| for (int i = 0; i < grams.length; i++) { |
| add(query, key, grams[i]); |
| } |
| } |
| - |
| + |
| int maxHits = 10 * numSug; |
| - |
| + |
| // System.out.println("Q: " + query); |
| ScoreDoc[] hits = indexSearcher.search(query, null, maxHits).scoreDocs; |
| // System.out.println("HITS: " + hits.length()); |
| SuggestWordQueue sugQueue = new SuggestWordQueue(numSug, comparator); |
| - |
| + |
| // go thru more than 'maxr' matches in case the distance filter triggers |
| int stop = Math.min(hits.length, maxHits); |
| SuggestWord sugWord = new SuggestWord(); |
| for (int i = 0; i < stop; i++) { |
| - |
| + |
| sugWord.string = indexSearcher.doc(hits[i].doc).get(F_WORD); // get orig word |
| - |
| + |
| // don't suggest a word for itself, that would be silly |
| if (sugWord.string.equals(word)) { |
| continue; |
| } |
| - |
| + |
| // edit distance |
| sugWord.score = sd.getDistance(word,sugWord.string); |
| - if (sugWord.score < min) { |
| + if (sugWord.score < accuracy) { |
| continue; |
| } |
| - |
| + |
| if (ir != null && field != null) { // use the user index |
| sugWord.freq = ir.docFreq(new Term(field, sugWord.string)); // freq in the index |
| // don't suggest a word that is not present in the field |
| @@ -330,23 +405,22 @@ |
| sugQueue.insertWithOverflow(sugWord); |
| if (sugQueue.size() == numSug) { |
| // if queue full, maintain the minScore score |
| - min = sugQueue.top().score; |
| + accuracy = sugQueue.top().score; |
| } |
| sugWord = new SuggestWord(); |
| } |
| - |
| + |
| // convert to array string |
| String[] list = new String[sugQueue.size()]; |
| for (int i = sugQueue.size() - 1; i >= 0; i--) { |
| list[i] = sugQueue.pop().string; |
| } |
| - |
| + |
| return list; |
| } finally { |
| releaseSearcher(indexSearcher); |
| } |
| } |
| - |
| /** |
| * Add a clause to a boolean query. |
| */ |