// blob: ea40f326c959ac71daf525707f2b11bab5602922 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.spelling;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SpellCheckComponent;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.junit.BeforeClass;
import org.junit.Test;
@SuppressTempFileChecks(bugUrl = "https://issues.apache.org/jira/browse/SOLR-1877 Spellcheck IndexReader leak bug?")
public class WordBreakSolrSpellCheckerTest extends SolrTestCaseJ4 {
/**
 * Initializes the test core and indexes eight small documents into the
 * {@code lowerfilt} field, then commits them so every test in this class
 * runs against the same index.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  initCore("solrconfig-spellcheckcomponent.xml","schema.xml");

  // One entry per document; the array position is used as the document id.
  final String[] lowerfiltValues = {
    "pain table paintablepine pi ne in able",
    "paint able pineapple goodness in",
    "pa in table pineapplegoodness",
    "printable line in ample food mess",
    "printable in pointable paint able",
    "printable in puntable paint able ",
    "paint able in pintable plantable",
    "zxcvqwtp fg hj"
  };
  for (int docId = 0; docId < lowerfiltValues.length; docId++) {
    assertNull(h.validateUpdate(
        adoc("id", Integer.toString(docId), "lowerfilt", lowerfiltValues[docId])));
  }
  assertNull(h.validateUpdate(commit()));

  // Resulting document frequencies in "lowerfilt":
  //   in        -> 7
  //   able      -> 5
  //   paint     -> 4
  //   printable -> 3
  //   table     -> 2
  //   every other term -> 1
}
/**
 * Exercises {@link WordBreakSolrSpellChecker} directly (outside of a request handler)
 * with both word-breaking and word-combining enabled, and verifies the offsets and
 * ordered corrections reported for every original token / token combination.
 */
@Test
public void testStandAlone() throws Exception {
  SolrCore core = h.getCore();
  WordBreakSolrSpellChecker checker = new WordBreakSolrSpellChecker();
  NamedList<String> params = new NamedList<>();
  params.add("field", "lowerfilt");
  params.add(WordBreakSolrSpellChecker.PARAM_BREAK_WORDS, "true");
  params.add(WordBreakSolrSpellChecker.PARAM_COMBINE_WORDS, "true");
  params.add(WordBreakSolrSpellChecker.PARAM_MAX_CHANGES, "10");
  checker.init(params, core);

  // A single getSearcher() call acquires a single reference, so it must be released
  // exactly once. The finally block guarantees that even when an assertion fails.
  RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
  try {
    QueryConverter qc = new SpellingQueryConverter();
    qc.setAnalyzer(new MockAnalyzer(random()));

    // Prior to SOLR-8175, the required term would cause an AIOOBE.
    Collection<Token> requiredTermTokens = qc.convert("+pine apple good ness");
    SpellingResult requiredTermResult = checker.getSuggestions(
        new SpellingOptions(requiredTermTokens, searcher.get().getIndexReader(), 10));
    assertNotNull(requiredTermResult);
    assertNotNull(requiredTermResult.getSuggestions());
    assertEquals(5, requiredTermResult.getSuggestions().size());

    Collection<Token> tokens = qc.convert("paintable pine apple good ness");
    SpellingResult result = checker.getSuggestions(
        new SpellingOptions(tokens, searcher.get().getIndexReader(), 10));
    assertNotNull(result);
    assertNotNull(result.getSuggestions());
    assertEquals(9, result.getSuggestions().size());

    for (Map.Entry<Token, LinkedHashMap<String, Integer>> s : result.getSuggestions().entrySet()) {
      Token orig = s.getKey();
      String[] corr = s.getValue().keySet().toArray(new String[0]);
      switch (orig.toString()) {
        case "paintable":
          assertSuggestion(orig, corr, 0, 9,
              "paint able",    // 1 op ; max doc freq=5
              "pain table",    // 1 op ; max doc freq=2
              "pa in table");  // 2 ops
          break;
        case "pine apple":
          assertSuggestion(orig, corr, 10, 20, "pineapple");
          break;
        case "paintable pine":
          assertSuggestion(orig, corr, 0, 14, "paintablepine");
          break;
        case "good ness":
          assertSuggestion(orig, corr, 21, 30, "goodness");
          break;
        case "pine apple good ness":
          assertSuggestion(orig, corr, 10, 30, "pineapplegoodness");
          break;
        case "pine":
          assertSuggestion(orig, corr, 10, 14, "pi ne");
          break;
        case "apple":
          // no corrections expected
          assertSuggestion(orig, corr, 15, 20);
          break;
        case "good":
          // no corrections expected
          assertSuggestion(orig, corr, 21, 25);
          break;
        case "ness":
          // no corrections expected
          assertSuggestion(orig, corr, 26, 30);
          break;
        default:
          fail("Unexpected original result: " + orig);
      }
    }
  } finally {
    searcher.decref();
  }
}

/**
 * Asserts the offsets, the term length and the ordered list of corrections
 * for a single suggestion entry.
 *
 * @param orig     the original token (or token combination) the suggestion is for
 * @param actual   the corrections returned for {@code orig}, in returned order
 * @param start    expected start offset of {@code orig}
 * @param end      expected end offset of {@code orig}
 * @param expected the expected corrections, in order; empty when none are expected
 */
private static void assertSuggestion(Token orig, String[] actual, int start, int end, String... expected) {
  String term = orig.toString();
  assertEquals("startOffset of '" + term + "'", start, orig.startOffset());
  assertEquals("endOffset of '" + term + "'", end, orig.endOffset());
  // In every expected case the term length equals the span covered by the offsets.
  assertEquals("length of '" + term + "'", end - start, orig.length());
  assertArrayEquals("corrections for '" + term + "'", expected, actual);
}
/**
 * Sends a query through the {@code /spellCheckWithWordbreak} handler and checks the
 * suggestion list: per-term suggestions with word-break results interleaved among the
 * other spellchecker's results, followed by the combined-term suggestions.
 */
@Test
public void testInConjunction() throws Exception {
  final String[] requestParams = {
    "q", "lowerfilt:(paintable pine apple good ness)",
    "qt", "/spellCheckWithWordbreak",
    "indent", "true",
    SpellCheckComponent.SPELLCHECK_BUILD, "true",
    SpellCheckComponent.COMPONENT_NAME, "true",
    SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
    SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true"
  };

  final String[] expectedXpaths = {
    // the five single-term entries come first, in query order
    "//lst[@name='suggestions']/lst[1]/@name='paintable'",
    "//lst[@name='suggestions']/lst[2]/@name='pine'",
    "//lst[@name='suggestions']/lst[3]/@name='apple'",
    "//lst[@name='suggestions']/lst[4]/@name='good'",
    "//lst[@name='suggestions']/lst[5]/@name='ness'",

    // "paintable": word-break results alternate with the other spellchecker's results
    "//lst[@name='paintable']/int[@name='numFound']=8",
    "//lst[@name='paintable']/int[@name='startOffset']=11",
    "//lst[@name='paintable']/int[@name='endOffset']=20",
    "//lst[@name='paintable']/int[@name='origFreq']=0",
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[1]/str[@name='word']='printable'", // SolrSpellChecker result interleaved
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[1]/int[@name='freq']=3",
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[2]/str[@name='word']='paint able'", // 1 op
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[2]/int[@name='freq']=5",
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[3]/str[@name='word']='pintable'", // SolrSpellChecker result interleaved
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[3]/int[@name='freq']=1",
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[4]/str[@name='word']='pain table'", // 1 op
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[4]/int[@name='freq']=2",
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[5]/str[@name='word']='pointable'", // SolrSpellChecker result interleaved
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[5]/int[@name='freq']=1",
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[6]/str[@name='word']='pa in table'", // 2 ops
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[6]/int[@name='freq']=7",
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[7]/str[@name='word']='plantable'", // SolrSpellChecker result interleaved
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[7]/int[@name='freq']=1",
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[8]/str[@name='word']='puntable'", // SolrSpellChecker result interleaved
    "//lst[@name='paintable']/arr[@name='suggestion']/lst[8]/int[@name='freq']=1",

    // remaining single terms
    "//lst[@name='pine']/int[@name='numFound']=2",
    "//lst[@name='pine']/int[@name='startOffset']=21",
    "//lst[@name='pine']/int[@name='endOffset']=25",
    "//lst[@name='pine']/arr[@name='suggestion']/lst[1]/str[@name='word']='line'",
    "//lst[@name='pine']/arr[@name='suggestion']/lst[2]/str[@name='word']='pi ne'",
    "//lst[@name='apple']/int[@name='numFound']=1",
    "//lst[@name='apple']/arr[@name='suggestion']/lst[1]/str[@name='word']='ample'",
    "//lst[@name='good']/int[@name='numFound']=1",
    "//lst[@name='good']/arr[@name='suggestion']/lst[1]/str[@name='word']='food'",
    "//lst[@name='ness']/int[@name='numFound']=1",
    "//lst[@name='ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='mess'",

    // adjacent terms combined into a single word
    "//lst[@name='pine apple']/int[@name='numFound']=1",
    "//lst[@name='pine apple']/int[@name='startOffset']=21",
    "//lst[@name='pine apple']/int[@name='endOffset']=31",
    "//lst[@name='pine apple']/arr[@name='suggestion']/lst[1]/str[@name='word']='pineapple'",
    "//lst[@name='paintable pine']/int[@name='numFound']=1",
    "//lst[@name='paintable pine']/int[@name='startOffset']=11",
    "//lst[@name='paintable pine']/int[@name='endOffset']=25",
    "//lst[@name='paintable pine']/arr[@name='suggestion']/lst[1]/str[@name='word']='paintablepine'",
    "//lst[@name='good ness']/int[@name='numFound']=1",
    "//lst[@name='good ness']/int[@name='startOffset']=32",
    "//lst[@name='good ness']/int[@name='endOffset']=41",
    "//lst[@name='good ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='goodness'",
    "//lst[@name='pine apple good ness']/int[@name='numFound']=1",
    "//lst[@name='pine apple good ness']/int[@name='startOffset']=21",
    "//lst[@name='pine apple good ness']/int[@name='endOffset']=41",
    "//lst[@name='pine apple good ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='pineapplegoodness'"
  };

  assertQ(req(requestParams), expectedXpaths);
}
/**
 * Verifies the collations produced by the {@code /spellCheckWithWordbreak} handler
 * for several query shapes: a plain term list, explicit AND / NOT operators,
 * +/- prefixed terms, one long run-together term, and an edismax query.
 */
@Test
public void testCollate() throws Exception {
  // Plain term list: up to ten collations, mixing both kinds of corrections.
  {
    final String[] request = {
      "q", "lowerfilt:(paintable pine apple godness)",
      "qt", "/spellCheckWithWordbreak",
      "indent", "true",
      SpellCheckComponent.SPELLCHECK_BUILD, "true",
      SpellCheckComponent.COMPONENT_NAME, "true",
      SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
      SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"
    };
    final String[] expected = {
      "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:(printable line ample goodness)'",
      "//lst[@name='collation'][2 ]/str[@name='collationQuery']='lowerfilt:(paintablepine ample goodness)'",
      "//lst[@name='collation'][3 ]/str[@name='collationQuery']='lowerfilt:(printable pineapple goodness)'",
      "//lst[@name='collation'][4 ]/str[@name='collationQuery']='lowerfilt:(paint able line ample goodness)'",
      "//lst[@name='collation'][5 ]/str[@name='collationQuery']='lowerfilt:(printable pi ne ample goodness)'",
      "//lst[@name='collation'][6 ]/str[@name='collationQuery']='lowerfilt:(paint able pineapple goodness)'",
      "//lst[@name='collation'][7 ]/str[@name='collationQuery']='lowerfilt:(paint able pi ne ample goodness)'",
      "//lst[@name='collation'][8 ]/str[@name='collationQuery']='lowerfilt:(pintable line ample goodness)'",
      "//lst[@name='collation'][9 ]/str[@name='collationQuery']='lowerfilt:(pintable pineapple goodness)'",
      "//lst[@name='collation'][10]/str[@name='collationQuery']='lowerfilt:(pintable pi ne ample goodness)'",
      "//lst[@name='collation'][10]/lst[@name='misspellingsAndCorrections']/str[@name='paintable']='pintable'",
      "//lst[@name='collation'][10]/lst[@name='misspellingsAndCorrections']/str[@name='pine']='pi ne'",
      "//lst[@name='collation'][10]/lst[@name='misspellingsAndCorrections']/str[@name='apple']='ample'",
      "//lst[@name='collation'][10]/lst[@name='misspellingsAndCorrections']/str[@name='godness']='goodness'"
    };
    assertQ(req(request), expected);
  }

  // Explicit AND between the terms.
  {
    final String[] request = {
      "q", "lowerfilt:(pine AND apple)",
      "qt", "/spellCheckWithWordbreak",
      "indent", "true",
      SpellCheckComponent.COMPONENT_NAME, "true",
      SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
      SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"
    };
    final String[] expected = {
      "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:(line AND ample)'",
      "//lst[@name='collation'][2 ]/str[@name='collationQuery']='lowerfilt:(pineapple)'",
      "//lst[@name='collation'][3 ]/str[@name='collationQuery']='lowerfilt:((pi AND ne) AND ample)'"
    };
    assertQ(req(request), expected);
  }

  // "AND NOT" between two fielded clauses.
  {
    final String[] request = {
      "q", "lowerfilt:pine AND NOT lowerfilt:apple",
      "qt", "/spellCheckWithWordbreak",
      "indent", "true",
      SpellCheckComponent.COMPONENT_NAME, "true",
      SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
      SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"
    };
    final String[] expected = {
      "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:line AND NOT lowerfilt:ample'",
      "//lst[@name='collation'][2 ]/str[@name='collationQuery']='lowerfilt:(pi AND ne) AND NOT lowerfilt:ample'"
    };
    assertQ(req(request), expected);
  }

  // Bare "NOT" between two fielded clauses.
  {
    final String[] request = {
      "q", "lowerfilt:pine NOT lowerfilt:apple",
      "qt", "/spellCheckWithWordbreak",
      "indent", "true",
      SpellCheckComponent.COMPONENT_NAME, "true",
      SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
      SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"
    };
    final String[] expected = {
      "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:line NOT lowerfilt:ample'",
      "//lst[@name='collation'][2 ]/str[@name='collationQuery']='lowerfilt:(pi AND ne) NOT lowerfilt:ample'"
    };
    assertQ(req(request), expected);
  }

  // Terms prefixed with + and -.
  {
    final String[] request = {
      "q", "lowerfilt:(+pine -apple)",
      "qt", "/spellCheckWithWordbreak",
      "indent", "true",
      SpellCheckComponent.COMPONENT_NAME, "true",
      SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
      SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"
    };
    final String[] expected = {
      "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:(+line -ample)'",
      "//lst[@name='collation'][2 ]/str[@name='collationQuery']='lowerfilt:(+pi +ne -ample)'"
    };
    assertQ(req(request), expected);
  }

  // One run-together term that has to be broken into four words.
  {
    final String[] request = {
      "q", "lowerfilt:(+printableinpuntableplantable)",
      "qt", "/spellCheckWithWordbreak",
      "indent", "true",
      SpellCheckComponent.COMPONENT_NAME, "true",
      SpellCheckComponent.SPELLCHECK_ACCURACY, "1",
      SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1"
    };
    final String[] expected = {
      "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:(+printable +in +puntable +plantable)'"
    };
    assertQ(req(request), expected);
  }

  // edismax query: two terms are combined and one term is broken in the same collation.
  {
    final String[] request = {
      "q", "zxcv AND qwtp AND fghj",
      "qt", "/spellCheckWithWordbreak",
      "defType", "edismax",
      "qf", "lowerfilt",
      "indent", "true",
      SpellCheckComponent.SPELLCHECK_BUILD, "true",
      SpellCheckComponent.COMPONENT_NAME, "true",
      SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
      SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE, "true",
      SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
      SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"
    };
    final String[] expected = {
      "//lst[@name='collation'][1 ]/str[@name='collationQuery']='zxcvqwtp AND (fg AND hj)'"
    };
    assertQ(req(request), expected);
  }
}
}