// blob: 1b5f5ee9592821eebd5407ecdda6028fd6a95b44 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.util.TestHarness;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.HashMap;
/**
 * Tests for {@link ComplexPhraseQParserPlugin}, selected either with the
 * {@code {!complexphrase}} local-params prefix or by passing the plugin name as the default
 * query type.
 *
 * <p>Each test indexes a few small documents and then uses XPath assertions on the response to
 * check which documents a query matches. The queries put wildcards, fuzzy terms, ranges and
 * boolean groups inside a quoted phrase.
 */
public class TestComplexPhraseQParserPlugin extends SolrTestCaseJ4 {

  /** Initializes the core used by every test from {@code solrconfig.xml} and {@code schema15.xml}. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig.xml", "schema15.xml");
  }

  /**
   * Empties the index before each test.
   *
   * <p>The core is shared by all tests in this class and the tests reuse the ids 1-4, but not
   * every test overwrites all four. Without this cleanup a document left behind by one test
   * would still be searchable in the next, making results depend on execution order.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    assertU(delQ("*:*"));
    assertU(commit());
  }

  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }

  /**
   * Runs phrases that name no field, so they are resolved against the default field. The
   * documents are indexed into {@code text}.
   */
  @Test
  public void testDefaultField() {
    assertU(adoc("text", "john smith", "id", "1"));
    assertU(adoc("text", "johathon smith", "id", "2"));
    assertU(adoc("text", "john percival smith", "id", "3"));
    assertU(commit());
    assertU(optimize());

    // plain two-term phrase
    assertQ(req("q", "{!complexphrase} \"john smith\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='1']");

    // wildcard and fuzzy term inside the phrase
    assertQ(req("q", "{!complexphrase} \"j* smyth~\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']");

    // boolean exclusion inside the phrase
    assertQ(req("q", "{!complexphrase} \"(jo* -john) smith\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='2']");

    // slop of 2 also admits "john percival smith"
    assertQ(req("q", "{!complexphrase} \"jo* smith\"~2"),
        "//result[@numFound='3']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']",
        "//doc[./str[@name='id']='3']");

    // range clause inside the phrase
    assertQ(req("q", "{!complexphrase} \"jo* [sma TO smz]\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']");

    // single-term "phrase"
    assertQ(req("q", "{!complexphrase} \"john\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='3']");

    // alternatives (OR) in the first phrase position
    assertQ(req("q", "{!complexphrase} \"(john johathon) smith\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']");
  }

  /**
   * Runs the same kinds of phrases against the {@code name} field, mostly through a request
   * factory whose default query type is the complex phrase parser, and checks that embedded
   * sub-queries are rejected.
   */
  @Test
  public void test() {
    HashMap<String, String> args = complexPhraseArgs();
    TestHarness.LocalRequestFactory lrf = h.getRequestFactory("", 0, 200, args);

    assertU(adoc("name", "john smith", "id", "1"));
    assertU(adoc("name", "johathon smith", "id", "2"));
    assertU(adoc("name", "john percival smith", "id", "3"));
    assertU(commit());
    assertU(optimize());

    assertQ("Simple multi-term still works",
        lrf.makeRequest("name:\"john smith\""),
        "//doc[./str[@name='id']='1']",
        "//result[@numFound='1']");

    // same query, this time selecting the parser through local params
    assertQ(req("q", "{!complexphrase} name:\"john smith\""),
        "//doc[./str[@name='id']='1']",
        "//result[@numFound='1']");

    assertQ("wildcards and fuzzies are OK in phrases",
        lrf.makeRequest("name:\"j* smyth~\""),
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']",
        "//result[@numFound='2']");

    assertQ("boolean logic works",
        lrf.makeRequest("name:\"(jo* -john) smith\""),
        "//doc[./str[@name='id']='2']",
        "//result[@numFound='1']");

    assertQ("position logic works",
        lrf.makeRequest("name:\"jo* smith\"~2"),
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']",
        "//doc[./str[@name='id']='3']",
        "//result[@numFound='3']");

    assertQ("range queries supported",
        lrf.makeRequest("name:\"jo* [sma TO smz]\""),
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']",
        "//result[@numFound='2']");

    assertQ("Simple single-term still works",
        lrf.makeRequest("name:\"john\""),
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='3']",
        "//result[@numFound='2']");

    assertQ("OR inside phrase works",
        lrf.makeRequest("name:\"(john johathon) smith\""),
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']",
        "//result[@numFound='2']");

    // Nested query syntax must be reported as a bad request, not executed.
    assertQEx("don't parse subqueries",
        "SyntaxError",
        lrf.makeRequest("_query_:\"{!prefix f=name v=smi}\""),
        SolrException.ErrorCode.BAD_REQUEST);

    assertQEx("don't parse subqueries",
        "SyntaxError",
        lrf.makeRequest("{!prefix f=name v=smi}"),
        SolrException.ErrorCode.BAD_REQUEST);
  }

  /**
   * Checks highlighting of a complex phrase query on {@code name} with the phrase highlighter
   * switched on and then off.
   */
  @Test
  public void testPhraseHighlighter() {
    HashMap<String, String> args = complexPhraseArgs();
    args.put(HighlightParams.HIGHLIGHT, Boolean.TRUE.toString());
    args.put(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.TRUE.toString());
    args.put(HighlightParams.FIELD_MATCH, Boolean.FALSE.toString());
    args.put(HighlightParams.FRAGSIZE, String.valueOf(0));
    args.put(HighlightParams.FIELDS, "name");

    TestHarness.LocalRequestFactory lrf = h.getRequestFactory("", 0, 200, args);

    assertU(adoc("name", "john smith smith john", "id", "1"));
    assertU(adoc("name", "johathon smith smith johathon", "id", "2"));
    assertU(adoc("name", "john percival smith", "id", "3"));
    assertU(commit());
    assertU(optimize());

    assertQ("range queries supported",
        lrf.makeRequest("name:[sma TO smz]"),
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']",
        "//doc[./str[@name='id']='3']",
        "//result[@numFound='3']");

    // Phrase highlighter on: only the terms that form the matching phrase are marked.
    assertQ("PhraseHighlighter=true Test",
        lrf.makeRequest("name:\"(john johathon) smith\""),
        "//lst[@name='highlighting']/lst[@name='1']",
        "//lst[@name='1']/arr[@name='name']/str[.='<em>john</em> <em>smith</em> smith john']",
        "//lst[@name='highlighting']/lst[@name='2']",
        "//lst[@name='2']/arr[@name='name']/str[.='<em>johathon</em> <em>smith</em> smith johathon']");

    // Phrase highlighter off: every occurrence of a query term is marked. The factory is
    // rebuilt after changing the parameter, as the test has always done.
    args.put(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.FALSE.toString());
    lrf = h.getRequestFactory("", 0, 200, args);
    assertQ("PhraseHighlighter=false Test",
        lrf.makeRequest("name:\"(john johathon) smith\""),
        "//lst[@name='highlighting']/lst[@name='1']",
        "//lst[@name='1']/arr[@name='name']/str[.='<em>john</em> <em>smith</em> <em>smith</em> <em>john</em>']",
        "//lst[@name='highlighting']/lst[@name='2']",
        "//lst[@name='2']/arr[@name='name']/str[.='<em>johathon</em> <em>smith</em> <em>smith</em> <em>johathon</em>']");

    // NOTE(review): the assertion below is disabled; why is not recorded in this file.
    /*
    assertQ("Highlight Plain Prefix Query Test",
    sumLRF.makeRequest("name:jo*"),
    "//lst[@name='highlighting']/lst[@name='1']",
    "//lst[@name='1']/arr[@name='name']/str[.='<em>john</em> smith smith <em>john</em>']",
    "//lst[@name='highlighting']/lst[@name='2']",
    "//lst[@name='2']/arr[@name='name']/str[.='<em>johathon</em> smith smith <em>johathon</em>']",
    "//lst[@name='highlighting']/lst[@name='3']",
    "//lst[@name='3']/arr[@name='name']/str[.='<em>john</em> percival smith']"
    );
    */
  }

  /**
   * Combines phrases on two different fields ({@code name} and {@code text}) in one query,
   * with and without the {@code inOrder} local parameter.
   */
  @Test
  public void testMultipleFields() {
    assertU(adoc("text", "protein digest", "name", "dna rules", "id", "1"));
    assertU(adoc("text", "digest protein", "name", "rna is the workhorse", "id", "2"));
    assertU(adoc("text", "dna rules", "name", "protein digest", "id", "3"));
    assertU(adoc("text", "dna really rules", "name", "digest protein", "id", "4"));
    assertU(commit());
    assertU(optimize());

    assertQ(req("q", "{!complexphrase} name:\"protein digest\" AND text:\"dna rules\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='3']");

    assertQ(req("q", "{!complexphrase} name:\"prot* dige*\" AND text:\"d* r*\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='3']");

    // inOrder given as a quoted local-param value
    assertQ(req("q", "{!complexphrase inOrder=\"false\"} name:\"dna* rule*\" AND text:\"prot* diges*\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='1']");

    // unordered plus slop on the text phrase also picks up document 4
    assertQ(req("q", "{!complexphrase inOrder=false} name:\"protein digest\" AND text:\"dna rules\"~2"),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='3']",
        "//doc[./str[@name='id']='4']");

    assertQ(req("q", "{!complexphrase inOrder=\"true\"} name:\"protein digest\" AND text:\"dna rules\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='3']");
  }

  /**
   * Compares ordered phrase matching (the behavior when {@code inOrder} is not given) with
   * unordered matching ({@code inOrder=false}), on the default field and on {@code name}.
   */
  @Test
  public void testUnorderedPhraseQuery() {
    assertU(adoc("text", "protein digest", "id", "1"));
    assertU(adoc("text", "digest protein", "id", "2"));
    assertU(adoc("name", "protein digest", "id", "3"));
    assertU(adoc("name", "digest protein", "id", "4"));
    assertU(commit());
    assertU(optimize());

    // An ordered phrase query returns only the first document of each pair.
    assertQ(req("q", "{!complexphrase} \"protein digest\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='1']");

    assertQ(req("q", "{!complexphrase} \"pro* di*\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='1']");

    assertQ(req("q", "{!complexphrase} name:\"protein digest\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='3']");

    assertQ(req("q", "{!complexphrase} name:\"pro* di*\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='3']");

    // An unordered phrase query returns both documents of the pair.
    assertQ(req("q", "{!complexphrase inOrder=false} \"digest protein\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']");

    assertQ(req("q", "{!complexphrase inOrder=false} \"di* pro*\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']");

    assertQ(req("q", "{!complexphrase inOrder=false} name:\"digest protein\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='3']",
        "//doc[./str[@name='id']='4']");

    assertQ(req("q", "{!complexphrase inOrder=false} name:\"di* pro*\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='3']",
        "//doc[./str[@name='id']='4']");

    // The inOrder parameter can be defined with local params syntax.
    assertQ(req("q", "{!complexphrase inOrder=false} \"di* pro*\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']");

    // In order, only "digest protein" (id 2) has a "di*" term directly followed by a "pro*"
    // term in the default field, so pin the document as well as the count.
    assertQ(req("q", "{!complexphrase inOrder=true} \"di* pro*\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='2']");

    // The inOrder and df parameters can both be defined with local params syntax.
    assertQ(req("q", "{!complexphrase inOrder=false df=name} \"di* pro*\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='3']",
        "//doc[./str[@name='id']='4']");
  }

  /**
   * The query "sulfur-reducing bacteria" was crashing due to the dash inside the phrase.
   */
  @Test
  public void testHyphenInPhrase() {
    assertU(adoc("text", "sulfur-reducing bacteria", "id", "1"));
    assertU(adoc("text", "sulfur reducing bacteria", "id", "2"));
    assertU(adoc("name", "sulfur-reducing bacteria", "id", "3"));
    assertU(adoc("name", "sulfur reducing bacteria", "id", "4"));
    assertU(commit());
    assertU(optimize());

    assertQ(req("q", "{!complexphrase} \"sulfur-reducing bacteria\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']");

    // the analysis for "name" currently does not break on "-" (only whitespace) and thus only matches one doc
    assertQ(req("q", "{!complexphrase} name:\"sulfur-reducing bacteria\""),
        "//result[@numFound='1']");
  }

  /**
   * Builds the request arguments shared by the request-factory based tests: the complex phrase
   * parser as default query type and {@code id} as the only returned field.
   *
   * @return a new, mutable map that the caller may extend with further parameters
   */
  private static HashMap<String, String> complexPhraseArgs() {
    HashMap<String, String> args = new HashMap<String, String>();
    args.put(QueryParsing.DEFTYPE, ComplexPhraseQParserPlugin.NAME);
    args.put(CommonParams.FL, "id");
    return args;
  }
}