blob: bcfdb28dd152f35f843e0124fcd2286d8cb29d4a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.solr.util.SolrPluginUtils;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
/**
 * One-time setup: initializes the core from {@code solrconfig.xml} and
 * {@code schema12.xml}, then indexes the fixture documents via
 * {@link #index()}.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  final String configFile = "solrconfig.xml";
  final String schemaFile = "schema12.xml";
  initCore(configFile, schemaFile);
  index();
}
/**
 * Adds the fixture documents (ids 42 through 61) queried by the tests in
 * this class and commits them.
 */
public static void index() throws Exception {
  // documents with names, titles, traits and free text
  assertU(adoc("id", "42",
               "trait_ss", "Tool",
               "trait_ss", "Obnoxious",
               "name", "Zapp Brannigan"));
  assertU(adoc("id", "43",
               "title", "Democratic Order op Planets"));
  assertU(adoc("id", "44",
               "trait_ss", "Tool",
               "name", "The Zapper"));
  assertU(adoc("id", "45",
               "trait_ss", "Chauvinist",
               "title", "25 star General"));
  assertU(adoc("id", "46",
               "trait_ss", "Obnoxious",
               "subject", "Defeated the pacifists op the Gandhi nebula",
               "t_special", "literal:colon value",
               "movies_t", "first is Mission: Impossible, second is Terminator 2: Judgement Day. Terminator:3 ok...",
               "foo_i", "8"));
  assertU(adoc("id", "47",
               "trait_ss", "Pig",
               "text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!"));

  // text_sw documents, some with a numeric foo_i value
  assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i", "100"));
  assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i", "-100"));
  assertU(adoc("id", "50", "text_sw", "start new big city end"));

  // a single document with a "store" value (used by the geofilt queries)
  assertU(adoc("id", "51", "store", "12.34,-56.78"));

  // documents used by the boost query test
  assertU(adoc("id", "52", "text_sw", "tekna theou klethomen"));
  assertU(adoc("id", "53", "text_sw", "nun tekna theou esmen"));
  assertU(adoc("id", "54", "text_sw", "phanera estin ta tekna tou theou"));

  // CJK documents of increasing length, in two different fields
  assertU(adoc("id", "55", "standardtok", "大"));
  assertU(adoc("id", "56", "standardtok", "大亚"));
  assertU(adoc("id", "57", "standardtok", "大亚湾"));
  assertU(adoc("id", "58", "HTMLstandardtok", "大"));
  assertU(adoc("id", "59", "HTMLstandardtok", "大亚"));
  assertU(adoc("id", "60", "HTMLstandardtok", "大亚湾"));

  // synonyms in an expansion group
  assertU(adoc("id", "61", "text_sw", "bazaaa"));

  assertU(commit());
}
/**
 * Checks how edismax treats a bare {@code *} and a {@code *} qualified with
 * a field name that none of the fixture documents use.
 */
@Test
public void testSyntax() throws Exception {
  // a bare * should be treated as *:*
  assertJQ(req("defType", "edismax", "q", "*", "df", "doesnotexist_s"),
           "/response/docs/[0]=="); // make sure we get something...

  // a field-qualified * on doesnotexist_s must find nothing
  assertJQ(req("defType", "edismax", "q", "doesnotexist_s:*"),
           "/response/numFound==0");

  // ... and neither must a parenthesized group of bare *s on that field
  assertJQ(req("defType", "edismax", "q", "doesnotexist_s:( * * * )"),
           "/response/numFound==0");
}
/**
 * Verifies that queries made up of, or ending in, dangling {@code +} and
 * {@code -} operators do not cause an exception, first with fixed inputs and
 * then with 100 randomly generated operator strings.
 */
public void testTrailingOperators() throws Exception {
  // really just test that exceptions aren't thrown by
  // single + -
  assertJQ(req("defType", "edismax", "q", "-"),
           "/response==");
  assertJQ(req("defType", "edismax", "q", "+"),
           "/response==");
  assertJQ(req("defType", "edismax", "q", "+ - +"),
           "/response==");
  assertJQ(req("defType", "edismax", "q", "- + -"),
           "/response==");

  // a trailing operator after a real clause must not change the match
  assertJQ(req("defType", "edismax", "q", "id:47 +"),
           "/response/numFound==1");
  assertJQ(req("defType", "edismax", "q", "id:47 -"),
           "/response/numFound==1");

  // Random strings built only from space, '+' and '-'.
  final String alphabet = " +-";
  Random r = random();
  for (int i = 0; i < 100; i++) {
    // Draw the length once per string; using r.nextInt(10) directly as the
    // loop condition would pick a new bound on every iteration.
    final int length = r.nextInt(10);
    StringBuilder sb = new StringBuilder(length);
    for (int j = 0; j < length; j++) {
      sb.append(alphabet.charAt(r.nextInt(alphabet.length())));
    }
    String q = sb.toString();
    assertJQ(req("defType", "edismax", "q", q),
             "/response==");
  }
}
/**
 * Exercises the {@code lowercaseOperators} parameter. Upper-case
 * {@code AND} finds the one expected document with the parameter off or on;
 * lower-case {@code and} (with {@code q.op=AND}) finds nothing when it is
 * off and the one document when it is on.
 */
public void testLowercaseOperators() {
  final String oneDoc = "*[count(//doc)=1]";
  final String noDocs = "*[count(//doc)=0]";

  assertQ("Upper case operator",
          req("q", "Zapp AND Brannigan",
              "qf", "name",
              "lowercaseOperators", "false",
              "defType", "edismax"),
          oneDoc);

  assertQ("Upper case operator, allow lowercase",
          req("q", "Zapp AND Brannigan",
              "qf", "name",
              "lowercaseOperators", "true",
              "defType", "edismax"),
          oneDoc);

  assertQ("Lower case operator, don't allow lowercase operators",
          req("q", "Zapp and Brannigan",
              "qf", "name",
              "q.op", "AND",
              "lowercaseOperators", "false",
              "defType", "edismax"),
          noDocs);

  assertQ("Lower case operator, allow lower case operators",
          req("q", "Zapp and Brannigan",
              "qf", "name",
              "q.op", "AND",
              "lowercaseOperators", "true",
              "defType", "edismax"),
          oneDoc);
}
/**
 * Broad functional test of the edismax query parser, based on the dismax
 * parser tests: blank queries with {@code q.alt}, {@code qf} defaults,
 * boolean operators, numeric fields, synonyms and stopwords, literal colons
 * in terms, and the {@code _query_} pseudo-field under different {@code uf}
 * settings.
 */
public void testFocusQueryParser() {
  final String allQuery = "id:[42 TO 51]";
  final String allHits = "*[count(//doc)=10]";
  final String oneHit = "*[count(//doc)=1]";
  final String twoHits = "*[count(//doc)=2]";
  final String noHits = "*[count(//doc)=0]";

  // literals shared by several assertions below
  final String doc46First = "//doc[1]/str[@name='id'][.='46']";
  final String textFields = "name title subject text";
  final String geoQuery = "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\"";

  assertQ("blank q",
          req("q", " ",
              "q.alt", allQuery,
              "defType", "edismax"),
          allHits);

  assertQ("expected doc is missing (using un-escaped edismax w/qf)",
          req("q", "literal:colon",
              "qf", "t_special",
              "defType", "edismax"),
          doc46First);

  assertQ("standard request handler returns all matches",
          req(allQuery),
          allHits);

  assertQ("edismax query parser returns all matches",
          req("q", allQuery,
              "defType", "edismax"),
          allHits);

  assertQ(req("defType", "edismax", "qf", "trait_ss",
              "q", "Tool"),
          twoHits);

  // test that field types that aren't applicable don't cause an exception to be thrown
  assertQ(req("defType", "edismax", "qf", "trait_ss foo_i foo_f foo_dt foo_l foo_d foo_b",
              "q", "Tool"),
          twoHits);

  // test that numeric field types can be queried
  assertQ(req("defType", "edismax", "qf", "text_sw",
              "q", "foo_i:100"),
          oneHit);

  // test that numeric field types can be queried with a negative value
  assertQ(req("defType", "edismax", "qf", "text_sw",
              "q", "foo_i:-100"),
          oneHit);

  // test that numeric field types can be queried via qf
  assertQ(req("defType", "edismax", "qf", "text_sw foo_i",
              "q", "100"),
          oneHit);

  assertQ("qf defaults to df",
          req("defType", "edismax", "df", "trait_ss",
              "q", "Tool"),
          twoHits);

  assertQ("qf defaults to defaultSearchField",
          req("defType", "edismax",
              "q", "op"),
          twoHits);

  assertQ(req("defType", "edismax", "qf", textFields,
              "q", "op"),
          twoHits);

  // default operator supplied through q.op
  assertQ(req("defType", "edismax",
              "qf", textFields,
              "q.op", "AND",
              "q", "Order op"),
          oneHit);
  assertQ(req("defType", "edismax",
              "qf", textFields,
              "q.op", "OR",
              "q", "Order op"),
          twoHits);

  // explicit operators, upper and lower case
  assertQ(req("defType", "edismax", "qf", textFields,
              "q", "Order AND op"),
          oneHit);
  assertQ(req("defType", "edismax", "qf", textFields,
              "q", "Order and op"),
          oneHit);
  assertQ(req("defType", "edismax", "qf", textFields,
              "q", "+Order op"),
          oneHit);
  assertQ(req("defType", "edismax", "qf", textFields,
              "q", "Order OR op"),
          twoHits);
  assertQ(req("defType", "edismax", "qf", textFields,
              "q", "Order or op"),
          twoHits);

  // match-all and pure-negative sub-clauses
  assertQ(req("defType", "edismax", "qf", textFields,
              "q", "*:*"),
          allHits);
  assertQ(req("defType", "edismax", "qf", textFields,
              "q", "star OR (-star)"),
          allHits);
  assertQ(req("defType", "edismax", "qf", textFields,
              "q", "id:42 OR (-id:42)"),
          allHits);

  // test that basic synonyms work
  assertQ(req("defType", "edismax", "qf", "text_sw",
              "q", "GB"),
          oneHit);

  // test for stopword removal in main query part
  assertQ(req("defType", "edismax", "qf", "text_sw",
              "q", "the big"),
          twoHits);

  // test for stopwords not removed
  assertQ(req("defType", "edismax",
              "qf", "text_sw",
              "stopwords", "false",
              "q.op", "AND",
              "q", "the big"),
          oneHit);

  // searching for a literal colon value when clearly not used for a field
  assertQ("expected doc is missing (using standard)",
          req("q", "t_special:literal\\:colon"),
          doc46First);
  assertQ("expected doc is missing (using escaped edismax w/field)",
          req("q", "t_special:literal\\:colon",
              "defType", "edismax"),
          doc46First);
  assertQ("expected doc is missing (using un-escaped edismax w/field)",
          req("q", "t_special:literal:colon",
              "defType", "edismax"),
          doc46First);
  assertQ("expected doc is missing (using escaped edismax w/qf)",
          req("q", "literal\\:colon",
              "qf", "t_special",
              "defType", "edismax"),
          doc46First);
  assertQ("expected doc is missing (using un-escaped edismax w/qf)",
          req("q", "literal:colon",
              "qf", "t_special",
              "defType", "edismax"),
          doc46First);

  // colons inside ordinary text, with every clause required
  assertQ(req("defType", "edismax", "mm", "100%", "q", "terminator:3", "qf", "movies_t"),
          oneHit);
  assertQ(req("defType", "edismax", "mm", "100%", "q", "Mission:Impossible", "qf", "movies_t"),
          oneHit);
  assertQ(req("defType", "edismax", "mm", "100%", "q", "Mission : Impossible", "qf", "movies_t"),
          oneHit);
  assertQ(req("defType", "edismax", "mm", "100%", "q", "Mission: Impossible", "qf", "movies_t"),
          oneHit);
  assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator 2: Judgement Day", "qf", "movies_t"),
          oneHit);

  // make sure the clause wasn't eliminated
  assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator 10: Judgement Day", "qf", "movies_t"),
          noHits);

  // throw in a numeric field
  assertQ(req("defType", "edismax", "mm", "0", "q", "Terminator: 100", "qf", "movies_t foo_i"),
          twoHits);
  assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator: 100", "qf", "movies_t foo_i"),
          noHits);
  assertQ(req("defType", "edismax", "mm", "100%", "q", "Terminator: 8", "qf", "movies_t foo_i"),
          oneHit);
  assertQ(req("defType", "edismax", "mm", "0", "q", "movies_t:Terminator 100", "qf", "movies_t foo_i"),
          twoHits);

  // special pseudo-fields like _query_ and _val_

  // special fields (and real field id) should be included by default
  assertQ(req("defType", "edismax",
              "mm", "100%",
              "fq", "id:51",
              "q", geoQuery),
          oneHit);

  // should also work when explicitly allowed
  assertQ(req("defType", "edismax",
              "mm", "100%",
              "fq", "id:51",
              "uf", "id _query_",
              "q", geoQuery),
          oneHit);
  assertQ(req("defType", "edismax",
              "mm", "100%",
              "fq", "id:51",
              "uf", "id",
              "uf", "_query_",
              "q", geoQuery),
          oneHit);

  // should fail when prohibited
  assertQ(req("defType", "edismax",
              "mm", "100%",
              "fq", "id:51",
              "uf", "* -_query_", // explicitly excluded
              "q", geoQuery),
          noHits);
  assertQ(req("defType", "edismax",
              "mm", "100%",
              "fq", "id:51",
              "uf", "id", // excluded by omission
              "q", geoQuery),
          noHits);

  /* Disabled: stopword removal in conjunction with multi-word synonyms at
   * query time breaks this test.

  // multi-word synonyms
  // remove id:50 which contains the false match
  assertQ(req("defType", "edismax", "qf", "text_t", "indent","true", "debugQuery","true",
              "q","-id:50 nyc"), oneHit
  );
  */

  /* Disabled: these fail because multi-word synonyms are being used at query time.

  // this will incorrectly match "new big city"
  assertQ(req("defType", "edismax", "qf", "id title",
              "q","nyc"), oneHit
  );

  // this will incorrectly match "new big city"
  assertQ(req("defType", "edismax", "qf", "title",
              "q","the big apple"), noHits
  );
  */
}
/**
 * Checks that {@code bq} boost queries reorder the three "tekna" documents
 * (ids 52-54) as expected, for simple, non-trivial and "everything except"
 * boost queries.
 */
public void testBoostQuery() {
  // expected ranking when 54 gets the largest boost and 53 a smaller one
  final String[] order54Then53Then52 = {
    "//doc[1]/str[@name='id'][.='54']",
    "//doc[2]/str[@name='id'][.='53']",
    "//doc[3]/str[@name='id'][.='52']"
  };

  assertQ(req("q", "tekna",
              "qf", "text_sw",
              "defType", "edismax",
              "bq", "id:54^100",
              "bq", "id:53^10",
              "fq", "id:[52 TO 54]",
              "fl", "id,score"),
          order54Then53Then52);

  // non-trivial bqs
  assertQ(req("q", "tekna",
              "qf", "text_sw",
              "defType", "edismax",
              "bq", "(text_sw:blasdfadsf id:54)^100",
              "bq", "id:[53 TO 53]^10",
              "fq", "id:[52 TO 54]",
              "fl", "id,score"),
          order54Then53Then52);

  // genuine negative boosts are not legal
  // see SOLR-3823, SOLR-3278, LUCENE-4378 and
  // https://wiki.apache.org/solr/SolrRelevancyFAQ#How_do_I_give_a_negative_.28or_very_low.29_boost_to_documents_that_match_a_query.3F
  assertQ(req("q", "tekna",
              "qf", "text_sw",
              "defType", "edismax",
              "bq", "(*:* -id:54)^100",
              "bq", "id:53^10",
              "bq", "id:52",
              "fq", "id:[52 TO 54]",
              "fl", "id,score"),
          "//doc[1]/str[@name='id'][.='53']",
          "//doc[2]/str[@name='id'][.='52']",
          "//doc[3]/str[@name='id'][.='54']");
}
/**
 * Tests the {@code uf} (user fields) parameter, which controls which field
 * names may be used explicitly inside the query string.
 */
public void testUserFields() {
  final String allHits = "*[count(//doc)=10]";
  final String oneHit = "*[count(//doc)=1]";
  final String noHits = "*[count(//doc)=0]";

  // User fields
  // Default is allow all "*"
  // If a list of fields are given, only those are allowed "foo bar"
  // Possible to invert with "-" syntax:
  //   Disallow all: "-*"
  //   Allow all but id: "* -id"
  // Also supports "dynamic" field name wildcarding

  // With no uf restriction, every one of these spellings must find doc 42.
  // SOLR-3377 - parens should be allowed immediately before field name
  final String[] id42Spellings = {
    "id:42",
    "( id:42 )",
    "(id:42)",
    "(+id:42)",
    "+(+id:42)",
    "+(+((id:42)))",
    "+(+((+id:42)))",
    " +( +( ( +id:42) ) ) ",
  };
  for (String spelling : id42Spellings) {
    assertQ(req("defType", "edismax", "q", spelling),
            oneHit);
  }

  assertQ(req("defType", "edismax", "q", "(id:(*:*)^200)"),
          allHits);

  // allow / deny lists for the id field
  assertQ(req("defType", "edismax", "uf", "id", "q", "id:42"),
          oneHit);
  assertQ(req("defType", "edismax", "uf", "-*", "q", "id:42"),
          noHits);
  assertQ(req("defType", "edismax", "uf", "loremipsum", "q", "id:42"),
          noHits);
  assertQ(req("defType", "edismax", "uf", "* -id", "q", "id:42"),
          noHits);
  assertQ(req("defType", "edismax", "uf", "* -loremipsum", "q", "id:42"),
          oneHit);

  // uf entries may carry a boost
  assertQ(req("defType", "edismax", "uf", "id^5.0", "q", "id:42"),
          oneHit);
  assertQ(req("defType", "edismax", "uf", "*^5.0", "q", "id:42"),
          oneHit);
  assertQ(req("defType", "edismax", "uf", "id^5.0", "q", "id:42^10.0"),
          oneHit);

  // wildcards in uf entries
  assertQ(req("defType", "edismax", "uf", "na*", "q", "name:Zapp"),
          oneHit);
  assertQ(req("defType", "edismax", "uf", "*me", "q", "name:Zapp"),
          oneHit);
  assertQ(req("defType", "edismax", "uf", "* -na*", "q", "name:Zapp"),
          noHits);
  assertQ(req("defType", "edismax", "uf", "*me -name", "q", "name:Zapp"),
          noHits);
  assertQ(req("defType", "edismax", "uf", "*ame -*e", "q", "name:Zapp"),
          noHits);

  // Boosts from user fields
  assertQ(req("defType", "edismax", "debugQuery", "true", "rows", "0", "q", "id:42"),
          "//str[@name='parsedquery_toString'][.='+id:42']");
  assertQ(req("defType", "edismax", "debugQuery", "true", "rows", "0", "uf", "*^5.0", "q", "id:42"),
          "//str[@name='parsedquery_toString'][.='+id:42^5.0']");
  assertQ(req("defType", "edismax", "debugQuery", "true", "rows", "0", "uf", "*^2.0 id^5.0 -xyz", "q", "name:foo"),
          "//str[@name='parsedquery_toString'][.='+name:foo^2.0']");
  assertQ(req("defType", "edismax", "debugQuery", "true", "rows", "0", "uf", "i*^5.0", "q", "id:42"),
          "//str[@name='parsedquery_toString'][.='+id:42^5.0']");

  // queries without an explicit field still match under a restrictive uf
  assertQ(req("defType", "edismax", "uf", "-*", "q", "cannons"),
          oneHit);
  assertQ(req("defType", "edismax", "uf", "* -id", "q", "42", "qf", "id"),
          oneHit);
}
/**
 * Tests field aliasing through {@code f.ALIAS.qf} parameters, both for
 * aliases used explicitly in the query string and for aliases listed in
 * {@code qf}.
 */
public void testAliasing() throws Exception {
  final String oneHit = "*[count(//doc)=1]";
  final String twoHits = "*[count(//doc)=2]";
  final String noHits = "*[count(//doc)=0]";
  final String twoTerms = "Zapp Obnoxious";

  // Aliasing
  // Single field
  assertQ(req("defType", "edismax", "q", "myalias:Zapp"),
          noHits);
  assertQ(req("defType", "edismax", "q", "myalias:Zapp", "f.myalias.qf", "name"),
          oneHit);

  // Multi field
  assertQ(req("defType", "edismax", "uf", "myalias", "q", "myalias:(Zapp Obnoxious)",
              "f.myalias.qf", "name^2.0 mytrait_ss^5.0", "mm", "50%"),
          oneHit);

  // Multi field, alias defined but not referenced by the query or by qf
  assertQ(req("defType", "edismax", "q", twoTerms,
              "f.myalias.qf", "name^2.0 mytrait_ss^5.0"),
          noHits);

  // alias referenced from qf
  assertQ(req("defType", "edismax", "q", twoTerms, "qf", "myalias^10.0",
              "f.myalias.qf", "name^2.0 mytrait_ss^5.0"),
          oneHit);
  assertQ(req("defType", "edismax", "q", twoTerms, "qf", "myalias^10.0",
              "f.myalias.qf", "name^2.0 trait_ss^5.0"),
          twoHits);
  assertQ(req("defType", "edismax", "q", twoTerms, "qf", "myalias^10.0",
              "f.myalias.qf", "name^2.0 trait_ss^5.0", "mm", "100%"),
          oneHit);
  assertQ(req("defType", "edismax", "q", twoTerms, "qf", "who^10.0 where^3.0",
              "f.who.qf", "name^2.0", "f.where.qf", "mytrait_ss^5.0"),
          oneHit);
  assertQ(req("defType", "edismax", "q", twoTerms, "qf", "myalias",
              "f.myalias.qf", "name mytrait_ss", "uf", "myalias"),
          oneHit);

  // alias used in the query string, combined with uf
  assertQ(req("defType", "edismax", "uf", "who", "q", "who:(Zapp Obnoxious)",
              "f.who.qf", "name^2.0 trait_ss^5.0", "qf", "id"),
          twoHits);
  assertQ(req("defType", "edismax", "uf", "* -name", "q", "who:(Zapp Obnoxious)",
              "f.who.qf", "name^2.0 trait_ss^5.0"),
          twoHits);
}
/**
 * Checks that boosts given on an alias and on its target fields influence
 * the ranking of docs 42 and 47.
 */
public void testAliasingBoost() throws Exception {
  final String userQuery = "Zapp Pig";

  // doc 42 should score higher than doc 47
  assertQ(req("defType", "edismax", "q", userQuery,
              "qf", "myalias", "f.myalias.qf", "name trait_ss^0.5"),
          "//result/doc[1]/str[@name='id']=42",
          "//result/doc[2]/str[@name='id']=47");

  // Now the order should be inverse
  assertQ(req("defType", "edismax", "q", userQuery,
              "qf", "myalias^100 name", "f.myalias.qf", "trait_ss^0.5"),
          "//result/doc[1]/str[@name='id']=47",
          "//result/doc[2]/str[@name='id']=42");
}
/**
 * Verifies that alias definitions ({@code f.ALIAS.qf}) that form a cycle are
 * rejected with a "Field aliases lead to a cycle" error, and that an
 * acyclic set of aliases is accepted.
 */
public void testCyclicAliasing() throws Exception {
  try {
    ignoreException(".*Field aliases lead to a cycle.*");

    // who -> name -> who
    assertAliasCycleDetected("Simple cyclic alising not detected",
        "defType", "edismax", "q", "blarg", "qf", "who",
        "f.who.qf", "name", "f.name.qf", "who");

    // who -> name -> myalias -> who
    assertAliasCycleDetected("Cyclic alising not detected",
        "defType", "edismax", "q", "blarg", "qf", "who",
        "f.who.qf", "name", "f.name.qf", "myalias", "f.myalias.qf", "who");

    // Several aliases share targets (field5, field6) but none leads back to
    // itself, so this request must not be rejected.
    try {
      h.query(req("defType", "edismax", "q", "blarg", "qf", "field1",
                  "f.field1.qf", "field2 field3", "f.field2.qf", "field4 field5",
                  "f.field4.qf", "field5", "f.field5.qf", "field6", "f.field3.qf", "field6"));
    } catch (SolrException e) {
      // same failure message as before, but keep the exception as the cause
      AssertionError failure = new AssertionError("This is not cyclic alising");
      failure.initCause(e);
      throw failure;
    }

    // field4 -> field5 -> field4, reached through field1 -> field2
    assertAliasCycleDetected("Cyclic alising not detected",
        "defType", "edismax", "q", "blarg", "qf", "field1",
        "f.field1.qf", "field2 field3", "f.field2.qf", "field4 field5",
        "f.field4.qf", "field5", "f.field5.qf", "field4");

    // the cyclic alias is only referenced from the query string, not from qf
    assertAliasCycleDetected("Cyclic alising not detected",
        "defType", "edismax", "q", "who:(Zapp Pig)", "qf", "field1",
        "f.who.qf", "name", "f.name.qf", "myalias", "f.myalias.qf", "who");
  } finally {
    resetExceptionIgnores();
  }
}

/**
 * Runs a query built from {@code params} and asserts that it fails with a
 * {@link SolrException} whose cause reports an alias cycle.
 *
 * @param notDetectedMessage failure message used when the query succeeds
 * @param params request parameters as alternating names and values
 */
private void assertAliasCycleDetected(String notDetectedMessage, String... params) throws Exception {
  try {
    h.query(req(params));
    fail(notDetectedMessage);
  } catch (SolrException e) {
    // Check cause and message explicitly so that a missing one shows up as
    // an assertion failure rather than a NullPointerException.
    Throwable cause = e.getCause();
    assertNotNull("SolrException carries no cause: " + e, cause);
    String causeMessage = cause.getMessage();
    assertTrue("Unexpected cause: " + cause,
        causeMessage != null && causeMessage.contains("Field aliases lead to a cycle"));
  }
}
/**
 * Tests boolean operators combined with terms that contain a literal colon,
 * both when the text before the colon is not an allowed field
 * ({@code bogus:xxx}) and when a real field name is excluded via {@code uf}
 * ({@code a_s:xxx}).
 */
public void testOperatorsWithLiteralColons() {
  // extra documents for this test only
  assertU(adoc("id", "142", "a_s", "bogus:xxx", "text_s", "yak"));
  assertU(adoc("id", "143", "a_s", "bogus:xxx"));
  assertU(adoc("id", "144", "text_s", "yak"));
  assertU(adoc("id", "145", "a_s", "a_s:xxx", "text_s", "yak"));
  assertU(adoc("id", "146", "a_s", "a_s:xxx"));
  assertU(adoc("id", "147", "a_s", "AND", "a_s", "NOT"));
  assertU(commit());

  final String twoFieldQf = "a_s b_s";
  final String threeFieldQf = "a_s b_s text_s";

  // AND
  assertQ(req("q", "bogus:xxx AND text_s:yak",
              "fl", "id",
              "qf", twoFieldQf,
              "defType", "edismax",
              "mm", "0"),
          "//*[@numFound='1']",
          "//str[@name='id'][.='142']");

  assertQ(req("q", "a_s:xxx AND text_s:yak",
              "fl", "id",
              "qf", twoFieldQf,
              "defType", "edismax",
              "mm", "0",
              "uf", "text_s"),
          "//*[@numFound='1']",
          "//str[@name='id'][.='145']");

  // NOT
  assertQ(req("q", "NOT bogus:xxx +text_s:yak",
              "fl", "id",
              "qf", twoFieldQf,
              "defType", "edismax",
              "mm", "0",
              "debugQuery", "true"),
          "//*[@numFound='2']",
          "//str[@name='id'][.='144']",
          "//str[@name='id'][.='145']");

  assertQ(req("q", "NOT a_s:xxx +text_s:yak",
              "fl", "id",
              "qf", twoFieldQf,
              "defType", "edismax",
              "mm", "0",
              "uf", "text_s"),
          "//*[@numFound='2']",
          "//str[@name='id'][.='142']",
          "//str[@name='id'][.='144']");

  // leading +
  assertQ(req("q", "+bogus:xxx yak",
              "fl", "id",
              "qf", threeFieldQf,
              "defType", "edismax",
              "mm", "0"),
          "//*[@numFound='2']",
          "//str[@name='id'][.='142']",
          "//str[@name='id'][.='143']");

  assertQ(req("q", "+a_s:xxx yak",
              "fl", "id",
              "qf", threeFieldQf,
              "defType", "edismax",
              "mm", "0",
              "uf", "b_s"),
          "//*[@numFound='2']",
          "//str[@name='id'][.='145']",
          "//str[@name='id'][.='146']");
}
/**
 * Tests phrase fields ({@code pf}, {@code pf2}, {@code pf3}) and phrase slop
 * ({@code ps}, {@code ps2}, {@code ps3}, and inline {@code field~slop}).
 */
public void testPfPs() {
  // "foo" and "bar" drift further apart from s0 to s3 while boost_d grows
  assertU(adoc("id", "s0", "phrase_sw", "foo bar a b c", "boost_d", "1.0"));
  assertU(adoc("id", "s1", "phrase_sw", "foo a bar b c", "boost_d", "2.0"));
  assertU(adoc("id", "s2", "phrase_sw", "foo a b bar c", "boost_d", "3.0"));
  assertU(adoc("id", "s3", "phrase_sw", "foo a b c bar", "boost_d", "4.0"));
  assertU(commit());

  final String s0First = "//doc[1]/str[@name='id'][.='s0']";
  final String s1First = "//doc[1]/str[@name='id'][.='s1']";
  final String s2First = "//doc[1]/str[@name='id'][.='s2']";

  assertQ("default order assumption wrong",
          req("q", "foo bar",
              "qf", "phrase_sw",
              "bf", "boost_d",
              "fl", "score,*",
              "defType", "edismax"),
          "//doc[1]/str[@name='id'][.='s3']",
          "//doc[2]/str[@name='id'][.='s2']",
          "//doc[3]/str[@name='id'][.='s1']",
          "//doc[4]/str[@name='id'][.='s0']");

  assertQ("pf not working",
          req("q", "foo bar",
              "qf", "phrase_sw",
              "pf", "phrase_sw^10",
              "bf", "boost_d",
              "fl", "score,*",
              "defType", "edismax"),
          s0First);

  assertQ("pf2 not working",
          req("q", "foo bar",
              "qf", "phrase_sw",
              "pf2", "phrase_sw^10",
              "bf", "boost_d",
              "fl", "score,*",
              "defType", "edismax"),
          s0First);

  assertQ("pf3 not working",
          req("q", "a b bar",
              "qf", "phrase_sw",
              "pf3", "phrase_sw^10",
              "bf", "boost_d",
              "fl", "score,*",
              "defType", "edismax"),
          s2First);

  assertQ("ps not working for pf2",
          req("q", "bar foo",
              "qf", "phrase_sw",
              "pf2", "phrase_sw^10",
              "ps", "2",
              "bf", "boost_d",
              "fl", "score,*",
              "defType", "edismax"),
          s0First);

  assertQ("ps not working for pf3",
          req("q", "a bar foo",
              "qf", "phrase_sw",
              "pf3", "phrase_sw^10",
              "ps", "3",
              "bf", "boost_d",
              "fl", "score,*",
              "debugQuery", "true",
              "defType", "edismax"),
          s1First);

  // inline slops must be kept as given; entries without one fall back to ps
  assertQ("ps/ps2/ps3 with default slop overrides not working",
          req("q", "zzzz xxxx cccc vvvv",
              "qf", "phrase_sw",
              "pf", "phrase_sw~1^10 phrase_sw~2^20 phrase_sw^30",
              "pf2", "phrase_sw~2^22 phrase_sw^33",
              "pf3", "phrase_sw~2^222 phrase_sw^333",
              "ps", "3",
              "defType", "edismax",
              "debugQuery", "true"),
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc vvvv\"~1^10.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc vvvv\"~2^20.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc vvvv\"~3^30.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx\"~2^22.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc\"~2^22.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"cccc vvvv\"~2^22.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx\"~3^33.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc\"~3^33.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"cccc vvvv\"~3^33.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc\"~2^222.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc vvvv\"~2^222.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc\"~3^333.0')]",
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc vvvv\"~3^333.0')]");

  assertQ("ps2 not working",
          req("q", "bar foo",
              "qf", "phrase_sw",
              "pf2", "phrase_sw^10",
              "ps2", "2",
              "bf", "boost_d",
              "fl", "score,*",
              "defType", "edismax"),
          s0First);

  assertQ("Specifying slop in pf2 param not working",
          req("q", "bar foo",
              "qf", "phrase_sw",
              "pf2", "phrase_sw~2^10",
              "bf", "boost_d",
              "fl", "score,*",
              "defType", "edismax"),
          s0First);

  assertQ("Slop in ps2 parameter should override ps",
          req("q", "bar foo",
              "qf", "phrase_sw",
              "pf2", "phrase_sw^10",
              "ps", "0",
              "ps2", "2",
              "bf", "boost_d",
              "fl", "score,*",
              "defType", "edismax"),
          s0First);

  assertQ("ps3 not working",
          req("q", "a bar foo",
              "qf", "phrase_sw",
              "pf3", "phrase_sw^10",
              "ps3", "3",
              "bf", "boost_d",
              "fl", "score,*",
              "defType", "edismax"),
          s1First);

  assertQ("Specifying slop in pf3 param not working",
          req("q", "a bar foo",
              "qf", "phrase_sw",
              "pf3", "phrase_sw~3^10",
              "bf", "boost_d",
              "fl", "score,*",
              "defType", "edismax"),
          s1First);

  assertQ("ps2 should not override slop specified inline in pf2",
          req("q", "zzzz xxxx cccc vvvv",
              "qf", "phrase_sw",
              "pf2", "phrase_sw~2^22",
              "ps2", "4",
              "defType", "edismax",
              "debugQuery", "true"),
          "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx\"~2^22.0')]");
}
/**
 * Verifies that all reserved characters are properly escaped when being set
 * in {@link org.apache.solr.search.ExtendedDismaxQParser.Clause#val}.
 *
 * @see ExtendedDismaxQParser#splitIntoClauses(String, boolean)
 */
@Test
public void testEscapingOfReservedCharacters() throws Exception {
  final String oneHit = "*[count(//doc)=1]";
  final String threeHits = "*[count(//doc)=3]";

  // create a document that contains all reserved characters
  String allReservedCharacters = "!():^[]{}~*?\"+-\\|&/";
  assertU(adoc("id", "reservedChars",
               "name", allReservedCharacters,
               "cat_s", "foo/"));
  assertU(commit());

  // The backslash needs to be escaped manually: the query parser treats a
  // raw backslash as an escape for the character that follows it.
  String query = allReservedCharacters.replace("\\", "\\\\");

  // Query for all those reserved characters. This will fail to parse in the
  // initial parse, meaning that the escaped query will then be used.
  assertQ("Escaping reserved characters",
          req("q", query,
              "qf", "name",
              "mm", "100%",
              "defType", "edismax"),
          oneHit);

  // Query string field 'cat_s' for special char / - causes SyntaxError without patch SOLR-3467
  assertQ("Escaping string with reserved / character",
          req("q", "foo/",
              "qf", "cat_s",
              "mm", "100%",
              "defType", "edismax"),
          oneHit);

  // a colon the client already escaped, once with qf=id and once with qf=text
  final String preEscapedColonQuery = "text_sw:(theos OR thistokenhasa\\:preescapedcolon OR theou)";
  assertQ("Might be double-escaping a client-escaped colon",
          req("q", preEscapedColonQuery, "defType", "edismax", "qf", "id"),
          threeHits);
  assertQ("Might be double-escaping a client-escaped colon",
          req("q", preEscapedColonQuery, "defType", "edismax", "qf", "text"),
          threeHits);
}
/**
 * SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token.
 * Runs the same three-character CJK query against {@code standardtok} with
 * three {@code mm} settings and checks the number of documents returned.
 */
public void testCJK() throws Exception {
  // { assertion message, mm value, expected document-count XPath }
  final String[][] cases = {
    {"test cjk (disjunction)",    "0%",   "*[count(//doc)=3]"},
    {"test cjk (minShouldMatch)", "67%",  "*[count(//doc)=2]"},
    {"test cjk (conjunction)",    "100%", "*[count(//doc)=1]"},
  };

  for (String[] c : cases) {
    assertQ(c[0],
            req("q", "大亚湾",
                "qf", "standardtok",
                "mm", c[1],
                "defType", "edismax"),
            c[2]);
  }
}
/**
 * Test that minShouldMatch works with aliasing for implicit boolean
 * queries, first with an alias over a single field and then with an alias
 * over two fields.
 */
public void testCJKAliasing() throws Exception {
  final String oneField = "standardtok";
  final String twoFields = "standardtok HTMLstandardtok";

  // { assertion message, f.myalias.qf value, mm value, expected document-count XPath }
  final String[][] cases = {
    // single field
    {"test cjk (aliasing+disjunction)",    oneField,  "0%",   "*[count(//doc)=3]"},
    {"test cjk (aliasing+minShouldMatch)", oneField,  "67%",  "*[count(//doc)=2]"},
    {"test cjk (aliasing+conjunction)",    oneField,  "100%", "*[count(//doc)=1]"},
    // multifield
    {"test cjk (aliasing+disjunction)",    twoFields, "0%",   "*[count(//doc)=6]"},
    {"test cjk (aliasing+minShouldMatch)", twoFields, "67%",  "*[count(//doc)=4]"},
    {"test cjk (aliasing+conjunction)",    twoFields, "100%", "*[count(//doc)=2]"},
  };

  for (String[] c : cases) {
    assertQ(c[0],
            req("q", "myalias:大亚湾",
                "f.myalias.qf", c[1],
                "mm", c[2],
                "defType", "edismax"),
            c[3]);
  }
}
/**
 * Test that we apply boosts correctly: whichever of the two CJK fields is
 * boosted in {@code qf} must supply the top-ranked document.
 */
public void testCJKBoosts() throws Exception {
  final String boostStandard = "standardtok^2 HTMLstandardtok";
  final String boostHtml = "standardtok HTMLstandardtok^2";
  final String doc57First = "//result/doc[1]/str[@name='id'][.='57']";
  final String doc60First = "//result/doc[1]/str[@name='id'][.='60']";

  // { assertion message, qf, mm, expected document-count XPath, expected top document XPath }
  final String[][] cases = {
    {"test cjk (disjunction)",    boostStandard, "0%",   "*[count(//doc)=6]", doc57First},
    {"test cjk (minShouldMatch)", boostStandard, "67%",  "*[count(//doc)=4]", doc57First},
    {"test cjk (conjunction)",    boostStandard, "100%", "*[count(//doc)=2]", doc57First},
    // now boost the other field
    {"test cjk (disjunction)",    boostHtml,     "0%",   "*[count(//doc)=6]", doc60First},
    {"test cjk (minShouldMatch)", boostHtml,     "67%",  "*[count(//doc)=4]", doc60First},
    {"test cjk (conjunction)",    boostHtml,     "100%", "*[count(//doc)=2]", doc60First},
  };

  for (String[] c : cases) {
    assertQ(c[0],
            req("q", "大亚湾",
                "qf", c[1],
                "mm", c[2],
                "defType", "edismax"),
            c[3], c[4]);
  }
}
/**
 * Always apply minShouldMatch to the inner boolean queries created from
 * whitespace, as these are never structured lucene queries but only come
 * from unstructured text. Here the CJK term is one side of an explicit
 * {@code OR}.
 */
public void testCJKStructured() throws Exception {
  // { assertion message, mm value, expected document-count XPath }
  final String[][] cases = {
    {"test cjk (disjunction)",    "0%",   "*[count(//doc)=3]"},
    {"test cjk (minShouldMatch)", "67%",  "*[count(//doc)=2]"},
    {"test cjk (conjunction)",    "100%", "*[count(//doc)=1]"},
  };

  for (String[] c : cases) {
    assertQ(c[0],
            req("q", "大亚湾 OR bogus",
                "qf", "standardtok",
                "mm", c[1],
                "defType", "edismax"),
            c[2]);
  }
}
/**
 * Test that we don't apply minShouldMatch to the inner boolean queries
 * when there are synonyms (these are indicated by coordination factor).
 */
public void testSynonyms() throws Exception {
  // No fixture document contains "fooaaa" itself, yet one document must
  // still match. Presumably that is doc 61 ("bazaaa") via the synonym
  // expansion group - confirm against the synonyms configuration.
  final String userQuery = "fooaaa";
  final String oneHit = "*[count(//doc)=1]";

  assertQ("test synonyms",
          req("q", userQuery,
              "qf", "text_sw",
              "mm", "100%",
              "defType", "edismax"),
          oneHit);
}
/**
 * Checks that {@code ExtendedDismaxQParser} can be customized through
 * subclasses: one that switches query fields by a {@code language}
 * parameter and one that turns selected terms into fuzzy queries.
 */
public void testEdismaxSimpleExtension() throws SyntaxError {
  ModifiableSolrParams params = new ModifiableSolrParams();
  params.set("q", "foo bar");
  params.set("qf", "subject title^5");
  params.set("qf_fr", "subject_fr title_fr^5");
  params.set("qf_en", "subject_en title_en^5");
  params.set("qf_es", "subject_es title_es^5");

  // no "language" parameter: the plain qf fields are used
  MultilanguageQueryParser defaultParser =
      new MultilanguageQueryParser("foo bar", new ModifiableSolrParams(), params, req(params));
  Query defaultQuery = defaultParser.parse();
  assertNotNull(defaultQuery);
  assertTrue(containsClause(defaultQuery, "title", "foo", 5, false));
  assertTrue(containsClause(defaultQuery, "title", "bar", 5, false));
  assertTrue(containsClause(defaultQuery, "subject", "foo", 1, false));
  assertTrue(containsClause(defaultQuery, "subject", "bar", 1, false));

  // language=es: the fields from qf_es are used instead
  params.set("language", "es");
  MultilanguageQueryParser spanishParser =
      new MultilanguageQueryParser("foo bar", new ModifiableSolrParams(), params, req(params));
  Query spanishQuery = spanishParser.parse();
  assertNotNull(spanishQuery);
  assertTrue(containsClause(spanishQuery, "title_es", "foo", 5, false));
  assertTrue(containsClause(spanishQuery, "title_es", "bar", 5, false));
  assertTrue(containsClause(spanishQuery, "subject_es", "foo", 1, false));
  assertTrue(containsClause(spanishQuery, "subject_es", "bar", 1, false));

  // fuzzy extension: "absence" must come back as a fuzzy clause
  FuzzyDismaxQParser fuzzyParser =
      new FuzzyDismaxQParser("foo bar absence", new ModifiableSolrParams(), params, req(params));
  Query fuzzyParsed = fuzzyParser.parse();
  assertNotNull(fuzzyParsed);
  assertTrue(containsClause(fuzzyParsed, "title", "foo", 5, false));
  assertTrue(containsClause(fuzzyParsed, "title", "bar", 5, false));
  assertTrue(containsClause(fuzzyParsed, "title", "absence", 5, true));
}
/**
 * Dispatches on the runtime type of {@code query} to the matching
 * {@code containsClause} overload.
 *
 * @param query the query (tree) to inspect
 * @param field expected field name of the term
 * @param value expected term text
 * @param boost expected boost
 * @param fuzzy {@code true} to look for a {@link FuzzyQuery}, {@code false}
 *              to look for a {@link TermQuery}
 * @return {@code true} if a matching clause is found; {@code false} if none
 *         is found or the query type is not handled
 */
private boolean containsClause(Query query, String field, String value,
    int boost, boolean fuzzy) {
  boolean found = false;
  if (query instanceof BooleanQuery) {
    BooleanQuery booleanQuery = (BooleanQuery) query;
    found = containsClause(booleanQuery, field, value, boost, fuzzy);
  } else if (query instanceof DisjunctionMaxQuery) {
    DisjunctionMaxQuery dismaxQuery = (DisjunctionMaxQuery) query;
    found = containsClause(dismaxQuery, field, value, boost, fuzzy);
  } else if (!fuzzy && query instanceof TermQuery) {
    TermQuery termQuery = (TermQuery) query;
    found = containsClause(termQuery, field, value, boost);
  } else if (fuzzy && query instanceof FuzzyQuery) {
    FuzzyQuery fuzzyQuery = (FuzzyQuery) query;
    found = containsClause(fuzzyQuery, field, value, boost);
  }
  return found;
}
/**
 * Returns {@code true} if the fuzzy query's term has the given field and
 * text and the query carries the given boost.
 */
private boolean containsClause(FuzzyQuery query, String field, String value,
    int boost) {
  final boolean sameField = query.getTerm().field().equals(field);
  return sameField
      && query.getTerm().bytes().utf8ToString().equals(value)
      && query.getBoost() == boost;
}
/**
 * Returns {@code true} if any clause of the boolean query contains the
 * requested term, searching nested queries as well.
 */
private boolean containsClause(BooleanQuery query, String field, String value, int boost, boolean fuzzy) {
  final BooleanClause[] clauses = query.getClauses();
  for (int i = 0; i < clauses.length; i++) {
    Query nested = clauses[i].getQuery();
    if (containsClause(nested, field, value, boost, fuzzy)) {
      return true;
    }
  }
  return false;
}
/**
 * Returns {@code true} if the term query's term has the given field and
 * text and the query carries the given boost.
 */
private boolean containsClause(TermQuery query, String field, String value, int boost) {
  final boolean sameField = query.getTerm().field().equals(field);
  return sameField
      && query.getTerm().bytes().utf8ToString().equals(value)
      && query.getBoost() == boost;
}
/**
 * Returns {@code true} if any disjunct of the dismax query contains the
 * requested term, searching nested queries as well.
 */
private boolean containsClause(DisjunctionMaxQuery query, String field, String value, int boost, boolean fuzzy) {
  boolean found = false;
  for (Query disjunct : query.getDisjuncts()) {
    found = containsClause(disjunct, field, value, boost, fuzzy);
    if (found) {
      break;
    }
  }
  return found;
}
/**
 * Example edismax extension: plugs in a configuration that, when a
 * {@code language} request parameter is present, takes its query fields
 * from the parameter named {@code "qf_" + language}.
 */
class MultilanguageQueryParser extends ExtendedDismaxQParser {

  public MultilanguageQueryParser(String qstr, SolrParams localParams,
      SolrParams params, SolrQueryRequest req) {
    super(qstr, localParams, params, req);
  }

  /** Supplies the language-aware configuration in place of the default one. */
  @Override
  protected ExtendedDismaxConfiguration createConfiguration(String qstr,
      SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    return new MultilanguageDismaxConfiguration(localParams, params, req);
  }

  /** Configuration that replaces {@code queryFields} when a language is given. */
  class MultilanguageDismaxConfiguration extends ExtendedDismaxConfiguration {

    public MultilanguageDismaxConfiguration(SolrParams localParams,
        SolrParams params, SolrQueryRequest req) {
      super(localParams, params, req);
      final String language = params.get("language");
      if (language == null) {
        // no language requested: keep the fields set up by the superclass
        return;
      }
      final String languageParam = "qf_" + language;
      super.queryFields = SolrPluginUtils.parseFieldBoosts(solrParams.getParams(languageParam));
    }
  }
}
/**
 * Example edismax extension: plugs in a query parser that builds a fuzzy
 * query for terms on a fixed list of frequently misspelled words.
 */
class FuzzyDismaxQParser extends ExtendedDismaxQParser {

  public FuzzyDismaxQParser(String qstr, SolrParams localParams,
      SolrParams params, SolrQueryRequest req) {
    super(qstr, localParams, params, req);
  }

  /** Supplies the fuzzy-aware parser in place of the default one. */
  @Override
  protected ExtendedSolrQueryParser createEdismaxQueryParser(QParser qParser,
      String field) {
    return new FuzzyQueryParser(qParser, field);
  }

  /** Query parser that uses a fuzzy query for known misspelling-prone words. */
  class FuzzyQueryParser extends ExtendedSolrQueryParser {

    // words for which a fuzzy query is built instead of a plain field query
    private Set<String> frequentlyMisspelledWords = new HashSet<String>();

    public FuzzyQueryParser(QParser parser, String defaultField) {
      super(parser, defaultField);
      frequentlyMisspelledWords.add("absence");
    }

    @Override
    protected Query getFieldQuery(String field,
        String val, boolean quoted) throws SyntaxError {
      if (!frequentlyMisspelledWords.contains(val)) {
        return super.getFieldQuery(field, val, quoted);
      }
      // listed word: fuzzy query with a minimum similarity of 0.75
      return getFuzzyQuery(field, val, 0.75F);
    }
  }
}
}