blob: f7e4bb07dc6c2ae7b98a81777378f36e3ce6418d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.stream.Stream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.Utils;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.util.SolrPluginUtils;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
  // schema12 has no _version_ field, so the update log is switched off before the core starts
  final String updateLogProperty = "enable.update.log";
  System.setProperty(updateLogProperty, "false");

  final String configFile = "solrconfig.xml";
  final String schemaFile = "schema12.xml";
  initCore(configFile, schemaFile);

  // populate the shared fixture documents used by the tests in this class
  index();
}
/**
 * Adds the fixture documents (ids 42 through 74) and commits them.
 * Documents are added strictly in ascending id order.
 */
public static void index() throws Exception {
  // ids 42-47: name / title / subject / trait documents
  assertU(adoc(
      "id", "42",
      "trait_ss", "Tool",
      "trait_ss", "Obnoxious",
      "name", "Zapp Brannigan"));
  assertU(adoc(
      "id", "43",
      "title", "Democratic Order op Planets"));
  assertU(adoc(
      "id", "44",
      "trait_ss", "Tool",
      "name", "The Zapper"));
  assertU(adoc(
      "id", "45",
      "trait_ss", "Chauvinist",
      "title", "25 star General"));
  assertU(adoc(
      "id", "46",
      "trait_ss", "Obnoxious",
      "subject", "Defeated the pacifists op the Gandhi nebula",
      "t_special", "literal:colon value",
      "movies_t", "first is Mission: Impossible, second is Terminator 2: Judgement Day. Terminator:3 ok...",
      "foo_i", "8"));
  assertU(adoc(
      "id", "47",
      "trait_ss", "Pig",
      "text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!"));

  // ids 48-50: text_sw documents, two of them carrying an integer value
  assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i", "100"));
  assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i", "-100"));
  assertU(adoc("id", "50", "text_sw", "start new big city end"));

  // id 51: a location-only document
  assertU(adoc("id", "51", "store", "12.34,-56.78"));

  // ids 52-54: three text_sw documents sharing the term "tekna"
  final String[] teknaTexts = {
      "tekna theou klethomen",
      "nun tekna theou esmen",
      "phanera estin ta tekna tou theou"
  };
  int nextId = 52;
  for (String text : teknaTexts) {
    assertU(adoc("id", String.valueOf(nextId++), "text_sw", text));
  }

  // ids 55-60: the same three CJK values, first in standardtok and then in HTMLstandardtok
  final String[] cjkValues = {"大", "大亚", "大亚湾"};
  for (String field : new String[] {"standardtok", "HTMLstandardtok"}) {
    for (String value : cjkValues) {
      assertU(adoc("id", String.valueOf(nextId++), field, value));
    }
  }

  // ids 61-72: more text_sw documents
  final String[] moreTextSw = {
      "bazaaa",               // 61: synonyms in an expansion group
      "oil stocks",           // 62
      "gold stocks",          // 63
      "stocks gold stockade", // 64
      "snake oil",            // 65
      "hair ties barbie",     // 66: SOLR-8812 user query example (66-71)
      "hair ties",            // 67
      "hair barbie",          // 68
      "ties barbie",          // 69
      "hair",                 // 70
      "ties",                 // 71
      "wifi ATM"              // 72
  };
  for (String text : moreTextSw) {
    assertU(adoc("id", String.valueOf(nextId++), "text_sw", text));
  }

  // ids 73-74: shingle and char-filter fields
  assertU(adoc("id", "73", "shingle23", "A B X D E"));
  assertU(adoc("id", "74", "isocharfilter", "niño"));
  // disabled:
  // assertU(adoc("id", "74", "text_pick_best", "tabby"));
  // assertU(adoc("id", "74", "text_as_distinct", "persian"));

  assertU(commit());
}
@Test
public void testSyntax() throws Exception {
  final String nothingFound = "/response/numFound==0";
  final String missingFieldWildcard = "doesnotexist_s:*";

  for (String splitMode : new String[] {"true", "false"}) {
    // a bare * should be treated as *:*, so at least one document must come back
    assertJQ(
        req("defType", "edismax", "q", "*", "df", "doesnotexist_s", "sow", splitMode),
        "/response/docs/[0]==");

    // a wildcard on a field that does not exist finds nothing (the same request is issued twice)
    assertJQ(req("defType", "edismax", "q", missingFieldWildcard, "sow", splitMode), nothingFound);
    assertJQ(req("defType", "edismax", "q", missingFieldWildcard, "sow", splitMode), nothingFound);

    // likewise for a parenthesised group of bare wildcards on that field
    assertJQ(
        req("defType", "edismax", "q", "doesnotexist_s:( * * * )", "sow", splitMode),
        nothingFound);
  }
}
/**
 * Checks that queries consisting only of, or ending in, the {@code +} and {@code -} operators
 * do not make the parser throw, for both values of {@code sow}.
 */
public void testTrailingOperators() throws Exception {
  for (String sow : Arrays.asList("true", "false")) {
    // really just test that exceptions aren't thrown by
    // single + -
    assertJQ(req("defType", "edismax", "q", "-", "sow", sow)
        , "/response==");
    assertJQ(req("defType", "edismax", "q", "+", "sow", sow)
        , "/response==");
    assertJQ(req("defType", "edismax", "q", "+ - +", "sow", sow)
        , "/response==");
    assertJQ(req("defType", "edismax", "q", "- + -", "sow", sow)
        , "/response==");

    // a trailing operator after a real clause must not change the match
    assertJQ(req("defType", "edismax", "q", "id:47 +", "sow", sow)
        , "/response/numFound==1");
    assertJQ(req("defType", "edismax", "q", "id:47 -", "sow", sow)
        , "/response/numFound==1");

    // random strings made of spaces and +/- operators
    Random r = random();
    for (int i = 0; i < 100; i++) {
      // Choose the length once per string. Re-rolling the bound in the loop condition
      // (as was done previously) skews the generated strings towards very short ones.
      int length = r.nextInt(10);
      StringBuilder sb = new StringBuilder(length);
      for (int j = 0; j < length; j++) {
        // nextInt(3) only yields 0, 1 or 2; the former "case 3" (arbitrary character)
        // could never be selected and has been removed as dead code.
        switch (r.nextInt(3)) {
          case 0: sb.append(' '); break;
          case 1: sb.append('+'); break;
          default: sb.append('-'); break;
        }
      }
      String q = sb.toString();
      assertJQ(req("defType", "edismax", "q", q, "sow", sow)
          , "/response==");
    }
  }
}
/**
 * Checks how the {@code lowercaseOperators} parameter affects whether a lower-case
 * {@code and} is treated as an operator, for both values of {@code sow}.
 */
public void testLowercaseOperators() {
  final String upperCaseQuery = "Zapp AND Brannigan";
  final String lowerCaseQuery = "Zapp and Brannigan";
  final String singleHit = "*[count(//doc)=1]";
  final String noHit = "*[count(//doc)=0]";

  for (String splitMode : new String[] {"true", "false"}) {
    // an upper-case AND matches the single document regardless of the flag
    assertQ("Upper case operator",
        req("q", upperCaseQuery,
            "qf", "name",
            "lowercaseOperators", "false",
            "sow", splitMode,
            "defType", "edismax"),
        singleHit);

    assertQ("Upper case operator, allow lowercase",
        req("q", upperCaseQuery,
            "qf", "name",
            "lowercaseOperators", "true",
            "sow", splitMode,
            "defType", "edismax"),
        singleHit);

    // with q.op=AND and the flag off, the lower-case "and" yields no match
    assertQ("Lower case operator, don't allow lowercase operators",
        req("q", lowerCaseQuery,
            "qf", "name",
            "q.op", "AND",
            "lowercaseOperators", "false",
            "sow", splitMode,
            "defType", "edismax"),
        noHit);

    // leaving the flag out gives the same result as setting it to false
    assertQ("The default for lowercaseOperators should not allow lower case and",
        req("q", lowerCaseQuery,
            "qf", "name",
            "q.op", "AND",
            "sow", splitMode,
            "defType", "edismax"),
        noHit);

    // with the flag on, the lower-case "and" query matches the document again
    assertQ("Lower case operator, allow lower case operators",
        req("q", lowerCaseQuery,
            "qf", "name",
            "q.op", "AND",
            "lowercaseOperators", "true",
            "sow", splitMode,
            "defType", "edismax"),
        singleHit);
  }
}
/**
 * Queries the {@code isocharfilter} field with and without the accented character.
 */
public void testCharFilter() throws Exception {
  // "nino" shows the char filter was applied by the indexer;
  // "niño" shows the char filter was applied to the query as well
  final String[] userQueries = {"nino", "niño"};
  for (String userQuery : userQueries) {
    assertQ(
        req("defType", "edismax",
            "stopwords", "false",
            "qf", "isocharfilter",
            "q", userQuery),
        "*[count(//doc)=1]");
  }
}
// exercises the edismax query parser (which is based on the dismax parser)
public void testFocusQueryParser() {
  // query and expected-count expressions shared by the assertions below
  final String idRange = "id:[42 TO 51]";
  final String tenDocs = "*[count(//doc)=10]";
  final String oneDoc = "*[count(//doc)=1]";
  final String twoDocs = "*[count(//doc)=2]";
  final String zeroDocs = "*[count(//doc)=0]";
  final String doc46First = "//doc[1]/str[@name='id'][.='46']";

  // frequently used qf values
  final String qfText = "name title subject text";
  final String qfTextSw = "name title subject text_sw";
  final String qfMoviesAndInt = "movies_t foo_i";

  // the two spellings of the geofilt sub-query used for the _query_ pseudo-field checks
  final String geofiltViaField = "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\"";
  final String geofiltViaLocalParams = " {!geofilt d=20 sfield=store pt=12.34,-56.78}"; // notice leading space

  // ---- blank queries fall back to q.alt ----
  assertQ("blank q",
      req("q", " ",
          "q.alt", idRange,
          "defType", "edismax"),
      tenDocs);
  assertQ("ideographic space should be considered whitespace",
      req("q", "\u3000",
          "q.alt", idRange,
          "defType", "edismax"),
      tenDocs);

  assertQ("expected doc is missing (using un-escaped edismax w/qf)",
      req("q", "literal:colon",
          "qf", "t_special",
          "defType", "edismax"),
      doc46First);

  // ---- the id range through the standard handler and through edismax ----
  assertQ("standard request handler returns all matches",
      req(idRange),
      tenDocs);
  assertQ("edismax query parser returns all matches",
      req("q", idRange,
          "defType", "edismax"),
      tenDocs);

  assertQ(req("defType", "edismax", "qf", "trait_ss", "q", "Tool"), twoDocs);

  // field types that aren't applicable must not cause an exception to be thrown
  assertQ(
      req("defType", "edismax",
          "qf", "trait_ss foo_i foo_f foo_dt foo_l foo_d foo_b",
          "q", "Tool"),
      twoDocs);

  // numeric field types can be queried explicitly (positive and negative value)
  assertQ(req("defType", "edismax", "qf", "text_sw", "q", "foo_i:100"), oneDoc);
  assertQ(req("defType", "edismax", "qf", "text_sw", "q", "foo_i:-100"), oneDoc);

  // numeric field types can be queried via qf
  assertQ(req("defType", "edismax", "qf", "text_sw foo_i", "q", "100"), oneDoc);

  // ---- qf defaults ----
  assertQ("qf defaults to df",
      req("defType", "edismax", "df", "trait_ss", "q", "Tool"),
      twoDocs);
  assertQ("qf defaults to defaultSearchField",
      req("defType", "edismax", "q", "op"),
      twoDocs);

  assertQ(req("defType", "edismax", "qf", qfText, "q", "op"), twoDocs);

  // ---- q.op ----
  assertQ(
      req("defType", "edismax",
          "qf", qfTextSw,
          "q.op", "AND",
          "q", "Order op"),
      oneDoc);
  assertQ(
      req("defType", "edismax",
          "qf", qfTextSw,
          "q.op", "OR",
          "q", "Order op"),
      twoDocs);

  // ---- boolean operators inside the query string: {query, expected} ----
  final String[][] operatorCases = {
      {"Order AND op", oneDoc},
      {"Order and op", twoDocs},
      {"+Order op", oneDoc},
      {"Order OR op", twoDocs},
      {"Order or op", twoDocs},
      {"*:*", tenDocs},
      {"star OR (-star)", tenDocs},
      {"id:42 OR (-id:42)", tenDocs},
  };
  for (String[] operatorCase : operatorCases) {
    assertQ(req("defType", "edismax", "qf", qfText, "q", operatorCase[0]), operatorCase[1]);
  }

  // basic synonyms work
  assertQ(req("defType", "edismax", "qf", "text_sw", "q", "GB"), oneDoc);

  // stopword removal in the main query part
  assertQ(req("defType", "edismax", "qf", "text_sw", "q", "the big"), twoDocs);

  // stopwords not removed
  assertQ(
      req("defType", "edismax",
          "qf", "text_sw",
          "stopwords", "false",
          "q.op", "AND",
          "q", "the big"),
      oneDoc);

  // ---- searching for a literal colon value when clearly not used for a field ----
  assertQ("expected doc is missing (using standard)",
      req("q", "t_special:literal\\:colon"),
      doc46First);
  assertQ("expected doc is missing (using escaped edismax w/field)",
      req("q", "t_special:literal\\:colon",
          "defType", "edismax"),
      doc46First);
  assertQ("expected doc is missing (using un-escaped edismax w/field)",
      req("q", "t_special:literal:colon",
          "defType", "edismax"),
      doc46First);
  assertQ("expected doc is missing (using escaped edismax w/qf)",
      req("q", "literal\\:colon",
          "qf", "t_special",
          "defType", "edismax"),
      doc46First);
  assertQ("expected doc is missing (using un-escaped edismax w/qf)",
      req("q", "literal:colon",
          "qf", "t_special",
          "defType", "edismax"),
      doc46First);

  // colons inside free text, all terms required
  final String[] colonQueries = {
      "terminator:3",
      "Mission:Impossible",
      "Mission : Impossible",
      "Mission: Impossible",
      "Terminator 2: Judgement Day",
  };
  for (String colonQuery : colonQueries) {
    assertQ(req("defType", "edismax", "mm", "100%", "q", colonQuery, "qf", "movies_t"), oneDoc);
  }

  // make sure the clause wasn't eliminated
  assertQ(
      req("defType", "edismax", "mm", "100%", "q", "Terminator 10: Judgement Day", "qf", "movies_t"),
      zeroDocs);

  // ---- throw in a numeric field ----
  assertQ(
      req("defType", "edismax", "mm", "0", "q", "Terminator: 100", "qf", qfMoviesAndInt),
      twoDocs);
  assertQ(
      req("defType", "edismax", "mm", "100%", "q", "Terminator: 100", "qf", qfMoviesAndInt, "sow", "true"),
      zeroDocs);

  // When sow=false, the per-field query structures differ (no "Terminator" query on integer field foo_i),
  // so a dismax-per-field is constructed. As a result, mm=100% is applied per-field instead of per-term;
  // since there is only one term (100) required in the foo_i field's dismax, the query can match docs that
  // only have the 100 term in the foo_i field, and don't necessarily have "Terminator" in any field.
  assertQ(
      req("defType", "edismax", "mm", "100%", "q", "Terminator: 100", "qf", qfMoviesAndInt, "sow", "false"),
      oneDoc);
  assertQ(
      req("defType", "edismax", "mm", "100%", "q", "Terminator: 100", "qf", qfMoviesAndInt), // default sow=false
      oneDoc);
  assertQ(
      req("defType", "edismax", "mm", "100%", "q", "Terminator: 8", "qf", qfMoviesAndInt),
      oneDoc);
  assertQ(
      req("defType", "edismax", "mm", "0", "q", "movies_t:Terminator 100", "qf", qfMoviesAndInt),
      twoDocs);

  // ---- special pseudo-fields like _query_ and _val_ ----
  // _query_ should be excluded by default
  assertQ(
      req("defType", "edismax",
          "mm", "100%",
          "fq", "id:51",
          "q", geofiltViaField,
          "debugQuery", "true"),
      zeroDocs,
      "//str[@name='parsedquery_toString'][.='+(((text:queri) (text:\"geofilt d 20 sfield store pt 12 34 56 78\"))~2)']");

  // again; this time use embedded local-params style
  assertQ(
      req("defType", "edismax",
          "mm", "100%",
          "fq", "id:51",
          "q", geofiltViaLocalParams),
      zeroDocs);

  // should work when explicitly allowed
  assertQ(
      req("defType", "edismax",
          "mm", "100%",
          "fq", "id:51",
          "uf", "id _query_",
          "q", geofiltViaField),
      oneDoc);
  assertQ(
      req("defType", "edismax",
          "mm", "100%",
          "fq", "id:51",
          "uf", "id",
          "uf", "_query_",
          "q", geofiltViaField),
      oneDoc);

  // again; this time use embedded local-params style
  assertQ(
      req("defType", "edismax",
          "mm", "100%",
          "fq", "id:51",
          "uf", "id",
          "uf", "_query_",
          "q", geofiltViaLocalParams),
      oneDoc);

  // should fail when prohibited
  assertQ(
      req("defType", "edismax",
          "mm", "100%",
          "fq", "id:51",
          "uf", "* -_query_", // explicitly excluded
          "q", geofiltViaField),
      zeroDocs);
  assertQ(
      req("defType", "edismax",
          "mm", "100%",
          "fq", "id:51",
          "uf", "id", // excluded by omission
          "q", geofiltViaField),
      zeroDocs);

  /* Disabled: stopword removal in conjunction with multi-word synonyms at query time
   * breaks this test.
  // multi-word synonyms
  // remove id:50 which contains the false match
  assertQ(req("defType", "edismax", "qf", "text_t", "indent","true", "debugQuery","true",
  "q","-id:50 nyc"), oner
  );
  */

  /* Disabled: these fail because multi-word synonyms are being used at query time
  // this will incorrectly match "new big city"
  assertQ(req("defType", "edismax", "qf", "id title",
  "q","nyc"), oner
  );
  // this will incorrectly match "new big city"
  assertQ(req("defType", "edismax", "qf", "title",
  "q","the big apple"), nor
  );
  */
}
/**
 * Checks that {@code bq} boost queries reorder the three "tekna" documents (ids 52-54).
 */
public void testBoostQuery() {
  final String idFilter = "id:[52 TO 54]";
  // expected ranking when 54 gets the largest boost and 53 the next largest
  final String[] order54Then53Then52 = {
      "//doc[1]/str[@name='id'][.='54']",
      "//doc[2]/str[@name='id'][.='53']",
      "//doc[3]/str[@name='id'][.='52']"
  };

  // simple term boosts
  assertQ(
      req("q", "tekna",
          "qf", "text_sw",
          "defType", "edismax",
          "bq", "id:54^100",
          "bq", "id:53^10",
          "fq", idFilter,
          "fl", "id,score"),
      order54Then53Then52);

  // non-trivial bqs
  assertQ(
      req("q", "tekna",
          "qf", "text_sw",
          "defType", "edismax",
          "bq", "(text_sw:blasdfadsf id:54)^100",
          "bq", "id:[53 TO 53]^10",
          "fq", idFilter,
          "fl", "id,score"),
      order54Then53Then52);

  // genuine negative boosts are not legal
  // see SOLR-3823, SOLR-3278, LUCENE-4378 and
  // https://wiki.apache.org/solr/SolrRelevancyFAQ#How_do_I_give_a_negative_.28or_very_low.29_boost_to_documents_that_match_a_query.3F
  final String[] order53Then52Then54 = {
      "//doc[1]/str[@name='id'][.='53']",
      "//doc[2]/str[@name='id'][.='52']",
      "//doc[3]/str[@name='id'][.='54']"
  };
  assertQ(
      req("q", "tekna",
          "qf", "text_sw",
          "defType", "edismax",
          "bq", "(*:* -id:54)^100",
          "bq", "id:53^10",
          "bq", "id:52",
          "fq", idFilter,
          "fl", "id,score"),
      order53Then52Then54);
}
@Test
public void testBf() {
  final String teknaFilter = "id:[52 TO 54]";

  // additive boost function on ord(id)
  final String[] descendingIds = {
      "//doc[1]/str[@name='id'][.='54']",
      "//doc[2]/str[@name='id'][.='53']",
      "//doc[3]/str[@name='id'][.='52']"
  };
  assertQ(
      req("q", "tekna",
          "qf", "text_sw",
          "defType", "edismax",
          "bf", "ord(id)",
          "fq", teknaFilter,
          "fl", "id,score"),
      descendingIds);

  // conditional boost function: 120 for id 53, 10 for id 52, 0 otherwise
  final String conditionalBf = "if(and(query({!v='id:53'})),120,if(query({!v='id:52'}),10,0))";
  assertQ(
      req("q", "tekna",
          "qf", "text_sw",
          "defType", "edismax",
          "bf", conditionalBf,
          "fq", teknaFilter,
          "fl", "id,score"),
      "//doc[1]/str[@name='id'][.='53']",
      "//doc[2]/str[@name='id'][.='52']",
      "//doc[3]/str[@name='id'][.='54']");

  // adding value from a field
  // 0 would be returned for negative values or docs w/o a value
  assertQ(
      req("q", "*:*",
          "qf", "text_sw",
          "defType", "edismax",
          "bf", "foo_i",
          "fq", "id:[47 TO 49]",
          "fl", "id,score"),
      "//doc[1]/str[@name='id'][.='48']",
      // these should have identical score, in non-deterministic order
      "//doc[str[@name='id'][.='47'] and float[@name='score'][.='1.0']]",
      "//doc[str[@name='id'][.='49'] and float[@name='score'][.='1.0']]");
}
@Test
public void testBoost() {
  // two boost parameters on the same request
  assertQ(
      req("q", "*:*",
          "qf", "text_sw",
          "defType", "edismax",
          "boost", "exists(foo_i)",
          "fq", "id:[47 TO 49]",
          "fl", "id,score",
          "boost", "if(not(query({!v=id:49})),10,1)"),
      "//doc[1]/str[@name='id'][.='48']",
      "//doc[2]/str[@name='id'][.='49']",
      "//doc[3]/str[@name='id'][.='47']");

  // conditional boost: 120 for id 53, 0.0002 for id 52, 1 otherwise
  final String conditionalBoost =
      "if(and(query({!v='id:53'})),120,if(query({!v='id:52'}),0.0002,1))";
  assertQ(
      req("q", "tekna",
          "qf", "text_sw",
          "defType", "edismax",
          "boost", conditionalBoost,
          "fq", "id:[52 TO 54]",
          "fl", "id,score"),
      "//doc[1]/str[@name='id'][.='53']",
      "//doc[2]/str[@name='id'][.='54']",
      "//doc[3]/str[@name='id'][.='52']");

  // adding value from a field
  // using sum to verify the order
  // 0 would be returned for negative values or if the field value is not present
  assertQ(
      req("q", "*:*",
          "qf", "text_sw",
          "defType", "edismax",
          "boost", "sum(foo_i,1)",
          "fq", "id:[48 TO 50]",
          "fl", "id,score"),
      "//doc[1]/str[@name='id'][.='48']",
      "//doc[2]/str[@name='id'][.='50']",
      "//doc[3]/str[@name='id'][.='49']");
}
/**
 * Exercises the {@code uf} (user fields) parameter.
 *
 * <p>Default is allow all ("*"). If a list of fields is given, only those are allowed
 * ("foo bar"). It is possible to invert with "-" syntax: disallow all with "-*", allow all
 * but id with "* -id". "Dynamic" field name wildcarding is supported as well.
 */
public void testUserFields() {
  final String tenDocs = "*[count(//doc)=10]";
  final String oneDoc = "*[count(//doc)=1]";
  final String zeroDocs = "*[count(//doc)=0]";

  // No uf at all. The first query is the plain fielded form; the remaining ones cover
  // SOLR-3377 - parens should be allowed immediately before field name.
  final String[] parenthesisedQueries = {
      "id:42",
      "( id:42 )",
      "(id:42)",
      "(+id:42)",
      "+(+id:42)",
      "+(+((id:42)))",
      "+(+((+id:42)))",
      " +( +( ( +id:42) ) ) ",
  };
  for (String userQuery : parenthesisedQueries) {
    assertQ(req("defType", "edismax", "q", userQuery), oneDoc);
  }
  assertQ(req("defType", "edismax", "q", "(id:(*:*)^200)"), tenDocs);

  // {uf, q, expected count}
  final String[][] userFieldCases = {
      {"id", "id:42", oneDoc},
      {"-*", "id:42", zeroDocs},
      {"loremipsum", "id:42", zeroDocs},
      {"* -id", "id:42", zeroDocs},
      {"* -loremipsum", "id:42", oneDoc},
      {"id^5.0", "id:42", oneDoc},
      {"*^5.0", "id:42", oneDoc},
      {"id^5.0", "id:42^10.0", oneDoc},
      {"na*", "name:Zapp", oneDoc},
      {"*me", "name:Zapp", oneDoc},
      {"* -na*", "name:Zapp", zeroDocs},
      {"*me -name", "name:Zapp", zeroDocs},
      {"*ame -*e", "name:Zapp", zeroDocs},
  };
  for (String[] ufCase : userFieldCases) {
    assertQ(req("defType", "edismax", "uf", ufCase[0], "q", ufCase[1]), ufCase[2]);
  }

  // Boosts from user fields, checked through the parsed query string
  assertQ(
      req("defType", "edismax", "debugQuery", "true", "rows", "0", "q", "id:42"),
      "//str[@name='parsedquery_toString'][.='+id:42']");
  assertQ(
      req("defType", "edismax", "debugQuery", "true", "rows", "0", "uf", "*^5.0", "q", "id:42"),
      "//str[@name='parsedquery_toString'][.='+(id:42)^5.0']");
  assertQ(
      req("defType", "edismax", "debugQuery", "true", "rows", "0", "uf", "*^2.0 id^5.0 -xyz", "q", "name:foo"),
      "//str[@name='parsedquery_toString'][.='+(name:foo)^2.0']");
  assertQ(
      req("defType", "edismax", "debugQuery", "true", "rows", "0", "uf", "i*^5.0", "q", "id:42"),
      "//str[@name='parsedquery_toString'][.='+(id:42)^5.0']");

  // unfielded queries still match through qf even when uf excludes the field
  assertQ(req("defType", "edismax", "uf", "-*", "q", "cannons", "qf", "text"), oneDoc);
  assertQ(req("defType", "edismax", "uf", "* -id", "q", "42", "qf", "id"), oneDoc);
}
/**
 * Exercises field aliasing through {@code f.<alias>.qf} parameters.
 */
public void testAliasing() throws Exception {
  final String oneDoc = "*[count(//doc)=1]";
  final String twoDocs = "*[count(//doc)=2]";
  final String zeroDocs = "*[count(//doc)=0]";

  // alias targets used repeatedly below
  final String nameAndMyTrait = "name^2.0 mytrait_ss^5.0";
  final String nameAndTrait = "name^2.0 trait_ss^5.0";
  final String freeText = "Zapp Obnoxious";

  // ---- single field: the alias is only usable once it is defined ----
  assertQ(req("defType", "edismax", "q", "myalias:Zapp"), zeroDocs);
  assertQ(req("defType", "edismax", "q", "myalias:Zapp", "f.myalias.qf", "name"), oneDoc);

  // ---- multi field, alias used explicitly in the query ----
  assertQ(
      req("defType", "edismax",
          "uf", "myalias",
          "q", "myalias:(Zapp Obnoxious)",
          "f.myalias.qf", nameAndMyTrait,
          "mm", "50%"),
      oneDoc);

  // ---- multi field, alias defined but not referenced by qf ----
  assertQ(
      req("defType", "edismax", "q", freeText, "f.myalias.qf", nameAndMyTrait),
      zeroDocs);

  // ---- alias referenced from qf ----
  assertQ(
      req("defType", "edismax", "q", freeText, "qf", "myalias^10.0", "f.myalias.qf", nameAndMyTrait),
      oneDoc);
  assertQ(
      req("defType", "edismax", "q", freeText, "qf", "myalias^10.0", "f.myalias.qf", nameAndTrait),
      twoDocs);
  assertQ(
      req("defType", "edismax", "q", freeText, "qf", "myalias^10.0", "f.myalias.qf", nameAndTrait, "mm", "100%"),
      oneDoc);

  // two separate aliases in qf
  assertQ(
      req("defType", "edismax",
          "q", freeText,
          "qf", "who^10.0 where^3.0",
          "f.who.qf", "name^2.0",
          "f.where.qf", "mytrait_ss^5.0"),
      oneDoc);

  // alias in qf combined with uf
  assertQ(
      req("defType", "edismax",
          "q", freeText,
          "qf", "myalias",
          "f.myalias.qf", "name mytrait_ss",
          "uf", "myalias"),
      oneDoc);

  // ---- alias used in the query while uf restricts the directly addressable fields ----
  assertQ(
      req("defType", "edismax",
          "uf", "who",
          "q", "who:(Zapp Obnoxious)",
          "f.who.qf", nameAndTrait,
          "qf", "id"),
      twoDocs);
  assertQ(
      req("defType", "edismax",
          "uf", "* -name",
          "q", "who:(Zapp Obnoxious)",
          "f.who.qf", nameAndTrait),
      twoDocs);
}
/**
 * Checks that boosts given on an alias and on its target fields influence the ranking.
 */
public void testAliasingBoost() throws Exception {
  final String firstId = "//result/doc[1]/str[@name='id']";
  final String secondId = "//result/doc[2]/str[@name='id']";

  // trait_ss carries a low boost inside the alias: doc 42 is expected ahead of doc 47
  assertQ(
      req("defType", "edismax",
          "q", "Zapp Pig",
          "qf", "myalias",
          "f.myalias.qf", "name trait_ss^0.1"),
      firstId + "=42",
      secondId + "=47");

  // the alias itself is boosted heavily: now the order should be inverse
  assertQ(
      req("defType", "edismax",
          "q", "Zapp Pig",
          "qf", "myalias^100 name",
          "f.myalias.qf", "trait_ss^0.1"),
      firstId + "=47",
      secondId + "=42");
}
/** SOLR-13203: a malformed dynamic field pattern in {@code uf} is reported as a bad request. */
public void testUfDynamicField() throws Exception {
  try {
    ignoreException("dynamic field");

    // "fl=trait*,id" neither starts nor ends with '*', so the request has to be rejected
    final SolrException rejected = expectThrows(
        SolrException.class,
        () -> h.query(req("uf", "fl=trait*,id", "defType", "edismax")));

    final int expectedCode = SolrException.ErrorCode.BAD_REQUEST.code;
    assertEquals(expectedCode, rejected.code());

    final String expectedMessage = "dynamic field name must start or end with *";
    assertEquals(expectedMessage, rejected.getMessage());
  } finally {
    resetExceptionIgnores();
  }

  // simple test to validate dynamic uf parsing works
  assertQ(req("uf", "trait* id", "defType", "edismax"));
}
/**
 * Checks that alias definitions forming a cycle are rejected with a "Field aliases lead to a
 * cycle" error, and that an acyclic alias chain is not mistaken for a cycle.
 */
public void testCyclicAliasing() throws Exception {
  try {
    ignoreException(".*Field aliases lead to a cycle.*");

    // who -> name -> who
    SolrException e = expectThrows(SolrException.class, "Simple cyclic alising not detected",
        () -> h.query(req("defType","edismax", "q","blarg", "qf","who", "f.who.qf","name","f.name.qf","who")));
    assertCyclicDetectionErrorMessage(e);

    // who -> name -> myalias -> who
    e = expectThrows(SolrException.class, "Cyclic alising not detected",
        () -> h.query(req("defType","edismax", "q","blarg", "qf","who", "f.who.qf","name","f.name.qf","myalias", "f.myalias.qf","who")));
    assertCyclicDetectionErrorMessage(e);

    // Acyclic alias graph: no cycle error is expected here. The request is still expected to
    // fail, but with an invalid-field-name error, so the failure message says so instead of
    // claiming that a cycle went undetected.
    e = expectThrows(SolrException.class, "Invalid field name in acyclic aliases not detected",
        () -> h.query(req("defType","edismax", "q","blarg", "qf","field1", "f.field1.qf","field2 field3","f.field2.qf","field4 field5", "f.field4.qf","field5", "f.field5.qf","field6", "f.field3.qf","field6")));
    // Guard against a missing cause/message so a problem shows up as an assertion failure
    // with context rather than as a NullPointerException.
    Throwable acyclicCause = e.getCause();
    assertTrue("SolrException has no cause: " + e, acyclicCause != null);
    String acyclicMessage = String.valueOf(acyclicCause.getMessage());
    assertFalse("This is not cyclic aliasing", acyclicMessage.contains("Field aliases lead to a cycle"));
    assertTrue("Should throw exception due to invalid field name", acyclicMessage.contains("not a valid field name"));

    // field4 -> field5 -> field4, reached through field1 -> field2
    e = expectThrows(SolrException.class, "Cyclic alising not detected",
        () -> h.query(req("defType","edismax", "q","blarg", "qf","field1", "f.field1.qf","field2 field3", "f.field2.qf","field4 field5", "f.field4.qf","field5", "f.field5.qf","field4")));
    assertCyclicDetectionErrorMessage(e);

    // the cyclic alias is referenced from the query string rather than from qf
    e = expectThrows(SolrException.class, "Cyclic alising not detected",
        () -> h.query(req("defType","edismax", "q","who:(Zapp Pig)", "qf","text", "f.who.qf","name","f.name.qf","myalias", "f.myalias.qf","who")));
    assertCyclicDetectionErrorMessage(e);
  } finally {
    resetExceptionIgnores();
  }
}
/**
 * Asserts that the cause of the given exception reports an alias cycle.
 *
 * @param e the exception thrown by the query; its cause must carry a message containing
 *     "Field aliases lead to a cycle"
 */
private void assertCyclicDetectionErrorMessage(SolrException e) {
  Throwable cause = e.getCause();
  // fail with context instead of a NullPointerException when there is no cause
  assertTrue("SolrException has no cause: " + e, cause != null);
  // String.valueOf turns a null message into "null" so the check below cannot throw
  String causeMessage = String.valueOf(cause.getMessage());
  assertTrue("Expected a cyclic alias error but cause message was: " + causeMessage,
      causeMessage.contains("Field aliases lead to a cycle"));
}
/**
 * Combines boolean operators with terms that contain a literal colon
 * (e.g. {@code bogus:xxx}, where {@code bogus} is not a field).
 */
public void testOperatorsWithLiteralColons() {
  // fixture documents specific to this test
  assertU(adoc("id", "142", "a_s", "bogus:xxx", "text_s", "yak"));
  assertU(adoc("id", "143", "a_s", "bogus:xxx"));
  assertU(adoc("id", "144", "text_s", "yak"));
  assertU(adoc("id", "145", "a_s", "a_s:xxx", "text_s", "yak"));
  assertU(adoc("id", "146", "a_s", "a_s:xxx"));
  assertU(adoc("id", "147", "a_s", "AND", "a_s", "NOT"));
  assertU(commit());

  final String qfStrings = "a_s b_s";
  final String qfStringsAndText = "a_s b_s text_s";
  final String foundOne = "//*[@numFound='1']";
  final String foundTwo = "//*[@numFound='2']";

  // ---- AND ----
  assertQ(
      req("q", "bogus:xxx AND text_s:yak",
          "fl", "id",
          "qf", qfStrings,
          "defType", "edismax",
          "mm", "0"),
      foundOne,
      "//str[@name='id'][.='142']");
  assertQ(
      req("q", "a_s:xxx AND text_s:yak",
          "fl", "id",
          "qf", qfStrings,
          "defType", "edismax",
          "mm", "0",
          "uf", "text_s"),
      foundOne,
      "//str[@name='id'][.='145']");

  // ---- NOT combined with a required clause ----
  assertQ(
      req("q", "NOT bogus:xxx +text_s:yak",
          "fl", "id",
          "qf", qfStrings,
          "defType", "edismax",
          "mm", "0",
          "debugQuery", "true"),
      foundTwo,
      "//str[@name='id'][.='144']",
      "//str[@name='id'][.='145']");
  assertQ(
      req("q", "NOT a_s:xxx +text_s:yak",
          "fl", "id",
          "qf", qfStrings,
          "defType", "edismax",
          "mm", "0",
          "uf", "text_s"),
      foundTwo,
      "//str[@name='id'][.='142']",
      "//str[@name='id'][.='144']");

  // ---- required colon term plus an optional plain term ----
  assertQ(
      req("q", "+bogus:xxx yak",
          "fl", "id",
          "qf", qfStringsAndText,
          "defType", "edismax",
          "mm", "0"),
      foundTwo,
      "//str[@name='id'][.='142']",
      "//str[@name='id'][.='143']");
  assertQ(
      req("q", "+a_s:xxx yak",
          "fl", "id",
          "qf", qfStringsAndText,
          "defType", "edismax",
          "mm", "0",
          "uf", "b_s"),
      foundTwo,
      "//str[@name='id'][.='145']",
      "//str[@name='id'][.='146']");
}
// exercises the phrase fields pf, pf2 and pf3 together with the phrase slop parameters
public void testPfPs() {
  // four documents in which "bar" moves one position further away from "foo" each time
  assertU(adoc("id", "s0", "phrase_sw", "foo bar a b c", "boost_d", "1.0"));
  assertU(adoc("id", "s1", "phrase_sw", "foo a bar b c", "boost_d", "2.0"));
  assertU(adoc("id", "s2", "phrase_sw", "foo a b bar c", "boost_d", "3.0"));
  assertU(adoc("id", "s3", "phrase_sw", "foo a b c bar", "boost_d", "4.0"));
  assertU(commit());

  // "this document is ranked first" expressions
  final String s0First = "//doc[1]/str[@name='id'][.='s0']";
  final String s1First = "//doc[1]/str[@name='id'][.='s1']";
  final String s2First = "//doc[1]/str[@name='id'][.='s2']";
  final String fourTerms = "zzzz xxxx cccc vvvv";

  // ---- baseline: without phrase boosts the boost_d function decides the order ----
  assertQ("default order assumption wrong",
      req("q", "foo bar",
          "qf", "phrase_sw",
          "bf", "boost_d",
          "fl", "score,*",
          "defType", "edismax"),
      "//doc[1]/str[@name='id'][.='s3']",
      "//doc[2]/str[@name='id'][.='s2']",
      "//doc[3]/str[@name='id'][.='s1']",
      "//doc[4]/str[@name='id'][.='s0']");

  // ---- pf / pf2 / pf3 without slop ----
  assertQ("pf not working",
      req("q", "foo bar",
          "qf", "phrase_sw",
          "pf", "phrase_sw^10",
          "fl", "score,*",
          "defType", "edismax"),
      s0First);
  assertQ("pf2 not working",
      req("q", "foo bar",
          "qf", "phrase_sw",
          "pf2", "phrase_sw^10",
          "fl", "score,*",
          "defType", "edismax"),
      s0First);
  assertQ("pf3 not working",
      req("q", "a b bar",
          "qf", "phrase_sw",
          "pf3", "phrase_sw^10",
          "fl", "score,*",
          "defType", "edismax"),
      s2First);

  // ---- the general ps slop applied to pf2 / pf3 ----
  assertQ("ps not working for pf2",
      req("q", "bar foo",
          "qf", "phrase_sw",
          "pf2", "phrase_sw^10",
          "ps", "2",
          "fl", "score,*",
          "defType", "edismax"),
      s0First);
  assertQ("ps not working for pf3",
      req("q", "a bar foo",
          "qf", "phrase_sw",
          "pf3", "phrase_sw^10",
          "ps", "3",
          "fl", "score,*",
          "debugQuery", "true",
          "defType", "edismax"),
      s1First);

  // ---- inline slops per field entry, with ps as the fallback ----
  final String[] expectedSlopClauses = {
      // pf: whole-query phrases
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"zzzz xxxx cccc vvvv\"~1)^10.0')]",
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"zzzz xxxx cccc vvvv\"~2)^20.0')]",
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"zzzz xxxx cccc vvvv\"~3)^30.0')]",
      // pf2: bigrams with inline slop 2
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"zzzz xxxx\"~2)^22.0')]",
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"xxxx cccc\"~2)^22.0')]",
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"cccc vvvv\"~2)^22.0')]",
      // pf2: bigrams falling back to ps=3
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"zzzz xxxx\"~3)^33.0')]",
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"xxxx cccc\"~3)^33.0')]",
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"cccc vvvv\"~3)^33.0')]",
      // pf3: trigrams with inline slop 2
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"zzzz xxxx cccc\"~2)^222.0')]",
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"xxxx cccc vvvv\"~2)^222.0')]",
      // pf3: trigrams falling back to ps=3
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"zzzz xxxx cccc\"~3)^333.0')]",
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"xxxx cccc vvvv\"~3)^333.0')]"
  };
  assertQ("ps/ps2/ps3 with default slop overrides not working",
      req("q", fourTerms,
          "qf", "phrase_sw",
          "pf", "phrase_sw~1^10 phrase_sw~2^20 phrase_sw^30",
          "pf2", "phrase_sw~2^22 phrase_sw^33",
          "pf3", "phrase_sw~2^222 phrase_sw^333",
          "ps", "3",
          "defType", "edismax",
          "debugQuery", "true"),
      expectedSlopClauses);

  // ---- ps2 ----
  assertQ("ps2 not working",
      req("q", "bar foo",
          "qf", "phrase_sw",
          "pf2", "phrase_sw^10",
          "ps2", "2",
          "fl", "score,*",
          "defType", "edismax"),
      s0First);
  assertQ("Specifying slop in pf2 param not working",
      req("q", "bar foo",
          "qf", "phrase_sw",
          "pf2", "phrase_sw~2^10",
          "fl", "score,*",
          "defType", "edismax"),
      s0First);
  assertQ("Slop in ps2 parameter should override ps",
      req("q", "bar foo",
          "qf", "phrase_sw",
          "pf2", "phrase_sw^10",
          "ps", "0",
          "ps2", "2",
          "fl", "score,*",
          "defType", "edismax"),
      s0First);

  // ---- ps3 ----
  assertQ("ps3 not working",
      req("q", "a bar foo",
          "qf", "phrase_sw",
          "pf3", "phrase_sw^10",
          "ps3", "3",
          "fl", "score,*",
          "defType", "edismax"),
      s1First);
  assertQ("Specifying slop in pf3 param not working",
      req("q", "a bar foo",
          "qf", "phrase_sw",
          "pf3", "phrase_sw~3^10",
          "fl", "score,*",
          "defType", "edismax"),
      s1First);

  // ---- an inline slop wins over ps2 ----
  assertQ("ps2 should not override slop specified inline in pf2",
      req("q", fourTerms,
          "qf", "phrase_sw",
          "pf2", "phrase_sw~2^22",
          "ps2", "4",
          "defType", "edismax",
          "debugQuery", "true"),
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"zzzz xxxx\"~2)^22.0')]");

  // ---- several phrase fields ----
  final String bothPhraseFields = "phrase_sw phrase1_sw";
  assertQ("phrase field queries spanning multiple fields should be within their own dismax queries",
      req("q", "aaaa bbbb cccc",
          "qf", bothPhraseFields,
          "pf2", bothPhraseFields,
          "pf3", bothPhraseFields,
          "defType", "edismax",
          "debugQuery", "true"),
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"aaaa bbbb\" | phrase1_sw:\"aaaa bbbb\")')]",
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"bbbb cccc\" | phrase1_sw:\"bbbb cccc\")')]",
      "//str[@name='parsedquery'][contains(.,'(phrase_sw:\"aaaa bbbb cccc\" | phrase1_sw:\"aaaa bbbb cccc\")')]");
}
/**
 * Indexes a {@code cat_s} value that contains a newline and checks that a quoted term,
 * explicit-field range queries, and the same ranges expressed through edismax local
 * params each match exactly one document.
 */
@Test
public void testWhitespaceCharacters() throws Exception {
  final String exactlyOneDoc = "*[count(//doc)=1]";

  assertU(adoc("id", "whitespaceChars", "cat_s", "foo\nfoo"));
  assertU(commit());

  // quoted term, searched through qf=cat_s
  assertQ(req("q", "(\"foo\nfoo\")", "qf", "cat_s", "defType", "edismax"), exactlyOneDoc);

  // explicit-field ranges, without and with a space after the opening bracket
  for (String rangeQuery : Arrays.asList(
      "cat_s:[\"foo\nfoo\" TO \"foo\nfoo\"]",
      "cat_s:[ \"foo\nfoo\" TO \"foo\nfoo\"]")) {
    assertQ(req("q", rangeQuery, "qf", "name", "defType", "edismax"), exactlyOneDoc);
  }

  // the same two ranges, passed as the v local param of an {!edismax} query
  for (String localParamsQuery : Arrays.asList(
      "{!edismax qf=cat_s v='[\"foo\nfoo\" TO \"foo\nfoo\"]'}",
      "{!edismax qf=cat_s v='[ \"foo\nfoo\" TO \"foo\nfoo\"]'}")) {
    assertQ(req("q", localParamsQuery), exactlyOneDoc);
  }
}
/**
 * Indexes a {@code cat_s} value that contains a double quote and checks that a range
 * query, a phrase query, an escaped term and an unescaped term on that field each
 * match exactly one document.
 */
@Test
public void testDoubleQuoteCharacters() throws Exception {
  assertU(adoc("id", "doubleQuote", "cat_s", "foo\"foo"));
  assertU(commit());

  final List<String> queries = Arrays.asList(
      "cat_s:[\"foo\\\"foo\" TO \"foo\\\"foo\"]", // range with escaped quotes
      "cat_s:\"foo\\\"foo\"",                      // phrase with an escaped quote
      "cat_s:foo\\\"foo",                          // bare term, quote escaped
      "cat_s:foo\"foo");                           // bare term, quote not escaped

  for (String q : queries) {
    assertQ(req("q", q, "qf", "name", "defType", "edismax"), "*[count(//doc)=1]");
  }
}
/**
 * Verifies that every reserved character is properly escaped when it is stored in
 * {@link org.apache.solr.search.ExtendedDismaxQParser.Clause#val}.
 *
 * @see ExtendedDismaxQParser#splitIntoClauses(String, boolean)
 */
@Test
public void testEscapingOfReservedCharacters() throws Exception {
  final String exactlyOneDoc = "*[count(//doc)=1]";

  // index a document whose name consists of all the reserved characters
  final String reserved = "!():^[]{}~*?\"+-\\|&/";
  assertU(adoc("id", "reservedChars", "name", reserved, "cat_s", "foo/"));
  assertU(commit());

  // Only the backslash is escaped by hand: the query parser would otherwise treat a raw
  // backslash as an escape for the character that follows it.
  final String reservedQuery = reserved.replace("\\", "\\\\");

  // Searching for the reserved characters fails the initial parse, so the escaped form
  // of the query is the one that ends up being used.
  assertQ("Escaping reserved characters",
      req("q", reservedQuery, "qf", "name", "mm", "100%", "defType", "edismax"),
      exactlyOneDoc);

  // A trailing '/' on the string field 'cat_s' caused a SyntaxError before SOLR-3467.
  assertQ("Escaping string with reserved / character",
      req("q", "foo/", "qf", "cat_s", "mm", "100%", "defType", "edismax"),
      exactlyOneDoc);

  // A colon already escaped by the client must not be escaped a second time; the same
  // query is run once with qf=id and once with qf=text.
  for (String qf : Arrays.asList("id", "text")) {
    assertQ("Might be double-escaping a client-escaped colon",
        req("q", "text_sw:(theos OR thistokenhasa\\:preescapedcolon OR theou)",
            "defType", "edismax", "qf", qf),
        "*[count(//doc)=3]");
  }
}
/**
 * Repeats some of the whitespace and reserved-character cases as direct calls to
 * {@link ExtendedDismaxQParser#splitIntoClauses(String, boolean)}, asserting on the value,
 * whitespace flag, special-syntax flag and field of every clause produced.
 *
 * <p>Each request created here is closed once its clauses have been extracted; these
 * requests are never handed to the test harness, so nothing else would release them.
 */
@Test
public void testSplitIntoClauses() throws Exception {
  // quoted phrase containing a newline, wrapped in parentheses
  String query = "(\"foo\nfoo\")";
  List<ExtendedDismaxQParser.Clause> clauses =
      splitAndRelease(query, req("q", query, "qf", "cat_s", "defType", "edismax"));
  Assert.assertEquals(3, clauses.size());
  assertClause(clauses.get(0), "\\(", false, true);
  assertClause(clauses.get(1), "foo\nfoo", true, false);
  assertClause(clauses.get(2), "\\)", false, true);

  // fielded range, no space after the opening bracket
  query = "cat_s:[\"foo\nfoo\" TO \"foo\nfoo\"]";
  clauses = splitAndRelease(query, req("q", query, "qf", "cat_s", "defType", "edismax"));
  Assert.assertEquals(5, clauses.size());
  assertClause(clauses.get(0), "\\[", false, true, "cat_s");
  assertClause(clauses.get(1), "foo\nfoo", true, false);
  assertClause(clauses.get(2), "TO", true, false);
  assertClause(clauses.get(3), "foo\nfoo", true, false);
  assertClause(clauses.get(4), "\\]", false, true);

  // fielded range with a space after the opening bracket: only the whitespace flag of
  // the first clause differs from the previous case
  query = "cat_s:[ \"foo\nfoo\" TO \"foo\nfoo\"]";
  clauses = splitAndRelease(query, req("q", query, "qf", "cat_s", "defType", "edismax"));
  Assert.assertEquals(5, clauses.size());
  assertClause(clauses.get(0), "\\[", true, true, "cat_s");
  assertClause(clauses.get(1), "foo\nfoo", true, false);
  assertClause(clauses.get(2), "TO", true, false);
  assertClause(clauses.get(3), "foo\nfoo", true, false);
  assertClause(clauses.get(4), "\\]", false, true);

  String allReservedCharacters = "!():^[]{}~*?\"+-\\|&/";
  // the backslash needs to be escaped by hand (the query parser sees a raw backslash as
  // an escape for the subsequent character)
  query = allReservedCharacters.replace("\\", "\\\\");
  clauses = splitAndRelease(query,
      req("q", query, "qf", "name", "mm", "100%", "defType", "edismax"));
  Assert.assertEquals(1, clauses.size());
  assertClause(clauses.get(0), "\\!\\(\\)\\:\\^\\[\\]\\{\\}\\~\\*\\?\\\"\\+\\-\\\\\\|\\&\\/", false, true);

  // trailing slash
  query = "foo/";
  clauses = splitAndRelease(query,
      req("q", query, "qf", "name", "mm", "100%", "defType", "edismax"));
  Assert.assertEquals(1, clauses.size());
  assertClause(clauses.get(0), "foo\\/", false, true);
}

/**
 * Builds an {@link ExtendedDismaxQParser} for {@code query} on top of {@code request},
 * splits the query into clauses (with {@code ignoreQuote=false}) and closes the request
 * before returning, whether or not parsing succeeded.
 *
 * @param query the raw query string to split
 * @param request the request supplying the parser's params; closed by this method
 * @return the clauses produced by {@code splitIntoClauses}
 */
private static List<ExtendedDismaxQParser.Clause> splitAndRelease(String query, SolrQueryRequest request) {
  try {
    ExtendedDismaxQParser parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
    return parser.splitIntoClauses(query, false);
  } finally {
    request.close();
  }
}
/**
 * Asserts, in this order, that a clause has the expected value, whitespace flag,
 * special-syntax flag and field.
 *
 * @param clause the clause under inspection
 * @param value expected {@code clause.val}
 * @param hasWhitespace expected {@code clause.hasWhitespace}
 * @param hasSpecialSyntax expected {@code clause.hasSpecialSyntax}
 * @param field expected {@code clause.field}
 */
private static void assertClause(
    ExtendedDismaxQParser.Clause clause,
    String value,
    boolean hasWhitespace,
    boolean hasSpecialSyntax,
    String field) {
  Assert.assertEquals(value, clause.val);
  Assert.assertEquals(hasWhitespace, clause.hasWhitespace);
  Assert.assertEquals(hasSpecialSyntax, clause.hasSpecialSyntax);
  Assert.assertEquals(field, clause.field);
}
/**
 * Variant of the five-argument {@code assertClause} for clauses that are expected to
 * have a {@code null} field.
 */
private static void assertClause(
    ExtendedDismaxQParser.Clause clause,
    String value,
    boolean hasWhitespace,
    boolean hasSpecialSyntax) {
  final String noField = null;
  assertClause(clause, value, hasWhitespace, hasSpecialSyntax, noField);
}
/**
 * SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token.
 *
 * <p>Runs the same three-character query against {@code standardtok} with three
 * different {@code mm} values and checks the number of documents returned for each.
 */
public void testCJK() throws Exception {
  // {assertion label, mm, expected document count}
  final String[][] cases = {
      {"test cjk (disjunction)", "0%", "3"},
      {"test cjk (minShouldMatch)", "67%", "2"},
      {"test cjk (conjunction)", "100%", "1"},
  };
  for (String[] c : cases) {
    assertQ(c[0],
        req("q", "大亚湾", "qf", "standardtok", "mm", c[1], "defType", "edismax"),
        "*[count(//doc)=" + c[2] + "]");
  }
}
/**
 * Test that minShouldMatch works with aliasing for implicit boolean queries.
 *
 * <p>The alias {@code myalias} is first mapped to a single field and then to two fields;
 * for each mapping the query is run with three {@code mm} values.
 */
public void testCJKAliasing() throws Exception {
  final String[] labels = {
      "test cjk (aliasing+disjunction)",
      "test cjk (aliasing+minShouldMatch)",
      "test cjk (aliasing+conjunction)",
  };
  final String[] mms = {"0%", "67%", "100%"};

  // alias targets, single field first, then multifield
  final String[] aliasTargets = {"standardtok", "standardtok HTMLstandardtok"};
  // expected document counts, indexed [alias target][mm]
  final int[][] expectedCounts = {
      {3, 2, 1},
      {6, 4, 2},
  };

  for (int t = 0; t < aliasTargets.length; t++) {
    for (int m = 0; m < mms.length; m++) {
      assertQ(labels[m],
          req("q", "myalias:大亚湾",
              "f.myalias.qf", aliasTargets[t],
              "mm", mms[m],
              "defType", "edismax"),
          "*[count(//doc)=" + expectedCounts[t][m] + "]");
    }
  }
}
/**
 * Test that we apply boosts correctly: the document count depends only on {@code mm},
 * while the id of the first document depends on which field carries the boost.
 */
public void testCJKBoosts() throws Exception {
  final String[] labels = {
      "test cjk (disjunction)",
      "test cjk (minShouldMatch)",
      "test cjk (conjunction)",
  };
  final String[] mms = {"0%", "67%", "100%"};
  final int[] expectedCounts = {6, 4, 2};

  // {qf, id expected at the top of the result list}
  final String[][] boostedFields = {
      {"standardtok^2 HTMLstandardtok", "57"},
      // now boost the other field
      {"standardtok HTMLstandardtok^2", "60"},
  };

  for (String[] boosted : boostedFields) {
    for (int m = 0; m < mms.length; m++) {
      assertQ(labels[m],
          req("q", "大亚湾", "qf", boosted[0], "mm", mms[m], "defType", "edismax"),
          "*[count(//doc)=" + expectedCounts[m] + "]",
          "//result/doc[1]/str[@name='id'][.='" + boosted[1] + "']");
    }
  }
}
/**
 * Always apply minShouldMatch to the inner boolean queries created from whitespace, as
 * these are never structured lucene queries but only come from unstructured text.
 */
public void testCJKStructured() throws Exception {
  // {assertion label, mm, xpath that must match}
  final String[][] cases = {
      {"test cjk (disjunction)", "0%", "*[count(//doc)=3]"},
      {"test cjk (minShouldMatch)", "67%", "*[count(//doc)=2]"},
      // +(((((standardtok:大 standardtok:亚 standardtok:湾)~3)) (standardtok:bogus))~2)
      {"test cjk (conjunction)", "100%", "//*[@numFound='0']"},
  };
  for (String[] c : cases) {
    assertQ(c[0],
        req("q", "大亚湾 OR bogus", "qf", "standardtok", "mm", c[1], "defType", "edismax"),
        c[2]);
  }
}
/**
 * Test that we don't apply minShouldMatch to the inner boolean queries when there are
 * synonyms (these are indicated by coordination factor).
 */
public void testSynonyms() throws Exception {
  // The document only contains baraaa, yet a search for fooaaa with mm=100% should
  // still match it.
  final SolrQueryRequest synonymQuery =
      req("q", "fooaaa", "qf", "text_sw", "mm", "100%", "defType", "edismax");
  assertQ("test synonyms", synonymQuery, "*[count(//doc)=1]");
}
/**
 * Test that the default operator and MM are interacting appropriately when both provided.
 */
public void testDefaultOperatorWithMm() throws Exception {
  // Text we are searching
  // "line up and fly directly at the enemy death cannons, clogging them with wreckage!"
  final String fourGroups =
      "(line enemy) OR (line notfound) OR (death cannons) OR (death notfound)";

  // {assertion label, q, q.op, mm, expected document count}
  final String[][] cases = {
      {"test default operator with mm (AND + 0% => 0 hits)",
          "(line notfound) OR notfound", "AND", "0%", "0"},
      {"test default operator with mm (OR + 0% => 1 hit)",
          "line notfound OR notfound", "OR", "0%", "1"},
      {"test default operator with mm (OR + 100% => 0 hits)",
          "line notfound OR notfound", "OR", "100%", "0"},
      {"test default operator with mm (OR + 35% => 1 hit)",
          "line notfound notfound2 OR notfound", "OR", "35%", "1"},
      {"test default operator with mm (OR + 75% => 0 hits)",
          "line notfound notfound2 OR notfound3", "OR", "75%", "0"},
      {"test default operator with mm (AND + 0% => 1 hit)",
          "(line enemy) OR notfound", "AND", "0%", "1"},
      {"test default operator with mm (AND + 50% => 1 hit)",
          fourGroups, "AND", "50%", "1"},
      {"test default operator with mm (AND + 75% => 0 hits)",
          fourGroups, "AND", "75%", "0"},
  };

  for (String[] c : cases) {
    assertQ(c[0],
        req("q", c[1], "qf", "text", "q.op", c[2], "mm", c[3], "defType", "edismax"),
        "*[count(//doc)=" + c[4] + "]");
  }
}
/**
 * Test that minShouldMatch applies to Optional terms only.
 *
 * <p>Every case is run twice per {@code sow} value: once with {@code qf}/{@code mm}/
 * {@code q.op} as request parameters, and once with the same settings as local params
 * of an {@code {!edismax ...}} query. Both forms must return the same document count.
 */
public void testMinShouldMatchOptional() throws Exception {
  final String orWithoutParens =
      "test minShouldMatch (top level optional with explicit OR without parens)";
  final String orWithoutParensLocal =
      "test minShouldMatch (top level optional with explicit OR without parens) local mm=100%)";

  // {label for request-param form, label for local-param form,
  //  query, q.op (null when none is sent), mm, expected document count}
  final String[][] cases = {
      // +(((text_sw:stock) (text_sw:oil) (text_sw:gold))~1)
      {"test minShouldMatch (top level optional terms only)",
          "test minShouldMatch (top level optional terms only) local mm=50%",
          "stocks oil gold", null, "50%", "4"},
      // +(((text_sw:stock) (text_sw:oil) (text_sw:gold) -(text_sw:stockad))~1)
      {"test minShouldMatch (top level optional and negative terms mm=50%)",
          "test minShouldMatch (top level optional and negative terms local mm=50%)",
          "stocks oil gold -stockade", null, "50%", "3"},
      {"test minShouldMatch (top level optional and negative terms mm=100%)",
          "test minShouldMatch (top level optional and negative terms local mm=100%)",
          "stocks gold -stockade", null, "100%", "1"},
      // +(+(text_sw:stock) +(text_sw:oil))
      {"test minShouldMatch (top level required terms only)",
          "test minShouldMatch (top level required terms only) local mm=50%)",
          "stocks AND oil", null, "50%", "1"},
      // +(((text_sw:oil) (text_sw:gold) +(text_sw:stock))~1)
      {"test minShouldMatch (top level optional and required terms)",
          "test minShouldMatch (top level optional and required terms) local mm=50%)",
          "oil gold +stocks", null, "50%", "3"},
      {"test minShouldMatch (top level optional with explicit OR and parens)",
          "test minShouldMatch (top level optional with explicit OR and parens) local mm=100%)",
          "(snake OR stocks) oil", null, "100%", "2"},
      // The results for the next two appear odd, but are correct as per BooleanQuery
      // processing. See: http://searchhub.org/2011/12/28/why-not-and-or-and-not/
      // Non-parenthesis OR/AND precedence is not true to abstract boolean logic in solr
      // when q.op = AND, and when q.op = OR all three clauses are top-level and optional
      // so mm takes over.
      {orWithoutParens, orWithoutParensLocal, "snake OR stocks oil", "OR", "100%", "0"},
      {orWithoutParens, orWithoutParensLocal, "snake OR stocks oil", "AND", "100%", "0"},
      // SOLR-9174
      {"test minShouldMatch=1<-1 with explicit OR, one impossible clause, and no explicit q.op",
          "test local minShouldMatch=1<-1 with explicit OR, one impossible clause, and no explicit q.op",
          "barbie OR (hair AND nonexistentword)", null, "1<-1", "3"},
  };

  for (String sow : Arrays.asList("true", "false")) {
    for (String[] c : cases) {
      final String query = c[2];
      final String qOp = c[3];
      final String mm = c[4];
      final String expected = "*[count(//doc)=" + c[5] + "]";

      // request-parameter form; q.op is only sent when the case specifies one
      final SolrQueryRequest viaParams = (qOp == null)
          ? req("q", query, "qf", "text_sw", "mm", mm, "sow", sow, "defType", "edismax")
          : req("q", query, "qf", "text_sw", "q.op", qOp, "mm", mm, "sow", sow,
              "defType", "edismax");
      assertQ(c[0], viaParams, expected);

      // local-parameter form of the same query
      final String localOp = (qOp == null) ? "" : "q.op=" + qOp + " ";
      assertQ(c[1],
          req("q", "{!edismax qf=text_sw " + localOp + "mm=" + mm + " sow=" + sow
              + " v='" + query + "'}"),
          expected);
    }
  }
}
/**
 * SOLR-8812: checks the document counts obtained when no explicit {@code mm} is given,
 * for combinations of explicit operators in the query (+, -, AND, OR, NOT),
 * {@code q.op} and {@code lowercaseOperators}, with {@code sow} both true and false.
 *
 * <p>The "=> mm = N%" suffix of each label states the effective mm the case is meant
 * to demonstrate.
 */
@Test
public void testDefaultMM() throws Exception {
  // Ensure MM is off when explicit operators (+/-/OR/NOT) are used and no explicit mm spec is specified.

  // {assertion label, q, q.op (null when not sent), lowercaseOperators (null when not sent),
  //  expected document count}
  final String[][] cases = {
      {"Explicit OR in query with no explicit mm and q.op=AND => mm = 0%",
          "oil OR stocks", "AND", null, "4"},
      {"Explicit 'or' in query with lowercaseOperators=true, no explicit mm and q.op=AND => mm = 0%",
          "oil or stocks", "AND", "true", "4"},
      {"Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%",
          "oil OR stocks", null, null, "4"},
      // no q.op is sent for this case, so the label says so (it used to claim q.op=OR)
      {"No operator in query with no explicit mm and no explicit q.op => mm = 0%",
          "oil stocks", null, null, "4"},
      {"No operator in query with no explicit mm and q.op=AND => mm = 100%",
          "oil stocks", "AND", null, "1"},
      {"No operator in query with no explicit mm and q.op=OR => mm = 0%",
          "oil stocks", "OR", null, "4"},
      {"Explicit '-' operator in query with no explicit mm and no explicit q.op => mm = 0%",
          "hair ties -barbie", null, null, "3"},
      {"Explicit NOT in query with no explicit mm and no explicit q.op => mm = 0%",
          "hair ties NOT barbie", null, null, "3"},
      {"Explicit '-' operator in query with no explicit mm and q.op=OR => mm = 0%",
          "hair ties -barbie", "OR", null, "3"},
      {"Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%",
          "hair ties NOT barbie", "OR", null, "3"},
      {"Explicit '-' operator in query with no explicit mm and q.op=OR => mm = 0%",
          "hair AND ties -barbie", "OR", null, "1"},
      // uses NOT, as the label says; previously this repeated the '-barbie' query above
      {"Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%",
          "hair AND ties NOT barbie", "OR", null, "1"},
      {"No explicit non-AND operator in query with no explicit mm and q.op=OR => mm = 0%",
          "hair AND ties barbie", "OR", null, "2"},
      {"No explicit non-AND operator in query with no explicit mm and q.op=AND => mm = 100%",
          "hair AND ties barbie", "AND", null, "1"},
      {"No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%",
          "hair AND ties barbie", null, null, "2"},
      {"No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%",
          "hair and ties barbie", null, "true", "2"},
      {"Explicit '-' operator in query with no explicit mm and q.op=AND => mm = 100%",
          "hair ties -barbie", "AND", null, "1"},
      {"Explicit NOT in query with no explicit mm and q.op=AND => mm = 100%",
          "hair ties NOT barbie", "AND", null, "1"},
      {"Explicit OR in query with no explicit mm and q.op=AND => mm = 0%",
          "hair OR ties barbie", "AND", null, "3"},
      {"Explicit OR in query with no explicit mm and q.op=OR => mm = 0%",
          "hair OR ties barbie", "OR", null, "6"},
      {"Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%",
          "hair OR ties barbie", null, null, "6"},
      {"Explicit '+' operator in query with no explicit mm and q.op=AND => mm = 0%",
          "hair ties +barbie", "AND", null, "1"},
  };

  for (String sow : Arrays.asList("true", "false")) {
    for (String[] c : cases) {
      // assemble the request parameters, adding the optional ones only when specified
      final List<String> args =
          new java.util.ArrayList<>(Arrays.asList("q", c[1], "qf", "text_sw"));
      if (c[2] != null) {
        args.add("q.op");
        args.add(c[2]);
      }
      if (c[3] != null) {
        args.add("lowercaseOperators");
        args.add(c[3]);
      }
      args.addAll(Arrays.asList("sow", sow, "defType", "edismax"));

      assertQ(c[0], req(args.toArray(new String[0])), "*[count(//doc)=" + c[4] + "]");
    }
  }
}
/**
 * Parses queries with {@code MultilanguageQueryParser} and {@code FuzzyDismaxQParser}
 * and checks, through {@code containsClause}, which field/term clauses the resulting
 * query contains: {@code title}/{@code subject} when no language is set,
 * {@code title_es}/{@code subject_es} once {@code language=es} is set.
 *
 * <p>Every request built from {@code params} is closed after the parser that used it
 * has been exercised.
 */
public void testEdismaxSimpleExtension() throws SyntaxError {
  ModifiableSolrParams params = new ModifiableSolrParams();
  params.set("q", "foo bar");
  params.set("qf", "subject title^5");
  params.set("qf_fr", "subject_fr title_fr^5");
  params.set("qf_en", "subject_en title_en^5");
  params.set("qf_es", "subject_es title_es^5");

  // no language parameter
  SolrQueryRequest request = req(params);
  try {
    MultilanguageQueryParser parser =
        new MultilanguageQueryParser("foo bar", new ModifiableSolrParams(), params, request);
    Query query = parser.parse();
    assertNotNull(query);
    assertTrue(containsClause(query, "title", "foo", 5, false));
    assertTrue(containsClause(query, "title", "bar", 5, false));
    assertTrue(containsClause(query, "subject", "foo", 1, false));
    assertTrue(containsClause(query, "subject", "bar", 1, false));
  } finally {
    request.close();
  }

  // language=es; a new request is built so that it reflects the updated params
  params.set("language", "es");
  request = req(params);
  try {
    MultilanguageQueryParser parser =
        new MultilanguageQueryParser("foo bar", new ModifiableSolrParams(), params, request);
    Query query = parser.parse();
    assertNotNull(query);
    assertTrue(containsClause(query, "title_es", "foo", 5, false));
    assertTrue(containsClause(query, "title_es", "bar", 5, false));
    assertTrue(containsClause(query, "subject_es", "foo", 1, false));
    assertTrue(containsClause(query, "subject_es", "bar", 1, false));
  } finally {
    request.close();
  }

  // fuzzy parser: only the "absence" clause is asserted with the last flag set to true
  request = req(params);
  try {
    FuzzyDismaxQParser parser2 =
        new FuzzyDismaxQParser("foo bar absence", new ModifiableSolrParams(), params, request);
    Query query = parser2.parse();
    assertNotNull(query);
    assertTrue(containsClause(query, "title", "foo", 5, false));
    assertTrue(containsClause(query, "title", "bar", 5, false));
    assertTrue(containsClause(query, "title", "absence", 5, true));
  } finally {
    request.close();
  }
}
/**
 * Basic checks of the {@code sow} (split on whitespace) parameter: first its effect on
 * the multi-word synonym "wi fi =&gt; wifi", given as a request parameter, as a local
 * parameter and left at its default; then the parsed query for a four-word query over
 * two fields, which is expected to be the same for every {@code sow} setting.
 */
@Test
public void testSplitOnWhitespace_Basic() throws Exception {
  final String qf = "text_sw title";
  final String oneHit = "/response/numFound==1";
  final String noHits = "/response/numFound==0";
  final String firstIsDoc72 = "/response/docs/[0]/id=='72'";

  // The "text_sw" field has synonyms loaded from synonyms.txt

  // retrieve the single document containing literal "wifi"
  assertJQ(req("qf", qf, "defType", "edismax", "q", "wifi", "sow", "true"),
      oneHit, firstIsDoc72);

  // trigger the "wi fi => wifi" synonym
  assertJQ(req("qf", qf, "defType", "edismax", "q", "wi fi", "sow", "false"),
      oneHit, firstIsDoc72);
  assertJQ(req("qf", qf, "defType", "edismax", "q", "wi fi", "sow", "true"),
      noHits);
  // default sow=false
  assertJQ(req("qf", qf, "defType", "edismax", "q", "wi fi"),
      oneHit, firstIsDoc72);

  // same three variations, with sow given (or omitted) as a local param
  assertJQ(req("qf", qf, "q", "{!edismax sow=false}wi fi"),
      oneHit, firstIsDoc72);
  assertJQ(req("qf", qf, "q", "{!edismax sow=true}wi fi"),
      noHits);
  // default sow=false
  assertJQ(req("qf", qf, "q", "{!edismax}wi fi"),
      oneHit, firstIsDoc72);

  // One dismax clause per query word is expected in the parsed query.
  final String[] words = {"barking", "curds", "of", "stigma"};
  final String[] perWordDismax = new String[words.length];
  for (int i = 0; i < words.length; i++) {
    perWordDismax[i] = "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:"
        + words[i] + " | title:" + words[i] + "))')]";
  }

  // sow=false, sow=true, and sow omitted (default sow=false)
  for (String sow : new String[] {"false", "true", null}) {
    final SolrQueryRequest request = (sow == null)
        ? req("qf", "name title",
            "q", "barking curds of stigma",
            "defType", "edismax",
            "debugQuery", "true")
        : req("qf", "name title",
            "q", "barking curds of stigma",
            "defType", "edismax",
            "sow", sow,
            "debugQuery", "true");
    assertQ(request, perWordDismax);
  }
}
/**
 * Checks the shape of the parsed query for {@code sow=true} versus {@code sow=false}
 * when the queried fields {@code text_sw} and {@code title} analyze the query
 * differently, with and without {@code mm}, and when they analyze it the same way.
 */
public void testSplitOnWhitespace_Different_Field_Analysis() throws Exception {
  final String qf = "text_sw title";
  final String q = "olive the other";

  // every expectation is a substring of the parsedquery debug entry
  final String open = "//str[@name='parsedquery'][contains(.,'";
  final String close = "')]";

  final String oliveDismax = "DisjunctionMaxQuery((text_sw:oliv | title:olive))";
  final String theDismax = "DisjunctionMaxQuery((title:the))";
  final String otherDismax = "DisjunctionMaxQuery((text_sw:other | title:other))";
  final String blahDismax = "DisjunctionMaxQuery((text_sw:blah | title:blah))";

  // When the *structure* of produced queries is different in each field,
  // sow=true produces boolean-of-dismax query structure,
  // and sow=false produces dismax-of-boolean query structure.
  assertQ(req("qf", qf, "q", q, "defType", "edismax", "sow", "true", "debugQuery", "true"),
      open + oliveDismax + close,
      open + theDismax + close,
      open + otherDismax + close);

  assertQ(req("qf", qf, "q", q, "defType", "edismax", "sow", "false", "debugQuery", "true"),
      open
          + "+DisjunctionMaxQuery(((text_sw:oliv text_sw:other) | (title:olive title:the title:other)))"
          + close);

  // When field's analysis produce different query structures, mm processing is always
  // done on the boolean query.
  // sow=true produces (boolean-of-dismax)~<mm> query structure,
  // and sow=false produces dismax-of-(boolean)~<mm> query structure.
  assertQ(req("qf", qf, "q", q, "defType", "edismax", "sow", "true", "mm", "100%",
          "debugQuery", "true"),
      open + "+(" + oliveDismax + " " + theDismax + " " + otherDismax + ")~3" + close);

  assertQ(req("qf", qf, "q", q, "defType", "edismax", "sow", "false", "mm", "100%",
          "debugQuery", "true"),
      open
          + "+DisjunctionMaxQuery((((text_sw:oliv text_sw:other)~2) | ((title:olive title:the title:other)~3)))"
          + close);

  // When the *structure* of produced queries is the same in each field,
  // sow=false/true produce the same boolean-of-dismax query structure
  final String sameStructure =
      open + "+(" + oliveDismax + " " + blahDismax + " " + otherDismax + ")" + close;
  for (String sow : Arrays.asList("true", "false")) {
    assertQ(req("qf", qf,
            "q", "olive blah other",
            "defType", "edismax",
            "sow", sow,
            "debugQuery", "true"),
        sameStructure);
  }
}
public void testOperatorsAndMultiWordSynonyms() throws Exception {
// The "text_sw" field has synonyms loaded from synonyms.txt
// retrieve the single document containing literal "wifi"
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wifi", "sow","true")
, "/response/numFound==1"
, "/response/docs/[0]/id=='72'"
);
// trigger the "wi fi => wifi" synonym
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi", "sow","false")
, "/response/numFound==1"
, "/response/docs/[0]/id=='72'"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","+wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","-wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","!wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi* fi", "sow","false")
, "/response/numFound==1" // matches because wi* matches "wifi"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","w? fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi~1 fi", "sow","false")
, "/response/numFound==4" // matches because wi~1 matches ti (stemmed "ties")
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi^2 fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi^=2 fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi +fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi -fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi !fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi*", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi?", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi~1", "sow","false")
, "/response/numFound==4" // matches because fi~1 matches ti (stemmed "ties")
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi^2", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi^=2", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","text_sw:wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi text_sw:fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi NOT fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi AND ATM", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM AND wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi && ATM", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM && wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) AND ATM", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM AND (wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) && ATM", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM && (wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi OR NotThereAtAll", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll OR wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi || NotThereAtAll", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll || wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) OR NotThereAtAll", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll OR (wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) || NotThereAtAll", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll || (wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","\"wi\" fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi \"fi\"", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi) fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi (fi)", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","/wi/ fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi /fi/", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","+(wi fi)", "sow","false")
, "/response/numFound==1"
);
@SuppressWarnings({"rawtypes"})
Map all = (Map) Utils.fromJSONString(h.query(req("q", "*:*", "rows", "0", "wt", "json")));
int totalDocs = Integer.parseInt(((Map)all.get("response")).get("numFound").toString());
int allDocsExceptOne = totalDocs - 1;
assertJQ(req("qf","text_sw title", "defType","edismax", "q","-(wi fi)", "sow","false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the text_sw field
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","!(wi fi)", "sow","false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the text_sw field
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT (wi fi)", "sow","false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the text_sw field
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi)^2", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi)^=2", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","text_sw:(wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","+ATM wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","-ATM wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","-NotThereAtAll wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","!ATM wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","!NotThereAtAll wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT ATM wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT NotThereAtAll wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","AT* wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","AT? wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","\"ATM\" wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi +ATM", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi -ATM", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi -NotThereAtAll", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi !ATM", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi !NotThereAtAll", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi NOT ATM", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi NOT NotThereAtAll", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi AT*", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi AT?", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi \"ATM\"", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","\"wi fi\"~2", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","text_sw:\"wi fi\"", "sow","false")
, "/response/numFound==1"
);
}
/**
 * Verifies the string form of the edismax-parsed query for the term "grackle" against the
 * {@code text} and {@code text_sw} fields, with {@code sow} unset, {@code false} and
 * {@code true}, first using explicit field prefixes and then via {@code qf}.
 */
public void testAutoGeneratePhraseQueries() throws Exception {
  ModifiableSolrParams sowUnset = new ModifiableSolrParams();
  sowUnset.add("df", "text");

  ModifiableSolrParams sowOff = new ModifiableSolrParams();
  sowOff.add("sow", "false");
  sowOff.add("df", "text");

  ModifiableSolrParams sowOn = new ModifiableSolrParams();
  sowOn.add("sow", "true");
  sowOn.add("df", "text");

  // From synonyms.txt:
  //
  // crow blackbird, grackle

  // "text" has autoGeneratePhraseQueries="true"
  final String textPhraseExpected = "+((text:\"crow blackbird\" text:grackl))";
  for (SolrParams current : Arrays.asList(sowUnset, sowOff)) {
    try (SolrQueryRequest request = req(current)) {
      Query parsed = QParser.getParser("text:grackle", "edismax", request).getQuery();
      assertEquals(textPhraseExpected, parsed.toString());
    }
  }

  try (SolrQueryRequest request = req(sowOn)) {
    Query parsed = QParser.getParser("text:grackle", "edismax", request).getQuery();
    assertEquals("+spanOr([spanNear([text:crow, text:blackbird], 0, true), text:grackl])", parsed.toString());
  }

  // "text_sw" doesn't specify autoGeneratePhraseQueries => default false
  final String textSwExpected = "+(((+text_sw:crow +text_sw:blackbird) text_sw:grackl))";
  for (SolrParams current : Arrays.asList(sowUnset, sowOn, sowOff)) {
    try (SolrQueryRequest request = req(current)) {
      Query parsed = QParser.getParser("text_sw:grackle", "edismax", request).getQuery();
      assertEquals(textSwExpected, parsed.toString());
    }
  }

  // From here on every parameter set also queries both fields through qf.
  for (ModifiableSolrParams each : Arrays.asList(sowUnset, sowOn, sowOff)) {
    each.add("qf", "text text_sw");
  }

  for (SolrParams current : Arrays.asList(sowUnset, sowOff)) {
    try (SolrQueryRequest request = req(current)) {
      Query single = QParser.getParser("grackle", "edismax", request).getQuery();
      assertEquals("+(((text:\"crow blackbird\" text:grackl))"
          + " | (((+text_sw:crow +text_sw:blackbird) text_sw:grackl)))",
          single.toString());

      Query multi = QParser.getParser("grackle wi fi", "edismax", request).getQuery();
      assertEquals("+(((text:\"crow blackbird\" text:grackl) text:wifi)"
          + " | (((+text_sw:crow +text_sw:blackbird) text_sw:grackl) text_sw:wifi))",
          multi.toString());
    }
  }

  try (SolrQueryRequest request = req(sowOn)) {
    Query single = QParser.getParser("grackle", "edismax", request).getQuery();
    assertEquals("+(spanOr([spanNear([text:crow, text:blackbird], 0, true), text:grackl])"
        + " | (((+text_sw:crow +text_sw:blackbird) text_sw:grackl)))",
        single.toString());

    Query multi = QParser.getParser("grackle wi fi", "edismax", request).getQuery();
    assertEquals("+((spanOr([spanNear([text:crow, text:blackbird], 0, true), text:grackl])"
        + " | (((+text_sw:crow +text_sw:blackbird) text_sw:grackl))) (text:wi | text_sw:wi) (text:fi | text_sw:fi))",
        multi.toString());
  }
}
/**
 * Parses "one two" with {@code sow=false} under several {@code qf} boost combinations and
 * checks the string form of the resulting query for each.
 */
public void testSowFalseWithBoost() throws Exception {
  // Each row: { qf value, expected Query.toString() }
  final String[][] scenarios = {
      {"subject title",
          "+((title:one | subject:on) (title:two | subject:two))"},
      {"subject title^5",
          "+(((title:one)^5.0 | subject:on) ((title:two)^5.0 | subject:two))"},
      {"subject^3 title",
          "+((title:one | (subject:on)^3.0) (title:two | (subject:two)^3.0))"},
      {"subject^10 title^20",
          "+(((title:one)^20.0 | (subject:on)^10.0) ((title:two)^20.0 | (subject:two)^10.0))"},
  };

  for (String[] scenario : scenarios) {
    final String queryFields = scenario[0];
    final String expected = scenario[1];
    try (SolrQueryRequest request = req("sow", "false", "qf", queryFields)) {
      Query parsed = QParser.getParser("one two", "edismax", request).getQuery();
      assertEquals(expected, parsed.toString());
    }
  }
}
/**
 * Reports whether {@code query} is, or contains, a clause on {@code field} with text
 * {@code value}.
 *
 * <p>An outer {@link BoostQuery} is unwrapped first. Boolean and disjunction-max queries are
 * searched recursively through their children (the unwrapped boost is not compared in that
 * case). For any other query the boost (1 when there was no wrapper) must equal {@code boost},
 * and the query must be a {@link FuzzyQuery} when {@code fuzzy} is true or a {@link TermQuery}
 * when it is false.
 */
private boolean containsClause(Query query, String field, String value,
    int boost, boolean fuzzy) {
  final Query unwrapped;
  final float effectiveBoost;
  if (query instanceof BoostQuery) {
    BoostQuery boosted = (BoostQuery) query;
    unwrapped = boosted.getQuery();
    effectiveBoost = boosted.getBoost();
  } else {
    unwrapped = query;
    effectiveBoost = 1f;
  }

  // Composite queries: delegate to the overload that walks the children.
  if (unwrapped instanceof BooleanQuery) {
    return containsClause((BooleanQuery) unwrapped, field, value, boost, fuzzy);
  } else if (unwrapped instanceof DisjunctionMaxQuery) {
    return containsClause((DisjunctionMaxQuery) unwrapped, field, value, boost, fuzzy);
  }

  // Leaf queries: the boost has to match before the term is even looked at.
  if (boost != effectiveBoost) {
    return false;
  }
  if (fuzzy) {
    return unwrapped instanceof FuzzyQuery
        && containsClause((FuzzyQuery) unwrapped, field, value);
  }
  return unwrapped instanceof TermQuery
      && containsClause((TermQuery) unwrapped, field, value);
}
/** Returns true when the fuzzy query's term has the given field and the given text. */
private boolean containsClause(FuzzyQuery query, String field, String value) {
  Term fuzzyTerm = query.getTerm();
  return fuzzyTerm.field().equals(field)
      && fuzzyTerm.bytes().utf8ToString().equals(value);
}
/** Returns true when any clause of the boolean query matches, checked recursively. */
private boolean containsClause(BooleanQuery query, String field, String value, int boost, boolean fuzzy) {
  return query.clauses().stream()
      .map(BooleanClause::getQuery)
      .anyMatch(sub -> containsClause(sub, field, value, boost, fuzzy));
}
/** Returns true when the term query's term has the given field and the given text. */
private boolean containsClause(TermQuery query, String field, String value) {
  Term term = query.getTerm();
  return term.field().equals(field)
      && term.bytes().utf8ToString().equals(value);
}
/** Returns true when any disjunct of the disjunction-max query matches, checked recursively. */
private boolean containsClause(DisjunctionMaxQuery query, String field, String value, int boost, boolean fuzzy) {
  return query.getDisjuncts().stream()
      .anyMatch(alternative -> containsClause(alternative, field, value, boost, fuzzy));
}
/**
 * edismax parser used by the tests whose configuration, when a {@code language} parameter is
 * present, takes its query fields from the {@code qf_<language>} parameter.
 */
static class MultilanguageQueryParser extends ExtendedDismaxQParser {

  public MultilanguageQueryParser(String qstr, SolrParams localParams,
      SolrParams params, SolrQueryRequest req) {
    super(qstr, localParams, params, req);
  }

  @Override
  protected ExtendedDismaxConfiguration createConfiguration(String qstr,
      SolrParams localParams, SolrParams params, SolrQueryRequest req) {
    return new MultilanguageDismaxConfiguration(localParams, params, req);
  }

  /** Configuration that overrides {@code queryFields} per language. */
  class MultilanguageDismaxConfiguration extends ExtendedDismaxConfiguration {

    public MultilanguageDismaxConfiguration(SolrParams localParams,
        SolrParams params, SolrQueryRequest req) {
      super(localParams, params, req);
      final String lang = params.get("language");
      if (lang == null) {
        // No language requested: keep whatever the base configuration set up.
        return;
      }
      final String[] languageFieldSpecs = solrParams.getParams("qf_" + lang);
      super.queryFields = SolrPluginUtils.parseFieldBoosts(languageFieldSpecs);
    }
  }
}
/**
 * edismax parser used by the tests that turns queries for a fixed set of frequently
 * misspelled words into fuzzy queries.
 */
static class FuzzyDismaxQParser extends ExtendedDismaxQParser {

  private static final float MIN_SIMILARITY = 0.75F;

  public FuzzyDismaxQParser(String qstr, SolrParams localParams,
      SolrParams params, SolrQueryRequest req) {
    super(qstr, localParams, params, req);
  }

  @Override
  protected ExtendedSolrQueryParser createEdismaxQueryParser(QParser qParser,
      String field) {
    return new FuzzyQueryParser(qParser, field);
  }

  /** Query parser that substitutes fuzzy queries for the listed words. */
  class FuzzyQueryParser extends ExtendedSolrQueryParser {

    private Set<String> frequentlyMisspelledWords;

    public FuzzyQueryParser(QParser parser, String defaultField) {
      super(parser, defaultField);
      frequentlyMisspelledWords = new HashSet<>(Arrays.asList("absence", "absenc"));
    }

    @Override
    protected Query getFieldQuery(String field,
        String val, boolean quoted, boolean raw) throws SyntaxError {
      return frequentlyMisspelledWords.contains(val)
          ? getFuzzyQuery(field, val, MIN_SIMILARITY)
          : super.getFieldQuery(field, val, quoted, raw);
    }

    /**
     * Handle multi-term queries by repacking boolean queries with frequently misspelled term
     * queries rewritten as fuzzy queries.
     **/
    @Override
    protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
        boolean quoted, boolean fieldAutoGenPhraseQueries,
        boolean fieldEnableGraphQueries, SynonymQueryStyle synonymQueryStyle)
        throws SyntaxError {
      final Query parsed = super.newFieldQuery(
          analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synonymQueryStyle);
      if (!(parsed instanceof BooleanQuery)) {
        return parsed;
      }

      final BooleanQuery source = (BooleanQuery) parsed;
      final BooleanQuery.Builder repacked = newBooleanQuery();
      boolean replacedAny = false; // only hand back the repacked query if something changed
      for (BooleanClause existing : source.clauses()) {
        BooleanClause toAdd = existing;
        Query inner = existing.getQuery();
        if (inner instanceof TermQuery) {
          Term innerTerm = ((TermQuery) inner).getTerm();
          if (frequentlyMisspelledWords.contains(innerTerm.text())) {
            replacedAny = true;
            Query fuzzyInner = newFuzzyQuery(innerTerm, MIN_SIMILARITY, getFuzzyPrefixLength());
            toAdd = newBooleanClause(fuzzyInner, existing.getOccur());
          }
        }
        repacked.add(toAdd);
      }

      if (!replacedAny) {
        return parsed;
      }
      repacked.setMinimumNumberShouldMatch(source.getMinimumNumberShouldMatch());
      return repacked.build();
    }
  }
}
/**
 * Checks the parsed form of a three-term query on the {@code shingle23} field with
 * {@code sow=false}, and that the query "A B C" against that field finds one document.
 */
@Test
public void testShingleQueries() throws Exception {
  ModifiableSolrParams shingleParams = new ModifiableSolrParams();
  shingleParams.add("sow", "false");
  shingleParams.add("defType", "edismax");

  try (SolrQueryRequest request = req(shingleParams)) {
    Query parsed = QParser.getParser("shingle23:(A B C)", request).getQuery();
    final String expected = "Synonym(shingle23:A_B shingle23:A_B_C) shingle23:B_C";
    assertEquals(expected, parsed.toString());
  }

  assertJQ(
      req("df", "shingle23", "q", "A B C", "sow", "false"),
      "/response/numFound==1");
}
/**
 * SOLR-11512: a {@code bq} that refers back to an edismax sub-query must fail with a
 * BAD_REQUEST error mentioning infinite recursion.
 */
@Test
public void killInfiniteRecursionParse() throws Exception {
  SolrException thrown = expectThrows(
      SolrException.class,
      () -> h.query(req("defType", "edismax", "q", "*", "qq", "{!edismax v=something}", "bq", "{!edismax v=$qq}")));

  assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, thrown.code());

  String message = thrown.getMessage();
  assertTrue(message.contains("Infinite Recursion detected parsing query"));
}
/**
 * SOLR-5163: a {@code qf} made of known fields parses normally, while a {@code qf} naming an
 * unknown field is rejected with a BAD_REQUEST error.
 */
@Test
public void testValidateQueryFields() throws Exception {
  // field aliasing covered by test - testAliasing
  ModifiableSolrParams queryParams = new ModifiableSolrParams();
  queryParams.add("defType", "edismax");
  queryParams.add("df", "text");
  queryParams.add("q", "olive AND other");
  queryParams.add("qf", "subject^3 title");
  queryParams.add("debugQuery", "true");

  // test valid field names
  final String expectedParsedQuery = "+DisjunctionMaxQuery((title:olive | " +
      "(subject:oliv)^3.0)) +DisjunctionMaxQuery((title:other | (subject:other)^3.0))";
  String debugResponse = h.query(req(queryParams));
  assertTrue(debugResponse.contains(expectedParsedQuery));

  // test invalid field name
  queryParams.set("qf", "subject^3 nosuchfield");
  SolrException thrown = expectThrows(SolrException.class, () -> h.query(req(queryParams)));
  assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, thrown.code());

  final String expectedMessage =
      "org.apache.solr.search.SyntaxError: Query Field 'nosuchfield' is not a valid field name";
  assertEquals(expectedMessage, thrown.getMessage());
}
}