| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.solr.search; |
| |
| import java.util.HashSet; |
| import java.util.Random; |
| import java.util.Set; |
| |
| import org.apache.lucene.search.BooleanClause; |
| import org.apache.lucene.search.BooleanQuery; |
| import org.apache.lucene.search.DisjunctionMaxQuery; |
| import org.apache.lucene.search.FuzzyQuery; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.solr.SolrTestCaseJ4; |
| import org.apache.solr.common.SolrException; |
| import org.apache.solr.common.params.ModifiableSolrParams; |
| import org.apache.solr.common.params.SolrParams; |
| import org.apache.solr.request.SolrQueryRequest; |
| import org.apache.solr.util.AbstractSolrTestCase; |
| import org.apache.solr.util.SolrPluginUtils; |
| import org.junit.BeforeClass; |
| import org.junit.Test; |
| |
| public class TestExtendedDismaxParser extends SolrTestCaseJ4 { |
| |
| @BeforeClass |
| public static void beforeClass() throws Exception { |
| System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_ |
| initCore("solrconfig.xml", "schema12.xml"); |
| index(); |
| } |
| |
| public static void index() throws Exception { |
| assertU(adoc("id", "42", "trait_ss", "Tool", "trait_ss", "Obnoxious", |
| "name", "Zapp Brannigan")); |
| assertU(adoc("id", "43" , |
| "title", "Democratic Order op Planets")); |
| assertU(adoc("id", "44", "trait_ss", "Tool", |
| "name", "The Zapper")); |
| assertU(adoc("id", "45", "trait_ss", "Chauvinist", |
| "title", "25 star General")); |
| assertU(adoc("id", "46", |
| "trait_ss", "Obnoxious", |
| "subject", "Defeated the pacifists op the Gandhi nebula", |
| "t_special", "literal:colon value", |
| "movies_t", "first is Mission: Impossible, second is Terminator 2: Judgement Day. Terminator:3 ok...", |
| "foo_i", "8" |
| )); |
| assertU(adoc("id", "47", "trait_ss", "Pig", |
| "text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!")); |
| assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i","100")); |
| assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i","-100")); |
| assertU(adoc("id", "50", "text_sw", "start new big city end")); |
| assertU(adoc("id", "51", "store", "12.34,-56.78")); |
| assertU(adoc("id", "52", "text_sw", "tekna theou klethomen")); |
| assertU(adoc("id", "53", "text_sw", "nun tekna theou esmen")); |
| assertU(adoc("id", "54", "text_sw", "phanera estin ta tekna tou theou")); |
| assertU(adoc("id", "55", "standardtok", "大")); |
| assertU(adoc("id", "56", "standardtok", "大亚")); |
| assertU(adoc("id", "57", "standardtok", "大亚湾")); |
| assertU(adoc("id", "58", "HTMLstandardtok", "大")); |
| assertU(adoc("id", "59", "HTMLstandardtok", "大亚")); |
| assertU(adoc("id", "60", "HTMLstandardtok", "大亚湾")); |
| assertU(adoc("id", "61", "text_sw", "bazaaa")); // synonyms in an expansion group |
| assertU(commit()); |
| } |
| |
| @Test |
| public void testSyntax() throws Exception { |
| // a bare * should be treated as *:* |
| assertJQ(req("defType","edismax", "q","*", "df","doesnotexist_s") |
| ,"/response/docs/[0]==" // make sure we get something... |
| ); |
| assertJQ(req("defType","edismax", "q","doesnotexist_s:*") |
| ,"/response/numFound==0" // nothing should be found |
| ); |
| assertJQ(req("defType","edismax","q","doesnotexist_s:*") |
| ,"/response/numFound==0" // nothing should be found |
| ); |
| assertJQ(req("defType","edismax","q","doesnotexist_s:( * * * )") |
| ,"/response/numFound==0" // nothing should be found |
| ); |
| } |
| |
| |
| public void testTrailingOperators() throws Exception { |
| // really just test that exceptions aren't thrown by |
| // single + - |
| |
| assertJQ(req("defType","edismax", "q","-") |
| ,"/response=="); |
| |
| assertJQ(req("defType","edismax", "q","+") |
| ,"/response=="); |
| |
| assertJQ(req("defType","edismax", "q","+ - +") |
| ,"/response=="); |
| |
| assertJQ(req("defType","edismax", "q","- + -") |
| ,"/response=="); |
| |
| assertJQ(req("defType","edismax", "q","id:47 +") |
| ,"/response/numFound==1"); |
| |
| assertJQ(req("defType","edismax", "q","id:47 -") |
| ,"/response/numFound==1"); |
| |
| Random r = random(); |
| for (int i=0; i<100; i++) { |
| StringBuilder sb = new StringBuilder(); |
| for (int j=0; j<r.nextInt(10); j++) { |
| switch (r.nextInt(3)) { |
| case 0: sb.append(' '); break; |
| case 1: sb.append('+'); break; |
| case 2: sb.append('-'); break; |
| case 3: sb.append((char)r.nextInt(127)); break; |
| } |
| } |
| |
| String q = sb.toString(); |
| assertJQ(req("defType","edismax", "q",q) |
| ,"/response=="); |
| } |
| } |
| |
| |
| public void testLowercaseOperators() { |
| assertQ("Upper case operator", |
| req("q","Zapp AND Brannigan", |
| "qf", "name", |
| "lowercaseOperators", "false", |
| "defType","edismax") |
| ,"*[count(//doc)=1]"); |
| |
| assertQ("Upper case operator, allow lowercase", |
| req("q","Zapp AND Brannigan", |
| "qf", "name", |
| "lowercaseOperators", "true", |
| "defType","edismax") |
| ,"*[count(//doc)=1]"); |
| |
| assertQ("Lower case operator, don't allow lowercase operators", |
| req("q","Zapp and Brannigan", |
| "qf", "name", |
| "q.op", "AND", |
| "lowercaseOperators", "false", |
| "defType","edismax") |
| ,"*[count(//doc)=0]"); |
| |
| assertQ("Lower case operator, allow lower case operators", |
| req("q","Zapp and Brannigan", |
| "qf", "name", |
| "q.op", "AND", |
| "lowercaseOperators", "true", |
| "defType","edismax") |
| ,"*[count(//doc)=1]"); |
| } |
| |
| // test the edismax query parser based on the dismax parser |
| public void testFocusQueryParser() { |
| String allq = "id:[42 TO 51]"; |
| String allr = "*[count(//doc)=10]"; |
| String oner = "*[count(//doc)=1]"; |
| String twor = "*[count(//doc)=2]"; |
| String nor = "*[count(//doc)=0]"; |
| |
| assertQ("blank q", |
| req("q"," ", |
| "q.alt",allq, |
| "defType","edismax") |
| ,allr); |
| |
| assertQ("expected doc is missing (using un-escaped edismax w/qf)", |
| req("q", "literal:colon", |
| "qf", "t_special", |
| "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='46']"); |
| |
| assertQ("standard request handler returns all matches", |
| req(allq), |
| allr |
| ); |
| |
| assertQ("edismax query parser returns all matches", |
| req("q", allq, |
| "defType", "edismax" |
| ), |
| allr |
| ); |
| |
| assertQ(req("defType", "edismax", "qf", "trait_ss", |
| "q","Tool"), twor |
| ); |
| |
| // test that field types that aren't applicable don't cause an exception to be thrown |
| assertQ(req("defType", "edismax", "qf", "trait_ss foo_i foo_f foo_dt foo_l foo_d foo_b", |
| "q","Tool"), twor |
| ); |
| |
| // test that numeric field types can be queried |
| assertQ(req("defType", "edismax", "qf", "text_sw", |
| "q","foo_i:100"), oner |
| ); |
| |
| // test that numeric field types can be queried |
| assertQ(req("defType", "edismax", "qf", "text_sw", |
| "q","foo_i:-100"), oner |
| ); |
| |
| // test that numeric field types can be queried via qf |
| assertQ(req("defType", "edismax", "qf", "text_sw foo_i", |
| "q","100"), oner |
| ); |
| |
| assertQ("qf defaults to df", |
| req("defType", "edismax", "df", "trait_ss", |
| "q","Tool"), twor |
| ); |
| |
| assertQ("qf defaults to defaultSearchField" |
| , req( "defType", "edismax" |
| ,"q","op") |
| , twor |
| ); |
| |
| assertQ(req("defType", "edismax", "qf", "name title subject text", |
| "q","op"), twor |
| ); |
| assertQ(req("defType", "edismax", |
| "qf", "name title subject text", |
| "q.op", "AND", |
| "q","Order op"), oner |
| ); |
| assertQ(req("defType", "edismax", |
| "qf", "name title subject text", |
| "q.op", "OR", |
| "q","Order op"), twor |
| ); |
| assertQ(req("defType", "edismax", "qf", "name title subject text", |
| "q","Order AND op"), oner |
| ); |
| assertQ(req("defType", "edismax", "qf", "name title subject text", |
| "q","Order and op"), oner |
| ); |
| assertQ(req("defType", "edismax", "qf", "name title subject text", |
| "q","+Order op"), oner |
| ); |
| assertQ(req("defType", "edismax", "qf", "name title subject text", |
| "q","Order OR op"), twor |
| ); |
| assertQ(req("defType", "edismax", "qf", "name title subject text", |
| "q","Order or op"), twor |
| ); |
| assertQ(req("defType", "edismax", "qf", "name title subject text", |
| "q","*:*"), allr |
| ); |
| |
| assertQ(req("defType", "edismax", "qf", "name title subject text", |
| "q","star OR (-star)"), allr |
| ); |
| assertQ(req("defType", "edismax", "qf", "name title subject text", |
| "q","id:42 OR (-id:42)"), allr |
| ); |
| |
| // test that basic synonyms work |
| assertQ(req("defType", "edismax", "qf", "text_sw", |
| "q","GB"), oner |
| ); |
| |
| // test for stopword removal in main query part |
| assertQ(req("defType", "edismax", "qf", "text_sw", |
| "q","the big"), twor |
| ); |
| |
| // test for stopwords not removed |
| assertQ(req("defType", "edismax", |
| "qf", "text_sw", |
| "stopwords","false", |
| "q.op","AND", |
| "q","the big"), oner |
| ); |
| |
| // searching for a literal colon value when clearly not used for a field |
| assertQ("expected doc is missing (using standard)", |
| req("q", "t_special:literal\\:colon"), |
| "//doc[1]/str[@name='id'][.='46']"); |
| assertQ("expected doc is missing (using escaped edismax w/field)", |
| req("q", "t_special:literal\\:colon", |
| "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='46']"); |
| assertQ("expected doc is missing (using un-escaped edismax w/field)", |
| req("q", "t_special:literal:colon", |
| "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='46']"); |
| assertQ("expected doc is missing (using escaped edismax w/qf)", |
| req("q", "literal\\:colon", |
| "qf", "t_special", |
| "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='46']"); |
| assertQ("expected doc is missing (using un-escaped edismax w/qf)", |
| req("q", "literal:colon", |
| "qf", "t_special", |
| "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='46']"); |
| |
| assertQ(req("defType","edismax", "mm","100%", "q","terminator:3", "qf","movies_t"), |
| oner); |
| assertQ(req("defType","edismax", "mm","100%", "q","Mission:Impossible", "qf","movies_t"), |
| oner); |
| assertQ(req("defType","edismax", "mm","100%", "q","Mission : Impossible", "qf","movies_t"), |
| oner); |
| assertQ(req("defType","edismax", "mm","100%", "q","Mission: Impossible", "qf","movies_t"), |
| oner); |
| assertQ(req("defType","edismax", "mm","100%", "q","Terminator 2: Judgement Day", "qf","movies_t"), |
| oner); |
| |
| // make sure the clause wasn't eliminated |
| assertQ(req("defType","edismax", "mm","100%", "q","Terminator 10: Judgement Day", "qf","movies_t"), |
| nor); |
| |
| // throw in a numeric field |
| assertQ(req("defType","edismax", "mm","0", "q","Terminator: 100", "qf","movies_t foo_i"), |
| twor); |
| |
| assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 100", "qf","movies_t foo_i"), |
| nor); |
| |
| assertQ(req("defType","edismax", "mm","100%", "q","Terminator: 8", "qf","movies_t foo_i"), |
| oner); |
| |
| assertQ(req("defType","edismax", "mm","0", "q","movies_t:Terminator 100", "qf","movies_t foo_i"), |
| twor); |
| |
| // special psuedo-fields like _query_ and _val_ |
| |
| // special fields (and real field id) should be included by default |
| assertQ(req("defType", "edismax", |
| "mm", "100%", |
| "fq", "id:51", |
| "q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\""), |
| oner); |
| // should also work when explicitly allowed |
| assertQ(req("defType", "edismax", |
| "mm", "100%", |
| "fq", "id:51", |
| "uf", "id _query_", |
| "q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\""), |
| oner); |
| assertQ(req("defType", "edismax", |
| "mm", "100%", |
| "fq", "id:51", |
| "uf", "id", |
| "uf", "_query_", |
| "q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\""), |
| oner); |
| |
| // should fail when prohibited |
| assertQ(req("defType", "edismax", |
| "mm", "100%", |
| "fq", "id:51", |
| "uf", "* -_query_", // explicitly excluded |
| "q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\""), |
| nor); |
| assertQ(req("defType", "edismax", |
| "mm", "100%", |
| "fq", "id:51", |
| "uf", "id", // excluded by ommision |
| "q", "_query_:\"{!geofilt d=20 sfield=store pt=12.34,-56.78}\""), |
| nor); |
| |
| |
| /** stopword removal in conjunction with multi-word synonyms at query time |
| * break this test. |
| // multi-word synonyms |
| // remove id:50 which contans the false match |
| assertQ(req("defType", "edismax", "qf", "text_t", "indent","true", "debugQuery","true", |
| "q","-id:50 nyc"), oner |
| ); |
| **/ |
| |
| /*** these fail because multi-word synonyms are being used at query time |
| // this will incorrectly match "new big city" |
| assertQ(req("defType", "edismax", "qf", "id title", |
| "q","nyc"), oner |
| ); |
| |
| // this will incorrectly match "new big city" |
| assertQ(req("defType", "edismax", "qf", "title", |
| "q","the big apple"), nor |
| ); |
| ***/ |
| |
| } |
| |
| public void testBoostQuery() { |
| assertQ( |
| req("q", "tekna", "qf", "text_sw", "defType", "edismax", "bq", "id:54^100", "bq", "id:53^10", "fq", "id:[52 TO 54]", "fl", "id,score"), |
| "//doc[1]/str[@name='id'][.='54']", |
| "//doc[2]/str[@name='id'][.='53']", |
| "//doc[3]/str[@name='id'][.='52']" |
| ); |
| |
| // non-trivial bqs |
| assertQ(req("q", "tekna", |
| "qf", "text_sw", |
| "defType", "edismax", |
| "bq", "(text_sw:blasdfadsf id:54)^100", |
| "bq", "id:[53 TO 53]^10", |
| "fq", "id:[52 TO 54]", |
| "fl", "id,score"), |
| "//doc[1]/str[@name='id'][.='54']", |
| "//doc[2]/str[@name='id'][.='53']", |
| "//doc[3]/str[@name='id'][.='52']" |
| ); |
| |
| // genuine negative boosts are not legal |
| // see SOLR-3823, SOLR-3278, LUCENE-4378 and |
| // https://wiki.apache.org/solr/SolrRelevancyFAQ#How_do_I_give_a_negative_.28or_very_low.29_boost_to_documents_that_match_a_query.3F |
| assertQ( |
| req("q", "tekna", "qf", "text_sw", "defType", "edismax", "bq", "(*:* -id:54)^100", "bq", "id:53^10", "bq", "id:52", "fq", "id:[52 TO 54]", "fl", "id,score"), |
| "//doc[1]/str[@name='id'][.='53']", |
| "//doc[2]/str[@name='id'][.='52']", |
| "//doc[3]/str[@name='id'][.='54']" |
| ); |
| } |
| |
| public void testUserFields() { |
| String allr = "*[count(//doc)=10]"; |
| String oner = "*[count(//doc)=1]"; |
| String nor = "*[count(//doc)=0]"; |
| |
| // User fields |
| // Default is allow all "*" |
| // If a list of fields are given, only those are allowed "foo bar" |
| // Possible to invert with "-" syntax: |
| // Disallow all: "-*" |
| // Allow all but id: "* -id" |
| // Also supports "dynamic" field name wildcarding |
| assertQ(req("defType","edismax", "q","id:42"), |
| oner); |
| |
| // SOLR-3377 - parens should be allowed immediately before field name |
| assertQ(req("defType","edismax", "q","( id:42 )"), |
| oner); |
| assertQ(req("defType","edismax", "q","(id:42)"), |
| oner); |
| assertQ(req("defType","edismax", "q","(+id:42)"), |
| oner); |
| assertQ(req("defType","edismax", "q","+(+id:42)"), |
| oner); |
| assertQ(req("defType","edismax", "q","+(+((id:42)))"), |
| oner); |
| assertQ(req("defType","edismax", "q","+(+((+id:42)))"), |
| oner); |
| assertQ(req("defType","edismax", "q"," +( +( ( +id:42) ) ) "), |
| oner); |
| assertQ(req("defType","edismax", "q","(id:(*:*)^200)"), |
| allr); |
| |
| assertQ(req("defType","edismax", "uf","id", "q","id:42"), |
| oner); |
| |
| assertQ(req("defType","edismax", "uf","-*", "q","id:42"), |
| nor); |
| |
| assertQ(req("defType","edismax", "uf","loremipsum", "q","id:42"), |
| nor); |
| |
| assertQ(req("defType","edismax", "uf","* -id", "q","id:42"), |
| nor); |
| |
| assertQ(req("defType","edismax", "uf","* -loremipsum", "q","id:42"), |
| oner); |
| |
| assertQ(req("defType","edismax", "uf","id^5.0", "q","id:42"), |
| oner); |
| |
| assertQ(req("defType","edismax", "uf","*^5.0", "q","id:42"), |
| oner); |
| |
| assertQ(req("defType","edismax", "uf","id^5.0", "q","id:42^10.0"), |
| oner); |
| |
| assertQ(req("defType","edismax", "uf","na*", "q","name:Zapp"), |
| oner); |
| |
| assertQ(req("defType","edismax", "uf","*me", "q","name:Zapp"), |
| oner); |
| |
| assertQ(req("defType","edismax", "uf","* -na*", "q","name:Zapp"), |
| nor); |
| |
| assertQ(req("defType","edismax", "uf","*me -name", "q","name:Zapp"), |
| nor); |
| |
| assertQ(req("defType","edismax", "uf","*ame -*e", "q","name:Zapp"), |
| nor); |
| |
| // Boosts from user fields |
| assertQ(req("defType","edismax", "debugQuery","true", "rows","0", "q","id:42"), |
| "//str[@name='parsedquery_toString'][.='+id:42']"); |
| |
| assertQ(req("defType","edismax", "debugQuery","true", "rows","0", "uf","*^5.0", "q","id:42"), |
| "//str[@name='parsedquery_toString'][.='+id:42^5.0']"); |
| |
| assertQ(req("defType","edismax", "debugQuery","true", "rows","0", "uf","*^2.0 id^5.0 -xyz", "q","name:foo"), |
| "//str[@name='parsedquery_toString'][.='+name:foo^2.0']"); |
| |
| assertQ(req("defType","edismax", "debugQuery","true", "rows","0", "uf","i*^5.0", "q","id:42"), |
| "//str[@name='parsedquery_toString'][.='+id:42^5.0']"); |
| |
| |
| assertQ(req("defType","edismax", "uf","-*", "q","cannons"), |
| oner); |
| |
| assertQ(req("defType","edismax", "uf","* -id", "q","42", "qf", "id"), oner); |
| |
| } |
| |
| public void testAliasing() throws Exception { |
| String oner = "*[count(//doc)=1]"; |
| String twor = "*[count(//doc)=2]"; |
| String nor = "*[count(//doc)=0]"; |
| |
| // Aliasing |
| // Single field |
| assertQ(req("defType","edismax", "q","myalias:Zapp"), |
| nor); |
| |
| assertQ(req("defType","edismax", "q","myalias:Zapp", "f.myalias.qf","name"), |
| oner); |
| |
| // Multi field |
| assertQ(req("defType","edismax", "uf", "myalias", "q","myalias:(Zapp Obnoxious)", "f.myalias.qf","name^2.0 mytrait_ss^5.0", "mm", "50%"), |
| oner); |
| |
| // Multi field |
| assertQ(req("defType","edismax", "q","Zapp Obnoxious", "f.myalias.qf","name^2.0 mytrait_ss^5.0"), |
| nor); |
| |
| assertQ(req("defType","edismax", "q","Zapp Obnoxious", "qf","myalias^10.0", "f.myalias.qf","name^2.0 mytrait_ss^5.0"), oner); |
| assertQ(req("defType","edismax", "q","Zapp Obnoxious", "qf","myalias^10.0", "f.myalias.qf","name^2.0 trait_ss^5.0"), twor); |
| assertQ(req("defType","edismax", "q","Zapp Obnoxious", "qf","myalias^10.0", "f.myalias.qf","name^2.0 trait_ss^5.0", "mm", "100%"), oner); |
| assertQ(req("defType","edismax", "q","Zapp Obnoxious", "qf","who^10.0 where^3.0", "f.who.qf","name^2.0", "f.where.qf", "mytrait_ss^5.0"), oner); |
| |
| assertQ(req("defType","edismax", "q","Zapp Obnoxious", "qf","myalias", "f.myalias.qf","name mytrait_ss", "uf", "myalias"), oner); |
| |
| assertQ(req("defType","edismax", "uf","who", "q","who:(Zapp Obnoxious)", "f.who.qf", "name^2.0 trait_ss^5.0", "qf", "id"), twor); |
| assertQ(req("defType","edismax", "uf","* -name", "q","who:(Zapp Obnoxious)", "f.who.qf", "name^2.0 trait_ss^5.0"), twor); |
| |
| } |
| |
| public void testAliasingBoost() throws Exception { |
| assertQ(req("defType","edismax", "q","Zapp Pig", "qf","myalias", "f.myalias.qf","name trait_ss^0.5"), "//result/doc[1]/str[@name='id']=42", "//result/doc[2]/str[@name='id']=47");//doc 42 should score higher than 46 |
| assertQ(req("defType","edismax", "q","Zapp Pig", "qf","myalias^100 name", "f.myalias.qf","trait_ss^0.5"), "//result/doc[1]/str[@name='id']=47", "//result/doc[2]/str[@name='id']=42");//Now the order should be inverse |
| } |
| |
| public void testCyclicAliasing() throws Exception { |
| try { |
| ignoreException(".*Field aliases lead to a cycle.*"); |
| try { |
| h.query(req("defType","edismax", "q","blarg", "qf","who", "f.who.qf","name","f.name.qf","who")); |
| fail("Simple cyclic alising not detected"); |
| } catch (SolrException e) { |
| assertTrue(e.getCause().getMessage().contains("Field aliases lead to a cycle")); |
| } |
| |
| try { |
| h.query(req("defType","edismax", "q","blarg", "qf","who", "f.who.qf","name","f.name.qf","myalias", "f.myalias.qf","who")); |
| fail("Cyclic alising not detected"); |
| } catch (SolrException e) { |
| assertTrue(e.getCause().getMessage().contains("Field aliases lead to a cycle")); |
| } |
| |
| try { |
| h.query(req("defType","edismax", "q","blarg", "qf","field1", "f.field1.qf","field2 field3","f.field2.qf","field4 field5", "f.field4.qf","field5", "f.field5.qf","field6", "f.field3.qf","field6")); |
| } catch (SolrException e) { |
| fail("This is not cyclic alising"); |
| } |
| |
| try { |
| h.query(req("defType","edismax", "q","blarg", "qf","field1", "f.field1.qf","field2 field3", "f.field2.qf","field4 field5", "f.field4.qf","field5", "f.field5.qf","field4")); |
| fail("Cyclic alising not detected"); |
| } catch (SolrException e) { |
| assertTrue(e.getCause().getMessage().contains("Field aliases lead to a cycle")); |
| } |
| |
| try { |
| h.query(req("defType","edismax", "q","who:(Zapp Pig)", "qf","field1", "f.who.qf","name","f.name.qf","myalias", "f.myalias.qf","who")); |
| fail("Cyclic alising not detected"); |
| } catch (SolrException e) { |
| assertTrue(e.getCause().getMessage().contains("Field aliases lead to a cycle")); |
| } |
| } finally { |
| resetExceptionIgnores(); |
| } |
| } |
| |
| public void testOperatorsWithLiteralColons() { |
| assertU(adoc("id", "142", "a_s", "bogus:xxx", "text_s", "yak")); |
| assertU(adoc("id", "143", "a_s", "bogus:xxx")); |
| assertU(adoc("id", "144", "text_s", "yak")); |
| assertU(adoc("id", "145", "a_s", "a_s:xxx", "text_s", "yak")); |
| assertU(adoc("id", "146", "a_s", "a_s:xxx")); |
| assertU(adoc("id", "147", "a_s", "AND", "a_s", "NOT")); |
| assertU(commit()); |
| |
| assertQ(req("q", "bogus:xxx AND text_s:yak", |
| "fl", "id", |
| "qf", "a_s b_s", |
| "defType", "edismax", |
| "mm", "0"), |
| "//*[@numFound='1']", |
| "//str[@name='id'][.='142']"); |
| |
| assertQ(req("q", "a_s:xxx AND text_s:yak", |
| "fl", "id", |
| "qf", "a_s b_s", |
| "defType", "edismax", |
| "mm", "0", |
| "uf", "text_s"), |
| "//*[@numFound='1']", |
| "//str[@name='id'][.='145']"); |
| |
| assertQ(req("q", "NOT bogus:xxx +text_s:yak", |
| "fl", "id", |
| "qf", "a_s b_s", |
| "defType", "edismax", |
| "mm", "0", |
| "debugQuery", "true"), |
| "//*[@numFound='2']", |
| "//str[@name='id'][.='144']", |
| "//str[@name='id'][.='145']"); |
| |
| assertQ(req("q", "NOT a_s:xxx +text_s:yak", |
| "fl", "id", |
| "qf", "a_s b_s", |
| "defType", "edismax", |
| "mm", "0", |
| "uf", "text_s"), |
| "//*[@numFound='2']", |
| "//str[@name='id'][.='142']", |
| "//str[@name='id'][.='144']"); |
| |
| assertQ(req("q", "+bogus:xxx yak", |
| "fl", "id", |
| "qf", "a_s b_s text_s", |
| "defType", "edismax", |
| "mm", "0"), |
| "//*[@numFound='2']", |
| "//str[@name='id'][.='142']", |
| "//str[@name='id'][.='143']"); |
| |
| assertQ(req("q", "+a_s:xxx yak", |
| "fl", "id", |
| "qf", "a_s b_s text_s", |
| "defType", "edismax", |
| "mm", "0", |
| "uf", "b_s"), |
| "//*[@numFound='2']", |
| "//str[@name='id'][.='145']", |
| "//str[@name='id'][.='146']"); |
| } |
| |
| // test phrase fields including pf2 pf3 and phrase slop |
| public void testPfPs() { |
| assertU(adoc("id", "s0", "phrase_sw", "foo bar a b c", "boost_d", "1.0")); |
| assertU(adoc("id", "s1", "phrase_sw", "foo a bar b c", "boost_d", "2.0")); |
| assertU(adoc("id", "s2", "phrase_sw", "foo a b bar c", "boost_d", "3.0")); |
| assertU(adoc("id", "s3", "phrase_sw", "foo a b c bar", "boost_d", "4.0")); |
| assertU(commit()); |
| |
| assertQ("default order assumption wrong", |
| req("q", "foo bar", |
| "qf", "phrase_sw", |
| "bf", "boost_d", |
| "fl", "score,*", |
| "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='s3']", |
| "//doc[2]/str[@name='id'][.='s2']", |
| "//doc[3]/str[@name='id'][.='s1']", |
| "//doc[4]/str[@name='id'][.='s0']"); |
| |
| assertQ("pf not working", |
| req("q", "foo bar", |
| "qf", "phrase_sw", |
| "pf", "phrase_sw^10", |
| "bf", "boost_d", |
| "fl", "score,*", |
| "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='s0']"); |
| |
| assertQ("pf2 not working", |
| req("q", "foo bar", |
| "qf", "phrase_sw", |
| "pf2", "phrase_sw^10", |
| "bf", "boost_d", |
| "fl", "score,*", |
| "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='s0']"); |
| |
| assertQ("pf3 not working", |
| req("q", "a b bar", |
| "qf", "phrase_sw", |
| "pf3", "phrase_sw^10", |
| "bf", "boost_d", |
| "fl", "score,*", |
| "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='s2']"); |
| |
| assertQ("ps not working for pf2", |
| req("q", "bar foo", |
| "qf", "phrase_sw", |
| "pf2", "phrase_sw^10", |
| "ps", "2", |
| "bf", "boost_d", |
| "fl", "score,*", |
| "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='s0']"); |
| |
| assertQ("ps not working for pf3", |
| req("q", "a bar foo", |
| "qf", "phrase_sw", |
| "pf3", "phrase_sw^10", |
| "ps", "3", |
| "bf", "boost_d", |
| "fl", "score,*", |
| "debugQuery", "true", |
| "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='s1']"); |
| |
| assertQ("ps/ps2/ps3 with default slop overrides not working", |
| req("q", "zzzz xxxx cccc vvvv", |
| "qf", "phrase_sw", |
| "pf", "phrase_sw~1^10 phrase_sw~2^20 phrase_sw^30", |
| "pf2", "phrase_sw~2^22 phrase_sw^33", |
| "pf3", "phrase_sw~2^222 phrase_sw^333", |
| "ps", "3", |
| "defType", "edismax", |
| "debugQuery", "true"), |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc vvvv\"~1^10.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc vvvv\"~2^20.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc vvvv\"~3^30.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx\"~2^22.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc\"~2^22.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"cccc vvvv\"~2^22.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx\"~3^33.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc\"~3^33.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"cccc vvvv\"~3^33.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc\"~2^222.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc vvvv\"~2^222.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc\"~3^333.0')]", |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc vvvv\"~3^333.0')]" |
| ); |
| |
| assertQ( |
| "ps2 not working", |
| req("q", "bar foo", "qf", "phrase_sw", "pf2", "phrase_sw^10", "ps2", |
| "2", "bf", "boost_d", "fl", "score,*", "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='s0']"); |
| |
| assertQ( |
| "Specifying slop in pf2 param not working", |
| req("q", "bar foo", "qf", "phrase_sw", "pf2", "phrase_sw~2^10", "bf", |
| "boost_d", "fl", "score,*", "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='s0']"); |
| |
| assertQ( |
| "Slop in ps2 parameter should override ps", |
| req("q", "bar foo", "qf", "phrase_sw", "pf2", "phrase_sw^10", "ps", |
| "0", "ps2", "2", "bf", "boost_d", "fl", "score,*", "defType", |
| "edismax"), "//doc[1]/str[@name='id'][.='s0']"); |
| |
| assertQ( |
| "ps3 not working", |
| req("q", "a bar foo", "qf", "phrase_sw", "pf3", "phrase_sw^10", "ps3", |
| "3", "bf", "boost_d", "fl", "score,*", "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='s1']"); |
| |
| assertQ( |
| "Specifying slop in pf3 param not working", |
| req("q", "a bar foo", "qf", "phrase_sw", "pf3", "phrase_sw~3^10", "bf", |
| "boost_d", "fl", "score,*", "defType", "edismax"), |
| "//doc[1]/str[@name='id'][.='s1']"); |
| |
| assertQ("ps2 should not override slop specified inline in pf2", |
| req("q", "zzzz xxxx cccc vvvv", |
| "qf", "phrase_sw", |
| "pf2", "phrase_sw~2^22", |
| "ps2", "4", |
| "defType", "edismax", |
| "debugQuery", "true"), |
| "//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx\"~2^22.0')]" |
| ); |
| |
| } |
| |
| /** |
| * verify that all reserved characters are properly escaped when being set in |
| * {@link org.apache.solr.search.ExtendedDismaxQParser.Clause#val}. |
| * |
| * @see ExtendedDismaxQParser#splitIntoClauses(String, boolean) |
| */ |
| @Test |
| public void testEscapingOfReservedCharacters() throws Exception { |
| // create a document that contains all reserved characters |
| String allReservedCharacters = "!():^[]{}~*?\"+-\\|&/"; |
| |
| assertU(adoc("id", "reservedChars", |
| "name", allReservedCharacters, |
| "cat_s", "foo/")); |
| assertU(commit()); |
| |
| // the backslash needs to be manually escaped (the query parser sees the raw backslash as an escape the subsequent |
| // character) |
| String query = allReservedCharacters.replace("\\", "\\\\"); |
| |
| // query for all those reserved characters. This will fail to parse in the initial parse, meaning that the escaped |
| // query will then be used |
| assertQ("Escaping reserved characters", |
| req("q", query, |
| "qf", "name", |
| "mm", "100%", |
| "defType", "edismax") |
| , "*[count(//doc)=1]"); |
| |
| // Query string field 'cat_s' for special char / - causes SyntaxError without patch SOLR-3467 |
| assertQ("Escaping string with reserved / character", |
| req("q", "foo/", |
| "qf", "cat_s", |
| "mm", "100%", |
| "defType", "edismax") |
| , "*[count(//doc)=1]"); |
| |
| assertQ( |
| "Might be double-escaping a client-escaped colon", |
| req("q", "text_sw:(theos OR thistokenhasa\\:preescapedcolon OR theou)", "defType", "edismax", "qf", "id"), |
| "*[count(//doc)=3]"); |
| assertQ( |
| "Might be double-escaping a client-escaped colon", |
| req("q", "text_sw:(theos OR thistokenhasa\\:preescapedcolon OR theou)", "defType", "edismax", "qf", "text"), |
| "*[count(//doc)=3]"); |
| |
| } |
| |
| /** |
| * SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token |
| */ |
| public void testCJK() throws Exception { |
| assertQ("test cjk (disjunction)", |
| req("q", "大亚湾", |
| "qf", "standardtok", |
| "mm", "0%", |
| "defType", "edismax") |
| , "*[count(//doc)=3]"); |
| assertQ("test cjk (minShouldMatch)", |
| req("q", "大亚湾", |
| "qf", "standardtok", |
| "mm", "67%", |
| "defType", "edismax") |
| , "*[count(//doc)=2]"); |
| assertQ("test cjk (conjunction)", |
| req("q", "大亚湾", |
| "qf", "standardtok", |
| "mm", "100%", |
| "defType", "edismax") |
| , "*[count(//doc)=1]"); |
| } |
| |
| /** |
| * test that minShouldMatch works with aliasing |
| * for implicit boolean queries |
| */ |
| public void testCJKAliasing() throws Exception { |
| // single field |
| assertQ("test cjk (aliasing+disjunction)", |
| req("q", "myalias:大亚湾", |
| "f.myalias.qf", "standardtok", |
| "mm", "0%", |
| "defType", "edismax") |
| , "*[count(//doc)=3]"); |
| assertQ("test cjk (aliasing+minShouldMatch)", |
| req("q", "myalias:大亚湾", |
| "f.myalias.qf", "standardtok", |
| "mm", "67%", |
| "defType", "edismax") |
| , "*[count(//doc)=2]"); |
| assertQ("test cjk (aliasing+conjunction)", |
| req("q", "myalias:大亚湾", |
| "f.myalias.qf", "standardtok", |
| "mm", "100%", |
| "defType", "edismax") |
| , "*[count(//doc)=1]"); |
| // multifield |
| assertQ("test cjk (aliasing+disjunction)", |
| req("q", "myalias:大亚湾", |
| "f.myalias.qf", "standardtok HTMLstandardtok", |
| "mm", "0%", |
| "defType", "edismax") |
| , "*[count(//doc)=6]"); |
| assertQ("test cjk (aliasing+minShouldMatch)", |
| req("q", "myalias:大亚湾", |
| "f.myalias.qf", "standardtok HTMLstandardtok", |
| "mm", "67%", |
| "defType", "edismax") |
| , "*[count(//doc)=4]"); |
| assertQ("test cjk (aliasing+conjunction)", |
| req("q", "myalias:大亚湾", |
| "f.myalias.qf", "standardtok HTMLstandardtok", |
| "mm", "100%", |
| "defType", "edismax") |
| , "*[count(//doc)=2]"); |
| } |
| |
| /** Test that we apply boosts correctly */ |
| public void testCJKBoosts() throws Exception { |
| assertQ("test cjk (disjunction)", |
| req("q", "大亚湾", |
| "qf", "standardtok^2 HTMLstandardtok", |
| "mm", "0%", |
| "defType", "edismax") |
| , "*[count(//doc)=6]", "//result/doc[1]/str[@name='id'][.='57']"); |
| assertQ("test cjk (minShouldMatch)", |
| req("q", "大亚湾", |
| "qf", "standardtok^2 HTMLstandardtok", |
| "mm", "67%", |
| "defType", "edismax") |
| , "*[count(//doc)=4]", "//result/doc[1]/str[@name='id'][.='57']"); |
| assertQ("test cjk (conjunction)", |
| req("q", "大亚湾", |
| "qf", "standardtok^2 HTMLstandardtok", |
| "mm", "100%", |
| "defType", "edismax") |
| , "*[count(//doc)=2]", "//result/doc[1]/str[@name='id'][.='57']"); |
| |
| // now boost the other field |
| assertQ("test cjk (disjunction)", |
| req("q", "大亚湾", |
| "qf", "standardtok HTMLstandardtok^2", |
| "mm", "0%", |
| "defType", "edismax") |
| , "*[count(//doc)=6]", "//result/doc[1]/str[@name='id'][.='60']"); |
| assertQ("test cjk (minShouldMatch)", |
| req("q", "大亚湾", |
| "qf", "standardtok HTMLstandardtok^2", |
| "mm", "67%", |
| "defType", "edismax") |
| , "*[count(//doc)=4]", "//result/doc[1]/str[@name='id'][.='60']"); |
| assertQ("test cjk (conjunction)", |
| req("q", "大亚湾", |
| "qf", "standardtok HTMLstandardtok^2", |
| "mm", "100%", |
| "defType", "edismax") |
| , "*[count(//doc)=2]", "//result/doc[1]/str[@name='id'][.='60']"); |
| } |
| |
| /** always apply minShouldMatch to the inner booleanqueries |
| * created from whitespace, as these are never structured lucene queries |
| * but only come from unstructured text */ |
| public void testCJKStructured() throws Exception { |
| assertQ("test cjk (disjunction)", |
| req("q", "大亚湾 OR bogus", |
| "qf", "standardtok", |
| "mm", "0%", |
| "defType", "edismax") |
| , "*[count(//doc)=3]"); |
| assertQ("test cjk (minShouldMatch)", |
| req("q", "大亚湾 OR bogus", |
| "qf", "standardtok", |
| "mm", "67%", |
| "defType", "edismax") |
| , "*[count(//doc)=2]"); |
| assertQ("test cjk (conjunction)", |
| req("q", "大亚湾 OR bogus", |
| "qf", "standardtok", |
| "mm", "100%", |
| "defType", "edismax") |
| , "*[count(//doc)=1]"); |
| } |
| |
| /** |
| * Test that we don't apply minShouldMatch to the inner boolean queries |
| * when there are synonyms (these are indicated by coordination factor) |
| */ |
| public void testSynonyms() throws Exception { |
| // document only contains baraaa, but should still match. |
| assertQ("test synonyms", |
| req("q", "fooaaa", |
| "qf", "text_sw", |
| "mm", "100%", |
| "defType", "edismax") |
| , "*[count(//doc)=1]"); |
| } |
| |
| public void testEdismaxSimpleExtension() throws SyntaxError { |
| ModifiableSolrParams params = new ModifiableSolrParams(); |
| params.set("q", "foo bar"); |
| params.set("qf", "subject title^5"); |
| params.set("qf_fr", "subject_fr title_fr^5"); |
| params.set("qf_en", "subject_en title_en^5"); |
| params.set("qf_es", "subject_es title_es^5"); |
| |
| MultilanguageQueryParser parser = new MultilanguageQueryParser("foo bar", new ModifiableSolrParams(), params, req(params)); |
| Query query = parser.parse(); |
| assertNotNull(query); |
| assertTrue(containsClause(query, "title", "foo", 5, false)); |
| assertTrue(containsClause(query, "title", "bar", 5, false)); |
| assertTrue(containsClause(query, "subject", "foo", 1, false)); |
| assertTrue(containsClause(query, "subject", "bar", 1, false)); |
| |
| params.set("language", "es"); |
| parser = new MultilanguageQueryParser("foo bar", new ModifiableSolrParams(), params, req(params)); |
| query = parser.parse(); |
| assertNotNull(query); |
| assertTrue(containsClause(query, "title_es", "foo", 5, false)); |
| assertTrue(containsClause(query, "title_es", "bar", 5, false)); |
| assertTrue(containsClause(query, "subject_es", "foo", 1, false)); |
| assertTrue(containsClause(query, "subject_es", "bar", 1, false)); |
| |
| FuzzyDismaxQParser parser2 = new FuzzyDismaxQParser("foo bar absence", new ModifiableSolrParams(), params, req(params)); |
| query = parser2.parse(); |
| assertNotNull(query); |
| assertTrue(containsClause(query, "title", "foo", 5, false)); |
| assertTrue(containsClause(query, "title", "bar", 5, false)); |
| assertTrue(containsClause(query, "title", "absence", 5, true)); |
| |
| } |
| |
| private boolean containsClause(Query query, String field, String value, |
| int boost, boolean fuzzy) { |
| |
| if(query instanceof BooleanQuery) { |
| return containsClause((BooleanQuery)query, field, value, boost, fuzzy); |
| } |
| if(query instanceof DisjunctionMaxQuery) { |
| return containsClause((DisjunctionMaxQuery)query, field, value, boost, fuzzy); |
| } |
| if(query instanceof TermQuery && !fuzzy) { |
| return containsClause((TermQuery)query, field, value, boost); |
| } |
| if(query instanceof FuzzyQuery && fuzzy) { |
| return containsClause((FuzzyQuery)query, field, value, boost); |
| } |
| return false; |
| } |
| |
| private boolean containsClause(FuzzyQuery query, String field, String value, |
| int boost) { |
| if(query.getTerm().field().equals(field) && |
| query.getTerm().bytes().utf8ToString().equals(value) && |
| query.getBoost() == boost) { |
| return true; |
| } |
| return false; |
| } |
| |
| private boolean containsClause(BooleanQuery query, String field, String value, int boost, boolean fuzzy) { |
| for(BooleanClause clause:query.getClauses()) { |
| if(containsClause(clause.getQuery(), field, value, boost, fuzzy)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| private boolean containsClause(TermQuery query, String field, String value, int boost) { |
| if(query.getTerm().field().equals(field) && |
| query.getTerm().bytes().utf8ToString().equals(value) && |
| query.getBoost() == boost) { |
| return true; |
| } |
| return false; |
| } |
| |
| private boolean containsClause(DisjunctionMaxQuery query, String field, String value, int boost, boolean fuzzy) { |
| for(Query disjunct:query.getDisjuncts()) { |
| if(containsClause(disjunct, field, value, boost, fuzzy)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| class MultilanguageQueryParser extends ExtendedDismaxQParser { |
| |
| public MultilanguageQueryParser(String qstr, SolrParams localParams, |
| SolrParams params, SolrQueryRequest req) { |
| super(qstr, localParams, params, req); |
| } |
| |
| @Override |
| protected ExtendedDismaxConfiguration createConfiguration(String qstr, |
| SolrParams localParams, SolrParams params, SolrQueryRequest req) { |
| return new MultilanguageDismaxConfiguration(localParams, params, req); |
| } |
| |
| class MultilanguageDismaxConfiguration extends ExtendedDismaxConfiguration { |
| |
| public MultilanguageDismaxConfiguration(SolrParams localParams, |
| SolrParams params, SolrQueryRequest req) { |
| super(localParams, params, req); |
| String language = params.get("language"); |
| if(language != null) { |
| super.queryFields = SolrPluginUtils.parseFieldBoosts(solrParams.getParams("qf_" + language)); |
| } |
| } |
| |
| } |
| |
| } |
| |
| |
| |
| class FuzzyDismaxQParser extends ExtendedDismaxQParser { |
| |
| public FuzzyDismaxQParser(String qstr, SolrParams localParams, |
| SolrParams params, SolrQueryRequest req) { |
| super(qstr, localParams, params, req); |
| } |
| |
| @Override |
| protected ExtendedSolrQueryParser createEdismaxQueryParser(QParser qParser, |
| String field) { |
| return new FuzzyQueryParser(qParser, field); |
| } |
| |
| class FuzzyQueryParser extends ExtendedSolrQueryParser{ |
| |
| private Set<String> frequentlyMisspelledWords; |
| |
| public FuzzyQueryParser(QParser parser, String defaultField) { |
| super(parser, defaultField); |
| frequentlyMisspelledWords = new HashSet<String>(); |
| frequentlyMisspelledWords.add("absence"); |
| } |
| |
| @Override |
| protected Query getFieldQuery(String field, |
| String val, boolean quoted) throws SyntaxError { |
| if(frequentlyMisspelledWords.contains(val)) { |
| return getFuzzyQuery(field, val, 0.75F); |
| } |
| return super.getFieldQuery(field, val, quoted); |
| } |
| } |
| } |
| } |