| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.analysis; |
| import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; |
| import static org.mockito.Mockito.spy; |
| import static org.mockito.Mockito.times; |
| import static org.mockito.Mockito.verify; |
| |
| import java.io.IOException; |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.search.AutomatonQuery; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.util.automaton.Automaton; |
| import org.apache.lucene.util.automaton.Operations; |
| import org.apache.solr.SolrTestCaseJ4; |
| import org.apache.solr.parser.CharStream; |
| import org.apache.solr.parser.ParseException; |
| import org.apache.solr.parser.SolrQueryParserBase; |
| import org.apache.solr.request.SolrQueryRequest; |
| import org.apache.solr.schema.FieldType; |
| import org.apache.solr.schema.IndexSchema; |
| import org.apache.solr.schema.IndexSchemaFactory; |
| import org.apache.solr.search.QParser; |
| import org.apache.solr.search.SolrQueryParser; |
| import org.apache.solr.search.SyntaxError; |
| import org.junit.Before; |
| import org.junit.BeforeClass; |
| import org.junit.Test; |
| |
| |
| public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 { |
| |
| Map<String,String> args = new HashMap<>(); |
| IndexSchema schema; |
| |
| @BeforeClass |
| public static void beforeClass() throws Exception { |
| assumeWorkingMockito(); |
| initCore("solrconfig.xml","schema-reversed.xml"); |
| } |
| |
| @Override |
| @Before |
| public void setUp() throws Exception { |
| super.setUp(); |
| schema = IndexSchemaFactory.buildIndexSchema(getSchemaFile(), solrConfig); |
| clearIndex(); |
| assertU(commit()); |
| } |
| |
| @Test |
| public void testReversedTokens() throws IOException { |
| String text = "simple text"; |
| args.put("withOriginal", "true"); |
| ReversedWildcardFilterFactory factory = new ReversedWildcardFilterFactory(args); |
| TokenStream input = factory.create(whitespaceMockTokenizer(text)); |
| assertTokenStreamContents(input, |
| new String[] { "\u0001elpmis", "simple", "\u0001txet", "text" }, |
| new int[] { 1, 0, 1, 0 }); |
| |
| // now without original tokens |
| args.put("withOriginal", "false"); |
| factory = new ReversedWildcardFilterFactory(args); |
| input = factory.create(whitespaceMockTokenizer(text)); |
| assertTokenStreamContents(input, |
| new String[] { "\u0001elpmis", "\u0001txet" }, |
| new int[] { 1, 1 }); |
| } |
| |
| @Test |
| public void testIndexingAnalysis() throws Exception { |
| Analyzer a = schema.getIndexAnalyzer(); |
| String text = "one two three si\uD834\uDD1Ex"; |
| |
| // field one |
| TokenStream input = a.tokenStream("one", text); |
| assertTokenStreamContents(input, |
| new String[] { "\u0001eno", "one", "\u0001owt", "two", |
| "\u0001eerht", "three", "\u0001x\uD834\uDD1Eis", "si\uD834\uDD1Ex" }, |
| new int[] { 0, 0, 4, 4, 8, 8, 14, 14 }, |
| new int[] { 3, 3, 7, 7, 13, 13, 19, 19 }, |
| new int[] { 1, 0, 1, 0, 1, 0, 1, 0 } |
| ); |
| // field two |
| input = a.tokenStream("two", text); |
| assertTokenStreamContents(input, |
| new String[] { "\u0001eno", "\u0001owt", |
| "\u0001eerht", "\u0001x\uD834\uDD1Eis" }, |
| new int[] { 0, 4, 8, 14 }, |
| new int[] { 3, 7, 13, 19 }, |
| new int[] { 1, 1, 1, 1 } |
| ); |
| // field three |
| input = a.tokenStream("three", text); |
| assertTokenStreamContents(input, |
| new String[] { "one", "two", "three", "si\uD834\uDD1Ex" }, |
| new int[] { 0, 4, 8, 14 }, |
| new int[] { 3, 7, 13, 19 } |
| ); |
| } |
| |
| @Test |
| public void testQueryParsing() throws Exception { |
| |
| // add some docs |
| assertU(adoc("id", "1", "one", "one")); |
| assertU(adoc("id", "2", "two", "two")); |
| assertU(adoc("id", "3", "three", "three")); |
| assertU(adoc("id", "4", "one", "four")); |
| assertU(adoc("id", "5", "two", "five")); |
| assertU(adoc("id", "6", "three", "si\uD834\uDD1Ex")); |
| assertU(commit()); |
| |
| assertQ("should have matched", |
| req("+id:1 +one:one"), |
| "//result[@numFound=1]"); |
| |
| assertQ("should have matched", |
| req("+id:4 +one:f*ur"), |
| "//result[@numFound=1]"); |
| |
| assertQ("should have matched", |
| req("+id:6 +three:*si\uD834\uDD1Ex"), |
| "//result[@numFound=1]"); |
| |
| SolrQueryRequest req = req(); |
| QParser qparser = QParser.getParser("id:1", req); |
| |
| SolrQueryParser parserTwo = new SolrQueryParser(qparser, "two"); |
| assertTrue(parserTwo.getAllowLeadingWildcard()); |
| |
| // test conditional reversal |
| assertTrue(wasReversed(parserTwo, "*hree")); |
| assertTrue(wasReversed(parserTwo, "t*ree")); |
| assertTrue(wasReversed(parserTwo, "th*ee")); |
| assertFalse(wasReversed(parserTwo, "thr*e")); |
| assertTrue(wasReversed(parserTwo, "?hree")); |
| assertTrue(wasReversed(parserTwo, "t?ree")); |
| assertFalse(wasReversed(parserTwo, "th?ee")); |
| assertFalse(wasReversed(parserTwo, "th?*ee")); |
| assertFalse(wasReversed(parserTwo, "short*token")); |
| assertTrue(wasReversed(parserTwo, "ver*longtoken")); |
| |
| req.close(); |
| } |
| |
| /** fragile assert: depends on our implementation, but cleanest way to check for now */ |
| private boolean wasReversed(SolrQueryParser qp, String query) throws Exception { |
| Query q = qp.parse(query); |
| if (!(q instanceof AutomatonQuery)) { |
| return false; |
| } |
| Automaton automaton = ((AutomatonQuery) q).getAutomaton(); |
| String prefix = Operations.getCommonPrefix(Operations.determinize(automaton, |
| Operations.DEFAULT_DETERMINIZE_WORK_LIMIT)); |
| return prefix.length() > 0 && prefix.charAt(0) == '\u0001'; |
| } |
| |
| @Test |
| public void testFalsePositives() throws Exception { |
| // add a doc |
| assertU(adoc("id", "1", "one", "gomez", "two", "gomez", "three", "gomez")); |
| assertU(commit()); |
| |
| assertQ("false positive", |
| req("+id:1 +one:*zemog*"), |
| "//result[@numFound=0]"); |
| |
| assertQ("no reverse, no false positive", |
| req("q", "+id:1 +three:[* TO a]", |
| "debugQuery", "true"), |
| "//result[@numFound=0]"); |
| { |
| String reverseField = random().nextBoolean() ? "one":"two"; |
| assertQ("false positive", |
| req("q", "+id:1 +"+reverseField+":[* TO a]", |
| "debugQuery", "true"), |
| "//result[@numFound=0]"); |
| } |
| assertQ("false positive", |
| req("+id:1 +two:*zemog*"), |
| "//result[@numFound=0]"); |
| assertQ("false positive", |
| req("+id:1 +three:*zemog*"), |
| "//result[@numFound=0]"); |
| |
| assertQ("should have matched", |
| req("+id:1 +one:*omez*"), |
| "//result[@numFound=1]"); |
| } |
| |
| private static final class SolrQParser extends SolrQueryParserBase { |
| @Override |
| public Query TopLevelQuery(String field) throws ParseException, SyntaxError { |
| return null; |
| } |
| |
| @Override |
| public void ReInit(CharStream stream) {} |
| |
| @Override |
| protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) { |
| return super.getReversedWildcardFilterFactory(fieldType); |
| } |
| } |
| |
| @Test |
| public void testCachingInQueryParser() { |
| SolrQParser parser = new SolrQParser(); |
| |
| SolrQueryRequest req = req(); |
| String[] fields = new String[]{"one", "two", "three"}; |
| String aField = fields[random().nextInt(fields.length)]; |
| FieldType type = req.getSchema().getField(aField).getType(); |
| |
| FieldType typeSpy = spy(type); |
| // calling twice |
| parser.getReversedWildcardFilterFactory(typeSpy); |
| parser.getReversedWildcardFilterFactory(typeSpy); |
| // but it should reach only once |
| verify(typeSpy, times(1)).getIndexAnalyzer(); |
| } |
| } |