| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search; |
| |
| |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.index.MultiTerms; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.Terms; |
| |
| import java.io.IOException; |
| |
| /** |
| * TestWildcard tests the '*' and '?' wildcard characters. |
| */ |
| public class TestWildcard extends LuceneTestCase { |
| |
| public void testEquals() { |
| WildcardQuery wq1 = new WildcardQuery(new Term("field", "b*a")); |
| WildcardQuery wq2 = new WildcardQuery(new Term("field", "b*a")); |
| WildcardQuery wq3 = new WildcardQuery(new Term("field", "b*a")); |
| |
| // reflexive? |
| assertEquals(wq1, wq2); |
| assertEquals(wq2, wq1); |
| |
| // transitive? |
| assertEquals(wq2, wq3); |
| assertEquals(wq1, wq3); |
| |
| assertFalse(wq1.equals(null)); |
| |
| FuzzyQuery fq = new FuzzyQuery(new Term("field", "b*a")); |
| assertFalse(wq1.equals(fq)); |
| assertFalse(fq.equals(wq1)); |
| } |
| |
| /** |
| * Tests if a WildcardQuery that has no wildcard in the term is rewritten to a single |
| * TermQuery. The boost should be preserved, and the rewrite should return |
| * a ConstantScoreQuery if the WildcardQuery had a ConstantScore rewriteMethod. |
| */ |
| public void testTermWithoutWildcard() throws IOException { |
| Directory indexStore = getIndexStore("field", new String[]{"nowildcard", "nowildcardx"}); |
| IndexReader reader = DirectoryReader.open(indexStore); |
| IndexSearcher searcher = newSearcher(reader); |
| |
| MultiTermQuery wq = new WildcardQuery(new Term("field", "nowildcard")); |
| assertMatches(searcher, wq, 1); |
| |
| wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE); |
| Query q = searcher.rewrite(wq); |
| assertTrue(q instanceof TermQuery); |
| |
| wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE); |
| q = searcher.rewrite(wq); |
| assertTrue(q instanceof MultiTermQueryConstantScoreWrapper); |
| |
| wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE); |
| q = searcher.rewrite(wq); |
| assertTrue(q instanceof ConstantScoreQuery); |
| reader.close(); |
| indexStore.close(); |
| } |
| |
| /** |
| * Tests if a WildcardQuery with an empty term is rewritten to an empty BooleanQuery |
| */ |
| public void testEmptyTerm() throws IOException { |
| Directory indexStore = getIndexStore("field", new String[]{"nowildcard", "nowildcardx"}); |
| IndexReader reader = DirectoryReader.open(indexStore); |
| IndexSearcher searcher = newSearcher(reader); |
| |
| MultiTermQuery wq = new WildcardQuery(new Term("field", "")); |
| wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE); |
| assertMatches(searcher, wq, 0); |
| Query q = searcher.rewrite(wq); |
| assertTrue(q instanceof MatchNoDocsQuery); |
| reader.close(); |
| indexStore.close(); |
| } |
| |
| /** |
| * Tests if a WildcardQuery that has only a trailing * in the term is |
| * rewritten to a single PrefixQuery. The boost and rewriteMethod should be |
| * preserved. |
| */ |
| public void testPrefixTerm() throws IOException { |
| Directory indexStore = getIndexStore("field", new String[]{"prefix", "prefixx"}); |
| IndexReader reader = DirectoryReader.open(indexStore); |
| IndexSearcher searcher = newSearcher(reader); |
| |
| MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*")); |
| assertMatches(searcher, wq, 2); |
| |
| wq = new WildcardQuery(new Term("field", "*")); |
| assertMatches(searcher, wq, 2); |
| Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), "field"); |
| assertFalse(wq.getTermsEnum(terms).getClass().getSimpleName().contains("AutomatonTermsEnum")); |
| reader.close(); |
| indexStore.close(); |
| } |
| |
| /** |
| * Tests Wildcard queries with an asterisk. |
| */ |
| public void testAsterisk() |
| throws IOException { |
| Directory indexStore = getIndexStore("body", new String[] |
| {"metal", "metals"}); |
| IndexReader reader = DirectoryReader.open(indexStore); |
| IndexSearcher searcher = newSearcher(reader); |
| Query query1 = new TermQuery(new Term("body", "metal")); |
| Query query2 = new WildcardQuery(new Term("body", "metal*")); |
| Query query3 = new WildcardQuery(new Term("body", "m*tal")); |
| Query query4 = new WildcardQuery(new Term("body", "m*tal*")); |
| Query query5 = new WildcardQuery(new Term("body", "m*tals")); |
| |
| BooleanQuery.Builder query6 = new BooleanQuery.Builder(); |
| query6.add(query5, BooleanClause.Occur.SHOULD); |
| |
| BooleanQuery.Builder query7 = new BooleanQuery.Builder(); |
| query7.add(query3, BooleanClause.Occur.SHOULD); |
| query7.add(query5, BooleanClause.Occur.SHOULD); |
| |
| // Queries do not automatically lower-case search terms: |
| Query query8 = new WildcardQuery(new Term("body", "M*tal*")); |
| |
| assertMatches(searcher, query1, 1); |
| assertMatches(searcher, query2, 2); |
| assertMatches(searcher, query3, 1); |
| assertMatches(searcher, query4, 2); |
| assertMatches(searcher, query5, 1); |
| assertMatches(searcher, query6.build(), 1); |
| assertMatches(searcher, query7.build(), 2); |
| assertMatches(searcher, query8, 0); |
| assertMatches(searcher, new WildcardQuery(new Term("body", "*tall")), 0); |
| assertMatches(searcher, new WildcardQuery(new Term("body", "*tal")), 1); |
| assertMatches(searcher, new WildcardQuery(new Term("body", "*tal*")), 2); |
| reader.close(); |
| indexStore.close(); |
| } |
| |
| /** |
| * Tests Wildcard queries with a question mark. |
| * |
| * @throws IOException if an error occurs |
| */ |
| public void testQuestionmark() |
| throws IOException { |
| Directory indexStore = getIndexStore("body", new String[] |
| {"metal", "metals", "mXtals", "mXtXls"}); |
| IndexReader reader = DirectoryReader.open(indexStore); |
| IndexSearcher searcher = newSearcher(reader); |
| Query query1 = new WildcardQuery(new Term("body", "m?tal")); |
| Query query2 = new WildcardQuery(new Term("body", "metal?")); |
| Query query3 = new WildcardQuery(new Term("body", "metals?")); |
| Query query4 = new WildcardQuery(new Term("body", "m?t?ls")); |
| Query query5 = new WildcardQuery(new Term("body", "M?t?ls")); |
| Query query6 = new WildcardQuery(new Term("body", "meta??")); |
| |
| assertMatches(searcher, query1, 1); |
| assertMatches(searcher, query2, 1); |
| assertMatches(searcher, query3, 0); |
| assertMatches(searcher, query4, 3); |
| assertMatches(searcher, query5, 0); |
| assertMatches(searcher, query6, 1); // Query: 'meta??' matches 'metals' not 'metal' |
| reader.close(); |
| indexStore.close(); |
| } |
| |
| /** |
| * Tests if wildcard escaping works |
| */ |
| public void testEscapes() throws Exception { |
| Directory indexStore = getIndexStore("field", |
| new String[]{"foo*bar", "foo??bar", "fooCDbar", "fooSOMETHINGbar", "foo\\"}); |
| IndexReader reader = DirectoryReader.open(indexStore); |
| IndexSearcher searcher = newSearcher(reader); |
| |
| // without escape: matches foo??bar, fooCDbar, foo*bar, and fooSOMETHINGbar |
| WildcardQuery unescaped = new WildcardQuery(new Term("field", "foo*bar")); |
| assertMatches(searcher, unescaped, 4); |
| |
| // with escape: only matches foo*bar |
| WildcardQuery escaped = new WildcardQuery(new Term("field", "foo\\*bar")); |
| assertMatches(searcher, escaped, 1); |
| |
| // without escape: matches foo??bar and fooCDbar |
| unescaped = new WildcardQuery(new Term("field", "foo??bar")); |
| assertMatches(searcher, unescaped, 2); |
| |
| // with escape: matches foo??bar only |
| escaped = new WildcardQuery(new Term("field", "foo\\?\\?bar")); |
| assertMatches(searcher, escaped, 1); |
| |
| // check escaping at end: lenient parse yields "foo\" |
| WildcardQuery atEnd = new WildcardQuery(new Term("field", "foo\\")); |
| assertMatches(searcher, atEnd, 1); |
| |
| reader.close(); |
| indexStore.close(); |
| } |
| |
| private Directory getIndexStore(String field, String[] contents) |
| throws IOException { |
| Directory indexStore = newDirectory(); |
| RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore); |
| for (int i = 0; i < contents.length; ++i) { |
| Document doc = new Document(); |
| doc.add(newTextField(field, contents[i], Field.Store.YES)); |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| |
| return indexStore; |
| } |
| |
| private void assertMatches(IndexSearcher searcher, Query q, int expectedMatches) |
| throws IOException { |
| ScoreDoc[] result = searcher.search(q, 1000).scoreDocs; |
| assertEquals(expectedMatches, result.length); |
| } |
| |
| /** |
| * Test that wild card queries are parsed to the correct type and are searched correctly. |
| * This test looks at both parsing and execution of wildcard queries. |
| * Although placed here, it also tests prefix queries, verifying that |
| * prefix queries are not parsed into wild card queries, and vice-versa. |
| */ |
| public void testParsingAndSearching() throws Exception { |
| String field = "content"; |
| String docs[] = { |
| "\\ abcdefg1", |
| "\\79 hijklmn1", |
| "\\\\ opqrstu1", |
| }; |
| |
| // queries that should find all docs |
| Query matchAll[] = { |
| new WildcardQuery(new Term(field, "*")), |
| new WildcardQuery(new Term(field, "*1")), |
| new WildcardQuery(new Term(field, "**1")), |
| new WildcardQuery(new Term(field, "*?")), |
| new WildcardQuery(new Term(field, "*?1")), |
| new WildcardQuery(new Term(field, "?*1")), |
| new WildcardQuery(new Term(field, "**")), |
| new WildcardQuery(new Term(field, "***")), |
| new WildcardQuery(new Term(field, "\\\\*")) |
| }; |
| |
| // queries that should find no docs |
| Query matchNone[] = { |
| new WildcardQuery(new Term(field, "a*h")), |
| new WildcardQuery(new Term(field, "a?h")), |
| new WildcardQuery(new Term(field, "*a*h")), |
| new WildcardQuery(new Term(field, "?a")), |
| new WildcardQuery(new Term(field, "a?")) |
| }; |
| |
| PrefixQuery matchOneDocPrefix[][] = { |
| {new PrefixQuery(new Term(field, "a")), |
| new PrefixQuery(new Term(field, "ab")), |
| new PrefixQuery(new Term(field, "abc"))}, // these should find only doc 0 |
| |
| {new PrefixQuery(new Term(field, "h")), |
| new PrefixQuery(new Term(field, "hi")), |
| new PrefixQuery(new Term(field, "hij")), |
| new PrefixQuery(new Term(field, "\\7"))}, // these should find only doc 1 |
| |
| {new PrefixQuery(new Term(field, "o")), |
| new PrefixQuery(new Term(field, "op")), |
| new PrefixQuery(new Term(field, "opq")), |
| new PrefixQuery(new Term(field, "\\\\"))}, // these should find only doc 2 |
| }; |
| |
| WildcardQuery matchOneDocWild[][] = { |
| |
| {new WildcardQuery(new Term(field, "*a*")), // these should find only doc 0 |
| new WildcardQuery(new Term(field, "*ab*")), |
| new WildcardQuery(new Term(field, "*abc**")), |
| new WildcardQuery(new Term(field, "ab*e*")), |
| new WildcardQuery(new Term(field, "*g?")), |
| new WildcardQuery(new Term(field, "*f?1"))}, |
| |
| {new WildcardQuery(new Term(field, "*h*")), // these should find only doc 1 |
| new WildcardQuery(new Term(field, "*hi*")), |
| new WildcardQuery(new Term(field, "*hij**")), |
| new WildcardQuery(new Term(field, "hi*k*")), |
| new WildcardQuery(new Term(field, "*n?")), |
| new WildcardQuery(new Term(field, "*m?1")), |
| new WildcardQuery(new Term(field, "hij**"))}, |
| |
| {new WildcardQuery(new Term(field, "*o*")), // these should find only doc 2 |
| new WildcardQuery(new Term(field, "*op*")), |
| new WildcardQuery(new Term(field, "*opq**")), |
| new WildcardQuery(new Term(field, "op*q*")), |
| new WildcardQuery(new Term(field, "*u?")), |
| new WildcardQuery(new Term(field, "*t?1")), |
| new WildcardQuery(new Term(field, "opq**"))} |
| }; |
| |
| // prepare the index |
| Directory dir = newDirectory(); |
| RandomIndexWriter iw = new RandomIndexWriter(random(), dir, |
| newIndexWriterConfig(new MockAnalyzer(random())) |
| .setMergePolicy(newLogMergePolicy())); |
| for (int i = 0; i < docs.length; i++) { |
| Document doc = new Document(); |
| doc.add(newTextField(field, docs[i], Field.Store.NO)); |
| iw.addDocument(doc); |
| } |
| iw.close(); |
| |
| IndexReader reader = DirectoryReader.open(dir); |
| IndexSearcher searcher = newSearcher(reader); |
| |
| // test queries that must find all |
| for (Query q : matchAll) { |
| if (VERBOSE) System.out.println("matchAll: q=" + q + " " + q.getClass().getName()); |
| ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs; |
| assertEquals(docs.length, hits.length); |
| } |
| |
| // test queries that must find none |
| for (Query q : matchNone) { |
| if (VERBOSE) System.out.println("matchNone: q=" + q + " " + q.getClass().getName()); |
| ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs; |
| assertEquals(0, hits.length); |
| } |
| |
| // thest the prefi queries find only one doc |
| for (int i = 0; i < matchOneDocPrefix.length; i++) { |
| for (int j = 0; j < matchOneDocPrefix[i].length; j++) { |
| Query q = matchOneDocPrefix[i][j]; |
| if (VERBOSE) System.out.println("match 1 prefix: doc="+docs[i]+" q="+q+" "+q.getClass().getName()); |
| ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs; |
| assertEquals(1,hits.length); |
| assertEquals(i,hits[0].doc); |
| } |
| } |
| |
| // test the wildcard queries find only one doc |
| for (int i = 0; i < matchOneDocWild.length; i++) { |
| for (int j = 0; j < matchOneDocWild[i].length; j++) { |
| Query q = matchOneDocWild[i][j]; |
| if (VERBOSE) System.out.println("match 1 wild: doc="+docs[i]+" q="+q+" "+q.getClass().getName()); |
| ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs; |
| assertEquals(1,hits.length); |
| assertEquals(i,hits[0].doc); |
| } |
| } |
| |
| reader.close(); |
| dir.close(); |
| } |
| } |