// blob: ff19fa933bb53a265cc7c5b79f2ceb3db6dc2fa2
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import java.io.IOException;
/**
* TestWildcard tests the '*' and '?' wildcard characters.
*/
public class TestWildcard extends LuceneTestCase {
/**
 * Checks the {@code equals}/{@code hashCode} behavior of {@link WildcardQuery}: three queries
 * built from the same term must be equal to each other and share a hash code, and a
 * WildcardQuery must be equal neither to {@code null} nor to a {@link FuzzyQuery} built from
 * the same term.
 */
public void testEquals() {
  WildcardQuery wq1 = new WildcardQuery(new Term("field", "b*a"));
  WildcardQuery wq2 = new WildcardQuery(new Term("field", "b*a"));
  WildcardQuery wq3 = new WildcardQuery(new Term("field", "b*a"));

  // reflexive
  assertEquals(wq1, wq1);

  // symmetric
  assertEquals(wq1, wq2);
  assertEquals(wq2, wq1);

  // transitive: wq1 equals wq2 and wq2 equals wq3, so wq1 must equal wq3
  assertEquals(wq2, wq3);
  assertEquals(wq1, wq3);

  // equal objects must report equal hash codes
  assertEquals(wq1.hashCode(), wq2.hashCode());
  assertEquals(wq1.hashCode(), wq3.hashCode());

  // never equal to null
  assertFalse(wq1.equals(null));

  // a different query type over the same term is unequal in both directions
  FuzzyQuery fq = new FuzzyQuery(new Term("field", "b*a"));
  assertFalse(wq1.equals(fq));
  assertFalse(fq.equals(wq1));
}
/**
 * Tests a WildcardQuery whose term contains no wildcard character. The query must match
 * exactly one of the two indexed documents, and rewriting it must produce:
 * <ul>
 *   <li>a {@link TermQuery} under {@code SCORING_BOOLEAN_REWRITE},</li>
 *   <li>a {@code MultiTermQueryConstantScoreWrapper} under {@code CONSTANT_SCORE_REWRITE},</li>
 *   <li>a {@link ConstantScoreQuery} under {@code CONSTANT_SCORE_BOOLEAN_REWRITE}.</li>
 * </ul>
 *
 * @throws IOException if building, opening or searching the index fails
 */
public void testTermWithoutWildcard() throws IOException {
  // try-with-resources closes the reader first and the directory second, even when an
  // assertion fails part-way through.
  try (Directory indexStore = getIndexStore("field", new String[] {"nowildcard", "nowildcardx"});
      IndexReader reader = DirectoryReader.open(indexStore)) {
    IndexSearcher searcher = newSearcher(reader);

    MultiTermQuery wq = new WildcardQuery(new Term("field", "nowildcard"));
    assertMatches(searcher, wq, 1);

    wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
    Query q = searcher.rewrite(wq);
    assertTrue(q instanceof TermQuery);

    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
    q = searcher.rewrite(wq);
    assertTrue(q instanceof MultiTermQueryConstantScoreWrapper);

    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
    q = searcher.rewrite(wq);
    assertTrue(q instanceof ConstantScoreQuery);
  }
}
/**
 * Tests that a WildcardQuery with an empty term matches no documents and is rewritten to a
 * {@link MatchNoDocsQuery} under {@code SCORING_BOOLEAN_REWRITE}.
 *
 * @throws IOException if building, opening or searching the index fails
 */
public void testEmptyTerm() throws IOException {
  // try-with-resources closes the reader first and the directory second, even when an
  // assertion fails part-way through.
  try (Directory indexStore = getIndexStore("field", new String[] {"nowildcard", "nowildcardx"});
      IndexReader reader = DirectoryReader.open(indexStore)) {
    IndexSearcher searcher = newSearcher(reader);

    MultiTermQuery wq = new WildcardQuery(new Term("field", ""));
    wq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
    assertMatches(searcher, wq, 0);

    Query q = searcher.rewrite(wq);
    assertTrue(q instanceof MatchNoDocsQuery);
  }
}
/**
 * Tests WildcardQuery patterns that consist of a prefix followed by a single trailing
 * {@code *}. The pattern {@code prefix*} and the bare pattern {@code *} must each match both
 * indexed documents, and the terms enum obtained for the bare {@code *} query must not be an
 * {@code AutomatonTermsEnum} (checked by simple class name).
 *
 * @throws IOException if building, opening or searching the index fails
 */
public void testPrefixTerm() throws IOException {
  // try-with-resources closes the reader first and the directory second, even when an
  // assertion fails part-way through.
  try (Directory indexStore = getIndexStore("field", new String[] {"prefix", "prefixx"});
      IndexReader reader = DirectoryReader.open(indexStore)) {
    IndexSearcher searcher = newSearcher(reader);

    MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
    assertMatches(searcher, wq, 2);

    wq = new WildcardQuery(new Term("field", "*"));
    assertMatches(searcher, wq, 2);

    Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), "field");
    String termsEnumName = wq.getTermsEnum(terms).getClass().getSimpleName();
    assertFalse(termsEnumName.contains("AutomatonTermsEnum"));
  }
}
/**
 * Tests wildcard queries containing an asterisk against an index holding the two terms
 * {@code metal} and {@code metals}, both as stand-alone queries and as SHOULD clauses of a
 * {@link BooleanQuery}.
 *
 * @throws IOException if building, opening or searching the index fails
 */
public void testAsterisk() throws IOException {
  // try-with-resources closes the reader first and the directory second, even when an
  // assertion fails part-way through.
  try (Directory indexStore = getIndexStore("body", new String[] {"metal", "metals"});
      IndexReader reader = DirectoryReader.open(indexStore)) {
    IndexSearcher searcher = newSearcher(reader);

    Query query1 = new TermQuery(new Term("body", "metal"));
    Query query2 = new WildcardQuery(new Term("body", "metal*"));
    Query query3 = new WildcardQuery(new Term("body", "m*tal"));
    Query query4 = new WildcardQuery(new Term("body", "m*tal*"));
    Query query5 = new WildcardQuery(new Term("body", "m*tals"));

    // a boolean query with a single optional wildcard clause
    BooleanQuery.Builder query6 = new BooleanQuery.Builder();
    query6.add(query5, BooleanClause.Occur.SHOULD);

    // a boolean query with two optional wildcard clauses
    BooleanQuery.Builder query7 = new BooleanQuery.Builder();
    query7.add(query3, BooleanClause.Occur.SHOULD);
    query7.add(query5, BooleanClause.Occur.SHOULD);

    // Queries do not automatically lower-case search terms:
    Query query8 = new WildcardQuery(new Term("body", "M*tal*"));

    assertMatches(searcher, query1, 1);
    assertMatches(searcher, query2, 2);
    assertMatches(searcher, query3, 1);
    assertMatches(searcher, query4, 2);
    assertMatches(searcher, query5, 1);
    assertMatches(searcher, query6.build(), 1);
    assertMatches(searcher, query7.build(), 2);
    assertMatches(searcher, query8, 0);

    // leading asterisk
    assertMatches(searcher, new WildcardQuery(new Term("body", "*tall")), 0);
    assertMatches(searcher, new WildcardQuery(new Term("body", "*tal")), 1);
    assertMatches(searcher, new WildcardQuery(new Term("body", "*tal*")), 2);
  }
}
/**
 * Tests wildcard queries containing a question mark against an index holding the values
 * {@code metal}, {@code metals}, {@code mXtals} and {@code mXtXls}.
 *
 * @throws IOException if an error occurs
 */
public void testQuestionmark() throws IOException {
  // try-with-resources closes the reader first and the directory second, even when an
  // assertion fails part-way through.
  try (Directory indexStore =
          getIndexStore("body", new String[] {"metal", "metals", "mXtals", "mXtXls"});
      IndexReader reader = DirectoryReader.open(indexStore)) {
    IndexSearcher searcher = newSearcher(reader);

    Query query1 = new WildcardQuery(new Term("body", "m?tal"));
    Query query2 = new WildcardQuery(new Term("body", "metal?"));
    Query query3 = new WildcardQuery(new Term("body", "metals?"));
    Query query4 = new WildcardQuery(new Term("body", "m?t?ls"));
    Query query5 = new WildcardQuery(new Term("body", "M?t?ls"));
    Query query6 = new WildcardQuery(new Term("body", "meta??"));

    assertMatches(searcher, query1, 1);
    assertMatches(searcher, query2, 1);
    assertMatches(searcher, query3, 0);
    assertMatches(searcher, query4, 3);
    assertMatches(searcher, query5, 0);
    assertMatches(searcher, query6, 1); // Query: 'meta??' matches 'metals' not 'metal'
  }
}
/**
 * Tests that a backslash escapes the {@code *} and {@code ?} wildcard characters, so that
 * they are matched literally, and that a pattern ending in a lone backslash still matches
 * the indexed term {@code foo\}.
 *
 * @throws Exception if building, opening or searching the index fails
 */
public void testEscapes() throws Exception {
  // try-with-resources closes the reader first and the directory second, even when an
  // assertion fails part-way through.
  try (Directory indexStore =
          getIndexStore(
              "field",
              new String[] {"foo*bar", "foo??bar", "fooCDbar", "fooSOMETHINGbar", "foo\\"});
      IndexReader reader = DirectoryReader.open(indexStore)) {
    IndexSearcher searcher = newSearcher(reader);

    // without escape: matches foo??bar, fooCDbar, foo*bar, and fooSOMETHINGbar
    WildcardQuery unescaped = new WildcardQuery(new Term("field", "foo*bar"));
    assertMatches(searcher, unescaped, 4);

    // with escape: only matches foo*bar
    WildcardQuery escaped = new WildcardQuery(new Term("field", "foo\\*bar"));
    assertMatches(searcher, escaped, 1);

    // without escape: matches foo??bar and fooCDbar
    unescaped = new WildcardQuery(new Term("field", "foo??bar"));
    assertMatches(searcher, unescaped, 2);

    // with escape: matches foo??bar only
    escaped = new WildcardQuery(new Term("field", "foo\\?\\?bar"));
    assertMatches(searcher, escaped, 1);

    // check escaping at end: lenient parse yields "foo\"
    WildcardQuery atEnd = new WildcardQuery(new Term("field", "foo\\"));
    assertMatches(searcher, atEnd, 1);
  }
}
/**
 * Builds a new directory containing one document per entry of {@code contents}; each
 * document stores the entry as a text field named {@code field}.
 *
 * @param field name of the field added to every document
 * @param contents field values, one document is indexed per element
 * @return the directory holding the index; the caller is responsible for closing it
 * @throws IOException if writing the index fails
 */
private Directory getIndexStore(String field, String[] contents)
    throws IOException {
  Directory indexStore = newDirectory();
  // try-with-resources so the writer is closed even when adding a document throws
  try (RandomIndexWriter writer = new RandomIndexWriter(random(), indexStore)) {
    for (String content : contents) {
      Document doc = new Document();
      doc.add(newTextField(field, content, Field.Store.YES));
      writer.addDocument(doc);
    }
  }
  return indexStore;
}
/**
 * Runs {@code q} against {@code searcher}, requesting up to 1000 hits, and asserts that the
 * number of hits returned equals {@code expectedMatches}.
 *
 * @param searcher searcher to execute the query with
 * @param q query under test
 * @param expectedMatches number of hits the query is expected to return
 * @throws IOException if the search fails
 */
private void assertMatches(IndexSearcher searcher, Query q, int expectedMatches)
    throws IOException {
  final int actualMatches = searcher.search(q, 1000).scoreDocs.length;
  assertEquals(expectedMatches, actualMatches);
}
/**
 * Tests that wildcard and prefix queries are searched correctly against a three-document
 * index whose values contain backslashes. The queries are constructed directly (no query
 * parser is involved) and fall into four groups: wildcard queries that must match every
 * document, wildcard queries that must match none, and prefix and wildcard queries that must
 * each match exactly one specific document.
 *
 * @throws Exception if building, opening or searching the index fails
 */
public void testParsingAndSearching() throws Exception {
  String field = "content";
  String[] docs = {
    "\\ abcdefg1",
    "\\79 hijklmn1",
    "\\\\ opqrstu1",
  };

  // queries that should find all docs
  Query[] matchAll = {
    new WildcardQuery(new Term(field, "*")),
    new WildcardQuery(new Term(field, "*1")),
    new WildcardQuery(new Term(field, "**1")),
    new WildcardQuery(new Term(field, "*?")),
    new WildcardQuery(new Term(field, "*?1")),
    new WildcardQuery(new Term(field, "?*1")),
    new WildcardQuery(new Term(field, "**")),
    new WildcardQuery(new Term(field, "***")),
    new WildcardQuery(new Term(field, "\\\\*"))
  };

  // queries that should find no docs
  Query[] matchNone = {
    new WildcardQuery(new Term(field, "a*h")),
    new WildcardQuery(new Term(field, "a?h")),
    new WildcardQuery(new Term(field, "*a*h")),
    new WildcardQuery(new Term(field, "?a")),
    new WildcardQuery(new Term(field, "a?"))
  };

  // row i holds prefix queries that should find only doc i
  PrefixQuery[][] matchOneDocPrefix = {
    {new PrefixQuery(new Term(field, "a")),
     new PrefixQuery(new Term(field, "ab")),
     new PrefixQuery(new Term(field, "abc"))}, // these should find only doc 0
    {new PrefixQuery(new Term(field, "h")),
     new PrefixQuery(new Term(field, "hi")),
     new PrefixQuery(new Term(field, "hij")),
     new PrefixQuery(new Term(field, "\\7"))}, // these should find only doc 1
    {new PrefixQuery(new Term(field, "o")),
     new PrefixQuery(new Term(field, "op")),
     new PrefixQuery(new Term(field, "opq")),
     new PrefixQuery(new Term(field, "\\\\"))}, // these should find only doc 2
  };

  // row i holds wildcard queries that should find only doc i
  WildcardQuery[][] matchOneDocWild = {
    {new WildcardQuery(new Term(field, "*a*")), // these should find only doc 0
     new WildcardQuery(new Term(field, "*ab*")),
     new WildcardQuery(new Term(field, "*abc**")),
     new WildcardQuery(new Term(field, "ab*e*")),
     new WildcardQuery(new Term(field, "*g?")),
     new WildcardQuery(new Term(field, "*f?1"))},
    {new WildcardQuery(new Term(field, "*h*")), // these should find only doc 1
     new WildcardQuery(new Term(field, "*hi*")),
     new WildcardQuery(new Term(field, "*hij**")),
     new WildcardQuery(new Term(field, "hi*k*")),
     new WildcardQuery(new Term(field, "*n?")),
     new WildcardQuery(new Term(field, "*m?1")),
     new WildcardQuery(new Term(field, "hij**"))},
    {new WildcardQuery(new Term(field, "*o*")), // these should find only doc 2
     new WildcardQuery(new Term(field, "*op*")),
     new WildcardQuery(new Term(field, "*opq**")),
     new WildcardQuery(new Term(field, "op*q*")),
     new WildcardQuery(new Term(field, "*u?")),
     new WildcardQuery(new Term(field, "*t?1")),
     new WildcardQuery(new Term(field, "opq**"))}
  };

  // The directory is closed last; writer and reader are each closed as soon as they are
  // no longer needed, even when an assertion fails.
  try (Directory dir = newDirectory()) {
    // prepare the index
    // NOTE(review): the doc-id assertions below expect doc ids to follow insertion order;
    // presumably that is why a log merge policy is configured here - confirm.
    try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setMergePolicy(newLogMergePolicy()))) {
      for (String text : docs) {
        Document doc = new Document();
        doc.add(newTextField(field, text, Field.Store.NO));
        iw.addDocument(doc);
      }
    }

    try (IndexReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = newSearcher(reader);

      // test queries that must find all
      for (Query q : matchAll) {
        if (VERBOSE) System.out.println("matchAll: q=" + q + " " + q.getClass().getName());
        assertMatches(searcher, q, docs.length);
      }

      // test queries that must find none
      for (Query q : matchNone) {
        if (VERBOSE) System.out.println("matchNone: q=" + q + " " + q.getClass().getName());
        assertMatches(searcher, q, 0);
      }

      // test the prefix queries find only one doc
      assertEachMatchesOnlyDoc(searcher, matchOneDocPrefix, docs, "prefix");

      // test the wildcard queries find only one doc
      assertEachMatchesOnlyDoc(searcher, matchOneDocWild, docs, "wild");
    }
  }
}

/**
 * Asserts that every query in row {@code i} of {@code queriesByDoc} returns exactly one hit
 * and that this hit has doc id {@code i}.
 *
 * @param searcher searcher to execute the queries with
 * @param queriesByDoc queries grouped by the index of the single document they must match
 * @param docs indexed document texts, used only for verbose output
 * @param label query kind inserted into the verbose output line
 * @throws IOException if a search fails
 */
private void assertEachMatchesOnlyDoc(
    IndexSearcher searcher, Query[][] queriesByDoc, String[] docs, String label)
    throws IOException {
  for (int i = 0; i < queriesByDoc.length; i++) {
    for (Query q : queriesByDoc[i]) {
      if (VERBOSE) {
        System.out.println(
            "match 1 " + label + ": doc=" + docs[i] + " q=" + q + " " + q.getClass().getName());
      }
      ScoreDoc[] hits = searcher.search(q, 1000).scoreDocs;
      assertEquals(1, hits.length);
      assertEquals(i, hits[0].doc);
    }
  }
}
}