blob: c60656605beaf1707bed7f7d0d8db29e063119d9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.classic;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockBytesAnalyzer;
import org.apache.lucene.analysis.MockLowerCaseFilter;
import org.apache.lucene.analysis.MockSynonymAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
import org.apache.lucene.queryparser.util.QueryParserTestBase;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
/**
* Tests QueryParser.
*/
public class TestQueryParser extends QueryParserTestBase {
protected boolean splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
private static final String FIELD = "field";
public static class QPTestParser extends QueryParser {
public QPTestParser(String f, Analyzer a) {
super(f, a);
}
@Override
protected Query getFuzzyQuery(String field, String termStr,
float minSimilarity) throws ParseException {
throw new ParseException("Fuzzy queries not allowed");
}
@Override
protected Query getWildcardQuery(String field, String termStr)
throws ParseException {
throw new ParseException("Wildcard queries not allowed");
}
}
public QueryParser getParser(Analyzer a) throws Exception {
if (a == null) a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
QueryParser qp = new QueryParser(getDefaultField(), a);
qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
qp.setSplitOnWhitespace(splitOnWhitespace);
return qp;
}
@Override
public CommonQueryParserConfiguration getParserConfig(Analyzer a)
throws Exception {
return getParser(a);
}
@Override
public Query getQuery(String query, CommonQueryParserConfiguration cqpC)
throws Exception {
assert cqpC != null : "Parameter must not be null";
assert (cqpC instanceof QueryParser) : "Parameter must be instance of QueryParser";
QueryParser qp = (QueryParser) cqpC;
return qp.parse(query);
}
@Override
public Query getQuery(String query, Analyzer a) throws Exception {
return getParser(a).parse(query);
}
@Override
public boolean isQueryParserException(Exception exception) {
return exception instanceof ParseException;
}
@Override
public void setDefaultOperatorOR(CommonQueryParserConfiguration cqpC) {
assert (cqpC instanceof QueryParser);
QueryParser qp = (QueryParser) cqpC;
qp.setDefaultOperator(Operator.OR);
}
@Override
public void setDefaultOperatorAND(CommonQueryParserConfiguration cqpC) {
assert (cqpC instanceof QueryParser);
QueryParser qp = (QueryParser) cqpC;
qp.setDefaultOperator(Operator.AND);
}
@Override
public void setAutoGeneratePhraseQueries(CommonQueryParserConfiguration cqpC,
boolean value) {
assert (cqpC instanceof QueryParser);
QueryParser qp = (QueryParser) cqpC;
qp.setAutoGeneratePhraseQueries(value);
}
@Override
public void setDateResolution(CommonQueryParserConfiguration cqpC,
CharSequence field, Resolution value) {
assert (cqpC instanceof QueryParser);
QueryParser qp = (QueryParser) cqpC;
qp.setDateResolution(field.toString(), value);
}
@Override
public void testDefaultOperator() throws Exception {
QueryParser qp = getParser(new MockAnalyzer(random()));
// make sure OR is the default:
assertEquals(QueryParserBase.OR_OPERATOR, qp.getDefaultOperator());
setDefaultOperatorAND(qp);
assertEquals(QueryParserBase.AND_OPERATOR, qp.getDefaultOperator());
setDefaultOperatorOR(qp);
assertEquals(QueryParserBase.OR_OPERATOR, qp.getDefaultOperator());
}
// LUCENE-2002: when we run javacc to regen QueryParser,
// we also run a replaceregexp step to fix 2 of the public
// ctors (change them to protected):
//
// protected QueryParser(CharStream stream)
//
// protected QueryParser(QueryParserTokenManager tm)
//
// This test is here as a safety, in case that ant step
// doesn't work for some reason.
@SuppressWarnings("rawtype")
public void testProtectedCtors() throws Exception {
expectThrows(NoSuchMethodException.class, () -> QueryParser.class.getConstructor(CharStream.class));
expectThrows(NoSuchMethodException.class, () -> QueryParser.class.getConstructor(QueryParserTokenManager.class));
}
public void testFuzzySlopeExtendability() throws ParseException {
QueryParser qp = new QueryParser("a", new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
@Override
Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage)
throws ParseException {
if(fuzzySlop.image.endsWith("€")) {
float fms = fuzzyMinSim;
try {
fms = Float.parseFloat(fuzzySlop.image.substring(1, fuzzySlop.image.length()-1));
} catch (Exception ignored) { }
float value = Float.parseFloat(termImage);
return getRangeQuery(qfield, Float.toString(value-fms/2.f), Float.toString(value+fms/2.f), true, true);
}
return super.handleBareFuzzy(qfield, fuzzySlop, termImage);
}
};
assertEquals(qp.parse("a:[11.95 TO 12.95]"), qp.parse("12.45~1€"));
}
@Override
public void testStarParsing() throws Exception {
final int[] type = new int[1];
QueryParser qp = new QueryParser(FIELD,
new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {
@Override
protected Query getWildcardQuery(String field, String termStr) {
// override error checking of superclass
type[0] = 1;
return new TermQuery(new Term(field, termStr));
}
@Override
protected Query getPrefixQuery(String field, String termStr) {
// override error checking of superclass
type[0] = 2;
return new TermQuery(new Term(field, termStr));
}
@Override
protected Query getFieldQuery(String field, String queryText,
boolean quoted) throws ParseException {
type[0] = 3;
return super.getFieldQuery(field, queryText, quoted);
}
};
TermQuery tq;
tq = (TermQuery) qp.parse("foo:zoo*");
assertEquals("zoo", tq.getTerm().text());
assertEquals(2, type[0]);
BoostQuery bq = (BoostQuery) qp.parse("foo:zoo*^2");
tq = (TermQuery) bq.getQuery();
assertEquals("zoo", tq.getTerm().text());
assertEquals(2, type[0]);
assertEquals(bq.getBoost(), 2, 0);
tq = (TermQuery) qp.parse("foo:*");
assertEquals("*", tq.getTerm().text());
assertEquals(1, type[0]); // could be a valid prefix query in the future too
bq = (BoostQuery) qp.parse("foo:*^2");
tq = (TermQuery) bq.getQuery();
assertEquals("*", tq.getTerm().text());
assertEquals(1, type[0]);
assertEquals(bq.getBoost(), 2, 0);
tq = (TermQuery) qp.parse("*:foo");
assertEquals("*", tq.getTerm().field());
assertEquals("foo", tq.getTerm().text());
assertEquals(3, type[0]);
tq = (TermQuery) qp.parse("*:*");
assertEquals("*", tq.getTerm().field());
assertEquals("*", tq.getTerm().text());
assertEquals(1, type[0]); // could be handled as a prefix query in the
// future
tq = (TermQuery) qp.parse("(*:*)");
assertEquals("*", tq.getTerm().field());
assertEquals("*", tq.getTerm().text());
assertEquals(1, type[0]);
}
// Wildcard queries should not be allowed
public void testCustomQueryParserWildcard() {
expectThrows(ParseException.class, () -> {
new QPTestParser("contents", new MockAnalyzer(random(),
MockTokenizer.WHITESPACE, false)).parse("a?t");
});
}
// Fuzzy queries should not be allowed
public void testCustomQueryParserFuzzy() throws Exception {
expectThrows(ParseException.class, () -> {
new QPTestParser("contents", new MockAnalyzer(random(),
MockTokenizer.WHITESPACE, false)).parse("xunit~");
});
}
/** query parser that doesn't expand synonyms when users use double quotes */
private class SmartQueryParser extends QueryParser {
Analyzer morePrecise = new Analyzer2();
public SmartQueryParser() {
super(FIELD, new Analyzer1());
}
@Override
protected Query getFieldQuery(String field, String queryText, boolean quoted)
throws ParseException {
if (quoted) return newFieldQuery(morePrecise, field, queryText, quoted);
else return super.getFieldQuery(field, queryText, quoted);
}
}
@Override
public void testNewFieldQuery() throws Exception {
/** ordinary behavior, synonyms form uncoordinated boolean query */
QueryParser dumb = new QueryParser(FIELD,
new Analyzer1());
Query expanded = new SynonymQuery.Builder(FIELD)
.addTerm(new Term(FIELD, "dogs"))
.addTerm(new Term(FIELD, "dog"))
.build();
assertEquals(expanded, dumb.parse("\"dogs\""));
/** even with the phrase operator the behavior is the same */
assertEquals(expanded, dumb.parse("dogs"));
/**
* custom behavior, the synonyms are expanded, unless you use quote operator
*/
QueryParser smart = new SmartQueryParser();
assertEquals(expanded, smart.parse("dogs"));
Query unexpanded = new TermQuery(new Term(FIELD, "dogs"));
assertEquals(unexpanded, smart.parse("\"dogs\""));
}
/** simple synonyms test */
public void testSynonyms() throws Exception {
Query expected = new SynonymQuery.Builder(FIELD)
.addTerm(new Term(FIELD, "dogs"))
.addTerm(new Term(FIELD, "dog"))
.build();
QueryParser qp = new QueryParser(FIELD, new MockSynonymAnalyzer());
assertEquals(expected, qp.parse("dogs"));
assertEquals(expected, qp.parse("\"dogs\""));
qp.setDefaultOperator(Operator.AND);
assertEquals(expected, qp.parse("dogs"));
assertEquals(expected, qp.parse("\"dogs\""));
expected = new BoostQuery(expected, 2f);
assertEquals(expected, qp.parse("dogs^2"));
assertEquals(expected, qp.parse("\"dogs\"^2"));
}
/** forms multiphrase query */
public void testSynonymsPhrase() throws Exception {
MultiPhraseQuery.Builder expectedQBuilder = new MultiPhraseQuery.Builder();
expectedQBuilder.add(new Term(FIELD, "old"));
expectedQBuilder.add(new Term[] { new Term(FIELD, "dogs"), new Term(FIELD, "dog") });
QueryParser qp = new QueryParser(FIELD, new MockSynonymAnalyzer());
assertEquals(expectedQBuilder.build(), qp.parse("\"old dogs\""));
qp.setDefaultOperator(Operator.AND);
assertEquals(expectedQBuilder.build(), qp.parse("\"old dogs\""));
BoostQuery expected = new BoostQuery(expectedQBuilder.build(), 2f);
assertEquals(expected, qp.parse("\"old dogs\"^2"));
expectedQBuilder.setSlop(3);
expected = new BoostQuery(expectedQBuilder.build(), 2f);
assertEquals(expected, qp.parse("\"old dogs\"~3^2"));
}
/**
* adds synonym of "國" for "国".
*/
protected static class MockCJKSynonymFilter extends TokenFilter {
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
boolean addSynonym = false;
public MockCJKSynonymFilter(TokenStream input) {
super(input);
}
@Override
public final boolean incrementToken() throws IOException {
if (addSynonym) { // inject our synonym
clearAttributes();
termAtt.setEmpty().append("國");
posIncAtt.setPositionIncrement(0);
addSynonym = false;
return true;
}
if (input.incrementToken()) {
addSynonym = termAtt.toString().equals("国");
return true;
} else {
return false;
}
}
}
static class MockCJKSynonymAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new SimpleCJKTokenizer();
return new TokenStreamComponents(tokenizer, new MockCJKSynonymFilter(tokenizer));
}
}
/** simple CJK synonym test */
public void testCJKSynonym() throws Exception {
Query expected = new SynonymQuery.Builder(FIELD)
.addTerm(new Term(FIELD, "国"))
.addTerm(new Term(FIELD, "國"))
.build();
QueryParser qp = new QueryParser(FIELD, new MockCJKSynonymAnalyzer());
assertEquals(expected, qp.parse("国"));
qp.setDefaultOperator(Operator.AND);
assertEquals(expected, qp.parse("国"));
expected = new BoostQuery(expected, 2f);
assertEquals(expected, qp.parse("国^2"));
}
/** synonyms with default OR operator */
public void testCJKSynonymsOR() throws Exception {
BooleanQuery.Builder expectedB = new BooleanQuery.Builder();
expectedB.add(new TermQuery(new Term(FIELD, "中")), BooleanClause.Occur.SHOULD);
Query inner = new SynonymQuery.Builder(FIELD)
.addTerm(new Term(FIELD, "国"))
.addTerm(new Term(FIELD, "國"))
.build();
expectedB.add(inner, BooleanClause.Occur.SHOULD);
Query expected = expectedB.build();
QueryParser qp = new QueryParser(FIELD, new MockCJKSynonymAnalyzer());
assertEquals(expected, qp.parse("中国"));
expected = new BoostQuery(expected, 2f);
assertEquals(expected, qp.parse("中国^2"));
}
/** more complex synonyms with default OR operator */
public void testCJKSynonymsOR2() throws Exception {
BooleanQuery.Builder expectedB = new BooleanQuery.Builder();
expectedB.add(new TermQuery(new Term(FIELD, "中")), BooleanClause.Occur.SHOULD);
SynonymQuery inner = new SynonymQuery.Builder(FIELD)
.addTerm(new Term(FIELD, "国"))
.addTerm(new Term(FIELD, "國"))
.build();
expectedB.add(inner, BooleanClause.Occur.SHOULD);
SynonymQuery inner2 = new SynonymQuery.Builder(FIELD)
.addTerm(new Term(FIELD, "国"))
.addTerm(new Term(FIELD, "國"))
.build();
expectedB.add(inner2, BooleanClause.Occur.SHOULD);
Query expected = expectedB.build();
QueryParser qp = new QueryParser(FIELD, new MockCJKSynonymAnalyzer());
assertEquals(expected, qp.parse("中国国"));
expected = new BoostQuery(expected, 2f);
assertEquals(expected, qp.parse("中国国^2"));
}
/** synonyms with default AND operator */
public void testCJKSynonymsAND() throws Exception {
BooleanQuery.Builder expectedB = new BooleanQuery.Builder();
expectedB.add(new TermQuery(new Term(FIELD, "中")), BooleanClause.Occur.MUST);
Query inner = new SynonymQuery.Builder(FIELD)
.addTerm(new Term(FIELD, "国"))
.addTerm(new Term(FIELD, "國"))
.build();
expectedB.add(inner, BooleanClause.Occur.MUST);
Query expected = expectedB.build();
QueryParser qp = new QueryParser(FIELD, new MockCJKSynonymAnalyzer());
qp.setDefaultOperator(Operator.AND);
assertEquals(expected, qp.parse("中国"));
expected = new BoostQuery(expected, 2f);
assertEquals(expected, qp.parse("中国^2"));
}
/** more complex synonyms with default AND operator */
public void testCJKSynonymsAND2() throws Exception {
BooleanQuery.Builder expectedB = new BooleanQuery.Builder();
expectedB.add(new TermQuery(new Term(FIELD, "中")), BooleanClause.Occur.MUST);
Query inner = new SynonymQuery.Builder(FIELD)
.addTerm(new Term(FIELD, "国"))
.addTerm(new Term(FIELD, "國"))
.build();
expectedB.add(inner, BooleanClause.Occur.MUST);
Query inner2 = new SynonymQuery.Builder(FIELD)
.addTerm(new Term(FIELD, "国"))
.addTerm(new Term(FIELD, "國"))
.build();
expectedB.add(inner2, BooleanClause.Occur.MUST);
Query expected = expectedB.build();
QueryParser qp = new QueryParser(FIELD, new MockCJKSynonymAnalyzer());
qp.setDefaultOperator(Operator.AND);
assertEquals(expected, qp.parse("中国国"));
expected = new BoostQuery(expected, 2f);
assertEquals(expected, qp.parse("中国国^2"));
}
/** forms multiphrase query */
public void testCJKSynonymsPhrase() throws Exception {
MultiPhraseQuery.Builder expectedQBuilder = new MultiPhraseQuery.Builder();
expectedQBuilder.add(new Term(FIELD, "中"));
expectedQBuilder.add(new Term[] { new Term(FIELD, "国"), new Term(FIELD, "國")});
QueryParser qp = new QueryParser(FIELD, new MockCJKSynonymAnalyzer());
qp.setDefaultOperator(Operator.AND);
assertEquals(expectedQBuilder.build(), qp.parse("\"中国\""));
Query expected = new BoostQuery(expectedQBuilder.build(), 2f);
assertEquals(expected, qp.parse("\"中国\"^2"));
expectedQBuilder.setSlop(3);
expected = new BoostQuery(expectedQBuilder.build(), 2f);
assertEquals(expected, qp.parse("\"中国\"~3^2"));
}
/** LUCENE-6677: make sure wildcard query respects determinizeWorkLimit. */
public void testWildcardDeterminizeWorkLimit() throws Exception {
QueryParser qp = new QueryParser(FIELD, new MockAnalyzer(random()));
qp.setDeterminizeWorkLimit(1);
expectThrows(TooComplexToDeterminizeException.class, () -> {
qp.parse("a*aaaaaaa");
});
}
// TODO: Remove this specialization once the flexible standard parser gets multi-word synonym support
@Override
public void testQPA() throws Exception {
boolean oldSplitOnWhitespace = splitOnWhitespace;
splitOnWhitespace = false;
assertQueryEquals("term phrase term", qpAnalyzer, "term phrase1 phrase2 term");
CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
setDefaultOperatorAND(cqpc);
assertQueryEquals(cqpc, "field", "term phrase term", "+term +phrase1 +phrase2 +term");
splitOnWhitespace = oldSplitOnWhitespace;
}
// TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
public void testMultiWordSynonyms() throws Exception {
QueryParser dumb = new QueryParser("field", new Analyzer1());
dumb.setSplitOnWhitespace(false);
TermQuery guinea = new TermQuery(new Term("field", "guinea"));
TermQuery pig = new TermQuery(new Term("field", "pig"));
TermQuery cavy = new TermQuery(new Term("field", "cavy"));
// A multi-word synonym source will form a graph query for synonyms that formed the graph token stream
BooleanQuery.Builder synonym = new BooleanQuery.Builder();
synonym.add(guinea, BooleanClause.Occur.MUST);
synonym.add(pig, BooleanClause.Occur.MUST);
BooleanQuery guineaPig = synonym.build();
PhraseQuery phraseGuineaPig = new PhraseQuery.Builder()
.add(new Term("field", "guinea"))
.add(new Term("field", "pig"))
.build();
BooleanQuery graphQuery = new BooleanQuery.Builder()
.add(new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.build(), BooleanClause.Occur.SHOULD)
.build();
assertEquals(graphQuery, dumb.parse("guinea pig"));
// With the phrase operator, a multi-word synonym source will form span near queries.
SpanNearQuery spanGuineaPig = SpanNearQuery.newOrderedNearQuery("field")
.addClause(new SpanTermQuery(new Term("field", "guinea")))
.addClause(new SpanTermQuery(new Term("field", "pig")))
.setSlop(0)
.build();
SpanTermQuery spanCavy = new SpanTermQuery(new Term("field", "cavy"));
SpanOrQuery spanPhrase = new SpanOrQuery(new SpanQuery[]{spanGuineaPig, spanCavy});
assertEquals(spanPhrase, dumb.parse("\"guinea pig\""));
// custom behavior, the synonyms are expanded, unless you use quote operator
QueryParser smart = new SmartQueryParser();
smart.setSplitOnWhitespace(false);
graphQuery = new BooleanQuery.Builder()
.add(new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.build(), BooleanClause.Occur.SHOULD)
.build();
assertEquals(graphQuery, smart.parse("guinea pig"));
assertEquals(phraseGuineaPig, smart.parse("\"guinea pig\""));
// with the AND operator
dumb.setDefaultOperator(Operator.AND);
BooleanQuery graphAndQuery = new BooleanQuery.Builder()
.add(new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.build(), BooleanClause.Occur.MUST)
.build();
assertEquals(graphAndQuery, dumb.parse("guinea pig"));
graphAndQuery = new BooleanQuery.Builder()
.add(new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.build(), BooleanClause.Occur.MUST)
.add(cavy, BooleanClause.Occur.MUST)
.build();
assertEquals(graphAndQuery, dumb.parse("guinea pig cavy"));
}
public void testEnableGraphQueries() throws Exception {
QueryParser dumb = new QueryParser("field", new Analyzer1());
dumb.setSplitOnWhitespace(false);
dumb.setEnableGraphQueries(false);
TermQuery pig = new TermQuery(new Term("field", "pig"));
// A multi-word synonym source will just form a boolean query when graph queries are disabled:
Query inner = new SynonymQuery.Builder("field")
.addTerm(new Term("field", "cavy"))
.addTerm(new Term("field", "guinea"))
.build();
BooleanQuery.Builder b = new BooleanQuery.Builder();
b.add(inner, BooleanClause.Occur.SHOULD);
b.add(pig, BooleanClause.Occur.SHOULD);
BooleanQuery query = b.build();
assertEquals(query, dumb.parse("guinea pig"));
}
// TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
public void testOperatorsAndMultiWordSynonyms() throws Exception {
Analyzer a = new MockSynonymAnalyzer();
boolean oldSplitOnWhitespace = splitOnWhitespace;
splitOnWhitespace = false;
// Operators should interrupt multiword analysis of adjacent words if they associate
assertQueryEquals("+guinea pig", a, "+guinea pig");
assertQueryEquals("-guinea pig", a, "-guinea pig");
assertQueryEquals("!guinea pig", a, "-guinea pig");
assertQueryEquals("guinea* pig", a, "guinea* pig");
assertQueryEquals("guinea? pig", a, "guinea? pig");
assertQueryEquals("guinea~2 pig", a, "guinea~2 pig");
assertQueryEquals("guinea^2 pig", a, "(guinea)^2.0 pig");
assertQueryEquals("guinea +pig", a, "guinea +pig");
assertQueryEquals("guinea -pig", a, "guinea -pig");
assertQueryEquals("guinea !pig", a, "guinea -pig");
assertQueryEquals("guinea pig*", a, "guinea pig*");
assertQueryEquals("guinea pig?", a, "guinea pig?");
assertQueryEquals("guinea pig~2", a, "guinea pig~2");
assertQueryEquals("guinea pig^2", a, "guinea (pig)^2.0");
assertQueryEquals("field:guinea pig", a, "guinea pig");
assertQueryEquals("guinea field:pig", a, "guinea pig");
assertQueryEquals("NOT guinea pig", a, "-guinea pig");
assertQueryEquals("guinea NOT pig", a, "guinea -pig");
assertQueryEquals("guinea pig AND dogs", a, "guinea +pig +Synonym(dog dogs)");
assertQueryEquals("dogs AND guinea pig", a, "+Synonym(dog dogs) +guinea pig");
assertQueryEquals("guinea pig && dogs", a, "guinea +pig +Synonym(dog dogs)");
assertQueryEquals("dogs && guinea pig", a, "+Synonym(dog dogs) +guinea pig");
assertQueryEquals("guinea pig OR dogs", a, "guinea pig Synonym(dog dogs)");
assertQueryEquals("dogs OR guinea pig", a, "Synonym(dog dogs) guinea pig");
assertQueryEquals("guinea pig || dogs", a, "guinea pig Synonym(dog dogs)");
assertQueryEquals("dogs || guinea pig", a, "Synonym(dog dogs) guinea pig");
assertQueryEquals("\"guinea\" pig", a, "guinea pig");
assertQueryEquals("guinea \"pig\"", a, "guinea pig");
assertQueryEquals("(guinea) pig", a, "guinea pig");
assertQueryEquals("guinea (pig)", a, "guinea pig");
assertQueryEquals("/guinea/ pig", a, "/guinea/ pig");
assertQueryEquals("guinea /pig/", a, "guinea /pig/");
// Operators should not interrupt multiword analysis if not don't associate
assertQueryEquals("(guinea pig)", a, "((+guinea +pig) cavy)");
assertQueryEquals("+(guinea pig)", a, "+(((+guinea +pig) cavy))");
assertQueryEquals("-(guinea pig)", a, "-(((+guinea +pig) cavy))");
assertQueryEquals("!(guinea pig)", a, "-(((+guinea +pig) cavy))");
assertQueryEquals("NOT (guinea pig)", a, "-(((+guinea +pig) cavy))");
assertQueryEquals("(guinea pig)^2", a, "(((+guinea +pig) cavy))^2.0");
assertQueryEquals("field:(guinea pig)", a, "((+guinea +pig) cavy)");
assertQueryEquals("+small guinea pig", a, "+small ((+guinea +pig) cavy)");
assertQueryEquals("-small guinea pig", a, "-small ((+guinea +pig) cavy)");
assertQueryEquals("!small guinea pig", a, "-small ((+guinea +pig) cavy)");
assertQueryEquals("NOT small guinea pig", a, "-small ((+guinea +pig) cavy)");
assertQueryEquals("small* guinea pig", a, "small* ((+guinea +pig) cavy)");
assertQueryEquals("small? guinea pig", a, "small? ((+guinea +pig) cavy)");
assertQueryEquals("\"small\" guinea pig", a, "small ((+guinea +pig) cavy)");
assertQueryEquals("guinea pig +running", a, "((+guinea +pig) cavy) +running");
assertQueryEquals("guinea pig -running", a, "((+guinea +pig) cavy) -running");
assertQueryEquals("guinea pig !running", a, "((+guinea +pig) cavy) -running");
assertQueryEquals("guinea pig NOT running", a, "((+guinea +pig) cavy) -running");
assertQueryEquals("guinea pig running*", a, "((+guinea +pig) cavy) running*");
assertQueryEquals("guinea pig running?", a, "((+guinea +pig) cavy) running?");
assertQueryEquals("guinea pig \"running\"", a, "((+guinea +pig) cavy) running");
assertQueryEquals("\"guinea pig\"~2", a, "spanOr([spanNear([guinea, pig], 0, true), cavy])");
assertQueryEquals("field:\"guinea pig\"", a, "spanOr([spanNear([guinea, pig], 0, true), cavy])");
splitOnWhitespace = oldSplitOnWhitespace;
}
public void testOperatorsAndMultiWordSynonymsSplitOnWhitespace() throws Exception {
Analyzer a = new MockSynonymAnalyzer();
boolean oldSplitOnWhitespace = splitOnWhitespace;
splitOnWhitespace = true;
assertQueryEquals("+guinea pig", a, "+guinea pig");
assertQueryEquals("-guinea pig", a, "-guinea pig");
assertQueryEquals("!guinea pig", a, "-guinea pig");
assertQueryEquals("guinea* pig", a, "guinea* pig");
assertQueryEquals("guinea? pig", a, "guinea? pig");
assertQueryEquals("guinea~2 pig", a, "guinea~2 pig");
assertQueryEquals("guinea^2 pig", a, "(guinea)^2.0 pig");
assertQueryEquals("guinea +pig", a, "guinea +pig");
assertQueryEquals("guinea -pig", a, "guinea -pig");
assertQueryEquals("guinea !pig", a, "guinea -pig");
assertQueryEquals("guinea pig*", a, "guinea pig*");
assertQueryEquals("guinea pig?", a, "guinea pig?");
assertQueryEquals("guinea pig~2", a, "guinea pig~2");
assertQueryEquals("guinea pig^2", a, "guinea (pig)^2.0");
assertQueryEquals("field:guinea pig", a, "guinea pig");
assertQueryEquals("guinea field:pig", a, "guinea pig");
assertQueryEquals("NOT guinea pig", a, "-guinea pig");
assertQueryEquals("guinea NOT pig", a, "guinea -pig");
assertQueryEquals("guinea pig AND dogs", a, "guinea +pig +Synonym(dog dogs)");
assertQueryEquals("dogs AND guinea pig", a, "+Synonym(dog dogs) +guinea pig");
assertQueryEquals("guinea pig && dogs", a, "guinea +pig +Synonym(dog dogs)");
assertQueryEquals("dogs && guinea pig", a, "+Synonym(dog dogs) +guinea pig");
assertQueryEquals("guinea pig OR dogs", a, "guinea pig Synonym(dog dogs)");
assertQueryEquals("dogs OR guinea pig", a, "Synonym(dog dogs) guinea pig");
assertQueryEquals("guinea pig || dogs", a, "guinea pig Synonym(dog dogs)");
assertQueryEquals("dogs || guinea pig", a, "Synonym(dog dogs) guinea pig");
assertQueryEquals("\"guinea\" pig", a, "guinea pig");
assertQueryEquals("guinea \"pig\"", a, "guinea pig");
assertQueryEquals("(guinea) pig", a, "guinea pig");
assertQueryEquals("guinea (pig)", a, "guinea pig");
assertQueryEquals("/guinea/ pig", a, "/guinea/ pig");
assertQueryEquals("guinea /pig/", a, "guinea /pig/");
assertQueryEquals("(guinea pig)", a, "guinea pig");
assertQueryEquals("+(guinea pig)", a, "+(guinea pig)");
assertQueryEquals("-(guinea pig)", a, "-(guinea pig)");
assertQueryEquals("!(guinea pig)", a, "-(guinea pig)");
assertQueryEquals("NOT (guinea pig)", a, "-(guinea pig)");
assertQueryEquals("(guinea pig)^2", a, "(guinea pig)^2.0");
assertQueryEquals("field:(guinea pig)", a, "guinea pig");
assertQueryEquals("+small guinea pig", a, "+small guinea pig");
assertQueryEquals("-small guinea pig", a, "-small guinea pig");
assertQueryEquals("!small guinea pig", a, "-small guinea pig");
assertQueryEquals("NOT small guinea pig", a, "-small guinea pig");
assertQueryEquals("small* guinea pig", a, "small* guinea pig");
assertQueryEquals("small? guinea pig", a, "small? guinea pig");
assertQueryEquals("\"small\" guinea pig", a, "small guinea pig");
assertQueryEquals("guinea pig +running", a, "guinea pig +running");
assertQueryEquals("guinea pig -running", a, "guinea pig -running");
assertQueryEquals("guinea pig !running", a, "guinea pig -running");
assertQueryEquals("guinea pig NOT running", a, "guinea pig -running");
assertQueryEquals("guinea pig running*", a, "guinea pig running*");
assertQueryEquals("guinea pig running?", a, "guinea pig running?");
assertQueryEquals("guinea pig \"running\"", a, "guinea pig running");
assertQueryEquals("\"guinea pig\"~2", a, "spanOr([spanNear([guinea, pig], 0, true), cavy])");
assertQueryEquals("field:\"guinea pig\"", a, "spanOr([spanNear([guinea, pig], 0, true), cavy])");
splitOnWhitespace = oldSplitOnWhitespace;
}
public void testDefaultSplitOnWhitespace() throws Exception {
QueryParser parser = new QueryParser("field", new Analyzer1());
assertFalse(parser.getSplitOnWhitespace()); // default is false
// A multi-word synonym source will form a synonym query for the same-starting-position tokens
TermQuery guinea = new TermQuery(new Term("field", "guinea"));
TermQuery pig = new TermQuery(new Term("field", "pig"));
TermQuery cavy = new TermQuery(new Term("field", "cavy"));
// A multi-word synonym source will form a graph query for synonyms that formed the graph token stream
BooleanQuery.Builder synonym = new BooleanQuery.Builder();
synonym.add(guinea, BooleanClause.Occur.MUST);
synonym.add(pig, BooleanClause.Occur.MUST);
BooleanQuery guineaPig = synonym.build();
BooleanQuery graphQuery = new BooleanQuery.Builder()
.add(new BooleanQuery.Builder()
.add(guineaPig, BooleanClause.Occur.SHOULD)
.add(cavy, BooleanClause.Occur.SHOULD)
.build(), BooleanClause.Occur.SHOULD)
.build();
assertEquals(graphQuery, parser.parse("guinea pig"));
boolean oldSplitOnWhitespace = splitOnWhitespace;
splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "((+guinea +pig) cavy)");
splitOnWhitespace = oldSplitOnWhitespace;
}
public void testWildcardAlone() throws ParseException {
//seems like crazy edge case, but can be useful in concordance
QueryParser parser = new QueryParser(FIELD, new ASCIIAnalyzer());
parser.setAllowLeadingWildcard(false);
expectThrows(ParseException.class, () -> {
parser.parse("*");
});
QueryParser parser2 = new QueryParser("*", new ASCIIAnalyzer());
parser2.setAllowLeadingWildcard(false);
assertEquals(new MatchAllDocsQuery(), parser2.parse("*"));
}
public void testWildCardEscapes() throws ParseException, IOException {
Analyzer a = new ASCIIAnalyzer();
QueryParser parser = new QueryParser(FIELD, a);
assertTrue(isAHit(parser.parse("mö*tley"), "moatley", a));
// need to have at least one genuine wildcard to trigger the wildcard analysis
// hence the * before the y
assertTrue(isAHit(parser.parse("mö\\*tl*y"), "mo*tley", a));
// escaped backslash then true wildcard
assertTrue(isAHit(parser.parse("mö\\\\*tley"), "mo\\atley", a));
// escaped wildcard then true wildcard
assertTrue(isAHit(parser.parse("mö\\??ley"), "mo?tley", a));
// the first is an escaped * which should yield a miss
assertFalse(isAHit(parser.parse("mö\\*tl*y"), "moatley", a));
}
public void testWildcardDoesNotNormalizeEscapedChars() throws Exception {
Analyzer asciiAnalyzer = new ASCIIAnalyzer();
Analyzer keywordAnalyzer = new MockAnalyzer(random());
QueryParser parser = new QueryParser(FIELD, asciiAnalyzer);
assertTrue(isAHit(parser.parse("e*e"), "étude", asciiAnalyzer));
assertTrue(isAHit(parser.parse("é*e"), "etude", asciiAnalyzer));
assertFalse(isAHit(parser.parse("\\é*e"), "etude", asciiAnalyzer));
assertTrue(isAHit(parser.parse("\\é*e"), "étude", keywordAnalyzer));
}
public void testWildCardQuery() throws ParseException {
Analyzer a = new ASCIIAnalyzer();
QueryParser parser = new QueryParser(FIELD, a);
parser.setAllowLeadingWildcard(true);
assertEquals("*bersetzung uber*ung", parser.parse("*bersetzung über*ung").toString(FIELD));
parser.setAllowLeadingWildcard(false);
assertEquals("motley crue motl?* cru?", parser.parse("Mötley Cr\u00fce Mötl?* Crü?").toString(FIELD));
assertEquals("renee zellweger ren?? zellw?ger", parser.parse("Renée Zellweger Ren?? Zellw?ger").toString(FIELD));
}
public void testPrefixQuery() throws ParseException {
Analyzer a = new ASCIIAnalyzer();
QueryParser parser = new QueryParser(FIELD, a);
assertEquals("ubersetzung ubersetz*", parser.parse("übersetzung übersetz*").toString(FIELD));
assertEquals("motley crue motl* cru*", parser.parse("Mötley Crüe Mötl* crü*").toString(FIELD));
assertEquals("rene? zellw*", parser.parse("René? Zellw*").toString(FIELD));
}
public void testRangeQuery() throws ParseException {
Analyzer a = new ASCIIAnalyzer();
QueryParser parser = new QueryParser(FIELD, a);
assertEquals("[aa TO bb]", parser.parse("[aa TO bb]").toString(FIELD));
assertEquals("{anais TO zoe}", parser.parse("{Anaïs TO Zoé}").toString(FIELD));
}
public void testFuzzyQuery() throws ParseException {
Analyzer a = new ASCIIAnalyzer();
QueryParser parser = new QueryParser(FIELD, a);
assertEquals("ubersetzung ubersetzung~1", parser.parse("Übersetzung Übersetzung~0.9").toString(FIELD));
assertEquals("motley crue motley~1 crue~2", parser.parse("Mötley Crüe Mötley~0.75 Crüe~0.5").toString(FIELD));
assertEquals("renee zellweger renee~0 zellweger~2", parser.parse("Renée Zellweger Renée~0.9 Zellweger~").toString(FIELD));
}
final static class FoldingFilter extends TokenFilter {
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public FoldingFilter(TokenStream input) {
super(input);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
char term[] = termAtt.buffer();
for (int i = 0; i < term.length; i++)
switch(term[i]) {
case 'ü':
term[i] = 'u';
break;
case 'ö':
term[i] = 'o';
break;
case 'é':
term[i] = 'e';
break;
case 'ï':
term[i] = 'i';
break;
}
return true;
} else {
return false;
}
}
}
final static class ASCIIAnalyzer extends Analyzer {
@Override
public TokenStreamComponents createComponents(String fieldName) {
Tokenizer result = new MockTokenizer(MockTokenizer.WHITESPACE, true);
return new TokenStreamComponents(result, new FoldingFilter(result));
}
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
return new FoldingFilter(new MockLowerCaseFilter(in));
}
}
// LUCENE-4176
public void testByteTerms() throws Exception {
String s = "เข";
Analyzer analyzer = new MockBytesAnalyzer();
QueryParser qp = new QueryParser(FIELD, analyzer);
assertTrue(isAHit(qp.parse("[เข TO เข]"), s, analyzer));
assertTrue(isAHit(qp.parse("เข~1"), s, analyzer));
assertTrue(isAHit(qp.parse("เข*"), s, analyzer));
assertTrue(isAHit(qp.parse("เ*"), s, analyzer));
assertTrue(isAHit(qp.parse("เ??"), s, analyzer));
}
// LUCENE-7533
public void test_splitOnWhitespace_with_autoGeneratePhraseQueries() {
final QueryParser qp = new QueryParser(FIELD, new MockAnalyzer(random()));
expectThrows(IllegalArgumentException.class, () -> {
qp.setSplitOnWhitespace(false);
qp.setAutoGeneratePhraseQueries(true);
});
final QueryParser qp2 = new QueryParser(FIELD, new MockAnalyzer(random()));
expectThrows(IllegalArgumentException.class, () -> {
qp2.setSplitOnWhitespace(true);
qp2.setAutoGeneratePhraseQueries(true);
qp2.setSplitOnWhitespace(false);
});
}
private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException{
Directory ramDir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), ramDir, analyzer);
Document doc = new Document();
FieldType fieldType = new FieldType();
fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
fieldType.setTokenized(true);
fieldType.setStored(true);
Field field = new Field(FIELD, content, fieldType);
doc.add(field);
writer.addDocument(doc);
writer.close();
DirectoryReader ir = DirectoryReader.open(ramDir);
IndexSearcher is = new IndexSearcher(ir);
long hits = is.count(q);
ir.close();
ramDir.close();
if (hits == 1){
return true;
} else {
return false;
}
}
}