blob: 0bcfedf067c946590b6296a28e212e9bf629c5ee [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.spans;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
import org.apache.lucene.queryparser.flexible.standard.processors.WildcardQueryNodeProcessor;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.LuceneTestCase;
/**
* This test case demonstrates how the new query parser can be used.<br>
* <br>
*
* It tests queries like "term", "field:term", "term1 term2", "term1 OR term2",
* which are all already supported by the current syntax parser (
* {@link StandardSyntaxParser}).<br>
* <br>
*
* The goal is to create a new query parser that supports only the pair
* "field:term" or a list of pairs separated or not by an OR operator, and from
* this query generate {@link SpanQuery} objects instead of the regular
* {@link Query} objects. Basically, every pair will be converted to a
* {@link SpanTermQuery} object and if there is more than one pair they will be
* grouped by an {@link OrQueryNode}.<br>
* <br>
*
* Another functionality that will be added is the ability to convert every
* field defined in the query to a unique specific field.<br>
* <br>
*
* The query generation is divided in three different steps: parsing (syntax),
* processing (semantic) and building.<br>
* <br>
*
* The parsing phase, as already mentioned, will be performed by the current
* query parser: {@link StandardSyntaxParser}.<br>
* <br>
*
* The processing phase will be performed by a processor pipeline which is
* composed of 2 processors: {@link SpansValidatorQueryNodeProcessor} and
* {@link UniqueFieldQueryNodeProcessor}.
*
* <pre>
*
* {@link SpansValidatorQueryNodeProcessor}: as it's going to use the current
* query parser to parse the syntax, it will support more features than we want,
* this processor basically validates the query node tree generated by the parser
* and only lets through the elements we want; if any other element, such as a
* wildcard or range query, is found, an exception is thrown.
*
* {@link UniqueFieldQueryNodeProcessor}: this processor will take care of reading
* what is the &quot;unique field&quot; from the configuration and convert every field defined
* in every pair to this &quot;unique field&quot;. For that, a {@link SpansQueryConfigHandler} is
* used, which has the {@link UniqueFieldAttribute} defined in it.
* </pre>
*
* The building phase is performed by the {@link SpansQueryTreeBuilder}, which
* basically contains a map that defines which builder will be used to generate
* {@link SpanQuery} objects from {@link QueryNode} objects.<br>
* <br>
*
* @see SpansQueryConfigHandler
* @see SpansQueryTreeBuilder
* @see SpansValidatorQueryNodeProcessor
* @see SpanOrQueryNodeBuilder
* @see SpanTermQueryNodeBuilder
* @see StandardSyntaxParser
* @see UniqueFieldQueryNodeProcessor
* @see UniqueFieldAttribute
*/
public class TestSpanQueryParser extends LuceneTestCase {

  // Rebuilt for every test in setUp().
  private QueryNodeProcessorPipeline spanProcessorPipeline;
  private SpansQueryConfigHandler spanQueryConfigHandler;
  private SpansQueryTreeBuilder spansQueryTreeBuilder;

  // Never reassigned, so it is created once per test instance.
  private final SyntaxParser queryParser = new StandardSyntaxParser();

  public TestSpanQueryParser() {
    // empty constructor
  }

  /**
   * Creates a fresh config handler, tree builder and processor pipeline, then registers the
   * processors in the order they must run.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();

    this.spanProcessorPipeline = new QueryNodeProcessorPipeline();
    this.spanQueryConfigHandler = new SpansQueryConfigHandler();
    this.spansQueryTreeBuilder = new SpansQueryTreeBuilder();

    // set up the processor pipeline
    this.spanProcessorPipeline.setQueryConfigHandler(this.spanQueryConfigHandler);

    this.spanProcessorPipeline.add(new WildcardQueryNodeProcessor());
    this.spanProcessorPipeline.add(new SpansValidatorQueryNodeProcessor());
    this.spanProcessorPipeline.add(new UniqueFieldQueryNodeProcessor());
  }

  /**
   * Builds a {@link SpanQuery} from the given query text, using an empty string as the unique
   * field.
   *
   * @param query the query text to parse
   * @return the span query built from {@code query}
   * @throws QueryNodeException if parsing, processing or building fails
   */
  public SpanQuery getSpanQuery(CharSequence query) throws QueryNodeException {
    return getSpanQuery("", query);
  }

  /**
   * Runs the three steps (parse, process, build) on the given query text.
   *
   * @param uniqueField value stored under {@link SpansQueryConfigHandler#UNIQUE_FIELD} before the
   *     query is processed
   * @param query the query text to parse
   * @return the span query built from {@code query}
   * @throws QueryNodeException if parsing, processing or building fails
   */
  public SpanQuery getSpanQuery(String uniqueField, CharSequence query)
      throws QueryNodeException {
    this.spanQueryConfigHandler.set(SpansQueryConfigHandler.UNIQUE_FIELD, uniqueField);

    QueryNode queryTree = this.queryParser.parse(query, "defaultField");
    queryTree = this.spanProcessorPipeline.process(queryTree);

    return this.spansQueryTreeBuilder.build(queryTree);
  }

  /** A single term, with or without an explicit field, must produce a {@link SpanTermQuery}. */
  public void testTermSpans() throws Exception {
    assertEquals("term", getSpanQuery("field:term").toString());
    assertEquals("term", getSpanQuery("term").toString());

    assertTrue(getSpanQuery("field:term") instanceof SpanTermQuery);
    assertTrue(getSpanQuery("term") instanceof SpanTermQuery);
  }

  /** Whatever field the query text names, the resulting query must use the unique field. */
  public void testUniqueField() throws Exception {
    assertEquals("field:term", getSpanQuery("field", "term").toString());
    assertEquals("field:term", getSpanQuery("field", "field:term").toString());
    assertEquals("field:term", getSpanQuery("field", "anotherField:term").toString());
  }

  /** Several terms, with or without an explicit OR, must produce a {@link SpanOrQuery}. */
  public void testOrSpans() throws Exception {
    assertEquals("spanOr([term1, term2])", getSpanQuery("term1 term2").toString());
    assertEquals("spanOr([term1, term2])", getSpanQuery("term1 OR term2").toString());

    // Check the type for both the implicit and the explicit OR form.
    assertTrue(getSpanQuery("term1 term2") instanceof SpanOrQuery);
    assertTrue(getSpanQuery("term1 OR term2") instanceof SpanOrQuery);
  }

  /** Every construct other than plain terms and OR lists must be rejected with an exception. */
  public void testQueryValidator() throws QueryNodeException {
    // wildcard queries should not be supported
    expectThrows(QueryNodeException.class, () -> getSpanQuery("term*"));

    // range queries should not be supported
    expectThrows(QueryNodeException.class, () -> getSpanQuery("[a TO z]"));

    // fuzzy queries should not be supported
    expectThrows(QueryNodeException.class, () -> getSpanQuery("a~0.5"));

    // boost queries should not be supported
    expectThrows(QueryNodeException.class, () -> getSpanQuery("a^0.5"));

    // quoted queries should not be supported
    expectThrows(QueryNodeException.class, () -> getSpanQuery("\"a b\""));

    // parenthesized queries should not be supported
    expectThrows(QueryNodeException.class, () -> getSpanQuery("(a b)"));

    // AND queries should not be supported
    expectThrows(QueryNodeException.class, () -> getSpanQuery("a AND b"));
  }
}