blob: 8803618ab618fd10c641387f6abfab70780c963f [file] [log] [blame]
options {
STATIC=false;
JAVA_UNICODE_ESCAPE=true;
USER_CHAR_STREAM=true;
IGNORE_CASE=false;
JDK_VERSION="1.5";
}
PARSER_BEGIN(StandardSyntaxParser)
package org.apache.lucene.queryparser.flexible.standard.parser;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.StringReader;
import java.util.Vector;
import java.util.Arrays;
import org.apache.lucene.queryparser.flexible.messages.Message;
import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException;
import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages;
import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.BoostQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode;
/**
* Parser for the standard Lucene syntax
*/
public class StandardSyntaxParser implements SyntaxParser {
// syntax parser constructor
public StandardSyntaxParser() {
this(new FastCharStream(new StringReader("")));
}
/** Parses a query string, returning a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}.
* @param query the query string to be parsed.
* @throws ParseException if the parsing fails
*/
public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException {
ReInit(new FastCharStream(new StringReader(query.toString())));
try {
// TopLevelQuery is a Query followed by the end-of-input (EOF)
QueryNode querynode = TopLevelQuery(field);
return querynode;
}
catch (ParseException tme) {
tme.setQuery(query);
throw tme;
}
catch (Error tme) {
Message message = new MessageImpl(QueryParserMessages.INVALID_SYNTAX_CANNOT_PARSE, query, tme.getMessage());
QueryNodeParseException e = new QueryNodeParseException(tme);
e.setQuery(query);
e.setNonLocalizedMessage(message);
throw e;
}
}
}
PARSER_END(StandardSyntaxParser)
/* ***************** */
/* Token Definitions */
/* ***************** */
<*> TOKEN : {
<#_NUM_CHAR: ["0"-"9"] >
// every character that follows a backslash is considered as an escaped character
| <#_ESCAPED_CHAR: "\\" ~[] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
"<", ">", "=", "[", "]", "\"", "{", "}", "~", "\\", "/" ]
| <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
}
<DEFAULT, Range> SKIP : {
< <_WHITESPACE>>
}
<DEFAULT> TOKEN : {
<AND: ("AND" | "&&") >
| <OR: ("OR" | "||") >
| <NOT: ("NOT" | "!") >
| <PLUS: "+" >
| <MINUS: "-" >
| <LPAREN: "(" >
| <RPAREN: ")" >
| <OP_COLON: ":" >
| <OP_EQUAL: "=" >
| <OP_LESSTHAN: "<" >
| <OP_LESSTHANEQ: "<=" >
| <OP_MORETHAN: ">" >
| <OP_MORETHANEQ: ">=" >
| <CARAT: "^" > : Boost
| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : Range
| <RANGEEX_START: "{" > : Range
}
<Boost> TOKEN : {
<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}
<Range> TOKEN : {
<RANGE_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
| <RANGEEX_END: "}"> : DEFAULT
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
| <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
}
ModifierQueryNode.Modifier Modifiers() : {
ModifierQueryNode.Modifier ret = ModifierQueryNode.Modifier.MOD_NONE;
}
{
[
<PLUS> { ret = ModifierQueryNode.Modifier.MOD_REQ; }
| <MINUS> { ret = ModifierQueryNode.Modifier.MOD_NOT; }
| <NOT> { ret = ModifierQueryNode.Modifier.MOD_NOT; }
]
{ return ret; }
}
// This makes sure that there is no garbage after the query string
QueryNode TopLevelQuery(CharSequence field) :
{
QueryNode q;
}
{
q=Query(field) <EOF>
{
return q;
}
}
// These changes were made to introduce operator precedence:
// - Clause() now returns a QueryNode.
// - The modifiers are consumed by Clause() and returned as part of the QueryNode Object
// - Query does not consume conjunctions (AND, OR) anymore.
// - This is now done by two new non-terminals: ConjClause and DisjClause
// The parse tree looks similar to this:
// Query ::= DisjQuery ( DisjQuery )*
// DisjQuery ::= ConjQuery ( OR ConjQuery )*
// ConjQuery ::= Clause ( AND Clause )*
// Clause ::= [ Modifier ] ...
QueryNode Query(CharSequence field) :
{
Vector<QueryNode> clauses = null;
QueryNode c, first=null;
}
{
first=DisjQuery(field)
(
c=DisjQuery(field)
{
if (clauses == null) {
clauses = new Vector<QueryNode>();
clauses.addElement(first);
}
clauses.addElement(c);
}
)*
{
if (clauses != null) {
return new BooleanQueryNode(clauses);
} else {
// Handle the case of a "pure" negation query which
// needs to be wrapped as a boolean query, otherwise
// the returned result drops the negation.
if (first instanceof ModifierQueryNode) {
ModifierQueryNode m = (ModifierQueryNode) first;
if (m.getModifier() == ModifierQueryNode.Modifier.MOD_NOT) {
return new BooleanQueryNode(Arrays.<QueryNode> asList(m));
}
}
return first;
}
}
}
QueryNode DisjQuery(CharSequence field) : {
QueryNode first, c;
Vector<QueryNode> clauses = null;
}
{
first = ConjQuery(field)
(
<OR> c=ConjQuery(field)
{
if (clauses == null) {
clauses = new Vector<QueryNode>();
clauses.addElement(first);
}
clauses.addElement(c);
}
)*
{
if (clauses != null) {
return new OrQueryNode(clauses);
} else {
return first;
}
}
}
QueryNode ConjQuery(CharSequence field) : {
QueryNode first, c;
Vector<QueryNode> clauses = null;
}
{
first = ModClause(field)
(
<AND> c=ModClause(field)
{
if (clauses == null) {
clauses = new Vector<QueryNode>();
clauses.addElement(first);
}
clauses.addElement(c);
}
)*
{
if (clauses != null) {
return new AndQueryNode(clauses);
} else {
return first;
}
}
}
// QueryNode Query(CharSequence field) :
// {
// List clauses = new ArrayList();
// List modifiers = new ArrayList();
// QueryNode q, firstQuery=null;
// ModifierQueryNode.Modifier mods;
// int conj;
// }
// {
// mods=Modifiers() q=Clause(field)
// {
// if (mods == ModifierQueryNode.Modifier.MOD_NONE) firstQuery=q;
//
// // do not create modifier nodes with MOD_NONE
// if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
// q = new ModifierQueryNode(q, mods);
// }
// clauses.add(q);
// }
// (
// conj=Conjunction() mods=Modifiers() q=Clause(field)
// {
// // do not create modifier nodes with MOD_NONE
// if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
// q = new ModifierQueryNode(q, mods);
// }
// clauses.add(q);
// //TODO: figure out what to do with AND and ORs
// }
// )*
// {
// if (clauses.size() == 1 && firstQuery != null)
// return firstQuery;
// else {
// return new BooleanQueryNode(clauses);
// }
// }
// }
QueryNode ModClause(CharSequence field) : {
QueryNode q;
ModifierQueryNode.Modifier mods;
}
{
mods=Modifiers() q= Clause(field) {
if (mods != ModifierQueryNode.Modifier.MOD_NONE) {
q = new ModifierQueryNode(q, mods);
}
return q;
}
}
QueryNode Clause(CharSequence field) : {
QueryNode q;
Token fieldToken=null, boost=null, operator=null, term=null;
FieldQueryNode qLower, qUpper;
boolean lowerInclusive, upperInclusive;
boolean group = false;
}
{
(
LOOKAHEAD(3)
fieldToken=<TERM> (
( <OP_COLON> | <OP_EQUAL> ) {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);} q=Term(field)
| ( operator=<OP_LESSTHAN> | operator=<OP_LESSTHANEQ> | operator=<OP_MORETHAN> | operator=<OP_MORETHANEQ> ) {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);}( term=<TERM> | term=<QUOTED> | term=<NUMBER> )
{
if (term.kind == QUOTED) {
term.image = term.image.substring(1, term.image.length()-1);
}
switch (operator.kind) {
case OP_LESSTHAN:
lowerInclusive = true;
upperInclusive = false;
qLower = new FieldQueryNode(field,
"*", term.beginColumn, term.endColumn);
qUpper = new FieldQueryNode(field,
EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn);
break;
case OP_LESSTHANEQ:
lowerInclusive = true;
upperInclusive = true;
qLower = new FieldQueryNode(field,
"*", term.beginColumn, term.endColumn);
qUpper = new FieldQueryNode(field,
EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn);
break;
case OP_MORETHAN:
lowerInclusive = false;
upperInclusive = true;
qLower = new FieldQueryNode(field,
EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn);
qUpper = new FieldQueryNode(field,
"*", term.beginColumn, term.endColumn);
break;
case OP_MORETHANEQ:
lowerInclusive = true;
upperInclusive = true;
qLower = new FieldQueryNode(field,
EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn);
qUpper = new FieldQueryNode(field,
"*", term.beginColumn, term.endColumn);
break;
default:
throw new Error("Unhandled case: operator="+operator.toString());
}
q = new TermRangeQueryNode(qLower, qUpper, lowerInclusive, upperInclusive);
}
)
| [
LOOKAHEAD(2)
fieldToken=<TERM>
( <OP_COLON> | <OP_EQUAL> ) {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);}
]
(
q=Term(field)
| <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? {group=true;}
)
)
{
if (boost != null) {
float f = (float)1.0;
try {
f = Float.parseFloat(boost.image);
// avoid boosting null queries, such as those caused by stop words
if (q != null) {
q = new BoostQueryNode(q, f);
}
} catch (Exception ignored) {
/* Should this be handled somehow? (defaults to "no boost", if
* boost number is invalid)
*/
}
}
if (group) { q = new GroupQueryNode(q);}
return q;
}
}
QueryNode Term(CharSequence field) : {
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean fuzzy = false;
boolean regexp = false;
boolean startInc=false;
boolean endInc=false;
QueryNode q =null;
FieldQueryNode qLower, qUpper;
float defaultMinSimilarity = org.apache.lucene.search.FuzzyQuery.defaultMinSimilarity;
}
{
(
(
term=<TERM> { q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); }
| term=<REGEXPTERM> { regexp=true; }
| term=<NUMBER>
)
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
[ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
{
if (fuzzy) {
float fms = defaultMinSimilarity;
try {
fms = Float.parseFloat(fuzzySlop.image.substring(1));
} catch (Exception ignored) { }
if(fms < 0.0f){
throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));
} else if (fms >= 1.0f && fms != (int) fms) {
throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_EDITS));
}
q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);
} else if (regexp) {
String re = term.image.substring(1, term.image.length()-1);
q = new RegexpQueryNode(field, re, 0, re.length());
}
}
| ( ( <RANGEIN_START> {startInc=true;} | <RANGEEX_START> )
( goop1=<RANGE_GOOP>|goop1=<RANGE_QUOTED>|goop1=<RANGE_TO> )
( <RANGE_TO> )
( goop2=<RANGE_GOOP>|goop2=<RANGE_QUOTED>|goop2=<RANGE_TO> )
( <RANGEIN_END> {endInc=true;} | <RANGEEX_END>))
[ <CARAT> boost=<NUMBER> ]
{
if (goop1.kind == RANGE_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
}
if (goop2.kind == RANGE_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
}
qLower = new FieldQueryNode(field,
EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn);
qUpper = new FieldQueryNode(field,
EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn);
q = new TermRangeQueryNode(qLower, qUpper, startInc ? true : false, endInc ? true : false);
}
| term=<QUOTED> {q = new QuotedFieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image.substring(1, term.image.length()-1)), term.beginColumn + 1, term.endColumn - 1);}
[ fuzzySlop=<FUZZY_SLOP> ]
[ <CARAT> boost=<NUMBER> ]
{
int phraseSlop = 0;
if (fuzzySlop != null) {
try {
phraseSlop = (int)Float.parseFloat(fuzzySlop.image.substring(1));
q = new SlopQueryNode(q, phraseSlop);
}
catch (Exception ignored) {
/* Should this be handled somehow? (defaults to "no PhraseSlop", if
* slop number is invalid)
*/
}
}
}
)
{
if (boost != null) {
float f = (float)1.0;
try {
f = Float.parseFloat(boost.image);
// avoid boosting null queries, such as those caused by stop words
if (q != null) {
q = new BoostQueryNode(q, f);
}
} catch (Exception ignored) {
/* Should this be handled somehow? (defaults to "no boost", if
* boost number is invalid)
*/
}
}
return q;
}
}