| options { /* JavaCC generator settings for this grammar */ |
| STATIC=false; /* generate instance (non-static) parser members; the parser class below is constructed per instance */ |
| JAVA_UNICODE_ESCAPE=true; /* NOTE(review): JavaCC documents this option as ignored when USER_CHAR_STREAM is true - confirm it is still needed */ |
| USER_CHAR_STREAM=true; /* input is read from a caller-supplied char stream (the parser class below passes a FastCharStream) */ |
| IGNORE_CASE=false; /* token matching is case-sensitive */ |
| JDK_VERSION="1.5"; /* Java version targeted by the generated code */ |
| } |
| |
| PARSER_BEGIN(StandardSyntaxParser) |
| package org.apache.lucene.queryparser.flexible.standard.parser; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.StringReader; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.List; |
| import java.util.Vector; |
| |
| import org.apache.lucene.queryparser.flexible.messages.Message; |
| import org.apache.lucene.queryparser.flexible.messages.MessageImpl; |
| import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; |
| import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages; |
| import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode; |
| import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode; |
| import org.apache.lucene.queryparser.flexible.core.nodes.BoostQueryNode; |
| import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; |
| import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode; |
| import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode; |
| import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; |
| import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode; |
| import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode; |
| import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode; |
| import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; |
| import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; |
| import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; |
| import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; |
| |
| /** |
|  * Parser for the standard Lucene syntax. |
|  */ |
| public class StandardSyntaxParser implements SyntaxParser { |
| |
|   /** |
|    * Creates a parser over an empty input. {@link #parse(CharSequence, CharSequence)} |
|    * re-initializes the parser with the actual query text on every call. |
|    */ |
|   public StandardSyntaxParser() { |
|     this(new FastCharStream(new StringReader(""))); |
|   } |
| |
|   /** |
|    * Parses a query string, returning a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}. |
|    * |
|    * @param query the query string to be parsed. |
|    * @param field the field passed down to the grammar; clauses that do not name a field of their own use it. |
|    * @return the node produced by the TopLevelQuery production. |
|    * @throws QueryNodeParseException if the parsing fails. A ParseException raised by the grammar is |
|    *         rethrown with the query attached; any Error is wrapped in a new QueryNodeParseException. |
|    */ |
|   public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException { |
|     final String queryText = query.toString(); |
|     ReInit(new FastCharStream(new StringReader(queryText))); |
|     try { |
|       // TopLevelQuery is a Query followed by the end-of-input (EOF) |
|       return TopLevelQuery(field); |
|     } catch (ParseException parseFailure) { |
|       parseFailure.setQuery(query); |
|       throw parseFailure; |
|     } catch (Error failure) { |
|       final Message cannotParse = |
|           new MessageImpl(QueryParserMessages.INVALID_SYNTAX_CANNOT_PARSE, query, failure.getMessage()); |
|       final QueryNodeParseException wrapped = new QueryNodeParseException(failure); |
|       wrapped.setQuery(query); |
|       wrapped.setNonLocalizedMessage(cannotParse); |
|       throw wrapped; |
|     } |
|   } |
| |
| } |
| |
| PARSER_END(StandardSyntaxParser) |
| |
| /* ***************** */ |
| /* Token Definitions */ |
| /* ***************** */ |
| |
| <*> TOKEN : { /* private (#-prefixed) fragments, declared for every lexical state */ |
| <#_NUM_CHAR: ["0"-"9"] > /* one ASCII digit */ |
| // every character that follows a backslash is considered as an escaped character |
| | <#_ESCAPED_CHAR: "\\" ~[] > |
| | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", |
| "<", ">", "=", "[", "]", "\"", "{", "}", "~", "\\", "/" ] |
| | <_ESCAPED_CHAR> ) > /* any character outside the listed whitespace/operator set, or an escaped character */ |
| | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) > /* term continuation: a start character, an escape, or a bare - or + */ |
| | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > /* space, tab, newline, carriage return, ideographic space */ |
| | <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > /* inside quotes: any character except a quote or backslash, or an escaped character */ |
| } |
| |
| <DEFAULT, Range> SKIP : { /* whitespace is discarded in the DEFAULT and Range states; Boost is not listed here */ |
| < <_WHITESPACE>> |
| } |
| |
| <DEFAULT> TOKEN : { /* tokens recognized in the DEFAULT lexical state */ |
| <AND: ("AND" | "&&") > /* conjunction: keyword or symbol form */ |
| | <OR: ("OR" | "||") > /* disjunction: keyword or symbol form */ |
| | <NOT: ("NOT" | "!") > /* negation: keyword or symbol form */ |
| | <PLUS: "+" > |
| | <MINUS: "-" > |
| | <LPAREN: "(" > |
| | <RPAREN: ")" > |
| | <OP_COLON: ":" > |
| | <OP_EQUAL: "=" > |
| | <OP_LESSTHAN: "<" > |
| | <OP_LESSTHANEQ: "<=" > |
| | <OP_MORETHAN: ">" > |
| | <OP_MORETHANEQ: ">=" > |
| | <CARAT: "^" > : Boost /* boost marker; switches to the Boost state, where NUMBER is defined */ |
| | <QUOTED: "\"" (<_QUOTED_CHAR>)* "\""> /* double-quoted text; may be empty between the quotes */ |
| | <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* > /* bare term: one start character followed by any number of term characters */ |
| | <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? > /* tilde, optionally followed by digits with an optional fractional part */ |
| | <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" > /* slash-delimited text; a backslash-escaped slash may appear inside */ |
| | <RANGEIN_START: "[" > : Range /* opens a range; switches to the Range state */ |
| | <RANGEEX_START: "{" > : Range /* opens a range; switches to the Range state */ |
| } |
| |
| <Boost> TOKEN : { /* state entered after a CARAT token */ |
| <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT /* digits with an optional fractional part; returns to the DEFAULT state */ |
| } |
| |
| <Range> TOKEN : { /* state entered after RANGEIN_START or RANGEEX_START */ |
| <RANGE_TO: "TO"> /* separator between the two range bounds */ |
| | <RANGEIN_END: "]"> : DEFAULT /* closes the range; returns to the DEFAULT state */ |
| | <RANGEEX_END: "}"> : DEFAULT /* closes the range; returns to the DEFAULT state */ |
| | <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\""> /* quoted bound; requires at least one character between the quotes */ |
| | <RANGE_GOOP: (~[ " ", "]", "}" ])+ > /* unquoted bound: a run of characters other than space and the two closers */ |
| } |
| |
| /** |
|  * Consumes an optional clause prefix and maps it to a modifier: |
|  * PLUS gives MOD_REQ, MINUS or NOT gives MOD_NOT, and no prefix gives MOD_NONE. |
|  */ |
| ModifierQueryNode.Modifier Modifiers() : { |
|   ModifierQueryNode.Modifier modifier = ModifierQueryNode.Modifier.MOD_NONE; |
| } |
| { |
|   [ |
|     <PLUS> { modifier = ModifierQueryNode.Modifier.MOD_REQ; } |
|   | ( <MINUS> | <NOT> ) { modifier = ModifierQueryNode.Modifier.MOD_NOT; } |
|   ] |
|   { return modifier; } |
| } |
| |
| // Entry production: a complete Query that must be followed by end of input, |
| // which makes sure that there is no garbage after the query string. |
| QueryNode TopLevelQuery(CharSequence field) : |
| { |
|   QueryNode root; |
| } |
| { |
|   root=Query(field) <EOF> |
|   { return root; } |
| } |
| |
| // These changes were made to introduce operator precedence: |
| // - Clause() now returns a QueryNode. |
| // - The modifiers are consumed by ModClause() and returned as part of the QueryNode object. |
| // - Query does not consume conjunctions (AND, OR) anymore. |
| // - This is now done by two new non-terminals: ConjQuery and DisjQuery. |
| // The parse tree looks similar to this: |
| // Query ::= DisjQuery ( DisjQuery )* |
| // DisjQuery ::= ConjQuery ( OR ConjQuery )* |
| // ConjQuery ::= ModClause ( AND ModClause )* |
| // ModClause ::= [ Modifier ] Clause |
| |
| |
| /** |
|  * Parses one or more DisjQuery clauses that follow one another with no operator in between. |
|  * |
|  * Two or more clauses are returned as a BooleanQueryNode holding them in input order. |
|  * A single clause is returned as is, except when it is a ModifierQueryNode with MOD_NOT: |
|  * that one is wrapped in a one-element BooleanQueryNode. |
|  * |
|  * @param field the field passed on to every DisjQuery |
|  */ |
| QueryNode Query(CharSequence field) : |
| { |
|   // ArrayList rather than Vector: the list is local to this call, so Vector's synchronization buys nothing. |
|   List<QueryNode> clauses = null; |
|   QueryNode c, first=null; |
| } |
| { |
|   first=DisjQuery(field) |
|   ( |
|     c=DisjQuery(field) |
|     { |
|       // Allocate lazily: a single-clause query never needs a list. |
|       if (clauses == null) { |
|         clauses = new ArrayList<QueryNode>(); |
|         clauses.add(first); |
|       } |
|       clauses.add(c); |
|     } |
|   )* |
|   { |
|     if (clauses != null) { |
|       return new BooleanQueryNode(clauses); |
|     } else { |
|       // Handle the case of a "pure" negation query which |
|       // needs to be wrapped as a boolean query, otherwise |
|       // the returned result drops the negation. |
|       if (first instanceof ModifierQueryNode) { |
|         ModifierQueryNode m = (ModifierQueryNode) first; |
|         if (m.getModifier() == ModifierQueryNode.Modifier.MOD_NOT) { |
|           return new BooleanQueryNode(Arrays.<QueryNode> asList(m)); |
|         } |
|       } |
|       return first; |
|     } |
|   } |
| } |
| |
| /** |
|  * Parses one or more ConjQuery clauses separated by OR. |
|  * |
|  * Two or more clauses are returned as an OrQueryNode holding them in input order; |
|  * a single clause is returned unchanged. |
|  * |
|  * @param field the field passed on to every ConjQuery |
|  */ |
| QueryNode DisjQuery(CharSequence field) : { |
|   QueryNode first, c; |
|   // ArrayList rather than Vector: the list is local to this call, so Vector's synchronization buys nothing. |
|   List<QueryNode> clauses = null; |
| } |
| { |
|   first = ConjQuery(field) |
|   ( |
|     <OR> c=ConjQuery(field) |
|     { |
|       // Allocate lazily: without an OR no list is needed. |
|       if (clauses == null) { |
|         clauses = new ArrayList<QueryNode>(); |
|         clauses.add(first); |
|       } |
|       clauses.add(c); |
|     } |
|   )* |
|   { |
|     if (clauses != null) { |
|       return new OrQueryNode(clauses); |
|     } else { |
|       return first; |
|     } |
|   } |
| } |
| |
| /** |
|  * Parses one or more ModClause clauses separated by AND. |
|  * |
|  * Two or more clauses are returned as an AndQueryNode holding them in input order; |
|  * a single clause is returned unchanged. |
|  * |
|  * @param field the field passed on to every ModClause |
|  */ |
| QueryNode ConjQuery(CharSequence field) : { |
|   QueryNode first, c; |
|   // ArrayList rather than Vector: the list is local to this call, so Vector's synchronization buys nothing. |
|   List<QueryNode> clauses = null; |
| } |
| { |
|   first = ModClause(field) |
|   ( |
|     <AND> c=ModClause(field) |
|     { |
|       // Allocate lazily: without an AND no list is needed. |
|       if (clauses == null) { |
|         clauses = new ArrayList<QueryNode>(); |
|         clauses.add(first); |
|       } |
|       clauses.add(c); |
|     } |
|   )* |
|   { |
|     if (clauses != null) { |
|       return new AndQueryNode(clauses); |
|     } else { |
|       return first; |
|     } |
|   } |
| } |
| |
| // QueryNode Query(CharSequence field) : |
| // { |
| // List clauses = new ArrayList(); |
| // List modifiers = new ArrayList(); |
| // QueryNode q, firstQuery=null; |
| // ModifierQueryNode.Modifier mods; |
| // int conj; |
| // } |
| // { |
| // mods=Modifiers() q=Clause(field) |
| // { |
| // if (mods == ModifierQueryNode.Modifier.MOD_NONE) firstQuery=q; |
| // |
| // // do not create modifier nodes with MOD_NONE |
| // if (mods != ModifierQueryNode.Modifier.MOD_NONE) { |
| // q = new ModifierQueryNode(q, mods); |
| // } |
| // clauses.add(q); |
| // } |
| // ( |
| // conj=Conjunction() mods=Modifiers() q=Clause(field) |
| // { |
| // // do not create modifier nodes with MOD_NONE |
| // if (mods != ModifierQueryNode.Modifier.MOD_NONE) { |
| // q = new ModifierQueryNode(q, mods); |
| // } |
| // clauses.add(q); |
| // //TODO: figure out what to do with AND and ORs |
| // } |
| // )* |
| // { |
| // if (clauses.size() == 1 && firstQuery != null) |
| // return firstQuery; |
| // else { |
| // return new BooleanQueryNode(clauses); |
| // } |
| // } |
| // } |
| |
| /** |
|  * Parses a Clause preceded by its optional modifier. The clause is wrapped in a |
|  * ModifierQueryNode unless the modifier is MOD_NONE, in which case it is returned as is. |
|  */ |
| QueryNode ModClause(CharSequence field) : { |
|   QueryNode clause; |
|   ModifierQueryNode.Modifier modifier; |
| } |
| { |
|   modifier=Modifiers() clause=Clause(field) { |
|     // do not create modifier nodes with MOD_NONE |
|     if (modifier == ModifierQueryNode.Modifier.MOD_NONE) { |
|       return clause; |
|     } |
|     return new ModifierQueryNode(clause, modifier); |
|   } |
| } |
| |
| /** |
|  * Parses a single clause, in one of these forms: |
|  *   field (: or =) Term |
|  *   field (comparison operator) value   -- becomes a TermRangeQueryNode whose open end is "*" |
|  *   [ field (: or =) ] Term |
|  *   [ field (: or =) ] ( Query ) [ ^ boost ]   -- the result is wrapped in a GroupQueryNode |
|  * A field prefix, once unescaped, replaces the incoming field for the rest of the clause. |
|  */ |
| QueryNode Clause(CharSequence field) : { |
|   QueryNode q; |
|   Token fieldToken=null, boost=null, operator=null, term=null; |
|   FieldQueryNode qLower, qUpper; |
|   boolean lowerInclusive, upperInclusive; |
|   // true when the compared value is the upper end of the range (the "less than" operators) |
|   boolean valueIsUpperBound; |
| |
|   boolean group = false; |
| } |
| { |
|   ( |
|     LOOKAHEAD(3) |
|     fieldToken=<TERM> ( |
|       ( <OP_COLON> | <OP_EQUAL> ) {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);} q=Term(field) |
|     | ( operator=<OP_LESSTHAN> | operator=<OP_LESSTHANEQ> | operator=<OP_MORETHAN> | operator=<OP_MORETHANEQ> ) {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);}( term=<TERM> | term=<QUOTED> | term=<NUMBER> ) |
|       { |
|         // a quoted value is compared without its surrounding quotes |
|         if (term.kind == QUOTED) { |
|           term.image = term.image.substring(1, term.image.length()-1); |
|         } |
|         // pick the inclusiveness flags and the side the value goes on |
|         switch (operator.kind) { |
|           case OP_LESSTHAN: |
|             valueIsUpperBound = true; |
|             lowerInclusive = true; |
|             upperInclusive = false; |
|             break; |
|           case OP_LESSTHANEQ: |
|             valueIsUpperBound = true; |
|             lowerInclusive = true; |
|             upperInclusive = true; |
|             break; |
|           case OP_MORETHAN: |
|             valueIsUpperBound = false; |
|             lowerInclusive = false; |
|             upperInclusive = true; |
|             break; |
|           case OP_MORETHANEQ: |
|             valueIsUpperBound = false; |
|             lowerInclusive = true; |
|             upperInclusive = true; |
|             break; |
|           default: |
|             throw new Error("Unhandled case: operator="+operator.toString()); |
|         } |
|         // both ends carry the position of the compared value; the open end is "*" |
|         if (valueIsUpperBound) { |
|           qLower = new FieldQueryNode(field, "*", term.beginColumn, term.endColumn); |
|           qUpper = new FieldQueryNode(field, |
|               EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); |
|         } else { |
|           qLower = new FieldQueryNode(field, |
|               EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); |
|           qUpper = new FieldQueryNode(field, "*", term.beginColumn, term.endColumn); |
|         } |
|         q = new TermRangeQueryNode(qLower, qUpper, lowerInclusive, upperInclusive); |
|       } |
|     ) |
|   | [ |
|       LOOKAHEAD(2) |
|       fieldToken=<TERM> |
|       ( <OP_COLON> | <OP_EQUAL> ) {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);} |
|     ] |
|     ( |
|       q=Term(field) |
|     | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? {group=true;} |
|     ) |
|   ) |
|   { |
|     // avoid boosting null queries, such as those caused by stop words |
|     if (boost != null && q != null) { |
|       try { |
|         q = new BoostQueryNode(q, Float.parseFloat(boost.image)); |
|       } catch (Exception ignored) { |
|         /* Should this be handled somehow? (defaults to "no boost", if |
|          * boost number is invalid) |
|          */ |
|       } |
|     } |
|     if (group) { |
|       q = new GroupQueryNode(q); |
|     } |
|     return q; |
|   } |
| } |
| |
| |
| /** |
|  * Parses a term-level expression, in one of these forms: |
|  *   a bare term, regular expression or number, optionally followed by a fuzzy marker and/or a boost |
|  *   a bracketed range "lower TO upper", optionally followed by a boost |
|  *   a quoted phrase, optionally followed by a slop marker and/or a boost |
|  * When a boost was given and the resulting node is not null, the node is wrapped in a BoostQueryNode. |
|  */ |
| QueryNode Term(CharSequence field) : { |
|   Token term, boost=null, slop=null, lower, upper; |
|   boolean fuzzy = false; |
|   boolean regexp = false; |
|   boolean lowerInclusive=false; |
|   boolean upperInclusive=false; |
|   QueryNode q =null; |
|   FieldQueryNode qLower, qUpper; |
|   float defaultMinSimilarity = org.apache.lucene.search.FuzzyQuery.defaultMinSimilarity; |
| } |
| { |
|   ( |
|     ( |
|       term=<TERM> { q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); } |
|     | term=<REGEXPTERM> { regexp=true; } |
|     | term=<NUMBER> |
|     ) |
|     [ slop=<FUZZY_SLOP> { fuzzy=true; } ] |
|     [ <CARAT> boost=<NUMBER> [ slop=<FUZZY_SLOP> { fuzzy=true; } ] ] |
|     { |
|       if (fuzzy) { |
|         // a marker without a parsable number keeps the default similarity |
|         float similarity; |
|         try { |
|           similarity = Float.parseFloat(slop.image.substring(1)); |
|         } catch (Exception ignored) { |
|           similarity = defaultMinSimilarity; |
|         } |
|         if (similarity < 0.0f) { |
|           throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS)); |
|         } |
|         // values of 1 or more are only accepted when they are whole numbers |
|         if (similarity >= 1.0f && similarity != (int) similarity) { |
|           throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_EDITS)); |
|         } |
|         q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), similarity, term.beginColumn, term.endColumn); |
|       } else if (regexp) { |
|         // drop the delimiting slashes |
|         String pattern = term.image.substring(1, term.image.length()-1); |
|         q = new RegexpQueryNode(field, pattern, 0, pattern.length()); |
|       } |
|     } |
|   | ( ( <RANGEIN_START> {lowerInclusive=true;} | <RANGEEX_START> ) |
|       ( lower=<RANGE_GOOP>|lower=<RANGE_QUOTED>|lower=<RANGE_TO> ) |
|       ( <RANGE_TO> ) |
|       ( upper=<RANGE_GOOP>|upper=<RANGE_QUOTED>|upper=<RANGE_TO> ) |
|       ( <RANGEIN_END> {upperInclusive=true;} | <RANGEEX_END>)) |
|     [ <CARAT> boost=<NUMBER> ] |
|     { |
|       // quoted bounds are used without their surrounding quotes |
|       if (lower.kind == RANGE_QUOTED) { |
|         lower.image = lower.image.substring(1, lower.image.length()-1); |
|       } |
|       if (upper.kind == RANGE_QUOTED) { |
|         upper.image = upper.image.substring(1, upper.image.length()-1); |
|       } |
| |
|       qLower = new FieldQueryNode(field, |
|           EscapeQuerySyntaxImpl.discardEscapeChar(lower.image), lower.beginColumn, lower.endColumn); |
|       qUpper = new FieldQueryNode(field, |
|           EscapeQuerySyntaxImpl.discardEscapeChar(upper.image), upper.beginColumn, upper.endColumn); |
|       q = new TermRangeQueryNode(qLower, qUpper, lowerInclusive, upperInclusive); |
|     } |
|   | term=<QUOTED> {q = new QuotedFieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image.substring(1, term.image.length()-1)), term.beginColumn + 1, term.endColumn - 1);} |
|     [ slop=<FUZZY_SLOP> ] |
|     [ <CARAT> boost=<NUMBER> ] |
|     { |
|       if (slop != null) { |
|         try { |
|           // the number after the marker, truncated to an int, is the phrase slop |
|           q = new SlopQueryNode(q, (int)Float.parseFloat(slop.image.substring(1))); |
|         } catch (Exception ignored) { |
|           /* Should this be handled somehow? (defaults to "no PhraseSlop", if |
|            * slop number is invalid) |
|            */ |
|         } |
|       } |
|     } |
|   ) |
|   { |
|     // avoid boosting null queries, such as those caused by stop words |
|     if (boost != null && q != null) { |
|       try { |
|         q = new BoostQueryNode(q, Float.parseFloat(boost.image)); |
|       } catch (Exception ignored) { |
|         /* Should this be handled somehow? (defaults to "no boost", if |
|          * boost number is invalid) |
|          */ |
|       } |
|     } |
|     return q; |
|   } |
| } |