| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| options { |
| STATIC=false; |
| JAVA_UNICODE_ESCAPE=true; |
| USER_CHAR_STREAM=true; |
| } |
| |
| PARSER_BEGIN(QueryParser) |
| |
| package org.apache.lucene.queryparser.classic; |
| |
| import java.io.StringReader; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Locale; |
| import java.util.Set; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.document.DateTools; |
| import org.apache.lucene.search.BooleanClause; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.TermRangeQuery; |
| |
| /** |
| * This class is generated by JavaCC. The most important method is |
| * {@link #parse(String)}. |
| * |
| * The syntax for query strings is as follows: |
| * A Query is a series of clauses. |
| * A clause may be prefixed by: |
| * <ul> |
| * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating |
| * that the clause is required or prohibited respectively; or |
| * <li> a term followed by a colon, indicating the field to be searched. |
| * This enables one to construct queries which search multiple fields. |
| * </ul> |
| * |
| * A clause may be either: |
| * <ul> |
| * <li> a term, indicating all the documents that contain this term; or |
| * <li> a nested query, enclosed in parentheses. Note that this may be used |
| * with a <code>+</code>/<code>-</code> prefix to require any of a set of |
| * terms. |
| * </ul> |
| * |
| * Thus, in BNF, the query grammar is: |
| * <pre> |
| * Query ::= ( Clause )* |
| * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) |
| * </pre> |
| * |
| * <p> |
| * Examples of appropriately formatted queries can be found in the <a |
| * href="{@docRoot}/org/apache/lucene/queryparser/classic/package-summary.html#package.description">query syntax |
| * documentation</a>. |
| * </p> |
| * |
| * <p> |
| * In {@link TermRangeQuery}s, QueryParser tries to detect date values, e.g. |
| * <tt>date:[6/1/2005 TO 6/4/2005]</tt> produces a range query that searches |
| * for "date" fields between 2005-06-01 and 2005-06-04. Note that the format |
| * of the accepted input depends on {@link #setLocale(Locale) the locale}. |
| * A {@link org.apache.lucene.document.DateTools.Resolution} has to be set, |
| * if you want to use {@link DateTools} for date conversion. |
| * </p> |
| * <p> |
| * The date resolution that shall be used for RangeQueries can be set |
| * using {@link #setDateResolution(DateTools.Resolution)} |
| * or {@link #setDateResolution(String, DateTools.Resolution)}. The former |
| * sets the default date resolution for all fields, whereas the latter can |
| * be used to set field specific date resolutions. Field specific date |
| * resolutions take, if set, precedence over the default date resolution. |
| * </p> |
| * <p> |
| * If you don't use {@link DateTools} in your index, you can create your own |
| * query parser that inherits QueryParser and overwrites |
| * {@link #getRangeQuery(String, String, String, boolean, boolean)} to |
| * use a different method for date conversion. |
| * </p> |
| * |
| * <p>Note that QueryParser is <em>not</em> thread-safe.</p> |
| * |
| * <p><b>NOTE</b>: there is a new QueryParser in contrib, which matches |
| * the same syntax as this class, but is more modular, |
| * enabling substantial customization to how a query is created. |
| */ |
| public class QueryParser extends QueryParserBase { |
| /** The default operator for parsing queries. |
| * Use {@link QueryParserBase#setDefaultOperator} to change it. |
| */ |
| static public enum Operator { OR, AND } |
| |
| /** default split on whitespace behavior */ |
| public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = false; |
| |
| /** Create a query parser. |
| * @param f the default field for query terms. |
| * @param a used to find terms in the query text. |
| */ |
| public QueryParser(String f, Analyzer a) { |
| this(new FastCharStream(new StringReader(""))); |
| init(f, a); |
| } |
| |
| /** |
| * Set to true if phrase queries will be automatically generated |
| * when the analyzer returns more than one term from whitespace |
| * delimited text. |
| * NOTE: this behavior may not be suitable for all languages. |
| * <p> |
| * Set to false if phrase queries should only be generated when |
| * surrounded by double quotes. |
| * <p> |
| * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true |
| * is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>. |
| */ |
| @Override |
| public void setAutoGeneratePhraseQueries(boolean value) { |
| if (splitOnWhitespace == false && value == true) { |
| throw new IllegalArgumentException |
| ("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false"); |
| } |
| this.autoGeneratePhraseQueries = value; |
| } |
| |
| /** |
| * @see #setSplitOnWhitespace(boolean) |
| */ |
| public boolean getSplitOnWhitespace() { |
| return splitOnWhitespace; |
| } |
| |
| /** |
| * Whether query text should be split on whitespace prior to analysis. |
| * Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>. |
| * <p> |
| * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true |
| * is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>. |
| */ |
| public void setSplitOnWhitespace(boolean splitOnWhitespace) { |
| if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) { |
| throw new IllegalArgumentException |
| ("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true"); |
| } |
| this.splitOnWhitespace = splitOnWhitespace; |
| } |
| |
| private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE; |
| private static Set<Integer> disallowedPostMultiTerm |
| = new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR)); |
| private static boolean allowedPostMultiTerm(int tokenKind) { |
| return disallowedPostMultiTerm.contains(tokenKind) == false; |
| } |
| } |
| |
| PARSER_END(QueryParser) |
| |
| /* ***************** */ |
| /* Token Definitions */ |
| /* ***************** */ |
| |
| <*> TOKEN : { |
| <#_NUM_CHAR: ["0"-"9"] > |
| | <#_ESCAPED_CHAR: "\\" ~[] > // every character that follows a backslash is considered as an escaped character |
| | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", |
| "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ] |
| | <_ESCAPED_CHAR> ) > |
| | <#_TERM_CHAR: ( <_TERM_START_CHAR> | "-" | "+" ) > |
| | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > |
| | <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > |
| } |
| |
| <DEFAULT, Range> SKIP : { |
| < <_WHITESPACE>> |
| } |
| |
| <DEFAULT> TOKEN : { |
| <AND: ("AND" | "&&") > |
| | <OR: ("OR" | "||") > |
| | <NOT: ("NOT" | "!") > |
| | <PLUS: "+" > |
| | <MINUS: "-" > |
| | <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> > |
| | <LPAREN: "(" > |
| | <RPAREN: ")" > |
| | <COLON: ":" > |
| | <STAR: "*" > |
| | <CARAT: "^" > : Boost |
| | <QUOTED: "\"" (<_QUOTED_CHAR>)* "\""> |
| | <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* > |
| | <FUZZY_SLOP: "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) > |
| | <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) > |
| | <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > |
| | <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" > |
| | <RANGEIN_START: "[" > : Range |
| | <RANGEEX_START: "{" > : Range |
| } |
| |
| <Boost> TOKEN : { |
| <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT |
| } |
| |
| <Range> TOKEN : { |
| <RANGE_TO: "TO"> |
| | <RANGEIN_END: "]"> : DEFAULT |
| | <RANGEEX_END: "}"> : DEFAULT |
| | <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\""> |
| | <RANGE_GOOP: (~[ " ", "]", "}" ])+ > |
| } |
| |
| // * Query ::= ( Clause )* |
| // * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) |
| |
| int Conjunction() : { |
| int ret = CONJ_NONE; |
| } |
| { |
| [ |
| <AND> { ret = CONJ_AND; } |
| | <OR> { ret = CONJ_OR; } |
| ] |
| { return ret; } |
| } |
| |
| int Modifiers() : { |
| int ret = MOD_NONE; |
| } |
| { |
| [ |
| <PLUS> { ret = MOD_REQ; } |
| | <MINUS> { ret = MOD_NOT; } |
| | <NOT> { ret = MOD_NOT; } |
| ] |
| { return ret; } |
| } |
| |
| // This makes sure that there is no garbage after the query string |
| Query TopLevelQuery(String field) : { |
| Query q; |
| } |
| { |
| q=Query(field) <EOF> |
| { return q; } |
| } |
| |
| Query Query(String field) : |
| { |
| List<BooleanClause> clauses = new ArrayList<BooleanClause>(); |
| Query q, firstQuery=null; |
| int conj, mods; |
| } |
| { |
| ( |
| LOOKAHEAD(2) |
| firstQuery=MultiTerm(field, clauses) |
| | mods=Modifiers() q=Clause(field) |
| { |
| addClause(clauses, CONJ_NONE, mods, q); |
| if (mods == MOD_NONE) { |
| firstQuery = q; |
| } |
| } |
| ) |
| ( |
| LOOKAHEAD(2) |
| MultiTerm(field, clauses) |
| | conj=Conjunction() mods=Modifiers() q=Clause(field) |
| { addClause(clauses, conj, mods, q); } |
| )* |
| { |
| if (clauses.size() == 1 && firstQuery != null) { |
| return firstQuery; |
| } else { |
| return getBooleanQuery(clauses); |
| } |
| } |
| } |
| |
| Query Clause(String field) : { |
| Query q; |
| Token fieldToken=null, boost=null; |
| } |
| { |
| [ |
| LOOKAHEAD(2) |
| ( |
| fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);} |
| | <STAR> <COLON> {field="*";} |
| ) |
| ] |
| ( |
| q=Term(field) |
| | <LPAREN> q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ] |
| ) |
| { return handleBoost(q, boost); } |
| } |
| |
| Query Term(String field) : { |
| Token term, boost=null, fuzzySlop=null, goop1, goop2; |
| boolean prefix = false; |
| boolean wildcard = false; |
| boolean fuzzy = false; |
| boolean regexp = false; |
| boolean startInc=false; |
| boolean endInc=false; |
| Query q; |
| } |
| { |
| ( |
| ( |
| term=<TERM> |
| | term=<STAR> { wildcard=true; } |
| | term=<PREFIXTERM> { prefix=true; } |
| | term=<WILDTERM> { wildcard=true; } |
| | term=<REGEXPTERM> { regexp=true; } |
| | term=<NUMBER> |
| | term=<BAREOPER> { term.image = term.image.substring(0,1); } |
| ) |
| [ |
| <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] |
| | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ] |
| ] |
| { q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp); } |
| |
| | ( <RANGEIN_START> { startInc = true; } | <RANGEEX_START> ) |
| ( goop1=<RANGE_GOOP> | goop1=<RANGE_QUOTED> | goop1=<RANGE_TO> ) |
| ( <RANGE_TO> ) |
| ( goop2=<RANGE_GOOP> | goop2=<RANGE_QUOTED> | goop2=<RANGE_TO> ) |
| ( <RANGEIN_END> { endInc = true; } | <RANGEEX_END> ) |
| [ <CARAT> boost=<NUMBER> ] |
| { |
| boolean startOpen=false; |
| boolean endOpen=false; |
| if (goop1.kind == RANGE_QUOTED) { |
| goop1.image = goop1.image.substring(1, goop1.image.length()-1); |
| } else if ("*".equals(goop1.image)) { |
| startOpen=true; |
| } |
| if (goop2.kind == RANGE_QUOTED) { |
| goop2.image = goop2.image.substring(1, goop2.image.length()-1); |
| } else if ("*".equals(goop2.image)) { |
| endOpen=true; |
| } |
| q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc); |
| } |
| |
| | term=<QUOTED> |
| [ |
| <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] |
| | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ] |
| ] |
| { q = handleQuotedTerm(field, term, fuzzySlop); } |
| ) |
| { return handleBoost(q, boost); } |
| } |
| |
| /** Returns the first query if splitOnWhitespace=true or otherwise the entire produced query */ |
| Query MultiTerm(String field, List<BooleanClause> clauses) : { |
| Token text, whitespace, followingText; |
| Query firstQuery = null; |
| } |
| { |
| text=<TERM> |
| { |
| if (splitOnWhitespace) { |
| firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false); |
| addClause(clauses, CONJ_NONE, MOD_NONE, firstQuery); |
| } |
| } |
| // Both lookaheads are required; the first lookahead vets the first following term and the second lookahead vets the rest |
| LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) }) |
| ( |
| LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) }) |
| followingText=<TERM> |
| { |
| if (splitOnWhitespace) { |
| Query q = getFieldQuery(field, discardEscapeChar(followingText.image), false); |
| addClause(clauses, CONJ_NONE, MOD_NONE, q); |
| } else { // build up the text to send to analysis |
| text.image += " " + followingText.image; |
| } |
| } |
| )+ |
| { |
| if (splitOnWhitespace == false) { |
| firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false); |
| addMultiTermClauses(clauses, firstQuery); |
| } |
| return firstQuery; |
| } |
| } |