| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| options { /* JavaCC generator options for this grammar */ |
| STATIC=false; /* per JavaCC docs: generate non-static parser members, so several parser instances can coexist */ |
| JAVA_UNICODE_ESCAPE=true; /* NOTE(review): presumably has no effect while USER_CHAR_STREAM is on - confirm against the JavaCC docs */ |
| USER_CHAR_STREAM=true; /* the character stream is supplied by the caller; the constructor in this file passes a FastCharStream */ |
| } |
| |
| PARSER_BEGIN(QueryParser) |
| |
| package org.apache.solr.parser; |
| |
| import java.io.StringReader; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Set; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.search.BooleanClause; |
| import org.apache.lucene.search.Query; |
| import org.apache.solr.search.SyntaxError; |
| import org.apache.solr.search.QParser; |
| |
| |
| public class QueryParser extends SolrQueryParserBase { |
| /** The default operator for parsing queries. |
| */ |
| static public enum Operator { OR, AND } |
| |
| /** default split on whitespace behavior */ |
| public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = false; |
| |
| public QueryParser(String defaultField, QParser parser) { |
| this(new FastCharStream(new StringReader(""))); |
| init(defaultField, parser); |
| } |
| |
| /** |
| * @see #setSplitOnWhitespace(boolean) |
| */ |
| public boolean getSplitOnWhitespace() { |
| return splitOnWhitespace; |
| } |
| |
| /** |
| * Whether query text should be split on whitespace prior to analysis. |
| * Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>. |
| */ |
| public void setSplitOnWhitespace(boolean splitOnWhitespace) { |
| this.splitOnWhitespace = splitOnWhitespace; |
| } |
| |
| private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE; |
| private static Set<Integer> disallowedPostMultiTerm |
| = new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR)); |
| private static boolean allowedPostMultiTerm(int tokenKind) { |
| return disallowedPostMultiTerm.contains(tokenKind) == false; |
| } |
| |
| @Override |
| protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, |
| boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries) |
| throws SyntaxError { |
| setAutoGenerateMultiTermSynonymsPhraseQuery(fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries()); |
| // Don't auto-quote graph-aware field queries |
| boolean treatAsQuoted = getSplitOnWhitespace() |
| ? (quoted || fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries()) : quoted; |
| return super.newFieldQuery(analyzer, field, queryText, treatAsQuoted, false, fieldEnableGraphQueries); |
| } |
| } |
| |
| PARSER_END(QueryParser) |
| |
| TOKEN_MGR_DECLS : { /* declarations made available to the lexical actions in the token rules */ |
| int commentNestingDepth ; /* number of block comments currently open; incremented by the comment-open SKIP rule and decremented by the comment-close SKIP rule */ |
| } |
| |
| /* ***************** */ |
| /* Token Definitions */ |
| /* ***************** */ |
| |
| <*> TOKEN : { /* private helper patterns (the # prefix): per JavaCC docs they never become tokens themselves and can only be referenced from other token definitions; declared for all lexical states */ |
| <#_NUM_CHAR: ["0"-"9"] > /* one ASCII digit */ |
| | <#_ESCAPED_CHAR: "\\" ~[] > // every character that follows a backslash is considered as an escaped character |
| | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", |
| "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ] |
| | <_ESCAPED_CHAR> ) > /* first character of a term: anything except whitespace and the listed syntax characters, or any backslash-escaped character */ |
| | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") > /* later characters of a term: additionally allows - + / and ! */ |
| | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > /* space, tab, LF, CR or U+3000 */ |
| | <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > /* one character inside a double-quoted string: not a double quote or backslash, unless escaped */ |
| | <#_SQUOTED_CHAR: ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) > /* one character inside a single-quoted string: not a single quote or backslash, unless escaped */ |
| } |
| |
| <DEFAULT, COMMENT> SKIP : { /* Input discarded in both the DEFAULT and COMMENT states: whitespace, and the comment opener, which increments commentNestingDepth and moves to (or stays in) COMMENT */ |
| < <_WHITESPACE>> |
| | "/*" {commentNestingDepth++;} : COMMENT |
| } |
| |
| <COMMENT> SKIP : { |
| // Inside a comment: the closing marker decrements commentNestingDepth and switches back to DEFAULT only when the depth reaches 0; any other single character is skipped. |
| // Trying to avoid matching end-of-comment inside a string leads to more problems (incorrectly thinking we are in a string, due to the simplistic matching in this state), so the following rule stays disabled: |
| // < ("\"" (<_QUOTED_CHAR>)* "\"") > |
| "*/" { commentNestingDepth -= 1; SwitchTo( commentNestingDepth==0 ? DEFAULT : COMMENT ); } |
| | < ~[]> |
| } |
| |
| <Range> SKIP : { /* whitespace is discarded while in the Range lexical state */ |
| < <_WHITESPACE>> |
| } |
| |
| <DEFAULT> TOKEN : { /* tokens recognised in the DEFAULT lexical state */ |
| <AND: ("AND" | "&&") > |
| | <OR: ("OR" | "||") > |
| | <NOT: ("NOT" | "!") > |
| | <PLUS: "+" > |
| | <MINUS: "-" > |
| | <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> > /* an operator character immediately followed by one whitespace character; Term() keeps only the operator character as term text */ |
| | <LPAREN: "(" > |
| | <RPAREN: ")" > |
| | <COLON: ":" > |
| | <STAR: "*" > |
| | <CARAT: "^" > : Boost /* switches to the Boost state, where NUMBER is defined */ |
| | <QUOTED: "\"" (<_QUOTED_CHAR>)* "\""> /* double-quoted string, possibly empty */ |
| | <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* > |
| | <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? > /* tilde, optionally followed by an integer or decimal number */ |
| | <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) > /* a term ending in a star; NOTE(review): the lone-star alternative has the same image as STAR, which is declared earlier - confirm which one the token manager reports */ |
| | <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > /* term that may contain star or question-mark wildcards anywhere */ |
| | <REGEXPTERM: "/" (~["*","/"] | "\\/" ) (~[ "/" ] | "\\/" )* "/" > /* slash-delimited text; the first body character may not be a star or an unescaped slash */ |
| | <RANGEIN_START: "[" > : Range /* inclusive range opener; switches to the Range state */ |
| | <RANGEEX_START: "{" > : Range /* exclusive range opener; switches to the Range state */ |
| // TODO: consider using token states instead of inlining SQUOTED |
| // | <SQUOTED: "'" (<_SQUOTED_CHAR>)* "'"> |
| // | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | <SQUOTED> | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* > |
| | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* > /* one or more brace-bang local-params groups of name or name=value entries (value double-quoted, single-quoted or bare), followed by trailing text up to a right paren, space, tab, newline, left brace or caret */ |
| | <FILTER: "filter(" > /* opens a filter clause; Clause() expects the matching RPAREN */ |
| } |
| |
| <Boost> TOKEN : { /* the only token of the Boost state, which is entered after a CARAT */ |
| <NUMBER: ("=")?("-")? (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT /* optional = and optional minus sign, digits, optional decimal part; then back to DEFAULT */ |
| } |
| |
| <Range> TOKEN : { /* tokens recognised between a range opener and its closer */ |
| <RANGE_TO: "TO"> |
| | <RANGEIN_END: "]"> : DEFAULT /* inclusive closer; back to DEFAULT */ |
| | <RANGEEX_END: "}"> : DEFAULT /* exclusive closer; back to DEFAULT */ |
| | <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\""> /* double-quoted, non-empty endpoint; Term() strips the surrounding quotes */ |
| | <RANGE_GOOP: (~[ " ", "]", "}" ])+ > /* unquoted endpoint: any run of characters other than space and the two closers */ |
| } |
| |
| // * Query ::= ( Clause )* |
| // * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) |
| |
| int Conjunction() : { /* Consumes an optional AND or OR token and returns the matching CONJ_ code */ |
| int ret = CONJ_NONE; /* returned when neither token is present */ |
| } |
| { |
| [ |
| <AND> { ret = CONJ_AND; } |
| | <OR> { ret = CONJ_OR; } |
| ] |
| { return ret; } |
| } |
| |
| int Modifiers() : { /* Consumes an optional PLUS, MINUS or NOT token and returns the matching MOD_ code */ |
| int ret = MOD_NONE; /* returned when no modifier token is present */ |
| } |
| { |
| [ |
| <PLUS> { ret = MOD_REQ; } |
| | <MINUS> { ret = MOD_NOT; } /* MINUS and NOT yield the same code */ |
| | <NOT> { ret = MOD_NOT; } |
| ] |
| { return ret; } |
| } |
| |
| // Parses a complete query for the given field and then requires <EOF>; this makes sure that there is no garbage after the query string |
| Query TopLevelQuery(String field) throws SyntaxError : { |
| Query q; /* result of the nested Query(field) production */ |
| } |
| { |
| q=Query(field) <EOF> |
| { |
| return q; |
| } |
| } |
| |
| Query Query(String field) throws SyntaxError : /* Parses one or more clauses against the given field and combines them into a single Query */ |
| { |
| List<BooleanClause> clauses = new ArrayList<BooleanClause>(); /* every parsed clause is accumulated here */ |
| Query q; |
| int conj, mods; |
| } |
| { |
| ( /* first clause: no conjunction is read, CONJ_NONE is passed to addClause */ |
| LOOKAHEAD(2) |
| MultiTerm(field, clauses) /* adds its own clause(s) to the list */ |
| | mods=Modifiers() q=Clause(field) |
| { addClause(clauses, CONJ_NONE, mods, q); } |
| ) |
| ( /* any further clauses, each optionally preceded by a conjunction */ |
| LOOKAHEAD(2) |
| MultiTerm(field, clauses) |
| | conj=Conjunction() mods=Modifiers() q=Clause(field) |
| { addClause(clauses, conj, mods, q); } |
| )* |
| { |
| if (clauses.size() == 1 && clauses.get(0).getOccur() == BooleanClause.Occur.SHOULD) { /* a single SHOULD clause may be returned without a boolean wrapper */ |
| Query firstQuery = clauses.get(0).getQuery(); |
| if ( ! (firstQuery instanceof RawQuery) || ((RawQuery)firstQuery).getTermCount() == 1) { /* a RawQuery qualifies only when its term count is exactly 1 */ |
| return rawToNormal(firstQuery); |
| } |
| } |
| return getBooleanQuery(clauses); /* every other case: build the query from the full clause list */ |
| } |
| } |
| |
| Query Clause(String field) throws SyntaxError : { /* Parses one clause: an optional field prefix followed by a term, a parenthesised sub-query, a filter clause or a local-params block */ |
| Query q; |
| Token fieldToken=null, boost=null; |
| Token localParams=null; |
| int flags = 0; /* holds the value returned by startFilter() so it can be handed to restoreFlags() */ |
| } |
| { |
| [ /* optional field prefix; when present it replaces the field argument for the rest of this clause */ |
| LOOKAHEAD(2) |
| ( |
| fieldToken=<TERM> <COLON> { field = discardEscapeChar(fieldToken.image); } |
| | <STAR> <COLON> { field = "*"; } |
| ) |
| ] |
| ( |
| q=Term(field) |
| | <LPAREN> q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ] |
| | (<FILTER> { flags=startFilter(); } q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ] { q=getFilter(q); restoreFlags(flags); } ) |
| | (localParams = <LPARAMS> [ <CARAT> boost=<NUMBER> ] { q=getLocalParams(field, localParams.image); } ) |
| ) |
| { return handleBoost(q, boost); } /* boost is still null here unless one of the optional caret-NUMBER suffixes above matched */ |
| } |
| |
| Query Term(String field) throws SyntaxError : { /* Parses a bare term, a range expression or a quoted string, each with optional boost, and returns the resulting Query */ |
| Token term, boost=null, fuzzySlop=null, goop1, goop2; |
| boolean prefix = false; |
| boolean wildcard = false; |
| boolean fuzzy = false; |
| boolean regexp = false; |
| boolean startInc=false; /* set when the range opens with the inclusive bracket */ |
| boolean endInc=false; /* set when the range closes with the inclusive bracket */ |
| Query q; |
| } |
| { |
| ( |
| ( /* alternative 1: a bare token; the flags record which kind matched */ |
| term=<TERM> |
| | term=<STAR> { wildcard=true; } |
| | term=<PREFIXTERM> { prefix=true; } |
| | term=<WILDTERM> { wildcard=true; } |
| | term=<REGEXPTERM> { regexp=true; } |
| | term=<NUMBER> |
| | term=<BAREOPER> { term.image = term.image.substring(0,1); } /* keep only the operator character, dropping the trailing whitespace character */ |
| ) |
| [ /* optional boost and fuzzy slop, accepted in either order */ |
| <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] |
| | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ] |
| ] |
| { q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp); } |
| |
| | ( <RANGEIN_START> { startInc = true; } | <RANGEEX_START> ) /* alternative 2: a range expression */ |
| ( goop1=<RANGE_GOOP> | goop1=<RANGE_QUOTED> | goop1=<RANGE_TO> ) /* lower endpoint; the literal TO is accepted as an endpoint value too */ |
| ( <RANGE_TO> ) |
| ( goop2=<RANGE_GOOP> | goop2=<RANGE_QUOTED> | goop2=<RANGE_TO> ) /* upper endpoint */ |
| ( <RANGEIN_END> { endInc = true; } | <RANGEEX_END> ) |
| [ <CARAT> boost=<NUMBER> ] |
| { |
| boolean startOpen=false; |
| boolean endOpen=false; |
| if (goop1.kind == RANGE_QUOTED) { |
| goop1.image = goop1.image.substring(1, goop1.image.length()-1); /* strip the surrounding quotes */ |
| } else if ("*".equals(goop1.image)) { /* only an unquoted star marks an open lower bound */ |
| startOpen=true; |
| } |
| if (goop2.kind == RANGE_QUOTED) { |
| goop2.image = goop2.image.substring(1, goop2.image.length()-1); /* strip the surrounding quotes */ |
| } else if ("*".equals(goop2.image)) { /* only an unquoted star marks an open upper bound */ |
| endOpen=true; |
| } |
| q = getRangeQuery(getField(field), |
| startOpen ? null : discardEscapeChar(goop1.image), /* null is passed for an open bound */ |
| endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc); |
| } |
| | term=<QUOTED> /* alternative 3: a double-quoted string */ |
| [ |
| <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] |
| | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ] |
| ] |
| { q = handleQuotedTerm(getField(field), term, fuzzySlop); } |
| ) |
| { return handleBoost(q, boost); } /* boost is null when no caret-NUMBER suffix was present */ |
| } |
| |
| void MultiTerm(String field, List<BooleanClause> clauses) throws SyntaxError : { /* Consumes two or more consecutive TERM tokens and appends the resulting clause(s) to the clauses list */ |
| Token text; |
| List<String> terms = null; /* term texts collected for joint analysis; only allocated when splitOnWhitespace is false */ |
| } |
| { |
| text=<TERM> |
| { |
| if (splitOnWhitespace) { /* splitting: every term becomes its own clause right away */ |
| Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true); |
| addClause(clauses, CONJ_NONE, MOD_NONE, q); |
| } else { /* not splitting: remember the text and build one query at the end */ |
| terms = new ArrayList<String>(); |
| terms.add(discardEscapeChar(text.image)); |
| } |
| } |
| // Both lookaheads are required; the first lookahead vets the first following term and the second lookahead vets the rest |
| LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) }) /* next token must be a TERM that is not followed by COLON, STAR, FUZZY_SLOP, CARAT, AND or OR */ |
| ( |
| LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) }) |
| text=<TERM> |
| { |
| if (splitOnWhitespace) { |
| Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true); |
| addClause(clauses, CONJ_NONE, MOD_NONE, q); |
| } else { |
| terms.add(discardEscapeChar(text.image)); |
| } |
| } |
| )+ |
| { |
| if (splitOnWhitespace == false) { /* all collected term texts go through a single getFieldQuery call */ |
| Query q = getFieldQuery(getField(field), terms, true); |
| addMultiTermClause(clauses, q); |
| } |
| } |
| } |