| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* Surround query language parser */ |
| |
| /* Query language operators: OR, AND, NOT, W, N, (, ), :, ^, *, ?, " and comma */ |
| |
| |
| options { /* JavaCC generator options for this grammar */ |
| STATIC=false; /* generate a non-static parser: state lives in each QueryParser instance */ |
| JAVA_UNICODE_ESCAPE=true; /* JavaCC option concerning Java Unicode escapes in the input; NOTE(review): the JavaCC docs appear to say it is ignored when USER_CHAR_STREAM is set - confirm */ |
| USER_CHAR_STREAM=true; /* the parser reads through a caller-supplied CharStream; this file passes FastCharStream instances */ |
| } |
| |
| PARSER_BEGIN(QueryParser) |
| |
| package org.apache.lucene.queryparser.surround.parser; |
| |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.io.StringReader; |
| |
| |
| import org.apache.lucene.analysis.TokenStream; |
| |
| import org.apache.lucene.queryparser.surround.query.SrndQuery; |
| import org.apache.lucene.queryparser.surround.query.FieldsQuery; |
| import org.apache.lucene.queryparser.surround.query.OrQuery; |
| import org.apache.lucene.queryparser.surround.query.AndQuery; |
| import org.apache.lucene.queryparser.surround.query.NotQuery; |
| import org.apache.lucene.queryparser.surround.query.DistanceQuery; |
| import org.apache.lucene.queryparser.surround.query.SrndTermQuery; |
| import org.apache.lucene.queryparser.surround.query.SrndPrefixQuery; |
| import org.apache.lucene.queryparser.surround.query.SrndTruncQuery; |
| |
| /** |
| * This class is generated by JavaCC. The only method that clients should need |
| * to call is {@link #parse parse()}. |
| * |
| |
| * <p>This parser generates queries that make use of position information |
| * (Span queries). It provides positional operators (<code>w</code> and |
| * <code>n</code>) that accept a numeric distance, as well as boolean |
| * operators (<code>and</code>, <code>or</code>, and <code>not</code>, |
| * wildcards (<code>*</code> and <code>?</code>), quoting (with |
| * <code>"</code>), and boosting (via <code>^</code>).</p> |
| |
| * <p>The operators (W, N, AND, OR, NOT) can be expressed lower-cased or |
| * upper-cased, and the non-unary operators (everything but NOT) support |
| * both infix <code>(a AND b AND c)</code> and prefix <code>AND(a, b, |
| * c)</code> notation. </p> |
| |
| * <p>The W and N operators express a positional relationship among their |
| * operands. W is ordered, and N is unordered. The distance is 1 by |
| * default, meaning the operands are adjacent, or may be provided as a |
| * prefix from 2-99. So, for example, 3W(a, b) means that terms a and b |
| * must appear within three positions of each other, or in other words, up |
| * to two terms may appear between a and b. </p> |
| */ |
| |
| public class QueryParser { |
| static final int MINIMUM_PREFIX_LENGTH = 3; |
| static final int MINIMUM_CHARS_IN_TRUNC = 3; |
| static final String TRUNCATION_ERROR_MESSAGE = "Too unrestrictive truncation: "; |
| static final String BOOST_ERROR_MESSAGE = "Cannot handle boost value: "; |
| |
| /* CHECKME: These should be the same as for the tokenizer. How? */ |
| static final char TRUNCATOR = '*'; |
| static final char ANY_CHAR = '?'; |
| static final char FIELD_OPERATOR = ':'; |
| |
| static public SrndQuery parse(String query) throws ParseException { |
| QueryParser parser = new QueryParser(); |
| return parser.parse2(query); |
| } |
| |
| public QueryParser() { |
| this(new FastCharStream(new StringReader(""))); |
| } |
| |
| public SrndQuery parse2(String query) throws ParseException { |
| ReInit(new FastCharStream(new StringReader(query))); |
| try { |
| return TopSrndQuery(); |
| } catch (TokenMgrError tme) { |
| throw new ParseException(tme.getMessage()); |
| } |
| } |
| |
| protected SrndQuery getFieldsQuery( |
| SrndQuery q, ArrayList<String> fieldNames) { |
| /* FIXME: check acceptable subquery: at least one subquery should not be |
| * a fields query. |
| */ |
| return new FieldsQuery(q, fieldNames, FIELD_OPERATOR); |
| } |
| |
| protected SrndQuery getOrQuery(List<SrndQuery> queries, boolean infix, Token orToken) { |
| return new OrQuery(queries, infix, orToken.image); |
| } |
| |
| protected SrndQuery getAndQuery(List<SrndQuery> queries, boolean infix, Token andToken) { |
| return new AndQuery( queries, infix, andToken.image); |
| } |
| |
| protected SrndQuery getNotQuery(List<SrndQuery> queries, Token notToken) { |
| return new NotQuery( queries, notToken.image); |
| } |
| |
| protected static int getOpDistance(String distanceOp) { |
| /* W, 2W, 3W etc -> 1, 2 3, etc. Same for N, 2N ... */ |
| return distanceOp.length() == 1 |
| ? 1 |
| : Integer.parseInt( distanceOp.substring( 0, distanceOp.length() - 1)); |
| } |
| |
| protected static void checkDistanceSubQueries(DistanceQuery distq, String opName) |
| throws ParseException { |
| String m = distq.distanceSubQueryNotAllowed(); |
| if (m != null) { |
| throw new ParseException("Operator " + opName + ": " + m); |
| } |
| } |
| |
| protected SrndQuery getDistanceQuery( |
| List<SrndQuery> queries, |
| boolean infix, |
| Token dToken, |
| boolean ordered) throws ParseException { |
| DistanceQuery dq = new DistanceQuery(queries, |
| infix, |
| getOpDistance(dToken.image), |
| dToken.image, |
| ordered); |
| checkDistanceSubQueries(dq, dToken.image); |
| return dq; |
| } |
| |
| protected SrndQuery getTermQuery( |
| String term, boolean quoted) { |
| return new SrndTermQuery(term, quoted); |
| } |
| |
| protected boolean allowedSuffix(String suffixed) { |
| return (suffixed.length() - 1) >= MINIMUM_PREFIX_LENGTH; |
| } |
| |
| protected SrndQuery getPrefixQuery( |
| String prefix, boolean quoted) { |
| return new SrndPrefixQuery(prefix, quoted, TRUNCATOR); |
| } |
| |
| protected boolean allowedTruncation(String truncated) { |
| /* At least 3 normal characters needed. */ |
| int nrNormalChars = 0; |
| for (int i = 0; i < truncated.length(); i++) { |
| char c = truncated.charAt(i); |
| if ((c != TRUNCATOR) && (c != ANY_CHAR)) { |
| nrNormalChars++; |
| } |
| } |
| return nrNormalChars >= MINIMUM_CHARS_IN_TRUNC; |
| } |
| |
| protected SrndQuery getTruncQuery(String truncated) { |
| return new SrndTruncQuery(truncated, TRUNCATOR, ANY_CHAR); |
| } |
| } |
| |
| PARSER_END(QueryParser) |
| |
| /* ***************** */ |
| /* Token Definitions */ |
| /* ***************** */ |
| |
| <*> TOKEN : { /* '#' marks private helper expressions, usable only inside other token definitions */ |
| <#_NUM_CHAR: ["0"-"9"] > /* a single decimal digit */ |
| | <#_TERM_CHAR: /* everything except whitespace and operators */ |
| ( ~[ " ", "\t", "\n", "\r", |
| ",", "?", "*", "(", ")", ":", "^", "\""] |
| ) > |
| | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" ) > /* space, tab, newline or carriage return */ |
| | <#_STAR: "*" > /* term truncation */ |
| | <#_ONE_CHAR: "?" > /* precisely one character in a term */ |
| /* 2..99 prefix for distance operators: 2-9 or 20-99 by the first alternative, 10-19 by the second */ |
| | <#_DISTOP_NUM: ((["2"-"9"](["0"-"9"])?) | ("1" ["0"-"9"]))> |
| } |
| |
| <DEFAULT> SKIP : { /* in the DEFAULT state, whitespace is matched and discarded rather than passed to the parser */ |
| < <_WHITESPACE>> |
| } |
| |
| /* Operator tokens (in increasing order of precedence): */ |
| <DEFAULT> TOKEN : |
| { |
| <OR: "OR" | "or"> /* all-upper or all-lower case only; mixed case is not listed */ |
| | <AND: "AND" | "and"> |
| | <NOT: "NOT" | "not"> |
| | <W: (<_DISTOP_NUM>)? ("W"|"w")> /* ordered distance operator with optional 2..99 prefix */ |
| | <N: (<_DISTOP_NUM>)? ("N"|"n")> /* unordered distance operator with optional 2..99 prefix */ |
| /* These are excluded in _TERM_CHAR: */ |
| | <LPAREN: "("> |
| | <RPAREN: ")"> |
| | <COMMA: ","> |
| | <COLON: ":"> |
| | <CARAT: "^"> : Boost /* after a caret the lexer switches to the Boost state */ |
| /* Literal non-empty term between double quotes, optionally followed by |
| * the truncator (TRUNCQUOTED). |
| * In QUOTED, a quote or backslash inside the term is escaped by a backslash; |
| * the TRUNCQUOTED pattern accepts any characters except a double quote. |
| */ |
| | <TRUNCQUOTED: "\"" (~["\""])+ "\"" <_STAR>> |
| | <QUOTED: "\"" ( (~["\"", "\\"]) | ("\\" ["\\", "\""]))+ "\""> |
| | <SUFFIXTERM: (<_TERM_CHAR>)+ <_STAR>> /* term characters followed by exactly one trailing * */ |
| | <TRUNCTERM: (<_TERM_CHAR>)+ |
| (<_STAR> | <_ONE_CHAR> )+ /* at least one * or ? */ |
| (<_TERM_CHAR> | <_STAR> | <_ONE_CHAR> )* |
| > |
| | <TERM: (<_TERM_CHAR>)+> /* NOTE(review): operator words also fit this pattern; JavaCC is documented to prefer the earlier declaration on equal-length matches - confirm before reordering */ |
| } |
| |
| <Boost> TOKEN : { /* only recognized in the Boost state, which CARAT enters */ |
| <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )?> : DEFAULT /* digits with optional fraction; then back to the DEFAULT state */ |
| } |
| |
| |
| /* Start production: a single fields query that must be followed by end of input. */ |
| SrndQuery TopSrndQuery() : { |
|   SrndQuery topLevel; |
| }{ |
|   topLevel = FieldsQuery() |
|   <EOF> |
|   { return topLevel; } |
| } |
| |
| |
| /* An OR-level query, optionally preceded by one or more "field:" prefixes. */ |
| SrndQuery FieldsQuery() : { |
|   ArrayList<String> fields; |
|   SrndQuery body; |
| }{ |
|   fields = OptionalFields() |
|   body = OrQuery() |
|   { |
|     /* OptionalFields yields null when no field prefix was present. */ |
|     if (fields != null) { |
|       return getFieldsQuery(body, fields); |
|     } |
|     return body; |
|   } |
| } |
| |
| |
| /* Collects zero or more "name:" prefixes; returns null when there are none. */ |
| ArrayList<String> OptionalFields() : { |
|   ArrayList<String> collected = null; |
|   Token nameToken; |
| }{ |
|   ( LOOKAHEAD(2) /* look ahead to the colon */ |
|     nameToken = <TERM> |
|     <COLON> { |
|       /* The list is created lazily, on the first field name. */ |
|       if (null == collected) { |
|         collected = new ArrayList<String>(); |
|       } |
|       collected.add(nameToken.image); |
|     } |
|   )* |
|   { return collected; } |
| } |
| |
| |
| /* Infix OR: one or more AND-level queries separated by OR tokens. */ |
| SrndQuery OrQuery() : { |
|   SrndQuery first; |
|   SrndQuery next; |
|   ArrayList<SrndQuery> operands = null; |
|   Token lastOr = null; |
| }{ |
|   first = AndQuery() |
|   ( lastOr = <OR> /* only the most recently seen operator token is kept */ |
|     next = AndQuery() { |
|       if (operands == null) { |
|         /* First OR: start the operand list with the left-hand query. */ |
|         operands = new ArrayList<SrndQuery>(); |
|         operands.add(first); |
|       } |
|       operands.add(next); |
|     } |
|   )* |
|   { |
|     if (operands == null) { |
|       return first; |
|     } |
|     return getOrQuery(operands, true /* infix */, lastOr); |
|   } |
| } |
| |
| |
| /* Infix AND: one or more NOT-level queries separated by AND tokens. */ |
| SrndQuery AndQuery() : { |
|   SrndQuery first; |
|   SrndQuery next; |
|   ArrayList<SrndQuery> operands = null; |
|   Token lastAnd = null; |
| }{ |
|   first = NotQuery() |
|   ( lastAnd = <AND> /* only the most recently seen operator token is kept */ |
|     next = NotQuery() { |
|       if (operands == null) { |
|         /* First AND: start the operand list with the left-hand query. */ |
|         operands = new ArrayList<SrndQuery>(); |
|         operands.add(first); |
|       } |
|       operands.add(next); |
|     } |
|   )* |
|   { |
|     if (operands == null) { |
|       return first; |
|     } |
|     return getAndQuery(operands, true /* infix */, lastAnd); |
|   } |
| } |
| |
| |
| /* Infix NOT: one or more N-level queries separated by NOT tokens. */ |
| SrndQuery NotQuery() : { |
|   SrndQuery first; |
|   SrndQuery next; |
|   ArrayList<SrndQuery> operands = null; |
|   Token lastNot = null; |
| }{ |
|   first = NQuery() |
|   ( lastNot = <NOT> /* only the most recently seen operator token is kept */ |
|     next = NQuery() { |
|       if (operands == null) { |
|         /* First NOT: start the operand list with the left-hand query. */ |
|         operands = new ArrayList<SrndQuery>(); |
|         operands.add(first); |
|       } |
|       operands.add(next); |
|     } |
|   )* |
|   { |
|     if (operands == null) { |
|       return first; |
|     } |
|     return getNotQuery(operands, lastNot); |
|   } |
| } |
| |
| |
| /* Infix N: W-level queries combined pairwise, left to right, into unordered distance queries. */ |
| SrndQuery NQuery() : { |
|   SrndQuery left; |
|   SrndQuery right; |
|   ArrayList<SrndQuery> pair; |
|   Token nToken; |
| }{ |
|   left = WQuery() |
|   ( nToken = <N> |
|     right = WQuery() { |
|       pair = new ArrayList<SrndQuery>(); |
|       pair.add(left); /* left associative: the result so far is the left operand */ |
|       pair.add(right); |
|       left = getDistanceQuery(pair, true /* infix */, nToken, false /* not ordered */); |
|     } |
|   )* |
|   { return left; } |
| } |
| |
| |
| /* Infix W: primary queries combined pairwise, left to right, into ordered distance queries. */ |
| SrndQuery WQuery() : { |
|   SrndQuery left; |
|   SrndQuery right; |
|   ArrayList<SrndQuery> pair; |
|   Token wToken; |
| }{ |
|   left = PrimaryQuery() |
|   ( wToken = <W> |
|     right = PrimaryQuery() { |
|       pair = new ArrayList<SrndQuery>(); |
|       pair.add(left); /* left associative: the result so far is the left operand */ |
|       pair.add(right); |
|       left = getDistanceQuery(pair, true /* infix */, wToken, true /* ordered */); |
|     } |
|   )* |
|   { return left; } |
| } |
| |
| |
| /* A bracketed query, a prefix-operator query, or a simple term, followed by optional weights. */ |
| SrndQuery PrimaryQuery() : { |
|   SrndQuery primary; |
| }{ |
|   ( <LPAREN> primary = FieldsQuery() <RPAREN> |
|   | primary = PrefixOperatorQuery() |
|   | primary = SimpleTerm() |
|   ) |
|   OptionalWeights(primary) /* applies any ^number boosts to the query just parsed */ |
|   { return primary; } |
| } |
| |
| |
| /* Prefix notation: an OR, AND, N or W token followed by a parenthesized operand list. */ |
| SrndQuery PrefixOperatorQuery() : { |
|   Token opToken; |
|   List<SrndQuery> operands; |
|   SrndQuery built; |
| }{ |
|   ( opToken = <OR> /* prefix OR */ |
|     operands = FieldsQueryList() |
|     { built = getOrQuery(operands, false /* not infix */, opToken); } |
| |
|   | opToken = <AND> /* prefix AND */ |
|     operands = FieldsQueryList() |
|     { built = getAndQuery(operands, false /* not infix */, opToken); } |
| |
|   | opToken = <N> /* prefix N */ |
|     operands = FieldsQueryList() |
|     { built = getDistanceQuery(operands, false /* not infix */, opToken, false /* not ordered */); } |
| |
|   | opToken = <W> /* prefix W */ |
|     operands = FieldsQueryList() |
|     { built = getDistanceQuery(operands, false /* not infix */, opToken, true /* ordered */); } |
|   ) |
|   { return built; } |
| } |
| |
| |
| /* A parenthesized, comma-separated list of at least two fields queries. */ |
| List<SrndQuery> FieldsQueryList() : { |
|   ArrayList<SrndQuery> members = new ArrayList<SrndQuery>(); |
|   SrndQuery member; |
| }{ |
|   <LPAREN> |
|   member = FieldsQuery() { members.add(member); } |
|   ( <COMMA> |
|     member = FieldsQuery() { members.add(member); } |
|   )+ |
|   <RPAREN> |
|   { return members; } |
| } |
| |
| |
| /* A single term: plain, quoted, suffix-truncated, wildcarded, or quoted and truncated. */ |
| SrndQuery SimpleTerm() : { |
|   Token tok; |
| }{ |
|   ( tok = <TERM> |
|     { return getTermQuery(tok.image, false /* not quoted */); } |
| |
|   | tok = <QUOTED> { |
|       String quotedImage = tok.image; |
|       /* Drop the opening and closing quote. */ |
|       return getTermQuery(quotedImage.substring(1, quotedImage.length() - 1), true /* quoted */); |
|     } |
| |
|   | tok = <SUFFIXTERM> { /* ending in * */ |
|       String suffixImage = tok.image; |
|       if (! allowedSuffix(suffixImage)) { |
|         throw new ParseException(TRUNCATION_ERROR_MESSAGE + suffixImage); |
|       } |
|       /* Drop the trailing truncator. */ |
|       return getPrefixQuery(suffixImage.substring(0, suffixImage.length() - 1), false /* not quoted */); |
|     } |
| |
|   | tok = <TRUNCTERM> { /* with at least one * or ? */ |
|       String truncImage = tok.image; |
|       if (! allowedTruncation(truncImage)) { |
|         throw new ParseException(TRUNCATION_ERROR_MESSAGE + truncImage); |
|       } |
|       return getTruncQuery(truncImage); |
|     } |
| |
|   | tok = <TRUNCQUOTED> { /* eg. "9b-b,m"* */ |
|       String truncQuotedImage = tok.image; |
|       /* Two quotes and the truncator account for three characters of the image. */ |
|       int quotedLength = truncQuotedImage.length() - 3; |
|       if (quotedLength < MINIMUM_PREFIX_LENGTH) { |
|         throw new ParseException(TRUNCATION_ERROR_MESSAGE + truncQuotedImage); |
|       } |
|       /* Drop the opening quote, the closing quote and the truncator. */ |
|       return getPrefixQuery(truncQuotedImage.substring(1, truncQuotedImage.length() - 2), true /* quoted */); |
|     } |
|   ) |
| } |
| |
| |
| /* Applies zero or more "^number" boosts to q, multiplying each into q's current weight. |
|  * Throws ParseException when a boost does not parse as a float, is not positive, |
|  * or is too large to be represented as a finite float. |
|  */ |
| void OptionalWeights(SrndQuery q) : { |
|   Token weight=null; |
| }{ |
|   ( <CARAT> weight=<NUMBER> { |
|       float f; |
|       try { |
|         f = Float.parseFloat(weight.image); |
|       } catch (NumberFormatException floatExc) { |
|         throw new ParseException(BOOST_ERROR_MESSAGE + weight.image + " (" + floatExc + ")"); |
|       } |
|       /* A very long digit string parses to infinity, which is not a usable boost. */ |
|       if (f <= 0.0 || Float.isInfinite(f)) { |
|         throw new ParseException(BOOST_ERROR_MESSAGE + weight.image); |
|       } |
|       q.setWeight(f * q.getWeight()); /* left associative, fwiw */ |
|     } |
|   )* |
| } |
| |