blob: 787ed16bde49f1f6aad30810de7f60db5a09b98d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Surround query language parser */
/* Query language operators: OR, AND, NOT, W, N, (, ), ^, *, ?, " and comma */
/* JavaCC generator options (meanings per the JavaCC documentation):
 * STATIC=false            - generate an instance-based (non-static) parser;
 * JAVA_UNICODE_ESCAPE=true - process Java unicode escapes in the input;
 * USER_CHAR_STREAM=true   - the parser reads from a caller-supplied CharStream
 *                           (this grammar passes a FastCharStream).
 */
options {
STATIC=false;
JAVA_UNICODE_ESCAPE=true;
USER_CHAR_STREAM=true;
}
PARSER_BEGIN(QueryParser)
package org.apache.lucene.queryparser.surround.parser;
import java.util.ArrayList;
import java.util.List;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.queryparser.surround.query.SrndQuery;
import org.apache.lucene.queryparser.surround.query.FieldsQuery;
import org.apache.lucene.queryparser.surround.query.OrQuery;
import org.apache.lucene.queryparser.surround.query.AndQuery;
import org.apache.lucene.queryparser.surround.query.NotQuery;
import org.apache.lucene.queryparser.surround.query.DistanceQuery;
import org.apache.lucene.queryparser.surround.query.SrndTermQuery;
import org.apache.lucene.queryparser.surround.query.SrndPrefixQuery;
import org.apache.lucene.queryparser.surround.query.SrndTruncQuery;
/**
 * Parser for the surround query language. This class is generated by JavaCC.
 * The only method that clients should need to call is {@link #parse parse()}.
 *
 * <p>This parser generates queries that make use of position information
 * (Span queries). It provides positional operators (<code>w</code> and
 * <code>n</code>) that accept a numeric distance, as well as boolean
 * operators (<code>and</code>, <code>or</code>, and <code>not</code>),
 * wildcards (<code>*</code> and <code>?</code>), quoting (with
 * <code>"</code>), and boosting (via <code>^</code>).</p>
 * <p>The operators (W, N, AND, OR, NOT) can be expressed lower-cased or
 * upper-cased, and the non-unary operators (everything but NOT) support
 * both infix <code>(a AND b AND c)</code> and prefix <code>AND(a, b,
 * c)</code> notation. </p>
 * <p>The W and N operators express a positional relationship among their
 * operands. W is ordered, and N is unordered. The distance is 1 by
 * default, meaning the operands are adjacent, or may be provided as a
 * prefix from 2-99. So, for example, 3W(a, b) means that terms a and b
 * must appear within three positions of each other, or in other words, up
 * to two terms may appear between a and b. </p>
 */
public class QueryParser {
  /** Minimum number of characters required before the truncator of a prefix term. */
  static final int MINIMUM_PREFIX_LENGTH = 3;
  /** Minimum number of non-wildcard characters required in a truncated term. */
  static final int MINIMUM_CHARS_IN_TRUNC = 3;

  /** Message prefix used when a truncated term is rejected as too unrestrictive. */
  static final String TRUNCATION_ERROR_MESSAGE = "Too unrestrictive truncation: ";
  /** Message prefix used when a boost value is rejected. */
  static final String BOOST_ERROR_MESSAGE = "Cannot handle boost value: ";

  /* CHECKME: These should be the same as for the tokenizer. How? */
  /** Wildcard character standing for any run of characters. */
  static final char TRUNCATOR = '*';
  /** Wildcard character standing for exactly one character. */
  static final char ANY_CHAR = '?';
  /** Character separating a field name from the query it applies to. */
  static final char FIELD_OPERATOR = ':';

  /**
   * Parses a surround query string using a freshly created parser.
   *
   * @param query the query text
   * @return the parsed query
   * @throws ParseException if the text is not a valid surround query
   */
  public static SrndQuery parse(String query) throws ParseException {
    QueryParser parser = new QueryParser();
    return parser.parse2(query);
  }

  /** Creates a parser on empty input; {@link #parse2} re-initializes it for each query. */
  public QueryParser() {
    this(new FastCharStream(new StringReader("")));
  }

  /**
   * Re-initializes this parser on the given text and parses it.
   *
   * @param query the query text
   * @return the parsed query
   * @throws ParseException on a syntax error, or on a lexical error
   *         (a {@code TokenMgrError}), which is attached as the cause
   */
  public SrndQuery parse2(String query) throws ParseException {
    ReInit(new FastCharStream(new StringReader(query)));
    try {
      return TopSrndQuery();
    } catch (TokenMgrError tme) {
      /* Callers only need to handle ParseException; keep the lexer error as the cause. */
      ParseException pe = new ParseException(tme.getMessage());
      pe.initCause(tme);
      throw pe;
    }
  }

  /** Wraps a query so that it applies to the given field names. */
  protected SrndQuery getFieldsQuery(
      SrndQuery q, ArrayList<String> fieldNames) {
    /* FIXME: check acceptable subquery: at least one subquery should not be
     * a fields query.
     */
    return new FieldsQuery(q, fieldNames, FIELD_OPERATOR);
  }

  /** Builds an OR query; the operator token's image is passed on as the operator name. */
  protected SrndQuery getOrQuery(List<SrndQuery> queries, boolean infix, Token orToken) {
    return new OrQuery(queries, infix, orToken.image);
  }

  /** Builds an AND query; the operator token's image is passed on as the operator name. */
  protected SrndQuery getAndQuery(List<SrndQuery> queries, boolean infix, Token andToken) {
    return new AndQuery(queries, infix, andToken.image);
  }

  /** Builds a NOT query; the operator token's image is passed on as the operator name. */
  protected SrndQuery getNotQuery(List<SrndQuery> queries, Token notToken) {
    return new NotQuery(queries, notToken.image);
  }

  /**
   * Extracts the distance from a distance operator image.
   *
   * @param distanceOp operator image such as {@code W}, {@code 2W} or {@code 15n}
   * @return 1 for a single-character operator, otherwise the numeric prefix
   */
  protected static int getOpDistance(String distanceOp) {
    /* W, 2W, 3W etc -> 1, 2 3, etc. Same for N, 2N ... */
    int numberLength = distanceOp.length() - 1; /* everything but the operator letter */
    if (numberLength == 0) {
      return 1;
    }
    return Integer.parseInt(distanceOp.substring(0, numberLength));
  }

  /**
   * Verifies that the subqueries of a distance query are acceptable.
   *
   * @param distq the distance query to check
   * @param opName operator name used in the error message
   * @throws ParseException if {@code distq} reports a subquery that is not allowed
   */
  protected static void checkDistanceSubQueries(DistanceQuery distq, String opName)
      throws ParseException {
    String m = distq.distanceSubQueryNotAllowed();
    if (m != null) {
      throw new ParseException("Operator " + opName + ": " + m);
    }
  }

  /**
   * Builds a distance (W or N) query and validates its subqueries.
   *
   * @param queries the operand queries
   * @param infix whether the operator was written in infix notation
   * @param dToken the operator token; its image supplies distance and name
   * @param ordered true for W (ordered), false for N (unordered)
   * @throws ParseException if a subquery is not allowed in a distance query
   */
  protected SrndQuery getDistanceQuery(
      List<SrndQuery> queries,
      boolean infix,
      Token dToken,
      boolean ordered) throws ParseException {
    String opImage = dToken.image;
    DistanceQuery dq = new DistanceQuery(queries,
                                         infix,
                                         getOpDistance(opImage),
                                         opImage,
                                         ordered);
    checkDistanceSubQueries(dq, opImage);
    return dq;
  }

  /** Builds a query for a single, possibly quoted, term. */
  protected SrndQuery getTermQuery(
      String term, boolean quoted) {
    return new SrndTermQuery(term, quoted);
  }

  /**
   * Tells whether a suffix-truncated term is restrictive enough.
   *
   * @param suffixed the term including one trailing character that is not counted
   *        (the grammar passes the image with its trailing truncator)
   */
  protected boolean allowedSuffix(String suffixed) {
    int prefixLength = suffixed.length() - 1;
    return prefixLength >= MINIMUM_PREFIX_LENGTH;
  }

  /** Builds a prefix query for the given prefix (without the truncator). */
  protected SrndQuery getPrefixQuery(
      String prefix, boolean quoted) {
    return new SrndPrefixQuery(prefix, quoted, TRUNCATOR);
  }

  /**
   * Tells whether a truncated term is restrictive enough: it must contain at
   * least {@link #MINIMUM_CHARS_IN_TRUNC} characters that are not wildcards.
   */
  protected boolean allowedTruncation(String truncated) {
    /* At least 3 normal characters needed. */
    int nrNormalChars = 0;
    for (int i = 0; i < truncated.length(); i++) {
      char c = truncated.charAt(i);
      boolean wildcard = (c == TRUNCATOR) || (c == ANY_CHAR);
      if (!wildcard) {
        nrNormalChars++;
      }
    }
    return nrNormalChars >= MINIMUM_CHARS_IN_TRUNC;
  }

  /** Builds a query for a term containing wildcards. */
  protected SrndQuery getTruncQuery(String truncated) {
    return new SrndTruncQuery(truncated, TRUNCATOR, ANY_CHAR);
  }
}
PARSER_END(QueryParser)
/* ***************** */
/* Token Definitions */
/* ***************** */
/* Private (#) regular expression fragments, declared for all lexical states.
 * They are only building blocks for the tokens defined further down.
 */
<*> TOKEN : {
<#_NUM_CHAR: ["0"-"9"] >
| <#_TERM_CHAR: /* everything except whitespace and operators */
( ~[ " ", "\t", "\n", "\r",
",", "?", "*", "(", ")", ":", "^", "\""]
) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" ) >
| <#_STAR: "*" > /* term truncation */
| <#_ONE_CHAR: "?" > /* precisely one character in a term */
/* 2..99 prefix for distance operators: 2-9, 20-99, or 10-19 */
| <#_DISTOP_NUM: ((["2"-"9"](["0"-"9"])?) | ("1" ["0"-"9"]))>
}
/* Whitespace is skipped in the DEFAULT lexical state only. */
<DEFAULT> SKIP : {
< <_WHITESPACE>>
}
/* Operator and term tokens of the DEFAULT lexical state.
 * Binding strength, as implemented by the productions below, increases in the
 * order OR, AND, NOT, N, W.
 * NOTE(review): per JavaCC matching rules the longest match wins and ties go to
 * the token declared first, so the operator tokens need to stay ahead of TERM.
 */
<DEFAULT> TOKEN :
{
<OR: "OR" | "or">
| <AND: "AND" | "and">
| <NOT: "NOT" | "not">
| <W: (<_DISTOP_NUM>)? ("W"|"w")>
| <N: (<_DISTOP_NUM>)? ("N"|"n")>
/* These are excluded in _TERM_CHAR: */
| <LPAREN: "(">
| <RPAREN: ")">
| <COMMA: ",">
| <COLON: ":">
| <CARAT: "^"> : Boost
/* Literal non-empty term between double quotes.
 * In QUOTED a double quote or backslash is escaped by a preceding backslash;
 * TRUNCQUOTED accepts any characters except a double quote between the quotes
 * and is followed by the truncation star.
 */
| <TRUNCQUOTED: "\"" (~["\""])+ "\"" <_STAR>>
| <QUOTED: "\"" ( (~["\"", "\\"]) | ("\\" ["\\", "\""]))+ "\"">
| <SUFFIXTERM: (<_TERM_CHAR>)+ <_STAR>>
| <TRUNCTERM: (<_TERM_CHAR>)+
(<_STAR> | <_ONE_CHAR> )+ /* at least one * or ? */
(<_TERM_CHAR> | <_STAR> | <_ONE_CHAR> )*
>
| <TERM: (<_TERM_CHAR>)+>
}
/* Boost lexical state, entered after CARAT: the only token is an unsigned
 * decimal number, after which the lexer returns to DEFAULT. No SKIP is
 * declared for this state, so the number must directly follow the caret.
 */
<Boost> TOKEN : {
<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )?> : DEFAULT
}
/* Start production: one fields query that must be followed by end of input. */
SrndQuery TopSrndQuery() : {
  SrndQuery top;
}{
  top = FieldsQuery()
  <EOF>
  { return top; }
}
/* An OR-level query, optionally preceded by field names ("field:"). */
SrndQuery FieldsQuery() : {
  ArrayList<String> fields;
  SrndQuery body;
}{
  fields = OptionalFields()
  body = OrQuery()
  {
    /* no field prefix: the query is returned unwrapped */
    if (fields == null) {
      return body;
    }
    return getFieldsQuery(body, fields);
  }
}
/* Zero or more "name:" prefixes; returns null when there are none. */
ArrayList<String> OptionalFields() : {
  Token name;
  ArrayList<String> collected = null;
}{
  ( LOOKAHEAD(2) /* a TERM only counts as a field name when a COLON follows */
    name = <TERM>
    <COLON>
    {
      if (collected == null) {
        collected = new ArrayList<String>();
      }
      collected.add(name.image);
    }
  )*
  { return collected; }
}
/* Infix OR: one or more AND-level operands separated by OR. */
SrndQuery OrQuery() : {
  SrndQuery operand;
  ArrayList<SrndQuery> operands = null;
  Token lastOr = null;
}{
  operand = AndQuery()
  ( lastOr = <OR> { /* only the most recent operator token is kept */
      if (operands == null) {
        /* first OR seen: the list starts with the left-hand operand */
        operands = new ArrayList<SrndQuery>();
        operands.add(operand);
      }
    }
    operand = AndQuery() {
      operands.add(operand);
    }
  )*
  {
    if (operands == null) {
      return operand;
    }
    return getOrQuery(operands, true /* infix */, lastOr);
  }
}
/* Infix AND: one or more NOT-level operands separated by AND. */
SrndQuery AndQuery() : {
  SrndQuery operand;
  ArrayList<SrndQuery> operands = null;
  Token lastAnd = null;
}{
  operand = NotQuery()
  ( lastAnd = <AND> { /* only the most recent operator token is kept */
      if (operands == null) {
        /* first AND seen: the list starts with the left-hand operand */
        operands = new ArrayList<SrndQuery>();
        operands.add(operand);
      }
    }
    operand = NotQuery() {
      operands.add(operand);
    }
  )*
  {
    if (operands == null) {
      return operand;
    }
    return getAndQuery(operands, true /* infix */, lastAnd);
  }
}
/* Infix NOT: one or more N-level operands separated by NOT. */
SrndQuery NotQuery() : {
  SrndQuery operand;
  ArrayList<SrndQuery> operands = null;
  Token lastNot = null;
}{
  operand = NQuery()
  ( lastNot = <NOT> { /* only the most recent operator token is kept */
      if (operands == null) {
        /* first NOT seen: the list starts with the left-hand operand */
        operands = new ArrayList<SrndQuery>();
        operands.add(operand);
      }
    }
    operand = NQuery() {
      operands.add(operand);
    }
  )*
  {
    if (operands == null) {
      return operand;
    }
    return getNotQuery(operands, lastNot);
  }
}
/* Infix N (unordered distance): W-level operands combined pairwise, left to right. */
SrndQuery NQuery() : {
  SrndQuery acc;
  SrndQuery right;
  ArrayList<SrndQuery> pair;
  Token nOp;
}{
  acc = WQuery()
  ( nOp = <N> {
      /* left associative: the result so far becomes the left operand */
      pair = new ArrayList<SrndQuery>();
      pair.add(acc);
    }
    right = WQuery() {
      pair.add(right);
      acc = getDistanceQuery(pair, true /* infix */, nOp, false /* not ordered */);
    }
  )*
  { return acc; }
}
/* Infix W (ordered distance): primary operands combined pairwise, left to right. */
SrndQuery WQuery() : {
  SrndQuery acc;
  SrndQuery right;
  ArrayList<SrndQuery> pair;
  Token wOp;
}{
  acc = PrimaryQuery()
  ( wOp = <W> {
      /* left associative: the result so far becomes the left operand */
      pair = new ArrayList<SrndQuery>();
      pair.add(acc);
    }
    right = PrimaryQuery() {
      pair.add(right);
      acc = getDistanceQuery(pair, true /* infix */, wOp, true /* ordered */);
    }
  )*
  { return acc; }
}
/* Bracketed query, prefix-operator query or simple term, each optionally weighted. */
SrndQuery PrimaryQuery() : {
  SrndQuery primary;
}{
  ( <LPAREN> primary = FieldsQuery() <RPAREN>
  | primary = PrefixOperatorQuery()
  | primary = SimpleTerm()
  )
  OptionalWeights(primary) /* applies any ^weight suffixes to the query in place */
  { return primary; }
}
/* Prefix notation: OR, AND, N or W followed by a parenthesized operand list. */
SrndQuery PrefixOperatorQuery() : {
  Token op;
  List<SrndQuery> operands;
}{
  ( op = <OR> /* prefix OR */
    operands = FieldsQueryList()
    { return getOrQuery(operands, false /* not infix */, op); }
  | op = <AND> /* prefix AND */
    operands = FieldsQueryList()
    { return getAndQuery(operands, false /* not infix */, op); }
  | op = <N> /* prefix N */
    operands = FieldsQueryList()
    { return getDistanceQuery(operands, false /* not infix */, op, false /* not ordered */); }
  | op = <W> /* prefix W */
    operands = FieldsQueryList()
    { return getDistanceQuery(operands, false /* not infix */, op, true /* ordered */); }
  )
}
/* Parenthesized, comma separated list of at least two fields queries. */
List<SrndQuery> FieldsQueryList() : {
  SrndQuery element;
  ArrayList<SrndQuery> elements = new ArrayList<SrndQuery>();
}{
  <LPAREN>
  element = FieldsQuery() { elements.add(element); }
  ( <COMMA> element = FieldsQuery() { elements.add(element); } )+
  <RPAREN>
  { return elements; }
}
/* A single term: plain, quoted, prefix (trailing *), or truncated (* and ? wildcards). */
SrndQuery SimpleTerm() : {
  Token term;
  String image;
}{
  ( term = <TERM>
    { return getTermQuery(term.image, false /* not quoted */); }
  | term = <QUOTED> {
      /* drop the surrounding double quotes */
      image = term.image;
      return getTermQuery(image.substring(1, image.length() - 1), true /* quoted */);
    }
  | term = <SUFFIXTERM> { /* ending in * */
      image = term.image;
      if (! allowedSuffix(image)) {
        throw new ParseException(TRUNCATION_ERROR_MESSAGE + image);
      }
      /* drop the trailing * */
      return getPrefixQuery(image.substring(0, image.length() - 1), false /* not quoted */);
    }
  | term = <TRUNCTERM> { /* with at least one * or ? */
      image = term.image;
      if (! allowedTruncation(image)) {
        throw new ParseException(TRUNCATION_ERROR_MESSAGE + image);
      }
      return getTruncQuery(image);
    }
  | term = <TRUNCQUOTED> { /* eg. "9b-b,m"* */
      image = term.image;
      /* the two quotes and the trailing * do not count towards the prefix length */
      if ((image.length() - 3) < MINIMUM_PREFIX_LENGTH) {
        throw new ParseException(TRUNCATION_ERROR_MESSAGE + image);
      }
      return getPrefixQuery(image.substring(1, image.length() - 2), true /* quoted */);
    }
  )
}
/* Zero or more "^number" boosts; each one multiplies the weight of q.
 * Throws ParseException when a boost is not a usable positive float.
 */
void OptionalWeights(SrndQuery q) : {
  Token weight = null;
}{
  ( <CARAT> weight = <NUMBER> {
      float f;
      try {
        f = Float.parseFloat(weight.image);
      } catch (NumberFormatException floatExc) {
        throw new ParseException(BOOST_ERROR_MESSAGE + weight.image + " (" + floatExc + ")");
      }
      /* Reject zero (including literals that underflow to zero) and literals
       * too large for a float, which parseFloat turns into infinity.
       */
      if (f <= 0.0f || Float.isInfinite(f)) {
        throw new ParseException(BOOST_ERROR_MESSAGE + weight.image);
      }
      q.setWeight(f * q.getWeight()); /* left associative, fwiw */
    }
  )*
}