blob: ec87328b1c2d964f42d6439239918939fbdc5ca6 [file] [log] [blame]
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
// JavaCC generator options.
// STATIC = false asks JavaCC to generate instance (non-static) parser
// methods, so each QueryParser object carries its own parsing state; the
// class below relies on that by keeping the analyzer, default field and
// phrase slop in instance fields.
options {
STATIC= false;
}
PARSER_BEGIN(QueryParser)
package org.apache.lucene.queryParser;
import java.util.Vector;
import java.io.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.search.*;
/**
 * This class is generated by JavaCC.  The only method that clients should need
 * to call is <a href="#parse">parse()</a>.
 *
 * <p>The syntax for query strings is as follows:
 * A Query is a series of clauses, optionally joined by <code>AND</code>
 * (<code>&amp;&amp;</code>) or <code>OR</code> (<code>||</code>).
 * A clause may be prefixed by:
 * <ul>
 * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
 *      that the clause is required or prohibited respectively
 *      (<code>NOT</code> and <code>!</code> act like <code>-</code>); or
 * <li> a term followed by a colon, indicating the field to be searched.
 *      This enables one to construct queries which search multiple fields.
 * </ul>
 *
 * <p>A clause may be either:
 * <ul>
 * <li> a term, indicating all the documents that contain this term; or
 * <li> a nested query, enclosed in parentheses.  Note that this may be used
 *      with a <code>+</code>/<code>-</code> prefix to require any of a set of
 *      terms.
 * </ul>
 *
 * <p>A term may be a plain word, a wildcard word, a number, a double-quoted
 * phrase, or a range written as <code>[lower upper]</code> (inclusive) or
 * <code>{lower upper}</code> (exclusive).  A plain word may be followed by
 * <code>*</code> (prefix search) or <code>~</code> (fuzzy search) and by
 * <code>^number</code> (boost).
 *
 * <p>Thus, in BNF, the query grammar is:
 * <pre>
 *   Query       ::= Modifiers Clause ( Conjunction Modifiers Clause )*
 *   Conjunction ::= [ &lt;AND&gt; | &lt;OR&gt; ]
 *   Modifiers   ::= [ "+" | "-" | &lt;NOT&gt; ]
 *   Clause      ::= [ &lt;TERM&gt; ":" ] ( Term | "(" Query ")" )
 *   Term        ::= ( &lt;TERM&gt; | &lt;WILDTERM&gt; | &lt;NUMBER&gt; ) [ "*" | "~" ] [ "^" &lt;NUMBER&gt; ]
 *                 | &lt;RANGEIN&gt; | &lt;RANGEEX&gt; | &lt;QUOTED&gt;
 * </pre>
 */
public class QueryParser {

  /** Parses a query string, returning a
   * <a href="lucene.search.Query.html">Query</a>.
   * A fresh parser is created for every call.
   *  @param query     the query string to be parsed.
   *  @param field     the default field for query terms.
   *  @param analyzer  used to find terms in the query text.
   *  @throws ParseException if the query string cannot be parsed.
   */
  static public Query parse(String query, String field, Analyzer analyzer)
       throws ParseException {
    QueryParser parser = new QueryParser(field, analyzer);
    return parser.parse(query);
  }

  /** Analyzer used to break query text into index terms. */
  Analyzer analyzer;
  /** Default field for terms that carry no explicit <code>field:</code> prefix. */
  String field;
  /** Slop applied to every PhraseQuery built by this parser. */
  int phraseSlop = 0;

  /** Constructs a query parser.
   *  @param f  the default field for query terms.
   *  @param a  used to find terms in the query text.
   */
  public QueryParser(String f, Analyzer a) {
    // The generated constructor needs a Reader; parse() re-initialises the
    // parser with the real query text later.
    this(new StringReader(""));
    analyzer = a;
    field = f;
  }

  /** Parses a query string, returning a
   * <a href="lucene.search.Query.html">Query</a>.
   *  @param query  the query string to be parsed.
   *  @throws ParseException if the query string cannot be parsed.
   */
  public Query parse(String query) throws ParseException {
    ReInit(new StringReader(query));
    return Query(field);
  }

  /** Sets the default slop for phrases.  If zero, then exact phrase matches
    are required.  Zero by default. */
  public void setPhraseSlop(int s) { phraseSlop = s; }

  /** Gets the default slop for phrases. */
  public int getPhraseSlop() { return phraseSlop; }

  /**
   * Appends a clause for <code>q</code> to <code>clauses</code>, translating
   * the conjunction and modifier that introduced it into required/prohibited
   * flags.
   *
   * @param clauses  Vector of BooleanClause collected so far; modified in place.
   * @param conj     one of CONJ_NONE, CONJ_AND, CONJ_OR.
   * @param mods     one of MOD_NONE, MOD_NOT, MOD_REQ.
   * @param q        the clause's query, or null if the analyzer removed every
   *                 term; in that case nothing is appended.
   */
  private void addClause(Vector clauses, int conj, int mods, Query q) {
    // If this term is introduced by AND, make the preceding term required,
    // unless it's already prohibited.  There may be no preceding clause at
    // all when every earlier term was filtered away by the analyzer, so the
    // list must be checked before its last element is read.
    if (conj == CONJ_AND && !clauses.isEmpty()) {
      BooleanClause previous = (BooleanClause) clauses.lastElement();
      if (!previous.prohibited)
        previous.required = true;
    }

    // We might have been passed a null query; the term might have been
    // filtered away by the analyzer.
    if (q == null)
      return;

    // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
    // introduced by NOT or -; make sure not to set both.
    boolean prohibited = (mods == MOD_NOT);
    boolean required = (mods == MOD_REQ);
    if (conj == CONJ_AND && !prohibited)
      required = true;

    clauses.addElement(new BooleanClause(q, required, prohibited));
  }

  /**
   * Runs <code>queryText</code> through the analyzer and builds a query from
   * the resulting tokens.
   *
   * @return null when the analyzer yields no tokens, a TermQuery for exactly
   *         one token, otherwise a PhraseQuery (using the current phrase
   *         slop) over all tokens in order.
   */
  private Query getFieldQuery(String field, Analyzer analyzer, String queryText) {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count
    TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
    Vector v = new Vector();
    try {
      org.apache.lucene.analysis.Token t;
      while (true) {
        try {
          t = source.next();
        }
        catch (IOException e) {
          // Best effort: a read failure is treated as end of stream.
          t = null;
        }
        if (t == null)
          break;
        v.addElement(t.termText());
      }
    }
    finally {
      closeQuietly(source);
    }

    if (v.size() == 0)
      return null;
    else if (v.size() == 1)
      return new TermQuery(new Term(field, (String) v.elementAt(0)));
    else {
      PhraseQuery q = new PhraseQuery();
      q.setSlop(phraseSlop);
      for (int i = 0; i < v.size(); i++) {
        q.add(new Term(field, (String) v.elementAt(i)));
      }
      return q;
    }
  }

  /**
   * Builds a RangeQuery from the text found between range delimiters.
   * At most two tokens are read from the analyzer: the first becomes the
   * lower term and the second the upper term.  A missing token, or one whose
   * text equals "NULL" ignoring case, leaves that term null.
   * NOTE(review): a null term presumably means an open-ended bound; confirm
   * against RangeQuery.
   *
   * @param inclusive  passed straight through to the RangeQuery constructor.
   */
  private Query getRangeQuery(String field, Analyzer analyzer, String queryText, boolean inclusive)
  {
    // Use the analyzer to get all the tokens.  There should be 1 or 2.
    TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
    Term[] terms = new Term[2];
    try {
      org.apache.lucene.analysis.Token t;
      for (int i = 0; i < 2; i++)
      {
        try
        {
          t = source.next();
        }
        catch (IOException e)
        {
          // Best effort: a read failure leaves this bound unset.
          t = null;
        }
        if (t != null)
        {
          String text = t.termText();
          if (!text.equalsIgnoreCase("NULL"))
          {
            terms[i] = new Term(field, text);
          }
        }
      }
    }
    finally {
      closeQuietly(source);
    }
    return new RangeQuery(terms[0], terms[1], inclusive);
  }

  /** Closes a token stream, ignoring any I/O failure raised while closing. */
  private static void closeQuietly(TokenStream stream) {
    try {
      stream.close();
    }
    catch (IOException ignored) {
      // The tokens have already been consumed; nothing useful can be done.
    }
  }

  /**
   * Command-line smoke test: parses the first argument against the default
   * field "field" using SimpleAnalyzer and prints the resulting query.
   */
  public static void main(String[] args) throws Exception {
    if (args.length == 0) {
      System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
      return;
    }
    QueryParser qp = new QueryParser("field",
                           new org.apache.lucene.analysis.SimpleAnalyzer());
    Query q = qp.parse(args[0]);
    System.out.println(q.toString("field"));
  }

  // How a clause was joined to the one before it.
  private static final int CONJ_NONE   = 0;
  private static final int CONJ_AND    = 1;
  private static final int CONJ_OR     = 2;

  // Modifier written directly in front of a clause.
  private static final int MOD_NONE    = 0;
  private static final int MOD_NOT     = 10;
  private static final int MOD_REQ     = 11;
}
PARSER_END(QueryParser)
/* ***************** */
/* Token Definitions */
/* ***************** */
// Private building-block expressions.  The leading "#" marks each one as a
// private regular expression: it never produces a token by itself and can
// only be referenced from other token definitions.  "<*>" makes them
// available in every lexical state.
// Of these, only _NUM_CHAR, _IDENTIFIER_CHAR and _WHITESPACE are referenced
// by the public token/skip definitions visible in this file; _ALPHA_CHAR and
// _NEWLINE are used only by other private expressions here.
<*> TOKEN : {
// a single ASCII letter
<#_ALPHA_CHAR: ["a"-"z", "A"-"Z"] >
// a single ASCII digit
| <#_NUM_CHAR: ["0"-"9"] >
// a single ASCII letter or digit
| <#_ALPHANUM_CHAR: [ "a"-"z", "A"-"Z", "0"-"9" ] >
// a single ASCII letter, digit or underscore
| <#_IDENTIFIER_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "_" ] >
// a letter followed by any number of identifier characters
| <#_IDENTIFIER: <_ALPHA_CHAR> (<_IDENTIFIER_CHAR>)* >
// one line terminator: CR LF, CR or LF
| <#_NEWLINE: ( "\r\n" | "\r" | "\n" ) >
// a single space or tab
| <#_WHITESPACE: ( " " | "\t" ) >
// a backslash followed by a line terminator or by any character that is
// not an ASCII letter or digit
| <#_QCHAR: ( "\\" (<_NEWLINE> | ~["a"-"z", "A"-"Z", "0"-"9"] ) ) >
// everything up to (not including) the next CR or LF; may be empty
| <#_RESTOFLINE: (~["\r", "\n"])* >
}
// Tokens recognised in the DEFAULT lexical state.
// The declaration order below matters: JavaCC takes the longest match and,
// when several expressions match the same length, the one declared first.
// That is why the operator keywords and NUMBER come before TERM, and TERM
// comes before WILDTERM.
<DEFAULT> TOKEN : {
// Boolean operators.  The keyword forms are matched exactly as written
// (upper case); no IGNORE_CASE option is set in this file.
<AND: ("AND" | "&&") >
| <OR: ("OR" | "||") >
| <NOT: ("NOT" | "!") >
// Single-character operators and punctuation.
| <PLUS: "+" >
| <MINUS: "-" >
| <LPAREN: "(" >
| <RPAREN: ")" >
| <COLON: ":" >
| <CARAT: "^" >
| <STAR: "*" >
// A double-quoted phrase with at least one character between the quotes;
// the phrase itself cannot contain a double quote.
| <QUOTED: "\"" (~["\""])+ "\"">
// digits "." digits -- also the only form accepted as a boost value
| <NUMBER: (<_NUM_CHAR>)+ "." (<_NUM_CHAR>)+ >
// A plain term: starts with a letter, digit or underscore and runs until
// one of the listed delimiter characters.  "*" and "?" are delimiters here,
// so a trailing "*" is lexed separately as STAR.
| <TERM: <_IDENTIFIER_CHAR>
( ~["\"", " ", "\t", "(", ")", ":", "&", "|", "^", "*", "?", "~", "{", "}", "[", "]" ] )* >
| <FUZZY: "~" >
// A wildcard term: like TERM, but "*" and "?" may appear inside it.  It
// must both start and end with a letter, digit or underscore, so it is at
// least two characters long and can never end in "*" or "?".
| <WILDTERM: <_IDENTIFIER_CHAR>
( ~["\"", " ", "\t", "(", ")", ":", "&", "|", "^", "~", "{", "}", "[", "]" ] )* <_IDENTIFIER_CHAR>>
// Range expressions, delimiters included: [ ... ] and { ... }.  The Term
// production treats RANGEIN as inclusive and RANGEEX as exclusive.
| <RANGEIN: "[" (~["]"])+ "]">
| <RANGEEX: "{" (~["}"])+ "}">
}
// Spaces and tabs between tokens are discarded.  _WHITESPACE does not
// include line terminators, so those are not skipped by this rule.
<DEFAULT> SKIP : {
<<_WHITESPACE>>
}
// * Query  ::= Modifiers Clause ( Conjunction Modifiers Clause )*
// * Clause ::= [ <TERM> ":" ] ( Term | "(" Query ")" )
// Reads an optional AND / OR between two clauses.
// Yields CONJ_AND or CONJ_OR for the matching token, and CONJ_NONE when
// neither token is present.
int Conjunction() : {
  int kind = CONJ_NONE;
}
{
  [ <AND> { kind = CONJ_AND; } | <OR> { kind = CONJ_OR; } ]
  { return kind; }
}
// Reads the optional modifier written directly in front of a clause.
// "+" yields MOD_REQ; both "-" and NOT yield MOD_NOT; no modifier yields
// MOD_NONE.
int Modifiers() : {
  int kind = MOD_NONE;
}
{
  [
    <PLUS>  { kind = MOD_REQ; }
  | <MINUS> { kind = MOD_NOT; }
  | <NOT>   { kind = MOD_NOT; }
  ]
  { return kind; }
}
// Parses one or more clauses and combines them into a BooleanQuery.
// "field" is the default field handed down to every clause.  Each parsed
// clause goes through addClause(), which decides its required/prohibited
// flags; the collected clauses are then copied, in order, into the result.
Query Query(String field) :
{
  Vector collected = new Vector();
  Query sub;
  int joinedBy, modifier;
}
{
  // The first clause has no conjunction in front of it.
  modifier=Modifiers() sub=Clause(field)
  { addClause(collected, CONJ_NONE, modifier, sub); }
  (
    joinedBy=Conjunction() modifier=Modifiers() sub=Clause(field)
    { addClause(collected, joinedBy, modifier, sub); }
  )*
  {
    BooleanQuery result = new BooleanQuery();
    for (int idx = 0, count = collected.size(); idx < count; idx++) {
      result.add((BooleanClause) collected.elementAt(idx));
    }
    return result;
  }
}
// Parses a single clause: an optional "name:" field prefix followed by
// either a term or a parenthesised sub-query.  When the prefix is present
// its text replaces the incoming default field for this clause only.
Query Clause(String field) : {
  Query result;
  Token fieldName = null;
}
{
  [
    // Two tokens of lookahead tell a "name:" prefix from a bare TERM.
    LOOKAHEAD(2)
    fieldName=<TERM> <COLON> { field = fieldName.image; }
  ]
  (
    result=Term(field)
  | <LPAREN> result=Query(field) <RPAREN>
  )
  { return result; }
}
// Parses one term-level expression for the given field and builds the
// matching query:
//   - TERM / WILDTERM / NUMBER, optionally followed by "*" or "~" and by a
//     "^" boost: a WildcardQuery, PrefixQuery, FuzzyQuery, or whatever
//     getFieldQuery() produces from the analyzed text;
//   - RANGEIN / RANGEEX: a range query built from the text between the
//     delimiters (inclusive only for RANGEIN);
//   - QUOTED: getFieldQuery() on the text between the quotes.
// The result may be null when getFieldQuery() finds no tokens.
Query Term(String field) : {
  Token text, boostToken = null;
  boolean isPrefix = false;
  boolean isWildcard = false;
  boolean isFuzzy = false;
  boolean inclusive = false;
  Query result;
}
{
  (
    (
      text=<TERM>
    | text=<WILDTERM> { isWildcard = true; }
    | text=<NUMBER>
    )
    [ <STAR> { isPrefix = true; } | <FUZZY> { isFuzzy = true; } ]
    [ <CARAT> boostToken=<NUMBER> ]
    {
      // Wildcard takes precedence over prefix, and prefix over fuzzy.
      if (isWildcard) {
        result = new WildcardQuery(new Term(field, text.image));
      } else if (isPrefix) {
        result = new PrefixQuery(new Term(field, text.image));
      } else if (isFuzzy) {
        result = new FuzzyQuery(new Term(field, text.image));
      } else {
        result = getFieldQuery(field, analyzer, text.image);
      }
    }
  | ( text=<RANGEIN> { inclusive = true; } | text=<RANGEEX> )
    {
      // Strip the surrounding bracket/brace before analysis.
      result = getRangeQuery(field, analyzer,
                             text.image.substring(1, text.image.length() - 1),
                             inclusive);
    }
  | text=<QUOTED>
    {
      // Strip the surrounding double quotes before analysis.
      result = getFieldQuery(field, analyzer,
                             text.image.substring(1, text.image.length() - 1));
    }
  )
  {
    if (boostToken != null) {
      float factor = 1.0f;
      try {
        factor = Float.valueOf(boostToken.image).floatValue();
      }
      catch (Exception ignored) {
        // An unparsable boost leaves the factor at 1.0.
      }

      // Only these three query types receive the boost here; for any other
      // result (including null) the boost is dropped.
      if (result instanceof TermQuery) {
        ((TermQuery) result).setBoost(factor);
      } else if (result instanceof PhraseQuery) {
        ((PhraseQuery) result).setBoost(factor);
      } else if (result instanceof MultiTermQuery) {
        ((MultiTermQuery) result).setBoost(factor);
      }
    }
    return result;
  }
}