| /* ==================================================================== |
| * The Apache Software License, Version 1.1 |
| * |
| * Copyright (c) 2001 The Apache Software Foundation. All rights |
| * reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Apache" and "Apache Software Foundation" and |
| * "Apache Lucene" must not be used to endorse or promote products |
| * derived from this software without prior written permission. For |
| * written permission, please contact apache@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * "Apache Lucene", nor may "Apache" appear in their name, without |
| * prior written permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation. For more |
| * information on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| |
| |
| options { |
| STATIC= false; |
| } |
| |
| PARSER_BEGIN(QueryParser) |
| |
| package org.apache.lucene.queryParser; |
| |
| import java.util.Vector; |
| import java.io.*; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.analysis.*; |
| import org.apache.lucene.search.*; |
| |
| /** |
| * This class is generated by JavaCC. The only method that clients should need |
| * to call is <a href="#parse">parse()</a>. |
| * |
| * The syntax for query strings is as follows: |
| * A Query is a series of clauses. |
| * A clause may be prefixed by: |
| * <ul> |
| * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating |
| * that the clause is required or prohibited respectively; or |
| * <li> a term followed by a colon, indicating the field to be searched. |
| * This enables one to construct queries which search multiple fields. |
| * </ul> |
| * |
| * A clause may be either: |
| * <ul> |
| * <li> a term, indicating all the documents that contain this term; or |
| * <li> a nested query, enclosed in parentheses. Note that this may be used |
| * with a <code>+</code>/<code>-</code> prefix to require any of a set of |
| * terms. |
| * </ul> |
| * |
| * Thus, in BNF, the query grammar is: |
| * <pre> |
| * Query ::= ( Clause )* |
| * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) |
| * </pre> |
| */ |
| |
| public class QueryParser { |
| /** Parses a query string, returning a |
| * <a href="lucene.search.Query.html">Query</a>. |
| * @param query the query string to be parsed. |
| * @param field the default field for query terms. |
| * @param analyzer used to find terms in the query text. |
| */ |
| static public Query parse(String query, String field, Analyzer analyzer) |
| throws ParseException { |
| QueryParser parser = new QueryParser(field, analyzer); |
| return parser.parse(query); |
| } |
| |
| Analyzer analyzer; |
| String field; |
| int phraseSlop = 0; |
| |
| /** Constructs a query parser. |
| * @param field the default field for query terms. |
| * @param analyzer used to find terms in the query text. |
| */ |
| public QueryParser(String f, Analyzer a) { |
| this(new StringReader("")); |
| analyzer = a; |
| field = f; |
| } |
| |
| /** Parses a query string, returning a |
| * <a href="lucene.search.Query.html">Query</a>. |
| * @param query the query string to be parsed. |
| */ |
| public Query parse(String query) throws ParseException { |
| ReInit(new StringReader(query)); |
| return Query(field); |
| } |
| |
| /** Sets the default slop for phrases. If zero, then exact phrase matches |
| are required. Zero by default. */ |
| public void setPhraseSlop(int s) { phraseSlop = s; } |
| /** Gets the default slop for phrases. */ |
| public int getPhraseSlop() { return phraseSlop; } |
| |
| private void addClause(Vector clauses, int conj, int mods, |
| Query q) { |
| boolean required, prohibited; |
| |
| // If this term is introduced by AND, make the preceding term required, |
| // unless it's already prohibited |
| if (conj == CONJ_AND) { |
| BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); |
| if (!c.prohibited) |
| c.required = true; |
| } |
| |
| // We might have been passed a null query; the term might have been |
| // filtered away by the analyzer. |
| if (q == null) |
| return; |
| |
| // We set REQUIRED if we're introduced by AND or +; PROHIBITED if |
| // introduced by NOT or -; make sure not to set both. |
| prohibited = (mods == MOD_NOT); |
| required = (mods == MOD_REQ); |
| if (conj == CONJ_AND && !prohibited) |
| required = true; |
| clauses.addElement(new BooleanClause(q, required, prohibited)); |
| } |
| |
| private Query getFieldQuery(String field, Analyzer analyzer, String queryText) { |
| // Use the analyzer to get all the tokens, and then build a TermQuery, |
| // PhraseQuery, or nothing based on the term count |
| |
| TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); |
| Vector v = new Vector(); |
| org.apache.lucene.analysis.Token t; |
| |
| while (true) { |
| try { |
| t = source.next(); |
| } |
| catch (IOException e) { |
| t = null; |
| } |
| if (t == null) |
| break; |
| v.addElement(t.termText()); |
| } |
| if (v.size() == 0) |
| return null; |
| else if (v.size() == 1) |
| return new TermQuery(new Term(field, (String) v.elementAt(0))); |
| else { |
| PhraseQuery q = new PhraseQuery(); |
| q.setSlop(phraseSlop); |
| for (int i=0; i<v.size(); i++) { |
| q.add(new Term(field, (String) v.elementAt(i))); |
| } |
| return q; |
| } |
| } |
| |
| private Query getRangeQuery(String field, Analyzer analyzer, String queryText, boolean inclusive) |
| { |
| // Use the analyzer to get all the tokens. There should be 1 or 2. |
| TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); |
| Term[] terms = new Term[2]; |
| org.apache.lucene.analysis.Token t; |
| |
| for (int i = 0; i < 2; i++) |
| { |
| try |
| { |
| t = source.next(); |
| } |
| catch (IOException e) |
| { |
| t = null; |
| } |
| if (t != null) |
| { |
| String text = t.termText(); |
| if (!text.equalsIgnoreCase("NULL")) |
| { |
| terms[i] = new Term(field, text); |
| } |
| } |
| } |
| return new RangeQuery(terms[0], terms[1], inclusive); |
| } |
| |
| public static void main(String[] args) throws Exception { |
| QueryParser qp = new QueryParser("field", |
| new org.apache.lucene.analysis.SimpleAnalyzer()); |
| Query q = qp.parse(args[0]); |
| System.out.println(q.toString("field")); |
| } |
| |
| private static final int CONJ_NONE = 0; |
| private static final int CONJ_AND = 1; |
| private static final int CONJ_OR = 2; |
| |
| private static final int MOD_NONE = 0; |
| private static final int MOD_NOT = 10; |
| private static final int MOD_REQ = 11; |
| } |
| |
| PARSER_END(QueryParser) |
| |
| /* ***************** */ |
| /* Token Definitions */ |
| /* ***************** */ |
| |
// Private helper expressions. The leading '#' marks each one as a private
// regular expression: it never produces a token on its own and exists only
// to be referenced from other definitions. They are declared for every
// lexical state (<*>).
// Of these, only _ALPHA_CHAR, _NUM_CHAR, _IDENTIFIER_CHAR, _NEWLINE and
// _WHITESPACE are referenced by other definitions visible in this file.
<*> TOKEN : {
  <#_ALPHA_CHAR: ["a"-"z", "A"-"Z"] >
| <#_NUM_CHAR: ["0"-"9"] >
| <#_ALPHANUM_CHAR: [ "a"-"z", "A"-"Z", "0"-"9" ] >
| <#_IDENTIFIER_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "_" ] >
  // A letter followed by any number of identifier characters.
| <#_IDENTIFIER: <_ALPHA_CHAR> (<_IDENTIFIER_CHAR>)* >
| <#_NEWLINE: ( "\r\n" | "\r" | "\n" ) >
| <#_WHITESPACE: ( " " | "\t" ) >
  // A backslash followed by a newline or by any non-alphanumeric character.
| <#_QCHAR: ( "\\" (<_NEWLINE> | ~["a"-"z", "A"-"Z", "0"-"9"] ) ) >
  // Everything up to, but not including, the next line terminator.
| <#_RESTOFLINE: (~["\r", "\n"])* >
}
| |
// Tokens of the query language itself. The order of the definitions is
// significant: when two expressions match the same longest input prefix,
// JavaCC picks the one listed first (so AND/OR/NOT win over TERM, and TERM
// wins over WILDTERM for text both can match).
<DEFAULT> TOKEN : {
  // Boolean operators, each with a keyword and a symbolic spelling.
  <AND: ("AND" | "&&") >
| <OR: ("OR" | "||") >
| <NOT: ("NOT" | "!") >
| <PLUS: "+" >
| <MINUS: "-" >
| <LPAREN: "(" >
| <RPAREN: ")" >
| <COLON: ":" >
| <CARAT: "^" >
| <STAR: "*" >
  // Double-quoted text; must contain at least one character and cannot
  // contain a double quote (no escape form is recognized here).
| <QUOTED: "\"" (~["\""])+ "\"">
  // Digits, a mandatory decimal point, digits. Bare integers do not match.
| <NUMBER: (<_NUM_CHAR>)+ "." (<_NUM_CHAR>)+ >
  // One identifier character followed by anything except the listed
  // delimiter/operator characters ('*' and '?' are excluded here).
| <TERM: <_IDENTIFIER_CHAR>
( ~["\"", " ", "\t", "(", ")", ":", "&", "|", "^", "*", "?", "~", "{", "}", "[", "]" ] )* >
| <FUZZY: "~" >
  // Like TERM, but '*' and '?' are allowed in the middle, and the text must
  // both start and end with an identifier character (at least two chars).
| <WILDTERM: <_IDENTIFIER_CHAR>
( ~["\"", " ", "\t", "(", ")", ":", "&", "|", "^", "~", "{", "}", "[", "]" ] )* <_IDENTIFIER_CHAR>>
  // Range expressions: the whole bracketed/braced text is one token; the
  // Term() production strips the delimiters before analysis.
| <RANGEIN: "[" (~["]"])+ "]">
| <RANGEEX: "{" (~["}"])+ "}">
}
| |
// Spaces and tabs between tokens are discarded by the lexer.
<DEFAULT> SKIP : {
  <<_WHITESPACE>>
}
| |
| // * Query ::= ( Clause )* |
| // * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) |
| |
/**
 * Consumes an optional AND / OR operator.
 * Returns CONJ_AND or CONJ_OR for the operator seen, CONJ_NONE if neither
 * is present.
 */
int Conjunction() : {
  int kind = CONJ_NONE;
}
{
  (
      <AND> { kind = CONJ_AND; }
    | <OR>  { kind = CONJ_OR; }
  )?
  { return kind; }
}
| |
/**
 * Consumes an optional clause modifier.
 * Returns MOD_REQ for "+", MOD_NOT for "-" or NOT, and MOD_NONE when no
 * modifier is present.
 */
int Modifiers() : {
  int kind = MOD_NONE;
}
{
  (
      <PLUS>  { kind = MOD_REQ; }
    | <MINUS> { kind = MOD_NOT; }
    | <NOT>   { kind = MOD_NOT; }
  )?
  { return kind; }
}
| |
/**
 * Parses one or more clauses, each optionally preceded by a conjunction
 * and/or a modifier, and collects them into a single BooleanQuery.
 * The first clause never has a conjunction, so CONJ_NONE is used for it.
 */
Query Query(String field) :
{
  Vector collected = new Vector();
  Query clause;
  int conj, mods;
}
{
  mods=Modifiers() clause=Clause(field)
  { addClause(collected, CONJ_NONE, mods, clause); }
  (
    conj=Conjunction() mods=Modifiers() clause=Clause(field)
    { addClause(collected, conj, mods, clause); }
  )*
  {
    BooleanQuery result = new BooleanQuery();
    int next = 0;
    while (next < collected.size()) {
      result.add((BooleanClause) collected.elementAt(next));
      next++;
    }
    return result;
  }
}
| |
/**
 * Parses a single clause: an optional "name:" field prefix followed by
 * either a term or a parenthesized sub-query. When the prefix is present
 * it replaces the field passed in for everything inside this clause.
 */
Query Clause(String field) : {
  Query body;
  Token fieldName = null;
}
{
  (
    // Two tokens of lookahead are needed to tell "name:" apart from a
    // plain TERM.
    LOOKAHEAD(2)
    fieldName=<TERM> <COLON>
    { field = fieldName.image; }
  )?

  (
      body=Term(field)
    | <LPAREN> body=Query(field) <RPAREN>
  )
  { return body; }
}
| |
| |
/**
 * Parses one term-level expression and builds the matching query:
 * a plain/wildcard/numeric term with optional "*" or "~" suffix and
 * optional "^boost", a bracketed or braced range, or a quoted phrase.
 * May return null when the analyzer removes all of the term's text.
 */
Query Term(String field) : {
  Token text, boostToken = null;
  boolean isPrefix = false;
  boolean isWildcard = false;
  boolean isFuzzy = false;
  boolean inclusive = false;
  Query result;
}
{
  (
      (
          text=<TERM>
        | text=<WILDTERM> { isWildcard = true; }
        | text=<NUMBER>
      )
      [ <STAR> { isPrefix = true; } | <FUZZY> { isFuzzy = true; } ]
      [ <CARAT> boostToken=<NUMBER> ]
      {
        // Precedence when several flags are set: wildcard, prefix, fuzzy.
        if (isWildcard) {
          result = new WildcardQuery(new Term(field, text.image));
        } else if (isPrefix) {
          result = new PrefixQuery(new Term(field, text.image));
        } else if (isFuzzy) {
          result = new FuzzyQuery(new Term(field, text.image));
        } else {
          result = getFieldQuery(field, analyzer, text.image);
        }
      }
    | ( text=<RANGEIN> { inclusive = true; } | text=<RANGEEX> )
      {
        // Drop the surrounding [ ] or { } before analysis.
        result = getRangeQuery(field, analyzer,
                               text.image.substring(1, text.image.length() - 1),
                               inclusive);
      }
    | text=<QUOTED>
      {
        // Drop the surrounding double quotes before analysis.
        result = getFieldQuery(field, analyzer,
                               text.image.substring(1, text.image.length() - 1));
      }
  )
  {
    if (boostToken != null) {
      float weight;
      try {
        weight = Float.valueOf(boostToken.image).floatValue();
      }
      catch (Exception ignored) {
        // Fall back to the neutral boost if the number cannot be read.
        weight = 1.0f;
      }

      // Only these query types are given a boost here; for any other type
      // (or a null result) the boost is dropped.
      if (result instanceof TermQuery) {
        ((TermQuery) result).setBoost(weight);
      } else if (result instanceof PhraseQuery) {
        ((PhraseQuery) result).setBoost(weight);
      } else if (result instanceof MultiTermQuery) {
        ((MultiTermQuery) result).setBoost(weight);
      }
    }
    return result;
  }
}
| |
| |