// src/java/org/apache/lucene/queryParser/QueryParser.jj - lucene-solr - Git at Google

 /**
  * Copyright 2004 The Apache Software Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 options {
   STATIC=false;
   JAVA_UNICODE_ESCAPE=true;
   USER_CHAR_STREAM=true;
 }

 PARSER_BEGIN(QueryParser)

 package org.apache.lucene.queryParser;

 import java.util.Vector;
 import java.io.*;
 import java.text.*;
 import java.util.*;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.document.*;
 import org.apache.lucene.search.*;

 /**
  * This class is generated by JavaCC.  The only method that clients should need
  * to call is <a href="#parse">parse()</a>.
  *
  * The syntax for query strings is as follows:
  * A Query is a series of clauses.
  * A clause may be prefixed by:
  * <ul>
  * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
  * that the clause is required or prohibited respectively; or
  * <li> a term followed by a colon, indicating the field to be searched.
  * This enables one to construct queries which search multiple fields.
  * </ul>
  *
  * A clause may be either:
  * <ul>
  * <li> a term, indicating all the documents that contain this term; or
  * <li> a nested query, enclosed in parentheses.  Note that this may be used
  * with a <code>+</code>/<code>-</code> prefix to require any of a set of
  * terms.
  * </ul>
  *
  * Thus, in BNF, the query grammar is:
  * <pre>
  *   Query  ::= ( Clause )*
  *   Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
  * </pre>
  *
  * <p>
  * Examples of appropriately formatted queries can be found in the <a
  * href="http://jakarta.apache.org/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java">test cases</a>.
  * </p>
  *
  * @author Brian Goetz
  * @author Peter Halacsy
  * @author Tatu Saloranta
  */

 public class QueryParser {

   // Conjunction kinds: returned by the Conjunction() production and
   // interpreted by addClause().
   private static final int CONJ_NONE   = 0;
   private static final int CONJ_AND    = 1;
   private static final int CONJ_OR     = 2;

   // Clause modifier kinds returned by the Modifiers() production:
   // MOD_REQ for a leading "+", MOD_NOT for a leading "-" or NOT.
   private static final int MOD_NONE    = 0;
   private static final int MOD_NOT     = 10;
   private static final int MOD_REQ     = 11;

   // Values accepted by setOperator() / returned by getOperator().
   public static final int DEFAULT_OPERATOR_OR  = 0;
   public static final int DEFAULT_OPERATOR_AND = 1;

   /** The actual operator that parser uses to combine query terms */
   private int operator = DEFAULT_OPERATOR_OR;

   /**
    * Whether terms of wildcard and prefix queries are to be automatically
    * lower-cased or not.  Default is <code>true</code>.
    */
   boolean lowercaseWildcardTerms = true;

   // Analyzer used by getFieldQuery() to tokenize query text.
   Analyzer analyzer;
   // Default field, passed to the Query() production by parse(String).
   String field;
   // Default slop for phrase queries built by getFieldQuery().
   int phraseSlop = 0;
   // Minimum similarity used for fuzzy terms that give no explicit value.
   float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
   // Locale used by getRangeQuery() when it tries to read bounds as dates.
   Locale locale = Locale.getDefault();

   /**
    * Convenience entry point: creates a parser for the given default field
    * and analyzer and uses it to parse one query string into a
    * {@link org.apache.lucene.search.Query}.
    *
    *  @param query  the query string to be parsed.
    *  @param field  the default field for query terms.
    *  @param analyzer   used to find terms in the query text.
    *  @return the query produced by {@link #parse(String)}.
    *  @throws ParseException if the parsing fails
    */
   static public Query parse(String query, String field, Analyzer analyzer)
        throws ParseException {
     return new QueryParser(field, analyzer).parse(query);
   }

   /**
    * Creates a parser bound to a default field and an analyzer.  The
    * underlying character stream starts out empty; {@link #parse(String)}
    * re-initializes it for every query.
    *
    *  @param f  the default field for query terms.
    *  @param a   used to find terms in the query text.
    */
   public QueryParser(String f, Analyzer a) {
     this(new FastCharStream(new StringReader("")));
     this.field = f;
     this.analyzer = a;
   }

   /**
    * Parses a query string, returning a
    * {@link org.apache.lucene.search.Query}.
    * <p>
    * Lexer failures (<code>TokenMgrError</code>) and
    * <code>BooleanQuery.TooManyClauses</code> are rethrown as
    * {@link ParseException}; the original throwable is attached as the
    * cause so its stack trace is not lost.
    *
    *  @param query  the query string to be parsed.
    *  @return the query built for the default field of this parser.
    *  @throws ParseException if the parsing fails
    */
   public Query parse(String query) throws ParseException {
     // Point the generated parser at the new input before parsing.
     ReInit(new FastCharStream(new StringReader(query)));
     try {
       return Query(field);
     }
     catch (TokenMgrError tme) {
       ParseException pe = new ParseException(tme.getMessage());
       pe.initCause(tme);
       throw pe;
     }
     catch (BooleanQuery.TooManyClauses tmc) {
       ParseException pe = new ParseException("Too many boolean clauses");
       pe.initCause(tmc);
       throw pe;
     }
   }

    /**
     * Gives access to the analyzer this parser tokenizes query text with.
     *
     * @return the analyzer currently held by this parser.
     */
   public Analyzer getAnalyzer() {
     return this.analyzer;
   }

   /**
    * Gives access to the default field of this parser.
    *
    * @return the field name passed to the grammar by {@link #parse(String)}.
    */
   public String getField() {
     return this.field;
   }

    /**
     * Gets the minimum similarity applied to fuzzy terms that do not
     * specify one themselves.
     *
     * @return the default minimum similarity for fuzzy queries.
     */
   public float getFuzzyMinSim() {
       return this.fuzzyMinSim;
   }
   /**
    * Sets the minimum similarity applied to fuzzy terms that do not
    * specify one themselves.  The value is stored as given; the range
    * check happens when a fuzzy term is parsed.
    *
    * @param fuzzyMinSim the new default minimum similarity.
    */
   public void setFuzzyMinSim(float fuzzyMinSim) {
       this.fuzzyMinSim = fuzzyMinSim;
   }

   /**
    * Changes the slop that phrase queries receive by default.  A slop of
    * zero, which is the initial value, demands exact phrase matches.
    *
    * @param phraseSlop the new default slop.
    */
   public void setPhraseSlop(int phraseSlop) {
     this.phraseSlop = phraseSlop;
   }

   /**
    * Reports the slop that phrase queries receive by default.
    *
    * @return the default phrase slop.
    */
   public int getPhraseSlop() {
     return this.phraseSlop;
   }

   /**
    * Chooses the boolean operator used between terms that carry no explicit
    * operator or modifier.
    * With <code>DEFAULT_OPERATOR_OR</code> (the classic mode) such terms are
    * optional: <code>capital of Hungary</code> is equal to
    * <code>capital OR of OR Hungary</code>.<br/>
    * With <code>DEFAULT_OPERATOR_AND</code> such terms are in conjunction:
    * the same query is parsed as <code>capital AND of AND Hungary</code>.
    *
    * @param operator <code>DEFAULT_OPERATOR_OR</code> or
    *   <code>DEFAULT_OPERATOR_AND</code>; the value is stored unchecked.
    */
   public void setOperator(int operator) {
     this.operator = operator;
   }

   /**
    * Reports the implicit operator setting.
    *
    * @return the value last given to {@link #setOperator(int)}, initially
    *   <code>DEFAULT_OPERATOR_OR</code>.
    */
   public int getOperator() {
     return this.operator;
   }

   /**
    * Controls whether the terms of wildcard and prefix queries are
    * lower-cased before the query is built.
    *
    * @param lowercaseWildcardTerms <code>true</code> to lower-case them.
    */
   public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) {
     this.lowercaseWildcardTerms = lowercaseWildcardTerms;
   }

   /**
    * Tells whether the terms of wildcard and prefix queries are
    * lower-cased before the query is built.
    *
    * @return <code>true</code> if they are lower-cased.
    */
   public boolean getLowercaseWildcardTerms() {
     return this.lowercaseWildcardTerms;
   }

   /**
    * Changes the locale that range queries use when they try to read their
    * bounds as dates.
    *
    * @param locale the locale for date parsing.
    */
   public void setLocale(Locale locale) {
     this.locale = locale;
   }

   /**
    * Exposes the locale used for date parsing, so that subclasses can
    * read it.
    *
    * @return the current locale.
    */
   public Locale getLocale() {
     return this.locale;
   }

    /**
     * Appends <code>q</code> to <code>clauses</code> as a BooleanClause.
     * The required/prohibited flags of the new clause come from the
     * conjunction and modifier that introduced it and from the default
     * operator; the flags of the preceding clause are adjusted first when
     * the conjunction calls for it.
     *
     * @param clauses clauses collected so far (BooleanClause instances)
     * @param conj one of CONJ_NONE, CONJ_AND, CONJ_OR
     * @param mods one of MOD_NONE, MOD_NOT, MOD_REQ
     * @param q query for the new clause; when <code>null</code> (for example
     *   a term filtered away by the analyzer) no clause is added, but the
     *   preceding clause has still been adjusted
     */
    protected void addClause(Vector clauses, int conj, int mods, Query q) {
     final boolean hasPrevious = clauses.size() > 0;

     // AND makes the preceding clause required, unless it is prohibited.
     if (hasPrevious && conj == CONJ_AND) {
       BooleanClause previous = (BooleanClause) clauses.lastElement();
       if (!previous.prohibited) {
         previous.required = true;
       }
     }

     // With AND as the default operator the first operand of "a OR b" was
     // added as required; OR turns it back into an optional clause unless
     // it is prohibited (-a OR b stays as it is, +a OR b becomes a OR b).
     if (hasPrevious && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
       BooleanClause previous = (BooleanClause) clauses.lastElement();
       if (!previous.prohibited) {
         previous.required = false;
       }
     }

     if (q == null) {
       return;
     }

     // NOT or "-" prohibits the clause under either default operator.
     final boolean prohibited = (mods == MOD_NOT);
     final boolean required;
     if (operator == DEFAULT_OPERATOR_OR) {
       // Required when introduced by "+" or by AND, never when prohibited.
       required = (mods == MOD_REQ) || (conj == CONJ_AND && !prohibited);
     } else {
       // Required unless prohibited or introduced by OR.
       required = !prohibited && conj != CONJ_OR;
     }
     clauses.addElement(new BooleanClause(q, required, prohibited));
    }

   /**
    * Runs <code>queryText</code> through the analyzer and builds a query
    * from the resulting terms: <code>null</code> when there are none, a
    * TermQuery for exactly one, and otherwise a PhraseQuery with the
    * default phrase slop.
    * <p>
    * An IOException while reading tokens ends tokenization early and the
    * terms read so far are used.  The token stream is closed in all cases,
    * including when an unchecked exception escapes.
    *
    * @param field field the terms belong to
    * @param queryText text to analyze
    * @return the query, or <code>null</code> if the analyzer produced no terms
    * @exception ParseException throw in overridden method to disallow
    */
   protected Query getFieldQuery(String field, String queryText)  throws ParseException {
     TokenStream source = analyzer.tokenStream(field,
                                               new StringReader(queryText));
     Vector v = new Vector();

     try {
       // Fully qualified: the unqualified name Token is used in this
       // grammar for the parser's own token objects.
       org.apache.lucene.analysis.Token t;
       while ((t = source.next()) != null) {
         v.addElement(t.termText());
       }
     }
     catch (IOException e) {
       // best effort: treat a read failure as end of stream
     }
     finally {
       try {
         source.close();
       }
       catch (IOException ignored) {
         // ignore
       }
     }

     if (v.size() == 0)
       return null;
     else if (v.size() == 1)
       return new TermQuery(new Term(field, (String) v.elementAt(0)));
     else {
       PhraseQuery q = new PhraseQuery();
       q.setSlop(phraseSlop);
       for (int i=0; i<v.size(); i++) {
         q.add(new Term(field, (String) v.elementAt(i)));
       }
       return q;
     }
   }

   /**
    * Variant of {@link #getFieldQuery(String,String)} that takes an
    * explicit slop.  The two-argument method builds the query; if the
    * result is a PhraseQuery its slop is replaced by <code>slop</code>,
    * any other result is returned untouched.
    * This method may be overridden, for example, to return
    * a SpanNearQuery instead of a PhraseQuery.
    *
    * @param field field the terms belong to
    * @param queryText text to analyze
    * @param slop slop for a resulting phrase query
    * @return the query, possibly <code>null</code>
    * @exception ParseException throw in overridden method to disallow
    */
   protected Query getFieldQuery(String field, String queryText, int slop)
        throws ParseException {
     final Query built = getFieldQuery(field, queryText);
     if (!(built instanceof PhraseQuery)) {
       return built;
     }

     PhraseQuery phrase = (PhraseQuery) built;
     phrase.setSlop(slop);
     return phrase;
   }

   /**
    * Builds a RangeQuery between two bounds.  The bounds are first read as
    * short-format dates in the parser's locale (leniently); when that works
    * for both, they are replaced by their
    * <code>DateField.dateToString</code> form.  If anything fails for
    * either bound, both bounds are used exactly as given, so the range
    * never mixes one date-encoded bound with one raw bound.
    *
    * @param field field the range applies to
    * @param part1 lower bound text
    * @param part2 upper bound text
    * @param inclusive whether the bounds themselves are part of the range
    * @return the range query
    * @exception ParseException throw in overridden method to disallow
    */
   protected Query getRangeQuery(String field,
                                 String part1,
                                 String part2,
                                 boolean inclusive) throws ParseException
   {
     try {
       DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
       df.setLenient(true);
       Date d1 = df.parse(part1);
       Date d2 = df.parse(part2);
       // Convert both bounds before assigning either one, so that a
       // failure on the second conversion cannot leave part1 changed.
       String encoded1 = DateField.dateToString(d1);
       String encoded2 = DateField.dateToString(d2);
       part1 = encoded1;
       part2 = encoded2;
     }
     catch (Exception e) {
       // Deliberately broad: the bounds are not (usable) dates, so they
       // are kept as plain terms.
     }

     return new RangeQuery(new Term(field, part1),
                           new Term(field, part2),
                           inclusive);
   }

   /**
    * Factory method that combines a set of clauses into one query.
    * This implementation adds every clause, in order, to a new
    * BooleanQuery.
    *
    * Extending classes can override it to change the query that is
    * returned.
    *
    * @param clauses Vector that contains {@link BooleanClause} instances
    *    to join.
    *
    * @return Resulting {@link Query} object.
    * @exception ParseException throw in overridden method to disallow
    */
   protected Query getBooleanQuery(Vector clauses) throws ParseException
   {
     BooleanQuery combined = new BooleanQuery();
     Enumeration each = clauses.elements();
     while (each.hasMoreElements()) {
       combined.add((BooleanClause) each.nextElement());
     }
     return combined;
   }

   /**
    * Factory method for wildcard queries.  The grammar calls it for a term
    * token that contains wildcard characters (? and *) and is not a plain
    * prefix term (one whose only wildcard is a single trailing *).
    *<p>
    * When {@link #getLowercaseWildcardTerms()} is on, the term is
    * lower-cased first.  The term is not passed through the Analyzer,
    * since normal Analyzers are unlikely to work properly with wildcard
    * templates.
    *<p>
    * Extending classes can override this method to handle wildcard terms
    * differently, for example to make up for the missing analyzer call.
    *
    * @param field Name of the field query will use.
    * @param termStr Term token that contains one or more wild card
    *   characters (? or *), but is not simple prefix term
    *
    * @return Resulting {@link Query} built for the term
    * @exception ParseException throw in overridden method to disallow
    */
   protected Query getWildcardQuery(String field, String termStr) throws ParseException
   {
     final String pattern = lowercaseWildcardTerms ? termStr.toLowerCase() : termStr;
     return new WildcardQuery(new Term(field, pattern));
   }

   /**
    * Factory method for prefix queries (compare
    * {@link #getWildcardQuery}).  The grammar calls it for a term token in
    * prefix notation, that is, one whose only wildcard is a '*' as its
    * last character.  This special case of a wildcard term gets its own
    * query type.
    *<p>
    * When {@link #getLowercaseWildcardTerms()} is on, the prefix is
    * lower-cased first.  The prefix is not passed through the Analyzer,
    * since normal Analyzers are unlikely to work properly with wildcard
    * templates.
    *<p>
    * Extending classes can override this method to handle prefix terms
    * differently, for example to make up for the missing analyzer call.
    *
    * @param field Name of the field query will use.
    * @param termStr Term token to use for building term for the query
    *    (<b>without</b> trailing '*' character!)
    *
    * @return Resulting {@link Query} built for the term
    * @exception ParseException throw in overridden method to disallow
    */
   protected Query getPrefixQuery(String field, String termStr) throws ParseException
   {
     final String prefix = lowercaseWildcardTerms ? termStr.toLowerCase() : termStr;
     return new PrefixQuery(new Term(field, prefix));
   }

   /**
    * Factory method for fuzzy queries (compare
    * {@link #getWildcardQuery}).  The grammar calls it for a term token
    * that is followed by the fuzzy suffix (~).
    *
    * @param field Name of the field query will use.
    * @param termStr Term token to use for building term for the query
    * @param minSimilarity minimum similarity handed to the FuzzyQuery
    *
    * @return Resulting {@link Query} built for the term
    * @exception ParseException throw in overridden method to disallow
    */
   protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
   {
     return new FuzzyQuery(new Term(field, termStr), minSimilarity);
   }

   /**
    * Returns a String where the escape char has been removed, or kept
    * only once if there was a double escape.
    * <p>
    * The input is read left to right in escape pairs: a backslash that is
    * not itself escaped is dropped and the character after it is copied
    * literally.  Judging each backslash only by its left neighbour gets
    * runs of three or more backslashes wrong: an escaped backslash
    * followed by an escaped '+' (backslash, backslash, backslash, plus)
    * must yield backslash-plus, not two backslashes and a plus.  A single
    * backslash at the very end has nothing to escape and is dropped.
    *
    * @param input text that may contain backslash escapes
    * @return the text with one level of escaping removed
    */
   private String discardEscapeChar(String input) {
     final int length = input.length();
     StringBuffer unescaped = new StringBuffer(length);
     // true while the previous character was an unconsumed escape char
     boolean escaped = false;
     for (int i = 0; i < length; i++) {
       char c = input.charAt(i);
       if (!escaped && c == '\\') {
         escaped = true;
       } else {
         unescaped.append(c);
         escaped = false;
       }
     }
     return unescaped.toString();
   }

   /**
    * Escapes a string for use in a query: every character that
    * QueryParser expects to be escaped is preceded by a <code>\</code>,
    * all other characters are copied unchanged.
    *
    * @param s the text to escape
    * @return the escaped text
    */
   public static String escape(String s) {
     // NOTE: keep this in sync with _ESCAPED_CHAR below!
     final String special = "\\+-!():^[]\"{}~*?";
     final int length = s.length();
     StringBuffer escaped = new StringBuffer();
     int pos = 0;
     while (pos < length) {
       char current = s.charAt(pos++);
       if (special.indexOf(current) >= 0) {
         escaped.append('\\');
       }
       escaped.append(current);
     }
     return escaped.toString();
   }

   /**
    * Command-line helper: parses the first argument as a query against the
    * field "field" with a SimpleAnalyzer and prints the resulting query.
    * Prints a usage message and exits with status 1 when no argument is
    * given.
    *
    * @param args the query string is expected in <code>args[0]</code>
    * @throws Exception if the query cannot be parsed
    */
   public static void main(String[] args) throws Exception {
     if (args.length == 0) {
       System.err.println("Usage: java org.apache.lucene.queryParser.QueryParser <query>");
       System.exit(1);
     }
     QueryParser qp = new QueryParser("field",
                            new org.apache.lucene.analysis.SimpleAnalyzer());
     Query q = qp.parse(args[0]);
     System.out.println(q.toString("field"));
   }
 }

 PARSER_END(QueryParser)

 /* ***************** */
 /* Token Definitions */
 /* ***************** */

 // Private ("#") regular expressions, declared for every lexical state.
 // They are building blocks for the token definitions that follow.
 <*> TOKEN : {
   <#_NUM_CHAR:   ["0"-"9"] >
 // A backslash followed by one of the characters that have a special
 // meaning in the query syntax.
 // NOTE: keep this in sync with escape(String) above!
 | <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]", "\"", "{", "}", "~", "*", "?" ] >
 // First character of a term: anything except whitespace and the special
 // characters, or an escaped special character.
 | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
                            "[", "]", "\"", "{", "}", "~", "*", "?" ]
                        | <_ESCAPED_CHAR> ) >
 // Later characters of a term may also be an unescaped "-" or "+".
 | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
 | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
 }

 // Whitespace is skipped in the DEFAULT, RangeIn and RangeEx states.  The
 // Boost state is not listed here.
 <DEFAULT, RangeIn, RangeEx> SKIP : {
   <<_WHITESPACE>>
 }

 // OG: to support prefix queries:
 // http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137
 // Change from:
 // | <WILDTERM:  <_TERM_START_CHAR>
 //              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
 // To:
 //
 // | <WILDTERM:  (<_TERM_CHAR> | ( [ "*", "?" ] ))* >

 // Tokens of the DEFAULT lexical state.  Several of these expressions can
 // match the same text (a plain word matches both TERM and WILDTERM), so
 // the order of the definitions matters and must not be changed casually.
 <DEFAULT> TOKEN : {
   <AND:       ("AND" | "&&") >
 | <OR:        ("OR" | "||") >
 | <NOT:       ("NOT" | "!") >
 | <PLUS:      "+" >
 | <MINUS:     "-" >
 | <LPAREN:    "(" >
 | <RPAREN:    ")" >
 | <COLON:     ":" >
 // "^" switches to the Boost state, where only a NUMBER is recognized.
 | <CARAT:     "^" > : Boost
 // A quoted string needs at least one character between the quotes.
 | <QUOTED:     "\"" (~["\""])+ "\"">
 | <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
 // "~" optionally followed by a number such as 0.5 or 2.
 | <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
 // Term characters followed by a single trailing "*".
 | <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
 // Term characters mixed with "*" and "?"; the first character must
 // still be a term start character.
 | <WILDTERM:  <_TERM_START_CHAR>
               (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
 // "[" and "{" open an inclusive / exclusive range and switch to the
 // matching range state.
 | <RANGEIN_START: "[" > : RangeIn
 | <RANGEEX_START: "{" > : RangeEx
 }

 // Boost state, entered after "^": an integer or decimal number, after
 // which the lexer returns to the DEFAULT state.
 <Boost> TOKEN : {
 <NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
 }

 // RangeIn state, entered after "[": the optional TO keyword, the closing
 // "]" (back to DEFAULT), a quoted bound, or a bare bound made of any
 // characters other than a space and "]".
 <RangeIn> TOKEN : {
 <RANGEIN_TO: "TO">
 | <RANGEIN_END: "]"> : DEFAULT
 | <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
 | <RANGEIN_GOOP: (~[ " ", "]" ])+ >
 }

 // RangeEx state, entered after "{": the optional TO keyword, the closing
 // "}" (back to DEFAULT), a quoted bound, or a bare bound made of any
 // characters other than a space and "}".
 <RangeEx> TOKEN : {
 <RANGEEX_TO: "TO">
 | <RANGEEX_END: "}"> : DEFAULT
 | <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
 | <RANGEEX_GOOP: (~[ " ", "}" ])+ >
 }

 // *   Query  ::= ( Clause )*
 // *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

 // Matches an optional conjunction and returns CONJ_AND for <AND>,
 // CONJ_OR for <OR>, or CONJ_NONE when neither token is present.
 int Conjunction() : {
   int ret = CONJ_NONE;
 }
 {
   [
     <AND> { ret = CONJ_AND; }
     | <OR>  { ret = CONJ_OR; }
   ]
   { return ret; }
 }

 // Matches an optional clause modifier and returns MOD_REQ for <PLUS>,
 // MOD_NOT for <MINUS> or <NOT>, or MOD_NONE when no modifier is present.
 int Modifiers() : {
   int ret = MOD_NONE;
 }
 {
   [
      <PLUS> { ret = MOD_REQ; }
      | <MINUS> { ret = MOD_NOT; }
      | <NOT> { ret = MOD_NOT; }
   ]
   { return ret; }
 }

 // Parses one or more clauses, each optionally preceded by a conjunction
 // and a modifier, and collects them through addClause().
 // Returns the first clause's query directly when it carried no modifier
 // and ended up as the only collected clause; otherwise returns whatever
 // getBooleanQuery() builds from the collected clauses.
 Query Query(String field) :
 {
   Vector clauses = new Vector();
   Query q, firstQuery=null;
   int conj, mods;
 }
 {
   mods=Modifiers() q=Clause(field)
   {
     // The first clause has no conjunction in front of it.
     addClause(clauses, CONJ_NONE, mods, q);
     // Remember an unmodified first clause so that a single-clause query
     // does not need to be wrapped in a boolean query.
     if (mods == MOD_NONE)
         firstQuery=q;
   }
   (
     conj=Conjunction() mods=Modifiers() q=Clause(field)
     { addClause(clauses, conj, mods, q); }
   )*
     {
       if (clauses.size() == 1 && firstQuery != null)
         return firstQuery;
       else {
   return getBooleanQuery(clauses);
       }
     }
 }

 // Parses a single clause: an optional "field:" prefix followed by either
 // a term or a parenthesized sub-query with an optional "^boost".
 // The field prefix, when present, replaces the field passed in for this
 // clause.  The boost handled here belongs to the parenthesized form only;
 // the boost token is not assigned in the Term() alternative.
 Query Clause(String field) : {
   Query q;
   Token fieldToken=null, boost=null;
 }
 {
   [
     // Two tokens of lookahead tell "field:" apart from a plain term.
     LOOKAHEAD(2)
     fieldToken=<TERM> <COLON> {
       field=discardEscapeChar(fieldToken.image);
     }
   ]

   (
    q=Term(field)
    | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?

   )
     {
       if (boost != null) {
         float f = (float)1.0;
   try {
     f = Float.valueOf(boost.image).floatValue();
           q.setBoost(f);
   } catch (Exception ignored) { }
         // Any failure while parsing or applying the boost is ignored and
         // the query is returned as it is.
       }
       return q;
     }
 }


 // Parses one term-level expression for the given field and returns the
 // query built for it by the matching factory method.  Four forms are
 // accepted:
 //   - a plain, prefix, wildcard or numeric term, optionally followed by a
 //     fuzzy suffix and/or a boost;
 //   - an inclusive range  [a TO b]  with optional boost;
 //   - an exclusive range  {a TO b}  with optional boost;
 //   - a quoted phrase with optional slop ("~n") and optional boost.
 // The result can be null when a factory method returns null.
 Query Term(String field) : {
   Token term, boost=null, fuzzySlop=null, goop1, goop2;
   boolean prefix = false;
   boolean wildcard = false;
   boolean fuzzy = false;
   // NOTE(review): rangein is never read in this production.
   boolean rangein = false;
   Query q;
 }
 {
   (
      (
        term=<TERM>
        | term=<PREFIXTERM> { prefix=true; }
        | term=<WILDTERM> { wildcard=true; }
        | term=<NUMBER>
      )
      // The fuzzy suffix may come before or after the boost.
      [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
      [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
      {
        String termImage=discardEscapeChar(term.image);
        // Wildcard and prefix take precedence: a fuzzy suffix on such a
        // term is matched by the grammar but does not affect the query.
        if (wildcard) {
        q = getWildcardQuery(field, termImage);
        } else if (prefix) {
          // Strip the trailing "*" before removing the escapes.
          q = getPrefixQuery(field,
            discardEscapeChar(term.image.substring
           (0, term.image.length()-1)));
        } else if (fuzzy) {
        	  float fms = fuzzyMinSim;
        	  try {
             // Skip the leading "~"; a bare "~" fails to parse here and
             // leaves the default similarity in place.
             fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
        	  } catch (Exception ignored) { }
        	 if(fms < 0.0f || fms > 1.0f){
        	   throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
        	 }
          q = getFuzzyQuery(field, termImage, fms);
        } else {
          q = getFieldQuery(field, termImage);
        }
      }
      // Inclusive range; the TO keyword between the bounds is optional.
      | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
          [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
          <RANGEIN_END> )
        [ <CARAT> boost=<NUMBER> ]
         {
           // Quoted bounds lose their quotes, bare bounds lose their
           // escape characters.
           if (goop1.kind == RANGEIN_QUOTED) {
             goop1.image = goop1.image.substring(1, goop1.image.length()-1);
           } else {
             goop1.image = discardEscapeChar(goop1.image);
           }
           if (goop2.kind == RANGEIN_QUOTED) {
             goop2.image = goop2.image.substring(1, goop2.image.length()-1);
       } else {
         goop2.image = discardEscapeChar(goop2.image);
       }
           q = getRangeQuery(field, goop1.image, goop2.image, true);
         }
      // Exclusive range; same handling as above with inclusive=false.
      | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
          [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
          <RANGEEX_END> )
        [ <CARAT> boost=<NUMBER> ]
         {
           if (goop1.kind == RANGEEX_QUOTED) {
             goop1.image = goop1.image.substring(1, goop1.image.length()-1);
           } else {
             goop1.image = discardEscapeChar(goop1.image);
           }
           if (goop2.kind == RANGEEX_QUOTED) {
             goop2.image = goop2.image.substring(1, goop2.image.length()-1);
       } else {
         goop2.image = discardEscapeChar(goop2.image);
       }

           q = getRangeQuery(field, goop1.image, goop2.image, false);
         }
      // Quoted phrase; here the "~n" suffix is a slop, not a similarity.
      | term=<QUOTED>
        [ fuzzySlop=<FUZZY_SLOP> ]
        [ <CARAT> boost=<NUMBER> ]
        {
          int s = phraseSlop;

          if (fuzzySlop != null) {
            try {
              // The number is read as a float and truncated to an int; on
              // failure the default phrase slop is kept.
              s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
            }
            catch (Exception ignored) { }
          }
          // The phrase text is passed on without its surrounding quotes.
          q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
        }
   )
   {
     if (boost != null) {
       float f = (float) 1.0;
       try {
         f = Float.valueOf(boost.image).floatValue();
       }
       catch (Exception ignored) {
     /* Should this be handled somehow? (defaults to "no boost", if
      * boost number is invalid)
      */
       }

       // avoid boosting null queries, such as those caused by stop words
       if (q != null) {
         q.setBoost(f);
       }
     }
     return q;
   }
 }
	/**
	* Copyright 2004 The Apache Software Foundation
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	options {
	STATIC=false;
	JAVA_UNICODE_ESCAPE=true;
	USER_CHAR_STREAM=true;
	}

	PARSER_BEGIN(QueryParser)

	package org.apache.lucene.queryParser;

	import java.util.Vector;
	import java.io.*;
	import java.text.*;
	import java.util.*;
	import org.apache.lucene.index.Term;
	import org.apache.lucene.analysis.*;
	import org.apache.lucene.document.*;
	import org.apache.lucene.search.*;

	/**
	* This class is generated by JavaCC. The only method that clients should need
	* to call is <a href="#parse">parse()</a>.
	*
	* The syntax for query strings is as follows:
	* A Query is a series of clauses.
	* A clause may be prefixed by:
	* <ul>
	* <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
	* that the clause is required or prohibited respectively; or
	* <li> a term followed by a colon, indicating the field to be searched.
	* This enables one to construct queries which search multiple fields.
	* </ul>
	*
	* A clause may be either:
	* <ul>
	* <li> a term, indicating all the documents that contain this term; or
	* <li> a nested query, enclosed in parentheses. Note that this may be used
	* with a <code>+</code>/<code>-</code> prefix to require any of a set of
	* terms.
	* </ul>
	*
	* Thus, in BNF, the query grammar is:
	* <pre>
	* Query ::= ( Clause )*
	* Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
	* </pre>
	*
	* <p>
	* Examples of appropriately formatted queries can be found in the <a
	* href="http://jakarta.apache.org/lucene/src/test/org/apache/lucene/queryParser/TestQueryParser.java">test cases</a>.
	* </p>
	*
	* @author Brian Goetz
	* @author Peter Halacsy
	* @author Tatu Saloranta
	*/

	public class QueryParser {

	// Conjunction kinds interpreted by addClause().
	private static final int CONJ_NONE = 0;
	private static final int CONJ_AND = 1;
	private static final int CONJ_OR = 2;

	// Clause modifier kinds interpreted by addClause(): MOD_REQ marks a
	// required clause, MOD_NOT a prohibited one.
	private static final int MOD_NONE = 0;
	private static final int MOD_NOT = 10;
	private static final int MOD_REQ = 11;

	// Values accepted by setOperator() / returned by getOperator().
	public static final int DEFAULT_OPERATOR_OR = 0;
	public static final int DEFAULT_OPERATOR_AND = 1;

	/** The actual operator that parser uses to combine query terms */
	private int operator = DEFAULT_OPERATOR_OR;

	/**
	* Whether terms of wildcard and prefix queries are to be automatically
	* lower-cased or not. Default is <code>true</code>.
	*/
	boolean lowercaseWildcardTerms = true;

	// Analyzer used by getFieldQuery() to tokenize query text.
	Analyzer analyzer;
	// Default field, passed to the Query() production by parse(String).
	String field;
	// Default slop for phrase queries built by getFieldQuery().
	int phraseSlop = 0;
	// Default minimum similarity for fuzzy queries.
	float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
	// Locale used by getRangeQuery() when it tries to read bounds as dates.
	Locale locale = Locale.getDefault();

	/**
	 * Convenience entry point: creates a parser for the given default field
	 * and analyzer and uses it to parse one query string into a
	 * {@link org.apache.lucene.search.Query}.
	 *
	 * @param query the query string to be parsed.
	 * @param field the default field for query terms.
	 * @param analyzer used to find terms in the query text.
	 * @return the query produced by {@link #parse(String)}.
	 * @throws ParseException if the parsing fails
	 */
	static public Query parse(String query, String field, Analyzer analyzer)
	    throws ParseException {
	  return new QueryParser(field, analyzer).parse(query);
	}

	/**
	 * Creates a parser bound to a default field and an analyzer. The
	 * underlying character stream starts out empty; {@link #parse(String)}
	 * re-initializes it for every query.
	 *
	 * @param f the default field for query terms.
	 * @param a used to find terms in the query text.
	 */
	public QueryParser(String f, Analyzer a) {
	  this(new FastCharStream(new StringReader("")));
	  this.field = f;
	  this.analyzer = a;
	}

	/**
	 * Parses a query string, returning a
	 * {@link org.apache.lucene.search.Query}.
	 * <p>
	 * Lexer failures (<code>TokenMgrError</code>) and
	 * <code>BooleanQuery.TooManyClauses</code> are rethrown as
	 * {@link ParseException}; the original throwable is attached as the
	 * cause so its stack trace is not lost.
	 *
	 * @param query the query string to be parsed.
	 * @return the query built for the default field of this parser.
	 * @throws ParseException if the parsing fails
	 */
	public Query parse(String query) throws ParseException {
	  // Point the generated parser at the new input before parsing.
	  ReInit(new FastCharStream(new StringReader(query)));
	  try {
	    return Query(field);
	  }
	  catch (TokenMgrError tme) {
	    ParseException pe = new ParseException(tme.getMessage());
	    pe.initCause(tme);
	    throw pe;
	  }
	  catch (BooleanQuery.TooManyClauses tmc) {
	    ParseException pe = new ParseException("Too many boolean clauses");
	    pe.initCause(tmc);
	    throw pe;
	  }
	}

	/**
	 * Gives access to the analyzer this parser tokenizes query text with.
	 *
	 * @return the analyzer currently held by this parser.
	 */
	public Analyzer getAnalyzer() {
	  return this.analyzer;
	}

	/**
	 * Gives access to the default field of this parser.
	 *
	 * @return the field name passed to the grammar by {@link #parse(String)}.
	 */
	public String getField() {
	  return this.field;
	}

	/**
	 * Gets the default minimum similarity for fuzzy queries.
	 *
	 * @return the default minimum similarity.
	 */
	public float getFuzzyMinSim() {
	  return this.fuzzyMinSim;
	}
	/**
	 * Sets the default minimum similarity for fuzzy queries. The value is
	 * stored as given.
	 *
	 * @param fuzzyMinSim the new default minimum similarity.
	 */
	public void setFuzzyMinSim(float fuzzyMinSim) {
	  this.fuzzyMinSim = fuzzyMinSim;
	}

	/**
	 * Changes the slop that phrase queries receive by default. A slop of
	 * zero, which is the initial value, demands exact phrase matches.
	 *
	 * @param phraseSlop the new default slop.
	 */
	public void setPhraseSlop(int phraseSlop) {
	  this.phraseSlop = phraseSlop;
	}

	/**
	 * Reports the slop that phrase queries receive by default.
	 *
	 * @return the default phrase slop.
	 */
	public int getPhraseSlop() {
	  return this.phraseSlop;
	}

	/**
	 * Chooses the boolean operator used between terms that carry no explicit
	 * operator or modifier.
	 * With <code>DEFAULT_OPERATOR_OR</code> (the classic mode) such terms are
	 * optional: <code>capital of Hungary</code> is equal to
	 * <code>capital OR of OR Hungary</code>.<br/>
	 * With <code>DEFAULT_OPERATOR_AND</code> such terms are in conjunction:
	 * the same query is parsed as <code>capital AND of AND Hungary</code>.
	 *
	 * @param operator <code>DEFAULT_OPERATOR_OR</code> or
	 *   <code>DEFAULT_OPERATOR_AND</code>; the value is stored unchecked.
	 */
	public void setOperator(int operator) {
	  this.operator = operator;
	}

	/**
	 * Reports the implicit operator setting.
	 *
	 * @return the value last given to {@link #setOperator(int)}, initially
	 *   <code>DEFAULT_OPERATOR_OR</code>.
	 */
	public int getOperator() {
	  return this.operator;
	}

	/**
	 * Controls whether the terms of wildcard and prefix queries are
	 * automatically lower-cased.
	 *
	 * @param lowercaseWildcardTerms <code>true</code> to lower-case them.
	 */
	public void setLowercaseWildcardTerms(boolean lowercaseWildcardTerms) {
	  this.lowercaseWildcardTerms = lowercaseWildcardTerms;
	}

	/**
	 * Tells whether the terms of wildcard and prefix queries are
	 * automatically lower-cased.
	 *
	 * @return <code>true</code> if they are lower-cased.
	 */
	public boolean getLowercaseWildcardTerms() {
	  return this.lowercaseWildcardTerms;
	}

	/**
	 * Changes the locale that range queries use when they try to read their
	 * bounds as dates.
	 *
	 * @param locale the locale for date parsing.
	 */
	public void setLocale(Locale locale) {
	  this.locale = locale;
	}

	/**
	 * Exposes the locale used for date parsing, so that subclasses can
	 * read it.
	 *
	 * @return the current locale.
	 */
	public Locale getLocale() {
	  return this.locale;
	}

	/**
	 * Appends <code>q</code> to <code>clauses</code> as a BooleanClause.
	 * The required/prohibited flags of the new clause come from the
	 * conjunction and modifier that introduced it and from the default
	 * operator; the flags of the preceding clause are adjusted first when
	 * the conjunction calls for it.
	 *
	 * @param clauses clauses collected so far (BooleanClause instances)
	 * @param conj one of CONJ_NONE, CONJ_AND, CONJ_OR
	 * @param mods one of MOD_NONE, MOD_NOT, MOD_REQ
	 * @param q query for the new clause; when <code>null</code> (for example
	 *   a term filtered away by the analyzer) no clause is added, but the
	 *   preceding clause has still been adjusted
	 */
	protected void addClause(Vector clauses, int conj, int mods, Query q) {
	  final boolean hasPrevious = clauses.size() > 0;

	  // AND makes the preceding clause required, unless it is prohibited.
	  if (hasPrevious && conj == CONJ_AND) {
	    BooleanClause previous = (BooleanClause) clauses.lastElement();
	    if (!previous.prohibited) {
	      previous.required = true;
	    }
	  }

	  // With AND as the default operator the first operand of "a OR b" was
	  // added as required; OR turns it back into an optional clause unless
	  // it is prohibited (-a OR b stays as it is, +a OR b becomes a OR b).
	  if (hasPrevious && operator == DEFAULT_OPERATOR_AND && conj == CONJ_OR) {
	    BooleanClause previous = (BooleanClause) clauses.lastElement();
	    if (!previous.prohibited) {
	      previous.required = false;
	    }
	  }

	  if (q == null) {
	    return;
	  }

	  // NOT or "-" prohibits the clause under either default operator.
	  final boolean prohibited = (mods == MOD_NOT);
	  final boolean required;
	  if (operator == DEFAULT_OPERATOR_OR) {
	    // Required when introduced by "+" or by AND, never when prohibited.
	    required = (mods == MOD_REQ) || (conj == CONJ_AND && !prohibited);
	  } else {
	    // Required unless prohibited or introduced by OR.
	    required = !prohibited && conj != CONJ_OR;
	  }
	  clauses.addElement(new BooleanClause(q, required, prohibited));
	}

	/**
	 * Runs <code>queryText</code> through the analyzer and builds a query
	 * from the resulting terms: <code>null</code> when there are none, a
	 * TermQuery for exactly one, and otherwise a PhraseQuery with the
	 * default phrase slop.
	 * <p>
	 * An IOException while reading tokens ends tokenization early and the
	 * terms read so far are used. The token stream is closed in all cases,
	 * including when an unchecked exception escapes.
	 *
	 * @param field field the terms belong to
	 * @param queryText text to analyze
	 * @return the query, or <code>null</code> if the analyzer produced no terms
	 * @exception ParseException throw in overridden method to disallow
	 */
	protected Query getFieldQuery(String field, String queryText) throws ParseException {
	  TokenStream source = analyzer.tokenStream(field,
	      new StringReader(queryText));
	  Vector v = new Vector();

	  try {
	    // Fully qualified: the unqualified name Token is used in this
	    // grammar for the parser's own token objects.
	    org.apache.lucene.analysis.Token t;
	    while ((t = source.next()) != null) {
	      v.addElement(t.termText());
	    }
	  }
	  catch (IOException e) {
	    // best effort: treat a read failure as end of stream
	  }
	  finally {
	    try {
	      source.close();
	    }
	    catch (IOException ignored) {
	      // ignore
	    }
	  }

	  if (v.size() == 0)
	    return null;
	  else if (v.size() == 1)
	    return new TermQuery(new Term(field, (String) v.elementAt(0)));
	  else {
	    PhraseQuery q = new PhraseQuery();
	    q.setSlop(phraseSlop);
	    for (int i=0; i<v.size(); i++) {
	      q.add(new Term(field, (String) v.elementAt(i)));
	    }
	    return q;
	  }
	}

	/**
	 * Slop-aware variant of {@link #getFieldQuery(String,String)}. The base
	 * implementation builds the query with the two-argument method and, if
	 * the result is a PhraseQuery, replaces its slop with <code>slop</code>;
	 * any other result is returned untouched. Subclasses may override this,
	 * for example to return a SpanNearQuery instead of a PhraseQuery.
	 *
	 * @param field Name of the field the query will use.
	 * @param queryText Text to build the query from.
	 * @param slop Slop to apply when the result is a PhraseQuery.
	 *
	 * @return Resulting {@link Query}, as returned by the two-argument method
	 * @exception ParseException throw in overridden method to disallow
	 */
	protected Query getFieldQuery(String field, String queryText, int slop)
	    throws ParseException {
	  Query result = getFieldQuery(field, queryText);
	  if (!(result instanceof PhraseQuery)) {
	    // null or a non-phrase query: slop does not apply
	    return result;
	  }
	  PhraseQuery phrase = (PhraseQuery) result;
	  phrase.setSlop(slop);
	  return phrase;
	}

	/**
	 * Factory method for generating a range query.
	 * <p>
	 * Both end points are first tried as dates, parsed leniently with the
	 * {@link DateFormat#SHORT} format of the parser's locale. Only when both
	 * parse and both convert through <code>DateField.dateToString</code> are
	 * the converted strings used; on any failure both end points are used
	 * exactly as given, so the range never mixes a converted end point with
	 * a raw one.
	 *
	 * @param field Name of the field the range applies to.
	 * @param part1 First end point of the range, as written in the query.
	 * @param part2 Second end point of the range, as written in the query.
	 * @param inclusive Passed through to RangeQuery; the grammar passes
	 *  <code>true</code> for <code>[..]</code> and <code>false</code> for
	 *  <code>{..}</code> ranges.
	 *
	 * @return Resulting RangeQuery
	 * @exception ParseException throw in overridden method to disallow
	 */
	protected Query getRangeQuery(String field,
	                              String part1,
	                              String part2,
	                              boolean inclusive) throws ParseException
	{
	  String first = part1;
	  String second = part2;
	  try {
	    DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
	    df.setLenient(true);
	    Date d1 = df.parse(part1);
	    Date d2 = df.parse(part2);
	    String converted1 = DateField.dateToString(d1);
	    String converted2 = DateField.dateToString(d2);
	    // Commit only once BOTH end points converted, so a failure on the
	    // second one cannot leave the first one already rewritten.
	    first = converted1;
	    second = converted2;
	  }
	  catch (Exception e) {
	    // Deliberate best effort: the end points are not (both) usable as
	    // dates, so the range is built from the raw text instead.
	  }

	  return new RangeQuery(new Term(field, first),
	                        new Term(field, second),
	                        inclusive);
	}

	/**
	 * Factory method that assembles the final query from a list of clauses.
	 * The default implementation adds every clause, in order, to a new
	 * BooleanQuery.
	 *
	 * Subclasses may override this to alter or replace the query being
	 * returned.
	 *
	 * @param clauses Vector that contains {@link BooleanClause} instances
	 *    to join.
	 *
	 * @return Resulting {@link Query} object.
	 * @exception ParseException throw in overridden method to disallow
	 */
	protected Query getBooleanQuery(Vector clauses) throws ParseException
	{
	  BooleanQuery combined = new BooleanQuery();
	  Enumeration pending = clauses.elements();
	  while (pending.hasMoreElements()) {
	    combined.add((BooleanClause) pending.nextElement());
	  }
	  return combined;
	}

	/**
	 * Factory method for wildcard queries. The grammar calls it for a term
	 * token containing one or more wildcard characters (? and *) that is not
	 * a plain prefix term (one whose only wildcard is a single trailing *).
	 *<p>
	 * When <code>lowercaseWildcardTerms</code> is set the term is lower-cased
	 * first. The term is not passed through the Analyzer, since normal
	 * Analyzers are unlikely to work properly with wildcard templates.
	 *<p>
	 * Subclasses may override this to provide custom handling for wildcard
	 * queries, which may be necessary due to the missing analyzer call.
	 *
	 * @param field Name of the field query will use.
	 * @param termStr Term token that contains one or more wild card
	 *   characters (? or *), but is not simple prefix term
	 *
	 * @return Resulting {@link Query} built for the term
	 * @exception ParseException throw in overridden method to disallow
	 */
	protected Query getWildcardQuery(String field, String termStr) throws ParseException
	{
	  String pattern = lowercaseWildcardTerms ? termStr.toLowerCase() : termStr;
	  return new WildcardQuery(new Term(field, pattern));
	}

	/**
	 * Factory method for prefix queries (compare {@link #getWildcardQuery}).
	 * The grammar calls it for a term token written in prefix notation, i.e.
	 * one whose only wildcard is a single '*' as its last character. This
	 * special case of a wildcard term is built as a PrefixQuery rather than
	 * a WildcardQuery.
	 *<p>
	 * When <code>lowercaseWildcardTerms</code> is set the prefix is
	 * lower-cased first. The prefix is not passed through the Analyzer,
	 * since normal Analyzers are unlikely to work properly with wildcard
	 * templates.
	 *<p>
	 * Subclasses may override this to provide custom handling for prefix
	 * queries, which may be necessary due to the missing analyzer call.
	 *
	 * @param field Name of the field query will use.
	 * @param termStr Term token to use for building term for the query
	 *    (<b>without</b> trailing '*' character!)
	 *
	 * @return Resulting {@link Query} built for the term
	 * @exception ParseException throw in overridden method to disallow
	 */
	protected Query getPrefixQuery(String field, String termStr) throws ParseException
	{
	  String prefixText = lowercaseWildcardTerms ? termStr.toLowerCase() : termStr;
	  return new PrefixQuery(new Term(field, prefixText));
	}

	/**
	 * Factory method for fuzzy queries (compare {@link #getWildcardQuery}).
	 * The grammar calls it for a term token that has the fuzzy suffix (~)
	 * appended. The term is used as given; this method neither lower-cases
	 * nor analyzes it.
	 *
	 * @param field Name of the field query will use.
	 * @param termStr Term token to use for building term for the query
	 * @param minSimilarity Minimum similarity handed to the FuzzyQuery; the
	 *    grammar only passes values between 0.0f and 1.0f
	 *
	 * @return Resulting {@link Query} built for the term
	 * @exception ParseException throw in overridden method to disallow
	 */
	protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
	{
	  return new FuzzyQuery(new Term(field, termStr), minSimilarity);
	}

	/**
	* Returns a String where the escape char has been
	* removed, or kept only once if there was a double escape.
	*/
	private String discardEscapeChar(String input) {
	char[] caSource = input.toCharArray();
	char[] caDest = new char[caSource.length];
	int j = 0;
	for (int i = 0; i < caSource.length; i++) {
	if ((caSource[i] != '\\') \|\| (i > 0 && caSource[i-1] == '\\')) {
	caDest[j++]=caSource[i];
	}
	}
	return new String(caDest, 0, j);
	}

	/**
	* Returns a String where those characters that QueryParser
	* expects to be escaped are escaped, i.e. preceded by a <code>\</code>.
	*/
	public static String escape(String s) {
	StringBuffer sb = new StringBuffer();
	for (int i = 0; i < s.length(); i++) {
	char c = s.charAt(i);
	// NOTE: keep this in sync with _ESCAPED_CHAR below!
	if (c == '\\' \|\| c == '+' \|\| c == '-' \|\| c == '!' \|\| c == '(' \|\| c == ')' \|\| c == ':'
	\|\| c == '^' \|\| c == '[' \|\| c == ']' \|\| c == '\"' \|\| c == '{' \|\| c == '}' \|\| c == '~'
	\|\| c == '*' \|\| c == '?') {
	sb.append('\\');
	}
	sb.append(c);
	}
	return sb.toString();
	}

	/**
	 * Command-line entry point for trying the parser: parses the first
	 * argument as a query against the default field "field" using a
	 * SimpleAnalyzer and prints the resulting query. Prints a usage line
	 * instead when no argument is given.
	 *
	 * @param args args[0] is the query string to parse
	 */
	public static void main(String[] args) throws Exception {
	  if (args.length == 0) {
	    // Without this guard args[0] fails with ArrayIndexOutOfBoundsException.
	    System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
	    return;
	  }
	  QueryParser qp = new QueryParser("field",
	                         new org.apache.lucene.analysis.SimpleAnalyzer());
	  Query q = qp.parse(args[0]);
	  System.out.println(q.toString("field"));
	}
	}

	PARSER_END(QueryParser)

	/* ***************** */
	/* Token Definitions */
	/* ***************** */

	// Private (#) helper expressions, available in every lexical state (<*>).
	// They only serve as building blocks for the real tokens defined below.
	<*> TOKEN : {
	  <#_NUM_CHAR:   ["0"-"9"] >
	// NOTE: keep this in sync with escape(String) above!
	// A backslash followed by one of the characters that have syntactic meaning.
	| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
	                          "[", "]", "\"", "{", "}", "~", "*", "?" ] >
	// First character of a term: anything except whitespace and the syntax
	// characters, or an escaped syntax character.
	| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
	                           "[", "]", "\"", "{", "}", "~", "*", "?" ]
	                       | <_ESCAPED_CHAR> ) >
	// Later characters of a term may additionally be an unescaped "-" or "+".
	| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
	| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
	}

	// Whitespace is skipped in the DEFAULT, RangeIn and RangeEx lexical states.
	<DEFAULT, RangeIn, RangeEx> SKIP : {
	<<_WHITESPACE>>
	}

	// OG: to support prefix queries:
	// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137
	// Change from:
	// | <WILDTERM: <_TERM_START_CHAR>
	// (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
	// To:
	//
	// | <WILDTERM: (<_TERM_CHAR> | ( [ "*", "?" ] ))* >

	// Tokens of the DEFAULT lexical state. <CARAT> switches to the Boost state
	// and the range openers switch to RangeIn / RangeEx.
	<DEFAULT> TOKEN : {
	  <AND:       ("AND" | "&&") >
	| <OR:        ("OR" | "||") >
	| <NOT:       ("NOT" | "!") >
	| <PLUS:      "+" >
	| <MINUS:     "-" >
	| <LPAREN:    "(" >
	| <RPAREN:    ")" >
	| <COLON:     ":" >
	| <CARAT:     "^" > : Boost
	| <QUOTED:     "\"" (~["\""])+ "\"">
	| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)* >
	| <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
	// A term whose only wildcard is one trailing "*".
	| <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
	// A term that may contain "*" and "?" anywhere after its first character.
	| <WILDTERM:  <_TERM_START_CHAR>
	              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
	| <RANGEIN_START: "[" > : RangeIn
	| <RANGEEX_START: "{" > : RangeEx
	}

	// Boost lexical state (entered by <CARAT> in DEFAULT): a NUMBER is an integer
	// or decimal literal, and matching it switches back to DEFAULT.
	<Boost> TOKEN : {
	<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
	}

	// Tokens inside an inclusive range "[ ... ]"; "]" returns to DEFAULT.
	// GOOP is any run of characters other than a space or the closing bracket.
	<RangeIn> TOKEN : {
	  <RANGEIN_TO: "TO">
	| <RANGEIN_END: "]"> : DEFAULT
	| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
	| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
	}

	// Tokens inside an exclusive range "{ ... }"; "}" returns to DEFAULT.
	// GOOP is any run of characters other than a space or the closing brace.
	<RangeEx> TOKEN : {
	  <RANGEEX_TO: "TO">
	| <RANGEEX_END: "}"> : DEFAULT
	| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
	| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
	}

	// * Query ::= ( Clause )*
	// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

	// Parses an optional AND / OR between clauses.
	// Returns CONJ_AND or CONJ_OR, or CONJ_NONE when neither is present.
	int Conjunction() : {
	  int ret = CONJ_NONE;
	}
	{
	  [
	    <AND> { ret = CONJ_AND; }
	    | <OR>  { ret = CONJ_OR; }
	  ]
	  { return ret; }
	}

	// Parses an optional clause modifier.
	// Returns MOD_REQ for "+", MOD_NOT for "-" or NOT, and MOD_NONE when no
	// modifier is present.
	int Modifiers() : {
	  int ret = MOD_NONE;
	}
	{
	  [
	    <PLUS> { ret = MOD_REQ; }
	    | <MINUS> { ret = MOD_NOT; }
	    | <NOT> { ret = MOD_NOT; }
	  ]
	  { return ret; }
	}

	// Parses one or more clauses, each optionally preceded by a conjunction
	// and a modifier, and collects them through addClause(). A query made of a
	// single clause without modifier is returned as that clause's own query;
	// everything else is combined by getBooleanQuery().
	Query Query(String field) :
	{
	  Vector clauses = new Vector();
	  Query q, firstQuery=null;
	  int conj, mods;
	}
	{
	  mods=Modifiers() q=Clause(field)
	  {
	    addClause(clauses, CONJ_NONE, mods, q);
	    // Only an unmodified first clause may later be returned unwrapped.
	    firstQuery = (mods == MOD_NONE) ? q : null;
	  }
	  (
	    conj=Conjunction() mods=Modifiers() q=Clause(field)
	    { addClause(clauses, conj, mods, q); }
	  )*
	  {
	    boolean soleUnmodifiedClause = (firstQuery != null) && (clauses.size() == 1);
	    if (!soleUnmodifiedClause) {
	      return getBooleanQuery(clauses);
	    }
	    return firstQuery;
	  }
	}

	// Parses a single clause: an optional "field:" prefix followed by either a
	// term or a parenthesized sub-query with an optional "^boost". A field
	// prefix replaces the field handed in by the caller for this clause only.
	Query Clause(String field) : {
	  Query q;
	  Token fieldToken=null, boost=null;
	}
	{
	  [
	    // two tokens of lookahead tell "field:" apart from a plain term
	    LOOKAHEAD(2)
	    fieldToken=<TERM> <COLON> {
	      field=discardEscapeChar(fieldToken.image);
	    }
	  ]

	  (
	    q=Term(field)
	    | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?

	  )
	  {
	    // Boost only a real query; a null query is simply returned as is,
	    // mirroring the null check done in Term().
	    if (boost != null && q != null) {
	      try {
	        float f = Float.valueOf(boost.image).floatValue();
	        q.setBoost(f);
	      } catch (Exception ignored) {
	        // an unparsable boost number leaves the query's boost unchanged
	      }
	    }
	    return q;
	  }
	}


	// Parses one term-level expression and builds its query through the
	// overridable factory methods:
	//   - a plain, prefix or wildcard term, optionally followed by a fuzzy
	//     suffix and/or a boost;
	//   - an inclusive "[a TO b]" or exclusive "{a TO b}" range;
	//   - a quoted phrase, optionally followed by "~slop" and/or a boost.
	// The returned query may be null (getFieldQuery returns null when the
	// analyzer yields no tokens); a boost is applied only to a non-null query.
	Query Term(String field) : {
	  Token term, boost=null, fuzzySlop=null, goop1, goop2;
	  boolean prefix = false;
	  boolean wildcard = false;
	  boolean fuzzy = false;
	  Query q;
	}
	{
	  (
	     (
	       term=<TERM>
	       | term=<PREFIXTERM> { prefix=true; }
	       | term=<WILDTERM> { wildcard=true; }
	       | term=<NUMBER>
	     )
	     [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
	     [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
	     {
	       String termImage=discardEscapeChar(term.image);
	       // wildcard takes precedence over prefix, and prefix over fuzzy
	       if (wildcard) {
	         q = getWildcardQuery(field, termImage);
	       } else if (prefix) {
	         // strip the trailing '*' before removing escapes
	         q = getPrefixQuery(field,
	             discardEscapeChar(term.image.substring
	                               (0, term.image.length()-1)));
	       } else if (fuzzy) {
	         float fms = fuzzyMinSim;
	         try {
	           // substring(1) skips the leading '~'; a bare "~" fails to parse
	           // and keeps the default similarity
	           fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
	         } catch (Exception ignored) { }
	         if(fms < 0.0f || fms > 1.0f){
	           throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
	         }
	         q = getFuzzyQuery(field, termImage, fms);
	       } else {
	         q = getFieldQuery(field, termImage);
	       }
	     }
	     | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
	         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
	         <RANGEIN_END> )
	       [ <CARAT> boost=<NUMBER> ]
	        {
	          // quoted end points lose their quotes; bare ones lose escapes
	          if (goop1.kind == RANGEIN_QUOTED) {
	            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
	          } else {
	            goop1.image = discardEscapeChar(goop1.image);
	          }
	          if (goop2.kind == RANGEIN_QUOTED) {
	            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
	          } else {
	            goop2.image = discardEscapeChar(goop2.image);
	          }
	          q = getRangeQuery(field, goop1.image, goop2.image, true);
	        }
	     | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
	         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
	         <RANGEEX_END> )
	       [ <CARAT> boost=<NUMBER> ]
	        {
	          // same end point handling as the inclusive range above
	          if (goop1.kind == RANGEEX_QUOTED) {
	            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
	          } else {
	            goop1.image = discardEscapeChar(goop1.image);
	          }
	          if (goop2.kind == RANGEEX_QUOTED) {
	            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
	          } else {
	            goop2.image = discardEscapeChar(goop2.image);
	          }

	          q = getRangeQuery(field, goop1.image, goop2.image, false);
	        }
	     | term=<QUOTED>
	       [ fuzzySlop=<FUZZY_SLOP> ]
	       [ <CARAT> boost=<NUMBER> ]
	       {
	         int s = phraseSlop;

	         if (fuzzySlop != null) {
	           try {
	             // "~n" after a phrase is a slop; a fractional value is
	             // truncated to an int
	             s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
	           }
	           catch (Exception ignored) { }
	         }
	         // drop the surrounding quotes before building the query
	         q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
	       }
	  )
	  {
	    if (boost != null) {
	      float f = (float) 1.0;
	      try {
	        f = Float.valueOf(boost.image).floatValue();
	      }
	      catch (Exception ignored) {
	        /* Should this be handled somehow? (defaults to "no boost", if
	         * boost number is invalid)
	         */
	      }

	      // avoid boosting null queries, such as those caused by stop words
	      if (q != null) {
	        q.setBoost(f);
	      }
	    }
	    return q;
	  }
	}