// src/Lucene.Net.QueryParser/Classic/QueryParserBase.cs - lucenenet - Git at Google

 using J2N;
 using J2N.Numerics;
 using Lucene.Net.Analysis;
 using Lucene.Net.Analysis.TokenAttributes;
 using Lucene.Net.Documents;
 using Lucene.Net.Index;
 using Lucene.Net.QueryParsers.Flexible.Standard;
 using Lucene.Net.Search;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
 using System.Globalization;
 using System.IO;
 #if FEATURE_SERIALIZABLE_EXCEPTIONS
 using System.Runtime.Serialization;
 #endif
 using System.Text;

 namespace Lucene.Net.QueryParsers.Classic
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     // LUCENENET specific: In Java, this was part of the QueryParser class,
     // but it has been de-nested to make usage syntax shorter.

     /// <summary>
     /// The boolean operator the query parser applies by default when it combines
     /// query terms that carry no explicit modifier.
     /// Use <see cref="QueryParserBase.DefaultOperator"/> to change it.
     /// </summary>
     public enum Operator
     {
         /// <summary>
         /// Terms without any modifiers are considered optional.
         /// </summary>
         OR,
         /// <summary>
         /// Terms without any modifiers are considered to be in conjunction.
         /// </summary>
         AND
     }

     /// <summary>
     /// This class is overridden by <see cref="QueryParser"/>.
     /// </summary>
     public abstract class QueryParserBase : QueryBuilder, ICommonQueryParserConfiguration
     {
         /// <summary>
         /// Do not catch this exception in your code, it means you are using methods that you should no longer use.
         /// </summary>
         // LUCENENET: It is no longer good practice to use binary serialization.
         // See: https://github.com/dotnet/corefx/issues/23584#issuecomment-325724568
         // The [Serializable] attribute and the serialization constructor are therefore
         // only compiled when FEATURE_SERIALIZABLE_EXCEPTIONS is defined.
 #if FEATURE_SERIALIZABLE_EXCEPTIONS
         [Serializable]
 #endif
         public class MethodRemovedUseAnother : Exception
         {
             /// <summary>
             /// Initializes a new instance of this class without a message or inner exception.
             /// </summary>
             public MethodRemovedUseAnother()
             { }

 #if FEATURE_SERIALIZABLE_EXCEPTIONS
             /// <summary>
             /// Initializes a new instance of this class with serialized data.
             /// </summary>
             /// <param name="info">The <see cref="SerializationInfo"/> that holds the serialized object data about the exception being thrown.</param>
             /// <param name="context">The <see cref="StreamingContext"/> that contains contextual information about the source or destination.</param>
             protected MethodRemovedUseAnother(SerializationInfo info, StreamingContext context)
                 : base(info, context)
             {
             }
 #endif
         }

         // Conjunction codes: the "conj" argument of AddClause is compared against these.
         // NOTE(review): CONJ_NONE is not referenced in the visible code; presumably it
         // means "no explicit conjunction" - confirm against the generated parser.
         protected const int CONJ_NONE = 0;
         // The clause was introduced by AND.
         protected const int CONJ_AND = 1;
         // The clause was introduced by OR.
         protected const int CONJ_OR = 2;

         // Modifier codes: the "mods" argument of AddClause is compared against these.
         // NOTE(review): MOD_NONE is not referenced in the visible code; presumably it
         // means "no modifier" - confirm against the generated parser.
         protected const int MOD_NONE = 0;
         // The clause was introduced by NOT or '-' (prohibited).
         protected const int MOD_NOT = 10;
         // The clause was introduced by '+' (required).
         protected const int MOD_REQ = 11;


         // Aliases for the Operator enum members (which was a nested class in Java),
         // so DefaultOperator can be set without referencing the enum type directly.

         /// <summary>
         /// Alternative form of <see cref="Operator.AND"/>
         /// </summary>
         public const Operator AND_OPERATOR = Operator.AND;
         /// <summary>
         /// Alternative form of <see cref="Operator.OR"/>
         /// </summary>
         public const Operator OR_OPERATOR = Operator.OR;

         ///// <summary>
         ///// The actual operator that parser uses to combine query terms
         ///// </summary>
         //Operator operator_Renamed = OR_OPERATOR;


         //bool lowercaseExpandedTerms = true;
         //MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
         //bool allowLeadingWildcard = false;

         /// <summary>
         /// The default field for query terms. Assigned by <see cref="Init(LuceneVersion, string, Analyzer)"/>
         /// and exposed through <see cref="Field"/>.
         /// </summary>
         protected string m_field;
         //int phraseSlop = 0;
         //float fuzzyMinSim = FuzzyQuery.DefaultMinSimilarity;
         //int fuzzyPrefixLength = FuzzyQuery.DefaultPrefixLength;
         CultureInfo locale = null; // LUCENENET NOTE: null indicates read CultureInfo.CurrentCulture on the fly
         TimeZoneInfo timeZone = null; // LUCENENET NOTE: null indicates read TimeZoneInfo.Local on the fly

         // TODO: Work out what the default date resolution SHOULD be (was null in Java, which isn't valid for an enum type)

         /// <summary>
         /// The default date resolution, returned by <see cref="GetDateResolution(string)"/>
         /// when no field-specific resolution has been registered.
         /// </summary>
         DateTools.Resolution dateResolution = DateTools.Resolution.DAY;
         /// <summary>
         /// Maps field names to date resolutions. Stays <c>null</c> until the first
         /// field-specific resolution is set.
         /// </summary>
         IDictionary<string, DateTools.Resolution> fieldToDateResolution = null;

         /// <summary>
         /// Whether or not to analyze range terms when constructing range queries
         /// (for example, analyzing terms into collation keys for locale-sensitive range queries).
         /// </summary>
         bool analyzeRangeTerms = false;

         /// <summary>
         /// Parameterless constructor, present so the generated QueryParser(CharStream) won't error out.
         /// Passes <c>null</c> to the base constructor and assigns the property defaults;
         /// the analyzer and default field are supplied later through
         /// <see cref="Init(LuceneVersion, string, Analyzer)"/>.
         /// </summary>
         protected QueryParserBase()
             : base(null)
         {
             // Set property defaults.
             // NOTE(review): these properties are virtual, so an override in a subclass
             // runs here before that subclass's own constructor body has executed.
             DefaultOperator = OR_OPERATOR;
             LowercaseExpandedTerms = true;
             MultiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
             AllowLeadingWildcard = false;

             PhraseSlop = 0;
             // Warnings 612/618 (use of obsolete members) are suppressed for this assignment.
 #pragma warning disable 612, 618
             FuzzyMinSim = FuzzyQuery.DefaultMinSimilarity;
 #pragma warning restore 612, 618
             FuzzyPrefixLength = FuzzyQuery.DefaultPrefixLength;
         }

         /// <summary>
         /// Initializes a query parser with its analyzer and default field.
         /// Called by the QueryParser constructor.
         /// <para/>
         /// Also decides whether phrase queries are auto-generated: they are when
         /// <paramref name="matchVersion"/> is older than <c>LUCENE_31</c>, and are not otherwise.
         /// </summary>
         /// <param name="matchVersion">Lucene version to match.</param>
         /// <param name="f">the default field for query terms.</param>
         /// <param name="a">used to find terms in the query text.</param>
         public virtual void Init(LuceneVersion matchVersion, string f, Analyzer a)
         {
             Analyzer = a;
             m_field = f;

 #pragma warning disable 612, 618
             bool isAtLeast31 = matchVersion.OnOrAfter(LuceneVersion.LUCENE_31);
 #pragma warning restore 612, 618

             // Only pre-3.1 behavior generates phrase queries automatically.
             AutoGeneratePhraseQueries = !isAtLeast31;
         }

         // the generated parser will create these in QueryParser

         /// <summary>
         /// Re-initializes the parser to read from <paramref name="stream"/>.
         /// <see cref="Parse(string)"/> calls this with a stream over the query text before parsing.
         /// </summary>
         /// <param name="stream">the character stream to parse from.</param>
         public abstract void ReInit(ICharStream stream);
         /// <summary>
         /// Parses the current input as a top-level query (a query followed by end-of-input).
         /// <see cref="Parse(string)"/> treats a <c>null</c> result as an empty query.
         /// </summary>
         /// <param name="field">the default field for query terms.</param>
         public abstract Query TopLevelQuery(string field);

         /// <summary>
         /// Parses a query string, returning a <see cref="Query"/>.
         /// When the top-level parse yields no query at all, an empty boolean query
         /// is returned instead of <c>null</c>.
         /// </summary>
         /// <param name="query">the query string to be parsed.</param>
         /// <returns>the parsed query; never <c>null</c> unless <c>NewBooleanQuery</c> returns <c>null</c>.</returns>
         /// <exception cref="ParseException">if the parsing fails</exception>
         public virtual Query Parse(string query)
         {
             ReInit(new FastCharStream(new StringReader(query)));
             try
             {
                 // TopLevelQuery is a Query followed by the end-of-input (EOF)
                 Query parsed = TopLevelQuery(m_field);
                 if (parsed != null)
                 {
                     return parsed;
                 }

                 return NewBooleanQuery(false);
             }
             catch (ParseException parseFailure)
             {
                 // rethrow to include the original query:
                 throw new ParseException("Cannot parse '" + query + "': " + parseFailure.Message, parseFailure);
             }
             catch (TokenMgrError tokenFailure)
             {
                 // lexer errors are surfaced as parse errors, again with the original query
                 throw new ParseException("Cannot parse '" + query + "': " + tokenFailure.Message, tokenFailure);
             }
             catch (BooleanQuery.TooManyClausesException clauseOverflow)
             {
                 throw new ParseException("Cannot parse '" + query + "': too many boolean clauses", clauseOverflow);
             }
         }

         /// <summary>
         /// Gets the default field for query terms.
         /// </summary>
         public virtual string Field => m_field;

         /// <summary>
         /// Set to true if phrase queries will be automatically generated
         /// when the analyzer returns more than one term from whitespace
         /// delimited text.
         /// NOTE: this behavior may not be suitable for all languages.
         /// <para/>
         /// Set to false if phrase queries should only be generated when
         /// surrounded by double quotes.
         /// <para/>
         /// <see cref="Init(LuceneVersion, string, Analyzer)"/> assigns this value based on
         /// the Lucene version it is given.
         /// </summary>
         public bool AutoGeneratePhraseQueries { get; set; }

         /// <summary>
         /// Gets or sets the minimum similarity for fuzzy queries.
         /// The constructor initializes it to <c>FuzzyQuery.DefaultMinSimilarity</c>
         /// (documented as 2f).
         /// </summary>
         public virtual float FuzzyMinSim { get; set; }

         /// <summary>
         /// Gets or sets the prefix length for fuzzy queries.
         /// The constructor initializes it to <c>FuzzyQuery.DefaultPrefixLength</c>
         /// (documented as 0).
         /// </summary>
         public virtual int FuzzyPrefixLength { get; set; }

         /// <summary>
         /// Gets or sets the default slop for phrases.
         /// If zero, then exact phrase matches are required.
         /// Default value is zero.
         /// </summary>
         public virtual int PhraseSlop { get; set; }

         /// <summary>
         /// Set to <c>true</c> to allow leading wildcard characters.
         /// <para/>
         /// When set, <c>*</c> or <c>?</c> are allowed as
         /// the first character of a PrefixQuery and WildcardQuery.
         /// Note that this can produce very slow
         /// queries on big indexes.
         /// <para/>
         /// Default: false.
         /// </summary>
         public virtual bool AllowLeadingWildcard { get; set; }

         /// <summary>
         /// Gets or sets the boolean operator of the QueryParser.
         /// In default mode (<see cref="OR_OPERATOR"/>) terms without any modifiers
         /// are considered optional: for example <c>capital of Hungary</c> is equal to
         /// <c>capital OR of OR Hungary</c>.
         /// <para/>
         /// In <see cref="AND_OPERATOR"/> mode terms are considered to be in conjunction: the
         /// above mentioned query is parsed as <c>capital AND of AND Hungary</c>
         /// </summary>
         public virtual Operator DefaultOperator { get; set; }

         /// <summary>
         /// Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
         /// lower-cased or not.  Default is <c>true</c>.
         /// Lower-casing uses the culture returned by <see cref="Locale"/>.
         /// </summary>
         public virtual bool LowercaseExpandedTerms { get; set; }

         /// <summary>
         /// By default QueryParser uses <see cref="MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT"/>
         /// when creating a <see cref="PrefixQuery"/>, <see cref="WildcardQuery"/> or <see cref="TermRangeQuery"/>. This implementation is generally preferable because it
         /// a) Runs faster b) Does not have the scarcity of terms unduly influence score
         /// c) avoids any <see cref="BooleanQuery.TooManyClausesException"/> exception.
         /// However, if your application really needs to use the
         /// old-fashioned <see cref="BooleanQuery"/> expansion rewriting and the above
         /// points are not relevant then use this to change
         /// the rewrite method.
         /// </summary>
         public virtual MultiTermQuery.RewriteMethod MultiTermRewriteMethod { get; set; }

         /// <summary>
         /// Gets or sets the culture used by date range parsing, lowercasing, and other
         /// locale-sensitive operations.
         /// <para/>
         /// While no culture has been set explicitly (or after it has been reset to <c>null</c>),
         /// the getter reads <see cref="CultureInfo.CurrentCulture"/> on every access, so a
         /// change of the current thread's culture is picked up by the parser. Assigning a
         /// non-null culture pins the parser to that culture.
         /// </summary>
         public virtual CultureInfo Locale // LUCENENET TODO: API - Rename Culture
         {
             get => this.locale ?? CultureInfo.CurrentCulture;
             set => this.locale = value;
         }

         /// <summary>
         /// Gets or sets the time zone used for date and time parsing operations.
         /// <para/>
         /// While no time zone has been set explicitly (or after it has been reset to <c>null</c>),
         /// the getter reads <see cref="TimeZoneInfo.Local"/> on every access, so a change of the
         /// system time zone is picked up by the parser. Assigning a non-null time zone pins the
         /// parser to that time zone.
         /// </summary>
         public virtual TimeZoneInfo TimeZone
         {
             get => this.timeZone ?? TimeZoneInfo.Local;
             set => this.timeZone = value;
         }

         /// <summary>
         /// Sets the default date resolution used by RangeQueries for fields for which no
         /// specific date resolution has been set. Field specific resolutions can be set
         /// with <see cref="SetDateResolution(string,DateTools.Resolution)"/>.
         /// </summary>
         /// <param name="dateResolution">the default date resolution to set</param>
         public virtual void SetDateResolution(DateTools.Resolution dateResolution)
             => this.dateResolution = dateResolution;

         /// <summary>
         /// Sets the date resolution used by RangeQueries for a specific field.
         /// </summary>
         /// <param name="fieldName">field for which the date resolution is to be set</param>
         /// <param name="dateResolution">date resolution to set</param>
         /// <exception cref="ArgumentNullException"><paramref name="fieldName"/> is <c>null</c> or empty.</exception>
         public virtual void SetDateResolution(string fieldName, DateTools.Resolution dateResolution)
         {
             if (string.IsNullOrEmpty(fieldName))
             {
                 // The single-string constructor treats its argument as the parameter name,
                 // so the name and the message must be passed separately.
                 throw new ArgumentNullException(nameof(fieldName), "fieldName cannot be null or empty string.");
             }

             if (fieldToDateResolution == null)
             {
                 // lazily initialize Dictionary
                 fieldToDateResolution = new Dictionary<string, DateTools.Resolution>();
             }

             // Indexer assignment replaces any resolution previously registered for the field.
             fieldToDateResolution[fieldName] = dateResolution;
         }

         /// <summary>
         /// Returns the date resolution that is used by RangeQueries for the given field.
         /// If no field specific date resolution has been set for the given field,
         /// the default date resolution is returned.
         /// </summary>
         /// <param name="fieldName">field whose date resolution is requested</param>
         /// <returns>the field specific date resolution, or the default date resolution
         /// when none has been set for <paramref name="fieldName"/>.</returns>
         /// <exception cref="ArgumentNullException"><paramref name="fieldName"/> is <c>null</c> or empty.</exception>
         public virtual DateTools.Resolution GetDateResolution(string fieldName)
         {
             if (string.IsNullOrEmpty(fieldName))
             {
                 // The single-string constructor treats its argument as the parameter name,
                 // so the name and the message must be passed separately.
                 throw new ArgumentNullException(nameof(fieldName), "fieldName cannot be null or empty string.");
             }

             // The map is created lazily, so it is null until a field specific resolution is set.
             if (fieldToDateResolution != null
                 && fieldToDateResolution.TryGetValue(fieldName, out DateTools.Resolution resolution))
             {
                 return resolution;
             }

             // no date resolution set for the given field; return default date resolution instead
             return this.dateResolution;
         }

         /// <summary>
         /// Gets or sets whether range terms are analyzed when constructing <see cref="TermRangeQuery"/>s.
         /// For example, setting this to true can enable analyzing terms into
         /// collation keys for locale-sensitive <see cref="TermRangeQuery"/>.
         /// </summary>
         public virtual bool AnalyzeRangeTerms
         {
             get => analyzeRangeTerms;
             set => analyzeRangeTerms = value;
         }

         /// <summary>
         /// Appends a clause for <paramref name="q"/> to <paramref name="clauses"/>, choosing its
         /// occurrence from the conjunction and modifier that introduced it, and adjusts the
         /// occurrence of the preceding clause where the conjunction demands it.
         /// </summary>
         /// <param name="clauses">the clauses collected so far; modified in place.</param>
         /// <param name="conj">conjunction code; compared against <see cref="CONJ_AND"/> and <see cref="CONJ_OR"/>.</param>
         /// <param name="mods">modifier code; compared against <see cref="MOD_NOT"/> and <see cref="MOD_REQ"/>.</param>
         /// <param name="q">the sub query; may be <c>null</c>, in which case no clause is added.</param>
         protected internal virtual void AddClause(IList<BooleanClause> clauses, int conj, int mods, Query q)
         {
             // If this term is introduced by AND, make the preceding term required,
             // unless it's already prohibited
             if (clauses.Count > 0 && conj == CONJ_AND)
             {
                 BooleanClause previous = clauses[clauses.Count - 1];
                 if (!previous.IsProhibited)
                 {
                     previous.Occur = Occur.MUST;
                 }
             }

             // If this term is introduced by OR, make the preceding term optional,
             // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
             // notice if the input is a OR b, first term is parsed as required; without
             // this modification a OR b would parsed as +a OR b
             if (clauses.Count > 0 && DefaultOperator == AND_OPERATOR && conj == CONJ_OR)
             {
                 BooleanClause previous = clauses[clauses.Count - 1];
                 if (!previous.IsProhibited)
                 {
                     previous.Occur = Occur.SHOULD;
                 }
             }

             // We might have been passed a null query; the term might have been
             // filtered away by the analyzer.
             if (q == null)
             {
                 return;
             }

             // PROHIBITED if introduced by NOT or -, regardless of the default operator.
             bool prohibited = mods == MOD_NOT;

             // OR mode:  REQUIRED if introduced by + or by AND (and not prohibited).
             // AND mode: REQUIRED if not PROHIBITED and not introduced by OR.
             bool required = DefaultOperator == OR_OPERATOR
                 ? mods == MOD_REQ || (conj == CONJ_AND && !prohibited)
                 : !prohibited && conj != CONJ_OR;

             if (required && prohibited)
             {
                 throw new Exception("Clause cannot be both required and prohibited");
             }

             Occur occur = prohibited
                 ? Occur.MUST_NOT
                 : (required ? Occur.MUST : Occur.SHOULD);
             clauses.Add(NewBooleanClause(q, occur));
         }

         /// <summary>
         /// Builds the query for a single field/term pair by delegating to
         /// <see cref="NewFieldQuery(Analyzer, string, string, bool)"/> with this parser's analyzer.
         /// </summary>
         /// <param name="field">the field to query.</param>
         /// <param name="queryText">the text to analyze.</param>
         /// <param name="quoted">whether the text was quoted in the query.</param>
         /// <exception cref="ParseException">throw in overridden method to disallow</exception>
         protected internal virtual Query GetFieldQuery(string field, string queryText, bool quoted)
             => NewFieldQuery(Analyzer, field, queryText, quoted);

         /// <summary>
         /// Creates the field query via <c>CreateFieldQuery</c>. Terms are required when the
         /// default operator is <see cref="Operator.AND"/> and optional otherwise; the text is
         /// treated as quoted when it was quoted or when <see cref="AutoGeneratePhraseQueries"/> is set.
         /// </summary>
         /// <exception cref="ParseException">throw in overridden method to disallow</exception>
         protected internal virtual Query NewFieldQuery(Analyzer analyzer, string field, string queryText, bool quoted)
         {
             Occur termOccur;
             if (DefaultOperator == Operator.AND)
             {
                 termOccur = Occur.MUST;
             }
             else
             {
                 termOccur = Occur.SHOULD;
             }

             bool treatAsQuoted = quoted || AutoGeneratePhraseQueries;
             return CreateFieldQuery(analyzer, termOccur, field, queryText, treatAsQuoted, PhraseSlop);
         }

         /// <summary>
         /// Builds a quoted field query through <see cref="GetFieldQuery(string,string,bool)"/> and,
         /// when the result is a <see cref="PhraseQuery"/> or <see cref="MultiPhraseQuery"/>,
         /// applies <paramref name="slop"/> to it.
         /// This method may be overridden, for example, to return
         /// a <see cref="Search.Spans.SpanNearQuery"/> instead of a <see cref="PhraseQuery"/>.
         /// </summary>
         /// <exception cref="ParseException">throw in overridden method to disallow</exception>
         protected internal virtual Query GetFieldQuery(string field, string queryText, int slop)
         {
             Query result = GetFieldQuery(field, queryText, true);

             // Only the phrase-style queries carry a slop; anything else is returned untouched.
             switch (result)
             {
                 case PhraseQuery phrase:
                     phrase.Slop = slop;
                     break;
                 case MultiPhraseQuery multiPhrase:
                     multiPhrase.Slop = slop;
                     break;
             }

             return result;
         }

         /// <summary>
         /// Builds a range query for <paramref name="field"/>. Both bounds are lower-cased when
         /// <see cref="LowercaseExpandedTerms"/> is set. A bound that parses exactly as a date in
         /// the short date pattern of <see cref="Locale"/> is replaced by its
         /// <c>DateTools.DateToString</c> form at the field's date resolution; for an inclusive
         /// upper bound the time is first moved to the last millisecond of that day.
         /// </summary>
         /// <param name="field">the field to query.</param>
         /// <param name="part1">lower bound text; may be <c>null</c>.</param>
         /// <param name="part2">upper bound text; may be <c>null</c>.</param>
         /// <param name="startInclusive">true if the start of the range is inclusive</param>
         /// <param name="endInclusive">true if the end of the range is inclusive</param>
         protected internal virtual Query GetRangeQuery(string field,
                               string part1,
                               string part2,
                               bool startInclusive,
                               bool endInclusive)
         {
             if (LowercaseExpandedTerms)
             {
                 if (part1 != null)
                 {
                     part1 = Locale.TextInfo.ToLower(part1);
                 }
                 if (part2 != null)
                 {
                     part2 = Locale.TextInfo.ToLower(part2);
                 }
             }

             string datePattern = Locale.DateTimeFormat.ShortDatePattern;
             DateTools.Resolution fieldResolution = GetDateResolution(field);

             // LUCENENET specific: This doesn't emulate java perfectly.
             // See LUCENENET-423 - DateRange differences with Java and .NET
             //
             // Java parses the string up to the end of the pattern and ignores the rest;
             // .NET rejects such input, so the date format is much stricter here. Users can
             // only specify the date, not the time.
             //
             // To emulate Java more precisely, a custom format can be installed by calling
             // Locale.DateTimeFormat.SetAllDateTimePatterns(string[], char) with every format
             // that must be supported, and setting Locale.DateTimeFormat.ShortDatePattern to
             // the same value as the second parameter of SetAllDateTimePatterns.
             //
             // LUCENENET TODO: Try to make setting custom formats easier by adding
             // another configuration setting (IList<string> of date formats).
             // Also consider making a IsStrictDateFormat setting which allows toggling
             // to DateTime.TryParse(part1, Locale, DateTimeStyles.None, out d1);
             // rather than TryParseExact

             if (DateTime.TryParseExact(part1, datePattern, Locale, DateTimeStyles.None, out DateTime lowerDate))
             {
                 part1 = DateTools.DateToString(lowerDate, fieldResolution);
             }

             if (DateTime.TryParseExact(part2, datePattern, Locale, DateTimeStyles.None, out DateTime upperDate))
             {
                 if (endInclusive)
                 {
                     // The user can only specify the date, not the time, so make sure
                     // the time is set to the latest possible time of that date to really
                     // include all documents:
                     upperDate = TimeZoneInfo.ConvertTime(upperDate, TimeZone);
                     var calendar = Locale.Calendar;
                     upperDate = calendar.AddHours(upperDate, 23);
                     upperDate = calendar.AddMinutes(upperDate, 59);
                     upperDate = calendar.AddSeconds(upperDate, 59);
                     upperDate = calendar.AddMilliseconds(upperDate, 999);
                 }

                 part2 = DateTools.DateToString(upperDate, fieldResolution);
             }

             return NewRangeQuery(field, part1, part2, startInclusive, endInclusive);
         }

         /// <summary>
         /// Builds a new <see cref="BooleanClause"/> instance
         /// </summary>
         /// <param name="q">sub query</param>
         /// <param name="occur">how this clause should occur when matching documents</param>
         /// <returns>new <see cref="BooleanClause"/> instance</returns>
         protected internal virtual BooleanClause NewBooleanClause(Query q, Occur occur)
             => new BooleanClause(q, occur);

         /// <summary>
         /// Builds a new <see cref="PrefixQuery"/> instance that uses this parser's
         /// <see cref="MultiTermRewriteMethod"/>.
         /// </summary>
         /// <param name="prefix">Prefix term</param>
         /// <returns>new <see cref="PrefixQuery"/> instance</returns>
         protected internal virtual Query NewPrefixQuery(Term prefix)
         {
             return new PrefixQuery(prefix)
             {
                 MultiTermRewriteMethod = this.MultiTermRewriteMethod
             };
         }

         /// <summary>
         /// Builds a new <see cref="RegexpQuery"/> instance that uses this parser's
         /// <see cref="MultiTermRewriteMethod"/>.
         /// </summary>
         /// <param name="regexp">Regexp term</param>
         /// <returns>new <see cref="RegexpQuery"/> instance</returns>
         protected internal virtual Query NewRegexpQuery(Term regexp)
         {
             return new RegexpQuery(regexp)
             {
                 MultiTermRewriteMethod = this.MultiTermRewriteMethod
             };
         }

         /// <summary>
         /// Builds a new <see cref="FuzzyQuery"/> instance. The similarity is converted to an
         /// edit count using the term's length in code points.
         /// </summary>
         /// <param name="term">Term</param>
         /// <param name="minimumSimilarity">minimum similarity</param>
         /// <param name="prefixLength">prefix length</param>
         /// <returns>new <see cref="FuzzyQuery"/> Instance</returns>
         protected internal virtual Query NewFuzzyQuery(Term term, float minimumSimilarity, int prefixLength)
         {
             // FuzzyQuery doesn't yet allow constant score rewrite
             string termText = term.Text();
 #pragma warning disable 612, 618
             int codePointLength = termText.CodePointCount(0, termText.Length);
             int maxEdits = FuzzyQuery.SingleToEdits(minimumSimilarity, codePointLength);
 #pragma warning restore 612, 618
             return new FuzzyQuery(term, maxEdits, prefixLength);
         }

         // LUCENETODO: Should this be protected instead?
         // Convenience overload: analyzes the term with this parser's own analyzer.
         private BytesRef AnalyzeMultitermTerm(string field, string part)
             => AnalyzeMultitermTerm(field, part, Analyzer);

         /// <summary>
         /// Runs <paramref name="part"/> through an analyzer and returns a copy of the bytes of
         /// the single term it produces.
         /// </summary>
         /// <param name="field">the field the term belongs to.</param>
         /// <param name="part">the term text to analyze.</param>
         /// <param name="analyzerIn">the analyzer to use; this parser's analyzer is used when <c>null</c>.</param>
         /// <returns>a deep copy of the analyzed term bytes.</returns>
         /// <exception cref="ArgumentException">the analyzer produced no term, or more than one term.</exception>
         protected internal virtual BytesRef AnalyzeMultitermTerm(string field, string part, Analyzer analyzerIn)
         {
             analyzerIn = analyzerIn ?? Analyzer;

             TokenStream tokens = null;
             try
             {
                 tokens = analyzerIn.GetTokenStream(field, part);
                 tokens.Reset();

                 ITermToBytesRefAttribute termAttribute = tokens.GetAttribute<ITermToBytesRefAttribute>();
                 BytesRef termBytes = termAttribute.BytesRef;

                 bool hasFirstToken = tokens.IncrementToken();
                 if (!hasFirstToken)
                 {
                     throw new ArgumentException("analyzer returned no terms for multiTerm term: " + part);
                 }
                 termAttribute.FillBytesRef();

                 bool hasSecondToken = tokens.IncrementToken();
                 if (hasSecondToken)
                 {
                     throw new ArgumentException("analyzer returned too many terms for multiTerm term: " + part);
                 }
                 tokens.End();

                 // Copy the bytes before the stream is disposed in the finally block.
                 return BytesRef.DeepCopyOf(termBytes);
             }
             catch (IOException ioFailure)
             {
                 throw new Exception("Error analyzing multiTerm term: " + part, ioFailure);
             }
             finally
             {
                 IOUtils.DisposeWhileHandlingException(tokens);
             }
         }

         /// <summary>
         /// Builds a new <see cref="TermRangeQuery"/> instance. A <c>null</c> bound stays
         /// <c>null</c>; a non-null bound is analyzed when range-term analysis is enabled
         /// and converted to bytes as-is otherwise.
         /// </summary>
         /// <param name="field">Field</param>
         /// <param name="part1">min</param>
         /// <param name="part2">max</param>
         /// <param name="startInclusive">true if the start of the range is inclusive</param>
         /// <param name="endInclusive">true if the end of the range is inclusive</param>
         /// <returns>new <see cref="TermRangeQuery"/> instance</returns>
         protected internal virtual Query NewRangeQuery(string field, string part1, string part2, bool startInclusive, bool endInclusive)
         {
             BytesRef lower = part1 == null
                 ? null
                 : (analyzeRangeTerms ? AnalyzeMultitermTerm(field, part1) : new BytesRef(part1));

             BytesRef upper = part2 == null
                 ? null
                 : (analyzeRangeTerms ? AnalyzeMultitermTerm(field, part2) : new BytesRef(part2));

             return new TermRangeQuery(field, lower, upper, startInclusive, endInclusive)
             {
                 MultiTermRewriteMethod = this.MultiTermRewriteMethod
             };
         }

         /// <summary>
         /// Builds a new <see cref="MatchAllDocsQuery"/> instance
         /// </summary>
         /// <returns>new <see cref="MatchAllDocsQuery"/> instance</returns>
         protected internal virtual Query NewMatchAllDocsQuery() => new MatchAllDocsQuery();

         /// <summary>
         /// Builds a new <see cref="WildcardQuery"/> instance that uses this parser's
         /// <see cref="MultiTermRewriteMethod"/>.
         /// </summary>
         /// <param name="t">wildcard term</param>
         /// <returns>new <see cref="WildcardQuery"/> instance</returns>
         protected internal virtual Query NewWildcardQuery(Term t)
         {
             return new WildcardQuery(t)
             {
                 MultiTermRewriteMethod = this.MultiTermRewriteMethod
             };
         }

         /// <summary>
         /// Factory method for generating query, given a set of clauses.
         /// By default creates a boolean query composed of clauses passed in,
         /// with coord scoring left enabled.
         /// <para/>
         /// Can be overridden by extending classes, to modify query being
         /// returned.
         /// </summary>
         /// <param name="clauses">List that contains <see cref="BooleanClause"/> instances
         /// to join.</param>
         /// <exception cref="ParseException">throw in overridden method to disallow</exception>
         /// <returns>Resulting <see cref="Query"/> object.</returns>
         protected internal virtual Query GetBooleanQuery(IList<BooleanClause> clauses)
             => GetBooleanQuery(clauses, false);

         /// <summary>
         /// Factory method for generating query, given a set of clauses.
         /// By default creates a boolean query composed of clauses passed in.
         /// <para/>
         /// Can be overridden by extending classes, to modify query being
         /// returned.
         /// </summary>
         /// <param name="clauses">List that contains <see cref="BooleanClause"/> instances
         /// to join.</param>
         /// <param name="disableCoord">true if coord scoring should be disabled.</param>
         /// <exception cref="ParseException">throw in overridden method to disallow</exception>
         /// <returns>Resulting <see cref="Query"/> object, or <c>null</c> when
         /// <paramref name="clauses"/> is empty.</returns>
         protected internal virtual Query GetBooleanQuery(IList<BooleanClause> clauses, bool disableCoord)
         {
             bool nothingToJoin = clauses.Count == 0;
             if (nothingToJoin)
             {
                 // all clause words were filtered away by the analyzer.
                 return null;
             }

             BooleanQuery combined = NewBooleanQuery(disableCoord);
             foreach (BooleanClause item in clauses)
             {
                 combined.Add(item);
             }

             return combined;
         }

         /// <summary>
         /// Factory method for generating a query. Called when parser
         /// parses an input term token that contains one or more wildcard
         /// characters (? and *), but is not a prefix term token (one
         /// that has just a single * character at the end)
         /// <para/>
         /// When both the field and the term are exactly <c>*</c>, a match-all query is returned.
         /// Depending on settings, the term may be lower-cased
         /// automatically. It will not go through the default Analyzer,
         /// however, since normal Analyzers are unlikely to work properly
         /// with wildcard templates.
         /// <para/>
         /// Can be overridden by extending classes, to provide custom handling for
         /// wildcard queries, which may be necessary due to missing analyzer calls.
         /// </summary>
         /// <param name="field">Name of the field query will use.</param>
         /// <param name="termStr">Term token that contains one or more wild card
         /// characters (? or *), but is not simple prefix term</param>
         /// <exception cref="ParseException">if the term starts with a wildcard while
         /// <see cref="AllowLeadingWildcard"/> is false; also throw in overridden method to disallow</exception>
         /// <returns>Resulting <see cref="Query"/> built for the term</returns>
         protected internal virtual Query GetWildcardQuery(string field, string termStr)
         {
             bool isMatchAll = "*".Equals(field, StringComparison.Ordinal)
                 && "*".Equals(termStr, StringComparison.Ordinal);
             if (isMatchAll)
             {
                 return NewMatchAllDocsQuery();
             }

             if (!AllowLeadingWildcard)
             {
                 bool startsWithWildcard = termStr.StartsWith("*", StringComparison.Ordinal)
                     || termStr.StartsWith("?", StringComparison.Ordinal);
                 if (startsWithWildcard)
                 {
                     throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery");
                 }
             }

             string wildcardText = LowercaseExpandedTerms
                 ? Locale.TextInfo.ToLower(termStr)
                 : termStr;

             return NewWildcardQuery(new Term(field, wildcardText));
         }

         /// <summary>
         /// Factory method for generating a regular expression query. Called when
         /// the parser meets a term token that contains a regular expression.
         /// <para/>
         /// When <see cref="LowercaseExpandedTerms"/> is set, the pattern is
         /// lower-cased before the query is built. The pattern is not passed
         /// through the default Analyzer, since normal Analyzers are unlikely
         /// to work properly with regular expression templates.
         /// <para/>
         /// Can be overridden by extending classes, to provide custom handling for
         /// regular expression queries, which may be necessary due to missing analyzer
         /// calls.
         /// </summary>
         /// <param name="field">Name of the field query will use.</param>
         /// <param name="termStr">Term token that contains a regular expression</param>
         /// <exception cref="ParseException">throw in overridden method to disallow</exception>
         /// <returns>Resulting <see cref="Query"/> built for the term</returns>
         protected internal virtual Query GetRegexpQuery(string field, string termStr)
         {
             string pattern = termStr;
             if (LowercaseExpandedTerms)
             {
                 pattern = Locale.TextInfo.ToLower(pattern);
             }

             return NewRegexpQuery(new Term(field, pattern));
         }

         /// <summary>
         /// Factory method for generating a prefix query (similar to
         /// <see cref="GetWildcardQuery(string, string)"/>). Called when the parser
         /// meets a term token that uses prefix notation; that is, contains a single
         /// '*' wildcard character as its last character. Since this is a special
         /// case of a generic wildcard term, and such a query can be optimized
         /// easily, this usually results in a different query object.
         /// <para/>
         /// When <see cref="LowercaseExpandedTerms"/> is set, the prefix is
         /// lower-cased before the query is built. The prefix is not passed
         /// through the default Analyzer, since normal Analyzers are unlikely
         /// to work properly with wildcard templates.
         /// <para/>
         /// Can be overridden by extending classes, to provide custom handling for
         /// wild card queries, which may be necessary due to missing analyzer calls.
         /// </summary>
         /// <param name="field">Name of the field query will use.</param>
         /// <param name="termStr">Term token to use for building term for the query</param>
         /// <exception cref="ParseException">Thrown when the term starts with
         /// '*' while <see cref="AllowLeadingWildcard"/> is <c>false</c>;
         /// overrides may also throw it to disallow the query.</exception>
         /// <returns>Resulting <see cref="Query"/> built for the term</returns>
         protected internal virtual Query GetPrefixQuery(string field, string termStr)
         {
             bool leadingStarForbidden = !AllowLeadingWildcard
                 && termStr.StartsWith("*", StringComparison.Ordinal);
             if (leadingStarForbidden)
             {
                 throw new ParseException("'*' not allowed as first character in PrefixQuery");
             }

             string prefixText = LowercaseExpandedTerms
                 ? Locale.TextInfo.ToLower(termStr)
                 : termStr;

             Term prefixTerm = new Term(field, prefixText);
             return NewPrefixQuery(prefixTerm);
         }

         /// <summary>
         /// Factory method for generating a fuzzy query (similar to
         /// <see cref="GetWildcardQuery(string, string)"/>). Called when the parser
         /// meets a term token that has the fuzzy suffix (~) appended.
         /// <para/>
         /// When <see cref="LowercaseExpandedTerms"/> is set, the term is
         /// lower-cased before the query is built. The query is created with
         /// <see cref="FuzzyPrefixLength"/> as its prefix length.
         /// </summary>
         /// <param name="field">Name of the field query will use.</param>
         /// <param name="termStr">Term token to use for building term for the query</param>
         /// <param name="minSimilarity">minimum similarity</param>
         /// <exception cref="ParseException">throw in overridden method to disallow</exception>
         /// <returns>Resulting <see cref="Query"/> built for the term</returns>
         protected internal virtual Query GetFuzzyQuery(string field, string termStr, float minSimilarity)
         {
             string text = LowercaseExpandedTerms ? Locale.TextInfo.ToLower(termStr) : termStr;
             return NewFuzzyQuery(new Term(field, text), minSimilarity, FuzzyPrefixLength);
         }

         // extracted from the .jj grammar
         /// <summary>
         /// Dispatches a bare (unquoted) term token to the matching factory method.
         /// The flags are tested in the order wildcard, prefix, regexp, fuzzy; when
         /// none is set a plain field query is built.
         /// </summary>
         /// <param name="qfield">Name of the field the query will use.</param>
         /// <param name="term">The term token; its image is the raw term text.</param>
         /// <param name="fuzzySlop">Fuzzy slop token, forwarded to
         /// <see cref="HandleBareFuzzy(string, Token, string)"/> when <paramref name="fuzzy"/> is set.</param>
         /// <param name="prefix"><c>true</c> to build a prefix query from the image minus its last character.</param>
         /// <param name="wildcard"><c>true</c> to build a wildcard query from the raw image.</param>
         /// <param name="fuzzy"><c>true</c> to build a fuzzy query from the unescaped image.</param>
         /// <param name="regexp"><c>true</c> to build a regexp query from the image minus its first and last characters.</param>
         /// <returns>The <see cref="Query"/> produced by the selected factory method.</returns>
         internal virtual Query HandleBareTokenQuery(string qfield, Token term, Token fuzzySlop, bool prefix, bool wildcard, bool fuzzy, bool regexp)
         {
             // Unescaped up front, before any branch is chosen, so an invalid
             // escape sequence in the image is rejected whichever flag is set.
             string unescaped = DiscardEscapeChar(term.Image);

             if (wildcard)
             {
                 // Wildcard terms are handed over with their escapes intact.
                 return GetWildcardQuery(qfield, term.Image);
             }

             if (prefix)
             {
                 // Drop the last character of the image before unescaping.
                 string raw = term.Image;
                 return GetPrefixQuery(qfield, DiscardEscapeChar(raw.Substring(0, raw.Length - 1)));
             }

             if (regexp)
             {
                 // Strip the first and last characters that delimit the expression.
                 string raw = term.Image;
                 return GetRegexpQuery(qfield, raw.Substring(1, raw.Length - 2));
             }

             if (fuzzy)
             {
                 return HandleBareFuzzy(qfield, fuzzySlop, unescaped);
             }

             return GetFieldQuery(qfield, unescaped, false);
         }

         /// <summary>
         /// Builds the fuzzy query for a bare term. The minimum similarity is read
         /// from the text that follows the first character of the
         /// <paramref name="fuzzySlop"/> image; when that text is missing or is not
         /// a valid number, <see cref="FuzzyMinSim"/> is used instead.
         /// </summary>
         /// <param name="qfield">Name of the field the query will use.</param>
         /// <param name="fuzzySlop">Token whose image carries the similarity after its first character.</param>
         /// <param name="termImage">The term text to build the query for.</param>
         /// <exception cref="ParseException">Thrown when the similarity is negative,
         /// or is 1.0 or greater and has a fractional part.</exception>
         /// <returns>The query returned by <see cref="GetFuzzyQuery(string, string, float)"/>.</returns>
         internal virtual Query HandleBareFuzzy(string qfield, Token fuzzySlop, string termImage)
         {
             float fms = FuzzyMinSim;

             // LUCENENET NOTE: Apparently a "feature" of Lucene is to always
             // use "." as the decimal specifier for fuzzy slop, even if the culture uses
             // a different one, such as ",".

             // LUCENENET TODO: It would probably be more intuitive to use
             // the current Locale to specify the decimal identifier than
             // to hard code it to be ".", but this would differ from Java Lucene.
             // Perhaps just make it a non-default option?

             // TryParse replaces a catch-all around float.Parse: an unparsable or
             // missing slop still falls back to the default, but no exception is
             // thrown and swallowed along the way. The number styles are the ones
             // float.Parse(string, IFormatProvider) applies by default.
             string slopImage = fuzzySlop != null ? fuzzySlop.Image : null;
             float parsed;
             if (slopImage != null && slopImage.Length > 0
                 && float.TryParse(slopImage.Substring(1), NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out parsed))
             {
                 fms = parsed;
             }

             if (fms < 0.0f)
             {
                 throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
             }
             else if (fms >= 1.0f && fms != (int)fms)
             {
                 // Values of 1.0 and above are edit distances and must be whole numbers.
                 throw new ParseException("Fractional edit distances are not allowed!");
             }

             return GetFuzzyQuery(qfield, termImage, fms);
         }

         // extracted from the .jj grammar
         /// <summary>
         /// Builds the query for a quoted term. The first and last characters of
         /// the term image are removed, the remainder is unescaped, and the result
         /// is passed to <c>GetFieldQuery</c> together with the slop.
         /// <para/>
         /// The slop defaults to <see cref="PhraseSlop"/>. When
         /// <paramref name="fuzzySlop"/> is supplied and the text after the first
         /// character of its image is a valid number, that number, converted to
         /// <see cref="int"/>, is used instead.
         /// </summary>
         /// <param name="qfield">Name of the field the query will use.</param>
         /// <param name="term">The quoted term token.</param>
         /// <param name="fuzzySlop">Optional slop token; may be <c>null</c>.</param>
         /// <returns>The query returned by <c>GetFieldQuery</c>.</returns>
         internal virtual Query HandleQuotedTerm(string qfield, Token term, Token fuzzySlop)
         {
             int s = PhraseSlop;  // default

             // LUCENENET NOTE: Apparently a "feature" of Lucene is to always
             // use "." as the decimal specifier for fuzzy slop, even if the culture uses
             // a different one, such as ",".

             // LUCENENET TODO: It would probably be more intuitive to use
             // the current Locale to specify the decimal identifier than
             // to hard code it to be ".", but this would differ from Java Lucene.
             // Perhaps just make it a non-default option?

             // TryParse replaces a catch-all around float.Parse: an unparsable or
             // missing slop still leaves the default in place, but no exception is
             // thrown and swallowed along the way. The number styles are the ones
             // float.Parse(string, IFormatProvider) applies by default.
             string slopImage = fuzzySlop != null ? fuzzySlop.Image : null;
             float parsed;
             if (slopImage != null && slopImage.Length > 0
                 && float.TryParse(slopImage.Substring(1), NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out parsed))
             {
                 s = (int)parsed;
             }

             string image = term.Image;
             return GetFieldQuery(qfield, DiscardEscapeChar(image.Substring(1, image.Length - 2)), s);
         }

         // extracted from the .jj grammar
         /// <summary>
         /// Applies the boost carried by <paramref name="boost"/> to
         /// <paramref name="q"/>. The token image is parsed as a number using the
         /// invariant culture; an image that is not a valid number results in a
         /// boost of 1.0 ("no boost").
         /// </summary>
         /// <param name="q">The query to boost; may be <c>null</c>, in which case nothing is applied.</param>
         /// <param name="boost">The boost token; may be <c>null</c>, in which case <paramref name="q"/> is left untouched.</param>
         /// <returns><paramref name="q"/> itself.</returns>
         internal virtual Query HandleBoost(Query q, Token boost)
         {
             if (boost != null)
             {
                 // LUCENENET NOTE: Apparently a "feature" of Lucene is to always
                 // use "." as the decimal specifier for boost, even if the culture uses
                 // a different one, such as ",".

                 // LUCENENET TODO: It would probably be more intuitive to use
                 // the current Locale to specify the decimal identifier than
                 // to hard code it to be ".", but this would differ from Java Lucene.
                 // Perhaps just make it a non-default option?

                 // TryParse replaces a catch-all around float.Parse; the number styles
                 // are the ones float.Parse(string, IFormatProvider) applies by default.
                 float f;
                 if (!float.TryParse(boost.Image, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out f))
                 {
                     /* Should this be handled somehow? (defaults to "no boost", if
                      * boost number is invalid)
                      */
                     f = 1.0f;
                 }

                 // avoid boosting null queries, such as those caused by stop words
                 if (q != null)
                 {
                     q.Boost = f;
                 }
             }
             return q;
         }

         /// <summary>
         /// Returns a copy of <paramref name="input"/> with the escape character
         /// (backslash) removed: an escaped character is kept as-is, so a doubled
         /// escape character yields a single one.
         /// <para/>
         /// Escaped unicode characters are supported: the escape character followed
         /// by <c>u</c> and four hexadecimal digits is replaced by the character
         /// with that code, e. g. <c>\u0041</c> becomes <c>A</c>.
         /// </summary>
         /// <param name="input">The text to unescape.</param>
         /// <exception cref="ParseException">Thrown when the input ends in the middle
         /// of a unicode escape sequence, ends with an escape character, or contains
         /// a non-hexadecimal character inside a unicode escape sequence.</exception>
         /// <returns>The unescaped text.</returns>
         internal virtual string DiscardEscapeChar(string input)
         {
             // Unescaping only ever drops characters, so the input length is
             // a sufficient capacity; 'written' tracks how much is actually used.
             char[] buffer = new char[input.Length];
             int written = 0;

             // True when the previous character was an unconsumed escape character.
             bool escapePending = false;

             // Place value of the next hex digit of a unicode escape
             // (16^3, 16^2, 16, 1); zero when not inside such a sequence.
             int digitWeight = 0;

             // Accumulates the code of the escaped unicode character.
             int unicodeValue = 0;

             foreach (char ch in input)
             {
                 if (digitWeight > 0)
                 {
                     // Inside a unicode escape: fold in the next hex digit.
                     unicodeValue += HexToInt32(ch) * digitWeight;
                     digitWeight = digitWeight.TripleShift(4);
                     if (digitWeight == 0)
                     {
                         // All four digits consumed.
                         buffer[written++] = (char)unicodeValue;
                         unicodeValue = 0;
                     }
                     continue;
                 }

                 if (escapePending)
                 {
                     escapePending = false;
                     if (ch == 'u')
                     {
                         // Start of an escaped unicode character.
                         digitWeight = 16 * 16 * 16;
                     }
                     else
                     {
                         // An ordinary escaped character is copied verbatim.
                         buffer[written++] = ch;
                     }
                     continue;
                 }

                 if (ch == '\\')
                 {
                     escapePending = true;
                 }
                 else
                 {
                     buffer[written++] = ch;
                 }
             }

             if (digitWeight > 0)
             {
                 throw new ParseException("Truncated unicode escape sequence.");
             }

             if (escapePending)
             {
                 throw new ParseException("Term can not end with escape character.");
             }

             return new string(buffer, 0, written);
         }

         /// <summary>
         /// Converts a single hexadecimal digit (<c>0-9</c>, <c>a-f</c> or
         /// <c>A-F</c>) to its numeric value.
         /// <para/>
         /// NOTE: This was hexToInt() in Lucene
         /// </summary>
         /// <param name="c">The character to convert.</param>
         /// <exception cref="ParseException">Thrown when <paramref name="c"/> is not a hexadecimal digit.</exception>
         /// <returns>A value from 0 to 15.</returns>
         private static int HexToInt32(char c)
         {
             if (c >= '0' && c <= '9')
             {
                 return c - '0';
             }

             if (c >= 'a' && c <= 'f')
             {
                 return 10 + (c - 'a');
             }

             if (c >= 'A' && c <= 'F')
             {
                 return 10 + (c - 'A');
             }

             throw new ParseException("Non-hex character in Unicode escape sequence: " + c);
         }

         /// <summary>
         /// Returns a copy of <paramref name="s"/> in which every character that
         /// QueryParser expects to be escaped is preceded by a <c>\</c>.
         /// </summary>
         /// <param name="s">The text to escape.</param>
         /// <returns>The escaped text.</returns>
         public static string Escape(string s)
         {
             // These characters are part of the query syntax and must be escaped
             const string SyntaxChars = "\\+-!():^[]\"{}~*?|&/";

             StringBuilder escaped = new StringBuilder();
             foreach (char ch in s)
             {
                 if (SyntaxChars.IndexOf(ch) >= 0)
                 {
                     escaped.Append('\\');
                 }
                 escaped.Append(ch);
             }
             return escaped.ToString();
         }
     }
 }