src/Lucene.Net.Analysis.Phonetic/Language/Bm/Rule.cs - lucenenet - Git at Google

 // commons-codec version compatibility level: 1.9
 using J2N;
 using J2N.Collections.Generic.Extensions;
 using J2N.Text;
 using Lucene.Net.Support;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Text;
 using System.Text.RegularExpressions;
 using JCG = J2N.Collections.Generic;

 namespace Lucene.Net.Analysis.Phonetic.Language.Bm
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     /// <summary>
     /// A phoneme rule.
     /// </summary>
     /// <remarks>
     /// Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply
     /// and a logical flag indicating if all languages must be in play. A rule matches if:
     /// <list type="bullet">
     ///     <item><description>the pattern matches at the current position</description></item>
     ///     <item><description>the string up until the beginning of the pattern matches the left context</description></item>
     ///     <item><description>the string from the end of the pattern matches the right context</description></item>
     ///     <item><description>logical is ALL and all languages are in scope; or</description></item>
     ///     <item><description>logical is any other value and at least one language is in scope</description></item>
     /// </list>
     /// <para/>
     /// Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user
     /// to explicitly construct their own.
     /// <para/>
     /// Rules are immutable and thread-safe.
     /// <para/>
     /// <b>Rules resources</b>
     /// <para/>
     /// Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically
     /// named following the pattern:
     /// <c>Lucene.Net.Analysis.Phonetic.Language.Bm.<see cref="NameType"/>_<see cref="RuleType"/>_[language].txt</c>
     /// <para/>
     /// The format of these resources is the following:
     /// <list type="table">
     ///     <item>
     ///         <term>Rules:</term>
     ///         <description>
     ///             whitespace separated, double-quoted strings. There should be 4 columns to each row, and these
     ///             will be interpreted as:
     ///             <list type="number">
     ///                 <item><description>pattern</description></item>
     ///                 <item><description>left context</description></item>
     ///                 <item><description>right context</description></item>
     ///                 <item><description>phoneme</description></item>
     ///             </list>
     ///         </description>
     ///     </item>
     ///     <item>
     ///         <term>End-of-line comments:</term>
     ///         <description>Any occurrence of '//' will cause all text following on that line to be discarded as a comment.</description>
     ///     </item>
     ///     <item>
     ///         <term>Multi-line comments:</term>
     ///         <description>Any line starting with '/*' will start multi-line commenting mode. This will skip all content until a line ending in '*' and '/' is found.</description>
     ///     </item>
     ///     <item>
     ///         <term>Blank lines:</term>
     ///         <description>All blank lines will be skipped.</description>
     ///     </item>
     /// </list>
     /// <para/>
     /// since 1.6
     /// </remarks>
     public class Rule
     {
         // Matches a literal '|'; used to split the alternatives of a bracketed phoneme list.
         private static readonly Regex PIPE = new Regex("[|]", RegexOptions.Compiled);
         // Matches a run of whitespace; used to split a rule line into its columns.
         private static readonly Regex WHITESPACE = new Regex("\\s+", RegexOptions.Compiled);
         // Matches a literal '+'; used to split the language list inside a phoneme's '[...]' suffix.
         private static readonly Regex PLUS = new Regex("[+]", RegexOptions.Compiled);

         /// <summary>
         /// An <see cref="IRPattern"/> that accepts every input, regardless of how the text is represented.
         /// </summary>
         private class AllStringsRMatcher : IRPattern
         {
             /// <summary>Always reports a match.</summary>
             public bool IsMatch(StringBuilder input) => true;

             /// <summary>Always reports a match.</summary>
             public bool IsMatch(string input) => true;

             /// <summary>Always reports a match.</summary>
             public bool IsMatch(ICharSequence input) => true;
         }

         /// <summary>
         /// A shared pattern that matches every input. <c>GetPattern</c> returns it when a context is empty.
         /// </summary>
         public static readonly IRPattern ALL_STRINGS_RMATCHER = new AllStringsRMatcher();


         /// <summary>
         /// The literal <c>"ALL"</c>. NOTE(review): presumably the "logical" flag value mentioned in the class
         /// remarks; its use is not visible in this part of the file - confirm.
         /// </summary>
         public const string ALL = "ALL";

         // Quote character stripped from both ends of each rule column by StripQuotes.
         private const string DOUBLE_QUOTE = "\"";

         // Prefix that marks an include statement inside a rules resource (see ParseRules).
         private const string HASH_INCLUDE = "#include";

         // Rule table built once by LoadRules: NameType -> RuleType -> language -> first pattern character -> rules.
         private static readonly IDictionary<NameType, IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>> RULES = LoadRules();

         /// <summary>
         /// Builds the complete rule table by parsing the rules resource of every
         /// <see cref="NameType"/> / <see cref="RuleType"/> / language combination.
         /// </summary>
         /// <returns>
         /// A map keyed by name type, then rule type, then language (plus <c>"common"</c> for every rule type
         /// other than <see cref="RuleType.RULES"/>), then the first character of the rule pattern.
         /// </returns>
         /// <exception cref="InvalidOperationException">
         /// Parsing a per-language resource failed; the exception names the resource and wraps the original failure.
         /// </exception>
         private static IDictionary<NameType, IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>> LoadRules() // LUCENENET: Avoid static constructors (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)
         {
             var table = new Dictionary<NameType, IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>>();

             foreach (NameType nameType in Enum.GetValues(typeof(NameType)))
             {
                 IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>> byRuleType =
                         new Dictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>();

                 foreach (RuleType ruleType in Enum.GetValues(typeof(RuleType)))
                 {
                     IDictionary<string, IDictionary<string, IList<Rule>>> byLanguage = new Dictionary<string, IDictionary<string, IList<Rule>>>();

                     // One resource per language known for this name type.
                     foreach (string language in Languages.GetInstance(nameType).GetLanguages())
                     {
                         try
                         {
                             byLanguage[language] = ParseRules(CreateScanner(nameType, ruleType, language), CreateResourceName(nameType, ruleType, language));
                         }
                         catch (InvalidOperationException cause)
                         {
                             // Re-throw with the resource name so the failing file can be identified.
                             throw new InvalidOperationException("Problem processing " + CreateResourceName(nameType, ruleType, language), cause);
                         }
                     }

                     // Every rule type except RULES also has a language-independent "common" resource.
                     if (ruleType != RuleType.RULES)
                     {
                         byLanguage["common"] = ParseRules(CreateScanner(nameType, ruleType, "common"), CreateResourceName(nameType, ruleType, "common"));
                     }

                     byRuleType[ruleType] = byLanguage.AsReadOnly();
                 }

                 table[nameType] = byRuleType.AsReadOnly();
             }

             return table;
         }

 #pragma warning disable IDE0051 // Remove unused private members
         /// <summary>
         /// Reports whether <paramref name="input"/> occurs anywhere in <paramref name="chars"/>.
         /// </summary>
         private static bool Contains(ICharSequence chars, char input)
 #pragma warning restore IDE0051 // Remove unused private members
         {
             int position = 0;
             while (position < chars.Length)
             {
                 if (chars[position] == input)
                 {
                     return true;
                 }
                 position++;
             }
             return false;
         }
         /// <summary>
         /// Reports whether <paramref name="input"/> occurs anywhere in <paramref name="chars"/>.
         /// </summary>
         private static bool Contains(string chars, char input)
         {
             // IndexOf(char) is an ordinal search, i.e. the same char-by-char comparison as a manual scan.
             return chars.IndexOf(input) >= 0;
         }
 #pragma warning disable IDE0051 // Remove unused private members
         /// <summary>
         /// Reports whether <paramref name="input"/> occurs anywhere in <paramref name="chars"/>.
         /// </summary>
         private static bool Contains(StringBuilder chars, char input)
 #pragma warning restore IDE0051 // Remove unused private members
         {
             int position = 0;
             while (position < chars.Length)
             {
                 if (chars[position] == input)
                 {
                     return true;
                 }
                 position++;
             }
             return false;
         }

         /// <summary>
         /// Builds the resource file name <c>{nameType}_{ruleType}_{lang}.txt</c> for a rules resource.
         /// </summary>
         private static string CreateResourceName(NameType nameType, RuleType rt, string lang)
         {
             return $"{nameType.GetName()}_{rt.GetName()}_{lang}.txt";
         }

         /// <summary>
         /// Opens a reader over the embedded rules resource for the given name type, rule type and language.
         /// </summary>
         /// <exception cref="ArgumentException">No resource with the computed name could be found.</exception>
         private static TextReader CreateScanner(NameType nameType, RuleType rt, string lang)
         {
             string resourceName = CreateResourceName(nameType, rt, lang);
             Stream resource = typeof(Languages).FindAndGetManifestResourceStream(resourceName)
                 ?? throw new ArgumentException("Unable to load resource: " + resourceName);

             return new StreamReader(resource, ResourceConstants.ENCODING);
         }

         /// <summary>
         /// Opens a reader over the embedded resource named <c>{lang}.txt</c>; used for included rule files.
         /// </summary>
         /// <exception cref="ArgumentException">No resource with that name could be found.</exception>
         private static TextReader CreateScanner(string lang)
         {
             string resourceName = lang + ".txt";
             Stream resource = typeof(Languages).FindAndGetManifestResourceStream(resourceName)
                 ?? throw new ArgumentException("Unable to load resource: " + resourceName);

             return new StreamReader(resource, ResourceConstants.ENCODING);
         }

         /// <summary>
         /// Reports whether <paramref name="input"/> ends with <paramref name="suffix"/>, comparing char by char.
         /// </summary>
         private static bool EndsWith(ICharSequence input, string suffix)
         {
             // Index in input where the suffix would have to begin.
             int offset = input.Length - suffix.Length;
             if (offset < 0)
             {
                 return false;
             }
             for (int k = suffix.Length - 1; k >= 0; k--)
             {
                 if (input[offset + k] != suffix[k])
                 {
                     return false;
                 }
             }
             return true;
         }

         /// <summary>
         /// Reports whether <paramref name="input"/> ends with <paramref name="suffix"/>, comparing char by char.
         /// </summary>
         private static bool EndsWith(string input, string suffix)
         {
             // Index in input where the suffix would have to begin.
             int offset = input.Length - suffix.Length;
             if (offset < 0)
             {
                 return false;
             }
             for (int k = suffix.Length - 1; k >= 0; k--)
             {
                 if (input[offset + k] != suffix[k])
                 {
                     return false;
                 }
             }
             return true;
         }

         /// <summary>
         /// Reports whether <paramref name="input"/> ends with <paramref name="suffix"/>, comparing char by char.
         /// </summary>
         private static bool EndsWith(StringBuilder input, string suffix)
         {
             // Index in input where the suffix would have to begin.
             int offset = input.Length - suffix.Length;
             if (offset < 0)
             {
                 return false;
             }
             for (int k = suffix.Length - 1; k >= 0; k--)
             {
                 if (input[offset + k] != suffix[k])
                 {
                     return false;
                 }
             }
             return true;
         }

         /// <summary>
         /// Gets the rules that apply to a combination of name type, rule type and languages, as one flat list.
         /// </summary>
         /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
         /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
         /// <param name="langs">The set of languages to consider.</param>
         /// <returns>A new list holding every <see cref="Rule"/> from the matching rule map, in the map's enumeration order.</returns>
         public static IList<Rule> GetInstance(NameType nameType, RuleType rt,
                                      LanguageSet langs)
         {
             IList<Rule> combined = new List<Rule>();
             foreach (KeyValuePair<string, IList<Rule>> group in GetInstanceMap(nameType, rt, langs))
             {
                 combined.AddRange(group.Value);
             }
             return combined;
         }

         /// <summary>
         /// Gets the rules that apply to a combination of name type, rule type and one language.
         /// </summary>
         /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
         /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
         /// <param name="lang">The language to consider.</param>
         /// <returns>A list of <see cref="Rule"/>s that apply.</returns>
         public static IList<Rule> GetInstance(NameType nameType, RuleType rt, string lang)
         {
             // Wrap the single language in a set so the LanguageSet overload can do the work.
             var onlyLanguage = new JCG.HashSet<string>();
             onlyLanguage.Add(lang);
             return GetInstance(nameType, rt, LanguageSet.From(onlyLanguage));
         }

         /// <summary>
         /// Gets the rule map for a combination of name type, rule type and languages.
         /// <para/>
         /// A singleton language set selects that language's rules; any other set selects the rules
         /// registered under <see cref="Languages.ANY"/>.
         /// <para/>
         /// since 1.9
         /// </summary>
         /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
         /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
         /// <param name="langs">The set of languages to consider.</param>
         /// <returns>A map containing all <see cref="Rule"/>s that apply, grouped by the first character of the rule pattern.</returns>
         public static IDictionary<string, IList<Rule>> GetInstanceMap(NameType nameType, RuleType rt,
                                                              LanguageSet langs)
         {
             if (langs.IsSingleton)
             {
                 return GetInstanceMap(nameType, rt, langs.GetAny());
             }

             return GetInstanceMap(nameType, rt, Languages.ANY);
         }

         /// <summary>
         /// Gets the rule map for a combination of name type, rule type and one language.
         /// <para/>
         /// since 1.9
         /// </summary>
         /// <param name="nameType">The <see cref="NameType"/> to consider.</param>
         /// <param name="rt">The <see cref="RuleType"/> to consider.</param>
         /// <param name="lang">The language to consider.</param>
         /// <returns>A map containing all <see cref="Rule"/>s that apply, grouped by the first character of the rule pattern.</returns>
         /// <exception cref="ArgumentException">No rules are registered for the requested combination.</exception>
         public static IDictionary<string, IList<Rule>> GetInstanceMap(NameType nameType, RuleType rt,
                                                              string lang)
         {
             // Walk the three lookup levels; a missing or null entry at any level means "no rules".
             if (!RULES.TryGetValue(nameType, out var byRuleType) || byRuleType == null ||
                 !byRuleType.TryGetValue(rt, out var byLanguage) || byLanguage == null ||
                 !byLanguage.TryGetValue(lang, out var rules) || rules == null)
             {
                 throw new ArgumentException(string.Format("No rules found for {0}, {1}, {2}.",
                                                    nameType.GetName(), rt.GetName(), lang));
             }

             return rules;
         }

         /// <summary>
         /// Parses one phoneme, optionally followed by a bracketed, '+'-separated language list
         /// (<c>text[lang1+lang2]</c>). Without brackets the phoneme applies to <see cref="Languages.ANY_LANGUAGE"/>.
         /// </summary>
         /// <exception cref="ArgumentException">The text contains '[' but does not end in ']'.</exception>
         private static Phoneme ParsePhoneme(string ph)
         {
             int bracketStart = ph.IndexOf('[');
             if (bracketStart < 0)
             {
                 return new Phoneme(ph, Languages.ANY_LANGUAGE);
             }

             if (!ph.EndsWith("]", StringComparison.Ordinal))
             {
                 throw new ArgumentException("Phoneme expression contains a '[' but does not end in ']'");
             }

             string phonemeText = ph.Substring(0, bracketStart);

             // Everything between '[' and the final ']'.
             int listStart = bracketStart + 1;
             string languageList = ph.Substring(listStart, (ph.Length - 1) - listStart);
             ISet<string> langs = new JCG.HashSet<string>(PLUS.Split(languageList).TrimEnd());

             return new Phoneme(phonemeText, LanguageSet.From(langs));
         }

         /// <summary>
         /// Parses a phoneme expression: either a single phoneme, or a parenthesised list of
         /// '|'-separated alternatives. A list whose body starts or ends with '|' also gets an
         /// empty phoneme appended as an alternative.
         /// </summary>
         /// <exception cref="ArgumentException">The text starts with '(' but does not end with ')'.</exception>
         private static IPhonemeExpr ParsePhonemeExpr(string ph)
         {
             if (!ph.StartsWith("(", StringComparison.Ordinal))
             {
                 return ParsePhoneme(ph);
             }

             // we have a bracketed list of options
             if (!ph.EndsWith(")", StringComparison.Ordinal))
             {
                 throw new ArgumentException("Phoneme starts with '(' so must end with ')'");
             }

             string body = ph.Substring(1, ph.Length - 2);
             IList<Phoneme> alternatives = new List<Phoneme>();
             foreach (string alternative in PIPE.Split(body).TrimEnd())
             {
                 alternatives.Add(ParsePhoneme(alternative));
             }

             bool hasEmptyAlternative = body.StartsWith("|", StringComparison.Ordinal) || body.EndsWith("|", StringComparison.Ordinal);
             if (hasEmptyAlternative)
             {
                 alternatives.Add(new Phoneme("", Languages.ANY_LANGUAGE));
             }

             return new PhonemeList(alternatives);
         }

         /// <summary>
         /// A <see cref="Rule"/> that remembers the resource and line it was parsed from, and reports
         /// them from <see cref="ToString()"/>.
         /// </summary>
         private class RuleAnonymousHelper : Rule
         {
             private readonly int sourceLine;
             private readonly string sourceLocation;

             public RuleAnonymousHelper(string pat, string lCon, string rCon, IPhonemeExpr ph, int cLine, string location)
                 : base(pat, lCon, rCon, ph)
             {
                 sourceLine = cLine;
                 sourceLocation = location;
             }

             /// <summary>Returns <c>Rule{line=N, loc='location'}</c>.</summary>
             public override string ToString()
             {
                 return "Rule" + "{line=" + sourceLine + ", loc='" + sourceLocation + '\'' + '}';
             }
         }

         /// <summary>
         /// Reads a rules resource line by line and groups the rules it declares by the first
         /// character of their pattern.
         /// </summary>
         /// <param name="reader">Source of the rules text; it is disposed before this method returns or throws.</param>
         /// <param name="location">Name of the resource being read; used in error messages and recorded on each rule.</param>
         /// <returns>The parsed rules, keyed by the first character of each rule's pattern.</returns>
         /// <exception cref="ArgumentException">
         /// An include statement contains a space, an included resource cannot be loaded, or a rule line
         /// does not split into exactly 4 whitespace-separated parts.
         /// </exception>
         /// <exception cref="InvalidOperationException">
         /// Building a rule from a well-formed line raised an <see cref="ArgumentException"/>.
         /// </exception>
         private static IDictionary<string, IList<Rule>> ParseRules(TextReader reader, string location)
         {
             IDictionary<string, IList<Rule>> rulesByKey = new JCG.Dictionary<string, IList<Rule>>();
             int lineNumber = 0;
             bool insideBlockComment = false;

             using (reader)
             {
                 string rawLine;
                 while ((rawLine = reader.ReadLine()) != null)
                 {
                     lineNumber++;

                     if (insideBlockComment)
                     {
                         // Skip everything until a line that ends with the block-comment terminator.
                         if (rawLine.EndsWith(ResourceConstants.EXT_CMT_END, StringComparison.Ordinal))
                         {
                             insideBlockComment = false;
                         }
                         continue;
                     }

                     if (rawLine.StartsWith(ResourceConstants.EXT_CMT_START, StringComparison.Ordinal))
                     {
                         // The opening line itself is not checked for the terminator.
                         insideBlockComment = true;
                         continue;
                     }

                     // discard end-of-line comments, then leading/trailing whitespace
                     int commentStart = rawLine.IndexOf(ResourceConstants.CMT, StringComparison.Ordinal);
                     string line = (commentStart >= 0 ? rawLine.Substring(0, commentStart) : rawLine).Trim();

                     if (line.Length == 0)
                     {
                         continue; // empty lines can be safely skipped
                     }

                     if (line.StartsWith(HASH_INCLUDE, StringComparison.Ordinal))
                     {
                         // include statement: merge in the rules of the named resource
                         string incl = line.Substring(HASH_INCLUDE.Length).Trim();
                         if (incl.Contains(" "))
                         {
                             throw new ArgumentException("Malformed import statement '" + rawLine + "' in " +
                                                                location);
                         }

                         rulesByKey.PutAll(ParseRules(CreateScanner(incl), location + "->" + incl));
                         continue;
                     }

                     // rule: pattern, left context, right context, phoneme
                     string[] columns = WHITESPACE.Split(line).TrimEnd();
                     if (columns.Length != 4)
                     {
                         throw new ArgumentException("Malformed rule statement split into " + columns.Length +
                                                            " parts: " + rawLine + " in " + location);
                     }

                     try
                     {
                         string pat = StripQuotes(columns[0]);
                         string lCon = StripQuotes(columns[1]);
                         string rCon = StripQuotes(columns[2]);
                         IPhonemeExpr ph = ParsePhonemeExpr(StripQuotes(columns[3]));
                         Rule rule = new RuleAnonymousHelper(pat, lCon, rCon, ph, lineNumber, location);

                         // Rules are bucketed by the first character of their pattern.
                         string bucketKey = rule.pattern.Substring(0, 1);
                         if (!rulesByKey.TryGetValue(bucketKey, out IList<Rule> bucket) || bucket == null)
                         {
                             bucket = new List<Rule>();
                             rulesByKey[bucketKey] = bucket;
                         }
                         bucket.Add(rule);
                     }
                     catch (ArgumentException e)
                     {
                         throw new InvalidOperationException("Problem parsing line '" + lineNumber + "' in " +
                                                         location, e);
                     }
                 }
             }

             return rulesByKey;
         }

         /// <summary>
         /// An <see cref="IRPattern"/> assembled from three delegates, one per supported input representation.
         /// </summary>
         private class RPatternHelper : IRPattern
         {
             private readonly Func<StringBuilder, bool> matchBuilder;
             private readonly Func<string, bool> matchString;
             private readonly Func<ICharSequence, bool> matchSequence;

             public RPatternHelper(Func<StringBuilder, bool> isMatchSB, Func<string, bool> isMatchStr, Func<ICharSequence, bool> isMatchCS)
             {
                 matchBuilder = isMatchSB;
                 matchString = isMatchStr;
                 matchSequence = isMatchCS;
             }

             /// <summary>Forwards to the <see cref="StringBuilder"/> delegate.</summary>
             public bool IsMatch(StringBuilder input) => matchBuilder(input);

             /// <summary>Forwards to the <see cref="string"/> delegate.</summary>
             public bool IsMatch(string input) => matchString(input);

             /// <summary>Forwards to the <see cref="ICharSequence"/> delegate.</summary>
             public bool IsMatch(ICharSequence input) => matchSequence(input);
         }

         /// <summary>
         /// Compiles a context regular expression into an <see cref="IRPattern"/>, using direct string
         /// operations for the simple shapes and falling back to a compiled <see cref="Regex"/> otherwise.
         /// </summary>
         /// <remarks>
         /// After a leading '^' and a trailing '$' are removed, two shapes are handled without a <see cref="Regex"/>:
         /// <list type="bullet">
         ///     <item><description>content without any '[': compared as a literal (other regex
         ///     metacharacters are not interpreted on this path);</description></item>
         ///     <item><description>a single character class such as <c>[abc]</c> or <c>[^abc]</c>: its
         ///     characters are tested individually as literals (ranges are not expanded).</description></item>
         /// </list>
         /// Every other expression, including one with neither anchor, is matched with <see cref="Regex"/>.
         /// </remarks>
         /// <param name="regex">The regular expression to compile.</param>
         /// <returns>An <see cref="IRPattern"/> that will match this regex.</returns>
         private static IRPattern GetPattern(string regex)
         {
             bool startsWith = regex.StartsWith("^", StringComparison.Ordinal);
             bool endsWith = regex.EndsWith("$", StringComparison.Ordinal);
             string content = regex.Substring(startsWith ? 1 : 0, (endsWith ? regex.Length - 1 : regex.Length) - (startsWith ? 1 : 0));
             bool boxes = content.Contains("[");

             if (!boxes)
             {
                 if (startsWith && endsWith)
                 {
                     // exact match
                     if (content.Length == 0)
                     {
                         // empty
                         return new RPatternHelper(
                             isMatchSB: (input) => input.Length == 0,
                             isMatchStr: (input) => input.Length == 0,
                             isMatchCS: (input) => input.Length == 0);
                     }
                     else
                     {
                         return new RPatternHelper(
                             // StringBuilder.Equals(string) would bind to Object.Equals (reference equality)
                             // on targets without an Equals(ReadOnlySpan<char>) overload and never match,
                             // so compare length and characters explicitly.
                             isMatchSB: (input) => input.Length == content.Length && StartsWith(input, content),
                             isMatchStr: (input) => input.Equals(content),
                             // NOTE(review): relies on the ICharSequence implementation's Equals(object)
                             // comparing by content - confirm.
                             isMatchCS: (input) => input.Equals(content));
                     }
                 }
                 else if ((startsWith || endsWith) && content.Length == 0)
                 {
                     // matches every string
                     return ALL_STRINGS_RMATCHER;
                 }
                 else if (startsWith)
                 {
                     // matches from start
                     return new RPatternHelper(
                         isMatchSB: (input) => StartsWith(input, content),
                         isMatchStr: (input) => StartsWith(input, content),
                         isMatchCS: (input) => StartsWith(input, content));
                 }
                 else if (endsWith)
                 {
                     // matches from end
                     return new RPatternHelper(
                         isMatchSB: (input) => EndsWith(input, content),
                         isMatchStr: (input) => EndsWith(input, content),
                         isMatchCS: (input) => EndsWith(input, content));
                 }
             }
             else
             {
                 bool startsWithBox = content.StartsWith("[", StringComparison.Ordinal);
                 bool endsWithBox = content.EndsWith("]", StringComparison.Ordinal);

                 if (startsWithBox && endsWithBox)
                 {
                     string boxContent = content.Substring(1, (content.Length - 1) - 1);
                     if (!boxContent.Contains("["))
                     {
                         // box containing alternatives
                         bool negate = boxContent.StartsWith("^", StringComparison.Ordinal);
                         if (negate)
                         {
                             boxContent = boxContent.Substring(1);
                         }
                         // Copies captured by the lambdas below.
                         string bContent = boxContent;
                         bool shouldMatch = !negate;

                         if (startsWith && endsWith)
                         {
                             // exact match: the input must be exactly one character from (or not from) the box
                             return new RPatternHelper(
                                 isMatchSB: (input) => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch,
                                 isMatchStr: (input) => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch,
                                 isMatchCS: (input) => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch);
                         }
                         else if (startsWith)
                         {
                             // first char
                             return new RPatternHelper(
                                 isMatchSB: (input) => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch,
                                 isMatchStr: (input) => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch,
                                 isMatchCS: (input) => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch);
                         }
                         else if (endsWith)
                         {
                             // last char
                             return new RPatternHelper(
                                 isMatchSB: (input) => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch,
                                 isMatchStr: (input) => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch,
                                 isMatchCS: (input) => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch);
                         }
                     }
                 }
             }

             // Worst case: no shortcut applies, so use the full regular expression (anchors included).
             Regex pattern = new Regex(regex, RegexOptions.Compiled);

             return new RPatternHelper(
                 isMatchSB: (input) => pattern.IsMatch(input.ToString()),
                 isMatchStr: (input) => pattern.IsMatch(input),
                 isMatchCS: (input) => pattern.IsMatch(input.ToString()));
         }

         /// <summary>
         /// Reports whether <paramref name="input"/> begins with <paramref name="prefix"/>, comparing char by char.
         /// </summary>
         private static bool StartsWith(ICharSequence input, string prefix)
         {
             int prefixLength = prefix.Length;
             if (input.Length < prefixLength)
             {
                 return false;
             }

             int position = 0;
             while (position < prefixLength)
             {
                 if (input[position] != prefix[position])
                 {
                     return false;
                 }
                 position++;
             }
             return true;
         }

         /// <summary>
         /// Reports whether <paramref name="input"/> begins with <paramref name="prefix"/>, comparing char by char.
         /// </summary>
         private static bool StartsWith(string input, string prefix)
         {
             int prefixLength = prefix.Length;
             if (input.Length < prefixLength)
             {
                 return false;
             }

             int position = 0;
             while (position < prefixLength)
             {
                 if (input[position] != prefix[position])
                 {
                     return false;
                 }
                 position++;
             }
             return true;
         }

         /// <summary>
         /// Reports whether <paramref name="input"/> begins with <paramref name="prefix"/>, comparing char by char.
         /// </summary>
         private static bool StartsWith(StringBuilder input, string prefix)
         {
             int prefixLength = prefix.Length;
             if (input.Length < prefixLength)
             {
                 return false;
             }

             int position = 0;
             while (position < prefixLength)
             {
                 if (input[position] != prefix[position])
                 {
                     return false;
                 }
                 position++;
             }
             return true;
         }

         /// <summary>
         /// Removes one leading and one trailing <c>DOUBLE_QUOTE</c> from <paramref name="str"/>, if present.
         /// Each end is checked independently, so an unbalanced quote is removed as well.
         /// </summary>
         /// <param name="str">The text to strip.</param>
         /// <returns>The text without the surrounding quotes.</returns>
         private static string StripQuotes(string str)
         {
             // Drop the leading quote first; the trailing check then runs on what remains.
             string result = str.StartsWith(DOUBLE_QUOTE, StringComparison.Ordinal)
                 ? str.Substring(1)
                 : str;

             return result.EndsWith(DOUBLE_QUOTE, StringComparison.Ordinal)
                 ? result.Substring(0, result.Length - 1)
                 : result;
         }

         // Matcher for the text to the left of the pattern; the constructor builds it from the
         // left-context expression with "$" appended.
         private readonly IRPattern lContext;

         // Literal text that must appear at the match position.
         private readonly string pattern;

         // Phoneme expression exposed through the Phoneme property.
         private readonly IPhonemeExpr phoneme;

         // Matcher for the text to the right of the pattern; the constructor builds it from the
         // right-context expression with "^" prepended.
         private readonly IRPattern rContext;

         /// <summary>
         /// Initializes a new rule from its literal pattern, its two context expressions and its phoneme.
         /// </summary>
         /// <param name="pattern">The literal pattern text.</param>
         /// <param name="lContext">The left context expression; <c>"$"</c> is appended before it is turned into a matcher.</param>
         /// <param name="rContext">The right context expression; <c>"^"</c> is prepended before it is turned into a matcher.</param>
         /// <param name="phoneme">The phoneme produced when the rule matches.</param>
         public Rule(string pattern, string lContext, string rContext, IPhonemeExpr phoneme)
         {
             // The left context must match up to the end of the text before the pattern,
             // the right context from the start of the text after it.
             string leftExpression = lContext + "$";
             string rightExpression = "^" + rContext;

             this.pattern = pattern;
             this.lContext = GetPattern(leftExpression);
             this.rContext = GetPattern(rightExpression);
             this.phoneme = phoneme;
         }

         /// <summary>
         /// Gets the left context pattern: a regular expression that has to match the text to the left of the pattern.
         /// </summary>
         public virtual IRPattern LContext
         {
             get { return this.lContext; }
         }

         /// <summary>
         /// Gets the pattern: a string literal that has to match exactly.
         /// </summary>
         public virtual string Pattern
         {
             get { return this.pattern; }
         }

         /// <summary>
         /// Gets the phoneme that is associated with the pattern match when the rule matches.
         /// </summary>
         public virtual IPhonemeExpr Phoneme
         {
             get { return this.phoneme; }
         }

         /// <summary>
         /// Gets the right context pattern: a regular expression that has to match the text to the right of the pattern.
         /// </summary>
         public virtual IRPattern RContext
         {
             get { return this.rContext; }
         }

         /// <summary>
         /// Decides if the pattern and context match the input starting at a position. It is a match if the
         /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
         /// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
         /// </summary>
         /// <param name="input">The input <see cref="ICharSequence"/>.</param>
         /// <param name="i">The int position within the input.</param>
         /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
         public virtual bool PatternAndContextMatches(ICharSequence input, int i)
         {
             if (i < 0)
             {
                 // The single-string constructor interprets its argument as the parameter name,
                 // so the parameter name and the message are passed separately.
                 throw new ArgumentOutOfRangeException(nameof(i), "Can not match pattern at negative indexes");
             }

             int patternLength = this.pattern.Length;

             // Written as a subtraction so that a very large i cannot overflow i + patternLength.
             if (i > input.Length - patternLength)
             {
                 // not enough room for the pattern to match
                 return false;
             }

             int ipl = i + patternLength;

             // evaluate the pattern, left context and right context
             // fail early if any of the evaluations is not successful
             // LUCENENET: the 2nd Subsequence argument is a length, not an end index
             if (!input.Subsequence(i, patternLength).Equals(this.pattern))
             {
                 return false;
             }
             else if (!this.rContext.IsMatch(input.Subsequence(ipl, input.Length - ipl)))
             {
                 return false;
             }
             return this.lContext.IsMatch(input.Subsequence(0, i));
         }

         /// <summary>
         /// Decides if the pattern and context match the input starting at a position. It is a match if the
         /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
         /// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
         /// </summary>
         /// <param name="input">The input <see cref="string"/>.</param>
         /// <param name="i">The int position within the input.</param>
         /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
         // LUCENENET specific
         public virtual bool PatternAndContextMatches(string input, int i)
         {
             if (i < 0)
             {
                 // The single-string constructor interprets its argument as the parameter name,
                 // so the parameter name and the message are passed separately.
                 throw new ArgumentOutOfRangeException(nameof(i), "Can not match pattern at negative indexes");
             }

             int patternLength = this.pattern.Length;

             // Written as a subtraction so that a very large i cannot overflow i + patternLength.
             if (i > input.Length - patternLength)
             {
                 // not enough room for the pattern to match
                 return false;
             }

             int ipl = i + patternLength;

             // evaluate the pattern, left context and right context
             // fail early if any of the evaluations is not successful
             if (!input.Substring(i, patternLength).Equals(this.pattern, StringComparison.Ordinal))
             {
                 return false;
             }
             else if (!this.rContext.IsMatch(input.Substring(ipl, input.Length - ipl)))
             {
                 return false;
             }
             return this.lContext.IsMatch(input.Substring(0, i));
         }

         /// <summary>
         /// Decides if the pattern and context match the input starting at a position. It is a match if the
         /// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
         /// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
         /// </summary>
         /// <param name="input">The input <see cref="StringBuilder"/>.</param>
         /// <param name="i">The int position within the input.</param>
         /// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
         // LUCENENET specific
         public virtual bool PatternAndContextMatches(StringBuilder input, int i)
         {
             if (i < 0)
             {
                 // The single-string constructor interprets its argument as the parameter name,
                 // so the parameter name and the message are passed separately.
                 throw new ArgumentOutOfRangeException(nameof(i), "Can not match pattern at negative indexes");
             }

             int patternLength = this.pattern.Length;

             // Written as a subtraction so that a very large i cannot overflow i + patternLength.
             if (i > input.Length - patternLength)
             {
                 // not enough room for the pattern to match
                 return false;
             }

             int ipl = i + patternLength;

             // evaluate the pattern, left context and right context
             // fail early if any of the evaluations is not successful
             if (!input.ToString(i, patternLength).Equals(this.pattern, StringComparison.Ordinal))
             {
                 return false;
             }
             else if (!this.rContext.IsMatch(input.ToString(ipl, input.Length - ipl)))
             {
                 return false;
             }
             return this.lContext.IsMatch(input.ToString(0, i));
         }

     }

     /// <summary>
     /// A single phoneme: a piece of phoneme text together with the <see cref="LanguageSet"/> it was created with.
     /// The text can be extended in place through <see cref="Append(string)"/>.
     /// </summary>
     public sealed class Phoneme : IPhonemeExpr
     {
         /// <summary>
         /// Orders phonemes by their text, character by character. When one text is a
         /// prefix of the other, the shorter text sorts first.
         /// </summary>
         private class PhonemeComparer : IComparer<Phoneme>
         {
             public int Compare(Phoneme o1, Phoneme o2)
             {
                 StringBuilder left = o1.phonemeText;
                 StringBuilder right = o2.phonemeText;
                 int leftLength = left.Length;
                 int rightLength = right.Length;
                 int shared = leftLength < rightLength ? leftLength : rightLength;

                 // The first differing character within the common prefix decides the order.
                 for (int idx = 0; idx < shared; idx++)
                 {
                     int diff = left[idx] - right[idx];
                     if (diff != 0)
                     {
                         return diff;
                     }
                 }

                 // Common prefix is identical: the longer text is the greater one.
                 if (leftLength > rightLength)
                 {
                     return +1;
                 }

                 return leftLength < rightLength ? -1 : 0;
             }
         }

         /// <summary>
         /// Comparer that orders <see cref="Phoneme"/> instances by their phoneme text.
         /// </summary>
         public static readonly IComparer<Phoneme> COMPARER = new PhonemeComparer();

         // Private copy of the text; every constructor creates a fresh builder.
         private readonly StringBuilder phonemeText;
         private readonly LanguageSet languages;

         /// <summary>
         /// Creates a phoneme from a string.
         /// </summary>
         /// <param name="phonemeText">The initial phoneme text.</param>
         /// <param name="languages">The languages associated with this phoneme.</param>
         public Phoneme(string phonemeText, LanguageSet languages)
         {
             this.languages = languages;
             this.phonemeText = new StringBuilder(phonemeText);
         }

         /// <summary>
         /// Creates a phoneme from the current content of a <see cref="StringBuilder"/>; the content is copied.
         /// </summary>
         /// <param name="phonemeText">The initial phoneme text.</param>
         /// <param name="languages">The languages associated with this phoneme.</param>
         public Phoneme(StringBuilder phonemeText, LanguageSet languages)
         {
             this.languages = languages;
             this.phonemeText = new StringBuilder(phonemeText.ToString());
         }

         /// <summary>
         /// Creates a phoneme from the string form of an <see cref="ICharSequence"/>.
         /// </summary>
         /// <param name="phonemeText">The initial phoneme text.</param>
         /// <param name="languages">The languages associated with this phoneme.</param>
         public Phoneme(ICharSequence phonemeText, LanguageSet languages)
         {
             this.languages = languages;
             this.phonemeText = new StringBuilder(phonemeText.ToString());
         }

         /// <summary>
         /// Creates a phoneme whose text is the left text followed by the right text,
         /// using the languages of <paramref name="phonemeLeft"/>.
         /// </summary>
         /// <param name="phonemeLeft">Supplies the leading text and the languages.</param>
         /// <param name="phonemeRight">Supplies the trailing text.</param>
         public Phoneme(Phoneme phonemeLeft, Phoneme phonemeRight)
             : this(phonemeLeft.phonemeText, phonemeLeft.languages)
         {
             phonemeText.Append(phonemeRight.phonemeText);
         }

         /// <summary>
         /// Creates a phoneme whose text is the left text followed by the right text,
         /// using the supplied <paramref name="languages"/>.
         /// </summary>
         /// <param name="phonemeLeft">Supplies the leading text.</param>
         /// <param name="phonemeRight">Supplies the trailing text.</param>
         /// <param name="languages">The languages associated with the new phoneme.</param>
         public Phoneme(Phoneme phonemeLeft, Phoneme phonemeRight, LanguageSet languages)
             : this(phonemeLeft.phonemeText, languages)
         {
             phonemeText.Append(phonemeRight.phonemeText);
         }

         /// <summary>
         /// Appends <paramref name="str"/> to this phoneme's text in place.
         /// </summary>
         /// <param name="str">The text to append.</param>
         /// <returns>This same instance.</returns>
         public Phoneme Append(string str)
         {
             phonemeText.Append(str);
             return this;
         }

         /// <summary>
         /// Gets the languages this phoneme was created with.
         /// </summary>
         public LanguageSet Languages
         {
             get { return languages; }
         }

         /// <summary>
         /// Gets a new single-element array containing only this phoneme.
         /// </summary>
         public IList<Phoneme> Phonemes
         {
             get { return new[] { this }; }
         }

         /// <summary>
         /// Returns the current phoneme text as a string.
         /// </summary>
         public string GetPhonemeText() => phonemeText.ToString();

         /// <summary>
         /// Creates a new phoneme whose text is this text followed by the text of <paramref name="right"/>
         /// and whose languages are this phoneme's languages restricted to those of <paramref name="right"/>.
         /// </summary>
         /// <param name="right">The phoneme to join onto the end of this one.</param>
         /// <returns>The newly created phoneme; neither operand is modified.</returns>
         [Obsolete("since 1.9")]
         public Phoneme Join(Phoneme right)
         {
             string joinedText = phonemeText.ToString() + right.phonemeText.ToString();
             LanguageSet restricted = languages.RestrictTo(right.Languages);
             return new Phoneme(joinedText, restricted);
         }
     }

     /// <summary>
     /// Something that can supply a list of <see cref="Phoneme"/>s, such as a single
     /// <see cref="Phoneme"/> or a <see cref="PhonemeList"/>.
     /// </summary>
     public interface IPhonemeExpr
     {
         /// <summary>
         /// Gets the phonemes supplied by this expression.
         /// </summary>
         IList<Phoneme> Phonemes { get; }
     }

     /// <summary>
     /// An <see cref="IPhonemeExpr"/> backed by a caller-supplied list of phonemes.
     /// </summary>
     public sealed class PhonemeList : IPhonemeExpr
     {
         // The list handed to the constructor; stored as-is, not copied.
         private readonly IList<Phoneme> phonemes;

         /// <summary>
         /// Creates a phoneme list that exposes the given list.
         /// </summary>
         /// <param name="phonemes">The phonemes to expose through <see cref="Phonemes"/>.</param>
         public PhonemeList(IList<Phoneme> phonemes)
         {
             this.phonemes = phonemes;
         }

         /// <summary>
         /// Gets the list that was passed to the constructor.
         /// </summary>
         public IList<Phoneme> Phonemes
         {
             get { return this.phonemes; }
         }
     }

     /// <summary>
     /// A minimal abstraction over the pattern matching that <see cref="Rule"/> needs for its
     /// left and right contexts, to allow for alternate implementations.
     /// </summary>
     public interface IRPattern
     {
         /// <summary>
         /// Determines whether the given <see cref="ICharSequence"/> matches this pattern.
         /// </summary>
         /// <param name="input">The text to test.</param>
         /// <returns><c>true</c> if <paramref name="input"/> matches; otherwise <c>false</c>.</returns>
         bool IsMatch(ICharSequence input);

         /// <summary>
         /// Determines whether the given <see cref="string"/> matches this pattern.
         /// </summary>
         /// <param name="input">The text to test.</param>
         /// <returns><c>true</c> if <paramref name="input"/> matches; otherwise <c>false</c>.</returns>
         bool IsMatch(string input);

         /// <summary>
         /// Determines whether the given <see cref="StringBuilder"/> matches this pattern.
         /// </summary>
         /// <param name="input">The text to test.</param>
         /// <returns><c>true</c> if <paramref name="input"/> matches; otherwise <c>false</c>.</returns>
         bool IsMatch(StringBuilder input);
     }
 }