blob: 23eb7ec273501227a16e4583dd1d592638f5bb52 [file] [log] [blame]
// commons-codec version compatibility level: 1.9
using J2N;
using J2N.Collections.Generic.Extensions;
using J2N.Text;
using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Analysis.Phonetic.Language.Bm
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// A phoneme rule.
/// </summary>
/// <remarks>
/// Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply
/// and a logical flag indicating if all languages must be in play. A rule matches if:
/// <list type="bullet">
/// <item><description>the pattern matches at the current position</description></item>
/// <item><description>the string up until the beginning of the pattern matches the left context</description></item>
/// <item><description>the string from the end of the pattern matches the right context</description></item>
/// <item><description>logical is ALL and all languages are in scope; or</description></item>
/// <item><description>logical is any other value and at least one language is in scope</description></item>
/// </list>
/// <para/>
/// Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user
/// to explicitly construct their own.
/// <para/>
/// Rules are immutable and thread-safe.
/// <para/>
/// <b>Rules resources</b>
/// <para/>
/// Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically
/// named following the pattern:
/// <c>Lucene.Net.Analysis.Phonetic.Language.Bm.<see cref="NameType"/>_<see cref="RuleType"/>_[language].txt</c>
/// <para/>
/// The format of these resources is the following:
/// <list type="table">
/// <item>
/// <term>Rules:</term>
/// <description>
/// whitespace separated, double-quoted strings. There should be 4 columns to each row, and these
/// will be interpreted as:
/// <list type="number">
/// <item><description>pattern</description></item>
/// <item><description>left context</description></item>
/// <item><description>right context</description></item>
/// <item><description>phoneme</description></item>
/// </list>
/// </description>
/// </item>
/// <item>
/// <term>End-of-line comments:</term>
/// <description>Any occurrence of '//' will cause all text following on that line to be discarded as a comment.</description>
/// </item>
/// <item>
/// <term>Multi-line comments:</term>
/// <description>Any line starting with '/*' will start multi-line commenting mode. This will skip all content until a line ending in '*' and '/' is found.</description>
/// </item>
/// <item>
/// <term>Blank lines:</term>
/// <description>All blank lines will be skipped.</description>
/// </item>
/// </list>
/// <para/>
/// since 1.6
/// </remarks>
public class Rule
{
// Splits on the literal '|' character; used by ParsePhonemeExpr to separate phoneme alternatives.
private static readonly Regex PIPE = new Regex("[|]", RegexOptions.Compiled);
// Splits on runs of whitespace; used by ParseRules to break a rule line into its columns.
private static readonly Regex WHITESPACE = new Regex("\\s+", RegexOptions.Compiled);
// Splits on the literal '+' character; used by ParsePhoneme to separate language names.
private static readonly Regex PLUS = new Regex("[+]", RegexOptions.Compiled);
/// <summary>
/// An <see cref="IRPattern"/> that accepts every input, regardless of its content or type.
/// </summary>
private class AllStringsRMatcher : IRPattern
{
    /// <summary>Always returns <c>true</c>.</summary>
    public bool IsMatch(StringBuilder input) => true;

    /// <summary>Always returns <c>true</c>.</summary>
    public bool IsMatch(string input) => true;

    /// <summary>Always returns <c>true</c>.</summary>
    public bool IsMatch(ICharSequence input) => true;
}
/// <summary>
/// Shared <see cref="IRPattern"/> whose <c>IsMatch</c> overloads all return <c>true</c>.
/// </summary>
public static readonly IRPattern ALL_STRINGS_RMATCHER = new AllStringsRMatcher();
/// <summary>
/// The literal string "ALL".
/// </summary>
// NOTE(review): not referenced by the code visible in this file; presumably the "ALL" logical flag
// mentioned in the class remarks - confirm against callers.
public const string ALL = "ALL";
// Quote character stripped from both ends of each rule column (see StripQuotes).
private const string DOUBLE_QUOTE = "\"";
// Prefix that marks a rule-file line as an include directive (see ParseRules).
private const string HASH_INCLUDE = "#include";
// Rules loaded once by LoadRules during type initialization. Nesting, outermost first:
// NameType -> RuleType -> language name (or "common") -> first character of the rule pattern -> rules.
private static readonly IDictionary<NameType, IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>> RULES = LoadRules();
/// <summary>
/// Builds the rule table for every <see cref="NameType"/> / <see cref="RuleType"/> combination by parsing
/// one resource per language, plus a "common" resource for every rule type other than <see cref="RuleType.RULES"/>.
/// </summary>
/// <returns>The rule table keyed by name type, rule type, language and pattern key.</returns>
/// <exception cref="InvalidOperationException">A per-language resource could not be processed.</exception>
private static IDictionary<NameType, IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>> LoadRules() // LUCENENET: Avoid static constructors (see https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)
{
    var byNameType = new Dictionary<NameType, IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>>();

    foreach (NameType nameType in Enum.GetValues(typeof(NameType)))
    {
        IDictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>> byRuleType =
            new Dictionary<RuleType, IDictionary<string, IDictionary<string, IList<Rule>>>>();

        foreach (RuleType ruleType in Enum.GetValues(typeof(RuleType)))
        {
            IDictionary<string, IDictionary<string, IList<Rule>>> byLanguage =
                new Dictionary<string, IDictionary<string, IList<Rule>>>();

            foreach (string language in Languages.GetInstance(nameType).GetLanguages())
            {
                try
                {
                    byLanguage[language] = ParseRules(
                        CreateScanner(nameType, ruleType, language),
                        CreateResourceName(nameType, ruleType, language));
                }
                catch (InvalidOperationException cause)
                {
                    // Re-wrap so the failing resource name is part of the message.
                    throw new InvalidOperationException("Problem processing " + CreateResourceName(nameType, ruleType, language), cause);
                }
            }

            // Every rule type except RULES also has a language-independent "common" resource.
            if (ruleType != RuleType.RULES)
            {
                byLanguage["common"] = ParseRules(
                    CreateScanner(nameType, ruleType, "common"),
                    CreateResourceName(nameType, ruleType, "common"));
            }

            byRuleType[ruleType] = byLanguage.AsReadOnly();
        }

        byNameType[nameType] = byRuleType.AsReadOnly();
    }

    return byNameType;
}
// Returns true when any character of the sequence equals the given character.
#pragma warning disable IDE0051 // Remove unused private members
private static bool Contains(ICharSequence chars, char input)
#pragma warning restore IDE0051 // Remove unused private members
{
    int position = 0;
    while (position < chars.Length)
    {
        if (chars[position] == input)
        {
            return true;
        }
        position++;
    }
    return false;
}
// Returns true when the string contains the given character (ordinal comparison).
private static bool Contains(string chars, char input)
{
    return chars.IndexOf(input) >= 0;
}
// Returns true when any character of the builder equals the given character.
#pragma warning disable IDE0051 // Remove unused private members
private static bool Contains(StringBuilder chars, char input)
#pragma warning restore IDE0051 // Remove unused private members
{
    int position = 0;
    while (position < chars.Length)
    {
        if (chars[position] == input)
        {
            return true;
        }
        position++;
    }
    return false;
}
// Builds the resource file name "<nameType>_<ruleType>_<lang>.txt" for a rule set.
private static string CreateResourceName(NameType nameType, RuleType rt, string lang)
{
    return $"{nameType.GetName()}_{rt.GetName()}_{lang}.txt";
}
// Opens the embedded rules resource for the given name type, rule type and language.
// Throws ArgumentException when the resource cannot be found.
private static TextReader CreateScanner(NameType nameType, RuleType rt, string lang)
{
    string resName = CreateResourceName(nameType, rt, lang);
    Stream resource = typeof(Languages).FindAndGetManifestResourceStream(resName)
        ?? throw new ArgumentException("Unable to load resource: " + resName);
    return new StreamReader(resource, ResourceConstants.ENCODING);
}
// Opens the embedded resource "<lang>.txt" (used for #include targets).
// Throws ArgumentException when the resource cannot be found.
private static TextReader CreateScanner(string lang)
{
    string resName = lang + ".txt";
    Stream resource = typeof(Languages).FindAndGetManifestResourceStream(resName)
        ?? throw new ArgumentException("Unable to load resource: " + resName);
    return new StreamReader(resource, ResourceConstants.ENCODING);
}
// Returns true when the sequence ends with the given suffix, comparing character by character.
private static bool EndsWith(ICharSequence input, string suffix)
{
    int suffixLength = suffix.Length;
    int offset = input.Length - suffixLength;
    if (offset < 0)
    {
        // The suffix is longer than the input.
        return false;
    }
    // Walk backwards from the last character, as the comparison is anchored at the end.
    for (int k = suffixLength - 1; k >= 0; k--)
    {
        if (input[offset + k] != suffix[k])
        {
            return false;
        }
    }
    return true;
}
// Returns true when the string ends with the given suffix (ordinal comparison).
private static bool EndsWith(string input, string suffix)
{
    int suffixLength = suffix.Length;
    int offset = input.Length - suffixLength;
    return offset >= 0
        && string.CompareOrdinal(input, offset, suffix, 0, suffixLength) == 0;
}
// Returns true when the builder ends with the given suffix, comparing character by character.
private static bool EndsWith(StringBuilder input, string suffix)
{
    int suffixLength = suffix.Length;
    int offset = input.Length - suffixLength;
    if (offset < 0)
    {
        // The suffix is longer than the input.
        return false;
    }
    for (int k = suffixLength - 1; k >= 0; k--)
    {
        if (input[offset + k] != suffix[k])
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Gets the rules that apply to a name type, rule type and set of languages, flattened into a single list.
/// </summary>
/// <param name="nameType">The <see cref="NameType"/> to consider.</param>
/// <param name="rt">The <see cref="RuleType"/> to consider.</param>
/// <param name="langs">The set of languages to consider.</param>
/// <returns>A new list holding every <see cref="Rule"/> from the matching rule map, in the map's enumeration order.</returns>
public static IList<Rule> GetInstance(NameType nameType, RuleType rt,
    LanguageSet langs)
{
    IList<Rule> flattened = new List<Rule>();
    foreach (KeyValuePair<string, IList<Rule>> entry in GetInstanceMap(nameType, rt, langs))
    {
        flattened.AddRange(entry.Value);
    }
    return flattened;
}
/// <summary>
/// Gets the rules that apply to a name type, rule type and one language, flattened into a single list.
/// </summary>
/// <param name="nameType">The <see cref="NameType"/> to consider.</param>
/// <param name="rt">The <see cref="RuleType"/> to consider.</param>
/// <param name="lang">The language to consider.</param>
/// <returns>A list of <see cref="Rule"/>s that apply.</returns>
public static IList<Rule> GetInstance(NameType nameType, RuleType rt, string lang)
{
    // Wrap the single language in a set and delegate to the LanguageSet overload.
    var singleLanguage = new JCG.HashSet<string>();
    singleLanguage.Add(lang);
    LanguageSet langs = LanguageSet.From(singleLanguage);
    return GetInstance(nameType, rt, langs);
}
/// <summary>
/// Gets the rule map for a name type, rule type and set of languages.
/// <para/>
/// A singleton set is looked up under its only language; any other set is looked up under
/// <c>Languages.ANY</c>.
/// <para/>
/// since 1.9
/// </summary>
/// <param name="nameType">The <see cref="NameType"/> to consider.</param>
/// <param name="rt">The <see cref="RuleType"/> to consider.</param>
/// <param name="langs">The set of languages to consider.</param>
/// <returns>A map containing all <see cref="Rule"/>s that apply, grouped by the first character of the rule pattern.</returns>
public static IDictionary<string, IList<Rule>> GetInstanceMap(NameType nameType, RuleType rt,
    LanguageSet langs)
{
    if (langs.IsSingleton)
    {
        return GetInstanceMap(nameType, rt, langs.GetAny());
    }
    return GetInstanceMap(nameType, rt, Languages.ANY);
}
/// <summary>
/// Gets the rule map for a name type, rule type and a single language.
/// <para/>
/// since 1.9
/// </summary>
/// <param name="nameType">The <see cref="NameType"/> to consider.</param>
/// <param name="rt">The <see cref="RuleType"/> to consider.</param>
/// <param name="lang">The language to consider.</param>
/// <returns>A map containing all <see cref="Rule"/>s that apply, grouped by the first character of the rule pattern.</returns>
/// <exception cref="ArgumentException">No (non-null) entry exists for the given combination.</exception>
public static IDictionary<string, IList<Rule>> GetInstanceMap(NameType nameType, RuleType rt,
    string lang)
{
    // Walk the three lookup levels; a missing or null entry at any level means "no rules".
    if (!(RULES.TryGetValue(nameType, out var byRuleType) && byRuleType != null &&
          byRuleType.TryGetValue(rt, out var byLanguage) && byLanguage != null &&
          byLanguage.TryGetValue(lang, out var rules) && rules != null))
    {
        throw new ArgumentException(string.Format("No rules found for {0}, {1}, {2}.",
            nameType.GetName(), rt.GetName(), lang));
    }
    return rules;
}
// Parses a single phoneme of the form "text" or "text[lang1+lang2]".
// Without a '[' the phoneme applies to Languages.ANY_LANGUAGE; with one, the bracketed names
// (separated by '+') become its language set. Throws ArgumentException when a '[' is present
// but the expression does not end in ']'.
private static Phoneme ParsePhoneme(string ph)
{
    int bracketAt = ph.IndexOf('[');
    if (bracketAt < 0)
    {
        return new Phoneme(ph, Languages.ANY_LANGUAGE);
    }

    if (!ph.EndsWith("]", StringComparison.Ordinal))
    {
        throw new ArgumentException("Phoneme expression contains a '[' but does not end in ']'");
    }

    string text = ph.Substring(0, bracketAt);
    int namesStart = bracketAt + 1;
    // Everything between the '[' and the closing ']'.
    string names = ph.Substring(namesStart, (ph.Length - 1) - namesStart);
    ISet<string> langs = new JCG.HashSet<string>(PLUS.Split(names).TrimEnd());
    return new Phoneme(text, LanguageSet.From(langs));
}
// Parses a phoneme expression: either a single phoneme, or a parenthesised list of
// alternatives separated by '|'. A list whose body starts or ends with '|' additionally
// gets an empty phoneme. Throws ArgumentException when '(' is not closed by a trailing ')'.
private static IPhonemeExpr ParsePhonemeExpr(string ph)
{
    if (!ph.StartsWith("(", StringComparison.Ordinal))
    {
        return ParsePhoneme(ph);
    }

    // we have a bracketed list of options
    if (!ph.EndsWith(")", StringComparison.Ordinal))
    {
        throw new ArgumentException("Phoneme starts with '(' so must end with ')'");
    }

    string body = ph.Substring(1, ph.Length - 2);
    IList<Phoneme> alternatives = new List<Phoneme>();
    foreach (string option in PIPE.Split(body).TrimEnd())
    {
        alternatives.Add(ParsePhoneme(option));
    }

    bool hasEmptyAlternative = body.StartsWith("|", StringComparison.Ordinal)
        || body.EndsWith("|", StringComparison.Ordinal);
    if (hasEmptyAlternative)
    {
        alternatives.Add(new Phoneme("", Languages.ANY_LANGUAGE));
    }
    return new PhonemeList(alternatives);
}
/// <summary>
/// A <see cref="Rule"/> that remembers the line number and location it was parsed from,
/// and reports them from <see cref="ToString"/>.
/// </summary>
private class RuleAnonymousHelper : Rule
{
    private readonly int myLine;
    private readonly string loc;

    public RuleAnonymousHelper(string pat, string lCon, string rCon, IPhonemeExpr ph, int cLine, string location)
        : base(pat, lCon, rCon, ph)
    {
        myLine = cLine;
        loc = location;
    }

    /// <summary>Returns <c>Rule{line=N, loc='LOCATION'}</c>.</summary>
    public override string ToString()
    {
        return "Rule" + "{line=" + myLine + ", loc='" + loc + "'}";
    }
}
// Parses a rules resource into a map from pattern key (the first character of each rule's
// pattern) to the rules sharing that key, in file order.
//
// Line handling:
//  - a line starting with the extended-comment start marker switches to multi-line comment mode,
//    which lasts until a line ending with the extended-comment end marker;
//  - otherwise everything from the end-of-line comment marker onwards is dropped and the rest trimmed;
//  - blank lines are skipped;
//  - "#include <name>" parses resource "<name>.txt" and copies its entries into the result;
//  - any other line must split into exactly four whitespace-separated columns:
//    pattern, left context, right context, phoneme expression.
//
// The reader is always disposed before returning. Throws ArgumentException for a malformed
// include or a wrong column count, and InvalidOperationException (wrapping the ArgumentException)
// when building a rule from a line fails.
private static IDictionary<string, IList<Rule>> ParseRules(TextReader reader, string location)
{
    IDictionary<string, IList<Rule>> rulesByKey = new JCG.Dictionary<string, IList<Rule>>();
    bool insideBlockComment = false;
    int lineNumber = 0;
    try
    {
        for (string rawLine = reader.ReadLine(); rawLine != null; rawLine = reader.ReadLine())
        {
            lineNumber++;

            if (insideBlockComment)
            {
                if (rawLine.EndsWith(ResourceConstants.EXT_CMT_END, StringComparison.Ordinal))
                {
                    insideBlockComment = false;
                }
                continue;
            }

            if (rawLine.StartsWith(ResourceConstants.EXT_CMT_START, StringComparison.Ordinal))
            {
                insideBlockComment = true;
                continue;
            }

            // discard comments, then trim leading-trailing whitespace
            int commentAt = rawLine.IndexOf(ResourceConstants.CMT, StringComparison.Ordinal);
            string line = (commentAt >= 0 ? rawLine.Substring(0, commentAt) : rawLine).Trim();
            if (line.Length == 0)
            {
                continue; // empty lines can be safely skipped
            }

            if (line.StartsWith(HASH_INCLUDE, StringComparison.Ordinal))
            {
                // include statement
                string incl = line.Substring(HASH_INCLUDE.Length).Trim();
                if (incl.Contains(" "))
                {
                    throw new ArgumentException("Malformed import statement '" + rawLine + "' in " +
                        location);
                }
                rulesByKey.PutAll(ParseRules(CreateScanner(incl), location + "->" + incl));
                continue;
            }

            // rule
            string[] columns = WHITESPACE.Split(line).TrimEnd();
            if (columns.Length != 4)
            {
                throw new ArgumentException("Malformed rule statement split into " + columns.Length +
                    " parts: " + rawLine + " in " + location);
            }

            try
            {
                Rule rule = new RuleAnonymousHelper(
                    StripQuotes(columns[0]),
                    StripQuotes(columns[1]),
                    StripQuotes(columns[2]),
                    ParsePhonemeExpr(StripQuotes(columns[3])),
                    lineNumber,
                    location);

                // Rules are bucketed by the first character of their pattern.
                string key = rule.pattern.Substring(0, 1);
                if (!rulesByKey.TryGetValue(key, out IList<Rule> bucket) || bucket == null)
                {
                    bucket = new List<Rule>();
                    rulesByKey[key] = bucket;
                }
                bucket.Add(rule);
            }
            catch (ArgumentException e)
            {
                throw new InvalidOperationException("Problem parsing line '" + lineNumber + "' in " +
                    location, e);
            }
        }
    }
    finally
    {
        reader.Dispose();
    }
    return rulesByKey;
}
/// <summary>
/// An <see cref="IRPattern"/> assembled from three delegates, one per supported input type.
/// </summary>
private class RPatternHelper : IRPattern
{
    private readonly Func<StringBuilder, bool> isMatchSB;
    private readonly Func<string, bool> isMatchStr;
    private readonly Func<ICharSequence, bool> isMatchCS;

    public RPatternHelper(Func<StringBuilder, bool> isMatchSB, Func<string, bool> isMatchStr, Func<ICharSequence, bool> isMatchCS)
    {
        this.isMatchCS = isMatchCS;
        this.isMatchStr = isMatchStr;
        this.isMatchSB = isMatchSB;
    }

    /// <summary>Forwards to the <see cref="StringBuilder"/> delegate.</summary>
    public bool IsMatch(StringBuilder input) => isMatchSB(input);

    /// <summary>Forwards to the <see cref="string"/> delegate.</summary>
    public bool IsMatch(string input) => isMatchStr(input);

    /// <summary>Forwards to the <see cref="ICharSequence"/> delegate.</summary>
    public bool IsMatch(ICharSequence input) => isMatchCS(input);
}
/// <summary>
/// Attempts to compile the regex into direct string ops, falling back to <see cref="Regex"/> and <see cref="Match"/> in the worst case.
/// </summary>
/// <remarks>
/// The expression is recognised by shape only. After removing a leading '^' and a trailing '$':
/// <list type="bullet">
/// <item><description>content without '[' is treated as a literal: exact match, prefix match, suffix match, or "matches everything" when empty and anchored on one side only;</description></item>
/// <item><description>content that is a single <c>[...]</c> or <c>[^...]</c> box is matched against the only, first or last character of the input;</description></item>
/// <item><description>anything else is handed to <see cref="Regex"/>.</description></item>
/// </list>
/// </remarks>
/// <param name="regex">The regular expression to compile.</param>
/// <returns>An RPattern that will match this regex.</returns>
private static IRPattern GetPattern(string regex)
{
    bool startsWith = regex.StartsWith("^", StringComparison.Ordinal);
    bool endsWith = regex.EndsWith("$", StringComparison.Ordinal);
    int contentStart = startsWith ? 1 : 0;
    int contentEnd = endsWith ? regex.Length - 1 : regex.Length;
    string content = regex.Substring(contentStart, contentEnd - contentStart);
    bool boxes = content.Contains("[");

    if (!boxes)
    {
        if (startsWith && endsWith)
        {
            // exact match
            if (content.Length == 0)
            {
                // empty
                return new RPatternHelper(
                    isMatchSB: (input) => input.Length == 0,
                    isMatchStr: (input) => input.Length == 0,
                    isMatchCS: (input) => input.Length == 0);
            }

            // Compare StringBuilder / ICharSequence input character by character.
            // StringBuilder.Equals(string) binds to object.Equals on targets that lack the
            // ReadOnlySpan<char> overload and would then never match.
            return new RPatternHelper(
                isMatchSB: (input) => input.Length == content.Length && StartsWith(input, content),
                isMatchStr: (input) => input.Equals(content, StringComparison.Ordinal),
                isMatchCS: (input) => input.Length == content.Length && StartsWith(input, content));
        }
        else if ((startsWith || endsWith) && content.Length == 0)
        {
            // matches every string
            return ALL_STRINGS_RMATCHER;
        }
        else if (startsWith)
        {
            // matches from start
            return new RPatternHelper(
                isMatchSB: (input) => StartsWith(input, content),
                isMatchStr: (input) => StartsWith(input, content),
                isMatchCS: (input) => StartsWith(input, content));
        }
        else if (endsWith)
        {
            // matches from end
            return new RPatternHelper(
                isMatchSB: (input) => EndsWith(input, content),
                isMatchStr: (input) => EndsWith(input, content),
                isMatchCS: (input) => EndsWith(input, content));
        }
        // un-anchored literal: fall through to the Regex fallback below
    }
    else
    {
        bool startsWithBox = content.StartsWith("[", StringComparison.Ordinal);
        bool endsWithBox = content.EndsWith("]", StringComparison.Ordinal);
        if (startsWithBox && endsWithBox)
        {
            string boxContent = content.Substring(1, content.Length - 2);
            if (!boxContent.Contains("["))
            {
                // box containing alternatives
                bool negate = boxContent.StartsWith("^", StringComparison.Ordinal);
                if (negate)
                {
                    boxContent = boxContent.Substring(1);
                }
                // Copies captured by the lambdas below.
                string bContent = boxContent;
                bool shouldMatch = !negate;

                if (startsWith && endsWith)
                {
                    // exact match: input must be exactly one character from (or not from) the box
                    return new RPatternHelper(
                        isMatchSB: (input) => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch,
                        isMatchStr: (input) => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch,
                        isMatchCS: (input) => input.Length == 1 && Contains(bContent, input[0]) == shouldMatch);
                }
                else if (startsWith)
                {
                    // first char
                    return new RPatternHelper(
                        isMatchSB: (input) => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch,
                        isMatchStr: (input) => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch,
                        isMatchCS: (input) => input.Length > 0 && Contains(bContent, input[0]) == shouldMatch);
                }
                else if (endsWith)
                {
                    // last char
                    return new RPatternHelper(
                        isMatchSB: (input) => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch,
                        isMatchStr: (input) => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch,
                        isMatchCS: (input) => input.Length > 0 && Contains(bContent, input[input.Length - 1]) == shouldMatch);
                }
            }
        }
    }

    // Worst case: evaluate the original expression with the regex engine.
    Regex pattern = new Regex(regex, RegexOptions.Compiled);
    return new RPatternHelper(
        isMatchSB: (input) => pattern.Match(input.ToString()).Success,
        isMatchStr: (input) => pattern.Match(input).Success,
        isMatchCS: (input) => pattern.Match(input.ToString()).Success);
}
// Returns true when the sequence begins with the given prefix, comparing character by character.
private static bool StartsWith(ICharSequence input, string prefix)
{
    int prefixLength = prefix.Length;
    if (prefixLength > input.Length)
    {
        // The prefix is longer than the input.
        return false;
    }
    int position = 0;
    while (position < prefixLength)
    {
        if (input[position] != prefix[position])
        {
            return false;
        }
        position++;
    }
    return true;
}
// Returns true when the string begins with the given prefix (ordinal comparison).
private static bool StartsWith(string input, string prefix)
{
    int prefixLength = prefix.Length;
    return prefixLength <= input.Length
        && string.CompareOrdinal(input, 0, prefix, 0, prefixLength) == 0;
}
// Returns true when the builder begins with the given prefix, comparing character by character.
private static bool StartsWith(StringBuilder input, string prefix)
{
    int prefixLength = prefix.Length;
    if (prefixLength > input.Length)
    {
        // The prefix is longer than the input.
        return false;
    }
    int position = 0;
    while (position < prefixLength)
    {
        if (input[position] != prefix[position])
        {
            return false;
        }
        position++;
    }
    return true;
}
// Removes one leading and one trailing double quote from the string, each only if present.
private static string StripQuotes(string str)
{
    int start = str.StartsWith(DOUBLE_QUOTE, StringComparison.Ordinal) ? 1 : 0;
    int end = str.Length;
    // The trailing quote only counts if it is not the same character as the leading quote.
    bool hasClosingQuote = end > start && str.EndsWith(DOUBLE_QUOTE, StringComparison.Ordinal);
    if (hasClosingQuote)
    {
        end--;
    }
    return str.Substring(start, end - start);
}
// Matcher for the text left of the pattern; the constructor builds it from the left context with "$" appended.
private readonly IRPattern lContext;
// Literal text that must occur at the match position.
private readonly string pattern;
// Phoneme expression supplied to the constructor for this rule.
private readonly IPhonemeExpr phoneme;
// Matcher for the text right of the pattern; the constructor builds it from the right context with "^" prepended.
private readonly IRPattern rContext;
/// <summary>
/// Creates a new rule from a literal pattern, its two context expressions and the resulting phoneme.
/// </summary>
/// <param name="pattern">The pattern.</param>
/// <param name="lContext">The left context.</param>
/// <param name="rContext">The right context.</param>
/// <param name="phoneme">The resulting phoneme.</param>
public Rule(string pattern, string lContext, string rContext, IPhonemeExpr phoneme)
{
    // The left context is anchored at its end ("$"), the right context at its start ("^").
    IRPattern leftMatcher = GetPattern(lContext + "$");
    IRPattern rightMatcher = GetPattern("^" + rContext);

    this.pattern = pattern;
    this.phoneme = phoneme;
    this.lContext = leftMatcher;
    this.rContext = rightMatcher;
}
/// <summary>
/// Gets the left context matcher, which must match the input to the left of the pattern.
/// </summary>
public virtual IRPattern LContext
{
    get { return lContext; }
}
/// <summary>
/// Gets the pattern: a string literal that must match exactly.
/// </summary>
public virtual string Pattern
{
    get { return pattern; }
}
/// <summary>
/// Gets the phoneme associated with a match of this rule.
/// </summary>
public virtual IPhonemeExpr Phoneme
{
    get { return phoneme; }
}
/// <summary>
/// Gets the right context matcher, which must match the input to the right of the pattern.
/// </summary>
public virtual IRPattern RContext
{
    get { return rContext; }
}
/// <summary>
/// Decides if the pattern and context match the input starting at a position. It is a match if the
/// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
/// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
/// </summary>
/// <param name="input">The input <see cref="ICharSequence"/>.</param>
/// <param name="i">The int position within the input.</param>
/// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
public virtual bool PatternAndContextMatches(ICharSequence input, int i)
{
    if (i < 0)
    {
        // The single-string constructor takes a parameter name, not a message.
        throw new ArgumentOutOfRangeException(nameof(i), "Can not match pattern at negative indexes");
    }

    int patternLength = this.pattern.Length;
    // Written as a subtraction so that a very large i cannot overflow i + patternLength.
    if (patternLength > input.Length - i)
    {
        // not enough room for the pattern to match
        return false;
    }
    int ipl = i + patternLength;

    // evaluate the pattern, left context and right context
    // fail early if any of the evaluations is not successful
    if (!input.Subsequence(i, patternLength).Equals(this.pattern)) // LUCENENET: Corrected 2nd Subseqence parameter
    {
        return false;
    }
    if (!this.rContext.IsMatch(input.Subsequence(ipl, input.Length - ipl))) // LUCENENET: Corrected 2nd Subseqence parameter
    {
        return false;
    }
    return this.lContext.IsMatch(input.Subsequence(0, i)); // LUCENENET: Corrected 2nd Subseqence parameter
}
/// <summary>
/// Decides if the pattern and context match the input starting at a position. It is a match if the
/// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
/// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
/// </summary>
/// <param name="input">The input <see cref="string"/>.</param>
/// <param name="i">The int position within the input.</param>
/// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
// LUCENENET specific
public virtual bool PatternAndContextMatches(string input, int i)
{
    if (i < 0)
    {
        // The single-string constructor takes a parameter name, not a message.
        throw new ArgumentOutOfRangeException(nameof(i), "Can not match pattern at negative indexes");
    }

    int patternLength = this.pattern.Length;
    // Written as a subtraction so that a very large i cannot overflow i + patternLength.
    if (patternLength > input.Length - i)
    {
        // not enough room for the pattern to match
        return false;
    }
    int ipl = i + patternLength;

    // evaluate the pattern, left context and right context
    // fail early if any of the evaluations is not successful
    if (!input.Substring(i, patternLength).Equals(this.pattern, StringComparison.Ordinal))
    {
        return false;
    }
    if (!this.rContext.IsMatch(input.Substring(ipl, input.Length - ipl)))
    {
        return false;
    }
    return this.lContext.IsMatch(input.Substring(0, i));
}
/// <summary>
/// Decides if the pattern and context match the input starting at a position. It is a match if the
/// <see cref="LContext"/> matches <paramref name="input"/> up to <paramref name="i"/>, <see cref="Pattern"/> matches at <paramref name="i"/> and
/// <see cref="RContext"/> matches from the end of the match of <see cref="Pattern"/> to the end of <paramref name="input"/>.
/// </summary>
/// <param name="input">The input <see cref="StringBuilder"/>.</param>
/// <param name="i">The int position within the input.</param>
/// <returns><c>true</c> if the pattern and left/right context match, <c>false</c> otherwise.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is negative.</exception>
// LUCENENET specific
public virtual bool PatternAndContextMatches(StringBuilder input, int i)
{
    if (i < 0)
    {
        // The single-string constructor takes a parameter name, not a message.
        throw new ArgumentOutOfRangeException(nameof(i), "Can not match pattern at negative indexes");
    }

    int patternLength = this.pattern.Length;
    // Written as a subtraction so that a very large i cannot overflow i + patternLength.
    if (patternLength > input.Length - i)
    {
        // not enough room for the pattern to match
        return false;
    }
    int ipl = i + patternLength;

    // evaluate the pattern, left context and right context
    // fail early if any of the evaluations is not successful
    if (!input.ToString(i, patternLength).Equals(this.pattern, StringComparison.Ordinal))
    {
        return false;
    }
    if (!this.rContext.IsMatch(input.ToString(ipl, input.Length - ipl)))
    {
        return false;
    }
    return this.lContext.IsMatch(input.ToString(0, i));
}
}
/// <summary>
/// A piece of phoneme text together with the <see cref="LanguageSet"/> it applies to.
/// The text is held in a <see cref="StringBuilder"/> and can be extended in place with <see cref="Append(string)"/>.
/// </summary>
public sealed class Phoneme : IPhonemeExpr
{
    /// <summary>
    /// Orders phonemes by their text, character by character; when one text is a prefix of
    /// the other, the shorter one sorts first.
    /// </summary>
    private class PhonemeComparer : IComparer<Phoneme>
    {
        public int Compare(Phoneme o1, Phoneme o2)
        {
            StringBuilder left = o1.phonemeText;
            StringBuilder right = o2.phonemeText;
            int leftLength = left.Length;
            int rightLength = right.Length;
            int shared = leftLength < rightLength ? leftLength : rightLength;

            for (int k = 0; k < shared; k++)
            {
                int difference = left[k] - right[k];
                if (difference != 0)
                {
                    return difference;
                }
            }

            // All shared characters are equal: the longer text sorts last.
            if (leftLength > rightLength)
            {
                return +1;
            }
            if (leftLength < rightLength)
            {
                return -1;
            }
            return 0;
        }
    }

    /// <summary>Comparer that orders phonemes by their text.</summary>
    public static readonly IComparer<Phoneme> COMPARER = new PhonemeComparer();

    private readonly StringBuilder phonemeText;
    private readonly LanguageSet languages;

    /// <summary>Creates a phoneme from text and a language set.</summary>
    public Phoneme(string phonemeText, LanguageSet languages)
    {
        this.languages = languages;
        this.phonemeText = new StringBuilder(phonemeText);
    }

    /// <summary>Creates a phoneme from a copy of the builder's current text and a language set.</summary>
    public Phoneme(StringBuilder phonemeText, LanguageSet languages)
        : this(phonemeText.ToString(), languages)
    {
    }

    /// <summary>Creates a phoneme from a copy of the sequence's text and a language set.</summary>
    public Phoneme(ICharSequence phonemeText, LanguageSet languages)
        : this(phonemeText.ToString(), languages)
    {
    }

    /// <summary>
    /// Creates a phoneme whose text is the left text followed by the right text, using the left phoneme's languages.
    /// </summary>
    public Phoneme(Phoneme phonemeLeft, Phoneme phonemeRight)
        : this(phonemeLeft.phonemeText, phonemeLeft.languages)
    {
        this.phonemeText.Append(phonemeRight.phonemeText);
    }

    /// <summary>
    /// Creates a phoneme whose text is the left text followed by the right text, using the given languages.
    /// </summary>
    public Phoneme(Phoneme phonemeLeft, Phoneme phonemeRight, LanguageSet languages)
        : this(phonemeLeft.phonemeText, languages)
    {
        this.phonemeText.Append(phonemeRight.phonemeText);
    }

    /// <summary>Appends <paramref name="str"/> to this phoneme's text and returns this instance.</summary>
    public Phoneme Append(string str)
    {
        phonemeText.Append(str);
        return this;
    }

    /// <summary>Gets the languages this phoneme applies to.</summary>
    public LanguageSet Languages
    {
        get { return languages; }
    }

    /// <summary>Gets a new one-element array containing this phoneme.</summary>
    public IList<Phoneme> Phonemes
    {
        get { return new[] { this }; }
    }

    /// <summary>Returns the current phoneme text as a string.</summary>
    public string GetPhonemeText() => phonemeText.ToString();

    /// <summary>
    /// Returns a new phoneme whose text is this text followed by <paramref name="right"/>'s text and whose
    /// languages are this phoneme's languages restricted to <paramref name="right"/>'s.
    /// </summary>
    [Obsolete("since 1.9")]
    public Phoneme Join(Phoneme right)
    {
        string joinedText = this.phonemeText.ToString() + right.phonemeText.ToString();
        LanguageSet joinedLanguages = this.languages.RestrictTo(right.Languages);
        return new Phoneme(joinedText, joinedLanguages);
    }
}
/// <summary>
/// A phoneme expression: something that can be viewed as a list of <see cref="Phoneme"/>s.
/// Implemented in this file by <see cref="Phoneme"/> (a single phoneme) and <see cref="PhonemeList"/>.
/// </summary>
public interface IPhonemeExpr
{
/// <summary>
/// Gets the phonemes that make up this expression.
/// </summary>
IList<Phoneme> Phonemes { get; }
}
/// <summary>
/// An <see cref="IPhonemeExpr"/> backed by a caller-supplied list of <see cref="Phoneme"/>s.
/// </summary>
public sealed class PhonemeList : IPhonemeExpr
{
    /// <summary>Wraps the given list; the list is stored as-is, not copied.</summary>
    /// <param name="phonemes">The phonemes of this expression.</param>
    public PhonemeList(IList<Phoneme> phonemes) => Phonemes = phonemes;

    /// <summary>Gets the list passed to the constructor.</summary>
    public IList<Phoneme> Phonemes { get; private set; }
}
/// <summary>
/// A minimal wrapper around the functionality of <see cref="Rule"/> Pattern that we use, to allow for alternate implementations.
/// </summary>
/// <remarks>
/// The three overloads take the same text in different representations.
/// </remarks>
public interface IRPattern
{
/// <summary>Returns <c>true</c> if this pattern matches the given <see cref="ICharSequence"/>.</summary>
bool IsMatch(ICharSequence input);
/// <summary>Returns <c>true</c> if this pattern matches the given <see cref="string"/>.</summary>
bool IsMatch(string input);
/// <summary>Returns <c>true</c> if this pattern matches the given <see cref="StringBuilder"/>.</summary>
bool IsMatch(StringBuilder input);
}
}