src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/PatternParser.cs - lucenenet - Git at Google

 using Lucene.Net.Support;
 using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
 using System.Reflection;
 using System.Text;
 using System.Xml;

 namespace Lucene.Net.Analysis.Compound.Hyphenation
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *      http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     /// <summary>
     /// A XMLReader document handler to read and parse hyphenation patterns from a XML
     /// file.
     /// <para/>
     /// LUCENENET: This class has been refactored from its Java counterpart to use XmlReader rather
     /// than a SAX parser.
     /// </summary>
     public class PatternParser
     {
         internal int currElement;

         internal IPatternConsumer consumer;

         internal StringBuilder token;

         internal IList<object> exception;

         internal char hyphenChar;

         internal string errMsg;

         internal const int ELEM_CLASSES = 1;

         internal const int ELEM_EXCEPTIONS = 2;

         internal const int ELEM_PATTERNS = 3;

         internal const int ELEM_HYPHEN = 4;

         public PatternParser()
         {
             token = new StringBuilder();
             hyphenChar = '-'; // default
         }

         public PatternParser(IPatternConsumer consumer)
             : this()
         {
             this.consumer = consumer;
         }

         public virtual IPatternConsumer Consumer
         {
             get // LUCENENET NOTE: Added getter per MSDN guidelines
             {
                 return this.consumer;
             }
             set
             {
                 this.consumer = value;
             }
         }

         /// <summary>
         /// Parses a hyphenation pattern file.
         /// </summary>
         /// <param name="path">The complete file path to be read.</param>
         /// <exception cref="IOException"> In case of an exception while parsing </exception>
         public virtual void Parse(string path)
         {
             // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
             Parse(path, Encoding.UTF8);
         }

         /// <summary>
         /// Parses a hyphenation pattern file.
         /// </summary>
         /// <param name="path">The complete file path to be read.</param>
         /// <param name="encoding">The character encoding to use</param>
         /// <exception cref="IOException"> In case of an exception while parsing </exception>
         public virtual void Parse(string path, Encoding encoding)
         {
             var xmlReaderSettings = GetXmlReaderSettings();

             // LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
             using (var src = XmlReader.Create(new StreamReader(new FileStream(path, FileMode.Open), encoding), xmlReaderSettings))
             {
                 Parse(src);
             }
         }

         /// <summary>
         /// Parses a hyphenation pattern file.
         /// </summary>
         /// <param name="file">  a <see cref="FileInfo"/> object representing the file  </param>
         /// <exception cref="IOException"> In case of an exception while parsing </exception>
         public virtual void Parse(FileInfo file)
         {
             Parse(file, Encoding.UTF8);
         }

         /// <summary>
         /// Parses a hyphenation pattern file.
         /// </summary>
         /// <param name="file">  a <see cref="FileInfo"/> object representing the file </param>
         /// <param name="encoding">The character encoding to use</param>
         /// <exception cref="IOException"> In case of an exception while parsing </exception>
         public virtual void Parse(FileInfo file, Encoding encoding)
         {
             var xmlReaderSettings = GetXmlReaderSettings();

             using (var src = XmlReader.Create(new StreamReader(file.OpenRead(), encoding), xmlReaderSettings))
             {
                 Parse(src);
             }
         }

         /// <summary>
         /// Parses a hyphenation pattern file.
         /// </summary>
         /// <param name="xmlStream">
         /// The stream containing the XML data.
         /// <para/>
         /// The <see cref="PatternParser"/> scans the first bytes of the stream looking for a byte order mark
         /// or other sign of encoding. When encoding is determined, the encoding is used to continue reading
         /// the stream, and processing continues parsing the input as a stream of (Unicode) characters.
         /// </param>
         /// <exception cref="IOException"> In case of an exception while parsing </exception>
         public virtual void Parse(Stream xmlStream)
         {
             var xmlReaderSettings = GetXmlReaderSettings();

             using (var src = XmlReader.Create(xmlStream, xmlReaderSettings))
             {
                 Parse(src);
             }
         }

         /// <summary>
         /// Parses a hyphenation pattern file.
         /// </summary>
         /// <param name="source"> <see cref="XmlReader"/> input source for the file </param>
         /// <exception cref="IOException"> In case of an exception while parsing </exception>
         public virtual void Parse(XmlReader source)
         {
             source.MoveToContent();
             while (source.Read())
             {
                 ParseNode(source);
             }
         }

         private void ParseNode(XmlReader node)
         {
             string uri, name, raw;
             switch (node.NodeType)
             {
                 case XmlNodeType.Element:

                     // Element start
                     uri = node.NamespaceURI;
                     name = node.Name;
                     bool isEmptyElement = node.IsEmptyElement;
                     var attributes = GetAttributes(node);
                     raw = string.Empty; // node.ReadOuterXml(); - not used, but was messing with the node pointer

                     this.StartElement(uri, name, raw, attributes);
                     if (isEmptyElement)
                     {
                         this.EndElement(uri, name, raw);
                     }
                     break;

                 case XmlNodeType.Text:

                     this.Characters(node.Value.ToCharArray(), 0, node.Value.Length);
                     break;

                 case XmlNodeType.EndElement:
                     uri = node.NamespaceURI;
                     name = node.Name;
                     raw = string.Empty; // node.ReadOuterXml(); - not used, but was messing with the node pointer

                     // Element end
                     this.EndElement(uri, name, raw);
                     break;
             }
         }

         private XmlReaderSettings GetXmlReaderSettings()
         {
             return

                 new XmlReaderSettings
                 {
                     // DTD Processing currently is
                     // not supported in .NET Standard but will come back in .NET Standard 2.0.
                     // https://github.com/dotnet/corefx/issues/4376.
 #if FEATURE_DTD_PROCESSING
                     DtdProcessing = DtdProcessing.Parse,
                     XmlResolver = new DtdResolver()
 #else
                     DtdProcessing = DtdProcessing.Ignore
 #endif
                 };
         }

         private IDictionary<string, string> GetAttributes(XmlReader node)
         {
             var result = new Dictionary<string, string>();
             if (node.HasAttributes)
             {
                 for (int i = 0; i < node.AttributeCount; i++)
                 {
                     node.MoveToAttribute(i);
                     result.Add(node.Name, node.Value);
                 }
             }

             return result;
         }

         protected virtual string ReadToken(StringBuilder chars)
         {
             string word;
             bool space = false;
             int i;
             for (i = 0; i < chars.Length; i++)
             {
                 if (char.IsWhiteSpace(chars[i]))
                 {
                     space = true;
                 }
                 else
                 {
                     break;
                 }
             }
             if (space)
             {
                 // chars.delete(0,i);
                 for (int countr = i; countr < chars.Length; countr++)
                 {
                     chars[countr - i] = chars[countr];
                 }
                 chars.Length = chars.Length - i;
                 if (token.Length > 0)
                 {
                     word = token.ToString();
                     token.Length = 0;
                     return word;
                 }
             }
             space = false;
             for (i = 0; i < chars.Length; i++)
             {
                 if (char.IsWhiteSpace(chars[i]))
                 {
                     space = true;
                     break;
                 }
             }
             token.Append(chars.ToString(0, i - 0));
             // chars.delete(0,i);
             for (int countr = i; countr < chars.Length; countr++)
             {
                 chars[countr - i] = chars[countr];
             }
             chars.Length = chars.Length - i;
             if (space)
             {
                 word = token.ToString();
                 token.Length = 0;
                 return word;
             }
             token.Append(chars.ToString());
             return null;
         }

         protected static string GetPattern(string word)
         {
             StringBuilder pat = new StringBuilder();
             int len = word.Length;
             for (int i = 0; i < len; i++)
             {
                 if (!char.IsDigit(word[i]))
                 {
                     pat.Append(word[i]);
                 }
             }
             return pat.ToString();
         }

         protected virtual IList<object> NormalizeException<T1>(IList<T1> ex)
         {
             List<object> res = new List<object>();
             for (int i = 0; i < ex.Count; i++)
             {
                 object item = ex[i];
                 if (item is string)
                 {
                     string str = (string)item;
                     StringBuilder buf = new StringBuilder();
                     for (int j = 0; j < str.Length; j++)
                     {
                         char c = str[j];
                         if (c != hyphenChar)
                         {
                             buf.Append(c);
                         }
                         else
                         {
                             res.Add(buf.ToString());
                             buf.Length = 0;
                             char[] h = new char[1];
                             h[0] = hyphenChar;
                             // we use here hyphenChar which is not necessarily
                             // the one to be printed
                             res.Add(new Hyphen(new string(h), null, null));
                         }
                     }
                     if (buf.Length > 0)
                     {
                         res.Add(buf.ToString());
                     }
                 }
                 else
                 {
                     res.Add(item);
                 }
             }
             return res;
         }

         protected virtual string GetExceptionWord<T1>(IList<T1> ex)
         {
             StringBuilder res = new StringBuilder();
             for (int i = 0; i < ex.Count; i++)
             {
                 object item = ex[i];
                 if (item is string)
                 {
                     res.Append((string)item);
                 }
                 else
                 {
                     if (((Hyphen)item).NoBreak != null)
                     {
                         res.Append(((Hyphen)item).NoBreak);
                     }
                 }
             }
             return res.ToString();
         }

         protected static string GetInterletterValues(string pat)
         {
             StringBuilder il = new StringBuilder();
             string word = pat + "a"; // add dummy letter to serve as sentinel
             int len = word.Length;
             for (int i = 0; i < len; i++)
             {
                 char c = word[i];
                 if (char.IsDigit(c))
                 {
                     il.Append(c);
                     i++;
                 }
                 else
                 {
                     il.Append('0');
                 }
             }
             return il.ToString();
         }

 #if FEATURE_DTD_PROCESSING
         /// <summary>
         /// LUCENENET specific helper class to force the DTD file to be read from the embedded resource
         /// rather than from the file system.
         /// </summary>
         internal class DtdResolver : XmlUrlResolver
         {
             public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
             {
                 string dtdFilename = "hyphenation.dtd";
                 if (dtdFilename.Equals(absoluteUri.Segments.LastOrDefault(), StringComparison.Ordinal))
                 {
                     return GetType().GetTypeInfo().Assembly.FindAndGetManifestResourceStream(typeof(PatternParser), dtdFilename);
                 }

                 return base.GetEntity(absoluteUri, role, ofObjectToReturn);
             }
         }
 #endif

         //
         // ContentHandler methods
         //

         /// <summary>
         /// Receive notification of the beginning of an element.
         /// <para/>
         /// The Parser will invoke this method at the beginning of every element in the XML document;
         /// there will be a corresponding <see cref="EndElement"/> event for every <see cref="StartElement"/> event
         /// (even when the element is empty). All of the element's content will be reported,
         /// in order, before the corresponding endElement event.
         /// </summary>
         /// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param>
         /// <param name="local">the local name (without prefix), or the empty string if Namespace processing is not being performed</param>
         /// <param name="raw"></param>
         /// <param name="attrs"> the attributes attached to the element. If there are no attributes, it shall be an empty Attributes object. The value of this object after startElement returns is undefined</param>
         public virtual void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs)
         {
             if (local.Equals("hyphen-char", StringComparison.Ordinal))
             {
                 if (attrs.TryGetValue("value", out string h) && h != null && h.Length == 1)
                 {
                     hyphenChar = h[0];
                 }
             }
             else if (local.Equals("classes", StringComparison.Ordinal))
             {
                 currElement = ELEM_CLASSES;
             }
             else if (local.Equals("patterns", StringComparison.Ordinal))
             {
                 currElement = ELEM_PATTERNS;
             }
             else if (local.Equals("exceptions", StringComparison.Ordinal))
             {
                 currElement = ELEM_EXCEPTIONS;
                 exception = new List<object>();
             }
             else if (local.Equals("hyphen", StringComparison.Ordinal))
             {
                 if (token.Length > 0)
                 {
                     exception.Add(token.ToString());
                 }
                 exception.Add(new Hyphen(attrs["pre"], attrs["no"], attrs["post"]));
                 currElement = ELEM_HYPHEN;
             }
             token.Length = 0;
         }

         /// <summary>
         /// Receive notification of the end of an element.
         /// <para/>
         /// The parser will invoke this method at the end of every element in the XML document;
         /// there will be a corresponding <see cref="StartElement"/> event for every
         /// <see cref="EndElement"/> event (even when the element is empty).
         /// </summary>
         /// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param>
         /// <param name="local">the local name (without prefix), or the empty string if Namespace processing is not being performed</param>
         /// <param name="raw"></param>
         public virtual void EndElement(string uri, string local, string raw)
         {
             if (token.Length > 0)
             {
                 string word = token.ToString();
                 switch (currElement)
                 {
                     case ELEM_CLASSES:
                         consumer.AddClass(word);
                         break;
                     case ELEM_EXCEPTIONS:
                         exception.Add(word);
                         exception = NormalizeException(exception);
                         consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
                         break;
                     case ELEM_PATTERNS:
                         consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
                         break;
                     case ELEM_HYPHEN:
                         // nothing to do
                         break;
                 }
                 if (currElement != ELEM_HYPHEN)
                 {
                     token.Length = 0;
                 }
             }
             if (currElement == ELEM_HYPHEN)
             {
                 currElement = ELEM_EXCEPTIONS;
             }
             else
             {
                 currElement = 0;
             }
         }

         /// <summary>
         /// Receive notification of character data.
         /// <para/>
         /// The Parser will call this method to report each chunk of character data. Parsers may
         /// return all contiguous character data in a single chunk, or they may split it into
         /// several chunks; however, all of the characters in any single event must come from
         /// the same external entity so that the Locator provides useful information.
         /// <para/>
         /// The application must not attempt to read from the array outside of the specified range.
         /// </summary>
         /// <param name="ch"></param>
         /// <param name="start"></param>
         /// <param name="length"></param>
         public virtual void Characters(char[] ch, int start, int length)
         {
             StringBuilder chars = new StringBuilder(length);
             chars.Append(ch, start, length);
             string word = ReadToken(chars);
             while (word != null)
             {
                 // System.out.println("\"" + word + "\"");
                 switch (currElement)
                 {
                     case ELEM_CLASSES:
                         consumer.AddClass(word);
                         break;
                     case ELEM_EXCEPTIONS:
                         exception.Add(word);
                         exception = NormalizeException(exception);
                         consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
                         exception.Clear();
                         break;
                     case ELEM_PATTERNS:
                         consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
                         break;
                 }
                 word = ReadToken(chars);
             }
         }
     }
 }
	using Lucene.Net.Support;
	using System;
	using System.Collections.Generic;
	using System.IO;
	using System.Linq;
	using System.Reflection;
	using System.Text;
	using System.Xml;

	namespace Lucene.Net.Analysis.Compound.Hyphenation
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/// <summary>
	/// A XMLReader document handler to read and parse hyphenation patterns from a XML
	/// file.
	/// <para/>
	/// LUCENENET: This class has been refactored from its Java counterpart to use XmlReader rather
	/// than a SAX parser.
	/// </summary>
	public class PatternParser
	{
	internal int currElement;

	internal IPatternConsumer consumer;

	internal StringBuilder token;

	internal IList<object> exception;

	internal char hyphenChar;

	internal string errMsg;

	internal const int ELEM_CLASSES = 1;

	internal const int ELEM_EXCEPTIONS = 2;

	internal const int ELEM_PATTERNS = 3;

	internal const int ELEM_HYPHEN = 4;

	public PatternParser()
	{
	token = new StringBuilder();
	hyphenChar = '-'; // default
	}

	public PatternParser(IPatternConsumer consumer)
	: this()
	{
	this.consumer = consumer;
	}

	public virtual IPatternConsumer Consumer
	{
	get // LUCENENET NOTE: Added getter per MSDN guidelines
	{
	return this.consumer;
	}
	set
	{
	this.consumer = value;
	}
	}

	/// <summary>
	/// Parses a hyphenation pattern file.
	/// </summary>
	/// <param name="path">The complete file path to be read.</param>
	/// <exception cref="IOException"> In case of an exception while parsing </exception>
	public virtual void Parse(string path)
	{
	// LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
	Parse(path, Encoding.UTF8);
	}

	/// <summary>
	/// Parses a hyphenation pattern file.
	/// </summary>
	/// <param name="path">The complete file path to be read.</param>
	/// <param name="encoding">The character encoding to use</param>
	/// <exception cref="IOException"> In case of an exception while parsing </exception>
	public virtual void Parse(string path, Encoding encoding)
	{
	var xmlReaderSettings = GetXmlReaderSettings();

	// LUCENENET TODO: Create overloads that allow XmlReaderSettings to be passed in.
	using (var src = XmlReader.Create(new StreamReader(new FileStream(path, FileMode.Open), encoding), xmlReaderSettings))
	{
	Parse(src);
	}
	}

	/// <summary>
	/// Parses a hyphenation pattern file.
	/// </summary>
	/// <param name="file"> a <see cref="FileInfo"/> object representing the file </param>
	/// <exception cref="IOException"> In case of an exception while parsing </exception>
	public virtual void Parse(FileInfo file)
	{
	Parse(file, Encoding.UTF8);
	}

	/// <summary>
	/// Parses a hyphenation pattern file.
	/// </summary>
	/// <param name="file"> a <see cref="FileInfo"/> object representing the file </param>
	/// <param name="encoding">The character encoding to use</param>
	/// <exception cref="IOException"> In case of an exception while parsing </exception>
	public virtual void Parse(FileInfo file, Encoding encoding)
	{
	var xmlReaderSettings = GetXmlReaderSettings();

	using (var src = XmlReader.Create(new StreamReader(file.OpenRead(), encoding), xmlReaderSettings))
	{
	Parse(src);
	}
	}

	/// <summary>
	/// Parses a hyphenation pattern file.
	/// </summary>
	/// <param name="xmlStream">
	/// The stream containing the XML data.
	/// <para/>
	/// The <see cref="PatternParser"/> scans the first bytes of the stream looking for a byte order mark
	/// or other sign of encoding. When encoding is determined, the encoding is used to continue reading
	/// the stream, and processing continues parsing the input as a stream of (Unicode) characters.
	/// </param>
	/// <exception cref="IOException"> In case of an exception while parsing </exception>
	public virtual void Parse(Stream xmlStream)
	{
	var xmlReaderSettings = GetXmlReaderSettings();

	using (var src = XmlReader.Create(xmlStream, xmlReaderSettings))
	{
	Parse(src);
	}
	}

	/// <summary>
	/// Parses a hyphenation pattern file.
	/// </summary>
	/// <param name="source"> <see cref="XmlReader"/> input source for the file </param>
	/// <exception cref="IOException"> In case of an exception while parsing </exception>
	public virtual void Parse(XmlReader source)
	{
	source.MoveToContent();
	while (source.Read())
	{
	ParseNode(source);
	}
	}

	private void ParseNode(XmlReader node)
	{
	string uri, name, raw;
	switch (node.NodeType)
	{
	case XmlNodeType.Element:

	// Element start
	uri = node.NamespaceURI;
	name = node.Name;
	bool isEmptyElement = node.IsEmptyElement;
	var attributes = GetAttributes(node);
	raw = string.Empty; // node.ReadOuterXml(); - not used, but was messing with the node pointer

	this.StartElement(uri, name, raw, attributes);
	if (isEmptyElement)
	{
	this.EndElement(uri, name, raw);
	}
	break;

	case XmlNodeType.Text:

	this.Characters(node.Value.ToCharArray(), 0, node.Value.Length);
	break;

	case XmlNodeType.EndElement:
	uri = node.NamespaceURI;
	name = node.Name;
	raw = string.Empty; // node.ReadOuterXml(); - not used, but was messing with the node pointer

	// Element end
	this.EndElement(uri, name, raw);
	break;
	}
	}

	private XmlReaderSettings GetXmlReaderSettings()
	{
	return

	new XmlReaderSettings
	{
	// DTD Processing currently is
	// not supported in .NET Standard but will come back in .NET Standard 2.0.
	// https://github.com/dotnet/corefx/issues/4376.
	#if FEATURE_DTD_PROCESSING
	DtdProcessing = DtdProcessing.Parse,
	XmlResolver = new DtdResolver()
	#else
	DtdProcessing = DtdProcessing.Ignore
	#endif
	};
	}

	private IDictionary<string, string> GetAttributes(XmlReader node)
	{
	var result = new Dictionary<string, string>();
	if (node.HasAttributes)
	{
	for (int i = 0; i < node.AttributeCount; i++)
	{
	node.MoveToAttribute(i);
	result.Add(node.Name, node.Value);
	}
	}

	return result;
	}

	protected virtual string ReadToken(StringBuilder chars)
	{
	string word;
	bool space = false;
	int i;
	for (i = 0; i < chars.Length; i++)
	{
	if (char.IsWhiteSpace(chars[i]))
	{
	space = true;
	}
	else
	{
	break;
	}
	}
	if (space)
	{
	// chars.delete(0,i);
	for (int countr = i; countr < chars.Length; countr++)
	{
	chars[countr - i] = chars[countr];
	}
	chars.Length = chars.Length - i;
	if (token.Length > 0)
	{
	word = token.ToString();
	token.Length = 0;
	return word;
	}
	}
	space = false;
	for (i = 0; i < chars.Length; i++)
	{
	if (char.IsWhiteSpace(chars[i]))
	{
	space = true;
	break;
	}
	}
	token.Append(chars.ToString(0, i - 0));
	// chars.delete(0,i);
	for (int countr = i; countr < chars.Length; countr++)
	{
	chars[countr - i] = chars[countr];
	}
	chars.Length = chars.Length - i;
	if (space)
	{
	word = token.ToString();
	token.Length = 0;
	return word;
	}
	token.Append(chars.ToString());
	return null;
	}

	protected static string GetPattern(string word)
	{
	StringBuilder pat = new StringBuilder();
	int len = word.Length;
	for (int i = 0; i < len; i++)
	{
	if (!char.IsDigit(word[i]))
	{
	pat.Append(word[i]);
	}
	}
	return pat.ToString();
	}

	protected virtual IList<object> NormalizeException<T1>(IList<T1> ex)
	{
	List<object> res = new List<object>();
	for (int i = 0; i < ex.Count; i++)
	{
	object item = ex[i];
	if (item is string)
	{
	string str = (string)item;
	StringBuilder buf = new StringBuilder();
	for (int j = 0; j < str.Length; j++)
	{
	char c = str[j];
	if (c != hyphenChar)
	{
	buf.Append(c);
	}
	else
	{
	res.Add(buf.ToString());
	buf.Length = 0;
	char[] h = new char[1];
	h[0] = hyphenChar;
	// we use here hyphenChar which is not necessarily
	// the one to be printed
	res.Add(new Hyphen(new string(h), null, null));
	}
	}
	if (buf.Length > 0)
	{
	res.Add(buf.ToString());
	}
	}
	else
	{
	res.Add(item);
	}
	}
	return res;
	}

	protected virtual string GetExceptionWord<T1>(IList<T1> ex)
	{
	StringBuilder res = new StringBuilder();
	for (int i = 0; i < ex.Count; i++)
	{
	object item = ex[i];
	if (item is string)
	{
	res.Append((string)item);
	}
	else
	{
	if (((Hyphen)item).NoBreak != null)
	{
	res.Append(((Hyphen)item).NoBreak);
	}
	}
	}
	return res.ToString();
	}

	protected static string GetInterletterValues(string pat)
	{
	StringBuilder il = new StringBuilder();
	string word = pat + "a"; // add dummy letter to serve as sentinel
	int len = word.Length;
	for (int i = 0; i < len; i++)
	{
	char c = word[i];
	if (char.IsDigit(c))
	{
	il.Append(c);
	i++;
	}
	else
	{
	il.Append('0');
	}
	}
	return il.ToString();
	}

	#if FEATURE_DTD_PROCESSING
	/// <summary>
	/// LUCENENET specific helper class to force the DTD file to be read from the embedded resource
	/// rather than from the file system.
	/// </summary>
	internal class DtdResolver : XmlUrlResolver
	{
	public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
	{
	string dtdFilename = "hyphenation.dtd";
	if (dtdFilename.Equals(absoluteUri.Segments.LastOrDefault(), StringComparison.Ordinal))
	{
	return GetType().GetTypeInfo().Assembly.FindAndGetManifestResourceStream(typeof(PatternParser), dtdFilename);
	}

	return base.GetEntity(absoluteUri, role, ofObjectToReturn);
	}
	}
	#endif

	//
	// ContentHandler methods
	//

	/// <summary>
	/// Receive notification of the beginning of an element.
	/// <para/>
	/// The Parser will invoke this method at the beginning of every element in the XML document;
	/// there will be a corresponding <see cref="EndElement"/> event for every <see cref="StartElement"/> event
	/// (even when the element is empty). All of the element's content will be reported,
	/// in order, before the corresponding endElement event.
	/// </summary>
	/// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param>
	/// <param name="local">the local name (without prefix), or the empty string if Namespace processing is not being performed</param>
	/// <param name="raw"></param>
	/// <param name="attrs"> the attributes attached to the element. If there are no attributes, it shall be an empty Attributes object. The value of this object after startElement returns is undefined</param>
	public virtual void StartElement(string uri, string local, string raw, IDictionary<string, string> attrs)
	{
	if (local.Equals("hyphen-char", StringComparison.Ordinal))
	{
	if (attrs.TryGetValue("value", out string h) && h != null && h.Length == 1)
	{
	hyphenChar = h[0];
	}
	}
	else if (local.Equals("classes", StringComparison.Ordinal))
	{
	currElement = ELEM_CLASSES;
	}
	else if (local.Equals("patterns", StringComparison.Ordinal))
	{
	currElement = ELEM_PATTERNS;
	}
	else if (local.Equals("exceptions", StringComparison.Ordinal))
	{
	currElement = ELEM_EXCEPTIONS;
	exception = new List<object>();
	}
	else if (local.Equals("hyphen", StringComparison.Ordinal))
	{
	if (token.Length > 0)
	{
	exception.Add(token.ToString());
	}
	exception.Add(new Hyphen(attrs["pre"], attrs["no"], attrs["post"]));
	currElement = ELEM_HYPHEN;
	}
	token.Length = 0;
	}

	/// <summary>
	/// Receive notification of the end of an element.
	/// <para/>
	/// The parser will invoke this method at the end of every element in the XML document;
	/// there will be a corresponding <see cref="StartElement"/> event for every
	/// <see cref="EndElement"/> event (even when the element is empty).
	/// </summary>
	/// <param name="uri">the Namespace URI, or the empty string if the element has no Namespace URI or if Namespace processing is not being performed</param>
	/// <param name="local">the local name (without prefix), or the empty string if Namespace processing is not being performed</param>
	/// <param name="raw"></param>
	public virtual void EndElement(string uri, string local, string raw)
	{
	if (token.Length > 0)
	{
	string word = token.ToString();
	switch (currElement)
	{
	case ELEM_CLASSES:
	consumer.AddClass(word);
	break;
	case ELEM_EXCEPTIONS:
	exception.Add(word);
	exception = NormalizeException(exception);
	consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
	break;
	case ELEM_PATTERNS:
	consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
	break;
	case ELEM_HYPHEN:
	// nothing to do
	break;
	}
	if (currElement != ELEM_HYPHEN)
	{
	token.Length = 0;
	}
	}
	if (currElement == ELEM_HYPHEN)
	{
	currElement = ELEM_EXCEPTIONS;
	}
	else
	{
	currElement = 0;
	}
	}

	/// <summary>
	/// Receive notification of character data.
	/// <para/>
	/// The Parser will call this method to report each chunk of character data. Parsers may
	/// return all contiguous character data in a single chunk, or they may split it into
	/// several chunks; however, all of the characters in any single event must come from
	/// the same external entity so that the Locator provides useful information.
	/// <para/>
	/// The application must not attempt to read from the array outside of the specified range.
	/// </summary>
	/// <param name="ch"></param>
	/// <param name="start"></param>
	/// <param name="length"></param>
	public virtual void Characters(char[] ch, int start, int length)
	{
	StringBuilder chars = new StringBuilder(length);
	chars.Append(ch, start, length);
	string word = ReadToken(chars);
	while (word != null)
	{
	// System.out.println("\"" + word + "\"");
	switch (currElement)
	{
	case ELEM_CLASSES:
	consumer.AddClass(word);
	break;
	case ELEM_EXCEPTIONS:
	exception.Add(word);
	exception = NormalizeException(exception);
	consumer.AddException(GetExceptionWord(exception), new List<object>(exception));
	exception.Clear();
	break;
	case ELEM_PATTERNS:
	consumer.AddPattern(GetPattern(word), GetInterletterValues(word));
	break;
	}
	word = ReadToken(chars);
	}
	}
	}
	}