 // src/Lucene.Net.Benchmark/Support/TagSoup/Parser.cs - lucenenet - Git at Google

 // This file is part of TagSoup and is Copyright 2002-2008 by John Cowan.
 //
 // TagSoup is licensed under the Apache License,
 // Version 2.0.  You may obtain a copy of this license at
 // http://www.apache.org/licenses/LICENSE-2.0 .  You may also have
 // additional legal rights not granted by this license.
 //
 // TagSoup is distributed in the hope that it will be useful, but
 // unless required by applicable law or agreed to in writing, TagSoup
 // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
 // OF ANY KIND, either express or implied; not even the implied warranty
 // of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 //
 //
 // The TagSoup parser

 using J2N.Text;
 using Lucene.Net.Support;
 using Sax;
 using Sax.Ext;
 using Sax.Helpers;
 using System;
 using System.Collections;
 using System.Collections.Generic;
 using System.IO;
 using System.Text;

 namespace TagSoup
 {
     /// <summary>
     ///   The SAX parser class.
     /// </summary>
     public class Parser : DefaultHandler, IScanHandler, IXMLReader, ILexicalHandler
     {
         // XMLReader implementation
         //
         // Handler references. The corresponding public properties (and the
         // lexical-handler property) store this parser itself in place of a null
         // handler and report null back to the caller when that is the case.

         private IContentHandler theContentHandler;
         private ILexicalHandler theLexicalHandler;
         private IDTDHandler theDTDHandler;
         private IErrorHandler theErrorHandler;
         private IEntityResolver theEntityResolver;

         // Collaborators configurable through SetProperty; any that are still
         // null when a parse begins are given default instances by Setup().
         private Schema theSchema;
         private IScanner theScanner;
         private IAutoDetector theAutoDetector;

         // Default values for feature flags. Each constant seeds both the
         // matching instance field below and the matching entry of the
         // feature table.

         private const bool DEFAULT_NAMESPACES = true;
         private const bool DEFAULT_IGNORE_BOGONS = false;
         private const bool DEFAULT_BOGONS_EMPTY = false;
         private const bool DEFAULT_ROOT_BOGONS = true;
         private const bool DEFAULT_DEFAULT_ATTRIBUTES = true;
         private const bool DEFAULT_TRANSLATE_COLONS = false;
         private const bool DEFAULT_RESTART_ELEMENTS = true;
         private const bool DEFAULT_IGNORABLE_WHITESPACE = false;
         private const bool DEFAULT_CDATA_ELEMENTS = true;

         // Feature flags. These are the working copies of the settable
         // features; SetFeature updates the matching field whenever one of
         // these features is changed.

         private bool namespaces = DEFAULT_NAMESPACES;
         private bool ignoreBogons = DEFAULT_IGNORE_BOGONS;
         private bool bogonsEmpty = DEFAULT_BOGONS_EMPTY;
         private bool rootBogons = DEFAULT_ROOT_BOGONS;
         private bool defaultAttributes = DEFAULT_DEFAULT_ATTRIBUTES;
         private bool translateColons = DEFAULT_TRANSLATE_COLONS;
         private bool restartElements = DEFAULT_RESTART_ELEMENTS;
         private bool ignorableWhitespace = DEFAULT_IGNORABLE_WHITESPACE;
         private bool cDataElements = DEFAULT_CDATA_ELEMENTS;

         /// <summary>
         ///   A value of "true" indicates namespace URIs and unprefixed local
         ///   names for element and attribute names will be available.
         /// </summary>
         public const string NAMESPACES_FEATURE = "http://xml.org/sax/features/namespaces";

         /// <summary>
         ///   A value of "true" indicates that XML qualified names (with prefixes)
         ///   and attributes (including xmlns* attributes) will be available.
         ///   We don't support this value.
         /// </summary>
         public const string NAMESPACE_PREFIXES_FEATURE = "http://xml.org/sax/features/namespace-prefixes";

         /// <summary>
         ///   Reports whether this parser processes external general entities
         ///   (it doesn't).
         /// </summary>
         public const string EXTERNAL_GENERAL_ENTITIES_FEATURE = "http://xml.org/sax/features/external-general-entities";

         /// <summary>
         ///   Reports whether this parser processes external parameter entities
         ///   (it doesn't).
         /// </summary>
         public const string EXTERNAL_PARAMETER_ENTITIES_FEATURE = "http://xml.org/sax/features/external-parameter-entities";

         /// <summary>
         ///   May be examined only during a parse, after the startDocument()
         ///   callback has been completed; read-only. The value is true if
         ///   the document specified standalone="yes" in its XML declaration,
         ///   and otherwise is false.  (It's always false.)
         /// </summary>
         public const string IS_STANDALONE_FEATURE = "http://xml.org/sax/features/is-standalone";

         /// <summary>
         ///   A value of "true" indicates that the LexicalHandler will report
         ///   the beginning and end of parameter entities (it won't).
         /// </summary>
         public const string LEXICAL_HANDLER_PARAMETER_ENTITIES_FEATURE =
             "http://xml.org/sax/features/lexical-handler/parameter-entities";

         /// <summary>
         ///   A value of "true" indicates that system IDs in declarations will
         ///   be absolutized (relative to their base URIs) before reporting.
         ///   (This returns true but doesn't actually do anything.)
         /// </summary>
         public const string RESOLVE_DTD_URIS_FEATURE = "http://xml.org/sax/features/resolve-dtd-uris";

         /// <summary>
         /// Has a value of "true" if all XML names (for elements,
         /// prefixes, attributes, entities, notations, and local
         /// names), as well as Namespace URIs, will have been interned
         /// using <see cref="StringExtensions.Intern(string)" />. This supports fast testing of
         /// equality/inequality against string constants, rather than forcing
         /// slower calls to <see cref="string.Equals(object)" />.  (We always intern.)
         /// </summary>
         public const string STRING_INTERNING_FEATURE = "http://xml.org/sax/features/string-interning";

         /// <summary>
         /// Returns "true" if the Attributes objects passed by this
         /// parser in <see cref="IContentHandler.StartElement" /> implement the
         /// <see cref="Sax.Ext.IAttributes2" /> interface.	(They don't.)
         /// </summary>
         public const string USE_ATTRIBUTES2_FEATURE = "http://xml.org/sax/features/use-attributes2";

         /// <summary>
         ///   Returns "true" if the Locator objects passed by this
         ///   parser in <see cref="IContentHandler.SetDocumentLocator" /> implement the
         ///   <see cref="Sax.Ext.ILocator2" /> interface.  (They don't.)
         /// </summary>
         public const string USE_LOCATOR2_FEATURE = "http://xml.org/sax/features/use-locator2";
         /// <summary>
         ///   Returns "true" if, when setEntityResolver is given an object
         ///   implementing the  <see cref="Sax.Ext.IEntityResolver2" /> interface,
         ///   those new methods will be used.  (They won't be.)
         /// </summary>
         public const string USE_ENTITY_RESOLVER2_FEATURE = "http://xml.org/sax/features/use-entity-resolver2";

         /// <summary>
         ///   Controls whether the parser is reporting all validity errors
         ///   (We don't report any validity errors.)
         /// </summary>
         public const string VALIDATION_FEATURE = "http://xml.org/sax/features/validation";

         /// <summary>
         ///   Controls whether the parser reports Unicode normalization
         ///   errors as described in section 2.13 and Appendix B of the XML
         ///   1.1 Recommendation.  (We don't normalize.)
         /// </summary>
         public const string UNICODE_NORMALIZATION_CHECKING_FEATURE =
             "http://xml.org/sax/features/unicode-normalization-checking";

         /// <summary>
         ///   Controls whether, when the namespace-prefixes feature is set,
         ///   the parser treats namespace declaration attributes as being in
         ///   the http://www.w3.org/2000/xmlns/ namespace.  (It doesn't.)
         /// </summary>
         public const string XMLNS_URIS_FEATURE = "http://xml.org/sax/features/xmlns-uris";

         /// <summary>
         ///   Returns <c>true</c> if the parser supports both XML 1.1 and XML 1.0.
         ///   (Always <c>false</c>.)
         /// </summary>
         public const string XML11_FEATURE = "http://xml.org/sax/features/xml-1.1";

         /// <summary>
         ///   A value of <c>true</c> indicates that the parser will ignore
         ///   unknown elements.
         /// </summary>
         public const string IGNORE_BOGONS_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/ignore-bogons";

         /// <summary>
         ///   A value of <c>true</c> indicates that the parser will give unknown
         ///   elements a content model of EMPTY; a value of <c>false</c>, a
         ///   content model of ANY.
         /// </summary>
         public const string BOGONS_EMPTY_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/bogons-empty";

         /// <summary>
         ///   A value of <c>true</c> indicates that the parser will allow unknown
         ///   elements to be the root element.
         /// </summary>
         public const string ROOT_BOGONS_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/root-bogons";

         /// <summary>
         ///   A value of <c>true</c> indicates that the parser will return default
         ///   attribute values for missing attributes that have default values.
         /// </summary>
         public const string DEFAULT_ATTRIBUTES_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/default-attributes";

         /// <summary>
         ///   A value of <c>true</c> indicates that the parser will
         ///   translate colons into underscores in names.
         /// </summary>
         public const string TRANSLATE_COLONS_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/translate-colons";

         /// <summary>
         ///   A value of <c>true</c> indicates that the parser will
         ///   attempt to restart the restartable elements.
         /// </summary>
         public const string RESTART_ELEMENTS_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/restart-elements";

         /// <summary>
         ///   A value of "true" indicates that the parser will
         ///   transmit whitespace in element-only content via the SAX
         ///   ignorableWhitespace callback.  Normally this is not done,
         ///   because HTML is an SGML application and SGML suppresses
         ///   such whitespace.
         /// </summary>
         public const string IGNORABLE_WHITESPACE_FEATURE =
             "http://www.ccil.org/~cowan/tagsoup/features/ignorable-whitespace";

         /// <summary>
         ///   A value of "true" indicates that the parser will treat CDATA
         ///   elements specially.  Normally true, since the input is by
         ///   default HTML.
         /// </summary>
         public const string CDATA_ELEMENTS_FEATURE = "http://www.ccil.org/~cowan/tagsoup/features/cdata-elements";

         /// <summary>
         ///   Used to see some syntax events that are essential in some
         ///   applications: comments, CDATA delimiters, selected general
         ///   entity inclusions, and the start and end of the DTD (and
         ///   declaration of document element name). The Object must implement
         ///   <see cref="ILexicalHandler" />
         /// </summary>
         public const string LEXICAL_HANDLER_PROPERTY = "http://xml.org/sax/properties/lexical-handler";

         /// <summary>
         ///   Specifies the Scanner object this Parser uses.
         /// </summary>
         public const string SCANNER_PROPERTY = "http://www.ccil.org/~cowan/tagsoup/properties/scanner";

         /// <summary>
         ///   Specifies the Schema object this Parser uses.
         /// </summary>
         public const string SCHEMA_PROPERTY = "http://www.ccil.org/~cowan/tagsoup/properties/schema";

         /// <summary>
         ///   Specifies the AutoDetector (for encoding detection) this Parser uses.
         /// </summary>
         public const string AUTO_DETECTOR_PROPERTY = "http://www.ccil.org/~cowan/tagsoup/properties/auto-detector";


         // Table of every feature name this parser recognizes, mapped to its
         // current boolean value. GetFeature and SetFeature reject any name
         // that is not a key of this table.
         //
         // Inherited from the Java original: because of order-of-initialization
         // issues there, these entries are maintained separately from the
         // initial values of the corresponding instance variables, so care
         // must be taken to keep the two in sync (both use the DEFAULT_*
         // constants for the settable features).

         private readonly Hashtable features = new Hashtable {
             { NAMESPACES_FEATURE, DEFAULT_NAMESPACES },
             { NAMESPACE_PREFIXES_FEATURE, false },
             { EXTERNAL_GENERAL_ENTITIES_FEATURE, false },
             { EXTERNAL_PARAMETER_ENTITIES_FEATURE, false },
             { IS_STANDALONE_FEATURE, false },
             { LEXICAL_HANDLER_PARAMETER_ENTITIES_FEATURE, false },
             { RESOLVE_DTD_URIS_FEATURE, true },
             { STRING_INTERNING_FEATURE, true },
             { USE_ATTRIBUTES2_FEATURE, false },
             { USE_LOCATOR2_FEATURE, false },
             { USE_ENTITY_RESOLVER2_FEATURE, false },
             { VALIDATION_FEATURE, false },
             { XMLNS_URIS_FEATURE, false },
             { XML11_FEATURE, false },
             { IGNORE_BOGONS_FEATURE, DEFAULT_IGNORE_BOGONS },
             { BOGONS_EMPTY_FEATURE, DEFAULT_BOGONS_EMPTY },
             { ROOT_BOGONS_FEATURE, DEFAULT_ROOT_BOGONS },
             { DEFAULT_ATTRIBUTES_FEATURE, DEFAULT_DEFAULT_ATTRIBUTES },
             { TRANSLATE_COLONS_FEATURE, DEFAULT_TRANSLATE_COLONS },
             { RESTART_ELEMENTS_FEATURE, DEFAULT_RESTART_ELEMENTS },
             { IGNORABLE_WHITESPACE_FEATURE, DEFAULT_IGNORABLE_WHITESPACE },
             { CDATA_ELEMENTS_FEATURE, DEFAULT_CDATA_ELEMENTS },
         };

         /// <summary>
         ///   Returns the current value of the named feature.
         /// </summary>
         /// <param name="name">The feature name (one of the <c>*_FEATURE</c> constants).</param>
         /// <returns>The value currently recorded for the feature.</returns>
         /// <exception cref="SAXNotRecognizedException">
         ///   The name is not a key of the feature table.
         /// </exception>
         public virtual bool GetFeature(string name)
         {
             // The table only ever holds boxed booleans, and its indexer yields
             // null for a missing key, so one lookup answers both questions.
             if (features[name] is bool enabled)
             {
                 return enabled;
             }
             throw new SAXNotRecognizedException("Unknown feature " + name);
         }

         /// <summary>
         ///   Records a new value for the named feature and, for the features
         ///   backed by an instance flag, updates that flag as well.
         /// </summary>
         /// <param name="name">The feature name (one of the <c>*_FEATURE</c> constants).</param>
         /// <param name="value">The value to record.</param>
         /// <exception cref="SAXNotRecognizedException">
         ///   The name is not a key of the feature table.
         /// </exception>
         public virtual void SetFeature(string name, bool value)
         {
             if (!features.ContainsKey(name))
             {
                 throw new SAXNotRecognizedException("Unknown feature " + name);
             }
             features[name] = value;

             // Mirror the change into the matching working flag, if there is one.
             switch (name)
             {
                 case NAMESPACES_FEATURE:
                     namespaces = value;
                     break;
                 case IGNORE_BOGONS_FEATURE:
                     ignoreBogons = value;
                     break;
                 case BOGONS_EMPTY_FEATURE:
                     bogonsEmpty = value;
                     break;
                 case ROOT_BOGONS_FEATURE:
                     rootBogons = value;
                     break;
                 case DEFAULT_ATTRIBUTES_FEATURE:
                     defaultAttributes = value;
                     break;
                 case TRANSLATE_COLONS_FEATURE:
                     translateColons = value;
                     break;
                 case RESTART_ELEMENTS_FEATURE:
                     restartElements = value;
                     break;
                 case IGNORABLE_WHITESPACE_FEATURE:
                     ignorableWhitespace = value;
                     break;
                 case CDATA_ELEMENTS_FEATURE:
                     cDataElements = value;
                     break;
             }
         }

         /// <summary>
         ///   Returns the object currently registered under the named property.
         /// </summary>
         /// <param name="name">The property name (one of the <c>*_PROPERTY</c> constants).</param>
         /// <returns>
         ///   The lexical handler (or <c>null</c> when the parser itself is acting
         ///   as the handler), the scanner, the schema or the auto-detector.
         /// </returns>
         /// <exception cref="SAXNotRecognizedException">The name is not a known property.</exception>
         public virtual object GetProperty(string name)
         {
             // Kept as an instance call so that a null name fails here exactly
             // as it always has; the switch below therefore never sees null.
             if (name.Equals(LEXICAL_HANDLER_PROPERTY, StringComparison.Ordinal))
             {
                 if (theLexicalHandler == this)
                 {
                     return null;
                 }
                 return theLexicalHandler;
             }

             switch (name)
             {
                 case SCANNER_PROPERTY:
                     return theScanner;
                 case SCHEMA_PROPERTY:
                     return theSchema;
                 case AUTO_DETECTOR_PROPERTY:
                     return theAutoDetector;
                 default:
                     throw new SAXNotRecognizedException("Unknown property " + name);
             }
         }

         /// <summary>
         ///   Registers an object under the named property.
         /// </summary>
         /// <param name="name">The property name (one of the <c>*_PROPERTY</c> constants).</param>
         /// <param name="value">
         ///   The object to register. A null lexical handler makes the parser act
         ///   as its own lexical handler; every other property requires an
         ///   instance of the expected type.
         /// </param>
         /// <exception cref="SAXNotSupportedException">The value has the wrong type for the property.</exception>
         /// <exception cref="SAXNotRecognizedException">The name is not a known property.</exception>
         public virtual void SetProperty(string name, object value)
         {
             // Kept as an instance call so that a null name fails here exactly
             // as it always has; the switch below therefore never sees null.
             if (name.Equals(LEXICAL_HANDLER_PROPERTY, StringComparison.Ordinal))
             {
                 if (value == null)
                 {
                     theLexicalHandler = this;
                     return;
                 }
                 theLexicalHandler = value as ILexicalHandler
                     ?? throw new SAXNotSupportedException("Your lexical handler is not a ILexicalHandler");
                 return;
             }

             switch (name)
             {
                 case SCANNER_PROPERTY:
                     theScanner = value as IScanner
                         ?? throw new SAXNotSupportedException("Your scanner is not a IScanner");
                     break;
                 case SCHEMA_PROPERTY:
                     theSchema = value as Schema
                         ?? throw new SAXNotSupportedException("Your schema is not a Schema");
                     break;
                 case AUTO_DETECTOR_PROPERTY:
                     theAutoDetector = value as IAutoDetector
                         ?? throw new SAXNotSupportedException("Your auto-detector is not an IAutoDetector");
                     break;
                 default:
                     throw new SAXNotRecognizedException("Unknown property " + name);
             }
         }

         /// <summary>
         ///   Gets or sets the entity resolver. Setting <c>null</c> makes the parser
         ///   act as its own resolver, in which case the getter reports <c>null</c>.
         /// </summary>
         public virtual IEntityResolver EntityResolver
         {
             get
             {
                 if (theEntityResolver == this)
                 {
                     return null;
                 }
                 return theEntityResolver;
             }
             set
             {
                 if (value == null)
                 {
                     theEntityResolver = this;
                 }
                 else
                 {
                     theEntityResolver = value;
                 }
             }
         }

         /// <summary>
         ///   Gets or sets the DTD handler. Setting <c>null</c> makes the parser
         ///   act as its own handler, in which case the getter reports <c>null</c>.
         /// </summary>
         public virtual IDTDHandler DTDHandler
         {
             get
             {
                 if (theDTDHandler == this)
                 {
                     return null;
                 }
                 return theDTDHandler;
             }
             set
             {
                 if (value == null)
                 {
                     theDTDHandler = this;
                 }
                 else
                 {
                     theDTDHandler = value;
                 }
             }
         }

         /// <summary>
         ///   Gets or sets the content handler. Setting <c>null</c> makes the parser
         ///   act as its own handler, in which case the getter reports <c>null</c>.
         /// </summary>
         public virtual IContentHandler ContentHandler
         {
             get
             {
                 if (theContentHandler == this)
                 {
                     return null;
                 }
                 return theContentHandler;
             }
             set
             {
                 if (value == null)
                 {
                     theContentHandler = this;
                 }
                 else
                 {
                     theContentHandler = value;
                 }
             }
         }

         /// <summary>
         ///   Gets or sets the error handler. Setting <c>null</c> makes the parser
         ///   act as its own handler, in which case the getter reports <c>null</c>.
         /// </summary>
         public virtual IErrorHandler ErrorHandler
         {
             get
             {
                 if (theErrorHandler == this)
                 {
                     return null;
                 }
                 return theErrorHandler;
             }
             set
             {
                 if (value == null)
                 {
                     theErrorHandler = this;
                 }
                 else
                 {
                     theErrorHandler = value;
                 }
             }
         }

         /// <summary>
         ///   Parses the document described by <paramref name="input"/>, reporting
         ///   events to the registered handlers.
         /// </summary>
         /// <remarks>
         ///   The parse state is reset first, then the content handler receives
         ///   <c>StartDocument</c>, the document locator (when the scanner is an
         ///   <see cref="ILocator"/>), and a prefix mapping for the schema's
         ///   namespace when that namespace URI is non-empty. The scanner then
         ///   drives the rest of the parse with this object as its scan handler.
         /// </remarks>
         /// <param name="input">The source to read the document from.</param>
         public virtual void Parse(InputSource input)
         {
             Setup();
             TextReader reader = GetReader(input);

             theContentHandler.StartDocument();
             theScanner.ResetDocumentLocator(input.PublicId, input.SystemId);
             if (theScanner is ILocator documentLocator)
             {
                 theContentHandler.SetDocumentLocator(documentLocator);
             }

             bool schemaHasNamespace = theSchema.Uri.Length != 0;
             if (schemaHasNamespace)
             {
                 theContentHandler.StartPrefixMapping(theSchema.Prefix, theSchema.Uri);
             }

             theScanner.Scan(reader, this);
         }

         /// <summary>
         ///   Parses the document identified by a system identifier by wrapping
         ///   it in an <see cref="InputSource"/>.
         /// </summary>
         /// <param name="systemid">The system identifier of the document.</param>
         public virtual void Parse(string systemid)
         {
             var source = new InputSource(systemid);
             Parse(source);
         }

         /// <summary>
         ///   Prepares the parser for a new parse: supplies default collaborators
         ///   for any that were not configured, and resets the per-parse state.
         /// </summary>
         private void Setup()
         {
             // Collaborators not supplied through SetProperty get defaults.
             theSchema = theSchema ?? new HTMLSchema();
             theScanner = theScanner ?? new HTMLScanner();
             theAutoDetector = theAutoDetector
                 ?? new AutoDetectorDelegate(stream => new StreamReader(stream));

             // Fresh element stack rooted at the schema's "<root>" type, plus
             // the element used for "<pcdata>".
             theStack = new Element(theSchema.GetElementType("<root>"), defaultAttributes);
             thePCDATA = new Element(theSchema.GetElementType("<pcdata>"), defaultAttributes);
             virginStack = true;

             // Forget everything left over from a previous parse.
             theNewElement = null;
             theSaved = null;
             theAttributeName = null;
             thePITarget = null;
             theEntity = 0;
             theDoctypeSystemId = null;
             theDoctypePublicId = null;
             theDoctypeName = null;
         }

         /// <summary>
         ///   Produces the <see cref="TextReader"/> to parse from for an
         ///   <see cref="InputSource"/>.
         /// </summary>
         /// <remarks>
         ///   A reader supplied by the source is returned as-is. Otherwise the
         ///   source's stream is used (opened from the system identifier when the
         ///   source has no stream), wrapped in a <see cref="BufferedStream"/>
         ///   unless it already is one, and decoded with the source's encoding or,
         ///   when none is given, by the configured auto-detector.
         /// </remarks>
         /// <param name="s">The input source describing the document.</param>
         /// <returns>A reader over the document's characters.</returns>
         private TextReader GetReader(InputSource s)
         {
             TextReader supplied = s.TextReader;
             if (supplied != null)
             {
                 return supplied;
             }

             Stream raw = s.Stream ?? GetInputStream(s.PublicId, s.SystemId);
             Stream buffered = raw is BufferedStream ? raw : new BufferedStream(raw);

             Encoding declared = s.Encoding;
             if (declared != null)
             {
                 return new StreamReader(buffered, declared);
             }
             return theAutoDetector.AutoDetectingReader(buffered);
         }

         /// <summary>
         ///   Opens a read-only file stream for a system identifier, resolving it
         ///   against the current working directory. The public identifier is
         ///   not used.
         /// </summary>
         /// <param name="publicid">Ignored.</param>
         /// <param name="systemid">The system identifier to resolve and open.</param>
         /// <returns>A stream over the resolved local file.</returns>
 #pragma warning disable IDE0060 // Remove unused parameter
         private static Stream GetInputStream(string publicid, string systemid) // LUCENENET: CA1822: Mark members as static
 #pragma warning restore IDE0060 // Remove unused parameter
         {
             // The trailing separator makes the working directory act as the
             // base "folder" for relative resolution.
             string workingDirectoryUri = "file://" + Directory.GetCurrentDirectory() + Path.DirectorySeparatorChar;
             var resolved = new Uri(new Uri(workingDirectoryUri), systemid);
             return new FileStream(resolved.LocalPath, FileMode.Open, FileAccess.Read, FileShare.Read);
         }

         // ScanHandler implementation
         //
         // State shared between the scanner callbacks below; Setup() resets
         // most of it at the start of every parse.

         // Element whose attributes are being collected; the attribute
         // callbacks do nothing while this is null.
         private Element theNewElement;
         // Name recorded by Aname and consumed (then cleared) by Adup/Aval.
         private string theAttributeName;
         // DOCTYPE details; written by code outside this section.
         // NOTE(review): theDoctypeIsPresent is not reset by Setup() - confirm that is intended.
         private bool theDoctypeIsPresent;
         private string theDoctypePublicId;
         private string theDoctypeSystemId;
         private string theDoctypeName;
         // Presumably the pending processing-instruction target; used outside this section.
         private string thePITarget;
         // Top of the open-element stack; elements are linked through Element.Next.
         private Element theStack;
         // Chain of elements waiting to be pushed back by Restart().
         private Element theSaved;
         // Element created from the schema's "<pcdata>" type.
         private Element thePCDATA;
         // Value produced by the most recent Entity() callback.
         private int theEntity; // needs to support chars past U+FFFF


         /// <summary>
         ///   Scanner callback for an attribute given without a value: the pending
         ///   attribute is added to the new element with its own name as its value.
         ///   Does nothing when there is no new element or no pending attribute name.
         /// </summary>
         /// <param name="buff">Unused.</param>
         /// <param name="offset">Unused.</param>
         /// <param name="length">Unused.</param>
         public virtual void Adup(char[] buff, int offset, int length)
         {
             if (theNewElement != null && theAttributeName != null)
             {
                 theNewElement.SetAttribute(theAttributeName, null, theAttributeName);
                 theAttributeName = null;
             }
         }

         /// <summary>
         ///   Scanner callback for an attribute name: remembers the lower-cased
         ///   name until its value (or lack of one) is reported. Does nothing when
         ///   there is no new element to attach attributes to.
         /// </summary>
         /// <param name="buff">Buffer holding the name.</param>
         /// <param name="offset">Index of the first character of the name.</param>
         /// <param name="length">Number of characters in the name.</param>
         public virtual void Aname(char[] buff, int offset, int length)
         {
             if (theNewElement != null)
             {
                 // Attribute names are lower-cased here rather than relying on
                 // the Schema to canonicalize them.
                 string rawName = MakeName(buff, offset, length);
                 theAttributeName = rawName.ToLowerInvariant();
             }
         }

         /// <summary>
         ///   Scanner callback for an attribute value: expands entity references in
         ///   the value and stores it on the new element under the pending
         ///   attribute name. Does nothing when there is no new element or no
         ///   pending attribute name.
         /// </summary>
         /// <param name="buff">Buffer holding the raw value.</param>
         /// <param name="offset">Index of the first character of the value.</param>
         /// <param name="length">Number of characters in the value.</param>
         public virtual void Aval(char[] buff, int offset, int length)
         {
             if (theNewElement != null && theAttributeName != null)
             {
                 string expanded = ExpandEntities(new string(buff, offset, length));
                 theNewElement.SetAttribute(theAttributeName, null, expanded);
                 theAttributeName = null;
             }
         }

         /// <summary>
         ///   Expands entity references in an attribute value selectively.
         ///   A reference is expanded only if it is properly terminated with a
         ///   semicolon and <c>LookupEntity</c> returns a non-zero value for it;
         ///   any other text, including unterminated or unknown references, is
         ///   copied through unchanged.
         /// </summary>
         /// <param name="src">The raw attribute value.</param>
         /// <returns>The value with the recognized references replaced by their characters.</returns>
         private string ExpandEntities(string src)
         {
             // Index in dst just past the '&' of the reference being read,
             // or -1 when not inside a reference.
             int refStart = -1;
             int len = src.Length;
             // Output buffer, the same size as the input: an expansion is written
             // over the beginning of its own reference text.
             var dst = new char[len];
             int dstlen = 0;
             for (int i = 0; i < len; i++)
             {
                 char ch = src[i];
                 // Copy first; a successful expansion below rewinds dstlen.
                 dst[dstlen++] = ch;
                 if (ch == '&' && refStart == -1)
                 {
                     // start of a ref excluding &
                     refStart = dstlen;
                 }
                 else if (refStart == -1)
                 {
                     // not in a ref
                 }
                 else if (char.IsLetter(ch) || char.IsDigit(ch) || ch == '#')
                 {
                     // valid entity char
                 }
                 else if (ch == ';')
                 {
                     // properly terminated ref: look up the text between '&' and ';'
                     int ent = LookupEntity(dst, refStart, dstlen - refStart - 1);
                     if (ent > 0xFFFF)
                     {
                         // Beyond U+FFFF: overwrite "&x" with a surrogate pair.
                         ent -= 0x10000;
                         dst[refStart - 1] = (char)((ent >> 10) + 0xD800);
                         dst[refStart] = (char)((ent & 0x3FF) + 0xDC00);
                         dstlen = refStart + 1;
                     }
                     else if (ent != 0)
                     {
                         // Single UTF-16 unit: overwrite the '&' itself.
                         dst[refStart - 1] = (char)ent;
                         dstlen = refStart;
                     }
                     // ent == 0: unknown reference, its text stays as copied.
                     refStart = -1;
                 }
                 else
                 {
                     // improperly terminated ref
                     refStart = -1;
                 }
             }
             return new string(dst, 0, dstlen);
         }

         /// <summary>
         ///   Scanner callback for an entity reference: resolves the reference
         ///   text and remembers the resulting value.
         /// </summary>
         /// <param name="buff">Buffer holding the reference text.</param>
         /// <param name="offset">Index of the first character of the reference text.</param>
         /// <param name="length">Number of characters in the reference text.</param>
         public virtual void Entity(char[] buff, int offset, int length)
         {
             int resolved = LookupEntity(buff, offset, length);
             theEntity = resolved;
         }

         /// <summary>
         ///   Resolves the text of an entity reference (without the leading
         ///   <c>&amp;</c> and trailing <c>;</c>) to a character value.
         ///   Numeric character references (<c>#ddd</c>, <c>#xhhh</c>) are
         ///   processed here; named references are deferred to the schema.
         /// </summary>
         /// <param name="buff">Buffer holding the reference text.</param>
         /// <param name="offset">Index of the first character of the reference text.</param>
         /// <param name="length">Number of characters in the reference text.</param>
         /// <returns>
         ///   The referenced value, or 0 when the text is empty or is a numeric
         ///   reference that cannot be parsed (no digits, invalid digits, or a
         ///   value that does not fit in a 32-bit integer).
         /// </returns>
         private int LookupEntity(char[] buff, int offset, int length)
         {
             if (length < 1)
             {
                 return 0;
             }
             if (buff[offset] != '#')
             {
                 // Named reference.
                 return theSchema.GetEntity(new string(buff, offset, length));
             }

             bool isHex = length > 1 && (buff[offset + 1] == 'x' || buff[offset + 1] == 'X');
             string digits = isHex
                 ? new string(buff, offset + 2, length - 2)
                 : new string(buff, offset + 1, length - 1);
             try
             {
                 int value = Convert.ToInt32(digits, isHex ? 16 : 10);
                 // For base 16, Convert reads 8-digit values with the top bit set
                 // as two's complement negatives; such a reference is out of
                 // range, so treat it like any other unparsable number.
                 return (isHex && value < 0) ? 0 : value;
             }
             catch (Exception e) when (e is FormatException || e is OverflowException || e is ArgumentException)
             {
                 // Malformed input must never abort the parse. Besides
                 // FormatException (bad digits), Convert.ToInt32 throws
                 // ArgumentOutOfRangeException for an empty digit string
                 // ("&#;", "&#x;") and OverflowException for oversized values.
                 return 0;
             }
         }

         /// <summary>
         ///   Scanner callback for end of input: closes every element still open
         ///   and ends the document.
         /// </summary>
         /// <param name="buff">Unused.</param>
         /// <param name="offset">Unused.</param>
         /// <param name="length">Unused.</param>
         public virtual void EOF(char[] buff, int offset, int length)
         {
             // The stack was never touched during this parse: rectify with the
             // PCDATA element before unwinding.
             if (virginStack)
             {
                 Rectify(thePCDATA);
             }

             // Unwind until only the bottom-most element remains.
             while (theStack.Next != null)
             {
                 Pop();
             }

             // Close the prefix mapping for the schema namespace, which is only
             // opened when the namespace URI is non-empty.
             bool schemaHasNamespace = theSchema.Uri.Length != 0; // LUCENENET: CA1820: Test for empty strings using string length
             if (schemaHasNamespace)
             {
                 theContentHandler.EndPrefixMapping(theSchema.Prefix);
             }
             theContentHandler.EndDocument();
         }

         /// <summary>
         ///   Scanner callback for an end-tag. The tag is first offered to the
         ///   CDATA-element handling; only if that does not consume it is it
         ///   processed as an ordinary end-tag.
         /// </summary>
         /// <param name="buff">Buffer holding the tag name.</param>
         /// <param name="offset">Index of the first character of the name.</param>
         /// <param name="length">Number of characters in the name.</param>
         public virtual void ETag(char[] buff, int offset, int length)
         {
             bool consumedAsCharacters = ETagCdata(buff, offset, length);
             if (!consumedAsCharacters)
             {
                 ETagBasic(buff, offset, length);
             }
         }

         // Delimiters used to re-emit an end-tag as literal text: "</" then ">".
         private static readonly char[] etagchars = { '<', '/', '>' };

         /// <summary>
         ///   Handles an end-tag seen while the current element is a CDATA
         ///   element. If the tag does not match the current element's name
         ///   (compared case-insensitively, and exactly in length, so junk after
         ///   the name counts as a mismatch), the tag is reported as characters
         ///   and CDATA mode is restarted.
         /// </summary>
         /// <param name="buff">Buffer holding the tag name.</param>
         /// <param name="offset">Index of the first character of the name.</param>
         /// <param name="length">Number of characters in the name.</param>
         /// <returns>
         ///   <c>true</c> if the tag was consumed as character data;
         ///   <c>false</c> if it still has to be processed as a real end-tag.
         /// </returns>
         public virtual bool ETagCdata(char[] buff, int offset, int length)
         {
             string currentName = theStack.Name;

             // Only relevant when CDATA handling is on and we are inside a CDATA element.
             if (!cDataElements || (theStack.Flags & Schema.F_CDATA) == 0)
             {
                 return false;
             }

             bool closesCurrent = length == currentName.Length;
             for (int i = 0; closesCurrent && i < length; i++)
             {
                 closesCurrent = char.ToLowerInvariant(buff[offset + i]) == char.ToLowerInvariant(currentName[i]);
             }
             if (closesCurrent)
             {
                 return false;
             }

             // Not the real closing tag: pass it through as text and stay in CDATA mode.
             theContentHandler.Characters(etagchars, 0, 2);
             theContentHandler.Characters(buff, offset, length);
             theContentHandler.Characters(etagchars, 2, 1);
             theScanner.StartCDATA();
             return true;
         }

         /// <summary>
         ///   Processes an ordinary end-tag by closing the matching open element.
         /// </summary>
         /// <remarks>
         ///   An empty name closes the current element. A name the schema does not
         ///   know, a name with no matching open element, or a match that has
         ///   fewer than two elements beneath it on the stack is ignored. If an
         ///   F_NOFORCE element lies above the match, the match is only marked as
         ///   preclosed; otherwise everything above it is restartably popped and
         ///   the match itself is popped. Finally any preclosed elements now on
         ///   top are popped and saved elements are restarted where possible.
         /// </remarks>
         /// <param name="buff">Buffer holding the tag name.</param>
         /// <param name="offset">Index of the first character of the name.</param>
         /// <param name="length">Number of characters in the name.</param>
         public virtual void ETagBasic(char[] buff, int offset, int length)
         {
             theNewElement = null;

             string name;
             if (length == 0)
             {
                 name = theStack.Name;
             }
             else
             {
                 // Canonicalize the name through the schema's element type.
                 ElementType type = theSchema.GetElementType(MakeName(buff, offset, length));
                 if (type == null)
                 {
                     return; // mysterious end-tag
                 }
                 name = type.Name;
             }

             // Walk down the stack to the nearest element with that name, noting
             // whether an F_NOFORCE element is passed on the way.
             bool inNoforce = false;
             Element target = theStack;
             while (target != null && !target.Name.Equals(name, StringComparison.Ordinal))
             {
                 if ((target.Flags & Schema.F_NOFORCE) != 0)
                 {
                     inNoforce = true;
                 }
                 target = target.Next;
             }

             // No such open element, or too close to the bottom of the stack to close.
             if (target == null || target.Next == null || target.Next.Next == null)
             {
                 return;
             }

             if (inNoforce)
             {
                 // Inside an F_NOFORCE element: just preclose the match.
                 target.Preclose();
             }
             else
             {
                 // Restartably pop everything above the match, then the match itself.
                 while (theStack != target)
                 {
                     RestartablyPop();
                 }
                 Pop();
             }

             // Pop any preclosed elements that have surfaced.
             while (theStack.IsPreclosed)
             {
                 Pop();
             }
             Restart(null);
         }

         /// <summary>
         ///   Re-push saved (restartable) elements onto the stack for as long as
         ///   the current stack top can contain the next saved element and that
         ///   saved element can in turn contain <paramref name="e" />.
         /// </summary>
         /// <param name="e">
         ///   The next element to be started, or <c>null</c> if it is not known
         /// </param>
         private void Restart(Element e)
         {
             while (theSaved != null)
             {
                 if (!theStack.CanContain(theSaved))
                 {
                     break;
                 }
                 if (e != null && !theSaved.CanContain(e))
                 {
                     break;
                 }

                 // Remember the rest of the saved list first: Push relinks the element.
                 Element remaining = theSaved.Next;
                 Push(theSaved);
                 theSaved = remaining;
             }
         }

         /// <summary>
         ///   Pop the top element off the stack for good, reporting the end of
         ///   the element and of any foreign prefix mappings to the content handler.
         ///   Does nothing when the stack is empty.
         /// </summary>
         private void Pop()
         {
             if (theStack == null)
             {
                 return; // empty stack
             }

             string qName = theStack.Name;
             string local = theStack.LocalName;
             string uri = theStack.Namespace;
             string elementPrefix = PrefixOf(qName);
             if (!namespaces)
             {
                 // Namespace processing is off: report only the qualified name.
                 local = "";
                 uri = "";
             }

             theContentHandler.EndElement(uri, local, qName);
             if (Foreign(elementPrefix, uri))
             {
                 theContentHandler.EndPrefixMapping(elementPrefix);
             }

             // End the mappings of foreign attribute prefixes, last attribute first.
             Attributes attributes = theStack.Attributes;
             int index = attributes.Length;
             while (--index >= 0)
             {
                 string attributeUri = attributes.GetURI(index);
                 string attributePrefix = PrefixOf(attributes.GetQName(index));
                 if (Foreign(attributePrefix, attributeUri))
                 {
                     theContentHandler.EndPrefixMapping(attributePrefix);
                 }
             }

             theStack = theStack.Next;
         }

         /// <summary>
         ///   Pop the top element off the stack and, when restarting is enabled
         ///   and the element is flagged F_RESTART, anonymize it and put it at the
         ///   head of the saved list.
         /// </summary>
         private void RestartablyPop()
         {
             Element top = theStack;
             Pop();

             if (!restartElements)
             {
                 return;
             }
             if ((top.Flags & Schema.F_RESTART) == 0)
             {
                 return;
             }

             top.Anonymize();
             top.Next = theSaved;
             theSaved = top;
         }

         // True until the first element has been pushed.
         private bool virginStack = true;

         /// <summary>
         ///   Push an element onto the stack, reporting its foreign prefix
         ///   mappings and its start to the content handler. If the element
         ///   belongs to a CDATA element type (and CDATA elements are enabled)
         ///   the scanner is switched to CDATA mode.
         /// </summary>
         /// <param name="e">The element to push</param>
         private void Push(Element e)
         {
             string qName = e.Name;
             string local = e.LocalName;
             string uri = e.Namespace;
             string elementPrefix = PrefixOf(qName);

             e.Clean();
             if (!namespaces)
             {
                 // Namespace processing is off: report only the qualified name.
                 local = "";
                 uri = "";
             }

             // The very first element pushed, if it is named like the DOCTYPE,
             // triggers resolution of the DOCTYPE's external identifiers.
             bool isDoctypeRoot = virginStack && local.Equals(theDoctypeName, StringComparison.OrdinalIgnoreCase);
             if (isDoctypeRoot)
             {
                 try
                 {
                     theEntityResolver.ResolveEntity(theDoctypePublicId, theDoctypeSystemId);
                 }
                 catch (IOException)
                 {
                     // Can't be thrown for root I believe.
                 }
             }

             if (Foreign(elementPrefix, uri))
             {
                 theContentHandler.StartPrefixMapping(elementPrefix, uri);
             }

             // Start the mappings of foreign attribute prefixes, first attribute first.
             Attributes attributes = e.Attributes;
             for (int i = 0, count = attributes.Length; i < count; i++)
             {
                 string attributeUri = attributes.GetURI(i);
                 string attributePrefix = PrefixOf(attributes.GetQName(i));
                 if (Foreign(attributePrefix, attributeUri))
                 {
                     theContentHandler.StartPrefixMapping(attributePrefix, attributeUri);
                 }
             }

             theContentHandler.StartElement(uri, local, qName, e.Attributes);

             // Link the element in as the new stack top.
             e.Next = theStack;
             theStack = e;
             virginStack = false;

             if (cDataElements && (e.Flags & Schema.F_CDATA) != 0)
             {
                 theScanner.StartCDATA();
             }
         }

         /// <summary>
         ///   Extract the prefix of a qualified name: everything before its first colon.
         /// </summary>
         /// <param name="name">The qualified name</param>
         /// <returns>The prefix, or the empty string when the name contains no colon</returns>
         private static string PrefixOf(string name)
         {
             int colon = name.IndexOf(':');
             return colon == -1 ? "" : name.Substring(0, colon);
         }

         /// <summary>
         ///   Tell whether a prefix/namespace pair denotes a foreign name: both must
         ///   be non-empty and the namespace must differ from the schema's own URI.
         /// </summary>
         /// <param name="prefix">Prefix of the qualified name (may be empty)</param>
         /// <param name="ns">Namespace URI of the name (may be empty)</param>
         /// <returns><c>true</c> if the name is foreign</returns>
         private bool Foreign(string prefix, string ns)
         {
             // LUCENENET: CA1820: Test for empty strings using string length
             if (prefix.Length == 0 || ns.Length == 0)
             {
                 return false;
             }
             return !ns.Equals(theSchema.Uri, StringComparison.Ordinal);
         }

         /// <summary>
         ///   Handles a markup declaration. Parsing the complete XML Document Type
         ///   Definition is way too complex, but for many simple cases something
         ///   useful can be extracted: the first DOCTYPE declaration's name, public
         ///   id and system id are reported to the lexical handler and remembered.
         ///   Anything that is not a DOCTYPE, and any further DOCTYPE, is ignored.
         ///   doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
         ///   DeclSep ::= PEReference | S
         ///   intSubset ::= (markupdecl | DeclSep)*
         ///   markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
         ///   ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
         /// </summary>
         /// <param name="buff">Buffer holding the declaration text</param>
         /// <param name="offset">Index of the first character in <paramref name="buff" /></param>
         /// <param name="length">Number of characters</param>
         public virtual void Decl(char[] buff, int offset, int length)
         {
             string[] words = Split(new string(buff, offset, length));
             bool isDoctype = words.Length > 0 && "DOCTYPE".Equals(words[0], StringComparison.OrdinalIgnoreCase);
             if (!isDoctype)
             {
                 return; // only DOCTYPE declarations are interpreted
             }
             if (theDoctypeIsPresent)
             {
                 return; // one doctype only!
             }
             theDoctypeIsPresent = true;
             if (words.Length < 2)
             {
                 return; // DOCTYPE without a name: nothing to report
             }

             string name = words[1];
             string publicid = null;
             string systemid = null;
             if (words.Length > 3)
             {
                 // The keywords are matched case-sensitively.
                 string keyword = words[2];
                 if ("SYSTEM".Equals(keyword, StringComparison.Ordinal))
                 {
                     systemid = words[3];
                 }
                 else if ("PUBLIC".Equals(keyword, StringComparison.Ordinal))
                 {
                     publicid = words[3];
                     systemid = words.Length > 4 ? words[4] : "";
                 }
             }

             publicid = CleanPublicId(TrimQuotes(publicid));
             systemid = TrimQuotes(systemid);

             theLexicalHandler.StartDTD(name, publicid, systemid);
             theLexicalHandler.EndDTD();
             theDoctypeName = name;
             theDoctypePublicId = publicid;

             if (theScanner is ILocator locator)
             {
                 // Start from the locator's system id and, when that is an absolute
                 // URI, resolve the declared system id against it.
                 theDoctypeSystemId = locator.SystemId;
                 try
                 {
                     if (Uri.IsWellFormedUriString(theDoctypeSystemId, UriKind.Absolute))
                     {
                         var baseUri = new Uri(theDoctypeSystemId);
                         theDoctypeSystemId = new Uri(baseUri, systemid).ToString();
                     }
                 }
                 catch (Exception)
                 {
                     // Best effort: on failure the locator's system id is kept as is.
                 }
             }
         }

         /// <summary>
         ///   If the string is wrapped in a matching pair of single or double
         ///   quotes, strip that pair; otherwise return the string unchanged.
         /// </summary>
         /// <param name="value">The string to unquote; may be <c>null</c></param>
         /// <returns>
         ///   The string without its surrounding quotes, the original string when
         ///   it is not quoted, or <c>null</c> when <paramref name="value" /> is <c>null</c>
         /// </returns>
         private static string TrimQuotes(string value)
         {
             // A quoted string needs at least an opening and a closing quote,
             // so null, empty and one-character strings are returned as they are.
             if (value == null || value.Length < 2)
             {
                 return value;
             }

             char first = value[0];
             char last = value[value.Length - 1];
             if (first == last && (first == '\'' || first == '"'))
             {
                 // Substring takes (startIndex, length), not (start, end): dropping
                 // one character from each end leaves Length - 2 characters.
                 return value.Substring(1, value.Length - 2);
             }
             return value;
         }

         /// <summary>
         ///   Split the supplied string into words or phrases separated by whitespace.
         ///   A phrase enclosed in single or double quotes is kept together (quotes
         ///   included); a quote preceded by a backslash does not open or close a phrase.
         /// </summary>
         /// <param name="val">The string to split</param>
         /// <returns>The words and phrases in order; empty when the input is blank</returns>
         private static string[] Split(string val)
         {
             string text = val.Trim();
             if (text.Length == 0)
             {
                 return Arrays.Empty<string>();
             }

             var tokens = new List<string>();
             int tokenStart = 0;      // start of the current token, or -1 between tokens
             bool inSingle = false;   // inside '...'
             bool inDouble = false;   // inside "..."
             char previous = '\0';
             int pos = 0;
             for (; pos < text.Length; pos++)
             {
                 char current = text[pos];
                 bool unescaped = previous != '\\';

                 if (current == '\'' && !inDouble && unescaped)
                 {
                     inSingle = !inSingle;
                     if (tokenStart < 0)
                     {
                         tokenStart = pos;
                     }
                 }
                 else if (current == '\"' && !inSingle && unescaped)
                 {
                     inDouble = !inDouble;
                     if (tokenStart < 0)
                     {
                         tokenStart = pos;
                     }
                 }
                 else if (!inSingle && !inDouble)
                 {
                     if (char.IsWhiteSpace(current))
                     {
                         // Whitespace outside quotes ends the current token, if any.
                         if (tokenStart >= 0)
                         {
                             tokens.Add(text.Substring(tokenStart, pos - tokenStart));
                         }
                         tokenStart = -1;
                     }
                     else if (tokenStart < 0)
                     {
                         tokenStart = pos;
                     }
                 }

                 previous = current;
             }

             // The trimmed text never ends in whitespace, so a final token is always pending.
             tokens.Add(text.Substring(tokenStart, pos - tokenStart));
             return tokens.ToArray();
         }

         // Characters that are kept verbatim in a public id; everything else counts as junk.
         private const string LEGAL = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-'()+,./:=?;!*#@$_%";

         /// <summary>
         ///   Normalize a public id: each run of characters outside <see cref="LEGAL" />
         ///   (whitespace or junk) collapses to a single space, and leading and
         ///   trailing whitespace is removed.
         /// </summary>
         /// <param name="src">The raw public id; may be <c>null</c></param>
         /// <returns>The cleaned public id, or <c>null</c> when <paramref name="src" /> is <c>null</c></returns>
         private string CleanPublicId(string src)
         {
             if (src == null)
             {
                 return null;
             }

             var cleaned = new StringBuilder(src.Length);
             bool afterSeparator = true; // starts true so that leading junk is dropped
             foreach (char ch in src)
             {
                 bool legal = LEGAL.IndexOf(ch) != -1;
                 if (legal)
                 {
                     cleaned.Append(ch);
                 }
                 else if (!afterSeparator)
                 {
                     // First junk character after legal text: stand in one space for the run.
                     cleaned.Append(' ');
                 }
                 afterSeparator = !legal;
             }

             return cleaned.ToString().Trim(); // trim any final junk whitespace
         }

         /// <summary>
         ///   Creates the pending element (<c>theNewElement</c>) for the element
         ///   name in the buffer, unless one is already pending. A name unknown to
         ///   the schema is either dropped (ignore-bogons) or registered in the
         ///   schema on the fly according to the bogon settings.
         /// </summary>
         /// <param name="buff">Buffer holding the element name</param>
         /// <param name="offset">Index of the first name character in <paramref name="buff" /></param>
         /// <param name="length">Number of name characters</param>
         public virtual void GI(char[] buff, int offset, int length)
         {
             if (theNewElement != null)
             {
                 return;
             }

             string name = MakeName(buff, offset, length);
             if (name == null)
             {
                 return;
             }

             ElementType elementType = theSchema.GetElementType(name);
             if (elementType == null)
             {
                 // Suppress unknown elements if ignore-bogons is on
                 if (ignoreBogons)
                 {
                     return;
                 }

                 // Declare the unknown element in the schema before using it.
                 int contentModel;
                 if (bogonsEmpty)
                 {
                     contentModel = Schema.M_EMPTY;
                 }
                 else
                 {
                     contentModel = Schema.M_ANY;
                 }

                 int memberOf;
                 if (rootBogons)
                 {
                     memberOf = Schema.M_ANY;
                 }
                 else
                 {
                     memberOf = Schema.M_ANY & ~Schema.M_ROOT;
                 }

                 theSchema.ElementType(name, contentModel, memberOf, 0);
                 if (!rootBogons)
                 {
                     theSchema.Parent(name, theSchema.RootElementType.Name);
                 }
                 elementType = theSchema.GetElementType(name);
             }

             theNewElement = new Element(elementType, defaultAttributes);
         }

         /// <summary>
         ///   Handles a CDATA section: its content is processed through
         ///   <see cref="PCDATA" />, bracketed by StartCDATA/EndCDATA notifications
         ///   to the lexical handler.
         /// </summary>
         /// <param name="buff">Buffer holding the section's content</param>
         /// <param name="offset">Index of the first character in <paramref name="buff" /></param>
         /// <param name="length">Number of characters</param>
         public virtual void CDSect(char[] buff, int offset, int length)
         {
             theLexicalHandler.StartCDATA();
             PCDATA(buff, offset, length);
             theLexicalHandler.EndCDATA();
         }

         /// <summary>
         ///   Handles character data. Text made only of whitespace, arriving where
         ///   the current element cannot contain character data, is reported as
         ///   ignorable whitespace (or dropped when that feature is off). Any other
         ///   text is reported as characters after the stack has been rectified to
         ///   accept it. Empty text is ignored.
         /// </summary>
         /// <param name="buff">Buffer holding the text</param>
         /// <param name="offset">Index of the first character in <paramref name="buff" /></param>
         /// <param name="length">Number of characters</param>
         public virtual void PCDATA(char[] buff, int offset, int length)
         {
             if (length == 0)
             {
                 return;
             }

             bool allWhite = true;
             for (int i = 0; i < length; i++)
             {
                 if (!char.IsWhiteSpace(buff[offset + i]))
                 {
                     allWhite = false;
                     break; // one non-whitespace character settles it; no need to scan the rest
                 }
             }

             if (allWhite && !theStack.CanContain(thePCDATA))
             {
                 if (ignorableWhitespace)
                 {
                     theContentHandler.IgnorableWhitespace(buff, offset, length);
                 }
             }
             else
             {
                 Rectify(thePCDATA);
                 theContentHandler.Characters(buff, offset, length);
             }
         }

         /// <summary>
         ///   Records the target of a processing instruction, turned into a valid
         ///   name with any colon replaced by an underscore. Ignored while a new
         ///   element is pending.
         /// </summary>
         /// <param name="buff">Buffer holding the target</param>
         /// <param name="offset">Index of the first character in <paramref name="buff" /></param>
         /// <param name="length">Number of characters</param>
         public virtual void PITarget(char[] buff, int offset, int length)
         {
             if (theNewElement == null)
             {
                 string target = MakeName(buff, offset, length);
                 thePITarget = target.Replace(':', '_');
             }
         }

         /// <summary>
         ///   Reports a processing instruction, using the previously recorded
         ///   target, to the content handler and then clears the target. Nothing
         ///   is reported while a new element is pending, when no target has been
         ///   recorded, or when the target is "xml" (compared ignoring case).
         ///   A single trailing '?' is removed from the data.
         /// </summary>
         /// <param name="buff">Buffer holding the instruction's data</param>
         /// <param name="offset">Index of the first data character in <paramref name="buff" /></param>
         /// <param name="length">Number of data characters</param>
         public virtual void PI(char[] buff, int offset, int length)
         {
             if (theNewElement != null || thePITarget == null)
             {
                 return;
             }
             if ("xml".Equals(thePITarget, StringComparison.OrdinalIgnoreCase))
             {
                 return;
             }

             // The data occupies buff[offset .. offset + length - 1], so its last
             // character must be located relative to offset, not to the buffer start.
             if (length > 0 && buff[offset + length - 1] == '?')
             {
                 length--; // remove trailing ?
             }
             theContentHandler.ProcessingInstruction(thePITarget, new string(buff, offset, length));
             thePITarget = null;
         }

         /// <summary>
         ///   Completes a start tag: the pending element, if any, is rectified onto
         ///   the stack, and when the element now on top of the stack has an empty
         ///   content model it is ended immediately.
         /// </summary>
         /// <param name="buff">Buffer passed on to <see cref="ETagBasic" /></param>
         /// <param name="offset">Offset passed on to <see cref="ETagBasic" /></param>
         /// <param name="length">Length passed on to <see cref="ETagBasic" /></param>
         public virtual void STagC(char[] buff, int offset, int length)
         {
             if (theNewElement != null)
             {
                 Rectify(theNewElement);

                 bool mustBeEmpty = theStack.Model == Schema.M_EMPTY;
                 if (mustBeEmpty)
                 {
                     // Force an immediate end tag
                     ETagBasic(buff, offset, length);
                 }
             }
         }

         /// <summary>
         ///   Completes an empty-element tag: the pending element, if any, is
         ///   rectified onto the stack and then ended immediately.
         /// </summary>
         /// <param name="buff">Buffer passed on to <see cref="ETagBasic" /></param>
         /// <param name="offset">Offset passed on to <see cref="ETagBasic" /></param>
         /// <param name="length">Length passed on to <see cref="ETagBasic" /></param>
         public virtual void STagE(char[] buff, int offset, int length)
         {
             if (theNewElement != null)
             {
                 Rectify(theNewElement);
                 // Force an immediate end tag
                 ETagBasic(buff, offset, length);
             }
         }

         //private char[] theCommentBuffer = new char[2000]; // LUCENENET: Never read
         /// <summary>
         ///   Passes a comment on to the lexical handler unchanged.
         /// </summary>
         /// <param name="buff">Buffer holding the comment text</param>
         /// <param name="offset">Index of the first character in <paramref name="buff" /></param>
         /// <param name="length">Number of characters</param>
         public virtual void Cmnt(char[] buff, int offset, int length)
             => theLexicalHandler.Comment(buff, offset, length);

         /// <summary>
         ///   Rectify the stack, pushing and popping as needed
         ///   so that the argument can be safely pushed.
         ///   Implied ancestors are created from the element's parent type until
         ///   some element on the stack can contain the outermost one; the stack is
         ///   then popped (restartably) down to that container and the chain of
         ///   elements is pushed. If no container is found the stack is left alone.
         /// </summary>
         /// <param name="e">The element that is about to be started</param>
         private void Rectify(Element e)
         {
             Element sp;
             while (true)
             {
                 // Search from the top of the stack for the nearest element that can contain e.
                 for (sp = theStack; sp != null; sp = sp.Next)
                 {
                     if (sp.CanContain(e))
                     {
                         break;
                     }
                 }
                 if (sp != null)
                 {
                     break; // found a container
                 }
                 // Nothing on the stack accepts e: wrap it in an element of its parent type and retry.
                 ElementType parentType = e.Parent;
                 if (parentType == null)
                 {
                     break; // no parent type to fall back on; sp stays null
                 }
                 var parent = new Element(parentType, defaultAttributes);
                 // Chain parent -> e through Next; e now names the outermost implied ancestor.
                 parent.Next = e;
                 e = parent;
             }
             if (sp == null)
             {
                 // Note that theNewElement is not cleared on this path.
                 return; // don't know what to do
             }
             // Pop down to the container, saving restartable elements on the way.
             while (theStack != sp)
             {
                 // Stop early rather than pop either of the two bottom-most stack entries.
                 if (theStack == null || theStack.Next == null || theStack.Next.Next == null)
                 {
                     break;
                 }
                 RestartablyPop();
             }
             // Push the chain outermost-first; Push relinks Next, so fetch the successor beforehand.
             while (e != null)
             {
                 Element nexte = e.Next;
                 // The <pcdata> pseudo-element is never pushed.
                 if (!e.Name.Equals("<pcdata>", StringComparison.Ordinal))
                 {
                     Push(e);
                 }
                 e = nexte;
                 // Re-push saved elements that fit between the new top and the next element.
                 Restart(e);
             }
             theNewElement = null;
         }

         /// <summary>
         ///   Gets the current value of the <c>theEntity</c> field.
         /// </summary>
         /// <returns>The stored entity value</returns>
         public virtual int GetEntity() => theEntity;

         /// <summary>
         ///   Return the argument as a valid XML name.
         ///   Letters and underscores are kept; digits, hyphens and periods are kept
         ///   but get an underscore in front when they would start a name part; the
         ///   first colon is kept (or turned into an underscore when colons are
         ///   translated); every other character is dropped. An empty result, or one
         ///   ending in a colon, has an underscore appended.
         ///   This no longer lowercases the result: we depend on Schema to
         ///   canonicalize case.
         /// </summary>
         /// <param name="buff">Buffer holding the raw name</param>
         /// <param name="offset">Index of the first character in <paramref name="buff" /></param>
         /// <param name="length">Number of characters</param>
         /// <returns>The interned, cleaned-up name</returns>
         private string MakeName(char[] buff, int offset, int length)
         {
             var result = new StringBuilder(length + 2);
             bool colonSeen = false;
             bool atPartStart = true; // true at the very start and right after the colon

             int end = offset + length;
             for (int i = offset; i < end; i++)
             {
                 char ch = buff[i];

                 if (char.IsLetter(ch) || ch == '_')
                 {
                     result.Append(ch);
                     atPartStart = false;
                     continue;
                 }

                 if (char.IsDigit(ch) || ch == '-' || ch == '.')
                 {
                     // These may not begin a name part, so shield them with an underscore.
                     if (atPartStart)
                     {
                         result.Append('_');
                     }
                     result.Append(ch);
                     atPartStart = false;
                     continue;
                 }

                 if (ch != ':' || colonSeen)
                 {
                     continue; // any other character, or a second colon, is dropped
                 }

                 // First colon: make sure the prefix is not empty, then start the local part.
                 colonSeen = true;
                 if (atPartStart)
                 {
                     result.Append('_');
                 }
                 result.Append(translateColons ? '_' : ':');
                 atPartStart = true;
             }

             if (result.Length == 0 || result[result.Length - 1] == ':')
             {
                 result.Append('_');
             }
             return result.ToString().Intern();
         }

         /// <summary>
         ///   Adapts a function that turns a <see cref="Stream" /> into a
         ///   <see cref="StreamReader" /> to the <see cref="IAutoDetector" /> interface.
         /// </summary>
         private class AutoDetectorDelegate : IAutoDetector
         {
             // The wrapped function; invoked once per AutoDetectingReader call.
             private readonly Func<Stream, StreamReader> _readerFactory;

             /// <summary>
             ///   Wraps the given function.
             /// </summary>
             /// <param name="delegate">Function producing a reader for a stream</param>
             public AutoDetectorDelegate(Func<Stream, StreamReader> @delegate) => _readerFactory = @delegate;

             /// <summary>
             ///   Returns whatever reader the wrapped function produces for the stream.
             /// </summary>
             /// <param name="stream">The stream to read from</param>
             /// <returns>The reader created by the wrapped function</returns>
             public TextReader AutoDetectingReader(Stream stream) => _readerFactory(stream);
         }

         // Default LexicalHandler implementation

         /// <summary>
         ///   Default lexical-handler callback for comments; intentionally does nothing.
         ///   Subclasses may override it.
         /// </summary>
         public virtual void Comment(char[] ch, int start, int length)
         {
         }

         /// <summary>
         ///   Default lexical-handler callback for the end of a CDATA section;
         ///   intentionally does nothing. Subclasses may override it.
         /// </summary>
         public virtual void EndCDATA()
         {
         }

         /// <summary>
         ///   Default lexical-handler callback for the end of a DTD;
         ///   intentionally does nothing. Subclasses may override it.
         /// </summary>
         public virtual void EndDTD()
         {
         }

         /// <summary>
         ///   Default lexical-handler callback for the end of an entity;
         ///   intentionally does nothing. Subclasses may override it.
         /// </summary>
         public virtual void EndEntity(string name)
         {
         }

         /// <summary>
         ///   Default lexical-handler callback for the start of a CDATA section;
         ///   intentionally does nothing. Subclasses may override it.
         /// </summary>
         public virtual void StartCDATA()
         {
         }

         /// <summary>
         ///   Default lexical-handler callback for the start of a DTD;
         ///   intentionally does nothing. Subclasses may override it.
         /// </summary>
         public virtual void StartDTD(string name, string publicid, string systemid)
         {
         }

         /// <summary>
         ///   Default lexical-handler callback for the start of an entity;
         ///   intentionally does nothing. Subclasses may override it.
         /// </summary>
         public virtual void StartEntity(string name)
         {
         }

         /// <summary>
         ///  Creates a new instance of <see cref="Parser" /> that acts as its own
         ///  content, lexical, DTD and error handler and as its own entity
         ///  resolver, with no element pending.
         /// </summary>
         public Parser()
         {
             // Until other handlers are installed, every callback role is played by the parser itself.
             theEntityResolver = this;
             theErrorHandler = this;
             theDTDHandler = this;
             theLexicalHandler = this;
             theContentHandler = this;

             // No start tag is being assembled yet.
             theNewElement = null;
         }
     }
 }