upstream-diffs/xerces2-j-2.9.1.mcf.patch - manifoldcf - Git at Google

 Index: src/org/apache/xerces/impl/XMLScanner.java
 ===================================================================
 --- src/org/apache/xerces/impl/XMLScanner.java	(.../xerces/java/tags/Xerces-J_2_9_1)	(revision 575832)
 +++ src/org/apache/xerces/impl/XMLScanner.java	(.../incubator/lcf/upstream/xerces2-j-2.9.1-mcf/trunk)	(working copy)
 @@ -823,6 +823,7 @@
                          String entityName = fEntityScanner.scanName();
                          if (entityName == null) {
                              reportFatalError("NameRequiredInReference", null);
 +                            entityName = "unknown";
                          }
                          else if (entityDepth == fEntityDepth) {
                              fStringBuffer2.append(entityName);
 @@ -1027,6 +1028,14 @@
                      if (XMLChar.isMarkup(c) || c == ']') {
                          fStringBuffer.append((char)fEntityScanner.scanChar());
                      }
 +                    else if (XMLChar.isHighSurrogate(c)) {
 +                        scanSurrogates(fStringBuffer);
 +                    }
 +                    else if (isInvalidLiteral(c)) {
 +                        reportFatalError("InvalidCharInSystemID",
 +                                new Object[] { Integer.toHexString(c) });
 +                        fEntityScanner.scanChar();
 +                    }
                  } while (fEntityScanner.scanLiteral(quote, ident) != quote);
                  fStringBuffer.append(ident);
                  ident = fStringBuffer;
 Index: src/org/apache/xerces/impl/Constants.java
 ===================================================================
 --- src/org/apache/xerces/impl/Constants.java	(.../xerces/java/tags/Xerces-J_2_9_1)	(revision 575832)
 +++ src/org/apache/xerces/impl/Constants.java	(.../incubator/lcf/upstream/xerces2-j-2.9.1-mcf/trunk)	(working copy)
 @@ -219,6 +219,9 @@

      /** Warn on undeclared element feature ("validation/warn-on-undeclared-elemdef"). */
      public static final String WARN_ON_UNDECLARED_ELEMDEF_FEATURE = "validation/warn-on-undeclared-elemdef";
 +
 +    /** Ignore misencoded characters feature */
 +    public static final String IGNORE_BADLY_ENCODED_CHARS = "ignore-badly-encoded-chars";

      /** Warn on duplicate entity declaration feature ("warn-on-duplicate-entitydef"). */
      public static final String WARN_ON_DUPLICATE_ENTITYDEF_FEATURE = "warn-on-duplicate-entitydef";
 Index: src/org/apache/xerces/impl/XMLEntityManager.java
 ===================================================================
 --- src/org/apache/xerces/impl/XMLEntityManager.java	(.../xerces/java/tags/Xerces-J_2_9_1)	(revision 575832)
 +++ src/org/apache/xerces/impl/XMLEntityManager.java	(.../incubator/lcf/upstream/xerces2-j-2.9.1-mcf/trunk)	(working copy)
 @@ -132,6 +132,10 @@
  	protected static final String PARSER_SETTINGS =
  		Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;

 +    /** Feature identifier: ignore badly encoded characters */
 +    protected static final String IGNORE_BADLY_ENCODED_CHARS =
 +        Constants.XERCES_FEATURE_PREFIX + Constants.IGNORE_BADLY_ENCODED_CHARS;
 +
      // property identifiers

      /** Property identifier: symbol table. */
 @@ -167,7 +171,8 @@
          EXTERNAL_PARAMETER_ENTITIES,
          ALLOW_JAVA_ENCODINGS,
          WARN_ON_DUPLICATE_ENTITYDEF,
 -        STANDARD_URI_CONFORMANT
 +        STANDARD_URI_CONFORMANT,
 +        IGNORE_BADLY_ENCODED_CHARS
      };

      /** Feature defaults. */
 @@ -177,6 +182,7 @@
          Boolean.TRUE,
          Boolean.FALSE,
          Boolean.FALSE,
 +        Boolean.FALSE,
          Boolean.FALSE
      };

 @@ -262,6 +268,12 @@
       */
      protected boolean fStrictURI;

 +    /**
 +    * allow badly encoded characters (skip them)
 +    * http://apache.org/xml/features/ignore-badly-encoded-chars
 +    */
 +    protected boolean fAllowBadlyEncodedChars;
 +
      // properties

      /**
 @@ -1310,6 +1322,13 @@
              fStrictURI = false;
          }

 +        try {
 +            fAllowBadlyEncodedChars = componentManager.getFeature(IGNORE_BADLY_ENCODED_CHARS);
 +        }
 +        catch (XMLConfigurationException e) {
 +            fAllowBadlyEncodedChars = false;
 +        }
 +
          // xerces properties
          fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
          fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
 @@ -2082,6 +2101,33 @@
      protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
          throws IOException {

 +        Reader internalReader = createInternalReader(inputStream, encoding, isBigEndian);
 +        if (fAllowBadlyEncodedChars)
 +        {
 +            // Wrap the reader so that bad characters are ignored rather than causing aborts
 +            return new LaxReader(internalReader);
 +        }
 +        return internalReader;
 +    }
 +
 +    /**
 +     * Creates a reader capable of reading the given input stream in
 +     * the specified encoding.
 +     *
 +     * @param inputStream  The input stream.
 +     * @param encoding     The encoding name that the input stream is
 +     *                     encoded using. If the user has specified that
 +     *                     Java encoding names are allowed, then the
 +     *                     encoding name may be a Java encoding name;
 +     *                     otherwise, it is an ianaEncoding name.
 +     * @param isBigEndian   For encodings (like uCS-4), whose names cannot
 +     *                      specify a byte order, this tells whether the order is bigEndian.  null menas
 +     *                      unknown or not relevant.
 +     *
 +     * @return Returns a reader.
 +     */
 +    protected Reader createInternalReader(InputStream inputStream, String encoding, Boolean isBigEndian)
 +        throws IOException {
          // if the encoding is UTF-8 use the optimized UTF-8 reader
          if (encoding == "UTF-8" || encoding == null) {
              if (DEBUG_ENCODINGS) {
 @@ -3025,6 +3071,9 @@
                  return -1;
              }
              if (fOffset == fData.length) {
 +                if (fCurrentEntity.mayReadChunks) {
 +                    return fInputStream.read();
 +                }
                  byte[] newData = new byte[fOffset << 1];
                  System.arraycopy(fData, 0, newData, 0, fOffset);
                  fData = newData;
 @@ -3138,4 +3187,105 @@
          }
      } // end of RewindableInputStream class

 +    protected static class LaxReader extends Reader
 +    {
 +        protected Reader internalReader;
 +
 +        public LaxReader(Reader internalReader)
 +        {
 +            this.internalReader = internalReader;
 +        }
 +
 +        public int read()
 +            throws IOException
 +        {
 +            // Since we need to be able to skip ahead at the point of error, and not drop huge amounts on the floor,
 +            // all read operations for this class are channeled through the single-character operation.  This is less
 +            // efficient, but hopefully not terribly so.
 +            try
 +            {
 +                return internalReader.read();
 +            }
 +            catch (org.apache.xerces.impl.io.MalformedByteSequenceException e)
 +            {
 +                // When this fails, it means we detected a bad character.
 +                // However, the bad character has already been pulled off the stream, so we are free to stuff in a "?" and
 +                // just keep going.
 +                return (int)'?';
 +            }
 +        }
 +
 +        public int read(char[] cbuf)
 +            throws IOException
 +        {
 +            return read(cbuf,0,cbuf.length);
 +        }
 +
 +        public int read(char[] cbuf,
 +            int off,
 +            int len)
 +            throws IOException
 +        {
 +            int amtRead = 0;
 +            while (amtRead < len)
 +            {
 +                int cval = read();
 +                if (cval == -1)
 +                {
 +                    if (amtRead == 0)
 +                        return -1;
 +                    else
 +                        return amtRead;
 +                }
 +                cbuf[off++] = (char)cval;
 +                amtRead++;
 +            }
 +            return amtRead;
 +        }
 +
 +        public long skip(long n)
 +            throws IOException
 +        {
 +            long skipped = 0;
 +            while (skipped < n)
 +            {
 +                int cval = read();
 +                if (cval == -1)
 +                    break;
 +                skipped++;
 +            }
 +            return skipped;
 +        }
 +
 +        public boolean ready()
 +            throws IOException
 +        {
 +            return internalReader.ready();
 +        }
 +
 +        public boolean markSupported()
 +        {
 +            return internalReader.markSupported();
 +        }
 +
 +        public void mark(int readAheadLimit)
 +            throws IOException
 +        {
 +            internalReader.mark(readAheadLimit);
 +        }
 +
 +        public void reset()
 +           throws IOException
 +        {
 +            internalReader.reset();
 +        }
 +
 +        public void close()
 +            throws IOException
 +        {
 +            internalReader.close();
 +        }
 +    }
 +
 +
  } // class XMLEntityManager
 Index: src/org/apache/xerces/impl/XMLDocumentScannerImpl.java
 ===================================================================
 --- src/org/apache/xerces/impl/XMLDocumentScannerImpl.java	(.../xerces/java/tags/Xerces-J_2_9_1)	(revision 575832)
 +++ src/org/apache/xerces/impl/XMLDocumentScannerImpl.java	(.../incubator/lcf/upstream/xerces2-j-2.9.1-mcf/trunk)	(working copy)
 @@ -783,6 +783,8 @@
                                  else {
                                      reportFatalError("MarkupNotRecognizedInProlog",
                                                       null);
 +                                    // Don't loop forever!
 +                                    fEntityScanner.scanChar();
                                  }
                              }
                              else if (isValidNameStartChar(fEntityScanner.peekChar())) {
 @@ -802,6 +804,8 @@
                              else {
                                  reportFatalError("MarkupNotRecognizedInProlog",
                                                   null);
 +                                // Don't loop forever!
 +                                fEntityScanner.scanChar();
                              }
                              break;
                          }
 @@ -872,6 +876,8 @@
                          }
                          case SCANNER_STATE_REFERENCE: {
                              reportFatalError("ReferenceIllegalInProlog", null);
 +                            // Don't loop forever!
 +                            fEntityScanner.scanChar();
                          }
                      }
                  } while (complete || again);
 @@ -1277,6 +1283,8 @@
                              else {
                                  reportFatalError("MarkupNotRecognizedInMisc",
                                                   null);
 +				// Skip forward one character, otherwise we loop forever.
 +				fEntityScanner.scanChar();
                              }
                              break;
                          }
	Index: src/org/apache/xerces/impl/XMLScanner.java
	===================================================================
	--- src/org/apache/xerces/impl/XMLScanner.java (.../xerces/java/tags/Xerces-J_2_9_1) (revision 575832)
	+++ src/org/apache/xerces/impl/XMLScanner.java (.../incubator/lcf/upstream/xerces2-j-2.9.1-mcf/trunk) (working copy)
	@@ -823,6 +823,7 @@
	String entityName = fEntityScanner.scanName();
	if (entityName == null) {
	reportFatalError("NameRequiredInReference", null);
	+ entityName = "unknown";
	}
	else if (entityDepth == fEntityDepth) {
	fStringBuffer2.append(entityName);
	@@ -1027,6 +1028,14 @@
	if (XMLChar.isMarkup(c) \|\| c == ']') {
	fStringBuffer.append((char)fEntityScanner.scanChar());
	}
	+ else if (XMLChar.isHighSurrogate(c)) {
	+ scanSurrogates(fStringBuffer);
	+ }
	+ else if (isInvalidLiteral(c)) {
	+ reportFatalError("InvalidCharInSystemID",
	+ new Object[] { Integer.toHexString(c) });
	+ fEntityScanner.scanChar();
	+ }
	} while (fEntityScanner.scanLiteral(quote, ident) != quote);
	fStringBuffer.append(ident);
	ident = fStringBuffer;
	Index: src/org/apache/xerces/impl/Constants.java
	===================================================================
	--- src/org/apache/xerces/impl/Constants.java (.../xerces/java/tags/Xerces-J_2_9_1) (revision 575832)
	+++ src/org/apache/xerces/impl/Constants.java (.../incubator/lcf/upstream/xerces2-j-2.9.1-mcf/trunk) (working copy)
	@@ -219,6 +219,9 @@

	/** Warn on undeclared element feature ("validation/warn-on-undeclared-elemdef"). */
	public static final String WARN_ON_UNDECLARED_ELEMDEF_FEATURE = "validation/warn-on-undeclared-elemdef";
	+
	+ /** Ignore misencoded characters feature */
	+ public static final String IGNORE_BADLY_ENCODED_CHARS = "ignore-badly-encoded-chars";

	/** Warn on duplicate entity declaration feature ("warn-on-duplicate-entitydef"). */
	public static final String WARN_ON_DUPLICATE_ENTITYDEF_FEATURE = "warn-on-duplicate-entitydef";
	Index: src/org/apache/xerces/impl/XMLEntityManager.java
	===================================================================
	--- src/org/apache/xerces/impl/XMLEntityManager.java (.../xerces/java/tags/Xerces-J_2_9_1) (revision 575832)
	+++ src/org/apache/xerces/impl/XMLEntityManager.java (.../incubator/lcf/upstream/xerces2-j-2.9.1-mcf/trunk) (working copy)
	@@ -132,6 +132,10 @@
	protected static final String PARSER_SETTINGS =
	Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;

	+ /** Feature identifier: ignore badly encoded characters */
	+ protected static final String IGNORE_BADLY_ENCODED_CHARS =
	+ Constants.XERCES_FEATURE_PREFIX + Constants.IGNORE_BADLY_ENCODED_CHARS;
	+
	// property identifiers

	/** Property identifier: symbol table. */
	@@ -167,7 +171,8 @@
	EXTERNAL_PARAMETER_ENTITIES,
	ALLOW_JAVA_ENCODINGS,
	WARN_ON_DUPLICATE_ENTITYDEF,
	- STANDARD_URI_CONFORMANT
	+ STANDARD_URI_CONFORMANT,
	+ IGNORE_BADLY_ENCODED_CHARS
	};

	/** Feature defaults. */
	@@ -177,6 +182,7 @@
	Boolean.TRUE,
	Boolean.FALSE,
	Boolean.FALSE,
	+ Boolean.FALSE,
	Boolean.FALSE
	};

	@@ -262,6 +268,12 @@
	*/
	protected boolean fStrictURI;

	+ /**
	+ * allow badly encoded characters (skip them)
	+ * http://apache.org/xml/features/ignore-badly-encoded-chars
	+ */
	+ protected boolean fAllowBadlyEncodedChars;
	+
	// properties

	/**
	@@ -1310,6 +1322,13 @@
	fStrictURI = false;
	}

	+ try {
	+ fAllowBadlyEncodedChars = componentManager.getFeature(IGNORE_BADLY_ENCODED_CHARS);
	+ }
	+ catch (XMLConfigurationException e) {
	+ fAllowBadlyEncodedChars = false;
	+ }
	+
	// xerces properties
	fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
	fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
	@@ -2082,6 +2101,33 @@
	protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
	throws IOException {

	+ Reader internalReader = createInternalReader(inputStream, encoding, isBigEndian);
	+ if (fAllowBadlyEncodedChars)
	+ {
	+ // Wrap the reader so that bad characters are ignored rather than causing aborts
	+ return new LaxReader(internalReader);
	+ }
	+ return internalReader;
	+ }
	+
	+ /**
	+ * Creates a reader capable of reading the given input stream in
	+ * the specified encoding.
	+ *
	+ * @param inputStream The input stream.
	+ * @param encoding The encoding name that the input stream is
	+ * encoded using. If the user has specified that
	+ * Java encoding names are allowed, then the
	+ * encoding name may be a Java encoding name;
	+ * otherwise, it is an ianaEncoding name.
	+ * @param isBigEndian For encodings (like uCS-4), whose names cannot
	+ * specify a byte order, this tells whether the order is bigEndian. null menas
	+ * unknown or not relevant.
	+ *
	+ * @return Returns a reader.
	+ */
	+ protected Reader createInternalReader(InputStream inputStream, String encoding, Boolean isBigEndian)
	+ throws IOException {
	// if the encoding is UTF-8 use the optimized UTF-8 reader
	if (encoding == "UTF-8" \|\| encoding == null) {
	if (DEBUG_ENCODINGS) {
	@@ -3025,6 +3071,9 @@
	return -1;
	}
	if (fOffset == fData.length) {
	+ if (fCurrentEntity.mayReadChunks) {
	+ return fInputStream.read();
	+ }
	byte[] newData = new byte[fOffset << 1];
	System.arraycopy(fData, 0, newData, 0, fOffset);
	fData = newData;
	@@ -3138,4 +3187,105 @@
	}
	} // end of RewindableInputStream class

	+ protected static class LaxReader extends Reader
	+ {
	+ protected Reader internalReader;
	+
	+ public LaxReader(Reader internalReader)
	+ {
	+ this.internalReader = internalReader;
	+ }
	+
	+ public int read()
	+ throws IOException
	+ {
	+ // Since we need to be able to skip ahead at the point of error, and not drop huge amounts on the floor,
	+ // all read operations for this class are channeled through the single-character operation. This is less
	+ // efficient, but hopefully not terribly so.
	+ try
	+ {
	+ return internalReader.read();
	+ }
	+ catch (org.apache.xerces.impl.io.MalformedByteSequenceException e)
	+ {
	+ // When this fails, it means we detected a bad character.
	+ // However, the bad character has already been pulled off the stream, so we are free to stuff in a "?" and
	+ // just keep going.
	+ return (int)'?';
	+ }
	+ }
	+
	+ public int read(char[] cbuf)
	+ throws IOException
	+ {
	+ return read(cbuf,0,cbuf.length);
	+ }
	+
	+ public int read(char[] cbuf,
	+ int off,
	+ int len)
	+ throws IOException
	+ {
	+ int amtRead = 0;
	+ while (amtRead < len)
	+ {
	+ int cval = read();
	+ if (cval == -1)
	+ {
	+ if (amtRead == 0)
	+ return -1;
	+ else
	+ return amtRead;
	+ }
	+ cbuf[off++] = (char)cval;
	+ amtRead++;
	+ }
	+ return amtRead;
	+ }
	+
	+ public long skip(long n)
	+ throws IOException
	+ {
	+ long skipped = 0;
	+ while (skipped < n)
	+ {
	+ int cval = read();
	+ if (cval == -1)
	+ break;
	+ skipped++;
	+ }
	+ return skipped;
	+ }
	+
	+ public boolean ready()
	+ throws IOException
	+ {
	+ return internalReader.ready();
	+ }
	+
	+ public boolean markSupported()
	+ {
	+ return internalReader.markSupported();
	+ }
	+
	+ public void mark(int readAheadLimit)
	+ throws IOException
	+ {
	+ internalReader.mark(readAheadLimit);
	+ }
	+
	+ public void reset()
	+ throws IOException
	+ {
	+ internalReader.reset();
	+ }
	+
	+ public void close()
	+ throws IOException
	+ {
	+ internalReader.close();
	+ }
	+ }
	+
	+
	} // class XMLEntityManager
	Index: src/org/apache/xerces/impl/XMLDocumentScannerImpl.java
	===================================================================
	--- src/org/apache/xerces/impl/XMLDocumentScannerImpl.java (.../xerces/java/tags/Xerces-J_2_9_1) (revision 575832)
	+++ src/org/apache/xerces/impl/XMLDocumentScannerImpl.java (.../incubator/lcf/upstream/xerces2-j-2.9.1-mcf/trunk) (working copy)
	@@ -783,6 +783,8 @@
	else {
	reportFatalError("MarkupNotRecognizedInProlog",
	null);
	+ // Don't loop forever!
	+ fEntityScanner.scanChar();
	}
	}
	else if (isValidNameStartChar(fEntityScanner.peekChar())) {
	@@ -802,6 +804,8 @@
	else {
	reportFatalError("MarkupNotRecognizedInProlog",
	null);
	+ // Don't loop forever!
	+ fEntityScanner.scanChar();
	}
	break;
	}
	@@ -872,6 +876,8 @@
	}
	case SCANNER_STATE_REFERENCE: {
	reportFatalError("ReferenceIllegalInProlog", null);
	+ // Don't loop forever!
	+ fEntityScanner.scanChar();
	}
	}
	} while (complete \|\| again);
	@@ -1277,6 +1283,8 @@
	else {
	reportFatalError("MarkupNotRecognizedInMisc",
	null);
	+ // Skip forward one character, otherwise we loop forever.
	+ fEntityScanner.scanChar();
	}
	break;
	}