Fix XML parsing bug when a token is split across successive buffers (VYSPER-265). Patch by Eilon Yardeni, thanks
git-svn-id: https://svn.apache.org/repos/asf/mina/vysper/branches/xep0114@1068168 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java b/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java
index 038803b..e96f48d 100644
--- a/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java
+++ b/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java
@@ -335,6 +335,7 @@
elements.clear();
nsResolver = new ParserNamespaceResolver();
sentStartDocument = false;
+ tokenizer.restart();
}
private void xmlDeclaration() {
diff --git a/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLTokenizer.java b/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLTokenizer.java
index d1a67fe..a2d491c 100644
--- a/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLTokenizer.java
+++ b/nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLTokenizer.java
@@ -38,7 +38,7 @@
START, IN_TAG, IN_STRING, IN_DOUBLE_ATTRIBUTE_VALUE, IN_SINGLE_ATTRIBUTE_VALUE, IN_TEXT, CLOSED
}
- private int lastPosition = 0;
+ private final IoBuffer buffer = IoBuffer.allocate(16).setAutoExpand(true);
private State state = State.START;
@@ -59,8 +59,6 @@
* @throws Exception
*/
public void parse(IoBuffer byteBuffer, CharsetDecoder decoder) throws SAXException {
- lastPosition = byteBuffer.position();
-
while (byteBuffer.hasRemaining() && state != State.CLOSED) {
char c = (char) byteBuffer.get();
@@ -70,6 +68,7 @@
state = State.IN_TAG;
} else {
state = State.IN_TEXT;
+ buffer.put((byte) c);
}
} else if (state == State.IN_TEXT) {
if (c == '<') {
@@ -92,9 +91,10 @@
} else if (isControlChar(c)) {
emit(c, byteBuffer);
} else if (Character.isWhitespace(c)) {
- lastPosition = byteBuffer.position();
+ buffer.clear();
} else {
state = State.IN_STRING;
+ buffer.put((byte) c);
}
} else if (state == State.IN_STRING) {
if (c == '>') {
@@ -109,28 +109,36 @@
emit(byteBuffer, CharsetUtil.UTF8_DECODER);
state = State.IN_TAG;
} else {
- // do nothing
+ buffer.put((byte) c);
}
} else if (state == State.IN_DOUBLE_ATTRIBUTE_VALUE) {
if (c == '"') {
emit(byteBuffer, decoder);
emit(c, byteBuffer);
state = State.IN_TAG;
+ } else {
+ buffer.put((byte) c);
}
} else if (state == State.IN_SINGLE_ATTRIBUTE_VALUE) {
if (c == '\'') {
emit(byteBuffer, decoder);
emit(c, byteBuffer);
state = State.IN_TAG;
+ } else {
+ buffer.put((byte) c);
}
}
}
-
- byteBuffer.position(lastPosition);
}
public void close() {
state = State.CLOSED;
+ buffer.clear();
+ }
+
+ public void restart() {
+ state = State.START;
+ buffer.clear();
}
private boolean isControlChar(char c) {
@@ -139,24 +147,15 @@
private void emit(char token, IoBuffer byteBuffer) throws SAXException {
listener.token(token, null);
-
- lastPosition = byteBuffer.position();
}
private void emit(IoBuffer byteBuffer, CharsetDecoder decoder) throws SAXException {
- int endPosition = byteBuffer.position();
- int oldLimit = byteBuffer.limit();
- byteBuffer.position(lastPosition);
- byteBuffer.limit(endPosition - 1);
-
try {
- listener.token(NO_CHAR, byteBuffer.getString(decoder));
+ buffer.flip();
+ listener.token(NO_CHAR, buffer.getString(decoder));
+ buffer.clear();
} catch (CharacterCodingException e) {
throw new SAXException(e);
}
- byteBuffer.limit(oldLimit);
- byteBuffer.position(endPosition);
- lastPosition = byteBuffer.position();
-
}
}
diff --git a/nbxml/src/test/java/org/apache/vysper/xml/sax/impl/ParseElementsTestCase.java b/nbxml/src/test/java/org/apache/vysper/xml/sax/impl/ParseElementsTestCase.java
index 2944c31..5fa184f 100644
--- a/nbxml/src/test/java/org/apache/vysper/xml/sax/impl/ParseElementsTestCase.java
+++ b/nbxml/src/test/java/org/apache/vysper/xml/sax/impl/ParseElementsTestCase.java
@@ -20,7 +20,11 @@
package org.apache.vysper.xml.sax.impl;
import java.util.Iterator;
+import java.util.Map.Entry;
+import org.apache.mina.core.buffer.IoBuffer;
+import org.apache.vysper.charset.CharsetUtil;
+import org.apache.vysper.xml.sax.NonBlockingXMLReader;
import org.apache.vysper.xml.sax.impl.TestHandler.TestEvent;
/**
@@ -166,4 +170,28 @@
assertNoMoreevents(events);
}
+
+ public void testSplitBuffers() throws Exception {
+ TestHandler handler = new TestHandler();
+ NonBlockingXMLReader reader = new DefaultNonBlockingXMLReader();
+
+ reader.setContentHandler(handler);
+ reader.setErrorHandler(handler);
+
+ String xml1 = "<root></r";
+ String xml2 = "oot>";
+
+ reader.parse(IoBuffer.wrap(xml1.getBytes("UTF-8")), CharsetUtil.UTF8_DECODER);
+ reader.parse(IoBuffer.wrap(xml2.getBytes("UTF-8")), CharsetUtil.UTF8_DECODER);
+
+ Iterator<TestEvent> events = handler.getEvents().iterator();
+
+ assertStartDocument(events.next());
+ assertStartElement("", "root", "root", events.next());
+ assertEndElement("", "root", "root", events.next());
+ assertEndDocument(events.next());
+
+ assertNoMoreevents(events);
+ }
+
}
\ No newline at end of file