Make sure we don't accumulate an unbounded amount of character data during long tagless regions.

git-svn-id: https://svn.apache.org/repos/asf/manifoldcf/branches/CONNECTORS-633@1444616 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/XMLFuzzyHierarchicalParseState.java b/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/XMLFuzzyHierarchicalParseState.java
index 47e93fe..51df3f5 100644
--- a/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/XMLFuzzyHierarchicalParseState.java
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/XMLFuzzyHierarchicalParseState.java
@@ -48,6 +48,10 @@
   /** Whether we're capturing escaped characters */
   protected boolean captureEscaped = false;
   
+  /** Maximum number of characters buffered before the buffer is flushed to the characters() method.
+  */
+  protected final static int MAX_CHUNK_SIZE = 4096;
+  
   /** Constructor with default properties.
   */
   public XMLFuzzyHierarchicalParseState()
@@ -89,12 +93,7 @@
   protected boolean noteTagEx(String tagName, String nameSpace, String localName, Map<String,String> attributes)
     throws ManifoldCFException
   {
-    if (characterBuffer.length() > 0)
-    {
-      if (currentContext != null)
-        currentContext.characters(characterBuffer.toString());
-      characterBuffer.setLength(0);
-    }
+    flushCharacterBuffer();
     if (currentContext != null)
       currentContext.startElement(nameSpace,localName,tagName,attributes);
     return false;
@@ -106,12 +105,7 @@
   protected boolean noteEndTagEx(String tagName, String nameSpace, String localName)
     throws ManifoldCFException
   {
-    if (characterBuffer.length() > 0)
-    {
-      if (currentContext != null)
-        currentContext.characters(characterBuffer.toString());
-      characterBuffer.setLength(0);
-    }
+    flushCharacterBuffer();
     if (currentContext != null)
       currentContext.endElement(nameSpace,localName,tagName);
     return false;
@@ -125,10 +119,29 @@
   protected boolean noteNormalCharacter(char thisChar)
     throws ManifoldCFException
   {
-    characterBuffer.append(thisChar);
+    appendToCharacterBuffer(thisChar);
     return false;
   }
+  
+  protected void appendToCharacterBuffer(char thisChar)
+    throws ManifoldCFException
+  {
+    characterBuffer.append(thisChar);
+    if (characterBuffer.length() >= MAX_CHUNK_SIZE)
+      flushCharacterBuffer();
+  }
 
+  protected void flushCharacterBuffer()
+    throws ManifoldCFException
+  {
+    if (characterBuffer.length() > 0)
+    {
+      if (currentContext != null)
+        currentContext.characters(characterBuffer.toString());
+      characterBuffer.setLength(0);
+    }
+  }
+  
   /** New version of the noteEscapedTag method.
   *@return true to halt further processing.
   */
@@ -151,7 +164,7 @@
     throws ManifoldCFException
   {
     if (captureEscaped)
-      characterBuffer.append(thisChar);
+      appendToCharacterBuffer(thisChar);
     return false;
   }
 
@@ -172,12 +185,7 @@
   public void finishUp()
     throws ManifoldCFException
   {
-    if (characterBuffer.length() > 0)
-    {
-      if (currentContext != null)
-        currentContext.characters(characterBuffer.toString());
-      characterBuffer.setLength(0);
-    }
+    flushCharacterBuffer();
     super.finishUp();
   }