Fix for CONNECTORS-1325.

git-svn-id: https://svn.apache.org/repos/asf/manifoldcf/trunk@1749863 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index eafaa5f..f97ecef 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -3,6 +3,11 @@
 
 ======================= 2.5-dev =====================
 
+CONNECTORS-1325: Deal with bad SharePoint XML when a document has
+supplementary-plane UTF-16 characters.  Documents with these are skipped
+since Java can't represent them either.
+(Karl Wright)
+
 CONNECTORS-1324: Fix (again) the SharePoint connector's "use all metadata"
 option.
 (Konstantin Avdeev)
diff --git a/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java b/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
index 78ef852..309fd7c 100644
--- a/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
+++ b/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
@@ -132,7 +132,15 @@
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("SharePoint: getACLs xml response: "+xmlResponse);
       
-      XMLDoc doc = new XMLDoc( xmlResponse );
+      final XMLDoc doc;
+      try
+      {
+        doc = new XMLDoc( xmlResponse );
+      }
+      catch (ManifoldCFException e) {
+        return null;
+      }
+      
       ArrayList nodeList = new ArrayList();
 
       doc.processPath(nodeList, "*", null);
@@ -352,7 +360,15 @@
         Logging.connectors.debug("SharePoint: getDocumentACLs xml response: " + xmlResponse);
       }
 
-      XMLDoc doc = new XMLDoc( xmlResponse );
+      final XMLDoc doc;
+      try
+      {
+        doc = new XMLDoc( xmlResponse );
+      }
+      catch (ManifoldCFException e) {
+        return null;
+      }
+      
       ArrayList nodeList = new ArrayList();
 
       doc.processPath(nodeList, "*", null);
@@ -571,7 +587,14 @@
         if (Logging.connectors.isDebugEnabled())
           Logging.connectors.debug("SharePoint: getChildren xml response: "+xmlResponse);
 
-        XMLDoc doc = new XMLDoc( xmlResponse );
+        final XMLDoc doc;
+        try
+        {
+          doc = new XMLDoc( xmlResponse );
+        }
+        catch (ManifoldCFException e) {
+          return false;
+        }
 
         doc.processPath(nodeList, "*", null);
         if (nodeList.size() != 1)
@@ -812,7 +835,16 @@
       final String xmlResponse = lists[0].toString();
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("SharePoint: getDocLibID xml response: "+xmlResponse);
-      XMLDoc doc = new XMLDoc( xmlResponse );
+      
+      final XMLDoc doc;
+      try
+      {
+        doc = new XMLDoc( xmlResponse );
+      }
+      catch (ManifoldCFException e) {
+        return null;
+      }
+
       ArrayList nodeList = new ArrayList();
 
       doc.processPath(nodeList, "*", null);
@@ -1015,7 +1047,16 @@
       final String xmlResponse = lists[0].toString();
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("SharePoint: getListID xml response: "+xmlResponse);
-      XMLDoc doc = new XMLDoc( xmlResponse );
+      
+      final XMLDoc doc;
+      try
+      {
+        doc = new XMLDoc( xmlResponse );
+      }
+      catch (ManifoldCFException e) {
+        return null;
+      }
+
       ArrayList nodeList = new ArrayList();
 
       doc.processPath(nodeList, "*", null);
@@ -1215,7 +1256,15 @@
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("SharePoint: getVersions response: "+xmlResponse);
       
-      XMLDoc doc = new XMLDoc( xmlResponse );
+      final XMLDoc doc;
+      try
+      {
+        doc = new XMLDoc( xmlResponse );
+      }
+      catch (ManifoldCFException e) {
+        return null;
+      }
+
       ArrayList nodeList = new ArrayList();
 
       doc.processPath(nodeList, "*", null);
@@ -1641,7 +1690,15 @@
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("SharePoint: getAttachmentNames response: "+xmlResponse);
 
-      XMLDoc doc = new XMLDoc( xmlResponse );
+      final XMLDoc doc;
+      try
+      {
+        doc = new XMLDoc( xmlResponse );
+      }
+      catch (ManifoldCFException e) {
+        return null;
+      }
+
       ArrayList nodeList = new ArrayList();
 
       doc.processPath(nodeList, "*", null);
@@ -1766,8 +1823,16 @@
       final String xmlResponse = List[0].toString();
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("SharePoint: getFieldList xml response: "+xmlResponse);
+
+      final XMLDoc doc;
+      try
+      {
+        doc = new XMLDoc( xmlResponse );
+      }
+      catch (ManifoldCFException e) {
+        return null;
+      }
       
-      XMLDoc doc = new XMLDoc( xmlResponse );
       ArrayList nodeList = new ArrayList();
 
       doc.processPath(nodeList, "*", null);
@@ -1967,7 +2032,15 @@
           Logging.connectors.debug("SharePoint: getFieldValues xml response: '" +xmlResponse+ "'");
         }
 
-        XMLDoc doc = new XMLDoc( xmlResponse );
+        final XMLDoc doc;
+        try
+        {
+          doc = new XMLDoc( xmlResponse );
+        }
+        catch (ManifoldCFException e) {
+          return null;
+        }
+
         ArrayList nodeList = new ArrayList();
 
         doc.processPath(nodeList, "*", null);
@@ -2035,7 +2108,15 @@
         }
 
         ArrayList nodeList = new ArrayList();
-        XMLDoc doc = new XMLDoc(xmlResponse);
+        
+        final XMLDoc doc;
+        try
+        {
+          doc = new XMLDoc( xmlResponse );
+        }
+        catch (ManifoldCFException e) {
+          return null;
+        }
 
         doc.processPath(nodeList, "*", null);
         if (nodeList.size() != 1)
@@ -2174,7 +2255,16 @@
       final String xmlResponse = webList[0].toString();
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("SharePoint: getSites xml response: "+xmlResponse);
-      XMLDoc doc = new XMLDoc( xmlResponse );
+      
+      final XMLDoc doc;
+      try
+      {
+        doc = new XMLDoc( xmlResponse );
+      }
+      catch (ManifoldCFException e) {
+        return null;
+      }
+
       ArrayList nodeList = new ArrayList();
 
       doc.processPath(nodeList, "*", null);
@@ -2315,7 +2405,15 @@
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("SharePoint: getDocumentLibraries xml response: "+xmlResponse);
       
-      XMLDoc doc = new XMLDoc( xmlResponse );
+      final XMLDoc doc;
+      try
+      {
+        doc = new XMLDoc( xmlResponse );
+      }
+      catch (ManifoldCFException e) {
+        return null;
+      }
+
       ArrayList nodeList = new ArrayList();
 
       doc.processPath(nodeList, "*", null);
@@ -2472,7 +2570,15 @@
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("SharePoint: getLists xml response: "+xmlResponse);
 
-      XMLDoc doc = new XMLDoc( xmlResponse );
+      final XMLDoc doc;
+      try
+      {
+        doc = new XMLDoc( xmlResponse );
+      }
+      catch (ManifoldCFException e) {
+        return null;
+      }
+
       ArrayList nodeList = new ArrayList();
 
       doc.processPath(nodeList, "*", null);