Fix for CONNECTORS-1325.
git-svn-id: https://svn.apache.org/repos/asf/manifoldcf/trunk@1749863 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index eafaa5f..f97ecef 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -3,6 +3,11 @@
======================= 2.5-dev =====================
+CONNECTORS-1325: Handle malformed XML returned by SharePoint when a document
+contains supplementary-plane (surrogate-pair) UTF-16 characters. Such documents
+are skipped, since Java can't represent them either.
+(Karl Wright)
+
CONNECTORS-1324: Fix (again) the SharePoint connector's "use all metadata"
option.
(Konstantin Avdeev)
diff --git a/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java b/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
index 78ef852..309fd7c 100644
--- a/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
+++ b/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
@@ -132,7 +132,15 @@
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: getACLs xml response: "+xmlResponse);
- XMLDoc doc = new XMLDoc( xmlResponse );
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
+
ArrayList nodeList = new ArrayList();
doc.processPath(nodeList, "*", null);
@@ -352,7 +360,15 @@
Logging.connectors.debug("SharePoint: getDocumentACLs xml response: " + xmlResponse);
}
- XMLDoc doc = new XMLDoc( xmlResponse );
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
+
ArrayList nodeList = new ArrayList();
doc.processPath(nodeList, "*", null);
@@ -571,7 +587,14 @@
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: getChildren xml response: "+xmlResponse);
- XMLDoc doc = new XMLDoc( xmlResponse );
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return false;
+ }
doc.processPath(nodeList, "*", null);
if (nodeList.size() != 1)
@@ -812,7 +835,16 @@
final String xmlResponse = lists[0].toString();
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: getDocLibID xml response: "+xmlResponse);
- XMLDoc doc = new XMLDoc( xmlResponse );
+
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
+
ArrayList nodeList = new ArrayList();
doc.processPath(nodeList, "*", null);
@@ -1015,7 +1047,16 @@
final String xmlResponse = lists[0].toString();
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: getListID xml response: "+xmlResponse);
- XMLDoc doc = new XMLDoc( xmlResponse );
+
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
+
ArrayList nodeList = new ArrayList();
doc.processPath(nodeList, "*", null);
@@ -1215,7 +1256,15 @@
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: getVersions response: "+xmlResponse);
- XMLDoc doc = new XMLDoc( xmlResponse );
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
+
ArrayList nodeList = new ArrayList();
doc.processPath(nodeList, "*", null);
@@ -1641,7 +1690,15 @@
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: getAttachmentNames response: "+xmlResponse);
- XMLDoc doc = new XMLDoc( xmlResponse );
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
+
ArrayList nodeList = new ArrayList();
doc.processPath(nodeList, "*", null);
@@ -1766,8 +1823,16 @@
final String xmlResponse = List[0].toString();
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: getFieldList xml response: "+xmlResponse);
+
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
- XMLDoc doc = new XMLDoc( xmlResponse );
ArrayList nodeList = new ArrayList();
doc.processPath(nodeList, "*", null);
@@ -1967,7 +2032,15 @@
Logging.connectors.debug("SharePoint: getFieldValues xml response: '" +xmlResponse+ "'");
}
- XMLDoc doc = new XMLDoc( xmlResponse );
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
+
ArrayList nodeList = new ArrayList();
doc.processPath(nodeList, "*", null);
@@ -2035,7 +2108,15 @@
}
ArrayList nodeList = new ArrayList();
- XMLDoc doc = new XMLDoc(xmlResponse);
+
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
doc.processPath(nodeList, "*", null);
if (nodeList.size() != 1)
@@ -2174,7 +2255,16 @@
final String xmlResponse = webList[0].toString();
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: getSites xml response: "+xmlResponse);
- XMLDoc doc = new XMLDoc( xmlResponse );
+
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
+
ArrayList nodeList = new ArrayList();
doc.processPath(nodeList, "*", null);
@@ -2315,7 +2405,15 @@
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: getDocumentLibraries xml response: "+xmlResponse);
- XMLDoc doc = new XMLDoc( xmlResponse );
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
+
ArrayList nodeList = new ArrayList();
doc.processPath(nodeList, "*", null);
@@ -2472,7 +2570,15 @@
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: getLists xml response: "+xmlResponse);
- XMLDoc doc = new XMLDoc( xmlResponse );
+ final XMLDoc doc;
+ try
+ {
+ doc = new XMLDoc( xmlResponse );
+ }
+ catch (ManifoldCFException e) {
+ return null;
+ }
+
ArrayList nodeList = new ArrayList();
doc.processPath(nodeList, "*", null);