Get attachment crawling working
git-svn-id: https://svn.apache.org/repos/asf/manifoldcf/branches/CONNECTORS-778@1525412 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java b/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
index bc19c92..d6525a6 100644
--- a/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
+++ b/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SPSProxyHelper.java
@@ -1556,8 +1556,6 @@
listCall.getAttachmentCollection( listName, itemID );
org.apache.axis.message.MessageElement[] List = listResponse.get_any();
- System.out.println(List[0].toString());
-
XMLDoc doc = new XMLDoc( List[0].toString() );
ArrayList nodeList = new ArrayList();
diff --git a/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java b/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
index dd24ec6..4190927 100644
--- a/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
+++ b/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
@@ -792,10 +792,10 @@
if (modifiedDate != null)
{
// Item has a modified date so we presume it exists.
-
+
Date modifiedDateValue = DateParser.parseISO8601Date(modifiedDate);
Date createdDateValue = DateParser.parseISO8601Date(createdDate);
-
+
// Build version string
String versionToken = modifiedDate;
@@ -873,12 +873,20 @@
url = null;
// If we have no modified or created date, it means that the parent has gone away, so we go away too.
- if (modifiedDate != null && createdDate != null && url != null)
+ if (modifiedDate != null && url != null)
{
// Item has a modified date so we presume it exists.
- Date modifiedDateValue = new Date(new Long(modifiedDate).longValue());
- Date createdDateValue = new Date(new Long(createdDate).longValue());
+ Date modifiedDateValue;
+ if (modifiedDate != null)
+ modifiedDateValue = new Date(new Long(modifiedDate).longValue());
+ else
+ modifiedDateValue = null;
+ Date createdDateValue;
+ if (createdDate != null)
+ createdDateValue = new Date(new Long(createdDate).longValue());
+ else
+ createdDateValue = null;
// Build version string
String versionToken = modifiedDate;
@@ -910,7 +918,7 @@
{
// Can't look up list ID, which means the list is gone, so delete
if (Logging.connectors.isDebugEnabled())
- Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because created date, modified data, or attachment url not found");
+ Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because modified date or attachment url not found");
rval[i] = null;
}
}
@@ -1510,7 +1518,7 @@
startPosition = unpackList(denyTokens,version,startPosition,'+');
startPosition = unpackDate(version,startPosition,modifiedDate);
startPosition = unpackDate(version,startPosition,createdDate);
-
+
if (modifiedDate.getTime() == 0L)
modifiedDate = null;
if (createdDate.getTime() == 0L)
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java b/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java
index 6602d46..269ea1c 100644
--- a/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/common/DateParser.java
@@ -32,29 +32,41 @@
{
if (isoDateValue == null)
return null;
- // There are a number of variations on the basic format.
- // We'll look for key characters to help is determine which is which.
- StringBuilder isoFormatString = new StringBuilder("yy");
- if (isoDateValue.length() > 2 && isoDateValue.charAt(2) != '-')
- isoFormatString.append("yy");
- isoFormatString.append("-MM-dd'T'HH:mm:ss");
- if (isoDateValue.indexOf(".") != -1)
- isoFormatString.append(".SSS");
- if (isoDateValue.endsWith("Z"))
- isoFormatString.append("'Z'");
+
+ boolean isMicrosoft = (isoDateValue.indexOf("T") == -1);
+
+ String formatString;
+ if (isMicrosoft)
+ {
+ formatString = "yyyy-MM-dd' 'HH:mm:ss";
+ }
else
{
- // We need to be able to parse either "-08:00" or "-0800". The 'Z' specifier only handles
- // -0800, unfortunately - see CONNECTORS-700. So we have to do some hackery to remove the colon.
- int colonIndex = isoDateValue.lastIndexOf(":");
- int dashIndex = isoDateValue.lastIndexOf("-");
- int plusIndex = isoDateValue.lastIndexOf("+");
- if (colonIndex != -1 &&
- ((dashIndex != -1 && colonIndex == dashIndex+3 && isNumeral(isoDateValue,dashIndex-1)) || (plusIndex != -1 && colonIndex == plusIndex+3 && isNumeral(isoDateValue,plusIndex-1))))
- isoDateValue = isoDateValue.substring(0,colonIndex) + isoDateValue.substring(colonIndex+1);
- isoFormatString.append("Z"); // RFC 822 time, including general time zones
+ // There are a number of variations on the basic format.
+ // We'll look for key characters to help us determine which is which.
+ StringBuilder isoFormatString = new StringBuilder("yy");
+ if (isoDateValue.length() > 2 && isoDateValue.charAt(2) != '-')
+ isoFormatString.append("yy");
+ isoFormatString.append("-MM-dd'T'HH:mm:ss");
+ if (isoDateValue.indexOf(".") != -1)
+ isoFormatString.append(".SSS");
+ if (isoDateValue.endsWith("Z"))
+ isoFormatString.append("'Z'");
+ else
+ {
+ // We need to be able to parse either "-08:00" or "-0800". The 'Z' specifier only handles
+ // -0800, unfortunately - see CONNECTORS-700. So we have to do some hackery to remove the colon.
+ int colonIndex = isoDateValue.lastIndexOf(":");
+ int dashIndex = isoDateValue.lastIndexOf("-");
+ int plusIndex = isoDateValue.lastIndexOf("+");
+ if (colonIndex != -1 &&
+ ((dashIndex != -1 && colonIndex == dashIndex+3 && isNumeral(isoDateValue,dashIndex-1)) || (plusIndex != -1 && colonIndex == plusIndex+3 && isNumeral(isoDateValue,plusIndex-1))))
+ isoDateValue = isoDateValue.substring(0,colonIndex) + isoDateValue.substring(colonIndex+1);
+ isoFormatString.append("Z"); // RFC 822 time, including general time zones
+ }
+ formatString = isoFormatString.toString();
}
- java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat(isoFormatString.toString());
+ java.text.DateFormat iso8601Format = new java.text.SimpleDateFormat(formatString);
try
{
return iso8601Format.parse(isoDateValue);