blob: 8f778b9c80348093b77df0e55579b8b7b6f977ea [file] [log] [blame]
/* $Id: SharePointRepository.java 996524 2010-09-13 13:38:01Z kwright $ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.connectors.sharepoint;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import org.apache.manifoldcf.crawler.system.Logging;
import org.apache.manifoldcf.crawler.system.ManifoldCF;
import org.apache.manifoldcf.core.common.*;
import org.apache.manifoldcf.connectorcommon.interfaces.*;
import org.apache.manifoldcf.connectorcommon.extmimemap.ExtensionMimeMap;
import org.apache.manifoldcf.core.util.URLEncoder;
import org.apache.manifoldcf.core.util.URLDecoder;
import java.io.*;
import java.util.Date;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Locale;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
import java.util.concurrent.TimeUnit;
import java.net.*;
import org.apache.log4j.Logger;
import org.apache.log4j.Level;
import org.apache.http.conn.HttpClientConnectionManager;
import org.apache.http.client.HttpClient;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.protocol.HttpRequestExecutor;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.config.SocketConfig;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.DefaultHostnameVerifier;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.NTCredentials;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultRedirectStrategy;
import org.apache.http.util.EntityUtils;
import org.apache.http.client.HttpRequestRetryHandler;
import org.apache.http.protocol.HttpContext;
import org.apache.http.HttpHost;
/** This is the "repository connector" for Microsoft SharePoint.
* Document identifiers for this connector come in three forms:
* (1) An "S" followed by the encoded subsite/library path, which represents the encoded relative path from the root site to a library. [deprecated and no longer supported];
* (2) A "D" followed by a subsite/library/folder/file path, which represents the relative path from the root site to a file. [deprecated and no longer supported]
* (3) Six different kinds of unencoded path, each of which starts with a "/" at the beginning, where the "/" represents the root site of the connection, as follows:
* /sitepath/ - the relative path to a site. The path MUST both begin and end with a single "/".
* /sitepath/libraryname// - the relative path to a library. The path MUST begin with a single "/" and end with "//".
* /sitepath/libraryname//folderfilepath - the relative path to a file. The path MUST begin with a single "/" and MUST include a "//" after the library, and must NOT end with a "/".
* /sitepath/listname/// - the relative path to a list. The path MUST begin with a single "/" and end with "///".
* /sitepath/listname///rowid - the relative path to a list item. The path MUST begin with a single "/" and MUST include a "///" after the list name, and must NOT end in a "/".
* /sitepath/listname///rowid//attachment_filename - the relative path to a list attachment. The path MUST begin with a single "/", MUST include a "///" after the list name, and
* MUST include a "//" separating the rowid from the filename.
*/
public class SharePointRepository extends org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector
{
// Revision-control identification string for this source file.
public static final String _rcsid = "@(#)$Id: SharePointRepository.java 996524 2010-09-13 13:38:01Z kwright $";
// Properties we need
// NOTE(review): wsddPathProperty is not referenced in the portion of the file reviewed here - confirm it is still used.
public final static String wsddPathProperty = "org.apache.manifoldcf.sharepoint.wsddpath";
// Activities we log
// This is the only activity name returned by getActivitiesList().
public final static String ACTIVITY_FETCH = "fetch";
// Added to System.currentTimeMillis() by getSession() to compute sessionTimeout; 300000 ms is five minutes.
protected final static long sessionExpirationInterval = 300000L;
// The following flags are derived from the configured server version and authority type in getSession().
// True unless the configured server version is "2.0".
private boolean supportsItemSecurity = false;
// True only when the configured server version is "2.0" or "3.0".
private boolean dspStsWorks = true;
// True unless the configured server version is "2.0".
private boolean attachmentsSupported = false;
// True when the configured authority type is "ActiveDirectory" (which is also the default when unset).
private boolean activeDirectoryAuthority = true;
// Session state filled in by getSession() and cleared by expireSession()/disconnect().
// Configured protocol; getSession() defaults it to "http" when unset.
private String serverProtocol = null;
// protocol://serverName, with ":port" appended only when the port is not the protocol default (443 for https, otherwise 80).
private String serverUrl = null;
// serverUrl followed by encodedServerLocation.
private String fileBaseUrl = null;
// User name exactly as configured.
private String userName = null;
// Portion of userName after the first backslash, or null when userName contains no backslash.
private String strippedUserName = null;
// Password, read through getObfuscatedParameter().
private String password = null;
// Portion of userName before the first backslash, or null when userName contains no backslash.
private String ntlmDomain = null;
// Assigned in connect() (not getSession()) because getBinNames() reads it; cleared only by disconnect().
private String serverName = null;
// Result of decodePath() applied to the normalized configured location.
private String serverLocation = null;
// Configured location with any trailing "/" removed and a leading "/" ensured when non-empty.
private String encodedServerLocation = null;
// Server port; -1 when no session is established.
private int serverPort = -1;
// SharePoint web-service helper; null means getSession() must (re)build the session.
private SPSProxyHelper proxy = null;
// Absolute time in milliseconds at or after which poll() expires the session.
private long sessionTimeout;
// SSL support
// Keystore parameter value; when non-null, getSession() builds the https socket factory from it.
private String keystoreData = null;
private IKeystoreManager keystoreManager = null;
// Pooled HTTP connection manager; isConnected() reports true while this is non-null.
private HttpClientConnectionManager connectionManager = null;
// HTTP client built on connectionManager and handed to the SPSProxyHelper.
private HttpClient httpClient = null;
// Name of the machine this connector runs on. getSession() passes it as the workstation
// argument of the NTCredentials it creates. Remains null if the lookup below fails.
private static String currentHost = null;
static
{
  try
  {
    // Resolve the local address and take its host name in one step
    currentHost = java.net.InetAddress.getLocalHost().getHostName();
  }
  catch (UnknownHostException ignored)
  {
    // Best effort only: when the local host cannot be resolved, currentHost simply stays null.
  }
}
// Suppress unwanted AXIS debug output by setting the level of the
// "org.apache.axis.ConfigurationException" logger to INFO.
static
{
  Logger.getLogger("org.apache.axis.ConfigurationException").setLevel(Level.INFO);
}
/** Deny access token for default authority.
* processDocuments() uses this as the single deny token for a list when native (non-forced) security is in effect.
*/
private final static String defaultAuthorityDenyToken = GLOBAL_DENY_TOKEN;
/** Default constructor.
* Performs no work of its own; configuration is supplied later through connect(), and the
* session itself is built on demand by getSession().
*/
public SharePointRepository()
{
  super();
}
/** Set up a session.
* When no SharePoint proxy helper exists yet, this reads the connection parameters, derives the
* version-dependent capability flags, the server URL and the credentials, builds a pooled HTTP
* client (optionally with a keystore-based SSL socket factory and an HTTP proxy), and creates the
* SPSProxyHelper. Whether or not a new session had to be built, the session expiration time is
* pushed out by sessionExpirationInterval.
*@throws ManifoldCFException if the server port or proxy port parameter is not a valid integer, or
* if the server user name parameter is missing. Failures raised while building the keystore manager
* are presumably propagated as well.
*/
protected void getSession()
  throws ManifoldCFException
{
  if (proxy == null)
  {
    // Capabilities that depend on the SharePoint server version
    String serverVersion = params.getParameter( SharePointConfig.PARAM_SERVERVERSION );
    if (serverVersion == null)
      serverVersion = "4.0";
    supportsItemSecurity = !serverVersion.equals("2.0");
    dspStsWorks = serverVersion.equals("2.0") || serverVersion.equals("3.0");
    attachmentsSupported = !serverVersion.equals("2.0");

    String authorityType = params.getParameter( SharePointConfig.PARAM_AUTHORITYTYPE );
    if (authorityType == null)
      authorityType = "ActiveDirectory";
    activeDirectoryAuthority = authorityType.equals("ActiveDirectory");

    serverProtocol = params.getParameter( SharePointConfig.PARAM_SERVERPROTOCOL );
    if (serverProtocol == null)
      serverProtocol = "http";

    // Server port: fall back to the protocol's default when none is configured
    try
    {
      String serverPortString = params.getParameter( SharePointConfig.PARAM_SERVERPORT );
      if (serverPortString == null || serverPortString.length() == 0)
        this.serverPort = serverProtocol.equals("https") ? 443 : 80;
      else
        this.serverPort = Integer.parseInt(serverPortString);
    }
    catch (NumberFormatException e)
    {
      throw new ManifoldCFException(e.getMessage(),e);
    }

    // Normalize the server location: no trailing slash, and a leading slash when non-empty
    serverLocation = params.getParameter(SharePointConfig.PARAM_SERVERLOCATION);
    if (serverLocation == null)
      serverLocation = "";
    if (serverLocation.endsWith("/"))
      serverLocation = serverLocation.substring(0,serverLocation.length()-1);
    if (serverLocation.length() > 0 && !serverLocation.startsWith("/"))
      serverLocation = "/" + serverLocation;
    encodedServerLocation = serverLocation;
    serverLocation = decodePath(serverLocation);

    userName = params.getParameter(SharePointConfig.PARAM_SERVERUSERNAME);
    // Without this check a missing user name surfaced as a bare NullPointerException from indexOf() below.
    if (userName == null)
      throw new ManifoldCFException("Missing required parameter '"+SharePointConfig.PARAM_SERVERUSERNAME+"'");
    password = params.getObfuscatedParameter(SharePointConfig.PARAM_SERVERPASSWORD);
    // A "domain\\user" style name is split into its NTLM domain and bare user name
    int index = userName.indexOf("\\");
    if (index != -1)
    {
      strippedUserName = userName.substring(index+1);
      ntlmDomain = userName.substring(0,index);
    }
    else
    {
      strippedUserName = null;
      ntlmDomain = null;
    }

    // HTTP proxy settings; the port defaults to 8080 when not configured
    String proxyHost = params.getParameter(SharePointConfig.PARAM_PROXYHOST);
    String proxyPortString = params.getParameter(SharePointConfig.PARAM_PROXYPORT);
    int proxyPort = 8080;
    if (proxyPortString != null && proxyPortString.length() > 0)
    {
      try
      {
        proxyPort = Integer.parseInt(proxyPortString);
      }
      catch (NumberFormatException e)
      {
        throw new ManifoldCFException(e.getMessage(),e);
      }
    }
    String proxyUsername = params.getParameter(SharePointConfig.PARAM_PROXYUSER);
    String proxyPassword = params.getObfuscatedParameter(SharePointConfig.PARAM_PROXYPASSWORD);
    String proxyDomain = params.getParameter(SharePointConfig.PARAM_PROXYDOMAIN);

    // Build the server URL, appending the port only when it is not the protocol default
    serverUrl = serverProtocol + "://" + serverName;
    int defaultPort = serverProtocol.equals("https") ? 443 : 80;
    if (serverPort != defaultPort)
      serverUrl += ":" + Integer.toString(serverPort);
    fileBaseUrl = serverUrl + encodedServerLocation;

    // Set up ssl if indicated
    keystoreData = params.getParameter(SharePointConfig.PARAM_SERVERKEYSTORE);

    int connectionTimeout = 60000;
    int socketTimeout = 900000;

    SSLConnectionSocketFactory myFactory = null;
    if (keystoreData != null)
    {
      keystoreManager = KeystoreManagerFactory.make("",keystoreData);
      myFactory = new SSLConnectionSocketFactory(keystoreManager.getSecureSocketFactory(), new DefaultHostnameVerifier());
    }
    else
    {
      myFactory = SSLConnectionSocketFactory.getSocketFactory();
    }

    PoolingHttpClientConnectionManager poolingConnectionManager = new PoolingHttpClientConnectionManager(RegistryBuilder.<ConnectionSocketFactory>create()
      .register("http", PlainConnectionSocketFactory.getSocketFactory())
      .register("https", myFactory)
      .build());
    poolingConnectionManager.setDefaultMaxPerRoute(1);
    poolingConnectionManager.setValidateAfterInactivity(2000);
    poolingConnectionManager.setDefaultSocketConfig(SocketConfig.custom()
      .setTcpNoDelay(true)
      .setSoTimeout(socketTimeout)
      .build());
    connectionManager = poolingConnectionManager;

    // Server credentials are registered only when the user name carried a domain
    CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
    if (strippedUserName != null)
    {
      credentialsProvider.setCredentials(
        new AuthScope(serverName,serverPort),
        new NTCredentials(strippedUserName, password, currentHost, ntlmDomain));
    }

    RequestConfig.Builder requestBuilder = RequestConfig.custom()
      .setCircularRedirectsAllowed(true)
      .setSocketTimeout(socketTimeout)
      .setExpectContinueEnabled(false)
      .setConnectTimeout(connectionTimeout)
      .setConnectionRequestTimeout(socketTimeout);

    // If there's a proxy, set that too.
    if (proxyHost != null && proxyHost.length() > 0)
    {
      // Configure proxy authentication
      if (proxyUsername != null && proxyUsername.length() > 0)
      {
        if (proxyPassword == null)
          proxyPassword = "";
        if (proxyDomain == null)
          proxyDomain = "";

        credentialsProvider.setCredentials(
          new AuthScope(proxyHost, proxyPort),
          new NTCredentials(proxyUsername, proxyPassword, currentHost, proxyDomain));
      }

      // Named so that it does not shadow the SPSProxyHelper field "proxy"
      HttpHost proxyHttpHost = new HttpHost(proxyHost, proxyPort);
      requestBuilder.setProxy(proxyHttpHost);
    }

    HttpClientBuilder builder = HttpClients.custom()
      .setConnectionManager(connectionManager)
      .disableAutomaticRetries()
      .setDefaultRequestConfig(requestBuilder.build())
      .setDefaultCredentialsProvider(credentialsProvider);
    builder.setRequestExecutor(new HttpRequestExecutor(socketTimeout))
      .setRedirectStrategy(new DefaultRedirectStrategy());

    httpClient = builder.build();

    // Assigning the helper last marks the session as established
    proxy = new SPSProxyHelper( serverUrl, encodedServerLocation, serverLocation, userName, password,
      org.apache.manifoldcf.connectorcommon.common.CommonsHTTPSender.class, "client-config.wsdd",
      httpClient );
  }
  // Every use of the session postpones its expiration
  sessionTimeout = System.currentTimeMillis() + sessionExpirationInterval;
}
/** Discard the current session.
* Clears everything getSession() derived from the configuration and shuts down the HTTP
* connection manager, if there is one. serverName is left untouched: it is assigned in
* connect() and read by getBinNames().
*/
protected void expireSession()
  throws ManifoldCFException
{
  // Derived URLs and credentials
  serverUrl = fileBaseUrl = null;
  userName = strippedUserName = password = ntlmDomain = null;
  serverLocation = encodedServerLocation = null;
  serverPort = -1;

  // SSL material, web-service helper and HTTP client
  keystoreData = null;
  keystoreManager = null;
  proxy = null;
  httpClient = null;

  // Release pooled connections last
  if (connectionManager != null)
  {
    connectionManager.shutdown();
    connectionManager = null;
  }
}
/** Report the activities this connector records in the log.
*@return an array holding the single activity name ACTIVITY_FETCH.
*/
@Override
public String[] getActivitiesList()
{
  final String[] supportedActivities = new String[1];
  supportedActivities[0] = ACTIVITY_FETCH;
  return supportedActivities;
}
/** Connect.
* Passes the configuration to the superclass and remembers the configured server name.
*@param configParameters is the set of configuration parameters for this connection.
*/
@Override
public void connect(ConfigParams configParameters)
{
  super.connect(configParameters);
  // getBinNames() reads serverName, so it is captured here rather than in getSession().
  final String configuredServerName = configParameters.getParameter( SharePointConfig.PARAM_SERVERNAME );
  this.serverName = configuredServerName;
}
/** Close the connection. Call this before discarding the repository connector.
* Forgets the server name, tears down any session state (including shutting down the HTTP
* connection manager), and then lets the superclass disconnect.
*/
@Override
public void disconnect()
  throws ManifoldCFException
{
  // The server name belongs to the connection rather than the session, so it is cleared here.
  serverName = null;
  // The remaining teardown is exactly what a session expiration does.
  expireSession();
  super.disconnect();
}
/** Get the bin names for a document identifier. A bin name identifies the queue a document is
* assigned to for throttling purposes; throttling limits the fetch rate within one bin, not the
* overall fetch rate across bins.
* This connector places every document in a single bin named after the configured server,
* regardless of the identifier passed in.
*@param documentIdentifier is the document identifier (not consulted).
*@return a one-element array containing the server name.
*/
@Override
public String[] getBinNames(String documentIdentifier)
{
  final String[] bins = new String[1];
  bins[0] = serverName;
  return bins;
}
/** Get the maximum number of documents the framework should hand to this connector in one batch.
*@return the maximum number; this connector always answers 1. (0 would indicate "unlimited".)
*/
@Override
public int getMaxDocumentRequest()
{
  // With the carrydown-based implementation, a batch size above 1 brings no benefit.
  final int maxDocumentsPerRequest = 1;
  return maxDocumentsPerRequest;
}
/** Test the connection. Returns a string describing the connection integrity.
* A session is established first; then the assembled server URL is validated and the proxy
* helper is asked to check the connection against the root site. Problems found by those two
* steps are reported through the returned string rather than by throwing.
*@return a description of the problem found, or else whatever the superclass check reports.
*@throws ManifoldCFException if the session itself cannot be set up.
*/
@Override
public String check()
  throws ManifoldCFException
{
  getSession();

  // Constructing the URL is done purely to validate it; the object itself is not needed.
  try
  {
    new URL( serverUrl );
  }
  catch ( MalformedURLException e )
  {
    return "Illegal SharePoint url: "+e.getMessage();
  }

  // Ask the server whether the root site is reachable
  try
  {
    proxy.checkConnection( "/", supportsItemSecurity );
  }
  catch ( ServiceInterruption e )
  {
    return "SharePoint temporarily unavailable: "+e.getMessage();
  }
  catch (ManifoldCFException e)
  {
    return e.getMessage();
  }

  return super.check();
}
/** Periodic housekeeping, called for all connectors that are connected but not in active use.
* Expires the session once its timeout has passed; if a connection manager is still present
* afterwards, closes connections that have been idle for 60 seconds or more.
*/
@Override
public void poll()
  throws ManifoldCFException
{
  final boolean sessionHasExpired = proxy != null && System.currentTimeMillis() >= sessionTimeout;
  if (sessionHasExpired)
    expireSession();

  // Expiring the session removes the connection manager, in which case nothing is left to trim.
  if (connectionManager == null)
    return;
  connectionManager.closeIdleConnections(60000L,TimeUnit.MILLISECONDS);
}
/** Assess whether this connector instance should actually be counted as being connected.
*@return true if the connector instance currently holds an HTTP connection manager.
*/
@Override
public boolean isConnected()
{
  if (connectionManager == null)
    return false;
  return true;
}
/** Request arbitrary connector information.
* This method is called directly from the API in order to allow API users to perform any one of several
* connector-specific queries. The commands handled here are:
*   fields/LIBRARY/SITEPATH     - fields of a library, emitted as "field" nodes;
*   listfields/LIST/SITEPATH    - fields of a list, emitted as "field" nodes;
*   sites/SITEPATH              - subsites, emitted as "site" nodes;
*   libraries/SITEPATH          - document libraries, emitted as "library" nodes;
*   lists/SITEPATH              - lists, emitted as "list" nodes.
* Every emitted node has a "name" child and a "display_name" child. Any other command is passed to
* the superclass. Failures of a handled command are reported as nodes in the output, not thrown.
*@param output is the response object, to be filled in by this method.
*@param command is the command, which is taken directly from the API request.
*@return true if the resource is found, false if not. In either case, output may be filled in.
*/
@Override
public boolean requestInfo(Configuration output, String command)
  throws ManifoldCFException
{
  // Recognized command prefixes, and the node type emitted for each (same index in both arrays).
  final String[] commandPrefixes = new String[]{"fields/","listfields/","sites/","libraries/","lists/"};
  final String[] nodeTypes = new String[]{"field","field","site","library","list"};

  int commandIndex = -1;
  for (int k = 0; k < commandPrefixes.length; k++)
  {
    if (command.startsWith(commandPrefixes[k]))
    {
      commandIndex = k;
      break;
    }
  }

  // Not one of ours: defer to the superclass, outside the try so its exceptions still propagate.
  if (commandIndex == -1)
    return super.requestInfo(output,command);

  final String argument = command.substring(commandPrefixes[commandIndex].length());
  try
  {
    // Each row is a {name, display_name} pair.
    final List<String[]> rows = new ArrayList<String[]>();
    if (commandIndex <= 1)
    {
      // Field queries: the text before the first "/" names the library or list, the rest is the site path.
      final int slashIndex = argument.indexOf("/");
      final String containerName;
      final String sitePath;
      if (slashIndex == -1)
      {
        containerName = argument;
        sitePath = "";
      }
      else
      {
        containerName = argument.substring(0,slashIndex);
        sitePath = argument.substring(slashIndex+1);
      }

      final Map<String,String> fieldSet;
      if (commandIndex == 0)
        fieldSet = getLibFieldList(sitePath,containerName);
      else
        fieldSet = getListFieldList(sitePath,containerName);

      for (Map.Entry<String,String> field : fieldSet.entrySet())
        rows.add(new String[]{field.getKey(),field.getValue()});
    }
    else
    {
      // Site, library and list queries: the whole argument is the site path.
      final List<NameValue> entries;
      if (commandIndex == 2)
        entries = getSites(argument);
      else if (commandIndex == 3)
        entries = getDocLibsBySite(argument);
      else
        entries = getListsBySite(argument);

      for (NameValue entry : entries)
        rows.add(new String[]{entry.getValue(),entry.getPrettyName()});
    }

    // Emit one node per row, appended at the end of the output.
    for (String[] row : rows)
    {
      final ConfigurationNode node = new ConfigurationNode(nodeTypes[commandIndex]);
      final ConfigurationNode nameNode = new ConfigurationNode("name");
      nameNode.setValue(row[0]);
      node.addChild(node.getChildCount(),nameNode);
      final ConfigurationNode displayNameNode = new ConfigurationNode("display_name");
      displayNameNode.setValue(row[1]);
      node.addChild(node.getChildCount(),displayNameNode);
      output.addChild(output.getChildCount(),node);
    }
  }
  catch (ServiceInterruption e)
  {
    ManifoldCF.createServiceInterruptionNode(output,e);
  }
  catch (ManifoldCFException e)
  {
    ManifoldCF.createErrorNode(output,e);
  }
  return true;
}
/** Queue "seed" documents. Seed documents are the starting places for crawling activity, and are
* registered by calling the appropriate methods of the passed-in ISeedingActivity object.
*
* In general a connector may use the seeding version string and seed time to report only what
* changed since the previous seeding pass, and it is always acceptable to seed MORE documents than
* strictly necessary. This connector does not attempt any of that: after making sure a session
* exists, it always seeds exactly one document, the root site "/", and returns an empty seeding
* version string.
*
* The connector will be connected before this method can be called.
*@param activities is the interface this method should use to perform whatever framework actions are desired.
*@param spec is a document specification (that comes from the job).
*@param lastSeedVersion is the last seeding version string for this job, or null if the job has no previous seeding version string.
*@param seedTime is the end of the time range of documents to consider, exclusive.
*@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
*@return an updated seeding version string, to be stored with the job (always the empty string here).
*/
@Override
public String addSeedDocuments(ISeedingActivity activities, Specification spec,
  String lastSeedVersion, long seedTime, int jobMode)
  throws ManifoldCFException, ServiceInterruption
{
  // Make sure a session is in place before seeding
  getSession();

  // The root site is the only seed
  final String rootSiteIdentifier = "/";
  activities.addSeedDocument(rootSiteIdentifier);

  final String seedingVersion = "";
  return seedingVersion;
}
// Fixed list of data names related to attachments.
// NOTE(review): presumably the carry-down data names for list attachments; the use site is outside the reviewed portion - confirm.
protected static final String[] attachmentDataNames = new String[]{"createdDate","modifiedDate","accessTokens","denyTokens","url","guids"};
/** Process a set of documents.
* This is the method that should cause each document to be fetched, processed, and the results either added
* to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
* The document specification allows this class to filter what is done based on the job.
* The connector will be connected before this method can be called.
*@param documentIdentifiers is the set of document identifiers to process.
*@param statuses are the currently-stored document versions for each document in the set of document identifiers
* passed in above.
*@param spec is the document specification (that comes from the job).
*@param activities is the interface this method should use to queue up new document references
* and ingest documents.
*@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
*@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
*/
@Override
public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
throws ManifoldCFException, ServiceInterruption
{
// Get the forced acls. (We need this only for the case where documents have their own acls)
String[] forcedAcls = getAcls(spec);
SystemMetadataDescription sDesc = new SystemMetadataDescription(spec);
// Look at the metadata attributes.
// So that the version strings are comparable, we will put them in an array first, and sort them.
String pathAttributeName = null;
MatchMap matchMap = new MatchMap();
int i = 0;
while (i < spec.getChildCount())
{
SpecificationNode n = spec.getChild(i++);
if (n.getType().equals("pathnameattribute"))
pathAttributeName = n.getAttributeValue("value");
else if (n.getType().equals("pathmap"))
{
// Path mapping info also needs to be looked at, because it affects what is
// ingested.
String pathMatch = n.getAttributeValue("match");
String pathReplace = n.getAttributeValue("replace");
matchMap.appendMatchPair(pathMatch,pathReplace);
}
}
// Calculate the part of the version string that comes from path name and mapping.
// This starts with = since ; is used by another optional component (the forced acls)
StringBuilder pathNameAttributeVersion = new StringBuilder();
if (pathAttributeName != null)
pathNameAttributeVersion.append("=").append(pathAttributeName).append(":").append(matchMap);
for (String documentIdentifier : documentIdentifiers)
{
// Check if we should abort
activities.checkJobStillActive();
getSession();
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug( "SharePoint: Getting version of '" + documentIdentifier + "'");
if ( documentIdentifier.startsWith("D") || documentIdentifier.startsWith("S") )
{
// Old-style document identifier. We don't recognize these anymore, so signal deletion.
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Removing old-style document identifier '"+documentIdentifier+"'");
activities.deleteDocument(documentIdentifier);
continue;
}
else if (documentIdentifier.startsWith("/"))
{
// New-style document identifier. A double-slash marks the separation between the library and folder/file levels.
// A triple-slash marks the separation between a list name and list row ID.
int dListSeparatorIndex = documentIdentifier.indexOf("///");
int dLibSeparatorIndex = documentIdentifier.indexOf("//");
if (dListSeparatorIndex != -1)
{
// === List-style identifier ===
if (dListSeparatorIndex == documentIdentifier.length() - 3)
{
// == List path! ==
if (!checkIncludeList(documentIdentifier.substring(0,documentIdentifier.length()-3),spec))
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: List specification no longer includes list '"+documentIdentifier+"' - removing");
activities.deleteDocument(documentIdentifier);
continue;
}
// Version string for a list
String versionString = "";
// Chained connectors always scan parent nodes, so they don't bother setting a version
String siteListPath = documentIdentifier.substring(0,documentIdentifier.length()-3);
int listCutoff = siteListPath.lastIndexOf( "/" );
String site = siteListPath.substring(0,listCutoff);
String listName = siteListPath.substring( listCutoff + 1 );
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug( "SharePoint: Document identifier is a list: '" + siteListPath + "'" );
String listID = proxy.getListID( encodePath(site), site, listName );
if (listID == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: GUID lookup failed for list '"+siteListPath+"' - deleting");
activities.deleteDocument(documentIdentifier);
continue;
}
String encodedSitePath = encodePath(site);
// Get the list's fields
Map<String,String> fieldNames = proxy.getFieldList( encodedSitePath, listID );
if (fieldNames == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Field list lookup failed for list '"+siteListPath+"' - deleting");
activities.deleteDocument(documentIdentifier);
continue;
}
// Note well: There's been a lot of back-and forth about this code.
// See CONNECTORS-1324.
// The fieldNames map returned by proxy.getFieldList() has the internal name as a key, and the display name as a value.
// Since we want the complete list of fields here, by *internal* name, we iterate over the keySet(), not the values.
String[] fields = new String[fieldNames.size()];
int j = 0;
for (String field : fieldNames.keySet())
{
fields[j++] = field;
}
String[] accessTokens;
String[] denyTokens;
if (forcedAcls == null)
{
// Security is off
accessTokens = new String[0];
denyTokens = new String[0];
}
else if (forcedAcls.length != 0)
{
// Forced security
accessTokens = forcedAcls;
denyTokens = new String[0];
}
else
{
// Security enabled, native security
accessTokens = proxy.getACLs( encodedSitePath, listID, activeDirectoryAuthority );
denyTokens = new String[]{defaultAuthorityDenyToken};
}
if (accessTokens == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Access token lookup failed for list '"+siteListPath+"' - deleting");
activities.deleteDocument(documentIdentifier);
continue;
}
ListItemStream fs = new ListItemStream( activities, encodedServerLocation, site, siteListPath, spec,
documentIdentifier, accessTokens, denyTokens, listID, fields );
boolean success = proxy.getChildren( fs, encodedSitePath , listID, dspStsWorks );
if (!success)
{
// Site/list no longer exists, so delete entry
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: No list found for list '"+siteListPath+"' - deleting");
activities.deleteDocument(documentIdentifier);
continue;
}
activities.noDocument(documentIdentifier,versionString);
}
else
{
// == List item or attachment path! ==
// Convert the modified document path to an unmodified one, plus a library path.
String decodedListPath = documentIdentifier.substring(0,dListSeparatorIndex);
String itemAndAttachment = documentIdentifier.substring(dListSeparatorIndex+2);
String decodedItemPath = decodedListPath + itemAndAttachment;
int cutoff = decodedListPath.lastIndexOf("/");
String sitePath = decodedListPath.substring(0,cutoff);
String list = decodedListPath.substring(cutoff+1);
String encodedSitePath = encodePath(sitePath);
int attachmentSeparatorIndex = itemAndAttachment.indexOf("//",1);
if (attachmentSeparatorIndex == -1)
{
// == List item path! ==
if (!checkIncludeListItem(decodedItemPath,spec))
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: List item '"+documentIdentifier+"' is no longer included - removing");
activities.deleteDocument(documentIdentifier);
continue;
}
// This file is included, so calculate a version string. This will include metadata info, so get that first.
MetadataInformation metadataInfo = getMetadataSpecification(decodedItemPath,spec);
String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens");
String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens");
String[] listIDs = activities.retrieveParentData(documentIdentifier, "guids");
String[] listFields = activities.retrieveParentData(documentIdentifier, "fields");
String[] displayURLs = activities.retrieveParentData(documentIdentifier, "displayURLs");
String listID;
if (listIDs.length >= 1)
listID = listIDs[0];
else
listID = null;
String displayURL;
if (displayURLs.length >= 1)
displayURL = displayURLs[0];
else
displayURL = null;
if (listID == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because list '"+decodedListPath+"' does not exist - removing");
activities.deleteDocument(documentIdentifier);
continue;
}
// Get the fields we want (internal field names only at this point), given the list of all fields (internal field names again).
String[] sortedMetadataFields = getInterestingFieldSetSorted(metadataInfo,listFields);
// Sort access tokens so they are comparable in the version string
java.util.Arrays.sort(accessTokens);
java.util.Arrays.sort(denyTokens);
// Next, get the actual timestamp field for the file.
List<String> metadataDescription = new ArrayList<String>();
metadataDescription.add("Modified");
metadataDescription.add("Created");
metadataDescription.add("ID");
metadataDescription.add("GUID");
// The document path includes the library, with no leading slash, and is decoded.
String decodedItemPathWithoutSite = decodedItemPath.substring(cutoff+1);
Map<String,String> values = proxy.getFieldValues( metadataDescription.toArray(new String[0]), encodedSitePath, listID, "/Lists/" + decodedItemPathWithoutSite, dspStsWorks );
if (values == null) {
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because of bad XML characters(?)");
activities.deleteDocument(documentIdentifier);
continue;
}
String modifiedDate = values.get("Modified");
String createdDate = values.get("Created");
String id = values.get("ID");
String guid = values.get("GUID");
if (modifiedDate == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because it has no modify date");
activities.deleteDocument(documentIdentifier);
continue;
}
// Item has a modified date so we presume it exists.
Date modifiedDateValue = DateParser.parseISO8601Date(modifiedDate);
Date createdDateValue = DateParser.parseISO8601Date(createdDate);
// Build version string
String versionToken = modifiedDate;
// Revamped version string on 9/21/2013 to make parseability better
StringBuilder sb = new StringBuilder();
packList(sb,sortedMetadataFields,'+');
packList(sb,accessTokens,'+');
packList(sb,denyTokens,'+');
packDate(sb,modifiedDateValue);
packDate(sb,createdDateValue);
pack(sb,id,'+');
pack(sb,guid,'+');
pack(sb,displayURL,'+');
// The rest of this is unparseable
sb.append(versionToken);
sb.append(pathNameAttributeVersion);
// Added 9/7/07
sb.append("_").append(fileBaseUrl);
//
String versionString = sb.toString();
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug( "SharePoint: Complete version string for '"+documentIdentifier+"': " + versionString);
// Before we index, we queue up any attachments
// Now, do any queuing that is needed.
if (attachmentsSupported)
{
String itemNumber = id;
List<NameValue> attachmentNames = proxy.getAttachmentNames( sitePath, listID, itemNumber );
// Now, queue up each attachment as a separate entry
for (NameValue attachmentName : attachmentNames)
{
// For attachments, we use the carry-down feature to get the data where we need it. That's why
// we unpacked the version information early above.
// No check for inclusion; if the list item is included, so is this
String[][] dataValues = new String[attachmentDataNames.length][];
if (createdDateValue == null)
dataValues[0] = new String[0];
else
dataValues[0] = new String[]{new Long(createdDateValue.getTime()).toString()};
if (modifiedDateValue == null)
dataValues[1] = new String[0];
else
dataValues[1] = new String[]{new Long(modifiedDateValue.getTime()).toString()};
if (accessTokens == null)
dataValues[2] = new String[0];
else
dataValues[2] = accessTokens;
if (denyTokens == null)
dataValues[3] = new String[0];
else
dataValues[3] = denyTokens;
dataValues[4] = new String[]{attachmentName.getPrettyName()};
dataValues[5] = new String[]{guid};
activities.addDocumentReference(documentIdentifier + "//" + attachmentName.getValue(),
documentIdentifier, null, attachmentDataNames, dataValues);
}
}
if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
continue;
// Convert the modified document path to an unmodified one, plus a library path.
String encodedItemPath = encodePath(decodedListPath.substring(0,cutoff) + "/Lists/" + decodedItemPath.substring(cutoff+1));
// Generate the URL we are going to use
String itemUrl = serverUrl + displayURL; //fileBaseUrl + encodedItemPath;
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug( "SharePoint: Processing list item '"+documentIdentifier+"'; url: '" + itemUrl + "'" );
// Fetch the metadata we will be indexing
Map<String,String> metadataValues = null;
if (sortedMetadataFields.length > 0)
{
metadataValues = proxy.getFieldValues( sortedMetadataFields, encodePath(sitePath), listID, "/Lists/" + decodedItemPath.substring(cutoff+1), dspStsWorks );
if (metadataValues == null)
{
// Item has vanished
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Item metadata fetch failure indicated that item is gone: '"+documentIdentifier+"' - removing");
activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,"NOMETADATA","List item metadata is missing",null);
activities.noDocument(documentIdentifier,versionString);
continue;
}
}
if (!activities.checkLengthIndexable(0L))
{
// Document too long (should never happen; length is 0)
activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,activities.EXCLUDED_LENGTH,"List item excluded due to content length (0)",null);
activities.noDocument( documentIdentifier, versionString );
continue;
}
InputStream is = new ByteArrayInputStream(new byte[0]);
try
{
RepositoryDocument data = new RepositoryDocument();
data.setBinary( is, 0L );
if (modifiedDateValue != null)
data.setModifiedDate(modifiedDateValue);
if (createdDateValue != null)
data.setCreatedDate(createdDateValue);
setDataACLs(data,accessTokens,denyTokens);
setPathAttribute(data,sDesc,documentIdentifier);
if (metadataValues != null)
{
Iterator<String> iter = metadataValues.keySet().iterator();
while (iter.hasNext())
{
String fieldName = iter.next();
String fieldData = metadataValues.get(fieldName);
data.addField(fieldName,fieldData);
}
}
data.addField("GUID",guid);
try
{
activities.ingestDocumentWithException( documentIdentifier, versionString, itemUrl , data );
}
catch (IOException e)
{
handleIOException(e,"reading document");
}
}
finally
{
try
{
is.close();
}
catch (IOException e)
{
handleIOException(e,"closing stream");
}
}
}
else
{
// == List item attachment path! ==
if (!checkIncludeListItemAttachment(decodedItemPath,spec))
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: List item attachment '"+documentIdentifier+"' is no longer included - removing");
activities.deleteDocument(documentIdentifier);
continue;
}
// To save work, we retrieve most of what we need in version info from the parent.
// Retrieve modified and created dates
String[] modifiedDateSet = activities.retrieveParentData(documentIdentifier, "modifiedDate");
String[] createdDateSet = activities.retrieveParentData(documentIdentifier, "createdDate");
String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens");
String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens");
String[] urlSet = activities.retrieveParentData(documentIdentifier, "url");
// Only one modifiedDate and createdDate can be used. If there's more than one, just pick one - the item will be reindexed
// anyhow.
String modifiedDate;
if (modifiedDateSet.length >= 1)
modifiedDate = modifiedDateSet[0];
else
modifiedDate = null;
String createdDate;
if (createdDateSet.length >= 1)
createdDate = createdDateSet[0];
else
createdDate = null;
String url;
if (urlSet.length >=1)
url = urlSet[0];
else
url = null;
// If we have no modified or created date, it means that the parent has gone away, so we go away too.
if (modifiedDate == null || url == null)
{
// Can't look up list ID, which means the list is gone, so delete
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because modified date or attachment url not found");
activities.deleteDocument(documentIdentifier);
continue;
}
// Item has a modified date so we presume it exists.
Date modifiedDateValue;
if (modifiedDate != null)
modifiedDateValue = new Date(new Long(modifiedDate).longValue());
else
modifiedDateValue = null;
Date createdDateValue;
if (createdDate != null)
createdDateValue = new Date(new Long(createdDate).longValue());
else
createdDateValue = null;
// Build version string
String versionToken = modifiedDate;
StringBuilder sb = new StringBuilder();
// Pack the URL to get the data from
pack(sb,url,'+');
// Do the acls. If we get this far, we are guaranteed to have them, but we need to sort.
java.util.Arrays.sort(accessTokens);
java.util.Arrays.sort(denyTokens);
packList(sb,accessTokens,'+');
packList(sb,denyTokens,'+');
packDate(sb,modifiedDateValue);
packDate(sb,createdDateValue);
// The rest of this is unparseable
sb.append(versionToken);
sb.append(pathNameAttributeVersion);
sb.append("_").append(fileBaseUrl);
//
String versionString = sb.toString();
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug( "SharePoint: Complete version string for '"+documentIdentifier+"': " + versionString);
if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
continue;
// We need the list ID, which we've already fetched, so grab that from the parent data.
String[] guids = activities.retrieveParentData(documentIdentifier, "guids");
String guid;
if (guids.length >= 1)
guid = guids[0];
else
guid = null;
if (guid == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Skipping attachment '"+documentIdentifier+"' because no parent guid found");
activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,"NOGUID","List item attachment GUID is missing",null);
activities.noDocument(documentIdentifier,versionString);
continue;
}
int lastIndex = url.lastIndexOf("/");
guid = guid + ":" + url.substring(lastIndex+1);
// Fetch and index. This also filters documents based on output connector restrictions.
String fileUrl = serverUrl + encodePath(url);
String fetchUrl = fileUrl;
fetchAndIndexFile(activities, documentIdentifier, versionString, fileUrl, fetchUrl,
accessTokens, denyTokens, createdDateValue, modifiedDateValue, null, guid, sDesc);
}
}
}
else if (dLibSeparatorIndex != -1)
{
// === Library-style identifier ===
if (dLibSeparatorIndex == documentIdentifier.length() - 2)
{
// Library path!
if (!checkIncludeLibrary(documentIdentifier.substring(0,documentIdentifier.length()-2),spec))
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Library specification no longer includes library '"+documentIdentifier+"' - removing");
activities.deleteDocument(documentIdentifier);
continue;
}
// This is the path for the library: No versioning
String versionString = "";
// Chained document parents are always rescanned
String siteLibPath = documentIdentifier.substring(0,documentIdentifier.length()-2);
int libCutoff = siteLibPath.lastIndexOf( "/" );
String site = siteLibPath.substring(0,libCutoff);
String libName = siteLibPath.substring( libCutoff + 1 );
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug( "SharePoint: Document identifier is a library: '" + siteLibPath + "'" );
String libID = proxy.getDocLibID( encodePath(site), site, libName );
if (libID == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: GUID lookup failed for library '"+siteLibPath+"' - deleting");
activities.deleteDocument(documentIdentifier);
continue;
}
String encodedSitePath = encodePath(site);
// Get the lib's fields
Map<String,String> fieldNames = proxy.getFieldList( encodedSitePath, libID );
if (fieldNames == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Field list lookup failed for library '"+siteLibPath+"' - deleting");
activities.deleteDocument(documentIdentifier);
continue;
}
// See CONNECTORS-1324. We want internal field names only,
// which are the keys of the map.
String[] fields = new String[fieldNames.size()];
int j = 0;
for (String field : fieldNames.keySet())
{
fields[j++] = field;
}
String[] accessTokens;
String[] denyTokens;
if (forcedAcls == null)
{
// Security is off
accessTokens = new String[0];
denyTokens = new String[0];
}
else if (forcedAcls.length != 0)
{
// Forced security
accessTokens = forcedAcls;
denyTokens = new String[0];
}
else
{
// Security enabled, native security
accessTokens = proxy.getACLs( encodedSitePath, libID, activeDirectoryAuthority );
denyTokens = new String[]{defaultAuthorityDenyToken};
}
if (accessTokens == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Access token lookup failed for library '"+siteLibPath+"' - deleting");
activities.deleteDocument(documentIdentifier);
continue;
}
FileStream fs = new FileStream( activities, encodedServerLocation, site, siteLibPath, spec,
documentIdentifier, accessTokens, denyTokens, libID, fields );
boolean success = proxy.getChildren( fs, encodedSitePath , libID, dspStsWorks );
if (!success)
{
// Site/library no longer exists, so delete entry
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: No list found for library '"+siteLibPath+"' - deleting");
activities.deleteDocument(documentIdentifier);
continue;
}
activities.noDocument(documentIdentifier,versionString);
}
else
{
// == Document path ==
// Convert the modified document path to an unmodified one, plus a library path.
String decodedLibPath = documentIdentifier.substring(0,dLibSeparatorIndex);
String decodedDocumentPath = decodedLibPath + documentIdentifier.substring(dLibSeparatorIndex+1);
if (!checkIncludeFile(decodedDocumentPath,spec))
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' is no longer included - removing");
activities.deleteDocument(documentIdentifier);
continue;
}
// This file is included, so calculate a version string. This will include metadata info, so get that first.
MetadataInformation metadataInfo = getMetadataSpecification(decodedDocumentPath,spec);
int lastIndex = decodedLibPath.lastIndexOf("/");
String sitePath = decodedLibPath.substring(0,lastIndex);
String lib = decodedLibPath.substring(lastIndex+1);
// Retrieve the carry-down data we will be using.
// Note well: for sharepoint versions that include document/folder acls, these access tokens will be ignored,
// but they will still be carried down nonetheless, in case someone switches versions on us.
String[] accessTokens = activities.retrieveParentData(documentIdentifier, "accessTokens");
String[] denyTokens = activities.retrieveParentData(documentIdentifier, "denyTokens");
String[] libIDs = activities.retrieveParentData(documentIdentifier, "guids");
String[] libFields = activities.retrieveParentData(documentIdentifier, "fields");
String libID;
if (libIDs.length >= 1)
libID = libIDs[0];
else
libID = null;
if (libID == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because library '"+decodedLibPath+"' does not exist - removing");
activities.deleteDocument(documentIdentifier);
continue;
}
String encodedSitePath = encodePath(sitePath);
// Get the fields we want (internal field names only at this point), given the list of all fields (internal field names again).
String[] sortedMetadataFields = getInterestingFieldSetSorted(metadataInfo,libFields);
// Sort access tokens
java.util.Arrays.sort(accessTokens);
java.util.Arrays.sort(denyTokens);
// Next, get the actual timestamp field for the file.
List<String> metadataDescription = new ArrayList<String>();
metadataDescription.add("Last_x0020_Modified");
metadataDescription.add("Modified");
metadataDescription.add("Created");
metadataDescription.add("GUID");
// The document path includes the library, with no leading slash, and is decoded.
int cutoff = decodedLibPath.lastIndexOf("/");
String decodedDocumentPathWithoutSite = decodedDocumentPath.substring(cutoff);
Map<String,String> values = proxy.getFieldValues( metadataDescription.toArray(new String[0]), encodedSitePath, libID, decodedDocumentPathWithoutSite, dspStsWorks );
if (values == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because it has bad characters(?)");
activities.deleteDocument(documentIdentifier);
continue;
}
String modifiedDate = values.get("Modified");
String createdDate = values.get("Created");
String guid = values.get("GUID");
String modifyDate = values.get("Last_x0020_Modified");
if (modifyDate == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Can't get version of '"+documentIdentifier+"' because it has no modify date");
activities.deleteDocument(documentIdentifier);
continue;
}
// Item has a modified date, so we presume it exists
Date modifiedDateValue = DateParser.parseISO8601Date(modifiedDate);
Date createdDateValue = DateParser.parseISO8601Date(createdDate);
// Build version string
String versionToken = modifyDate;
if (supportsItemSecurity)
{
// Do the acls.
if (forcedAcls == null)
{
// Security is off
accessTokens = new String[0];
denyTokens = new String[0];
}
else if (forcedAcls.length > 0)
{
// Security on, forced acls
accessTokens = forcedAcls;
denyTokens = new String[0];
}
else
{
// Security on, is native
accessTokens = proxy.getDocumentACLs( encodedSitePath, encodePath(decodedDocumentPath), activeDirectoryAuthority );
denyTokens = new String[]{defaultAuthorityDenyToken};
}
}
if (accessTokens == null)
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Couldn't get access tokens for item '"+decodedDocumentPath+"'; removing document '"+documentIdentifier+"'");
activities.deleteDocument(documentIdentifier);
continue;
}
// Revamped version string on 9/21/2013 to make parseability better
StringBuilder sb = new StringBuilder();
packList(sb,sortedMetadataFields,'+');
packList(sb,accessTokens,'+');
packList(sb,denyTokens,'+');
packDate(sb,modifiedDateValue);
packDate(sb,createdDateValue);
pack(sb,guid,'+');
// The rest of this is unparseable
sb.append(versionToken);
sb.append(pathNameAttributeVersion);
// Added 9/7/07
sb.append("_").append(fileBaseUrl);
//
String versionString = sb.toString();
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug( "SharePoint: Complete version string for '"+documentIdentifier+"': " + versionString);
if (!activities.checkDocumentNeedsReindexing(documentIdentifier,versionString))
continue;
// Convert the modified document path to an unmodified one, plus a library path.
String encodedDocumentPath = encodePath(decodedDocumentPath);
// Parse what we need out of version string.
// Generate the URL we are going to use
String fileUrl = fileBaseUrl + encodedDocumentPath;
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug( "SharePoint: Processing file '"+documentIdentifier+"'; url: '" + fileUrl + "'" );
// First, fetch the metadata we plan to index.
Map<String,String> metadataValues = null;
if (sortedMetadataFields.length > 0)
{
metadataValues = proxy.getFieldValues( sortedMetadataFields, encodePath(sitePath), libID, decodedDocumentPath.substring(cutoff), dspStsWorks );
if (metadataValues == null)
{
// Document has vanished
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Document metadata fetch failure indicated that document is gone: '"+documentIdentifier+"' - removing");
activities.recordActivity(null,ACTIVITY_FETCH,null,documentIdentifier,"NOMETADATA","Document metadata is missing",null);
activities.noDocument(documentIdentifier,versionString);
continue;
}
}
// Fetch and index. This also filters documents based on output connector restrictions.
fetchAndIndexFile(activities, documentIdentifier, versionString, fileUrl, serverUrl + encodedServerLocation + encodedDocumentPath,
accessTokens, denyTokens, createdDateValue, modifiedDateValue, metadataValues, guid, sDesc);
}
}
else
{
// === Site-style identifier ===
String sitePath = documentIdentifier.substring(0,documentIdentifier.length()-1);
if (sitePath.length() == 0)
sitePath = "/";
if (!checkIncludeSite(sitePath,spec))
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Site specification no longer includes site '"+documentIdentifier+"' - removing");
activities.deleteDocument(documentIdentifier);
continue;
}
String versionString = "";
activities.noDocument(documentIdentifier,versionString);
// Strip off the trailing "/" to get the site name.
String decodedSitePath = documentIdentifier.substring(0,documentIdentifier.length()-1);
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug( "SharePoint: Document identifier is a site: '" + decodedSitePath + "'" );
// Look at subsites
List<NameValue> subsites = proxy.getSites( encodePath(decodedSitePath) );
if (subsites != null)
{
for (NameValue subSiteName : subsites)
{
String newPath = decodedSitePath + "/" + subSiteName.getValue();
String encodedNewPath = encodePath(newPath);
if ( checkIncludeSite(newPath,spec) )
activities.addDocumentReference(newPath + "/");
}
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: No permissions to access subsites of '"+decodedSitePath+"' - skipping");
}
// Look at libraries
List<NameValue> libraries = proxy.getDocumentLibraries( encodePath(decodedSitePath), decodedSitePath );
if (libraries != null)
{
for (NameValue library : libraries)
{
String newPath = decodedSitePath + "/" + library.getValue();
if (checkIncludeLibrary(newPath,spec))
activities.addDocumentReference(newPath + "//");
}
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: No permissions to access libraries of '"+decodedSitePath+"' - skipping");
}
// Look at lists
List<NameValue> lists = proxy.getLists( encodePath(decodedSitePath), decodedSitePath );
if (lists != null)
{
for (NameValue list : lists)
{
String newPath = decodedSitePath + "/" + list.getValue();
if (checkIncludeList(newPath,spec))
activities.addDocumentReference(newPath + "///");
}
}
else
{
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: No permissions to access lists of '"+decodedSitePath+"' - skipping");
}
}
}
else
throw new ManifoldCFException("Invalid document identifier discovered: '"+documentIdentifier+"'");
}
}
/** Append a date to a version-string buffer.
* A null date is written as the single marker "-". A non-null date is written as the marker "+"
* followed by its epoch-millisecond value (as decimal text), which is handed to pack() with '+' as the delimiter argument.
*@param sb is the buffer to append to.
*@param dateValue is the date to write, or null if there is none.
*/
protected static void packDate(StringBuilder sb, Date dateValue)
{
  // Absent date: emit the "no value" marker and stop.
  if (dateValue == null)
  {
    sb.append("-");
    return;
  }
  // Present date: marker first, then the packed millisecond count.
  sb.append("+");
  String millisText = Long.toString(dateValue.getTime());
  pack(sb,millisText,'+');
}
/** Read a date written by packDate() back out of a version string.
* If there is a character at the given index it is consumed as the presence marker. When that marker is '+',
* the following value is read with unpack() and, if non-empty, parsed as epoch milliseconds and stored into theDate.
* For any other marker, theDate is left untouched.
*@param value is the string being parsed.
*@param index is the position to start reading at.
*@param theDate is the date object to update in place when a value is found.
*@return the index of the first character after what was consumed.
*/
protected static int unpackDate(String value, int index, Date theDate)
{
  // Nothing left to read: leave the index where it is.
  if (index >= value.length())
    return index;

  // Consume the marker character regardless of what it is.
  char marker = value.charAt(index);
  index++;
  if (marker != '+')
    return index;

  StringBuilder millisText = new StringBuilder();
  index = unpack(millisText,value,index,'+');
  if (millisText.length() > 0)
    theDate.setTime(Long.parseLong(millisText.toString()));
  return index;
}
/** Compute the sorted, duplicate-free set of metadata field names to request.
* When the metadata specification asks for all metadata, the candidates are every entry of allFields;
* otherwise they are the fields the specification lists explicitly.
*@param metadataInfo is the metadata specification to consult.
*@param allFields is the complete list of available field names; only read when all metadata is requested.
*@return the selected field names, de-duplicated and sorted in natural String order.
*/
protected String[] getInterestingFieldSetSorted(MetadataInformation metadataInfo, String[] allFields)
{
  // Pick the source of candidate names up front.
  String[] candidates;
  if (metadataInfo.getAllMetadata())
    candidates = allFields;
  else
    candidates = metadataInfo.getMetadataFields();

  // A set removes duplicates; ordering is imposed by the sort below.
  Set<String> uniqueFields = new HashSet<String>();
  for (String candidate : candidates)
  {
    uniqueFields.add(candidate);
  }

  String[] result = uniqueFields.toArray(new String[uniqueFields.size()]);
  java.util.Arrays.sort(result);
  return result;
}
/** Method that fetches and indexes a file fetched from a SharePoint URL, with appropriate error handling
* etc.
* The document is first checked against the URL, mime type, and date filters exposed by the activities object;
* if any check fails, noDocument() is called and nothing is fetched. Otherwise the content is read from fetchUrl
* into a local temporary file, its length is checked, and the document is ingested under fileUrl.
* If an error code has been set by the time the method exits, an ACTIVITY_FETCH record is written.
*@param activities is the activities object used for the filter checks, ingestion, and activity recording.
*@param documentIdentifier is the identifier of the document being processed; also used to derive mime type and file name.
*@param version is the version string passed to noDocument() and ingestDocumentWithException().
*@param fileUrl is the URL the document is indexed under, and the one checked by checkURLIndexable().
*@param fetchUrl is the URL the content is actually read from.
*@param accessTokens are the access tokens passed to setDataACLs().
*@param denyTokens are the deny tokens passed to setDataACLs().
*@param createdDate is the created date to set on the document, or null to set none.
*@param modifiedDate is the modified date to set on the document, or null to set none; also passed to checkDateIndexable().
*@param metadataValues maps field name to field value for fields to add to the document, or is null for none.
*@param guid is the value added to the document as the "GUID" field.
*@param sDesc is passed through to setPathAttribute().
*@throws ManifoldCFException on interruption, an unexpected HTTP return code, an illegal argument, or a local I/O error.
*@throws ServiceInterruption when a timeout, HttpException, or other IOException occurs during the fetch.
*/
protected void fetchAndIndexFile(IProcessActivity activities, String documentIdentifier, String version,
String fileUrl, String fetchUrl, String[] accessTokens, String[] denyTokens, Date createdDate, Date modifiedDate,
Map<String,String> metadataValues, String guid, SystemMetadataDescription sDesc)
throws ManifoldCFException, ServiceInterruption
{
// errorCode and errorDesc feed the activity record written in the outermost finally block.
// A null errorCode at exit means no activity record is written.
String errorCode = null;
String errorDesc = null;
long startTime = System.currentTimeMillis();
// Only assigned after a successful ingestion, so records for rejected/failed documents carry a null length.
Long fileLengthLong = null;
try
{
// Before we fetch, confirm that the output connector will accept the document
if (!activities.checkURLIndexable(fileUrl))
{
// URL failed
errorCode = activities.EXCLUDED_URL;
errorDesc = "Document rejected because of URL ("+fileUrl+")";
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says URL '"+fileUrl+"' is not indexable");
activities.noDocument(documentIdentifier, version);
return;
}
// Also check mime type
// The mime type is derived from the document identifier's extension, not from the HTTP response.
String contentType = mapExtensionToMimeType(documentIdentifier);
if (!activities.checkMimeTypeIndexable(contentType))
{
// Mime type failed
errorCode = activities.EXCLUDED_MIMETYPE;
errorDesc = "Document rejected because of mime type ("+contentType+")";
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says mime type '"+((contentType==null)?"null":contentType)+"' is not indexable");
activities.noDocument(documentIdentifier, version);
return;
}
// Now check date stamp
if (!activities.checkDateIndexable(modifiedDate))
{
// Date failed
errorCode = activities.EXCLUDED_DATE;
errorDesc = "Document rejected because of date ("+modifiedDate+")";
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Skipping document '"+documentIdentifier+"' because output connector says date '"+((modifiedDate==null)?"null":modifiedDate)+"' is not indexable");
activities.noDocument(documentIdentifier, version);
return;
}
// Set stuff up for fetch activity logging
// This try covers temp-file creation, the fetch, and ingestion; its catch clauses (near the end of the method)
// turn any IOException that escapes the inner handlers into a ManifoldCFException.
try
{
// Read the document into a local temporary file, so I get a reliable length.
File tempFile = File.createTempFile("__shp__",".tmp");
try
{
// Open the output stream
OutputStream os = new FileOutputStream(tempFile);
try
{
// Catch all exceptions having to do with reading the document
try
{
// The fetch writes the response body to os; finishUp() supplies the HTTP return code.
ExecuteMethodThread emt = new ExecuteMethodThread(httpClient, fetchUrl, os);
emt.start();
int returnCode = emt.finishUp();
// 404, 401, 400 and 415 are all treated as "document is not there": record it and skip, without failing.
if (returnCode == 404 || returnCode == 401 || returnCode == 400 || returnCode == 415)
{
// Well, sharepoint thought the document was there, but it really isn't, so delete it.
errorCode = "DOCUMENTNOTFOUND";
errorDesc = "Document not found; HTTP code "+returnCode;
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Document at '"+fileUrl+"' failed to fetch with code "+Integer.toString(returnCode)+", deleting");
activities.noDocument(documentIdentifier, version);
return;
}
else if (returnCode != 200)
{
// Any other non-200 code is a hard error.
errorCode = "UNKNOWNHTTPCODE";
errorDesc = "Unknown HTTP return code "+returnCode;
throw new ManifoldCFException("Error fetching document '"+fileUrl+"': "+Integer.toString(returnCode));
}
}
catch (InterruptedException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
// The timeout catches must stay ahead of the InterruptedIOException catch below:
// java.net.SocketTimeoutException is a subclass of InterruptedIOException, and a timeout is reported
// as a ServiceInterruption rather than as an interruption of this thread.
catch (java.net.SocketTimeoutException e)
{
errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
errorDesc = e.getMessage();
Logging.connectors.warn("SharePoint: SocketTimeoutException thrown: "+e.getMessage(),e);
long currentTime = System.currentTimeMillis();
// The two time arguments are 5 minutes and 12 hours from now.
// NOTE(review): presumably the next-retry time and the give-up time - confirm against ServiceInterruption.
throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
currentTime + 12 * 60 * 60000L,-1,true);
}
catch (org.apache.http.conn.ConnectTimeoutException e)
{
errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
errorDesc = e.getMessage();
Logging.connectors.warn("SharePoint: ConnectTimeoutException thrown: "+e.getMessage(),e);
long currentTime = System.currentTimeMillis();
throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
currentTime + 12 * 60 * 60000L,-1,true);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IllegalArgumentException e)
{
errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
errorDesc = e.getMessage();
Logging.connectors.error("SharePoint: Illegal argument: "+e.getMessage(), e);
throw new ManifoldCFException("SharePoint: Illegal argument: "+e.getMessage(),e);
}
catch (org.apache.http.HttpException e)
{
errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
errorDesc = e.getMessage();
Logging.connectors.warn("SharePoint: HttpException thrown: "+e.getMessage(),e);
long currentTime = System.currentTimeMillis();
throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
currentTime + 12 * 60 * 60000L,-1,true);
}
catch (IOException e)
{
// Any remaining IOException during the fetch is also reported as a ServiceInterruption.
errorCode = e.getClass().getSimpleName().toUpperCase(Locale.ROOT);
errorDesc = e.getMessage();
Logging.connectors.warn("SharePoint: IOException thrown: "+e.getMessage(),e);
long currentTime = System.currentTimeMillis();
throw new ServiceInterruption("SharePoint is down attempting to read '"+fileUrl+"', retrying: "+e.getMessage(),e,currentTime + 300000L,
currentTime + 12 * 60 * 60000L,-1,true);
}
}
finally
{
// An IOException from close() is not handled here; it propagates to the catch clauses of the enclosing try.
os.close();
}
// Ingest the document
// The length is taken from the temp file on disk, after the output stream has been closed.
long documentLength = tempFile.length();
if (!activities.checkLengthIndexable(documentLength))
{
// Document too long
errorCode = activities.EXCLUDED_LENGTH;
errorDesc = "Document excluded due to length ("+documentLength+")";
if (Logging.connectors.isDebugEnabled())
Logging.connectors.debug("SharePoint: Document '"+documentIdentifier+"' was too long, according to output connector");
activities.noDocument(documentIdentifier, version);
return;
}
InputStream is = new FileInputStream(tempFile);
try
{
RepositoryDocument data = new RepositoryDocument();
data.setBinary( is, documentLength );
data.setFileName(mapToFileName(documentIdentifier));
if (contentType != null)
data.setMimeType(contentType);
setDataACLs(data,accessTokens,denyTokens);
setPathAttribute(data,sDesc,documentIdentifier);
if (modifiedDate != null)
data.setModifiedDate(modifiedDate);
if (createdDate != null)
data.setCreatedDate(createdDate);
if (metadataValues != null)
{
// Copy every supplied metadata field onto the document.
Iterator<String> iter = metadataValues.keySet().iterator();
while (iter.hasNext())
{
String fieldName = iter.next();
String fieldData = metadataValues.get(fieldName);
data.addField(fieldName,fieldData);
}
}
data.addField("GUID",guid);
try
{
activities.ingestDocumentWithException( documentIdentifier, version, fileUrl , data );
// Success: these two assignments are what make the finally block record an "OK" activity with a length.
errorCode = "OK";
fileLengthLong = new Long(documentLength);
}
catch (IOException e)
{
// errorCode is not assigned on this path; handleIOException converts the exception and rethrows.
handleIOException(e,"reading document");
}
return;
}
finally
{
// Failures while closing the temp-file input stream are logged at debug level only,
// except for InterruptedIOException, which is rethrown as an interruption.
try
{
is.close();
}
catch (java.net.SocketTimeoutException e)
{
// This is not fatal
Logging.connectors.debug("SharePoint: Timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
}
catch (org.apache.http.conn.ConnectTimeoutException e)
{
// This is not fatal
Logging.connectors.debug("SharePoint: Connect timeout before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
// This is not fatal
Logging.connectors.debug("SharePoint: Server closed connection before read could finish for '"+fileUrl+"': "+e.getMessage(),e);
}
}
}
finally
{
// Always remove the temporary file, whatever the outcome; the result of delete() is not checked.
tempFile.delete();
}
}
// These clauses only see IOExceptions that were not already handled by the inner fetch handlers,
// i.e. from creating/opening/closing the temporary file.
catch (java.net.SocketTimeoutException e)
{
throw new ManifoldCFException("Socket timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
}
catch (org.apache.http.conn.ConnectTimeoutException e)
{
throw new ManifoldCFException("Connect timeout error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
}
catch (InterruptedIOException e)
{
throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
}
catch (IOException e)
{
throw new ManifoldCFException("IO error writing '"+fileUrl+"' to temporary file: "+e.getMessage(),e);
}
}
catch (ManifoldCFException e)
{
// An interruption is not a fetch result: clear errorCode so that no activity record is written for it.
if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
errorCode = null;
throw e;
}
finally
{
// Record the fetch activity for every exit path that set an error code (including "OK").
if (errorCode != null)
activities.recordActivity(new Long(startTime),ACTIVITY_FETCH,
fileLengthLong,documentIdentifier,errorCode,errorDesc,null);
}
}
/** Translate an IOException into the exception type the framework expects.
* Socket and connect timeouts become a ServiceInterruption carrying two timestamps, five minutes and
* twelve hours from now (presumably the retry time and the give-up time; confirm against the
* ServiceInterruption constructor). Any other InterruptedIOException becomes a ManifoldCFException
* flagged INTERRUPTED, and everything else becomes a plain ManifoldCFException.
* This method never returns normally.
*@param e is the exception to translate; it is always attached as the cause.
*@param context is a short phrase describing what was being attempted; used only in the ServiceInterruption message.
*/
protected static void handleIOException(IOException e, String context)
  throws ManifoldCFException, ServiceInterruption
{
  // The timeout types must be tested before the generic InterruptedIOException check below.
  final boolean timedOut = (e instanceof java.net.SocketTimeoutException)
    || (e instanceof org.apache.http.conn.ConnectTimeoutException);
  if (timedOut)
  {
    final long now = System.currentTimeMillis();
    throw new ServiceInterruption("SharePoint is down attempting to "+context+", retrying: "+e.getMessage(),e,
      now + 300000L,
      now + 12 * 60 * 60000L,-1,true);
  }
  if (e instanceof InterruptedIOException)
  {
    throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
  }
  throw new ManifoldCFException(e.getMessage(),e);
}
/** Look up a mime type from the extension of a file name or path.
* Only the part after the last '/' is considered, and the extension is whatever follows the last '.'
* in that part.
*@param fileName is a file name or slash-separated path.
*@return whatever ExtensionMimeMap returns for the extension, or null if the final path segment contains no '.'.
*/
protected static String mapExtensionToMimeType(String fileName)
{
  // lastIndexOf yields -1 when there is no slash, so "+ 1" keeps the whole string in that case.
  final String leafName = fileName.substring(fileName.lastIndexOf('/') + 1);
  final int lastDot = leafName.lastIndexOf('.');
  if (lastDot < 0)
  {
    return null;
  }
  return ExtensionMimeMap.mapToMimeType(leafName.substring(lastDot + 1));
}
/** Extract the file name from a slash-separated document identifier.
*@param fileName is the identifier or path.
*@return the text after the last '/', or the argument itself when it contains no '/'.
*/
protected static String mapToFileName(String fileName)
{
  final int lastSlash = fileName.lastIndexOf('/');
  return (lastSlash == -1) ? fileName : fileName.substring(lastSlash + 1);
}
/** Attach document-level allow and deny ACLs to a repository document.
* Each array is applied only when it is non-null; when debug logging is enabled its contents are
* logged first.
*@param data is the document to receive the ACLs.
*@param acls are the allow tokens, or null to leave the allow ACL untouched.
*@param denyAcls are the deny tokens, or null to leave the deny ACL untouched.
*/
protected static void setDataACLs(RepositoryDocument data, String[] acls, String[] denyAcls)
{
  if (acls != null)
  {
    if (Logging.connectors.isDebugEnabled())
    {
      final StringBuilder text = new StringBuilder("SharePoint: Acls: [ ");
      for (int i = 0; i < acls.length; i++)
      {
        text.append(acls[i]);
        text.append(" ");
      }
      text.append("]");
      Logging.connectors.debug( text.toString() );
    }
    data.setSecurityACL( RepositoryDocument.SECURITY_TYPE_DOCUMENT, acls );
  }

  if (denyAcls != null)
  {
    if (Logging.connectors.isDebugEnabled())
    {
      final StringBuilder text = new StringBuilder("SharePoint: DenyAcls: [ ");
      for (int i = 0; i < denyAcls.length; i++)
      {
        text.append(denyAcls[i]);
        text.append(" ");
      }
      text.append("]");
      Logging.connectors.debug( text.toString() );
    }
    data.setSecurityDenyACL( RepositoryDocument.SECURITY_TYPE_DOCUMENT, denyAcls);
  }
}
/** Add the path metadata field to a document, if a path attribute name is configured.
* When the configured name is null or empty, nothing is added and a debug message is logged.
*@param data is the document to receive the field.
*@param sDesc supplies both the attribute name and the value computed for the document.
*@param documentIdentifier is the identifier handed to sDesc to compute the path value.
*/
protected static void setPathAttribute(RepositoryDocument data, SystemMetadataDescription sDesc, String documentIdentifier)
  throws ManifoldCFException
{
  final String attributeName = sDesc.getPathAttributeName();
  if (attributeName == null || attributeName.length() == 0)
  {
    // Path metadata is not enabled
    Logging.connectors.debug("SharePoint: Path attribute name is null");
    return;
  }

  if (Logging.connectors.isDebugEnabled())
  {
    Logging.connectors.debug("SharePoint: Path attribute name is '"+attributeName+"'");
  }
  final String attributeValue = sDesc.getPathAttributeValue(documentIdentifier);
  if (Logging.connectors.isDebugEnabled())
  {
    Logging.connectors.debug("SharePoint: Path attribute value is '"+attributeValue+"'");
  }
  data.addField(attributeName,attributeValue);
}
/** Names of the carry-down data sets FileStream passes with each document reference; the order matches the indexes of FileStream's dataValues array. */
protected static final String[] fileStreamDataNames = {"accessTokens", "denyTokens", "guids", "fields"};
/** File stream that turns each reported library file into a document reference.
* Paths are normalized to be relative to the root path, filtered through checkIncludeFile(), and
* then queued with the carry-down data captured at construction time.
*/
protected class FileStream implements IFileStream
{
  protected final IProcessActivity activities;
  protected final Specification spec;
  protected final String rootPath;
  protected final String sitePath;
  protected final String siteLibPath;

  // For carry-down
  protected final String documentIdentifier;
  protected final String[][] dataValues;

  /** Constructor.
  *@param activities receives the document references.
  *@param rootPath is the prefix stripped from every full path.
  *@param sitePath is the site path used to qualify relative paths.
  *@param siteLibPath is the site-plus-library prefix every accepted path must start with.
  *@param spec is the specification consulted for inclusion.
  *@param documentIdentifier is the parent identifier passed along with each reference.
  *@param accessTokens is carried down as "accessTokens".
  *@param denyTokens is carried down as "denyTokens".
  *@param libID is carried down (as a single-element array) as "guids".
  *@param fields is carried down as "fields".
  */
  public FileStream(IProcessActivity activities, String rootPath, String sitePath, String siteLibPath, Specification spec,
    String documentIdentifier, String[] accessTokens, String[] denyTokens, String libID, String[] fields)
  {
    this.activities = activities;
    this.rootPath = rootPath;
    this.sitePath = sitePath;
    this.siteLibPath = siteLibPath;
    this.spec = spec;
    this.documentIdentifier = documentIdentifier;

    // One slot per entry of fileStreamDataNames, in the same order.
    final String[][] carryDown = new String[fileStreamDataNames.length][];
    carryDown[0] = accessTokens;
    carryDown[1] = denyTokens;
    carryDown[2] = new String[]{libID};
    carryDown[3] = fields;
    this.dataValues = carryDown;
  }

  /** Accept one file.
  *@param relPath is either a full path (leading '/') or a path relative to the site.
  *@param displayURL is not used by this stream.
  */
  @Override
  public void addFile(String relPath, String displayURL)
    throws ManifoldCFException
  {
    // Anything without a leading slash is taken to be relative to the site.
    final String fullPath = relPath.startsWith("/") ? relPath : rootPath + sitePath + "/" + relPath;

    if ( !fullPath.startsWith(rootPath) )
    {
      Logging.connectors.warn("SharePoint: Unexpected relPath structure; path is '"+fullPath+"', but expected to see something beginning with '"+rootPath+"'");
      return;
    }

    // Make the path relative to the root.
    final String rootRelativePath = fullPath.substring(rootPath.length());
    if ( !checkIncludeFile( rootRelativePath, spec ) )
    {
      return;
    }

    if ( !rootRelativePath.startsWith(siteLibPath) )
    {
      Logging.connectors.warn("SharePoint: Unexpected relPath structure; path is '"+rootRelativePath+"', but expected to see something beginning with '"+siteLibPath+"'");
      return;
    }

    // File processing needs to know where the library path ends, so an extra slash is inserted at
    // the library/folder boundary (yielding a double slash there).
    final String modifiedPath = siteLibPath + "/" + rootRelativePath.substring(siteLibPath.length());
    activities.addDocumentReference( modifiedPath, documentIdentifier, null, fileStreamDataNames, dataValues );
  }
}
/** Names of the carry-down data sets ListItemStream passes with each document reference; the order matches the indexes of ListItemStream's dataValues array. */
protected static final String[] listItemStreamDataNames = {"accessTokens", "denyTokens", "guids", "fields", "displayURLs"};
/** File stream that turns each reported list item into a document reference.
* Paths are normalized (root path and the "/Lists" segment removed), filtered through
* checkIncludeListItem(), and then queued with carry-down data that includes a per-item display URL.
*/
protected class ListItemStream implements IFileStream
{
  protected final IProcessActivity activities;
  protected final Specification spec;
  protected final String rootPath;
  protected final String sitePath;
  protected final String siteListPath;

  // For carry-down
  protected final String documentIdentifier;
  protected final String[][] dataValues;

  /** Constructor.
  *@param activities receives the document references.
  *@param rootPath is the prefix stripped from every full path.
  *@param sitePath is the site path used to qualify relative paths.
  *@param siteListPath is the site-plus-list prefix every accepted path must start with.
  *@param spec is the specification consulted for inclusion.
  *@param documentIdentifier is the parent identifier passed along with each reference.
  *@param accessTokens is carried down as "accessTokens".
  *@param denyTokens is carried down as "denyTokens".
  *@param listID is carried down (as a single-element array) as "guids".
  *@param fields is carried down as "fields".
  */
  public ListItemStream(IProcessActivity activities, String rootPath, String sitePath, String siteListPath, Specification spec,
    String documentIdentifier, String[] accessTokens, String[] denyTokens, String listID, String[] fields)
  {
    this.activities = activities;
    this.rootPath = rootPath;
    this.sitePath = sitePath;
    this.siteListPath = siteListPath;
    this.spec = spec;
    this.documentIdentifier = documentIdentifier;

    // One slot per entry of listItemStreamDataNames; the last slot ("displayURLs") is filled per item in addFile().
    final String[][] carryDown = new String[listItemStreamDataNames.length][];
    carryDown[0] = accessTokens;
    carryDown[1] = denyTokens;
    carryDown[2] = new String[]{listID};
    carryDown[3] = fields;
    this.dataValues = carryDown;
  }

  /** Accept one list item.
  *@param relPath is either a full path (leading '/') or a path relative to the site.
  *@param displayURL is the URL to carry down for the item; when null the item's full path is used instead.
  */
  @Override
  public void addFile(String relPath, String displayURL)
    throws ManifoldCFException
  {
    // Anything without a leading slash is taken to be relative to the site.
    final String fullPath = relPath.startsWith("/") ? relPath : rootPath + sitePath + "/" + relPath;

    if ( !fullPath.startsWith(rootPath) )
    {
      Logging.connectors.warn("SharePoint: Unexpected relPath structure; path is '"+fullPath+"', but expected to see something beginning with '"+rootPath+"'");
      return;
    }

    // Make the path relative to the root.
    final String rootRelativePath = fullPath.substring(rootPath.length());
    if ( !rootRelativePath.startsWith(sitePath) )
    {
      Logging.connectors.warn("SharePoint: Unexpected relPath structure; site path is '"+rootRelativePath+"', but expected to see something beginning with '"+sitePath+"'");
      return;
    }

    // What remains after the site must begin with /Lists/.
    final String siteRelativePath = rootRelativePath.substring(sitePath.length());
    if ( !siteRelativePath.startsWith("/Lists/") )
    {
      Logging.connectors.warn("SharePoint: Unexpected relPath structure; rel path is '"+siteRelativePath+"', but expected to see something beginning with '/Lists/'");
      return;
    }

    // Drop the "/Lists" segment and put the site back in front.
    final String itemPath = sitePath + siteRelativePath.substring("/Lists".length());
    if ( !checkIncludeListItem( itemPath, spec ) )
    {
      return;
    }

    if ( !itemPath.startsWith(siteListPath) )
    {
      Logging.connectors.warn("SharePoint: Unexpected relPath structure; site path is '"+itemPath+"', but expected to see something beginning with '"+siteListPath+"'");
      return;
    }

    // Item processing needs to know where the list path ends, so two extra slashes are inserted at
    // the list/item boundary (yielding a triple slash there).
    final int boundary = siteListPath.length();
    final String modifiedPath = itemPath.substring(0,boundary) + "//" + itemPath.substring(boundary);
    dataValues[4] = new String[]{ (displayURL != null) ? displayURL : fullPath };
    activities.addDocumentReference( modifiedPath, documentIdentifier, null, listItemStreamDataNames, dataValues );
  }
}
// UI support methods.
//
// These support methods come in two varieties. The first bunch is involved in setting up connection configuration information. The second bunch
// is involved in presenting and editing document specification information for a job. The two kinds of methods are accordingly treated differently,
// in that the first bunch cannot assume that the current connector object is connected, while the second bunch can. That is why the first bunch
// receives a thread context argument for all UI methods, while the second bunch does not need one (since it has already been applied via the connect()
// method, above).
/** Output the configuration header section.
* Called in the head section of the connector's configuration page. Adds this connector's two tabs
* (Server and Authority Type) to the tab list and emits the javascript used by the configuration
* editing HTML.
*@param threadContext is the local thread context.
*@param out is the output to which any HTML should be sent.
*@param locale is the locale used to look up tab names and the javascript resource.
*@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
*@param tabsArray is the list of tab names; the connector-specific tab names are appended to it.
*/
@Override
public void outputConfigurationHeader(IThreadContext threadContext, IHTTPOutput out,
  Locale locale, ConfigParams parameters, List<String> tabsArray)
  throws ManifoldCFException, IOException
{
  final String[] tabKeys = {
    "SharePointRepository.Server",
    "SharePointRepository.AuthorityType"
  };
  for (int i = 0; i < tabKeys.length; i++)
  {
    tabsArray.add(Messages.getString(locale,tabKeys[i]));
  }
  Messages.outputResourceWithVelocity(out,locale,"editConfiguration.js",null);
}
/** Output the configuration body section.
* Called in the body section of the connector's configuration page to present the form elements for
* editing. The emitted HTML can be presumed to sit inside appropriate {@code <html>}, {@code <body>}
* and {@code <form>} tags; the form is named "editconnection". Both tab templates are always
* rendered, with the current tab name available to them as "TabName".
*@param threadContext is the local thread context.
*@param out is the output to which any HTML should be sent.
*@param locale is the locale used to select the templates.
*@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
*@param tabName is the current tab name.
*/
@Override
public void outputConfigurationBody(IThreadContext threadContext, IHTTPOutput out,
  Locale locale, ConfigParams parameters, String tabName)
  throws ManifoldCFException, IOException
{
  final Map<String,Object> context = new HashMap<String,Object>();
  context.put("TabName",tabName);

  // Gather the values for both tabs before rendering anything.
  fillInServerTab(context,out,parameters);
  fillInAuthorityTypeTab(context,out,parameters);

  final String[] templates = {
    "editConfiguration_Server.html",
    "editConfiguration_AuthorityType.html"
  };
  for (int i = 0; i < templates.length; i++)
  {
    Messages.outputResourceWithVelocity(out,locale,templates[i],context);
  }
}
/** Process a configuration post.
* This method is called at the start of the connector's configuration page, whenever there is a possibility that form data for a connection has been
* posted. Its purpose is to gather form information and modify the configuration parameters accordingly.
* The name of the posted form is "editconnection".
* Each simple form field is copied into the parameters only when it is present in the post. The
* "configop" field selects a keystore operation: "Delete" removes the certificate whose alias is in
* "shpkeystorealias", and "Add" imports the uploaded "shpcertificate" under a newly generated alias.
*@param threadContext is the local thread context.
*@param variableContext is the set of variables available from the post, including binary file post information.
*@param locale is not used by this method.
*@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
*@return null if all is well, or a string error message if there is an error that should prevent saving of the connection (and cause a redirection to an error page).
*/
@Override
public String processConfigurationPost(IThreadContext threadContext, IPostParameters variableContext,
  Locale locale, ConfigParams parameters)
  throws ManifoldCFException
{
  // Server tab fields
  String serverVersion = variableContext.getParameter("serverVersion");
  if (serverVersion != null)
    parameters.setParameter(SharePointConfig.PARAM_SERVERVERSION,serverVersion);

  String serverProtocol = variableContext.getParameter("serverProtocol");
  if (serverProtocol != null)
    parameters.setParameter(SharePointConfig.PARAM_SERVERPROTOCOL,serverProtocol);

  String serverName = variableContext.getParameter("serverName");
  if (serverName != null)
    parameters.setParameter(SharePointConfig.PARAM_SERVERNAME,serverName);

  String serverPort = variableContext.getParameter("serverPort");
  if (serverPort != null)
    parameters.setParameter(SharePointConfig.PARAM_SERVERPORT,serverPort);

  String serverLocation = variableContext.getParameter("serverLocation");
  if (serverLocation != null)
    parameters.setParameter(SharePointConfig.PARAM_SERVERLOCATION,serverLocation);

  String userName = variableContext.getParameter("serverUserName");
  if (userName != null)
    parameters.setParameter(SharePointConfig.PARAM_SERVERUSERNAME,userName);

  // The posted password value is a key; it is mapped back to the password before being stored obfuscated.
  String password = variableContext.getParameter("serverPassword");
  if (password != null)
    parameters.setObfuscatedParameter(SharePointConfig.PARAM_SERVERPASSWORD,variableContext.mapKeyToPassword(password));

  // Proxy fields
  String proxyHost = variableContext.getParameter("proxyhost");
  if (proxyHost != null)
    parameters.setParameter(SharePointConfig.PARAM_PROXYHOST,proxyHost);

  String proxyPort = variableContext.getParameter("proxyport");
  if (proxyPort != null)
    parameters.setParameter(SharePointConfig.PARAM_PROXYPORT,proxyPort);

  String proxyUser = variableContext.getParameter("proxyuser");
  if (proxyUser != null)
    parameters.setParameter(SharePointConfig.PARAM_PROXYUSER,proxyUser);

  String proxyPassword = variableContext.getParameter("proxypassword");
  if (proxyPassword != null)
    parameters.setObfuscatedParameter(SharePointConfig.PARAM_PROXYPASSWORD,variableContext.mapKeyToPassword(proxyPassword));

  String proxyDomain = variableContext.getParameter("proxydomain");
  if (proxyDomain != null)
    parameters.setParameter(SharePointConfig.PARAM_PROXYDOMAIN,proxyDomain);

  // Keystore as posted back by the form; the operations below start from this value.
  String keystoreValue = variableContext.getParameter("keystoredata");
  if (keystoreValue != null)
    parameters.setParameter(SharePointConfig.PARAM_SERVERKEYSTORE,keystoreValue);

  String configOp = variableContext.getParameter("configop");
  if (configOp != null)
  {
    if (configOp.equals("Delete"))
    {
      String alias = variableContext.getParameter("shpkeystorealias");
      keystoreValue = parameters.getParameter(SharePointConfig.PARAM_SERVERKEYSTORE);
      IKeystoreManager mgr;
      if (keystoreValue != null)
        mgr = KeystoreManagerFactory.make("",keystoreValue);
      else
        mgr = KeystoreManagerFactory.make("");
      mgr.remove(alias);
      parameters.setParameter(SharePointConfig.PARAM_SERVERKEYSTORE,mgr.getString());
    }
    else if (configOp.equals("Add"))
    {
      String alias = IDFactory.make(threadContext);
      byte[] certificateValue = variableContext.getBinaryBytes("shpcertificate");
      // Without this guard a missing upload would surface as a NullPointerException from
      // ByteArrayInputStream instead of an error page.
      // NOTE(review): assumes getBinaryBytes() returns null when no file was posted - confirm.
      if (certificateValue == null)
        return "Illegal certificate: no certificate data was supplied";
      keystoreValue = parameters.getParameter(SharePointConfig.PARAM_SERVERKEYSTORE);
      IKeystoreManager mgr;
      if (keystoreValue != null)
        mgr = KeystoreManagerFactory.make("",keystoreValue);
      else
        mgr = KeystoreManagerFactory.make("");
      java.io.InputStream is = new java.io.ByteArrayInputStream(certificateValue);
      String certError = null;
      try
      {
        mgr.importCertificate(alias,is);
      }
      catch (Throwable e)
      {
        // Any failure to import is reported to the user rather than propagated.
        certError = e.getMessage();
        // A Throwable may carry no message; fall back to its class name so the failure
        // is still reported instead of being mistaken for success.
        if (certError == null)
          certError = e.getClass().getName();
      }
      finally
      {
        try
        {
          is.close();
        }
        catch (IOException e)
        {
          // Don't report anything
        }
      }
      if (certError != null)
      {
        // Redirect to error page
        return "Illegal certificate: "+certError;
      }
      parameters.setParameter(SharePointConfig.PARAM_SERVERKEYSTORE,mgr.getString());
    }
  }

  // Authority type tab
  String authorityType = variableContext.getParameter("authorityType");
  if (authorityType != null)
    parameters.setParameter(SharePointConfig.PARAM_AUTHORITYTYPE,authorityType);

  return null;
}
/** View configuration.
* Called in the body section of the connector's view configuration page to present the connection
* information to the user. The emitted HTML can be presumed to sit inside appropriate
* {@code <html>} and {@code <body>} tags.
*@param threadContext is the local thread context.
*@param out is the output to which any HTML should be sent.
*@param locale is the locale used to select the template.
*@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
*/
@Override
public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out,
  Locale locale, ConfigParams parameters)
  throws ManifoldCFException, IOException
{
  final Map<String,Object> context = new HashMap<String,Object>();
  // The view page shows the same values the two edit tabs do.
  fillInServerTab(context,out,parameters);
  fillInAuthorityTypeTab(context,out,parameters);
  Messages.outputResourceWithVelocity(out,locale,"viewConfiguration.html",context);
}
/** Put the Authority Type tab value into the velocity context.
* Stores the configured authority type under "AUTHORITYTYPE", using "ActiveDirectory" when none is
* configured (for backwards compatibility).
*@param velocityContext is the context to fill in.
*@param out is not used by this method.
*@param parameters are the connection's configuration parameters.
*/
protected static void fillInAuthorityTypeTab(Map<String,Object> velocityContext, IHTTPOutput out, ConfigParams parameters)
  throws ManifoldCFException
{
  final String configuredType = parameters.getParameter(SharePointConfig.PARAM_AUTHORITYTYPE);
  velocityContext.put("AUTHORITYTYPE", (configuredType != null) ? configuredType : "ActiveDirectory");
}
/** Put the Server tab values into the velocity context.
* Reads the server, credential, keystore and proxy settings from the parameters, substitutes
* defaults for anything missing, and stores them under upper-case keys. Passwords are never placed
* in the context directly; they are replaced by the key returned from out.mapPasswordToKey().
* Certificate descriptions longer than 128 characters are cut to 125 characters plus "...".
*@param velocityContext is the context to fill in.
*@param out is used to map passwords to keys.
*@param parameters are the connection's configuration parameters.
*/
protected static void fillInServerTab(Map<String,Object> velocityContext, IHTTPOutput out, ConfigParams parameters)
  throws ManifoldCFException
{
  // Server settings, each with its default
  final String storedVersion = parameters.getParameter(SharePointConfig.PARAM_SERVERVERSION);
  final String serverVersion = (storedVersion != null) ? storedVersion : "4.0";

  final String storedProtocol = parameters.getParameter(SharePointConfig.PARAM_SERVERPROTOCOL);
  final String serverProtocol = (storedProtocol != null) ? storedProtocol : "http";

  final String storedName = parameters.getParameter(SharePointConfig.PARAM_SERVERNAME);
  final String serverName = (storedName != null) ? storedName : "localhost";

  final String storedPort = parameters.getParameter(SharePointConfig.PARAM_SERVERPORT);
  final String serverPort = (storedPort != null) ? storedPort : "";

  final String storedLocation = parameters.getParameter(SharePointConfig.PARAM_SERVERLOCATION);
  final String serverLocation = (storedLocation != null) ? storedLocation : "";

  final String storedUser = parameters.getParameter(SharePointConfig.PARAM_SERVERUSERNAME);
  final String userName = (storedUser != null) ? storedUser : "";

  final String storedPassword = parameters.getObfuscatedParameter(SharePointConfig.PARAM_SERVERPASSWORD);
  final String password = (storedPassword != null) ? out.mapPasswordToKey(storedPassword) : "";

  // Keystore and the list of certificates it contains
  final String keystore = parameters.getParameter(SharePointConfig.PARAM_SERVERKEYSTORE);
  final IKeystoreManager localKeystore = (keystore != null)
    ? KeystoreManagerFactory.make("",keystore)
    : KeystoreManagerFactory.make("");

  final List<Map<String,String>> certificates = new ArrayList<Map<String,String>>();
  final String[] aliases = localKeystore.getContents();
  for (int i = 0; i < aliases.length; i++)
  {
    final String alias = aliases[i];
    final String fullDescription = localKeystore.getDescription(alias);
    final String shownDescription = (fullDescription.length() > 128)
      ? fullDescription.substring(0,125) + "..."
      : fullDescription;
    final Map<String,String> certificate = new HashMap<String,String>();
    certificate.put("ALIAS", alias);
    certificate.put("DESCRIPTION", shownDescription);
    certificates.add(certificate);
  }

  // Proxy settings
  final String storedProxyHost = parameters.getParameter(SharePointConfig.PARAM_PROXYHOST);
  final String proxyHost = (storedProxyHost != null) ? storedProxyHost : "";

  final String storedProxyPort = parameters.getParameter(SharePointConfig.PARAM_PROXYPORT);
  final String proxyPort = (storedProxyPort != null) ? storedProxyPort : "";

  final String storedProxyUser = parameters.getParameter(SharePointConfig.PARAM_PROXYUSER);
  final String proxyUser = (storedProxyUser != null) ? storedProxyUser : "";

  final String storedProxyPassword = parameters.getObfuscatedParameter(SharePointConfig.PARAM_PROXYPASSWORD);
  final String proxyPassword = (storedProxyPassword != null) ? out.mapPasswordToKey(storedProxyPassword) : "";

  final String storedProxyDomain = parameters.getParameter(SharePointConfig.PARAM_PROXYDOMAIN);
  final String proxyDomain = (storedProxyDomain != null) ? storedProxyDomain : "";

  // Publish everything only after all values have been gathered.
  velocityContext.put("SERVERVERSION", serverVersion);
  velocityContext.put("SERVERPROTOCOL", serverProtocol);
  velocityContext.put("SERVERNAME", serverName);
  velocityContext.put("SERVERPORT", serverPort);
  velocityContext.put("SERVERLOCATION", serverLocation);
  velocityContext.put("SERVERUSERNAME", userName);
  velocityContext.put("SERVERPASSWORD", password);
  // "KEYSTORE" is only present when a keystore has been stored.
  if (keystore != null)
  {
    velocityContext.put("KEYSTORE", keystore);
  }
  velocityContext.put("CERTIFICATELIST", certificates);
  velocityContext.put("PROXYHOST", proxyHost);
  velocityContext.put("PROXYPORT", proxyPort);
  velocityContext.put("PROXYUSER", proxyUser);
  velocityContext.put("PROXYPASSWORD", proxyPassword);
  velocityContext.put("PROXYDOMAIN", proxyDomain);
}
/** Output the specification header section.
* Called in the head section of a job page which has selected a repository connection of the
* current type. Adds this connector's three tabs (Paths, Security, Metadata) to the tab list and
* emits the javascript used by the job editing HTML, with the connection sequence number available
* to the template as "SeqNum".
* The connector will be connected before this method can be called.
*@param out is the output to which any HTML should be sent.
*@param locale is the locale the output is preferred to be in.
*@param ds is the current document specification for this job.
*@param connectionSequenceNumber is the unique number of this connection within the job.
*@param tabsArray is the list of tab names; the connector-specific tab names are appended to it.
*/
@Override
public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification ds,
  int connectionSequenceNumber, List<String> tabsArray)
  throws ManifoldCFException, IOException
{
  final String[] tabKeys = {
    "SharePointRepository.Paths",
    "SharePointRepository.Security",
    "SharePointRepository.Metadata"
  };
  for (int i = 0; i < tabKeys.length; i++)
  {
    tabsArray.add(Messages.getString(locale,tabKeys[i]));
  }

  final Map<String,Object> context = new HashMap<String,Object>();
  context.put("SeqNum", String.valueOf(connectionSequenceNumber));
  Messages.outputResourceWithVelocity(out,locale,"editSpecification.js",context);
}
/** Output the specification body section.
* Called in the body section of a job page which has selected a repository connection of the
* current type, to present the form elements for editing. The emitted HTML can be presumed to sit
* inside appropriate {@code <html>}, {@code <body>} and {@code <form>} tags; the form is always
* named "editjob". All three tab templates are always rendered; the extra lookups that need the
* thread context are performed only for the Paths or Metadata tab when it is the current tab.
* The connector will be connected before this method can be called.
*@param out is the output to which any HTML should be sent.
*@param locale is the locale the output is preferred to be in.
*@param ds is the current document specification for this job.
*@param connectionSequenceNumber is the unique number of this connection within the job.
*@param actualSequenceNumber is the connection within the job that has currently been selected.
*@param tabName is the current tab name. (actualSequenceNumber, tabName) form a unique tuple within
* the job.
*/
@Override
public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification ds,
  int connectionSequenceNumber, int actualSequenceNumber, String tabName)
  throws ManifoldCFException, IOException
{
  final Map<String,Object> context = new HashMap<String,Object>();
  context.put("TabName",tabName);
  context.put("SeqNum", String.valueOf(connectionSequenceNumber));
  context.put("SelectedNum", String.valueOf(actualSequenceNumber));

  // Values derived purely from the specification
  fillInSecurityTab(context,out,ds);
  fillInPathsTab(context,out,ds);
  fillInMetadataTab(context,out,ds);

  // Values that require context logic, only for the tab being shown
  final String pathsTabName = Messages.getString(locale,"SharePointRepository.Paths");
  if (tabName.equals(pathsTabName))
  {
    fillInTransientPathsInfo(context,connectionSequenceNumber);
  }
  else
  {
    final String metadataTabName = Messages.getString(locale,"SharePointRepository.Metadata");
    if (tabName.equals(metadataTabName))
    {
      fillInTransientMetadataInfo(context,connectionSequenceNumber);
    }
  }

  final String[] templates = {
    "editSpecification_Security.html",
    "editSpecification_Paths.html",
    "editSpecification_Metadata.html"
  };
  for (int i = 0; i < templates.length; i++)
  {
    Messages.outputResourceWithVelocity(out,locale,templates[i],context);
  }
}
/** Put the Metadata tab values into the velocity context.
* Walks the top-level nodes of the specification and produces:
* "PATHNAMEATTRIBUTE" (value of the last "pathnameattribute" node, or "" if none),
* "MAPLIST" (one MATCH/REPLACE map per "pathmap" node) and
* "METADATARULES" (one map per rule with THEPATH, THEACTION, ALLFLAG, FIELDLIST and FIELDS).
* Rules come from "metadatarule" nodes and from "startpoint" nodes; a "startpoint" is presented as
* an "include" rule on site/lib/* and is listed only when it selects all metadata or at least one
* field.
*@param velocityContext is the context to fill in.
*@param out is not used by this method.
*@param ds is the document specification to read.
*/
protected static void fillInMetadataTab(Map<String,Object> velocityContext, IHTTPOutput out, Specification ds)
{
  String pathAttributeName = "";
  final MatchMap pathMappings = new MatchMap();
  final List<Map<String,Object>> ruleRows = new ArrayList<Map<String,Object>>();

  int childIndex = 0;
  while (childIndex < ds.getChildCount())
  {
    final SpecificationNode child = ds.getChild(childIndex++);
    final String childType = child.getType();

    if (childType.equals("pathnameattribute"))
    {
      pathAttributeName = child.getAttributeValue("value");
      continue;
    }
    if (childType.equals("pathmap"))
    {
      final String matchPart = child.getAttributeValue("match");
      final String replacePart = child.getAttributeValue("replace");
      pathMappings.appendMatchPair(matchPart,replacePart);
      continue;
    }

    final boolean isStartPoint = childType.equals("startpoint");
    if (!isStartPoint && !childType.equals("metadatarule"))
    {
      continue;
    }

    // Both kinds of rule node yield a path, an action and an "allmetadata" flag.
    final String rulePath;
    final String ruleAction;
    if (isStartPoint)
    {
      final String site = child.getAttributeValue("site");
      final String lib = child.getAttributeValue("lib");
      rulePath = site + "/" + lib + "/*";
      ruleAction = "include";
    }
    else
    {
      rulePath = child.getAttributeValue("match");
      ruleAction = child.getAttributeValue("action");
    }
    String allFlag = child.getAttributeValue("allmetadata");

    final StringBuilder fieldText = new StringBuilder();
    final List<String> fieldNames = new ArrayList<String>();
    if (isStartPoint || ruleAction.equals("include"))
    {
      // Individual fields matter only when "all metadata" is not selected.
      if (!"true".equals(allFlag))
      {
        for (int j = 0; j < child.getChildCount(); j++)
        {
          final SpecificationNode fieldNode = child.getChild(j);
          if (!fieldNode.getType().equals("metafield"))
          {
            continue;
          }
          final String fieldName = fieldNode.getAttributeValue("value");
          if (fieldText.length() > 0)
          {
            fieldText.append(", ");
          }
          fieldText.append(fieldName);
          fieldNames.add(fieldName);
        }
        allFlag = "false";
      }
    }
    else
    {
      allFlag = "";
    }

    // A start point that selects nothing produces no rule row.
    if (isStartPoint && !allFlag.equals("true") && fieldText.length() == 0)
    {
      continue;
    }

    final Map<String,Object> row = new HashMap<String,Object>();
    row.put("THEPATH",rulePath);
    row.put("THEACTION",ruleAction);
    row.put("ALLFLAG",allFlag);
    row.put("FIELDLIST",fieldNames);
    row.put("FIELDS",fieldText.toString());
    ruleRows.add(row);
  }

  // Flatten the accumulated path mappings into MATCH/REPLACE rows.
  final List<Map<String,String>> mappingRows = new ArrayList<Map<String,String>>();
  for (int k = 0; k < pathMappings.getMatchCount(); k++)
  {
    final Map<String,String> row = new HashMap<String,String>();
    row.put("MATCH",pathMappings.getMatchString(k));
    row.put("REPLACE",pathMappings.getReplaceString(k));
    mappingRows.add(row);
  }

  velocityContext.put("PATHNAMEATTRIBUTE",pathAttributeName);
  velocityContext.put("MAPLIST",mappingRows);
  velocityContext.put("METADATARULES",ruleRows);
}
/** Fill in the transient portion of the Metadata tab.
* Reads the in-progress rule path from the thread context, looks up the metadata fields of the
* selected library or list (if any), and, while the path still denotes a site, looks up the child
* sites, libraries and lists available for the next selection. Lookup failures do not propagate;
* their message is placed in the context as "METAMESSAGE" instead.
* Context keys written: "METAPATHSOFAR", "METAPATHSTATE", and, only when available,
* "METAPATHLIBRARY", "METAMESSAGE", "METAFIELDLIST", "METACHILDSITELIST", "METACHILDLIBLIST",
* "METACHILDLISTLIST".
*@param velocityContext is the context to fill in.
*@param connectionSequenceNumber is the unique number of this connection within the job; it forms the prefix of the thread context variable names.
*/
protected void fillInTransientMetadataInfo(Map<String,Object> velocityContext, int connectionSequenceNumber)
{
  String seqPrefix = "s"+connectionSequenceNumber+"_";

  // The following variables may be in the thread context because postspec.jsp put them there:
  // (1) "metapath", which contains the rule path as it currently stands;
  // (2) "metapathstate", which describes what the current path represents. Values checked here are
  //     "unknown", "site", "library", "file" and "list".
  // (3) "metapathlibrary" is the library or list path (if this is known yet).
  // Once the widget is in the state "unknown", it can only be reset, and cannot be further modified
  String metaPathSoFar = (String)currentContext.get(seqPrefix+"metapath");
  String metaPathState = (String)currentContext.get(seqPrefix+"metapathstate");
  String metaPathLibrary = (String)currentContext.get(seqPrefix+"metapathlibrary");
  if (metaPathState == null)
    metaPathState = "unknown";
  if (metaPathSoFar == null)
  {
    // Nothing selected yet: start at the root site
    metaPathSoFar = "/";
    metaPathState = "site";
  }

  String message = null;
  List<NameValue> fieldList = null;
  if (metaPathLibrary != null)
  {
    // Look up metadata fields
    // NOTE(review): assumes metaPathLibrary contains at least one '/'; otherwise substring(0,-1) throws - confirm against postspec.jsp.
    int index = metaPathLibrary.lastIndexOf("/");
    String site = metaPathLibrary.substring(0,index);
    String libOrList = metaPathLibrary.substring(index+1);
    Map<String,String> metaFieldList = null;
    try
    {
      if (metaPathState.equals("library") || metaPathState.equals("file"))
        metaFieldList = getLibFieldList(site,libOrList);
      else if (metaPathState.equals("list"))
        metaFieldList = getListFieldList(site,libOrList);
    }
    catch (ManifoldCFException e)
    {
      // Log through the connector logger, as the site lookup below does, rather than printing to stderr.
      Logging.connectors.warn(e.getMessage(),e);
      message = e.getMessage();
    }
    catch (ServiceInterruption e)
    {
      message = "SharePoint unavailable: "+e.getMessage();
    }
    if (metaFieldList != null)
    {
      // Present the fields sorted by key
      String[] fields = metaFieldList.keySet().toArray(new String[0]);
      java.util.Arrays.sort(fields);
      fieldList = new ArrayList<NameValue>();
      for (String field : fields)
      {
        fieldList.add(new NameValue(field,metaFieldList.get(field)));
      }
    }
  }

  // Grab next site list and lib list
  List<NameValue> childSiteList = null;
  List<NameValue> childLibList = null;
  List<NameValue> childListList = null;
  if (message == null && metaPathState.equals("site"))
  {
    try
    {
      String queryPath = metaPathSoFar;
      if (queryPath.equals("/"))
        queryPath = "";
      childSiteList = getSites(queryPath);
      if (childSiteList == null)
      {
        if (queryPath.length() == 0)
          throw new ManifoldCFException("Root site is unreachable, or user has no permissions");
        // Illegal path - state becomes "unknown".
        metaPathState = "unknown";
        metaPathLibrary = null;
      }
      childLibList = getDocLibsBySite(queryPath);
      if (childLibList == null)
      {
        // Illegal path - state becomes "unknown"
        if (queryPath.length() == 0)
          throw new ManifoldCFException("Root site is unreachable, or user has no permissions");
        metaPathState = "unknown";
        metaPathLibrary = null;
      }
      childListList = getListsBySite(queryPath);
      if (childListList == null)
      {
        // Illegal path - state becomes "unknown"
        if (queryPath.length() == 0)
          throw new ManifoldCFException("Root site is unreachable, or user has no permissions");
        metaPathState = "unknown";
        metaPathLibrary = null;
      }
    }
    catch (ManifoldCFException e)
    {
      Logging.connectors.warn(e.getMessage(),e);
      message = e.getMessage();
    }
    catch (ServiceInterruption e)
    {
      message = "SharePoint unavailable: "+e.getMessage();
    }
  }

  // Path and state have been defaulted above, so they are always present.
  velocityContext.put("METAPATHSOFAR",metaPathSoFar);
  velocityContext.put("METAPATHSTATE",metaPathState);
  if (metaPathLibrary != null)
    velocityContext.put("METAPATHLIBRARY",metaPathLibrary);
  if (message != null)
    velocityContext.put("METAMESSAGE",message);
  if (fieldList != null)
    velocityContext.put("METAFIELDLIST",fieldList);
  if (childSiteList != null)
    velocityContext.put("METACHILDSITELIST",childSiteList);
  if (childLibList != null)
    velocityContext.put("METACHILDLIBLIST",childLibList);
  if (childListList != null)
    velocityContext.put("METACHILDLISTLIST",childListList);
}
/** Put the Paths tab values into the velocity context.
* Builds the "RULES" list, one map per rule with THEPATH, THETYPE and THEACTION. "pathrule" nodes
* are copied as they are. Each include/exclude child of a "startpoint" node becomes a "file" rule on
* site/lib/match; when that child's type is "file" and its match does not start with "*", a second
* rule on site/lib/&#42;/match is added as well.
*@param velocityContext is the context to fill in.
*@param out is not used by this method.
*@param ds is the document specification to read.
*/
protected static void fillInPathsTab(Map<String,Object> velocityContext, IHTTPOutput out, Specification ds)
{
  final List<Map<String,String>> ruleRows = new ArrayList<Map<String,String>>();

  int childIndex = 0;
  while (childIndex < ds.getChildCount())
  {
    final SpecificationNode child = ds.getChild(childIndex++);
    final String childType = child.getType();

    if (childType.equals("startpoint"))
    {
      final String site = child.getAttributeValue("site");
      final String lib = child.getAttributeValue("lib");
      final String libraryPrefix = site + "/" + lib + "/";

      // Every file/folder rule under the start point is turned into one or two path rules.
      for (int j = 0; j < child.getChildCount(); j++)
      {
        final SpecificationNode ruleNode = child.getChild(j);
        final boolean isRule = ruleNode.getType().equals("include") || ruleNode.getType().equals("exclude");
        if (!isRule)
        {
          continue;
        }
        final String matchPart = ruleNode.getAttributeValue("match");
        final String ruleType = ruleNode.getAttributeValue("type");
        final String flavor = ruleNode.getType();

        final Map<String,String> directRow = new HashMap<String,String>();
        directRow.put("THEPATH",libraryPrefix + matchPart);
        directRow.put("THETYPE","file");
        directRow.put("THEACTION",flavor);
        ruleRows.add(directRow);

        if (ruleType.equals("file") && !matchPart.startsWith("*"))
        {
          final Map<String,String> nestedRow = new HashMap<String,String>();
          nestedRow.put("THEPATH",libraryPrefix + "*/" + matchPart);
          nestedRow.put("THETYPE","file");
          nestedRow.put("THEACTION",flavor);
          ruleRows.add(nestedRow);
        }
      }
      continue;
    }

    if (childType.equals("pathrule"))
    {
      final Map<String,String> row = new HashMap<String,String>();
      row.put("THEPATH",child.getAttributeValue("match"));
      row.put("THETYPE",child.getAttributeValue("type"));
      row.put("THEACTION",child.getAttributeValue("action"));
      ruleRows.add(row);
    }
  }

  velocityContext.put("RULES",ruleRows);
}
/** Fill in the transient portion of the Paths tab.
* Reads the path-builder state saved in the thread context and, when the state is "site",
* queries the child sites, libraries and lists of the current path.
* Populates "PATHSOFAR", "PATHSTATE" and, when available, "PATHLIBRARY", "MESSAGE",
* "CHILDSITELIST", "CHILDLIBLIST" and "CHILDLISTLIST".
*@param velocityContext is the context to populate.
*@param connectionSequenceNumber is the unique number of this connection within the job.
*/
protected void fillInTransientPathsInfo(Map<String,Object> velocityContext, int connectionSequenceNumber)
{
  String keyPrefix = "s"+connectionSequenceNumber+"_";

  // The following variables may be in the thread context because the post processing put them there:
  // (1) "specpath", which contains the rule path as it currently stands;
  // (2) "specpathstate", which describes what the current path represents. Values are "unknown", "site", "library", "list".
  // (3) "specpathlibrary", the library/list part of the path, if any.
  // Once the widget is in the state "unknown", it can only be reset, and cannot be further modified.
  String currentPath = (String)currentContext.get(keyPrefix+"specpath");
  String currentState = (String)currentContext.get(keyPrefix+"specpathstate");
  String currentLibrary = (String)currentContext.get(keyPrefix+"specpathlibrary");

  if (currentState == null)
  {
    currentState = "unknown";
    currentLibrary = null;
  }
  if (currentPath == null)
  {
    // Nothing saved yet: start at the root site.
    currentPath = "/";
    currentState = "site";
    currentLibrary = null;
  }

  List<NameValue> subSites = null;
  List<NameValue> libraries = null;
  List<NameValue> lists = null;
  String errorText = null;

  if (currentState.equals("site"))
  {
    try
    {
      // The lookup methods take an empty string for the root site.
      String sitePath = currentPath.equals("/") ? "" : currentPath;
      boolean atRoot = (sitePath.length() == 0);

      subSites = getSites(sitePath);
      if (subSites == null)
      {
        if (atRoot)
          throw new ManifoldCFException("Root site is unreachable, or user has no permissions");
        // Illegal path - state becomes "unknown".
        currentState = "unknown";
        currentLibrary = null;
      }

      libraries = getDocLibsBySite(sitePath);
      if (libraries == null)
      {
        if (atRoot)
          throw new ManifoldCFException("Root site is unreachable, or user has no permissions");
        // Illegal path - state becomes "unknown".
        currentState = "unknown";
        currentLibrary = null;
      }

      lists = getListsBySite(sitePath);
      if (lists == null)
      {
        if (atRoot)
          throw new ManifoldCFException("Root site is unreachable, or user has no permissions");
        // Illegal path - state becomes "unknown".
        currentState = "unknown";
        currentLibrary = null;
      }
    }
    catch (ManifoldCFException e)
    {
      Logging.connectors.warn(e.getMessage(),e);
      errorText = e.getMessage();
    }
    catch (ServiceInterruption e)
    {
      errorText = "SharePoint unavailable: "+e.getMessage();
    }
  }

  // Path and state have been defaulted above, so they are never null at this point.
  velocityContext.put("PATHSOFAR",currentPath);
  velocityContext.put("PATHSTATE",currentState);
  if (currentLibrary != null)
    velocityContext.put("PATHLIBRARY",currentLibrary);
  if (errorText != null)
    velocityContext.put("MESSAGE",errorText);
  if (subSites != null)
    velocityContext.put("CHILDSITELIST",subSites);
  if (libraries != null)
    velocityContext.put("CHILDLIBLIST",libraries);
  if (lists != null)
    velocityContext.put("CHILDLISTLIST",lists);
}
/** Fill in security tab.
* Stores the security setting under "SECURITY" (defaulting to "on" when the specification has no
* "security" node) and the list of access tokens under "ACCESSTOKENS".
*@param velocityContext is the context to populate.
*@param out is the output object (not used by this method).
*@param ds is the document specification to read.
*/
protected static void fillInSecurityTab(Map<String,Object> velocityContext, IHTTPOutput out, Specification ds)
{
  String securitySetting = "on";
  List<String> tokens = new ArrayList<String>();

  for (int index = 0; index < ds.getChildCount(); index++)
  {
    SpecificationNode child = ds.getChild(index);
    String childType = child.getType();
    if (childType.equals("security"))
    {
      // If several security nodes are present, the last one wins.
      securitySetting = child.getAttributeValue("value");
    }
    else if (childType.equals("access"))
    {
      tokens.add(child.getAttributeValue("token"));
    }
  }

  velocityContext.put("SECURITY",securitySetting);
  velocityContext.put("ACCESSTOKENS",tokens);
}
/** Process a specification post.
* This method is called at the start of job's edit or view page, whenever there is a possibility that form
* data for a connection has been posted. Its purpose is to gather form information and modify the
* document specification accordingly. The name of the posted form is always "editjob".
* The connector will be connected before this method can be called.
* Besides rewriting the specification, path-builder operations ("Reset", "AppendSite", "AppendLibrary",
* "AppendList", "AppendText", "Remove") save their resulting state into the thread context.
*@param variableContext contains the post data, including binary file-upload information.
*@param locale is the locale the output is preferred to be in.
*@param ds is the current document specification for this job.
*@param connectionSequenceNumber is the unique number of this connection within the job.
*@return null if all is well, or a string error message if there is an error that should prevent saving of
* the job (and cause a redirection to an error page).
*/
@Override
public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification ds,
  int connectionSequenceNumber)
  throws ManifoldCFException
{
  String seqPrefix = "s"+connectionSequenceNumber+"_";

  // Remove old-style rules, but only if the information would not be lost
  if (variableContext.getParameter(seqPrefix+"specpathcount") != null && variableContext.getParameter(seqPrefix+"metapathcount") != null)
    removeSpecNodesOfType(ds,"startpoint");

  String x = variableContext.getParameter(seqPrefix+"specpathcount");
  if (x != null)
  {
    // Delete all path rule entries first
    removeSpecNodesOfType(ds,"pathrule");

    // Find out how many children were sent
    int pathCount = Integer.parseInt(x);
    // Gather up these
    int i = 0;
    while (i < pathCount)
    {
      String pathDescription = "_"+Integer.toString(i);
      String pathOpName = seqPrefix+"specop"+pathDescription;
      x = variableContext.getParameter(pathOpName);
      if (x != null && x.equals("Delete"))
      {
        // Skip to the next
        i++;
        continue;
      }
      // Get the stored information for this rule.
      String path = variableContext.getParameter(seqPrefix+"specpath"+pathDescription);
      String type = variableContext.getParameter(seqPrefix+"spectype"+pathDescription);
      String action = variableContext.getParameter(seqPrefix+"specflav"+pathDescription);
      SpecificationNode node = new SpecificationNode("pathrule");
      node.setAttribute("match",path);
      node.setAttribute("action",action);
      node.setAttribute("type",type);
      // If there was an insert operation, do it now
      if (x != null && x.equals("Insert Here"))
      {
        // The global parameters are what are used to create the rule
        path = variableContext.getParameter(seqPrefix+"specpath");
        type = variableContext.getParameter(seqPrefix+"spectype");
        action = variableContext.getParameter(seqPrefix+"specflavor");
        SpecificationNode sn = new SpecificationNode("pathrule");
        sn.setAttribute("match",path);
        sn.setAttribute("action",action);
        sn.setAttribute("type",type);
        ds.addChild(ds.getChildCount(),sn);
      }
      ds.addChild(ds.getChildCount(),node);
      i++;
    }

    // See if there's a global path rule operation
    String op = variableContext.getParameter(seqPrefix+"specop");
    if (op != null)
    {
      if (op.equals("Add"))
      {
        String match = variableContext.getParameter(seqPrefix+"specpath");
        String action = variableContext.getParameter(seqPrefix+"specflavor");
        String type = variableContext.getParameter(seqPrefix+"spectype");
        SpecificationNode node = new SpecificationNode("pathrule");
        node.setAttribute("match",match);
        node.setAttribute("action",action);
        node.setAttribute("type",type);
        ds.addChild(ds.getChildCount(),node);
      }
    }

    // See if there's a global pathbuilder operation
    String pathop = variableContext.getParameter(seqPrefix+"specpathop");
    if (pathop != null)
    {
      if (pathop.equals("Reset"))
      {
        currentContext.save(seqPrefix+"specpath","/");
        currentContext.save(seqPrefix+"specpathstate","site");
        currentContext.save(seqPrefix+"specpathlibrary",null);
      }
      else if (pathop.equals("AppendSite"))
      {
        String path = variableContext.getParameter(seqPrefix+"specpath");
        String addon = variableContext.getParameter(seqPrefix+"specsite");
        if (addon != null && addon.length() > 0)
          path = appendPathSegment(path,addon);
        currentContext.save(seqPrefix+"specpath",path);
        currentContext.save(seqPrefix+"specpathstate","site");
        currentContext.save(seqPrefix+"specpathlibrary",null);
      }
      else if (pathop.equals("AppendLibrary"))
      {
        String path = variableContext.getParameter(seqPrefix+"specpath");
        String addon = variableContext.getParameter(seqPrefix+"speclibrary");
        if (addon != null && addon.length() > 0)
        {
          path = appendPathSegment(path,addon);
          currentContext.save(seqPrefix+"specpathstate","library");
          currentContext.save(seqPrefix+"specpathlibrary",path);
        }
        currentContext.save(seqPrefix+"specpath",path);
      }
      else if (pathop.equals("AppendList"))
      {
        String path = variableContext.getParameter(seqPrefix+"specpath");
        String addon = variableContext.getParameter(seqPrefix+"speclist");
        if (addon != null && addon.length() > 0)
        {
          path = appendPathSegment(path,addon);
          currentContext.save(seqPrefix+"specpathstate","list");
          currentContext.save(seqPrefix+"specpathlibrary",path);
        }
        currentContext.save(seqPrefix+"specpath",path);
      }
      else if (pathop.equals("AppendText"))
      {
        String path = variableContext.getParameter(seqPrefix+"specpath");
        String library = variableContext.getParameter(seqPrefix+"specpathlibrary");
        String addon = variableContext.getParameter(seqPrefix+"specmatch");
        if (addon != null && addon.length() > 0)
        {
          path = appendPathSegment(path,addon);
          currentContext.save(seqPrefix+"specpathstate","unknown");
        }
        currentContext.save(seqPrefix+"specpath",path);
        currentContext.save(seqPrefix+"specpathlibrary",library);
      }
      else if (pathop.equals("Remove"))
      {
        // Strip off end
        String path = stripLastPathSegment(variableContext.getParameter(seqPrefix+"specpath"));
        currentContext.save(seqPrefix+"specpath",path);
        // Now, adjust state.
        String pathState = variableContext.getParameter(seqPrefix+"specpathstate");
        if (pathState.equals("library") || pathState.equals("list"))
          pathState = "site";
        currentContext.save(seqPrefix+"specpathstate",pathState);
      }
    }
  }

  x = variableContext.getParameter(seqPrefix+"metapathcount");
  if (x != null)
  {
    // Delete all metadata rule entries first
    removeSpecNodesOfType(ds,"metadatarule");

    // Find out how many children were sent
    int pathCount = Integer.parseInt(x);
    // Gather up these
    int i = 0;
    while (i < pathCount)
    {
      String pathDescription = "_"+Integer.toString(i);
      String pathOpName = seqPrefix+"metaop"+pathDescription;
      x = variableContext.getParameter(pathOpName);
      if (x != null && x.equals("Delete"))
      {
        // Skip to the next
        i++;
        continue;
      }
      // Get the stored information for this rule.
      String path = variableContext.getParameter(seqPrefix+"metapath"+pathDescription);
      String action = variableContext.getParameter(seqPrefix+"metaflav"+pathDescription);
      String allmetadata = variableContext.getParameter(seqPrefix+"metaall"+pathDescription);
      String[] metadataFields = variableContext.getParameterValues(seqPrefix+"metafields"+pathDescription);
      SpecificationNode node = new SpecificationNode("metadatarule");
      node.setAttribute("match",path);
      node.setAttribute("action",action);
      if (action.equals("include"))
        addMetadataSelection(node,allmetadata,metadataFields);

      if (x != null && x.equals("Insert Here"))
      {
        // Insert the new global rule information now
        path = variableContext.getParameter(seqPrefix+"metapath");
        action = variableContext.getParameter(seqPrefix+"metaflavor");
        allmetadata = variableContext.getParameter(seqPrefix+"metaall");
        metadataFields = variableContext.getParameterValues(seqPrefix+"metafields");
        SpecificationNode sn = new SpecificationNode("metadatarule");
        sn.setAttribute("match",path);
        sn.setAttribute("action",action);
        // The metadata selection belongs to the inserted rule (sn), not to the existing rule (node);
        // writing "allmetadata" onto node would clobber the existing rule and drop the flag from the new one.
        if (action.equals("include"))
          addMetadataSelection(sn,allmetadata,metadataFields);
        ds.addChild(ds.getChildCount(),sn);
      }
      ds.addChild(ds.getChildCount(),node);
      i++;
    }

    // See if there's a global metadata rule operation
    String op = variableContext.getParameter(seqPrefix+"metaop");
    if (op != null)
    {
      if (op.equals("Add"))
      {
        String match = variableContext.getParameter(seqPrefix+"metapath");
        String action = variableContext.getParameter(seqPrefix+"metaflavor");
        SpecificationNode node = new SpecificationNode("metadatarule");
        node.setAttribute("match",match);
        node.setAttribute("action",action);
        if (action.equals("include"))
        {
          String allmetadata = variableContext.getParameter(seqPrefix+"metaall");
          String[] metadataFields = variableContext.getParameterValues(seqPrefix+"metafields");
          addMetadataSelection(node,allmetadata,metadataFields);
        }
        ds.addChild(ds.getChildCount(),node);
      }
    }

    // See if there's a global pathbuilder operation
    String pathop = variableContext.getParameter(seqPrefix+"metapathop");
    if (pathop != null)
    {
      if (pathop.equals("Reset"))
      {
        currentContext.save(seqPrefix+"metapath","/");
        currentContext.save(seqPrefix+"metapathstate","site");
        currentContext.save(seqPrefix+"metapathlibrary",null);
      }
      else if (pathop.equals("AppendSite"))
      {
        String path = variableContext.getParameter(seqPrefix+"metapath");
        String addon = variableContext.getParameter(seqPrefix+"metasite");
        if (addon != null && addon.length() > 0)
          path = appendPathSegment(path,addon);
        currentContext.save(seqPrefix+"metapath",path);
        currentContext.save(seqPrefix+"metapathstate","site");
        currentContext.save(seqPrefix+"metapathlibrary",null);
      }
      else if (pathop.equals("AppendLibrary"))
      {
        String path = variableContext.getParameter(seqPrefix+"metapath");
        String addon = variableContext.getParameter(seqPrefix+"metalibrary");
        if (addon != null && addon.length() > 0)
        {
          path = appendPathSegment(path,addon);
          currentContext.save(seqPrefix+"metapathstate","library");
          currentContext.save(seqPrefix+"metapathlibrary",path);
        }
        currentContext.save(seqPrefix+"metapath",path);
      }
      else if (pathop.equals("AppendList"))
      {
        String path = variableContext.getParameter(seqPrefix+"metapath");
        String addon = variableContext.getParameter(seqPrefix+"metalist");
        if (addon != null && addon.length() > 0)
        {
          path = appendPathSegment(path,addon);
          currentContext.save(seqPrefix+"metapathstate","list");
          currentContext.save(seqPrefix+"metapathlibrary",path);
          // Automatically add on wildcard for list item part of the match
          path += "/*";
        }
        currentContext.save(seqPrefix+"metapath",path);
      }
      else if (pathop.equals("AppendText"))
      {
        String path = variableContext.getParameter(seqPrefix+"metapath");
        String library = variableContext.getParameter(seqPrefix+"metapathlibrary");
        String addon = variableContext.getParameter(seqPrefix+"metamatch");
        if (addon != null && addon.length() > 0)
        {
          path = appendPathSegment(path,addon);
          if (library != null)
            currentContext.save(seqPrefix+"metapathstate","file");
          else
            currentContext.save(seqPrefix+"metapathstate","unknown");
        }
        currentContext.save(seqPrefix+"metapath",path);
        currentContext.save(seqPrefix+"metapathlibrary",library);
      }
      else if (pathop.equals("Remove"))
      {
        String pathState = variableContext.getParameter(seqPrefix+"metapathstate");
        String path;
        if (pathState.equals("file"))
        {
          // Drop the text part: fall back to the saved library path.
          pathState = "library";
          path = variableContext.getParameter(seqPrefix+"metapathlibrary");
        }
        else if (pathState.equals("list") || pathState.equals("library"))
        {
          // Drop the library/list part: fall back to its parent site.
          pathState = "site";
          path = stripLastPathSegment(variableContext.getParameter(seqPrefix+"metapathlibrary"));
          currentContext.save(seqPrefix+"metapathlibrary",null);
        }
        else
        {
          path = stripLastPathSegment(variableContext.getParameter(seqPrefix+"metapath"));
        }
        currentContext.save(seqPrefix+"metapathstate",pathState);
        currentContext.save(seqPrefix+"metapath",path);
      }
    }
  }

  String xc = variableContext.getParameter(seqPrefix+"specsecurity");
  if (xc != null)
  {
    // Delete all security entries first
    removeSpecNodesOfType(ds,"security");

    SpecificationNode node = new SpecificationNode("security");
    node.setAttribute("value",xc);
    ds.addChild(ds.getChildCount(),node);
  }

  xc = variableContext.getParameter(seqPrefix+"tokencount");
  if (xc != null)
  {
    // Delete all access token entries first
    removeSpecNodesOfType(ds,"access");

    int accessCount = Integer.parseInt(xc);
    int i = 0;
    while (i < accessCount)
    {
      String accessDescription = "_"+Integer.toString(i);
      String accessOpName = seqPrefix+"accessop"+accessDescription;
      xc = variableContext.getParameter(accessOpName);
      if (xc != null && xc.equals("Delete"))
      {
        // Next row
        i++;
        continue;
      }
      // Get the stuff we need
      String accessSpec = variableContext.getParameter(seqPrefix+"spectoken"+accessDescription);
      SpecificationNode node = new SpecificationNode("access");
      node.setAttribute("token",accessSpec);
      ds.addChild(ds.getChildCount(),node);
      i++;
    }

    String op = variableContext.getParameter(seqPrefix+"accessop");
    if (op != null && op.equals("Add"))
    {
      String accessspec = variableContext.getParameter(seqPrefix+"spectoken");
      SpecificationNode node = new SpecificationNode("access");
      node.setAttribute("token",accessspec);
      ds.addChild(ds.getChildCount(),node);
    }
  }

  xc = variableContext.getParameter(seqPrefix+"specpathnameattribute");
  if (xc != null)
  {
    // Delete old one
    removeSpecNodesOfType(ds,"pathnameattribute");
    // An empty value means "no path name attribute", so nothing is added back.
    if (xc.length() > 0)
    {
      SpecificationNode node = new SpecificationNode("pathnameattribute");
      node.setAttribute("value",xc);
      ds.addChild(ds.getChildCount(),node);
    }
  }

  xc = variableContext.getParameter(seqPrefix+"specmappingcount");
  if (xc != null)
  {
    // Delete old spec
    removeSpecNodesOfType(ds,"pathmap");

    // Now, go through the data and assemble a new list.
    int mappingCount = Integer.parseInt(xc);
    // Gather up these
    int i = 0;
    while (i < mappingCount)
    {
      String pathDescription = "_"+Integer.toString(i);
      String pathOpName = seqPrefix+"specmappingop"+pathDescription;
      xc = variableContext.getParameter(pathOpName);
      if (xc != null && xc.equals("Delete"))
      {
        // Skip to the next
        i++;
        continue;
      }
      // Inserts won't happen until the very end
      String match = variableContext.getParameter(seqPrefix+"specmatch"+pathDescription);
      String replace = variableContext.getParameter(seqPrefix+"specreplace"+pathDescription);
      SpecificationNode node = new SpecificationNode("pathmap");
      node.setAttribute("match",match);
      node.setAttribute("replace",replace);
      ds.addChild(ds.getChildCount(),node);
      i++;
    }

    // Check for add
    xc = variableContext.getParameter(seqPrefix+"specmappingop");
    if (xc != null && xc.equals("Add"))
    {
      String match = variableContext.getParameter(seqPrefix+"specmatch");
      String replace = variableContext.getParameter(seqPrefix+"specreplace");
      SpecificationNode node = new SpecificationNode("pathmap");
      node.setAttribute("match",match);
      node.setAttribute("replace",replace);
      ds.addChild(ds.getChildCount(),node);
    }
  }
  return null;
}

/** Remove every top-level child of the given type from a specification. */
private static void removeSpecNodesOfType(Specification ds, String nodeType)
{
  int i = 0;
  while (i < ds.getChildCount())
  {
    // Only advance when nothing was removed; removal shifts the following children down.
    if (ds.getChild(i).getType().equals(nodeType))
      ds.removeChild(i);
    else
      i++;
  }
}

/** Append a segment to a path-builder path, inserting "/" unless the path is exactly "/". */
private static String appendPathSegment(String path, String segment)
{
  if (path.equals("/"))
    return path + segment;
  return path + "/" + segment;
}

/** Drop everything from the last "/" onward; an empty result becomes the root path "/". */
private static String stripLastPathSegment(String path)
{
  String parent = path.substring(0,path.lastIndexOf("/"));
  return (parent.length() == 0) ? "/" : parent;
}

/** Record the metadata selection (optional "allmetadata" attribute plus "metafield" children) on a metadata rule node. */
private static void addMetadataSelection(SpecificationNode rule, String allMetadata, String[] metadataFields)
{
  if (allMetadata != null)
    rule.setAttribute("allmetadata",allMetadata);
  if (metadataFields != null)
  {
    for (int j = 0; j < metadataFields.length; j++)
    {
      SpecificationNode fieldNode = new SpecificationNode("metafield");
      fieldNode.setAttribute("value",metadataFields[j]);
      rule.addChild(j,fieldNode);
    }
  }
}
/** View specification.
* This method is called in the body section of a job's view page. Its purpose is to present the document
* specification information to the user. The coder can presume that the HTML that is output from
* this configuration will be within appropriate &lt;html&gt; and &lt;body&gt; tags.
* The connector will be connected before this method can be called.
* The security, paths and metadata sections are gathered into one velocity context, which is then
* rendered with the "viewSpecification.html" template.
*@param out is the output to which any HTML should be sent.
*@param locale is the locale the output is preferred to be in.
*@param ds is the current document specification for this job.
*@param connectionSequenceNumber is the unique number of this connection within the job.
*/
@Override
public void viewSpecification(IHTTPOutput out, Locale locale, Specification ds,
  int connectionSequenceNumber)
  throws ManifoldCFException, IOException
{
  Map<String,Object> templateValues = new HashMap<String,Object>();
  String sequenceNumber = Integer.toString(connectionSequenceNumber);
  templateValues.put("SeqNum", sequenceNumber);

  // Populate one section at a time, in the same order as always.
  fillInSecurityTab(templateValues,out,ds);
  fillInPathsTab(templateValues,out,ds);
  fillInMetadataTab(templateValues,out,ds);

  Messages.outputResourceWithVelocity(out,locale,"viewSpecification.html",templateValues);
}
/** Daemon thread that performs an HTTP GET of a URL and copies the response body to an output stream.
* The body is copied only when the response status is 200. Any throwable raised while fetching is
* captured and rethrown to the caller by finishUp().
*/
protected static class ExecuteMethodThread extends Thread
{
  protected final HttpClient httpClient;
  protected final String url;
  protected final OutputStream os;
  /** Throwable caught by run(), or null if the fetch completed without one. */
  protected Throwable exception = null;
  /** HTTP status code of the response; stays 0 if no response was received. */
  protected int returnCode = 0;

  /** Constructor.
  *@param httpClient is the client used to execute the request.
  *@param url is the URL to fetch.
  *@param os is the stream that receives the response body; it is not closed by this thread.
  */
  public ExecuteMethodThread( HttpClient httpClient, String url, OutputStream os )
  {
    super();
    setDaemon(true);
    this.httpClient = httpClient;
    this.url = url;
    this.os = os;
  }

  @Override
  public void run()
  {
    try
    {
      HttpGet method = new HttpGet( url );
      // Try block to insure that the connection gets cleaned up
      try
      {
        // Begin the fetch
        HttpResponse response = httpClient.execute(method);
        returnCode = response.getStatusLine().getStatusCode();
        if (returnCode == 200)
        {
          // Process the data
          HttpEntity entity = response.getEntity();
          if (entity != null)
          {
            InputStream is = entity.getContent();
            // Copy the whole body to the caller's stream.
            byte[] transferBuffer = new byte[65536];
            while (true)
            {
              int amt = is.read(transferBuffer);
              if (amt == -1)
                break;
              os.write(transferBuffer,0,amt);
            }
          }
        }
      }
      finally
      {
        // Abort the request on every path (success, non-200, or failure) so the connection is given up.
        // The content stream is deliberately not closed explicitly here.
        method.abort();
      }
    }
    catch (Throwable e)
    {
      // Remember the failure; it is rethrown on the caller's thread by finishUp().
      this.exception = e;
    }
  }

  /** Wait for the thread to finish and report its outcome.
  *@return the HTTP status code of the response.
  *@throws InterruptedException if the calling thread is interrupted while waiting.
  *@throws IOException if the fetch failed with an I/O error.
  *@throws org.apache.http.HttpException if the fetch failed with an HTTP protocol error.
  */
  public int finishUp()
    throws InterruptedException, IOException, org.apache.http.HttpException
  {
    join();
    if (exception != null)
    {
      if (exception instanceof IOException)
        throw (IOException)exception;
      else if (exception instanceof Error)
        throw (Error)exception;
      else if (exception instanceof org.apache.http.HttpException)
        throw (org.apache.http.HttpException)exception;
      else if (exception instanceof RuntimeException)
        throw (RuntimeException)exception;
      else
        // Keep the original throwable as the cause so its stack trace is not lost.
        throw new RuntimeException("Unexpected exception type thrown: "+exception.getClass().getName(),exception);
    }
    return returnCode;
  }
}
/**
* Gets the fields of the given document library.
* Establishes the session, resolves the library's ID through the proxy, and asks the proxy for that ID's field list.
* @param parentSite - parent site path (unencoded; it is encoded here before being sent)
* @param docLibrary - document library name
* @return the field map returned by the proxy
*/
public Map<String,String> getLibFieldList( String parentSite, String docLibrary )
  throws ServiceInterruption, ManifoldCFException
{
  getSession();
  Map<String,String> fields = proxy.getFieldList(
    encodePath(parentSite),
    proxy.getDocLibID( encodePath(parentSite), parentSite, docLibrary ) );
  return fields;
}
/**
* Gets the fields of the given list.
* Establishes the session, resolves the list's ID through the proxy, and asks the proxy for that ID's field list.
* @param parentSite - parent site path (unencoded; it is encoded here before being sent)
* @param listName - list name
* @return the field map returned by the proxy
*/
public Map<String,String> getListFieldList( String parentSite, String listName )
  throws ServiceInterruption, ManifoldCFException
{
  getSession();
  Map<String,String> fields = proxy.getFieldList(
    encodePath(parentSite),
    proxy.getListID( encodePath(parentSite), parentSite, listName ) );
  return fields;
}
/**
* Gets the sites/subsites of the given parent site.
* Establishes the session first, then delegates to the proxy with the encoded site path.
* @param parentSite the unencoded parent site path to search for subsites, empty for root.
* @return the list of sites returned by the proxy
*/
public List<NameValue> getSites( String parentSite )
  throws ServiceInterruption, ManifoldCFException
{
  getSession();
  List<NameValue> subSites = proxy.getSites( encodePath(parentSite) );
  return subSites;
}
/**
* Gets the document libraries of the given parent site.
* Establishes the session first, then delegates to the proxy with both the encoded and the unencoded site path.
* @param parentSite the unencoded parent site to search for libraries, empty for root.
* @return the list of libraries returned by the proxy
*/
public List<NameValue> getDocLibsBySite( String parentSite )
  throws ManifoldCFException, ServiceInterruption
{
  getSession();
  List<NameValue> libraries = proxy.getDocumentLibraries( encodePath(parentSite), parentSite );
  return libraries;
}
/**
* Gets the lists of the given parent site.
* Establishes the session first, then delegates to the proxy with both the encoded and the unencoded site path.
* @param parentSite the unencoded parent site to search for lists, empty for root.
* @return the list of lists returned by the proxy
*/
public List<NameValue> getListsBySite( String parentSite )
  throws ManifoldCFException, ServiceInterruption
{
  getSession();
  List<NameValue> lists = proxy.getLists( encodePath(parentSite), parentSite );
  return lists;
}
// Protected static methods
/** Check if a library should be included, given a document specification.
* The specification is scanned in order and the first rule that applies decides the outcome:
* an old-style "startpoint" whose site/lib equals the path includes it; a "pathrule" of type "library"
* that matches exactly includes or excludes it according to its action; an include "pathrule" of type
* "file" or "folder" that partially matches includes it. With no applicable rule the library is not included.
*@param libraryPath is the unencoded canonical library name (including site path from root site), without any starting slash.
*@param documentSpecification is the specification.
*@return true if it should be included.
*/
protected boolean checkIncludeLibrary( String libraryPath, Specification documentSpecification )
{
  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Checking whether to include library '" + libraryPath + "'" );

  for (int ruleIndex = 0; ruleIndex < documentSpecification.getChildCount(); ruleIndex++)
  {
    SpecificationNode rule = documentSpecification.getChild(ruleIndex);
    String nodeType = rule.getType();

    if (nodeType.equals("startpoint"))
    {
      // Old style rule! Both site and lib are unencoded; see if together they equal the library path.
      String site = rule.getAttributeValue( "site" );
      String lib = rule.getAttributeValue( "lib" );
      // Old-style matches have a preceding "/" when there's no subsite...
      if (libraryPath.equals(site + "/" + lib))
      {
        // A matching startpoint is an implicit inclusion.
        if (Logging.connectors.isDebugEnabled())
          Logging.connectors.debug("SharePoint: Library path '"+libraryPath+"' matched old-style startpoint with site '"+site+"' and library '"+lib+"' - including");
        return true;
      }
      continue;
    }

    if (!nodeType.equals("pathrule"))
      continue;

    // New-style rule: we do what the first applicable rule tells us to do.
    String pathMatch = rule.getAttributeValue("match");
    String action = rule.getAttributeValue("action");
    String ruleType = rule.getAttributeValue("type");

    if (checkMatch(libraryPath,0,pathMatch))
    {
      // An exact match only applies when the rule is a library rule; otherwise move on to the next rule.
      if (!ruleType.equals("library"))
        continue;
      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug("SharePoint: Library '"+libraryPath+"' exactly matched rule path '"+pathMatch+"'");
      boolean include = action.equals("include");
      if (Logging.connectors.isDebugEnabled())
      {
        if (include)
          Logging.connectors.debug("SharePoint: Including library '"+libraryPath+"'");
        else
          Logging.connectors.debug("SharePoint: Excluding library '"+libraryPath+"'");
      }
      return include;
    }

    // No exact match: an include rule for files or folders below this library is enough to proceed.
    boolean fileRule = ruleType.equals("file");
    boolean folderRule = !fileRule && ruleType.equals("folder");
    if ((fileRule || folderRule) && checkPartialPathMatch(libraryPath,0,pathMatch,1) && action.equals("include"))
    {
      if (Logging.connectors.isDebugEnabled())
      {
        if (fileRule)
          Logging.connectors.debug("SharePoint: Library '"+libraryPath+"' partially matched file rule path '"+pathMatch+"' - including");
        else
          Logging.connectors.debug("SharePoint: Library '"+libraryPath+"' partially matched folder rule path '"+pathMatch+"' - including");
      }
      return true;
    }
  }

  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug("SharePoint: Not including library '"+libraryPath+"' because no matching rule");
  return false;
}
/** Check if a list should be included, given a document specification.
* Only "pathrule" nodes are considered. The first rule of type "list" whose match pattern matches the
* path exactly decides the outcome according to its action; with no such rule the list is not included.
*@param listPath is the unencoded canonical list name (including site path from root site), without any starting slash.
*@param documentSpecification is the specification.
*@return true if it should be included.
*/
protected boolean checkIncludeList( String listPath, Specification documentSpecification )
{
  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Checking whether to include list '" + listPath + "'" );

  for (int ruleIndex = 0; ruleIndex < documentSpecification.getChildCount(); ruleIndex++)
  {
    SpecificationNode rule = documentSpecification.getChild(ruleIndex);
    if (!rule.getType().equals("pathrule"))
      continue;

    // New-style rule: we do what the first applicable rule tells us to do.
    String pathMatch = rule.getAttributeValue("match");
    String action = rule.getAttributeValue("action");
    String ruleType = rule.getAttributeValue("type");

    // The rule applies only on an exact match AND when it is a list rule.
    if (!checkMatch(listPath,0,pathMatch) || !ruleType.equals("list"))
      continue;

    if (Logging.connectors.isDebugEnabled())
      Logging.connectors.debug("SharePoint: List '"+listPath+"' exactly matched rule path '"+pathMatch+"'");
    boolean include = action.equals("include");
    if (Logging.connectors.isDebugEnabled())
    {
      if (include)
        Logging.connectors.debug("SharePoint: Including list '"+listPath+"'");
      else
        Logging.connectors.debug("SharePoint: Excluding list '"+listPath+"'");
    }
    return include;
  }

  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug("SharePoint: Not including list '"+listPath+"' because no matching rule");
  return false;
}
/** Decide whether a site should be visited, given a document specification.
 * The specification's child nodes are examined in order, and the first node that applies
 * decides the answer.  If no node applies, the site is not included.
 *@param sitePath is the unencoded canonical site path name from the root site level.
 *@param documentSpecification is the specification.
 *@return true if it should be included.
 */
protected boolean checkIncludeSite( String sitePath, Specification documentSpecification )
{
  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Checking whether to include site '" + sitePath + "'" );

  // Walk the specification, handling both old-style "startpoint" nodes and new-style "pathrule" nodes.
  for (int ruleIndex = 0; ruleIndex < documentSpecification.getChildCount(); ruleIndex++)
  {
    final SpecificationNode rule = documentSpecification.getChild(ruleIndex);
    final String nodeType = rule.getType();

    if (nodeType.equals("startpoint"))
    {
      // Old-style rule.  The startpoint's site is normalized to begin with "/", and the site path
      // must then be a whole-component prefix of it: whatever follows the prefix in the startpoint
      // site must be nothing at all or must begin with "/".
      // NOTE(review): this comparison can only succeed when sitePath is empty or itself begins
      // with "/" - confirm what the callers actually pass.
      String startSite = rule.getAttributeValue( "site" );
      if (!startSite.startsWith("/"))
        startSite = "/" + startSite;
      final int sitePathLength = sitePath.length();
      if (startSite.startsWith(sitePath)
        && (sitePathLength == 1 || startSite.length() == sitePathLength || startSite.charAt(sitePathLength) == '/'))
      {
        // A matching startpoint is an implicit inclusion.
        if (Logging.connectors.isDebugEnabled())
          Logging.connectors.debug("SharePoint: Site path '"+sitePath+"' matched old-style startpoint with site '"+startSite+"' - including");
        return true;
      }
      continue;
    }

    if (!nodeType.equals("pathrule"))
      continue;

    // New-style rule.
    final String pathMatch = rule.getAttributeValue("match");
    final String action = rule.getAttributeValue("action");
    final String ruleType = rule.getAttributeValue("type");

    if (checkMatch(sitePath,0,pathMatch))
    {
      // The whole site path matched the pattern; the rule only applies if it is a site rule.
      if (!ruleType.equals("site"))
        continue;
      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug("SharePoint: Site '"+sitePath+"' exactly matched rule path '"+pathMatch+"'");
      final boolean included = action.equals("include");
      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug((included ? "SharePoint: Including site '" : "SharePoint: Excluding site '")+sitePath+"'");
      return included;
    }

    // No exact match.  The site may still need to be visited because a deeper include rule
    // could match something underneath it.  The rule type decides how many further path
    // sections the rest of the pattern has to allow for.
    final int extraSections;
    final String matchedText;
    if (ruleType.equals("library"))
    {
      extraSections = 1;
      matchedText = "' partially matched library rule path '";
    }
    else if (ruleType.equals("list"))
    {
      extraSections = 1;
      matchedText = "' partially matched list rule path '";
    }
    else if (ruleType.equals("site"))
    {
      extraSections = 0;
      matchedText = "' partially matched site rule path '";
    }
    else if (ruleType.equals("file"))
    {
      extraSections = 2;
      matchedText = "' partially matched file rule path '";
    }
    else if (ruleType.equals("folder"))
    {
      extraSections = 2;
      matchedText = "' partially matched folder rule path '";
    }
    else
      continue;

    if (checkPartialPathMatch(sitePath,0,pathMatch,extraSections) && action.equals("include"))
    {
      if (Logging.connectors.isDebugEnabled())
        Logging.connectors.debug("SharePoint: Site '"+sitePath+matchedText+pathMatch+"' - including");
      return true;
    }
  }

  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug("SharePoint: Not including site '"+sitePath+"' because no matching rule");
  return false;
}
/** Work out which metadata should accompany a file or list item.
 * The specification's child nodes are examined in order.  The first old-style "startpoint" whose
 * site/library prefix matches the path, or the first new-style "metadatarule" whose pattern
 * matches the whole path, determines the result.  A matching "metadatarule" whose action is not
 * "include" yields an empty description, as does a path that nothing matches.
 *@param filePath is the unencoded path to a file or item, including sites and library/list, beneath the root site.
 *@param documentSpecification is the document specification.
 *@return the metadata description appropriate to the file.
 */
protected MetadataInformation getMetadataSpecification( String filePath, Specification documentSpecification )
{
  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Finding metadata to include for document/item '" + filePath + "'." );

  final MetadataInformation selection = new MetadataInformation();
  for (int ruleIndex = 0; ruleIndex < documentSpecification.getChildCount(); ruleIndex++)
  {
    final SpecificationNode rule = documentSpecification.getChild(ruleIndex);
    final String nodeType = rule.getType();
    if (nodeType.equals("startpoint"))
    {
      // Old-style rule: site and library are unencoded and together form the required path prefix.
      final String site = rule.getAttributeValue( "site" );
      final String lib = rule.getAttributeValue( "lib" );
      if (filePath.startsWith(site + "/" + lib + "/"))
      {
        // A matching startpoint is an implicit inclusion, so its metadata settings apply.
        collectMetadataSelection(rule, selection);
        return selection;
      }
    }
    else if (nodeType.equals("metadatarule"))
    {
      // New-style rule: the first rule whose pattern matches is the one that is obeyed.
      if (checkMatch(filePath,0,rule.getAttributeValue("match")))
      {
        if (rule.getAttributeValue("action").equals("include"))
          collectMetadataSelection(rule, selection);
        return selection;
      }
    }
  }
  return selection;
}

/** Copy the metadata settings of one specification node into a metadata description.
 * If the node's "allmetadata" attribute is "true", all metadata is selected; otherwise the
 * "value" of each "metafield" child is added as a selected field.
 *@param source is the specification node carrying the settings.
 *@param target is the description to update.
 */
private static void collectMetadataSelection( SpecificationNode source, MetadataInformation target )
{
  final String allMetadata = source.getAttributeValue("allmetadata");
  if (allMetadata != null && allMetadata.equals("true"))
  {
    target.setAllMetadata();
    return;
  }
  for (int childIndex = 0; childIndex < source.getChildCount(); childIndex++)
  {
    final SpecificationNode child = source.getChild(childIndex);
    if (child.getType().equals("metafield"))
      target.addMetadataField(child.getAttributeValue("value"));
  }
}
/** Check if a file should be included.
 * Specification nodes are examined in order and the first one that applies decides the result:
 * either an old-style "startpoint" whose site/library prefix matches the path (its include and
 * exclude children are then consulted in order, and the file is excluded if none of them match),
 * or a new-style "pathrule" of type "file" whose pattern matches the whole path.  A file that
 * no node applies to is excluded.
 *@param filePath is the path to the file, including sites and library, beneath the root site.
 *@param documentSpecification is the document specification.
 *@return true if file should be included.
 */
protected boolean checkIncludeFile( String filePath, Specification documentSpecification )
{
  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug( "SharePoint: Checking whether to include document '" + filePath + "'" );

  // Break up the file/folder part of the path.  A path with no "/" at all is treated as a bare
  // file name with an empty folder part, rather than failing in substring().
  int lastSlash = filePath.lastIndexOf("/");
  String pathPart = (lastSlash < 0) ? "" : filePath.substring(0,lastSlash);
  String filePart = filePath.substring(lastSlash+1);

  // Scan the spec rules looking for a library match, and extract the information if found.
  // We need to understand both the old-style rules (startpoints), and the new style (pathrules).
  int i = 0;
  while (i < documentSpecification.getChildCount())
  {
    SpecificationNode sn = documentSpecification.getChild(i++);
    if ( sn.getType().equals("startpoint") )
    {
      // Old style rule!
      String site = sn.getAttributeValue( "site" );
      String lib = sn.getAttributeValue( "lib" );
      // Both site and lib are unencoded.  The string we are matching starts with "/" if the site is empty.
      String pathMatch = site + "/" + lib + "/";
      if (filePath.startsWith(pathMatch))
      {
        // Hey, it matched!
        if (Logging.connectors.isDebugEnabled())
          Logging.connectors.debug("SharePoint: File path '"+filePath+"' matched old-style startpoint with site '"+site+"' and library '"+lib+"'");
        // Non-file rules are matched against the folder path below the library.  pathPart has
        // no trailing "/", so for a file sitting directly in the library it is one character
        // shorter than pathMatch; clamp the start index so that case is matched as an empty
        // folder path instead of indexing past the end of pathPart.
        int restOfPathIndex = Math.min(pathMatch.length(), pathPart.length());
        // We need to walk through the subrules and see whether it's in or out.
        int j = 0;
        while (j < sn.getChildCount())
        {
          SpecificationNode node = sn.getChild(j++);
          String flavor = node.getType();
          if (flavor.equals("include") || flavor.equals("exclude"))
          {
            String match = node.getAttributeValue("match");
            String type = node.getAttributeValue("type");
            String sourceMatch;
            int sourceIndex;
            if ( type.equals("file") )
            {
              // File rules look only at the file name.
              sourceMatch = filePart;
              sourceIndex = 0;
            }
            else
            {
              // Everything else looks at the folder path beneath the library.
              sourceMatch = pathPart;
              sourceIndex = restOfPathIndex;
            }
            if ( checkMatch(sourceMatch,sourceIndex,match) )
            {
              // Our file path matched the rule.
              if (flavor.equals("include"))
              {
                if (Logging.connectors.isDebugEnabled())
                  Logging.connectors.debug("SharePoint: File path '"+filePath+"' matched old-style inclusion rule '"+match+"' - including");
                return true;
              }
              if (Logging.connectors.isDebugEnabled())
                Logging.connectors.debug("SharePoint: File path '"+filePath+"' matched old-style exclusion rule '"+match+"' - excluding");
              return false;
            }
          }
        }
        // Didn't match any of the file rules; therefore exclude.
        if (Logging.connectors.isDebugEnabled())
          Logging.connectors.debug("SharePoint: File path '"+filePath+"' did not match any old-style inclusion/exclusion rules - excluding");
        return false;
      }
    }
    else if (sn.getType().equals("pathrule"))
    {
      // New style rule!
      String pathMatch = sn.getAttributeValue("match");
      String action = sn.getAttributeValue("action");
      String ruleType = sn.getAttributeValue("type");
      // Find out if we match EXACTLY.  There are no "partial matches" for files.
      if (checkMatch(filePath,0,pathMatch))
      {
        // If this is true, the type also has to match if the rule is to apply.
        if (ruleType.equals("file"))
        {
          if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug("SharePoint: File '"+filePath+"' exactly matched rule path '"+pathMatch+"'");
          if (action.equals("include"))
          {
            if (Logging.connectors.isDebugEnabled())
              Logging.connectors.debug("SharePoint: Including file '"+filePath+"'");
            return true;
          }
          if (Logging.connectors.isDebugEnabled())
            Logging.connectors.debug("SharePoint: Excluding file '"+filePath+"'");
          return false;
        }
      }
    }
  }
  if (Logging.connectors.isDebugEnabled())
    Logging.connectors.debug("SharePoint: File path '"+filePath+"' does not match any rules - excluding");
  return false;
}
/** Check if a list item attachment should be included.
 * The specification is not consulted; every attachment is accepted.
 *@param attachmentPath is the path to the attachment, including sites and list name, beneath the root site.
 *@param documentSpecification is the document specification.
 *@return true if file should be included.
 */
protected boolean checkIncludeListItemAttachment( String attachmentPath, Specification documentSpecification )
{
  if (Logging.connectors.isDebugEnabled())
  {
    Logging.connectors.debug( "SharePoint: Checking whether to include list item attachment '" + attachmentPath + "'" );
  }
  // No rule type exists for attachments, so there is nothing that could exclude one.
  return true;
}
/** Check if a list item should be included.
 * The specification is not consulted; every list item is accepted.
 *@param itemPath is the path to the item, including sites and list name, beneath the root site.
 *@param documentSpecification is the document specification.
 *@return true if file should be included.
 */
protected boolean checkIncludeListItem( String itemPath, Specification documentSpecification )
{
  if (Logging.connectors.isDebugEnabled())
  {
    Logging.connectors.debug( "SharePoint: Checking whether to include list item '" + itemPath + "'" );
  }
  // No rule type exists for list items, so there is nothing that could exclude one.
  return true;
}
/** Match a sub-path against the start of a full path.
 * If the full path does not begin with the sub-path, -1 is returned.  Otherwise the result is
 * the index just past the sub-path, advanced by one more position when the character found
 * there is a "/" so that the separator is skipped.  Note that a following character other than
 * "/" does not cause a mismatch; the index just past the sub-path is returned as-is.
 *@param subPath is the sub path.
 *@param fullPath is the full path.
 *@return the index of the start of the remaining part of the full path, or -1.
 */
protected static int matchSubPath( String subPath, String fullPath )
{
  // startsWith() also rejects a sub-path that is longer than the full path.
  if ( !fullPath.startsWith( subPath ) )
    return -1;
  final int matchedLength = subPath.length();
  final boolean separatorFollows = matchedLength < fullPath.length() && fullPath.charAt( matchedLength ) == '/';
  return separatorFollows ? matchedLength + 1 : matchedLength;
}
/** Check for a partial path match between a path and a wildcard pattern.
 * The path must be matched completely by a leading portion of the pattern, and what is left of
 * the pattern must be able to supply the requested number of further path sections.
 *@param sourceMatch is the path (no wildcards).
 *@param sourceIndex is the starting point in the path.
 *@param match is the wildcard-based pattern.
 *@param requiredExtraPathSections is the number of path sections that must be able to follow the matched part.
 *@return true if the path could be the start of something the pattern matches.
 */
protected static boolean checkPartialPathMatch( String sourceMatch, int sourceIndex, String match, int requiredExtraPathSections )
{
  // The pattern is the side that carries wildcards.  Both "*" and "?" are allowed to match "/",
  // which is intended but is what makes this matcher tricky.
  //
  // It is acceptable for this check to answer "true" more often than strictly necessary, but it
  // must never answer "false" incorrectly.
  //
  // Because the match is partial, the pattern does not have to be used up.  The path is given a
  // trailing "/" before matching, so the consumed part of the pattern has to end on a path
  // boundary.
  final String terminatedSource = sourceMatch.endsWith("/") ? sourceMatch : sourceMatch + "/";
  // Matching here is always case sensitive, and always starts at the beginning of the pattern.
  return processPartialPathCheck( true, terminatedSource, sourceIndex, match, 0, requiredExtraPathSections );
}
/** Recursive worker method for checkPartialPathMatch.  Returns 'true' if there is a way to consume the
 * source string entirely while leaving the remainder of the match string able to supply the required followup.
 *@param caseSensitive is true if file names are case sensitive.
 *@param sourceMatch is the source string (w/o wildcards)
 *@param sourceIndex is the current point in the source string.
 *@param match is the match string (w/wildcards)
 *@param matchIndex is the current point in the match string.
 *@param requiredExtraPathSections is the number of further path sections required; only zero versus
 * non-zero is distinguished.
 *@return true if there is a match.
 */
protected static boolean processPartialPathCheck(boolean caseSensitive, String sourceMatch, int sourceIndex, String match, int matchIndex,
  int requiredExtraPathSections)
{
  final int sourceLength = sourceMatch.length();
  final int matchLength = match.length();
  // Step through both strings together until a "*" forces a recursive split.
  for (int s = sourceIndex, m = matchIndex; ; s++, m++)
  {
    if (s == sourceLength)
    {
      // Source used up.  Enumerating every way the leftover match could form the required
      // sections would be expensive, so only "some left" and "none left" are told apart:
      // leftover match text is assumed able to supply them, and an exhausted match string
      // passes only when no extra sections are required.
      return m != matchLength || requiredExtraPathSections == 0;
    }
    // Match string used up while source remains: no match.
    if (m == matchLength)
      return false;

    char sourceChar = sourceMatch.charAt(s);
    char matchChar = match.charAt(m);
    if (!caseSensitive)
    {
      // Fold ASCII upper case to lower case on both sides.
      if (sourceChar >= 'A' && sourceChar <= 'Z')
        sourceChar += 'a' - 'A';
      if (matchChar >= 'A' && matchChar <= 'Z')
        matchChar += 'a' - 'A';
    }

    if (matchChar == '*')
    {
      // Wildcard: either the "*" absorbs the current source character and stays in play,
      // or the "*" is finished with and matching resumes after it.
      return processPartialPathCheck( caseSensitive, sourceMatch, s + 1, match, m, requiredExtraPathSections ) ||
        processPartialPathCheck( caseSensitive, sourceMatch, s, match, m + 1, requiredExtraPathSections );
    }

    // "?" accepts any single character; otherwise the characters must agree.
    if (matchChar != '?' && sourceChar != matchChar)
      return false;
  }
}
/** Check whether a string is matched in full by a wildcard pattern.
 * Matching is case sensitive and always begins at the start of the pattern.
 *@param sourceMatch is the expanded string (no wildcards)
 *@param sourceIndex is the starting point in the expanded string.
 *@param match is the wildcard-based string.
 *@return true if there is a match.
 */
protected static boolean checkMatch( String sourceMatch, int sourceIndex, String match )
{
  // A hand-written matcher is used here in preference to the heavier java regex machinery.
  return processCheck( true, sourceMatch, sourceIndex, match, 0 );
}
/** Worker method for checkMatch.  Returns 'true' if the remainder of the match string, starting at
 * matchIndex, matches the remainder of the source string, starting at sourceIndex, in its entirety.
 * In the match string "*" matches any run of characters (including none) and "?" matches any single
 * character; neither treats "/" specially.
 *
 * The matching is done iteratively: when a mismatch occurs, the most recent "*" is extended by one
 * more source character and matching resumes just after that "*".  Only the most recent "*" ever
 * needs to be revisited, so the work is bounded by the product of the two string lengths instead of
 * growing exponentially with the number of "*" characters.
 *
 * A sourceIndex at or beyond the end of the source string is treated as an empty remainder.
 *@param caseSensitive is true if file names are case sensitive.
 *@param sourceMatch is the source string (w/o wildcards)
 *@param sourceIndex is the current point in the source string.
 *@param match is the match string (w/wildcards)
 *@param matchIndex is the current point in the match string.
 *@return true if there is a match.
 */
protected static boolean processCheck(boolean caseSensitive, String sourceMatch, int sourceIndex, String match, int matchIndex )
{
  final int sourceLength = sourceMatch.length();
  final int matchLength = match.length();
  // Backtracking state: the match position just after the most recent "*", and the source
  // position up to which that "*" has been extended so far.  -1 means no "*" has been seen.
  int resumeMatchIndex = -1;
  int resumeSourceIndex = -1;

  while ( sourceIndex < sourceLength )
  {
    if ( matchIndex < matchLength )
    {
      char y = match.charAt( matchIndex );
      if ( y == '*' )
      {
        // Start by letting the "*" match nothing, and remember where to come back to.
        matchIndex++;
        resumeMatchIndex = matchIndex;
        resumeSourceIndex = sourceIndex;
        continue;
      }
      char x = sourceMatch.charAt( sourceIndex );
      if ( !caseSensitive )
      {
        // Fold ASCII upper case to lower case on both sides.
        if ( x >= 'A' && x <= 'Z' )
          x += 'a' - 'A';
        if ( y >= 'A' && y <= 'Z' )
          y += 'a' - 'A';
      }
      if ( y == '?' || x == y )
      {
        sourceIndex++;
        matchIndex++;
        continue;
      }
    }
    // Mismatch, or the match string ran out while source remains.  Without an earlier "*"
    // there is nothing to adjust, so the match fails.
    if ( resumeMatchIndex == -1 )
      return false;
    // Let the most recent "*" absorb one more source character and try again from there.
    resumeSourceIndex++;
    sourceIndex = resumeSourceIndex;
    matchIndex = resumeMatchIndex;
  }

  // Source is used up; whatever is left of the match string may only be "*" characters.
  while ( matchIndex < matchLength && match.charAt( matchIndex ) == '*' )
    matchIndex++;
  return matchIndex == matchLength;
}
/** Grab forced acl out of document specification.
 * The "token" of every "access" node is collected, with duplicates removed.  "security" nodes
 * switch security on or off according to their "value" ("on" or "off"); the last such node wins,
 * and security is on when there is none.  A "security" node with any other value, or with no
 * value at all, leaves the setting unchanged.
 *@param spec is the document specification.
 *@return the acls, or null if security is switched off.
 */
protected static String[] getAcls(Specification spec)
{
  final Set<String> tokens = new HashSet<String>();
  boolean securityOn = true;
  for (int i = 0; i < spec.getChildCount(); i++)
  {
    final SpecificationNode sn = spec.getChild(i);
    final String nodeType = sn.getType();
    if (nodeType.equals("access"))
    {
      tokens.add(sn.getAttributeValue("token"));
    }
    else if (nodeType.equals("security"))
    {
      // Constant-first comparisons so that a missing value attribute is simply ignored.
      final String value = sn.getAttributeValue("value");
      if ("on".equals(value))
        securityOn = true;
      else if ("off".equals(value))
        securityOn = false;
    }
  }
  if (!securityOn)
    return null;
  return tokens.toArray(new String[tokens.size()]);
}
/** Decode a single path item.
 * Every "%20" is first rewritten as "+", and the result is then URL-decoded.
 *@param pathItem is the encoded path item.
 *@return the decoded path item.
 */
public static String pathItemDecode(String pathItem)
{
  final String plusForm = pathItem.replace("%20", "+");
  return URLDecoder.decode(plusForm);
}
/** Encode a single path item.
 * The item is URL-encoded, and every "+" in the encoded form is then rewritten as "%20".
 *@param pathItem is the unencoded path item.
 *@return the encoded path item.
 */
public static String pathItemEncode(String pathItem)
{
  final String encoded = URLEncoder.encode(pathItem);
  return encoded.replace("+", "%20");
}
/** Convert a /-separated path whose items are encoded into unencoded form.
 * The path is split on "/", each item is decoded on its own, and the items are rejoined with "/".
 * Because String.split() discards trailing empty items, trailing slashes are not carried over
 * to the result.
 *@param relPath is the encoded path.
 *@return the decoded path.
 */
public static String decodePath(String relPath)
{
  final String[] segments = relPath.split("/");
  final StringBuilder decoded = new StringBuilder();
  for (int k = 0; k < segments.length; k++)
  {
    // Separator goes between items, never before the first one.
    if (k > 0)
      decoded.append("/");
    decoded.append(pathItemDecode(segments[k]));
  }
  return decoded.toString();
}
/** Convert a /-separated, unencoded path into the form needed for an actual URI.
 * The path is split on "/", each item is encoded on its own, and the items are rejoined with "/".
 * Because String.split() discards trailing empty items, trailing slashes are not carried over
 * to the result.
 *@param relPath is the unencoded path.
 *@return the encoded path.
 */
public static String encodePath(String relPath)
{
  final String[] segments = relPath.split("/");
  final StringBuilder encoded = new StringBuilder();
  for (int k = 0; k < segments.length; k++)
  {
    // Separator goes between items, never before the first one.
    if (k > 0)
      encoded.append("/");
    encoded.append(pathItemEncode(segments[k]));
  }
  return encoded.toString();
}
/** Description of the metadata wanted for a document: either everything, or a set of named fields.
 */
protected static class MetadataInformation
{
  /** True once all metadata has been requested. */
  protected boolean allMetadata = false;
  /** Names of the individually requested metadata fields. */
  protected Set<String> metadataFields = new HashSet<String>();

  /** Constructor.  The description starts out with nothing selected. */
  public MetadataInformation()
  {
  }

  /** Request all metadata. */
  public void setAllMetadata()
  {
    this.allMetadata = true;
  }

  /** Request one metadata field.
   *@param fieldName is the name of the field; adding the same name again has no further effect.
   */
  public void addMetadataField(String fieldName)
  {
    this.metadataFields.add(fieldName);
  }

  /** Find out whether all metadata was requested.
   *@return true if all metadata is to be used.
   */
  public boolean getAllMetadata()
  {
    return this.allMetadata;
  }

  /** Get the individually requested metadata fields.
   *@return a newly allocated array of the field names, in the set's iteration order.
   */
  public String[] getMetadataFields()
  {
    return this.metadataFields.toArray(new String[this.metadataFields.size()]);
  }
}
/** Holds the path-related settings of a specification: the name of the metadata attribute
 * that should receive a document's path, and the match/replace pairs used to translate that path.
 */
protected class SystemMetadataDescription
{
  /** Name of the metadata attribute for the path, or null if the specification names none. */
  protected final String pathAttributeName;
  /** Ordered match/replace pairs applied to a path before it is used as metadata. */
  protected final MatchMap matchMap = new MatchMap();

  /** Constructor.
   * Reads the "pathnameattribute" node (the last one wins if there are several) and appends
   * every "pathmap" node's match/replace pair, in specification order.
   *@param spec is the document specification.
   */
  public SystemMetadataDescription(Specification spec)
    throws ManifoldCFException
  {
    String attributeName = null;
    int index = 0;
    while (index < spec.getChildCount())
    {
      final SpecificationNode node = spec.getChild(index++);
      final String nodeType = node.getType();
      if (nodeType.equals("pathnameattribute"))
      {
        attributeName = node.getAttributeValue("value");
      }
      else if (nodeType.equals("pathmap"))
      {
        matchMap.appendMatchPair(node.getAttributeValue("match"), node.getAttributeValue("replace"));
      }
    }
    pathAttributeName = attributeName;
  }

  /** Get the path attribute name.
   *@return the path attribute name, or null if none specified.
   */
  public String getPathAttributeName()
  {
    return pathAttributeName;
  }

  /** Given an identifier, get the translated string that goes into the metadata.
   *@param documentIdentifier is the document identifier.
   *@return the identifier's path string after translation through the match map.
   */
  public String getPathAttributeValue(String documentIdentifier)
    throws ManifoldCFException
  {
    return matchMap.translate(getPathString(documentIdentifier));
  }

  /** For a given id, get the portion of its path which the mapping and ingestion
   * should go against.  This is the whole identifier, except that the first "//" found in it
   * is collapsed to a single "/".
   *@param documentIdentifier is the document identifier.
   *@return the path string; the identifier itself if it contains no "//".
   */
  public String getPathString(String documentIdentifier)
    throws ManifoldCFException
  {
    final int doubleSlash = documentIdentifier.indexOf("//");
    if (doubleSlash < 0)
      return documentIdentifier;
    // Keep the first slash of the pair and drop the second.
    return documentIdentifier.substring(0, doubleSlash + 1) + documentIdentifier.substring(doubleSlash + 2);
  }
}
}