Rename CONNECTORS-856-2 branch to CONNECTORS-856

git-svn-id: https://svn.apache.org/repos/asf/manifoldcf/branches/CONNECTORS-856@1613084 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/CHANGES.txt b/CHANGES.txt
index d4461e9..6a8c6c1 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -3,6 +3,90 @@
 
 ======================= 1.7-dev =====================
 
+CONNECTORS-984: Add Tika extraction metadata, and add the
+ability to ignore Tika exceptions.
+(Shinichiro Abe, Karl Wright)
+
+CONNECTORS-989: Introduce document sub-components, a way of
+having multiple indexed documents correspond to a single
+repository document.
+(Matteo Grolla, Karl Wright)
+
+CONNECTORS-994: Make Alfresco connector pay attention to the
+scanOnly flag.
+(Prasad Perera, Karl Wright)
+
+CONNECTORS-997: Get CMIS connector working again.
+(Karl Wright)
+
+CONNECTORS-996: Catch the CMIS object-not-found exception, and
+enable the CMIS Derby IT test.
+(Prasad Perera, Karl Wright)
+
+CONNECTORS-995: Connectors dealing with non-indexable documents
+such as directories should call noDocument() on those documents if they
+want incremental behavior.
+(Karl Wright)
+
+CONNECTORS-993: Pipeline code not handling "no document" case properly.
+This problem was an oversight in the new pipeline code.  Essentially,
+transformation connectors could choose not to send a document onward
+to the next stage.  If this happened, the document version information
+for the corresponding output connection would never get written, and
+incremental crawling would become impossible.
+I fixed this by introducing a "noDocument()" IOutputAddActivity method,
+which transformation connectors should call when they reject a document.
+(Karl Wright)
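
An editorial sketch of the calling pattern this entry describes.  Only
noDocument() itself comes from the entry; the method shape, the
DOCUMENTSTATUS_REJECTED constant, and the sendDocument() pass-through are
assumptions about the pipeline API of this era:

    // Sketch: a transformation connector that rejects a document but still
    // records the "no document" outcome, so that output version information
    // is written and incremental crawling keeps working.
    public int addOrReplaceDocumentWithException(String documentURI,
      VersionContext pipelineDescription, RepositoryDocument document,
      String authorityNameString, IOutputAddActivity activities)
      throws ManifoldCFException, ServiceInterruption, IOException
    {
      if (shouldReject(document))        // hypothetical rejection criterion
      {
        activities.noDocument();         // the method introduced by this change
        return DOCUMENTSTATUS_REJECTED;
      }
      return activities.sendDocument(documentURI, document);  // assumed pass-through
    }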
+
+CONNECTORS-992: Add a test to exercise ServiceInterruption within
+a connector.
+(Karl Wright)
+
+CONNECTORS-990: Revamp IRepositoryConnector API to no longer separate
+getDocumentVersions() and processDocuments().  This modification pushes
+the responsibility for detecting changes down to the repository connector.
+Backwards compatibility is maintained via code in BaseRepositoryConnector,
+and new methods have been added to IProcessActivity.
+WorkerThread has been largely rewritten as a result.
+(Karl Wright)
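
A hedged sketch of what the merged contract looks like from a connector's
point of view; the signatures here are assumptions for illustration, not the
exact new IRepositoryConnector/IProcessActivity API:

    // Version determination and processing now happen in one pass, with the
    // connector itself deciding whether each document needs reindexing.
    public void processDocuments(String[] documentIdentifiers, String[] oldVersions,
      Specification spec, IProcessActivity activities, int jobMode,
      boolean usesDefaultAuthority)
      throws ManifoldCFException, ServiceInterruption
    {
      for (int i = 0; i < documentIdentifiers.length; i++)
      {
        String docID = documentIdentifiers[i];
        String newVersion = computeVersionString(docID);   // hypothetical helper
        if (newVersion.equals(oldVersions[i]))
          continue;                                        // unchanged; skip refetch
        fetchAndIngest(docID, newVersion, activities);     // hypothetical helper
      }
    }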
+
+CONNECTORS-991: Make Jira connector perform pre-emptive basic auth
+since Jira supports guest users.
+(Daniel Aschauer, Karl Wright)
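
For context, the standard way to make Apache HttpClient 4.x send credentials
pre-emptively (before any 401 challenge; Jira may simply serve the request as
a guest instead of challenging) is to pre-populate the auth cache.  This is
illustrative, not necessarily the Jira connector's exact code; the host,
credentials, and endpoint are placeholders:

    HttpHost target = new HttpHost("jira.example.com", 443, "https");
    BasicCredentialsProvider credsProvider = new BasicCredentialsProvider();
    credsProvider.setCredentials(
      new AuthScope(target.getHostName(), target.getPort()),
      new UsernamePasswordCredentials("user", "password"));
    // Seeding the AuthCache with BasicScheme makes the Authorization header
    // go out on the very first request.
    AuthCache authCache = new BasicAuthCache();
    authCache.put(target, new BasicScheme());
    HttpClientContext context = HttpClientContext.create();
    context.setCredentialsProvider(credsProvider);
    context.setAuthCache(authCache);
    HttpResponse response = httpClient.execute(target,
      new HttpGet("/rest/api/2/myself"), context);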
+
+CONNECTORS-988: Performance improvements for split pipeline crawls.
+(Karl Wright)
+
+CONNECTORS-985: Get UI tests working again.
+(Karl Wright)
+
+CONNECTORS-981: Add support for SolrInputDocument indexing in
+Solr connector.
+(Alessandro Benedetti, Karl Wright)
+
+CONNECTORS-979: Fix the Ant build so that the Documentum and FileNet
+connectors show up.
+(Karl Wright)
+
+CONNECTORS-980: Output connectors are now notified when a job is deleted.
+(Karl Wright)
+
+CONNECTORS-954: Revamp AmazonCloudSearch output connector completely.
+(1) Remove Tika and field mapping, since that would be done upstream in the
+pipeline.
+(2) Revamp the document lifecycle so that documents are batched together (which
+isn't perfect; see CONNECTORS-980).
+(Karl Wright, Takumi Yoshida)
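
The new lifecycle is visible in the connector diff further down: each document
is cached in a database-backed chunk table and posted in batches.  Condensed
into a sketch, using names taken from this patch (the wrapper method itself is
illustrative):

    // Sketch only; documentChunkManager, conditionallyFlushDocuments(), and
    // CHUNK_SIZE come from the patch below, but the control flow is condensed.
    private void indexViaBatch(String uid, InputStream sdfRecord)
      throws ManifoldCFException, ServiceInterruption, IOException
    {
      // Cache the SDF record rather than posting it immediately...
      documentChunkManager.recordDocument(uid, serverHost, serverPath, sdfRecord);
      // ...and POST to CloudSearch only once CHUNK_SIZE (1000) records are queued.
      conditionallyFlushDocuments();
    }

noteJobComplete() then calls flushDocuments() to drain whatever remains when
the job ends.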
+
+CONNECTORS-971: Use a generic "seeding version string" to track the last
+seeding event for every job.  This abstracts away from time intervals and
+permits seeding based on things like transaction IDs.
+(Karl Wright, Piergiorgio Lucidi)
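
A hedged sketch of seeding keyed off an opaque version string instead of a
time window; the signature is modeled on this entry and is an assumption, as
is the transaction-ID helper:

    public String addSeedDocuments(ISeedingActivity activities, Specification spec,
      String lastSeedVersion, long seedTime, int jobMode)
      throws ManifoldCFException, ServiceInterruption
    {
      // Interpret the last seeding version as a repository transaction ID.
      long lastTxn = (lastSeedVersion == null) ? 0L : Long.parseLong(lastSeedVersion);
      long newTxn = seedChangesSince(lastTxn, activities);  // hypothetical helper
      // Whatever is returned here is handed back on the next seeding pass.
      return Long.toString(newTxn);
    }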
+
+CONNECTORS-967: Link the ManifoldCF Javadocs to the Java 1.7 and
+ManifoldCF framework Javadocs.
+(Shinichiro Abe)
+
 CONNECTORS-965: Update end-user documentation to reflect
 multiple outputs per job.
 (Karl Wright)
diff --git a/build.xml b/build.xml
index b768da1..5f44256 100644
--- a/build.xml
+++ b/build.xml
@@ -700,7 +700,7 @@
     
     <target name="download-derby">
         <mkdir dir="lib"/>
-        <property name="derby-version" value="10.10.1.1"/>
+        <property name="derby-version" value="10.10.2.0"/>
         <property name="derby-package" value="org/apache/derby"/>
         <antcall target="download-via-maven"><param name="project-path" value="${derby-package}"/><param name="artifact-version" value="${derby-version}"/><param name="target" value="lib"/>
             <param name="artifact-name" value="derby"/>
diff --git a/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java b/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java
index e069bdf..583155b 100644
--- a/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java
+++ b/connectors/alfresco/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/alfresco/AlfrescoRepositoryConnector.java
@@ -898,81 +898,82 @@
       
       //a generic node in Alfresco could also have binaries content
       if (isDocument) {
-        // this is a content to ingest
-        InputStream is = null;
-        long fileLength = 0;
-        try {
-          //properties ingestion
-          RepositoryDocument rd = new RepositoryDocument();      
-          List<NamedValue> contentProperties = PropertiesUtils.getContentProperties(properties);
-          PropertiesUtils.ingestProperties(rd, properties, contentProperties);
-
-          // binaries ingestion - in Alfresco we could have more than one binary for each node (custom content models)
-          for (NamedValue contentProperty : contentProperties) {
-            //we are ingesting all the binaries defined as d:content property in the Alfresco content model
-            Content binary = ContentReader.read(endpoint, username, password, socketTimeout, session, predicate, contentProperty.getName());
-            fileLength = binary.getLength();
-            is = ContentReader.getBinary(endpoint, binary, username, password, socketTimeout, session);
-            rd.setBinary(is, fileLength);
-            
-            //id is the node reference only if the node has an unique content stream
-            //For a node with a single d:content property: id = node reference
-            String id = PropertiesUtils.getNodeReference(properties);
-            
-            //For a node with multiple d:content properties: id = node reference;QName
-            //The QName of a property of type d:content will be appended to the node reference
-            if(contentProperties.size()>1){
-              id = id + INGESTION_SEPARATOR_FOR_MULTI_BINARY + contentProperty.getName();
-            }
-            
-            //version label
-            String version = PropertiesUtils.getVersionLabel(properties);
-            
-            //the document uri is related to the specific d:content property available in the node
-            //we want to ingest each content stream that are nested in a single node
-            String documentURI = binary.getUrl();
-            activities.ingestDocumentWithException(id, version, documentURI, rd);
-          }
-          
-          AuthenticationUtils.endSession();
-          
-        } catch (ParseException e) {
-          errorCode = "IO ERROR";
-          errorDesc = e.getMessage();
-          Logging.connectors.warn(
-              "Alfresco: Error during the reading process of dates: "
-                  + e.getMessage(), e);
-          handleParseException(e);
-        } catch (IOException e) {
-          Logging.connectors.warn(
-              "Alfresco: IOException: "
-                  + e.getMessage(), e);
-          handleIOException(e);
-        } finally {
+        if (!scanOnly[i]) {
+          // this is a content to ingest
+          InputStream is = null;
+          long fileLength = 0;
           try {
-            if(is!=null){
-              is.close();
+            //properties ingestion
+            RepositoryDocument rd = new RepositoryDocument();      
+            List<NamedValue> contentProperties = PropertiesUtils.getContentProperties(properties);
+            PropertiesUtils.ingestProperties(rd, properties, contentProperties);
+
+            // binaries ingestion - in Alfresco we could have more than one binary for each node (custom content models)
+            for (NamedValue contentProperty : contentProperties) {
+              //we are ingesting all the binaries defined as d:content property in the Alfresco content model
+              Content binary = ContentReader.read(endpoint, username, password, socketTimeout, session, predicate, contentProperty.getName());
+              fileLength = binary.getLength();
+              is = ContentReader.getBinary(endpoint, binary, username, password, socketTimeout, session);
+              rd.setBinary(is, fileLength);
+              
+              //id is the node reference only if the node has a unique content stream
+              //For a node with a single d:content property: id = node reference
+              String id = PropertiesUtils.getNodeReference(properties);
+              
+              //For a node with multiple d:content properties: id = node reference;QName
+              //The QName of a property of type d:content will be appended to the node reference
+              if(contentProperties.size()>1){
+                id = id + INGESTION_SEPARATOR_FOR_MULTI_BINARY + contentProperty.getName();
+              }
+              
+              //version label
+              String version = PropertiesUtils.getVersionLabel(properties);
+              
+              //the document URI is related to the specific d:content property available in the node
+              //we want to ingest each content stream that is nested in a single node
+              String documentURI = binary.getUrl();
+              activities.ingestDocumentWithException(id, version, documentURI, rd);
             }
-          } catch (InterruptedIOException e) {
-            errorCode = "Interrupted error";
-            errorDesc = e.getMessage();
-            throw new ManifoldCFException(e.getMessage(), e,
-                ManifoldCFException.INTERRUPTED);
-          } catch (IOException e) {
+            
+            AuthenticationUtils.endSession();
+            
+          } catch (ParseException e) {
             errorCode = "IO ERROR";
             errorDesc = e.getMessage();
             Logging.connectors.warn(
-                "Alfresco: IOException closing file input stream: "
+                "Alfresco: Error during the reading process of dates: "
+                    + e.getMessage(), e);
+            handleParseException(e);
+          } catch (IOException e) {
+            Logging.connectors.warn(
+                "Alfresco: IOException: "
                     + e.getMessage(), e);
             handleIOException(e);
+          } finally {
+            try {
+              if(is!=null){
+                is.close();
+              }
+            } catch (InterruptedIOException e) {
+              errorCode = "Interrupted error";
+              errorDesc = e.getMessage();
+              throw new ManifoldCFException(e.getMessage(), e,
+                  ManifoldCFException.INTERRUPTED);
+            } catch (IOException e) {
+              errorCode = "IO ERROR";
+              errorDesc = e.getMessage();
+              Logging.connectors.warn(
+                  "Alfresco: IOException closing file input stream: "
+                      + e.getMessage(), e);
+              handleIOException(e);
+            }
+                      
+            session = null;
+            
+            activities.recordActivity(new Long(startTime), ACTIVITY_READ,
+                fileLength, nodeReference, errorCode, errorDesc, null);
           }
-                    
-          session = null;
-          
-          activities.recordActivity(new Long(startTime), ACTIVITY_READ,
-              fileLength, nodeReference, errorCode, errorDesc, null);
         }
-        
       }
       i++;
     }
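
The hunk above implements CONNECTORS-994 by wrapping the whole ingestion block
in a scanOnly check.  Reduced to its skeleton:

    // Skeleton of the change above: ingest only when the framework asks for
    // full processing; a true scanOnly[i] means indexing can be skipped.
    if (isDocument) {
      if (!scanOnly[i]) {
        // ...read each d:content binary and call
        // activities.ingestDocumentWithException(id, version, documentURI, rd);
      }
    }
    i++;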
diff --git a/connectors/alfresco/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/alfresco/tests/APISanityIT.java b/connectors/alfresco/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/alfresco/tests/APISanityIT.java
index 8c6fd85..2cc49d3 100644
--- a/connectors/alfresco/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/alfresco/tests/APISanityIT.java
+++ b/connectors/alfresco/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/alfresco/tests/APISanityIT.java
@@ -20,6 +20,7 @@
 package org.apache.manifoldcf.crawler.connectors.alfresco.tests;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.rmi.RemoteException;
 
 import org.alfresco.webservice.content.ContentServiceSoapBindingStub;
@@ -114,7 +115,7 @@
       
       //format
       ContentFormat contentFormat = new ContentFormat();
-      contentFormat.setEncoding("UTF-8");
+      contentFormat.setEncoding(StandardCharsets.UTF_8.name());
       contentFormat.setMimetype("text/plain");
       
       //the content
@@ -160,7 +161,7 @@
       reference.setUuid(row.getNode().getId());
       
       ContentFormat contentFormat = new ContentFormat();
-      contentFormat.setEncoding("UTF-8");
+      contentFormat.setEncoding(StandardCharsets.UTF_8.name());
       contentFormat.setMimetype("text/plain");
       
       ContentServiceSoapBindingStub contentService = WebServiceFactory.getContentService();
diff --git a/connectors/amazoncloudsearch/build.xml b/connectors/amazoncloudsearch/build.xml
index 189e0ca..62dabb2 100644
--- a/connectors/amazoncloudsearch/build.xml
+++ b/connectors/amazoncloudsearch/build.xml
@@ -37,42 +37,6 @@
             <include name="jackson-core*.jar"/>
             <include name="jackson-databind*.jar"/>
             <include name="jackson-annotations*.jar"/>
-            <include name="tika-core*.jar"/>
-            <include name="tika-parsers*.jar"/>
-            <include name="tagsoup*.jar"/>
-            <include name="poi*.jar"/>
-            <include name="vorbis-java-tika*.jar"/>
-            <include name="vorbis-java-core*.jar"/>
-            <include name="netcdf*.jar"/>
-            <include name="apache-mime4j-core*.jar"/>
-            <include name="apache-mime4j-dom*.jar"/>
-            <include name="commons-compress*.jar"/>
-            <include name="commons-codec*.jar"/>
-            <include name="pdfbox*.jar"/>
-            <include name="fontbox*.jar"/>
-            <include name="jempbox*.jar"/>
-            <include name="commons-logging*.jar"/>
-            <include name="bcmail-jdk15*.jar"/>
-            <include name="bcprov-jdk15*.jar"/>
-            <include name="poi-scratchpad*.jar"/>
-            <include name="poi-ooxml*.jar"/>
-            <include name="poi-ooxml-schemas*.jar"/>
-            <include name="xmlbeans*.jar"/>
-            <include name="dom4j*.jar"/>
-            <include name="geronimo-stax-api_1.0_spec*.jar"/>
-            <include name="asm-debug-all*.jar"/>
-            <include name="isoparser*.jar"/>
-            <include name="aspectjrt*.jar"/>
-            <include name="metadata-extractor*.jar"/>
-            <include name="xmpcore*.jar"/>
-            <include name="xml-apis*.jar"/>
-            <include name="boilerpipe*.jar"/>
-            <include name="rome*.jar"/>
-            <include name="jdom*.jar"/>
-            <include name="xercesImpl*.jar"/>
-            <include name="vorbis-java-core*.jar"/>
-            <include name="juniversalchardet*.jar"/>
-            <include name="jhighlight*.jar"/>
         </fileset>
     </path>
 
@@ -84,42 +48,6 @@
                 <include name="jackson-core*.jar"/>
                 <include name="jackson-databind*.jar"/>
                 <include name="jackson-annotations*.jar"/>
-                <include name="tika-core*.jar"/>
-                <include name="tika-parsers*.jar"/>
-                <include name="tagsoup*.jar"/>
-                <include name="poi*.jar"/>
-                <include name="vorbis-java-tika*.jar"/>
-                <include name="vorbis-java-core*.jar"/>
-                <include name="netcdf*.jar"/>
-                <include name="apache-mime4j-core*.jar"/>
-                <include name="apache-mime4j-dom*.jar"/>
-                <include name="commons-compress*.jar"/>
-                <include name="commons-codec*.jar"/>
-                <include name="pdfbox*.jar"/>
-                <include name="fontbox*.jar"/>
-                <include name="jempbox*.jar"/>
-                <include name="commons-logging*.jar"/>
-                <include name="bcmail-jdk15*.jar"/>
-                <include name="bcprov-jdk15*.jar"/>
-                <include name="poi-scratchpad*.jar"/>
-                <include name="poi-ooxml*.jar"/>
-                <include name="poi-ooxml-schemas*.jar"/>
-                <include name="xmlbeans*.jar"/>
-                <include name="dom4j*.jar"/>
-                <include name="geronimo-stax-api_1.0_spec*.jar"/>
-                <include name="asm-debug-all*.jar"/>
-                <include name="isoparser*.jar"/>
-                <include name="aspectjrt*.jar"/>
-                <include name="metadata-extractor*.jar"/>
-                <include name="xmpcore*.jar"/>
-                <include name="xml-apis*.jar"/>
-                <include name="boilerpipe*.jar"/>
-                <include name="rome*.jar"/>
-                <include name="jdom*.jar"/>
-                <include name="xercesImpl*.jar"/>
-                <include name="vorbis-java-core*.jar"/>
-                <include name="juniversalchardet*.jar"/>
-                <include name="jhighlight*.jar"/>
             </fileset>
         </copy>
     </target>
diff --git a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConfig.java b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConfig.java
index 80a9af7..9d916f0 100644
--- a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConfig.java
+++ b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConfig.java
@@ -37,16 +37,5 @@
   public static final String PROXY_PORT_DEFAULT = "";
   
   // Specification nodes and values
-  public static final String NODE_MAXLENGTH = "maxlength";
-  public static final String MAXLENGTH_DEFAULT = "";
-  public static final String NODE_MIMETYPES = "mimetypes";
-  public static final String MIMETYPES_DEFAULT = "";
-  public static final String NODE_EXTENSIONS = "extensions";
-  public static final String EXTENSIONS_DEFAULT = "";
-  public static final String NODE_FIELDMAP = "fieldmap";
-  public static final String NODE_KEEPMETADATA = "keepAllMetadata";
-  public static final String ATTRIBUTE_SOURCE = "source";
-  public static final String ATTRIBUTE_TARGET = "target";
-  public static final String ATTRIBUTE_VALUE = "value";
   
 }
diff --git a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
index cd0b44c..3ab9f66 100644
--- a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
+++ b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
@@ -17,58 +17,61 @@
 * limitations under the License.
 */
 package org.apache.manifoldcf.agents.output.amazoncloudsearch;
-
-import java.io.IOException;
-import java.io.InputStream;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
 import java.io.InterruptedIOException;
 import java.io.StringReader;
-import java.io.BufferedReader;
-import java.util.ArrayList;
-import java.util.HashMap;
+import java.io.BufferedReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.Iterator;
-import java.util.List;
+import java.util.List;
 import java.util.Map;
 import java.util.Locale;
 import java.util.Set;
 import java.util.HashSet;
+import java.util.Date;
 
-import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.io.input.ReaderInputStream;
+
 import org.apache.http.Consts;
 import org.apache.http.HttpEntity;
-import org.apache.http.HttpHost;
-import org.apache.http.HttpResponse;
-import org.apache.http.client.ClientProtocolException;
-import org.apache.http.client.config.RequestConfig;
-import org.apache.http.client.methods.HttpPost;
-import org.apache.http.entity.StringEntity;
+import org.apache.http.HttpHost;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.entity.InputStreamEntity;
 import org.apache.http.impl.client.CloseableHttpClient;
 import org.apache.http.impl.client.HttpClients;
 import org.apache.http.util.EntityUtils;
 import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
+import org.apache.manifoldcf.agents.interfaces.IOutputNotifyActivity;
 import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity;
-import org.apache.manifoldcf.agents.interfaces.OutputSpecification;
+import org.apache.manifoldcf.agents.interfaces.IOutputCheckActivity;
 import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
 import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
 import org.apache.manifoldcf.agents.output.BaseOutputConnector;
-import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel.Document;
-import org.apache.manifoldcf.core.interfaces.ConfigParams;
+import org.apache.manifoldcf.core.interfaces.Specification;
+import org.apache.manifoldcf.core.interfaces.ConfigParams;
 import org.apache.manifoldcf.core.interfaces.ConfigurationNode;
-import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.DBInterfaceFactory;
+import org.apache.manifoldcf.core.interfaces.IDBInterface;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
 import org.apache.manifoldcf.core.interfaces.IThreadContext;
 import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
 import org.apache.manifoldcf.core.interfaces.IPostParameters;
 import org.apache.manifoldcf.core.interfaces.IPasswordMapperActivity;
 import org.apache.manifoldcf.core.interfaces.SpecificationNode;
-import org.apache.manifoldcf.core.system.ManifoldCF;
-import org.apache.manifoldcf.crawler.system.Logging;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.AutoDetectParser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
+import org.apache.manifoldcf.core.interfaces.BinaryInput;
+import org.apache.manifoldcf.core.interfaces.TempFileInput;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
+import org.apache.manifoldcf.agents.system.ManifoldCF;
+import org.apache.manifoldcf.agents.system.Logging;
 
 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonParseException;
@@ -79,6 +82,8 @@
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 
+import org.apache.manifoldcf.core.jsongen.*;
+
 public class AmazonCloudSearchConnector extends BaseOutputConnector {
 
   /** Ingestion activity */
@@ -95,17 +100,16 @@
   /** Forward to the HTML template to view the configuration parameters */
   private static final String VIEW_CONFIGURATION_HTML = "viewConfiguration.html";
 
-  /** Forward to the javascript to check the specification parameters for the job */
-  private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
-  
-  private static final String EDIT_SPECIFICATION_CONTENTS_HTML = "editSpecification_Contents.html";
-  private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = "editSpecification_FieldMapping.html";
-  
-  private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
-  
   /** Local connection */
   protected HttpPost poster = null;
   
+  // What we need for database keys
+  protected String serverHost = null;
+  protected String serverPath = null;
+  
+  /** Document Chunk Manager */
+  private DocumentChunkManager documentChunkManager = null;
+  
   /** cloudsearch field name for file body text. */
   private static final String FILE_BODY_TEXT_FIELDNAME = "f_bodytext";
   
@@ -114,6 +118,42 @@
   public AmazonCloudSearchConnector(){
   }
   
+  /** Clear out any state information specific to a given thread.
+  * This method is called when this object is returned to the connection pool.
+  */
+  @Override
+  public void clearThreadContext()
+  {
+    super.clearThreadContext();
+    documentChunkManager = null;
+  }
+
+  @Override
+  public void install(IThreadContext threadContext) 
+      throws ManifoldCFException
+  {
+    IDBInterface mainDatabase = DBInterfaceFactory.make(threadContext,
+      ManifoldCF.getMasterDatabaseName(),
+      ManifoldCF.getMasterDatabaseUsername(),
+      ManifoldCF.getMasterDatabasePassword());
+    
+    DocumentChunkManager dcmanager = new DocumentChunkManager(mainDatabase);
+    dcmanager.install();
+  }
+
+  @Override
+  public void deinstall(IThreadContext threadContext)
+      throws ManifoldCFException
+  {
+    IDBInterface mainDatabase = DBInterfaceFactory.make(threadContext,
+        ManifoldCF.getMasterDatabaseName(),
+        ManifoldCF.getMasterDatabaseUsername(),
+        ManifoldCF.getMasterDatabasePassword());
+      
+    DocumentChunkManager dcmanager = new DocumentChunkManager(mainDatabase);
+    dcmanager.deinstall();
+  }
+
   /** Return the list of activities that this connector supports (i.e. writes into the log).
   *@return the list.
   */
@@ -150,10 +190,9 @@
   public void disconnect()
     throws ManifoldCFException
   {
-    if (poster != null)
-    {
-      poster = null;
-    }
+    serverHost = null;
+    serverPath = null;
+    poster = null;
     super.disconnect();
   }
 
@@ -161,10 +200,19 @@
   protected void getSession()
     throws ManifoldCFException
   {
-    String serverHost = params.getParameter(AmazonCloudSearchConfig.SERVER_HOST);
+    if (documentChunkManager == null)
+    {
+      IDBInterface databaseHandle = DBInterfaceFactory.make(currentContext,
+        ManifoldCF.getMasterDatabaseName(),
+        ManifoldCF.getMasterDatabaseUsername(),
+        ManifoldCF.getMasterDatabasePassword());
+      documentChunkManager = new DocumentChunkManager(databaseHandle);
+    }
+
+    serverHost = params.getParameter(AmazonCloudSearchConfig.SERVER_HOST);
     if (serverHost == null)
       throw new ManifoldCFException("Server host parameter required");
-    String serverPath = params.getParameter(AmazonCloudSearchConfig.SERVER_PATH);
+    serverPath = params.getParameter(AmazonCloudSearchConfig.SERVER_PATH);
     if (serverPath == null)
       throw new ManifoldCFException("Server path parameter required");
     String proxyProtocol = params.getParameter(AmazonCloudSearchConfig.PROXY_PROTOCOL);
@@ -193,7 +241,7 @@
     
     poster.addHeader("Content-Type", "application/json");
   }
-
+  
   /** Test the connection.  Returns a string describing the connection integrity.
   *@return the connection's status as a displayable string.
   */
@@ -201,132 +249,81 @@
   public String check() throws ManifoldCFException {
     try {
       getSession();
-      String responsbody = postData("[]");
+      String responsbody = postData(new ReaderInputStream(new StringReader("[]"),Consts.UTF_8));
       String status = "";
+      
       try
       {
         status = getStatusFromJsonResponse(responsbody);
       } catch (ManifoldCFException e)
       {
-        Logging.connectors.debug(e);
+        Logging.ingest.debug(e);
         return "Could not get status from response body. Check Access Policy setting of your domain of Amazon CloudSearch.: " + e.getMessage();
       }
-          
-      // check status message
-      String message = "";
-      if ("error".equals(status)) {
-        JsonParser parser = new JsonFactory().createJsonParser(responsbody);
-        while (parser.nextToken() != JsonToken.END_OBJECT) {
-          String name = parser.getCurrentName();
-          if ("errors".equalsIgnoreCase(name)) {
-            message = parseMessage(parser);
-            break;
-          }
-        }
-      }
-      if ("error".equalsIgnoreCase(status)
-          && "batch must contain at least one operation".equals(message)) {
-        return "Connection working.";
-      }
-      return "Connection NOT working.";
       
-    } catch (ClientProtocolException e) {
-      Logging.connectors.debug(e);
-      return "Protocol exception: "+e.getMessage();
-    } catch (IOException e) {
-      Logging.connectors.debug(e);
-      return "IO exception: "+e.getMessage();
+      if ("error".equalsIgnoreCase(status)) {
+        return "Connection working. responsbody : " + responsbody;
+      }
+      return "Connection NOT working. responsbody : " + responsbody;
+      
     } catch (ServiceInterruption e) {
-      Logging.connectors.debug(e);
+      Logging.ingest.debug(e);
       return "Transient exception: "+e.getMessage();
     }
-  }
-  
-  private String getStatusFromJsonResponse(String responsbody) throws ManifoldCFException {
-    try {
-      JsonParser parser = new JsonFactory().createJsonParser(responsbody);
-      while (parser.nextToken() != JsonToken.END_OBJECT)
-      {
-        String name = parser.getCurrentName();
-        if("status".equalsIgnoreCase(name)){
-          parser.nextToken();
-          return parser.getText();
-        }
-      }
-    } catch (JsonParseException e) {
-      throw new ManifoldCFException(e);
-    } catch (IOException e) {
-      throw new ManifoldCFException(e);
-    }
-    return null;
-  }
-  
-  private String parseMessage(JsonParser parser) throws JsonParseException, IOException {
-    while(parser.nextToken() != JsonToken.END_ARRAY){
-      String name = parser.getCurrentName();
-      if("message".equalsIgnoreCase(name)){
-        parser.nextToken();
-        return parser.getText();
-      }
-    }
-    return null;
+  }
+  
+  private String getStatusFromJsonResponse(String responsbody) throws ManifoldCFException {
+    try {
+      JsonParser parser = new JsonFactory().createJsonParser(responsbody);
+      while (parser.nextToken() != JsonToken.END_OBJECT)
+      {
+        String name = parser.getCurrentName();
+        if("status".equalsIgnoreCase(name)){
+          parser.nextToken();
+          return parser.getText();
+        }
+      }
+    } catch (JsonParseException e) {
+      throw new ManifoldCFException(e);
+    } catch (IOException e) {
+      throw new ManifoldCFException(e);
+    }
+    return null;
+  }
+  
+  private String parseMessage(JsonParser parser) throws JsonParseException, IOException {
+    while(parser.nextToken() != JsonToken.END_ARRAY){
+      String name = parser.getCurrentName();
+      if("message".equalsIgnoreCase(name)){
+        parser.nextToken();
+        return parser.getText();
+      }
+    }
+    return null;
   }
 
-  /** Get an output version string, given an output specification.  The output version string is used to uniquely describe the pertinent details of
-  * the output specification and the configuration, to allow the Connector Framework to determine whether a document will need to be output again.
-  * Note that the contents of the document cannot be considered by this method, and that a different version string (defined in IRepositoryConnector)
-  * is used to describe the version of the actual document.
-  *
-  * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
-  * necessary.
-  *@param os is the current output specification for the job that is doing the crawling.
-  *@return a string, of unlimited length, which uniquely describes output configuration and specification in such a way that if two such strings are equal,
-  * the document will not need to be sent again to the output data store.
-  */
-  @Override
-  public String getOutputDescription(OutputSpecification os)
-    throws ManifoldCFException, ServiceInterruption
+  private final static Set<String> acceptableMimeTypes = new HashSet<String>();
+  static
   {
-    SpecPacker sp = new SpecPacker(os);
-    return sp.toPackedString();
+    acceptableMimeTypes.add("text/plain;charset=utf-8");
+    acceptableMimeTypes.add("text/plain;charset=ascii");
+    acceptableMimeTypes.add("text/plain;charset=us-ascii");
+    acceptableMimeTypes.add("text/plain");
   }
-
+  
   /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
   * unusable documents that will be passed to this output connector.
   *@param outputDescription is the document's output version.
   *@param mimeType is the mime type of the document.
   *@return true if the mime type is indexable by this connector.
   */
-  public boolean checkMimeTypeIndexable(String outputDescription, String mimeType)
+  @Override
+  public boolean checkMimeTypeIndexable(VersionContext outputDescription, String mimeType, IOutputCheckActivity activities)
     throws ManifoldCFException, ServiceInterruption
   {
-    SpecPacker sp = new SpecPacker(outputDescription);
-    if (sp.checkMimeType(mimeType))
-      return super.checkMimeTypeIndexable(outputDescription, mimeType);
-    else
-      return false;
+    return acceptableMimeTypes.contains(mimeType.toLowerCase(Locale.ROOT));
   }
 
-  @Override
-  public boolean checkLengthIndexable(String outputDescription, long length)
-    throws ManifoldCFException, ServiceInterruption {
-    SpecPacker sp = new SpecPacker(outputDescription);
-    if (sp.checkLengthIndexable(length))
-      return super.checkLengthIndexable(outputDescription, length);
-    else
-      return false;
-  }
-
-  @Override
-  public boolean checkURLIndexable(String outputDescription, String url)
-    throws ManifoldCFException, ServiceInterruption {
-    SpecPacker sp = new SpecPacker(outputDescription);
-    if (sp.checkURLIndexable(url))
-      return super.checkURLIndexable(outputDescription, url);
-    else
-      return false;
-  }
-  
   /** Add (or replace) a document in the output data store using the connector.
   * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
   * necessary.
@@ -342,118 +339,67 @@
   *@return the document status (accepted or permanently rejected).
   */
   @Override
-  public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    // Establish a session
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+    throws ManifoldCFException, ServiceInterruption, IOException
+  {
+    // Establish a session
     getSession();
     
-    SpecPacker sp = new SpecPacker(outputDescription);
-    
-    String jsondata = "";

-    try {
-      //build json..
-      SDFModel model = new SDFModel();
-      Document doc = model.new Document();
-      doc.setType("add");
-      doc.setId(ManifoldCF.hash(documentURI));
-      
-      HashMap fields = new HashMap();
-      Metadata metadata = extractBinaryFile(document, fields);
-      
-      Iterator<String> itr = document.getFields();
-      while(itr.hasNext())
-      {
-        String fName = itr.next();
-        Object[] value = document.getField(fName);
-        String target = sp.getMapping(fName);
-        if(target!=null)
-        {
-          fields.put(target, value);
-        }
-        else
-        {
-          if(sp.keepAllMetadata())
-          {
-            fields.put(fName, value);
-          }
-        }
-      }
-      
-      //metadata of binary files.
-      String[] metaNames = metadata.names();
-      for(String mName : metaNames){
-        String value = metadata.get(mName);
-        String target = sp.getMapping(mName);
-        if(target!=null)
-        {
-          fields.put(target, value);
-        }
-        else
-        {
-          if(sp.keepAllMetadata())
-          {
-            fields.put(mName, value);
-          }
-        }
-      }
-      doc.setFields(fields);
-      model.addDocument(doc);

-      

-      //generate json data.

-      jsondata = model.toJSON();

-    } 

-    catch (SAXException e) {

-      // if document data could not be converted to JSON by jackson.
-      Logging.connectors.debug(e);
-      throw new ManifoldCFException(e);

-    } catch (JsonProcessingException e) {

-      // if document data could not be converted to JSON by jackson.
-      Logging.connectors.debug(e);
-      throw new ManifoldCFException(e);

-    } catch (TikaException e) {

-      // if document could not be parsed by tika.
-      Logging.connectors.debug(e);

-      return DOCUMENTSTATUS_REJECTED;

-    } catch (IOException e) {

-      // if document data could not be read when the document parsing by tika.
-      Logging.connectors.debug(e);
-      throw new ManifoldCFException(e);

-    }

-    

-    //post data..

-    String responsbody = postData(jsondata);

-    

-    // check status

-    String status = getStatusFromJsonResponse(responsbody);

-    if("success".equals(status))

-    {

-      activities.recordActivity(null,INGEST_ACTIVITY,new Long(document.getBinaryLength()),documentURI,"OK",null);

-      return DOCUMENTSTATUS_ACCEPTED;

-    }

-    else {

-      throw new ManifoldCFException("recieved error status from service after feeding document. response body : " + responsbody);

-    }

-  }
+    String uid = ManifoldCF.hash(documentURI);
 
-  private Metadata extractBinaryFile(RepositoryDocument document, HashMap fields)
-      throws IOException, SAXException, TikaException {
+    // Build a JSON generator
+    JSONObjectReader objectReader = new JSONObjectReader();
+    // Build the metadata field part
+    JSONObjectReader fieldReader = new JSONObjectReader();
+    // Add the type and ID
+    objectReader.addNameValuePair(new JSONNameValueReader(new JSONStringReader("id"),new JSONStringReader(uid)))
+      .addNameValuePair(new JSONNameValueReader(new JSONStringReader("type"),new JSONStringReader("add")))
+      .addNameValuePair(new JSONNameValueReader(new JSONStringReader("fields"),fieldReader));
     
-    //extract body text and metadata fields from binary file.
-    InputStream is = document.getBinaryStream();
-    Parser parser = new AutoDetectParser();
-    ContentHandler handler = new BodyContentHandler();
-    Metadata metadata = new Metadata();
-    parser.parse(is, handler, metadata, new ParseContext());
-    String bodyStr = handler.toString();
-    if(bodyStr != null){
-      bodyStr = handler.toString().replaceAll("\\n", "").replaceAll("\\t", "");
-      fields.put(FILE_BODY_TEXT_FIELDNAME, bodyStr);
+    // Populate the fields...
+    Iterator<String> itr = document.getFields();
+    while (itr.hasNext())
+    {
+      String fieldName = itr.next();
+      Object[] fieldValues = document.getField(fieldName);
+      JSONReader[] elements = new JSONReader[fieldValues.length];
+      if (fieldValues instanceof Reader[])
+      {
+        for (int i = 0; i < elements.length; i++)
+        {
+          elements[i] = new JSONStringReader((Reader)fieldValues[i]);
+        }
+      }
+      else if (fieldValues instanceof Date[])
+      {
+        for (int i = 0; i < elements.length; i++)
+        {
+          elements[i] = new JSONStringReader(((Date)fieldValues[i]).toString());
+        }
+      }
+      else if (fieldValues instanceof String[])
+      {
+        for (int i = 0; i < elements.length; i++)
+        {
+          elements[i] = new JSONStringReader((String)fieldValues[i]);
+        }
+      }
+      else
+        throw new IllegalStateException("Unexpected metadata type: "+fieldValues.getClass().getName());
+      
+      fieldReader.addNameValuePair(new JSONNameValueReader(new JSONStringReader(fieldName),new JSONArrayReader(elements)));
     }
-    return metadata;
-  }
-
-  /** Remove a document using the connector.
+    
+    // Add the primary content data in.
+    fieldReader.addNameValuePair(new JSONNameValueReader(new JSONStringReader(FILE_BODY_TEXT_FIELDNAME),
+      new JSONStringReader(new InputStreamReader(document.getBinaryStream(),Consts.UTF_8))));
+    
+    documentChunkManager.recordDocument(uid, serverHost, serverPath, new ReaderInputStream(objectReader, Consts.UTF_8));
+    conditionallyFlushDocuments();
+    return DOCUMENTSTATUS_ACCEPTED;
+  }
+  
+  /** Remove a document using the connector.
   * Note that the last outputDescription is included, since it may be necessary for the connector to use such information to know how to properly remove the document.
   *@param documentURI is the URI of the document.  The URI is presumed to be the unique identifier which the output data store will use to process
   * and serve the document.  This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
@@ -467,29 +413,107 @@
     // Establish a session
     getSession();
     
-    String jsonData = "";
-    try {
-      SDFModel model = new SDFModel();
-      SDFModel.Document doc = model.new Document();
-      doc.setType("delete");
-      doc.setId(documentURI);
-      model.addDocument(doc);
-      jsonData = model.toJSON();
-    } catch (JsonProcessingException e) {
-      throw new ManifoldCFException(e);
-    }
-    String responsbody = postData(jsonData);
-    
-    // check status
-    String status = getStatusFromJsonResponse(responsbody);
-    if("success".equals(status))
-    {
-      activities.recordActivity(null,REMOVE_ACTIVITY,null,documentURI,"OK",null);
-    }
-    else {
-      throw new ManifoldCFException("recieved error status from service after feeding document.");
-    }
-  }
+    String uid = ManifoldCF.hash(documentURI);
+
+    // Build a JSON generator
+    JSONObjectReader objectReader = new JSONObjectReader();
+    // Add the type and ID
+    objectReader.addNameValuePair(new JSONNameValueReader(new JSONStringReader("id"),new JSONStringReader(uid)))
+      .addNameValuePair(new JSONNameValueReader(new JSONStringReader("type"),new JSONStringReader("delete")));
+
+    try
+    {
+      documentChunkManager.recordDocument(uid, serverHost, serverPath, new ReaderInputStream(objectReader, Consts.UTF_8));
+    }
+    catch (IOException e)
+    {
+      handleIOException(e);
+    }
+    conditionallyFlushDocuments();
+  }
+  
+  @Override
+  public void noteJobComplete(IOutputNotifyActivity activities)
+      throws ManifoldCFException, ServiceInterruption {
+    getSession();
+    flushDocuments();
+  }
+  
+  protected static final int CHUNK_SIZE = 1000;
+
+  protected void conditionallyFlushDocuments()
+    throws ManifoldCFException, ServiceInterruption
+  {
+    if (documentChunkManager.equalOrMoreThan(serverHost, serverPath, CHUNK_SIZE))
+      flushDocuments();
+  }
+  
+  protected void flushDocuments()
+    throws ManifoldCFException, ServiceInterruption
+  {
+    Logging.ingest.info("AmazonCloudSearch: Starting flush to Amazon");
+
+    // Repeat until we are empty of cached stuff
+    int chunkNumber = 0;
+    while (true)
+    {
+      DocumentRecord[] records = documentChunkManager.readChunk(serverHost, serverPath, CHUNK_SIZE);
+      try
+      {
+        if (records.length == 0)
+          break;
+        // The records consist of up to 1000 individual input streams, which must all be concatenated together into the POST body.
+        // To do that, we go into and out of Reader space once again...
+        JSONArrayReader arrayReader = new JSONArrayReader();
+        for (DocumentRecord dr : records)
+        {
+          arrayReader.addArrayElement(new JSONValueReader(new InputStreamReader(dr.getDataStream(),Consts.UTF_8)));
+        }
+        
+        //post data..
+        String responsbody = postData(new ReaderInputStream(arrayReader,Consts.UTF_8));
+        // check status
+        String status = getStatusFromJsonResponse(responsbody);
+        if("success".equals(status))
+        {
+          Logging.ingest.info("AmazonCloudSearch: Successfully sent document chunk " + chunkNumber);
+          //remove documents from table..
+          documentChunkManager.deleteChunk(records);
+        }
+        else
+        {
+          Logging.ingest.error("AmazonCloudSearch: Error sending document chunk "+ chunkNumber+": "+ responsbody);
+          throw new ManifoldCFException("recieved error status from service after feeding document. response body : " + responsbody);
+        }
+      }
+      finally
+      {
+        Throwable exception = null;
+        for (DocumentRecord dr : records)
+        {
+          try
+          {
+            dr.close();
+          }
+          catch (Throwable e)
+          {
+            exception = e;
+          }
+        }
+        if (exception != null)
+        {
+          if (exception instanceof ManifoldCFException)
+            throw (ManifoldCFException)exception;
+          else if (exception instanceof Error)
+            throw (Error)exception;
+          else if (exception instanceof RuntimeException)
+            throw (RuntimeException)exception;
+          else
+            throw new RuntimeException("Unknown exception class thrown: "+exception.getClass().getName()+": "+exception.getMessage(),exception);
+        }
+      }
+    }
+  }
 
   /**
    * Fill in a Server tab configuration parameter map for calling a Velocity
@@ -635,492 +659,69 @@
 
     return null;
   }
-
-  private String postData(String jsonData) throws ServiceInterruption, ManifoldCFException {
-    CloseableHttpClient httpclient = HttpClients.createDefault();
-    try {
-      poster.setEntity(new StringEntity(jsonData, Consts.UTF_8));
-      HttpResponse res = httpclient.execute(poster);
-      
-      HttpEntity resEntity = res.getEntity();
-      return EntityUtils.toString(resEntity);
-    } catch (ClientProtocolException e) {
-      throw new ManifoldCFException(e);
-    } catch (IOException e) {
-      handleIOException(e);
-    } finally {
-      try {
-        httpclient.close();
-      } catch (IOException e) {
-        //do nothing
-      }
-    }
-    return null;
-  }
-  
-  private static void handleIOException(IOException e)
-      throws ManifoldCFException, ServiceInterruption {
-    if (!(e instanceof java.net.SocketTimeoutException)
-        && (e instanceof InterruptedIOException)) {
-      throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
-          ManifoldCFException.INTERRUPTED);
-    }
-    Logging.connectors.warn(
-        "Amazon CloudSearch: IO exception: " + e.getMessage(), e);
-    long currentTime = System.currentTimeMillis();
-    throw new ServiceInterruption("IO exception: " + e.getMessage(), e,
-        currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false);
-  }
-  
-  protected static void fillInFieldMappingSpecificationMap(Map<String,Object> paramMap, OutputSpecification os)
-  {
-    // Prep for field mappings
-    List<Map<String,String>> fieldMappings = new ArrayList<Map<String,String>>();
-    String keepAllMetadataValue = "true";
-    for (int i = 0; i < os.getChildCount(); i++)
-    {
-      SpecificationNode sn = os.getChild(i);
-      if (sn.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP)) {
-        String source = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE);
-        String target = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_TARGET);
-        String targetDisplay;
-        if (target == null)
-        {
-          target = "";
-          targetDisplay = "(remove)";
-        }
-        else
-          targetDisplay = target;
-        Map<String,String> fieldMapping = new HashMap<String,String>();
-        fieldMapping.put("SOURCE",source);
-        fieldMapping.put("TARGET",target);
-        fieldMapping.put("TARGETDISPLAY",targetDisplay);
-        fieldMappings.add(fieldMapping);
-      }
-      else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA))
+
+  private String postData(InputStream jsonData) throws ServiceInterruption, ManifoldCFException {
+    CloseableHttpClient httpclient = HttpClients.createDefault();
+    try {
+      BinaryInput bi = new TempFileInput(jsonData);
+      try
       {
-        keepAllMetadataValue = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
-      }
-    }
-    paramMap.put("FIELDMAPPINGS",fieldMappings);
-    paramMap.put("KEEPALLMETADATA",keepAllMetadataValue);
-  }
-  
-  protected static void fillInContentsSpecificationMap(Map<String,Object> paramMap, OutputSpecification os)
-  {
-    String maxFileSize = AmazonCloudSearchConfig.MAXLENGTH_DEFAULT;
-    String allowedMimeTypes = AmazonCloudSearchConfig.MIMETYPES_DEFAULT;
-    String allowedFileExtensions = AmazonCloudSearchConfig.EXTENSIONS_DEFAULT;
-    for (int i = 0; i < os.getChildCount(); i++)
-    {
-      SpecificationNode sn = os.getChild(i);
-      if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MAXLENGTH))
-        maxFileSize = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
-      else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES))
-        allowedMimeTypes = sn.getValue();
-      else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_EXTENSIONS))
-        allowedFileExtensions = sn.getValue();
-    }
-    paramMap.put("MAXFILESIZE",maxFileSize);
-    paramMap.put("MIMETYPES",allowedMimeTypes);
-    paramMap.put("EXTENSIONS",allowedFileExtensions);
-  }
-  
-  /**
-   * Output the specification header section. This method is called in the head
-   * section of a job page which has selected an output connection of the
-   * current type. Its purpose is to add the required tabs to the list, and to
-   * output any javascript methods that might be needed by the job editing HTML.
-   * 
-   * @param out is the output to which any HTML should be sent.
-   * @param os is the current output specification for this job.
-   * @param tabsArray is an array of tab names. Add to this array any tab names
-   *        that are specific to the connector.
-   */
-  @Override
-  public void outputSpecificationHeader(IHTTPOutput out, Locale locale,
-      OutputSpecification os, List<String> tabsArray)
-      throws ManifoldCFException, IOException
-  {
-    Map<String, Object> paramMap = new HashMap<String, Object>();
-
-    tabsArray.add(Messages.getString(locale, "AmazonCloudSearchOutputConnector.FieldMappingTabName"));
-    tabsArray.add(Messages.getString(locale, "AmazonCloudSearchOutputConnector.ContentsTabName"));
-
-    // Fill in the specification header map, using data from all tabs.
-    fillInFieldMappingSpecificationMap(paramMap, os);
-    fillInContentsSpecificationMap(paramMap, os);
-
-    Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_JS,paramMap);
-  }
-  
-  /** Output the specification body section.
-  * This method is called in the body section of a job page which has selected an output connection of the current type.  Its purpose is to present the required form elements for editing.
-  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
-  * form is "editjob".
-  *@param out is the output to which any HTML should be sent.
-  *@param os is the current output specification for this job.
-  *@param tabName is the current tab name.
-  */
-  @Override
-  public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os, String tabName)
-    throws ManifoldCFException, IOException
-  {
-    Map<String, Object> paramMap = new HashMap<String, Object>();
-
-    // Set the tab name
-    paramMap.put("TABNAME", tabName);
-
-    // Fill in the field mapping tab data
-    fillInFieldMappingSpecificationMap(paramMap, os);
-    fillInContentsSpecificationMap(paramMap, os);
-    Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_CONTENTS_HTML,paramMap);
-    Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_FIELDMAPPING_HTML,paramMap);
-  }
-
-  /** Process a specification post.
-  * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
-  * posted.  Its purpose is to gather form information and modify the output specification accordingly.
-  * The name of the posted form is "editjob".
-  *@param variableContext contains the post data, including binary file-upload information.
-  *@param os is the current output specification for this job.
-  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
-  */
-  @Override
-  public String processSpecificationPost(IPostParameters variableContext,
-    Locale locale, OutputSpecification os) throws ManifoldCFException {
-    String x;
+        poster.setEntity(new InputStreamEntity(bi.getStream(),bi.getLength()));
+        HttpResponse res = httpclient.execute(poster);
         
-    x = variableContext.getParameter("maxfilesize");
-    if (x != null)
-    {
-      int i = 0;
-      while (i < os.getChildCount())
-      {
-        SpecificationNode node = os.getChild(i);
-        if (node.getType().equals(AmazonCloudSearchConfig.NODE_MAXLENGTH))
-          os.removeChild(i);
-        else
-          i++;
+        HttpEntity resEntity = res.getEntity();
+        return EntityUtils.toString(resEntity);
       }
-      SpecificationNode sn = new SpecificationNode(AmazonCloudSearchConfig.NODE_MAXLENGTH);
-      sn.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE,x);
-      os.addChild(os.getChildCount(),sn);
+      finally
+      {
+        bi.discard();
+      }
+    } catch (ClientProtocolException e) {
+      throw new ManifoldCFException(e);
+    } catch (IOException e) {
+      handleIOException(e);
+    } finally {
+      try {
+        httpclient.close();
+      } catch (IOException e) {
+        //do nothing
+      }
     }
-
-    x = variableContext.getParameter("mimetypes");
-    if (x != null)
-    {
-      int i = 0;
-      while (i < os.getChildCount())
-      {
-        SpecificationNode node = os.getChild(i);
-        if (node.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES))
-          os.removeChild(i);
-        else
-          i++;
-      }
-      SpecificationNode sn = new SpecificationNode(AmazonCloudSearchConfig.NODE_MIMETYPES);
-      sn.setValue(x);
-      os.addChild(os.getChildCount(),sn);
-    }
-
-    x = variableContext.getParameter("extensions");
-    if (x != null)
-    {
-      int i = 0;
-      while (i < os.getChildCount())
-      {
-        SpecificationNode node = os.getChild(i);
-        if (node.getType().equals(AmazonCloudSearchConfig.NODE_EXTENSIONS))
-          os.removeChild(i);
-        else
-          i++;
-      }
-      SpecificationNode sn = new SpecificationNode(AmazonCloudSearchConfig.NODE_EXTENSIONS);
-      sn.setValue(x);
-      os.addChild(os.getChildCount(),sn);
-    }
-    
-    x = variableContext.getParameter("cloudsearch_fieldmapping_count");
-    if (x != null && x.length() > 0)
-    {
-      // About to gather the fieldmapping nodes, so get rid of the old ones.
-      int i = 0;
-      while (i < os.getChildCount())
-      {
-        SpecificationNode node = os.getChild(i);
-        if (node.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP) || node.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA))
-          os.removeChild(i);
-        else
-          i++;
-      }
-      int count = Integer.parseInt(x);
-      i = 0;
-      while (i < count)
-      {
-        String prefix = "cloudsearch_fieldmapping_";
-        String suffix = "_"+Integer.toString(i);
-        String op = variableContext.getParameter(prefix+"op"+suffix);
-        if (op == null || !op.equals("Delete"))
-        {
-          // Gather the fieldmap etc.
-          String source = variableContext.getParameter(prefix+"source"+suffix);
-          String target = variableContext.getParameter(prefix+"target"+suffix);
-          if (target == null)
-            target = "";
-          SpecificationNode node = new SpecificationNode(AmazonCloudSearchConfig.NODE_FIELDMAP);
-          node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE,source);
-          node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_TARGET,target);
-          os.addChild(os.getChildCount(),node);
-        }
-        i++;
-      }
-      
-      String addop = variableContext.getParameter("cloudsearch_fieldmapping_op");
-      if (addop != null && addop.equals("Add"))
-      {
-        String source = variableContext.getParameter("cloudsearch_fieldmapping_source");
-        String target = variableContext.getParameter("cloudsearch_fieldmapping_target");
-        if (target == null)
-          target = "";
-        SpecificationNode node = new SpecificationNode(AmazonCloudSearchConfig.NODE_FIELDMAP);
-        node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE,source);
-        node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_TARGET,target);
-        os.addChild(os.getChildCount(),node);
-      }
-      
-      // Gather the keep all metadata parameter to be the last one
-      SpecificationNode node = new SpecificationNode(AmazonCloudSearchConfig.NODE_KEEPMETADATA);
-      String keepAll = variableContext.getParameter("cloudsearch_keepallmetadata");
-      if (keepAll != null)
-      {
-        node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE, keepAll);
-      }
-      else
-      {
-        node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE, "false");
-      }
-      // Add the new keepallmetadata config parameter 
-      os.addChild(os.getChildCount(), node);
-    }
-    
     return null;
   }
   
-
-  /** View specification.
-  * This method is called in the body section of a job's view page.  Its purpose is to present the output specification information to the user.
-  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
-  *@param out is the output to which any HTML should be sent.
-  *@param os is the current output specification for this job.
+  private static void handleIOException(IOException e)
+      throws ManifoldCFException, ServiceInterruption {
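+    // An InterruptedIOException other than a socket timeout means the thread is being shut down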
+    if (!(e instanceof java.net.SocketTimeoutException)
+        && (e instanceof InterruptedIOException)) {
+      throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+          ManifoldCFException.INTERRUPTED);
+    }
+    Logging.ingest.warn(
+        "Amazon CloudSearch: IO exception: " + e.getMessage(), e);
+    long currentTime = System.currentTimeMillis();
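+    // Retry after five minutes; give up entirely three hours from now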
+    throw new ServiceInterruption("IO exception: " + e.getMessage(), e,
+        currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false);
+  }
+  
+  /** Obtain the name of the form check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form check javascript method.
   */
   @Override
-  public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os)
-    throws ManifoldCFException, IOException
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
   {
-    Map<String, Object> paramMap = new HashMap<String, Object>();
-
-    // Fill in the map with data from all tabs
-    fillInFieldMappingSpecificationMap(paramMap, os);
-    fillInContentsSpecificationMap(paramMap, os);
-
-    Messages.outputResourceWithVelocity(out,locale,VIEW_SPECIFICATION_HTML,paramMap);
-    
+    return "s"+connectionSequenceNumber+"_checkSpecification";
   }
-  
-  protected static void fillSet(Set<String> set, String input) {
-    try
-    {
-      StringReader sr = new StringReader(input);
-      BufferedReader br = new BufferedReader(sr);
-      String line = null;
-      while ((line = br.readLine()) != null)
-      {
-        line = line.trim();
-        if (line.length() > 0)
-          set.add(line);
-      }
-    }
-    catch (IOException e)
-    {
-      // Should never happen
-      throw new RuntimeException("IO exception reading strings: "+e.getMessage(),e);
-    }
-  }
-  
-  protected static class SpecPacker {
-    
-    private final Map<String,String> sourceTargets = new HashMap<String,String>();
-    private final boolean keepAllMetadata;
-    private final Set<String> extensions = new HashSet<String>();
-    private final Set<String> mimeTypes = new HashSet<String>();
-    private final Long lengthCutoff;
-    
-    public SpecPacker(OutputSpecification os) {
-      boolean keepAllMetadata = true;
-      Long lengthCutoff = null;
-      String extensions = null;
-      String mimeTypes = null;
-      for (int i = 0; i < os.getChildCount(); i++) {
-        SpecificationNode sn = os.getChild(i);
-        
-        if(sn.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA)) {
-          String value = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
-          keepAllMetadata = Boolean.parseBoolean(value);
-        } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP)) {
-          String source = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE);
-          String target = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_TARGET);
-          
-          if (target == null) {
-            target = "";
-          }
-          sourceTargets.put(source, target);
-        } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES)) {
-          mimeTypes = sn.getValue();
-        } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_EXTENSIONS)) {
-          extensions = sn.getValue();
-        } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MAXLENGTH)) {
-          String value = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
-          lengthCutoff = new Long(value);
-        }
-      }
-      this.keepAllMetadata = keepAllMetadata;
-      this.lengthCutoff = lengthCutoff;
-      fillSet(this.extensions, extensions);
-      fillSet(this.mimeTypes, mimeTypes);
-    }
-    
-    public SpecPacker(String packedString) {
-      
-      int index = 0;
-      
-      // Mappings
-      final List<String> packedMappings = new ArrayList<String>();
-      index = unpackList(packedMappings,packedString,index,'+');
-      String[] fixedList = new String[2];
-      for (String packedMapping : packedMappings) {
-        unpackFixedList(fixedList,packedMapping,0,':');
-        sourceTargets.put(fixedList[0], fixedList[1]);
-      }
-      
-      // Keep all metadata
-      if (packedString.length() > index)
-        keepAllMetadata = (packedString.charAt(index++) == '+');
-      else
-        keepAllMetadata = true;
-      
-      // Max length
-      final StringBuilder sb = new StringBuilder();
-      if (packedString.length() > index) {
-        if (packedString.charAt(index++) == '+') {
-          index = unpack(sb,packedString,index,'+');
-          this.lengthCutoff = new Long(sb.toString());
-        } else
-          this.lengthCutoff = null;
-      } else
-        this.lengthCutoff = null;
-      
-      // Mime types
-      final List<String> mimeBuffer = new ArrayList<String>();
-      index = unpackList(mimeBuffer,packedString,index,'+');
-      for (String mimeType : mimeBuffer) {
-        this.mimeTypes.add(mimeType);
-      }
-      
-      // Extensions
-      final List<String> extensionsBuffer = new ArrayList<String>();
-      index = unpackList(extensionsBuffer,packedString,index,'+');
-      for (String extension : extensionsBuffer) {
-        this.extensions.add(extension);
-      }
-    }
-    
-    public String toPackedString() {
-      StringBuilder sb = new StringBuilder();
-      int i;
-      
-      // Mappings
-      final String[] sortArray = new String[sourceTargets.size()];
-      i = 0;
-      for (String source : sourceTargets.keySet()) {
-        sortArray[i++] = source;
-      }
-      java.util.Arrays.sort(sortArray);
-      
-      List<String> packedMappings = new ArrayList<String>();
-      String[] fixedList = new String[2];
-      for (String source : sortArray) {
-        String target = sourceTargets.get(source);
-        StringBuilder localBuffer = new StringBuilder();
-        fixedList[0] = source;
-        fixedList[1] = target;
-        packFixedList(localBuffer,fixedList,':');
-        packedMappings.add(localBuffer.toString());
-      }
-      packList(sb,packedMappings,'+');
 
-      // Keep all metadata
-      if (keepAllMetadata)
-        sb.append('+');
-      else
-        sb.append('-');
-      
-      // Max length
-      if (lengthCutoff == null)
-        sb.append('-');
-      else {
-        sb.append('+');
-        pack(sb,lengthCutoff.toString(),'+');
-      }
-      
-      // Mime types
-      String[] mimeTypes = new String[this.mimeTypes.size()];
-      i = 0;
-      for (String mimeType : this.mimeTypes) {
-        mimeTypes[i++] = mimeType;
-      }
-      java.util.Arrays.sort(mimeTypes);
-      packList(sb,mimeTypes,'+');
-      
-      // Extensions
-      String[] extensions = new String[this.extensions.size()];
-      i = 0;
-      for (String extension : this.extensions) {
-        extensions[i++] = extension;
-      }
-      java.util.Arrays.sort(extensions);
-      packList(sb,extensions,'+');
-      
-      return sb.toString();
-    }
-    
-    public boolean checkLengthIndexable(long length) {
-      if (lengthCutoff == null)
-        return true;
-      return (length <= lengthCutoff.longValue());
-    }
-    
-    public boolean checkMimeType(String mimeType) {
-      if (mimeType == null)
-        mimeType = "application/unknown";
-      return mimeTypes.contains(mimeType);
-    }
-    
-    public boolean checkURLIndexable(String url) {
-      String extension = FilenameUtils.getExtension(url);
-      if (extension == null || extension.length() == 0)
-        extension = ".";
-      return extensions.contains(extension);
-    }
-    
-    public String getMapping(String source) {
-      return sourceTargets.get(source);
-    }
-    
-    public boolean keepAllMetadata() {
-      return keepAllMetadata;
-    }
+  /** Obtain the name of the form presave check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form presave check javascript method.
+  */
+  @Override
+  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
   }
-  

+
 }
diff --git a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/DocumentChunkManager.java b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/DocumentChunkManager.java
new file mode 100644
index 0000000..c3d6bd4
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/DocumentChunkManager.java
@@ -0,0 +1,323 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.output.amazoncloudsearch;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Iterator;
+
+import java.io.InputStream;
+import java.io.IOException;
+
+import org.apache.manifoldcf.core.interfaces.ColumnDescription;
+import org.apache.manifoldcf.core.interfaces.IndexDescription;
+import org.apache.manifoldcf.core.interfaces.IDBInterface;
+import org.apache.manifoldcf.core.interfaces.IResultRow;
+import org.apache.manifoldcf.core.interfaces.IResultSet;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.BinaryInput;
+import org.apache.manifoldcf.core.interfaces.TempFileInput;
+import org.apache.manifoldcf.core.interfaces.ClauseDescription;
+import org.apache.manifoldcf.core.interfaces.UnitaryClause;
+
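+/** Manages the queue table of SDF document chunks awaiting batched transmission to CloudSearch. */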
+public class DocumentChunkManager extends org.apache.manifoldcf.core.database.BaseTable
+{
+  // Database fields
+  private final static String UID_FIELD = "uid";                        // This is the document key, which is a dochash value
+  private final static String HOST_FIELD = "serverhost";            // The host and path are there to make sure we don't collide between connections
+  private final static String PATH_FIELD = "serverpath";
+  private final static String SDF_DATA_FIELD = "sdfdata";
+  
+  public DocumentChunkManager(
+      IDBInterface database)
+  {
+    super(database, "amazoncloudsearch_documentdata");
+  }
+
+  /** Install the manager 
+   * @throws ManifoldCFException 
+   */
+  public void install() throws ManifoldCFException
+  {
+    // Standard practice: outer loop on install methods, no transactions
+    while (true)
+    {
+      Map existing = getTableSchema(null,null);
+      if (existing == null)
+      {
+        // Install the table.
+        HashMap map = new HashMap();
+        map.put(UID_FIELD,new ColumnDescription("VARCHAR(40)",false,false,null,null,false));
+        map.put(HOST_FIELD,new ColumnDescription("VARCHAR(255)",false,false,null,null,false));
+        map.put(PATH_FIELD,new ColumnDescription("VARCHAR(255)",false,false,null,null,false));
+        map.put(SDF_DATA_FIELD,new ColumnDescription("BLOB",false,true,null,null,false));
+        performCreate(map,null);
+      }
+      else
+      {
+        // Upgrade code, if needed, goes here
+      }
+
+      // Handle indexes, if needed
+      IndexDescription keyIndex = new IndexDescription(true,new String[]{HOST_FIELD,PATH_FIELD,UID_FIELD});
+
+      Map indexes = getTableIndexes(null,null);
+      Iterator iter = indexes.keySet().iterator();
+      while (iter.hasNext())
+      {
+        String indexName = (String)iter.next();
+        IndexDescription id = (IndexDescription)indexes.get(indexName);
+
+        if (keyIndex != null && id.equals(keyIndex))
+          keyIndex = null;
+        else if (indexName.indexOf("_pkey") == -1)
+          // This index shouldn't be here; drop it
+          performRemoveIndex(indexName);
+      }
+
+      // Add the ones we didn't find
+      if (keyIndex != null)
+        performAddIndex(null,keyIndex);
+
+      break;
+    }
+  }
+  
+  /** Uninstall the manager.
+  */
+  public void deinstall()
+    throws ManifoldCFException
+  {
+    performDrop(null);
+  }
+  
+  /**
+   * Record document information for later transmission to Amazon.
+   * @param uid document uid (the document hash value)
+   * @param host server host for this connection
+   * @param path server path for this connection
+   * @param sdfData document SDF data.
+   * @throws ManifoldCFException
+   */
+  public void recordDocument(String uid, String host, String path, InputStream sdfData) 
+      throws ManifoldCFException, IOException
+  {
+    TempFileInput tfi = null;
+    try
+    {
+      // This downloads all the data from upstream!
+      try
+      {
+        tfi = new TempFileInput(sdfData);
+      }
+      catch (ManifoldCFException e)
+      {
+        if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+          throw e;
+        throw new IOException("Fetch failed: "+e.getMessage());
+      }
+      
+      while (true)
+      {
+        long sleepAmt = 0L;
+        try
+        {
+          beginTransaction();
+          try
+          {
+
+            ArrayList params = new ArrayList();
+            String query = buildConjunctionClause(params,new ClauseDescription[]{
+              new UnitaryClause(HOST_FIELD,host),
+              new UnitaryClause(PATH_FIELD,path),
+              new UnitaryClause(UID_FIELD,uid)});
+
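+            // Lock any existing row (FOR UPDATE) so concurrent workers serialize on this document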
+            IResultSet set = performQuery("SELECT "+UID_FIELD+" FROM "+getTableName()+" WHERE "+
+              query+" FOR UPDATE",params,null,null);
+            
+            Map<String,Object> parameterMap = new HashMap<String,Object>();
+            parameterMap.put(SDF_DATA_FIELD, tfi);
+            
+            // If the record already exists in the table, update it; otherwise insert a new row.
+            if(set.getRowCount() > 0)
+            {
+              performUpdate(parameterMap, " WHERE "+query, params, null);
+            }
+            else
+            {
+              parameterMap.put(UID_FIELD, uid);
+              parameterMap.put(HOST_FIELD, host);
+              parameterMap.put(PATH_FIELD, path);
+              performInsert(parameterMap, null);
+            }
+      
+            break;
+          }
+          catch (ManifoldCFException e)
+          {
+            signalRollback();
+            throw e;
+          }
+          catch (RuntimeException e)
+          {
+            signalRollback();
+            throw e;
+          }
+          catch (Error e)
+          {
+            signalRollback();
+            throw e;
+          }
+          finally
+          {
+            endTransaction();
+          }
+        }
+        catch (ManifoldCFException e)
+        {
+          // Look for deadlock and retry if so
+          if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT)
+          {
+            sleepAmt = getSleepAmt();
+            continue;
+          }
+          throw e;
+        }
+        finally
+        {
+          // Pause between retries, giving a deadlocked transaction time to clear
+          sleepFor(sleepAmt);
+        }
+      }
+
+    }
+    finally
+    {
+      if (tfi != null)
+        tfi.discard();
+    }
+  }
+  
+  /** Determine whether at least maximumNumber documents are queued for the given host and path.
+  */
+  public boolean equalOrMoreThan(String host, String path, int maximumNumber)
+    throws ManifoldCFException
+  {
+    ArrayList params = new ArrayList();
+    String query = buildConjunctionClause(params,new ClauseDescription[]{
+      new UnitaryClause(HOST_FIELD,host),
+      new UnitaryClause(PATH_FIELD,path)});
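+    // Count rows, capped at maximumNumber, so the query stays cheap on large queues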
+    IResultSet set = performQuery("SELECT "+constructCountClause(UID_FIELD)+" AS countval FROM "+getTableName()+" WHERE "+query+" "+constructOffsetLimitClause(0,maximumNumber),params,null,null);
+    long count;
+    if (set.getRowCount() > 0)
+    {
+      IResultRow row = set.getRow(0);
+      Long countVal = (Long)row.getValue("countval");
+      count = countVal.longValue();
+    }
+    else
+      count = 0L;
+    
+    return count >= maximumNumber;
+  }
+  
+  /** Read a chunk of documents.
+  */
+  public DocumentRecord[] readChunk(String host, String path, int maximumNumber)
+    throws ManifoldCFException
+  {
+    ArrayList params = new ArrayList();
+    String query = buildConjunctionClause(params,new ClauseDescription[]{
+      new UnitaryClause(HOST_FIELD,host),
+      new UnitaryClause(PATH_FIELD,path)});
+
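+    // Fetch up to maximumNumber queued documents for this connection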
+    IResultSet set = performQuery("SELECT * FROM "+getTableName()+" WHERE "+query+" "+constructOffsetLimitClause(0,maximumNumber),params,null,null);
+    DocumentRecord[] rval = new DocumentRecord[set.getRowCount()];
+    for (int i = 0; i < set.getRowCount(); i++)
+    {
+      IResultRow row = set.getRow(i);
+      rval[i] = new DocumentRecord(host,path,
+        (String)row.getValue(UID_FIELD),
+        (BinaryInput)row.getValue(SDF_DATA_FIELD));
+    }
+    return rval;
+  }
+  
+  /** Delete the chunk of documents (presumably because we processed them successfully)
+  */
+  public void deleteChunk(DocumentRecord[] records)
+    throws ManifoldCFException
+  {
+    // Do the whole thing in a transaction -- if we mess up, we'll have to try everything again
+    while (true)
+    {
+      long sleepAmt = 0L;
+      try
+      {
+        beginTransaction();
+        try
+        {
+
+          // Theoretically we could aggregate the records, but for now delete one at a time.
+          for (DocumentRecord dr : records)
+          {
+            String host = dr.getHost();
+            String path = dr.getPath();
+            String uid = dr.getUid();
+            ArrayList params = new ArrayList();
+            String query = buildConjunctionClause(params,new ClauseDescription[]{
+              new UnitaryClause(HOST_FIELD,host),
+              new UnitaryClause(PATH_FIELD,path),
+              new UnitaryClause(UID_FIELD,uid)});
+            performDelete("WHERE "+query,params,null);
+          }
+          
+          break;
+        }
+        catch (ManifoldCFException e)
+        {
+          signalRollback();
+          throw e;
+        }
+        catch (RuntimeException e)
+        {
+          signalRollback();
+          throw e;
+        }
+        catch (Error e)
+        {
+          signalRollback();
+          throw e;
+        }
+        finally
+        {
+          endTransaction();
+        }
+      }
+      catch (ManifoldCFException e)
+      {
+        // Look for deadlock and retry if so
+        if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT)
+        {
+          sleepAmt = getSleepAmt();
+          continue;
+        }
+        throw e;
+      }
+      finally
+      {
+        // Pause between retries, giving a deadlocked transaction time to clear
+        sleepFor(sleepAmt);
+      }
+    }
+
+  }
+  
+}
diff --git a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/DocumentRecord.java b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/DocumentRecord.java
new file mode 100644
index 0000000..88170b9
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/DocumentRecord.java
@@ -0,0 +1,78 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.output.amazoncloudsearch;
+
+import org.apache.manifoldcf.core.interfaces.*;
+
+import java.io.*;
+
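+/** Value object for one queued document: connection coordinates (host, path), uid, and the stored SDF data. */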
+public class DocumentRecord {
+  
+  protected final String host;
+  protected final String path;
+  protected final String uid;
+  protected final BinaryInput data;
+  
+  public DocumentRecord(String host, String path, String uid, BinaryInput data)
+  {
+    this.host = host;
+    this.path = path;
+    this.uid = uid;
+    this.data = data;
+  }
+
+  public String getHost()
+  {
+    return host;
+  }
+  
+  public String getPath()
+  {
+    return path;
+  }
+  
+  public String getUid()
+  {
+    return uid;
+  }
+  
+  public long getStreamLength()
+    throws ManifoldCFException
+  {
+    if (data != null)
+      return data.getLength();
+    return 0L;
+  }
+  
+  public InputStream getDataStream()
+    throws ManifoldCFException
+  {
+    if (data != null)
+      return data.getStream();
+    return null;
+  }
+  
+  public void close()
+    throws ManifoldCFException
+  {
+    if (data != null)
+      data.discard();
+  }
+  
+}
diff --git a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/SDFModel.java b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/SDFModel.java
deleted file mode 100644
index c1ce94c..0000000
--- a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/SDFModel.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/* $Id$ */
-
-/**
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package org.apache.manifoldcf.agents.output.amazoncloudsearch;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-
-import com.fasterxml.jackson.annotation.JsonInclude.Include;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.SerializationFeature;
-import com.fasterxml.jackson.databind.annotation.JsonSerialize;
-import com.fasterxml.jackson.databind.annotation.JsonSerialize.Inclusion;
-
-public class SDFModel {
-  
-  private List<Document> documentList = new ArrayList<Document>();
-  
-  public void addDocument(Document doc){
-    documentList.add(doc);
-  }
-
-  public String toJSON() throws JsonProcessingException{
-    ObjectMapper mapper = new ObjectMapper();
-    mapper.setSerializationInclusion(Include.NON_NULL);
-    return mapper.writeValueAsString(documentList);
-  }
-  
-  public class Document {
-    private String type;
-    private String id;
-    private Map<String,Object> fields;
-    
-    public String getType() {
-      return type;
-    }
-
-    public void setType(String type) {
-      this.type = type;
-    }
-
-    public String getId() {
-      return id;
-    }
-
-    public void setId(String id) {
-      this.id = id;
-    }
-
-    public Map getFields() {
-      return fields;
-    }
-
-    public void setFields(Map<String,Object> fields) {
-      this.fields = fields;
-    }
-  }
-}
diff --git a/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_en_US.properties b/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_en_US.properties
index af5cdab..c1c1dfe 100644
--- a/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_en_US.properties
+++ b/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_en_US.properties
@@ -22,16 +22,3 @@
 AmazonCloudSearchOutputConnector.ServerHostCannotBeNull=Server host cannot be null
 AmazonCloudSearchOutputConnector.ServerPathMustStartWithSlash=Server path must start with a '/'
 AmazonCloudSearchOutputConnector.ProxyPortMustBeAnInteger=Proxy port must be an integer
-AmazonCloudSearchOutputConnector.FieldMappingTabName=CloudSearch Field Mapping
-AmazonCloudSearchOutputConnector.ContentsTabName=CloudSearch Contents
-AmazonCloudSearchOutputConnector.FieldMappings=Field Mappings
-AmazonCloudSearchOutputConnector.MetadataFieldName=Metadata Field Name
-AmazonCloudSearchOutputConnector.CloudSearchFieldName=CloudSearch Field Name
-AmazonCloudSearchOutputConnector.DeleteFieldMapping=Delete field mapping
-AmazonCloudSearchOutputConnector.AddFieldMapping=Add field mapping
-AmazonCloudSearchOutputConnector.KeepAllMetadata=Keep all metadata:
-AmazonCloudSearchOutputConnector.Add=Add
-AmazonCloudSearchOutputConnector.NoFieldMappingSpecified=No field mapping specified
-AmazonCloudSearchOutputConnector.MaxFileSizeBytesColon=Max file size (bytes):
-AmazonCloudSearchOutputConnector.AllowedMIMETypesColon=Allowed MIME types:
-AmazonCloudSearchOutputConnector.AllowedFileExtensionsColon=Allowed file extensions:
diff --git a/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_ja_JP.properties b/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_ja_JP.properties
index e7c2898..315c309 100644
--- a/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_ja_JP.properties
+++ b/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_ja_JP.properties
@@ -22,16 +22,3 @@
 AmazonCloudSearchOutputConnector.ServerHostCannotBeNull=サーバー名は必須です。
 AmazonCloudSearchOutputConnector.ServerPathMustStartWithSlash=パスは / から入力してください。
 AmazonCloudSearchOutputConnector.ProxyPortMustBeAnInteger=プロキシ ポートは数値を入力してください。
-AmazonCloudSearchOutputConnector.FieldMappingTabName=CloudSearch フィールドマッピング
-AmazonCloudSearchOutputConnector.ContentsTabName=CloudSearch コンテンツ
-AmazonCloudSearchOutputConnector.FieldMappings=フィールドマッピング
-AmazonCloudSearchOutputConnector.MetadataFieldName=メタデータフィールド名
-AmazonCloudSearchOutputConnector.CloudSearchFieldName=CloudSearch フィールド名
-AmazonCloudSearchOutputConnector.DeleteFieldMapping=フィールドマッピングを削除
-AmazonCloudSearchOutputConnector.AddFieldMapping=フィールドマッピングを追加
-AmazonCloudSearchOutputConnector.KeepAllMetadata=全てのメタデータを保持する:
-AmazonCloudSearchOutputConnector.Add=追加
-AmazonCloudSearchOutputConnector.NoFieldMappingSpecified=フィールドマッピングを入力してください
-AmazonCloudSearchOutputConnector.MaxFileSizeBytesColon=最大ファイルサイズ (バイト):
-AmazonCloudSearchOutputConnector.AllowedMIMETypesColon=利用可能なMIMEタイプ:
-AmazonCloudSearchOutputConnector.AllowedFileExtensionsColon=利用可能なファイル拡張子:
diff --git a/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_zh_CH.properties b/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_zh_CH.properties
new file mode 100644
index 0000000..c1c1dfe
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_zh_CH.properties
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+AmazonCloudSearchOutputConnector.ServerTabName=Server
+AmazonCloudSearchOutputConnector.ServerHostColon=Server host:
+AmazonCloudSearchOutputConnector.ServerPathColon=Server path:
+AmazonCloudSearchOutputConnector.ProxyProtocolColon=Proxy protocol:
+AmazonCloudSearchOutputConnector.ProxyHostColon=Proxy host:
+AmazonCloudSearchOutputConnector.ProxyPortColon=Proxy port:
+AmazonCloudSearchOutputConnector.ServerHostCannotBeNull=Server host cannot be null
+AmazonCloudSearchOutputConnector.ServerPathMustStartWithSlash=Server path must start with a '/'
+AmazonCloudSearchOutputConnector.ProxyPortMustBeAnInteger=Proxy port must be an integer
diff --git a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification.js b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification.js
deleted file mode 100644
index b8695e5..0000000
--- a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification.js
+++ /dev/null
@@ -1,51 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<script type="text/javascript">
-<!--
-function checkOutputSpecification()
-{
-  return true;
-}
-
-function addFieldMapping()
-{
-  if (editjob.cloudsearch_fieldmapping_source.value == "")
-  {
-    alert("$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.NoFieldMappingSpecified'))");
-    editjob.solr_fieldmapping_source.focus();
-    return;
-  }
-  editjob.cloudsearch_fieldmapping_op.value="Add";
-  postFormSetAnchor("cloudsearch_fieldmapping");
-}
-
-function deleteFieldMapping(i)
-{
-  // Set the operation
-  eval("editjob.cloudsearch_fieldmapping_op_"+i+".value=\"Delete\"");
-  // Submit
-  if (editjob.cloudsearch_fieldmapping_count.value==i)
-    postFormSetAnchor("cloudsearch_fieldmapping");
-  else
-    postFormSetAnchor("cloudsearch_fieldmapping_"+i)
-  // Undo, so we won't get two deletes next time
-  eval("editjob.cloudsearch_fieldmapping_op_"+i+".value=\"Continue\"");
-}
-
-//-->
-</script>
diff --git a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_Contents.html b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_Contents.html
deleted file mode 100644
index ce1962e..0000000
--- a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_Contents.html
+++ /dev/null
@@ -1,50 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-#if($TABNAME == $ResourceBundle.getString('AmazonCloudSearchOutputConnector.ContentsTabName'))
-
-<table class="displaytable">
-  <tr>
-    <td class="description">
-      <nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.MaxFileSizeBytesColon'))</nobr>
-    </td>
-    <td class="value"><input name="maxfilesize" type="text"
-      value="$Encoder.attributeEscape($MAXFILESIZE)" size="24" /></td>
-  </tr>
-  <tr>
-    <td class="description">
-      <nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.AllowedMIMETypesColon'))</nobr>
-    </td>
-    <td class="value">
-      <textarea rows="10" cols="64" name="mimetypes">$Encoder.bodyEscape($MIMETYPES)</textarea>
-    </td>
-  </tr>
-  <tr>
-    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.AllowedFileExtensionsColon'))</nobr></td>
-    <td class="value">
-      <textarea rows="10" cols="12" name="extensions">$Encoder.bodyEscape($EXTENSIONS)</textarea>
-    </td>
-  </tr>
-</table>
-
-#else
-
-<input type="hidden" name="maxfilesize" value="$Encoder.attributeEscape($MAXFILESIZE)" />
-<input type="hidden" name="mimetypes" value="$Encoder.attributeEscape($MIMETYPES)" />
-<input type="hidden" name="extensions" value="$Encoder.attributeEscape($EXTENSIONS)" />
-
-#end
diff --git a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_FieldMapping.html b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_FieldMapping.html
deleted file mode 100644
index 1e242ba..0000000
--- a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_FieldMapping.html
+++ /dev/null
@@ -1,107 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-#if($TABNAME == $ResourceBundle.getString('AmazonCloudSearchOutputConnector.FieldMappingTabName'))
-
-<table class="displaytable">
-  <tr><td class="separator" colspan="2"><hr/></td></tr>
-  <tr>
-    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.FieldMappings'))</nobr></td>
-    <td class="boxcell">
-      <table class="formtable">
-        <tr class="formheaderrow">
-          <td class="formcolumnheader"></td>
-          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.MetadataFieldName'))</nobr></td>
-          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.CloudSearchFieldName'))</nobr></td>
-        </tr>
-
-  #set($fieldcounter = 0)
-  #foreach($fieldmapping in $FIELDMAPPINGS)
-    #set($fieldcounterdisplay = $fieldcounter + 1)
-    #if(($fieldcounter % 2) == 0)
-        <tr class="evenformrow">
-    #else
-        <tr class="oddformrow">
-    #end
-          <td class="formcolumncell">
-            <a name="cloudsearch_fieldmapping_$fieldcounter">
-              <input type="button" value="Delete" alt="$Encoder.attributeEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.DeleteFieldMapping'))$fieldcounterdisplay" onclick='javascript:deleteFieldMapping("$fieldcounter");'/>
-              <input type="hidden" name="cloudsearch_fieldmapping_op_$fieldcounter" value="Continue"/>
-              <input type="hidden" name="cloudsearch_fieldmapping_source_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('SOURCE'))"/>
-              <input type="hidden" name="cloudsearch_fieldmapping_target_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('TARGET'))"/>
-            </a>
-          </td>
-          <td class="formcolumncell">
-            <nobr>$Encoder.bodyEscape($fieldmapping.get('SOURCE'))</nobr>
-          </td>
-          <td class="formcolumncell">
-            <nobr>$Encoder.bodyEscape($fieldmapping.get('TARGETDISPLAY'))</nobr>
-          </td>
-        </tr>
-    #set($fieldcounter = $fieldcounter + 1)
-  #end
-  
-  #if($fieldcounter == 0)
-        <tr class="formrow"><td class="formmessage" colspan="3">$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.NoFieldMappingSpecified'))</td></tr>
-  #end
-      
-        <tr class="formrow"><td class="formseparator" colspan="3"><hr/></td></tr>
-        <tr class="formrow">
-          <td class="formcolumncell">
-            <a name="cloudsearch_fieldmapping">
-              <input type="button" value="$Encoder.attributeEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.Add'))" alt="$Encoder.attributeEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.AddFieldMapping'))" onclick="javascript:addFieldMapping();"/>
-            </a>
-            <input type="hidden" name="cloudsearch_fieldmapping_count" value="$fieldcounter"/>
-            <input type="hidden" name="cloudsearch_fieldmapping_op" value="Continue"/>
-          </td>
-          <td class="formcolumncell">
-            <nobr><input type="text" size="15" name="cloudsearch_fieldmapping_source" value=""/></nobr>
-          </td>
-          <td class="formcolumncell">
-            <nobr><input type="text" size="15" name="cloudsearch_fieldmapping_target" value=""/></nobr>
-          </td>
-        </tr>
-      </table>
-    </td>
-  </tr>
-  
-  <tr><td class="separator" colspan="2"><hr/></td></tr>
-  
-  <tr>
-    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.KeepAllMetadata'))</nobr></td>
-    <td class="value">
-  #if($KEEPALLMETADATA == 'true')
-       <input type="checkbox" checked="true" name="cloudsearch_keepallmetadata" value="true"/>
-  #else
-       <input type="checkbox" name="cloudsearch_keepallmetadata" value="true"/>
-  #end
-    </td>
-  </tr>
-</table>
-      
-#else
-
-  #set($fieldcounter = 0)
-  #foreach($fieldmapping in $FIELDMAPPINGS)
-<input type="hidden" name="cloudsearch_fieldmapping_source_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('SOURCE'))"/>
-<input type="hidden" name="cloudsearch_fieldmapping_target_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('TARGET'))"/>
-    #set($fieldcounter = $fieldcounter + 1)
-  #end
-<input type="hidden" name="cloudsearch_fieldmapping_count" value="$fieldcounter"/>
-<input type="hidden" name="cloudsearch_keepallmetadata" value="$Encoder.bodyEscape($KEEPALLMETADATA)"/>
-
-#end
\ No newline at end of file
diff --git a/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/AmazonCloudSearchConnectorTest.java b/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/AmazonCloudSearchConnectorTest.java
deleted file mode 100644
index ccfc6dc..0000000
--- a/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/AmazonCloudSearchConnectorTest.java
+++ /dev/null
@@ -1,118 +0,0 @@
-/* $Id$ */
-
-/**
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package org.apache.manifoldcf.agents.output.amazoncloudsearch.tests;
-
-import org.junit.Test;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel;
-import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel.Document;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.apache.tika.parser.html.HtmlParser;
-import org.apache.tika.sax.BodyContentHandler;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-public class AmazonCloudSearchConnectorTest {
-  
-  @Test
-  public void testStub()
-  {
-  }
-  
-  public static void main(String[] args){
-    InputStream is;
-    try {
-
-      
-      
-      is = new FileInputStream(new File("000407.htm"));
-      Parser parser = new HtmlParser();
-      ContentHandler handler = new BodyContentHandler();
-      Metadata metadata = new Metadata();
-      parser.parse(is, handler, metadata, new ParseContext());
-      
-      //build json..
-      SDFModel model = new SDFModel();
-      Document doc = model.new Document();
-      doc.setType("add");
-      doc.setId("aabbcc");
-      
-      //set body text.
-      Map<String,Object> fields = new HashMap<String,Object>();
-      String bodyStr = handler.toString();
-      if(bodyStr != null){
-        bodyStr = handler.toString().replaceAll("\\n", "").replaceAll("\\t", "");
-        fields.put("body", bodyStr);
-      }
-      
-      //mapping metadata to SDF fields.
-      String contenttype = metadata.get("Content-Style-Type");
-      String title = metadata.get("dc.title");
-      String size = metadata.get("Content-Length");
-      String description = metadata.get("description");
-      String keywords = metadata.get("keywords");
-      if(contenttype != null && !"".equals(contenttype)) fields.put("content_type", contenttype);
-      if(title != null && !"".equals(title)) fields.put("title", title);
-      if(size != null && !"".equals(size)) fields.put("size", size);
-      if(description != null && !"".equals(description)) fields.put("description", description);
-      if(keywords != null && !"".equals(keywords))
-      {
-        List<String> keywordList = new ArrayList<String>();
-        for(String tmp : keywords.split(",")){
-          keywordList.add(tmp);
-        }
-        fields.put("keywords", keywordList);
-      }
-      doc.setFields(fields);
-      model.addDocument(doc);
-      
-      //generate json data.
-      String jsondata = model.toJSON();
-      System.out.println(jsondata);
-      
-    } catch (FileNotFoundException e) {
-      // TODO Auto-generated catch block
-      e.printStackTrace();
-    } catch (IOException e) {
-      // TODO Auto-generated catch block
-      e.printStackTrace();
-    } catch (SAXException e) {
-      // TODO Auto-generated catch block
-      e.printStackTrace();
-    } catch (TikaException e) {
-      // TODO Auto-generated catch block
-      e.printStackTrace();
-    }
-    
-  }
-  
-}
diff --git a/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/SDFModelTest.java b/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/SDFModelTest.java
deleted file mode 100644
index 6d0baa9..0000000
--- a/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/SDFModelTest.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/* $Id$ */
-
-/**
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package org.apache.manifoldcf.agents.output.amazoncloudsearch.tests;
-
-import static org.junit.Assert.*;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel;
-import org.junit.Test;
-import org.junit.Ignore;
-
-import com.fasterxml.jackson.core.JsonProcessingException;
-
-public class SDFModelTest {
-
-  @Test
-  @Ignore
-  // Hash ordering dependency makes this test unreliable.
-  public void testToJSON() {
-    SDFModel model = new SDFModel();
-    
-    SDFModel.Document doc = model.new Document();
-    doc.setType("add");
-    doc.setId("aaaabbbbcccc");
-    Map fields = new HashMap();
-    fields.put("title", "The Seeker: The Dark Is Rising");
-    fields.put("director", "Cunningham, David L.");
-    String[] genre = {"Adventure","Drama","Fantasy","Thriller"};
-    fields.put("genre", genre);
-    doc.setFields(fields);
-    
-    model.addDocument(doc);
-    
-    SDFModel.Document doc2 = model.new Document();
-    doc2.setType("delete");
-    doc2.setId("xxxxxffffddddee");
-    model.addDocument(doc2);
-    
-    try {
-      String jsonStr = model.toJSON();
-      System.out.println(jsonStr);
-      String expect = "[{\"type\":\"add\",\"id\":\"aaaabbbbcccc\",\"fields\":{\"genre\":[\"Adventure\",\"Drama\",\"Fantasy\",\"Thriller\"],\"title\":\"The Seeker: The Dark Is Rising\",\"director\":\"Cunningham, David L.\"}},{\"type\":\"delete\",\"id\":\"xxxxxffffddddee\"}]";
-      assertEquals(expect, jsonStr);
-      
-    } catch (JsonProcessingException e) {
-      e.printStackTrace();
-      fail();
-    }
-  }
-
-}
diff --git a/connectors/amazoncloudsearch/pom.xml b/connectors/amazoncloudsearch/pom.xml
index c015929..111ea9a 100644
--- a/connectors/amazoncloudsearch/pom.xml
+++ b/connectors/amazoncloudsearch/pom.xml
@@ -199,11 +199,6 @@
       <version>${project.version}</version>
     </dependency>
     <dependency>
-      <groupId>${project.groupId}</groupId>
-      <artifactId>mcf-pull-agent</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
       <groupId>org.apache.httpcomponents</groupId>
       <artifactId>httpclient</artifactId>
       <version>${httpcomponent.httpclient.version}</version>
@@ -223,16 +218,6 @@
 	  <artifactId>jackson-annotations</artifactId>
 	  <version>2.3.0</version>
     </dependency>
-    <dependency>
-	  <groupId>org.apache.tika</groupId>
-	  <artifactId>tika-core</artifactId>
-	  <version>1.5</version>
-    </dependency>
-    <dependency>
-	  <groupId>org.apache.tika</groupId>
-	  <artifactId>tika-parsers</artifactId>
-	  <version>1.5</version>
-    </dependency>
     
     <!-- Testing dependencies -->
     
@@ -260,6 +245,12 @@
       <groupId>${project.groupId}</groupId>
       <artifactId>mcf-pull-agent</artifactId>
       <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-pull-agent</artifactId>
+      <version>${project.version}</version>
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
diff --git a/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java b/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
index e93d99a..931e535 100644
--- a/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
+++ b/connectors/cmis/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/cmis/CmisRepositoryConnector.java
@@ -50,6 +50,8 @@
 import org.apache.chemistry.opencmis.commons.exceptions.CmisConnectionException;
 import org.apache.chemistry.opencmis.commons.exceptions.CmisPermissionDeniedException;
 import org.apache.chemistry.opencmis.commons.impl.Constants;
+import org.apache.chemistry.opencmis.commons.exceptions.CmisObjectNotFoundException;
+
 import org.apache.commons.io.input.NullInputStream;
 import org.apache.commons.lang.StringUtils;
 import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
@@ -151,6 +153,16 @@
     super();
   }
 
+  /** Tell the world what model this connector uses for getDocumentIdentifiers().
+  * This must return a model value as specified above.
+  *@return the model type value.
+  */
+  @Override
+  public int getConnectorModel()
+  {
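+    // Seeding supplies added and changed documents; deletions surface during processing
+    // (see the CmisObjectNotFoundException handling below)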
+    return MODEL_CHAINED_ADD_CHANGE;
+  }
+
   /** 
    * Return the list of activities that this connector supports (i.e. writes into the log).
    * @return the list.
@@ -1055,17 +1067,24 @@
 
     getSession();
     Logging.connectors.debug("CMIS: Inside processDocuments");
-    int i = 0;
-
-    while (i < documentIdentifiers.length) {
+        
+    for (int i = 0; i < documentIdentifiers.length; i++) {
       long startTime = System.currentTimeMillis();
       String nodeId = documentIdentifiers[i];
+      String version = versions[i];
 
       if (Logging.connectors.isDebugEnabled())
         Logging.connectors.debug("CMIS: Processing document identifier '"
             + nodeId + "'");
 
-      CmisObject cmisObject = session.getObject(nodeId);
+      CmisObject cmisObject;
+      try {
+        cmisObject = session.getObject(nodeId);
+      } catch (CmisObjectNotFoundException e) {
+        // The document no longer exists in the repository; remove it from the index
+        activities.deleteDocument(nodeId);
+        continue;
+      }
       
       String errorCode = "OK";
       String errorDesc = StringUtils.EMPTY;
@@ -1081,166 +1100,162 @@
           activities.addDocumentReference(child.getId(), nodeId,
               RELATIONSHIP_CHILD);
         }
-
       } else if(baseTypeId.equals(CMIS_DOCUMENT_BASE_TYPE)){
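+        // Ingest content only when the framework requests full processing, not a scan-only pass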
+        if (!scanOnly[i]) {
+          // content ingestion
 
-        // content ingestion
-
-        Document document = (Document) cmisObject;
-        long fileLength = document.getContentStreamLength();
-        InputStream is = null;
-        
-        try {
-          RepositoryDocument rd = new RepositoryDocument();
-          Date createdDate = document.getCreationDate().getTime();
-          Date modifiedDate = document.getLastModificationDate().getTime();
-          
-          rd.setFileName(document.getContentStreamFileName());
-          rd.setMimeType(document.getContentStreamMimeType());
-          rd.setCreatedDate(createdDate);
-          rd.setModifiedDate(modifiedDate);
-          
-          //binary
-          if(fileLength>0 && document.getContentStream()!=null){
-            is = document.getContentStream().getStream();
-            rd.setBinary(is, fileLength);
-          } else {
-            rd.setBinary(new NullInputStream(0),0);
-          }
-
-          //properties
-          List<Property<?>> properties = document.getProperties();
-          String id = StringUtils.EMPTY;
-          for (Property<?> property : properties) {
-            String propertyId = property.getId();
-            
-            if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
-              
-              if (propertyId.endsWith(Constants.PARAM_OBJECT_ID))
-                id = (String) property.getValue();
-  
-                if (property.getValue() !=null 
-                    || property.getValues() != null) {
-                  PropertyType propertyType = property.getType();
-    
-                  switch (propertyType) {
-    
-                  case STRING:
-                  case ID:
-                  case URI:
-                  case HTML:
-                    if(property.isMultiValued()){
-                      List<String> htmlPropertyValues = (List<String>) property.getValues();
-                      for (String htmlPropertyValue : htmlPropertyValues) {
-                        rd.addField(propertyId, htmlPropertyValue);
-                      }
-                    } else {
-                      String stringValue = (String) property.getValue();
-                      if(StringUtils.isNotEmpty(stringValue)){
-                        rd.addField(propertyId, stringValue);
-                      }
-                    }
-                    break;
-         
-                  case BOOLEAN:
-                    if(property.isMultiValued()){
-                      List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
-                      for (Boolean booleanPropertyValue : booleanPropertyValues) {
-                        rd.addField(propertyId, booleanPropertyValue.toString());
-                      }
-                    } else {
-                      Boolean booleanValue = (Boolean) property.getValue();
-                      if(booleanValue!=null){
-                        rd.addField(propertyId, booleanValue.toString());
-                      }
-                    }
-                    break;
-    
-                  case INTEGER:
-                    if(property.isMultiValued()){
-                      List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
-                      for (BigInteger integerPropertyValue : integerPropertyValues) {
-                        rd.addField(propertyId, integerPropertyValue.toString());
-                      }
-                    } else {
-                      BigInteger integerValue = (BigInteger) property.getValue();
-                      if(integerValue!=null){
-                        rd.addField(propertyId, integerValue.toString());
-                      }
-                    }
-                    break;
-    
-                  case DECIMAL:
-                    if(property.isMultiValued()){
-                      List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
-                      for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
-                        rd.addField(propertyId, decimalPropertyValue.toString());
-                      }
-                    } else {
-                      BigDecimal decimalValue = (BigDecimal) property.getValue();
-                      if(decimalValue!=null){
-                        rd.addField(propertyId, decimalValue.toString());
-                      }
-                    }
-                    break;
-    
-                  case DATETIME:
-                    if(property.isMultiValued()){
-                      List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
-                      for (GregorianCalendar datePropertyValue : datePropertyValues) {
-                        rd.addField(propertyId,
-                            ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
-                      }
-                    } else {
-                      GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
-                      if(dateValue!=null){
-                        rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
-                      }
-                    }
-                    break;
-    
-                  default:
-                    break;
-                  }
-                }
-                
-              }
-            
-          }
-          
-          //ingestion
-          
-          //version label
-          String version = document.getVersionLabel();
-          if(StringUtils.isEmpty(version))
-            version = StringUtils.EMPTY;
-          
-          //documentURI
-          String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
+          Document document = (Document) cmisObject;
+          long fileLength = document.getContentStreamLength();
+          InputStream is = null;
           
           try {
-            activities.ingestDocumentWithException(id, version, documentURI, rd);
-          } catch (IOException e) {
-            errorCode = "IO ERROR";
-            errorDesc = e.getMessage();
-            handleIOException(e, "reading file input stream");
-          }
-        } finally {
-          try {
-            if(is!=null){
-              is.close();
+            RepositoryDocument rd = new RepositoryDocument();
+            Date createdDate = document.getCreationDate().getTime();
+            Date modifiedDate = document.getLastModificationDate().getTime();
+            
+            rd.setFileName(document.getContentStreamFileName());
+            rd.setMimeType(document.getContentStreamMimeType());
+            rd.setCreatedDate(createdDate);
+            rd.setModifiedDate(modifiedDate);
+            
+            //binary
+            if(fileLength>0 && document.getContentStream()!=null){
+              is = document.getContentStream().getStream();
+              rd.setBinary(is, fileLength);
+            } else {
+              rd.setBinary(new NullInputStream(0),0);
             }
-          } catch (IOException e) {
-            errorCode = "IO ERROR";
-            errorDesc = e.getMessage();
-            handleIOException(e, "closing file input stream");
+
+            //properties
+            List<Property<?>> properties = document.getProperties();
+            String id = StringUtils.EMPTY;
+            for (Property<?> property : properties) {
+              String propertyId = property.getId();
+              
+              if(CmisRepositoryConnectorUtils.existsInSelectClause(cmisQuery, propertyId)){
+                
+                if (propertyId.endsWith(Constants.PARAM_OBJECT_ID))
+                  id = (String) property.getValue();
+
+                if (property.getValue() != null
+                    || property.getValues() != null) {
+                    PropertyType propertyType = property.getType();
+      
+                    switch (propertyType) {
+      
+                    case STRING:
+                    case ID:
+                    case URI:
+                    case HTML:
+                      if(property.isMultiValued()){
+                        List<String> htmlPropertyValues = (List<String>) property.getValues();
+                        for (String htmlPropertyValue : htmlPropertyValues) {
+                          rd.addField(propertyId, htmlPropertyValue);
+                        }
+                      } else {
+                        String stringValue = (String) property.getValue();
+                        if(StringUtils.isNotEmpty(stringValue)){
+                          rd.addField(propertyId, stringValue);
+                        }
+                      }
+                      break;
+           
+                    case BOOLEAN:
+                      if(property.isMultiValued()){
+                        List<Boolean> booleanPropertyValues = (List<Boolean>) property.getValues();
+                        for (Boolean booleanPropertyValue : booleanPropertyValues) {
+                          rd.addField(propertyId, booleanPropertyValue.toString());
+                        }
+                      } else {
+                        Boolean booleanValue = (Boolean) property.getValue();
+                        if(booleanValue!=null){
+                          rd.addField(propertyId, booleanValue.toString());
+                        }
+                      }
+                      break;
+      
+                    case INTEGER:
+                      if(property.isMultiValued()){
+                        List<BigInteger> integerPropertyValues = (List<BigInteger>) property.getValues();
+                        for (BigInteger integerPropertyValue : integerPropertyValues) {
+                          rd.addField(propertyId, integerPropertyValue.toString());
+                        }
+                      } else {
+                        BigInteger integerValue = (BigInteger) property.getValue();
+                        if(integerValue!=null){
+                          rd.addField(propertyId, integerValue.toString());
+                        }
+                      }
+                      break;
+      
+                    case DECIMAL:
+                      if(property.isMultiValued()){
+                        List<BigDecimal> decimalPropertyValues = (List<BigDecimal>) property.getValues();
+                        for (BigDecimal decimalPropertyValue : decimalPropertyValues) {
+                          rd.addField(propertyId, decimalPropertyValue.toString());
+                        }
+                      } else {
+                        BigDecimal decimalValue = (BigDecimal) property.getValue();
+                        if(decimalValue!=null){
+                          rd.addField(propertyId, decimalValue.toString());
+                        }
+                      }
+                      break;
+      
+                    case DATETIME:
+                      if(property.isMultiValued()){
+                        List<GregorianCalendar> datePropertyValues = (List<GregorianCalendar>) property.getValues();
+                        for (GregorianCalendar datePropertyValue : datePropertyValues) {
+                          rd.addField(propertyId,
+                              ISO8601_DATE_FORMATTER.format(datePropertyValue.getTime()));
+                        }
+                      } else {
+                        GregorianCalendar dateValue = (GregorianCalendar) property.getValue();
+                        if(dateValue!=null){
+                          rd.addField(propertyId, ISO8601_DATE_FORMATTER.format(dateValue.getTime()));
+                        }
+                      }
+                      break;
+      
+                    default:
+                      break;
+                    }
+                }
+
+              }
+
+            }
+            
+            //ingestion
+            
+            //documentURI
+            String documentURI = CmisRepositoryConnectorUtils.getDocumentURL(document, session);
+            
+            try {
+              activities.ingestDocumentWithException(nodeId, version, documentURI, rd);
+            } catch (IOException e) {
+              errorCode = "IO ERROR";
+              errorDesc = e.getMessage();
+              handleIOException(e, "reading file input stream");
+            }
           } finally {
-            activities.recordActivity(new Long(startTime), ACTIVITY_READ,
-              fileLength, nodeId, errorCode, errorDesc, null);
+            try {
+              if(is!=null){
+                is.close();
+              }
+            } catch (IOException e) {
+              errorCode = "IO ERROR";
+              errorDesc = e.getMessage();
+              handleIOException(e, "closing file input stream");
+            } finally {
+              activities.recordActivity(new Long(startTime), ACTIVITY_READ,
+                fileLength, nodeId, errorCode, errorDesc, null);
+            }
           }
         }
       }
-      i++;
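+      // Neither a folder nor a document: remove any stale indexed copy.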
+      else
+        activities.deleteDocument(nodeId);
     }
   }
   
@@ -1276,9 +1291,15 @@
     getSession();
     
     String[] rval = new String[documentIdentifiers.length];
-    int i = 0;
-    while (i < rval.length){
-      CmisObject cmisObject = session.getObject(documentIdentifiers[i]);
+    for (int i = 0; i < rval.length; i++) {
+      CmisObject cmisObject;
+      try {
+        cmisObject = session.getObject(documentIdentifiers[i]);
+      } catch (CmisObjectNotFoundException e) {
+        rval[i] = null;
+        continue;
+      }
+
       if (cmisObject.getBaseType().getId().equals(CMIS_DOCUMENT_BASE_TYPE)) {
         Document document = (Document) cmisObject;
         
@@ -1294,7 +1315,6 @@
         //a CMIS folder will always be processed
         rval[i] = StringUtils.EMPTY;
       }
-      i++;
     }
     return rval;
   }
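
A minimal sketch (illustrative only, not part of the patch) of the convention the
two hunks above converge on for vanished objects: getDocumentVersions() reports a
null version so nothing is recorded, and processDocuments() deletes the indexed
copy.  Session and Activities stand in for the OpenCMIS session and the ManifoldCF
activity interface, and the sketch assumes the framework's convention that a null
version string marks a document for removal.

  class NotFoundHandlingSketch {
    static class ObjectNotFoundException extends RuntimeException {}

    interface Session { Object getObject(String id); } // may throw ObjectNotFoundException
    interface Activities { void deleteDocument(String id); }

    // Version phase: a vanished object yields a null version string.
    static String[] getVersions(Session session, String[] ids) {
      String[] rval = new String[ids.length];
      for (int i = 0; i < ids.length; i++) {
        try {
          session.getObject(ids[i]);
          rval[i] = "";                 // found: empty version, always process
        } catch (ObjectNotFoundException e) {
          rval[i] = null;               // gone: no version to record
        }
      }
      return rval;
    }

    // Processing phase: the same condition triggers an explicit delete.
    static void process(Session session, Activities activities, String[] ids) {
      for (String id : ids) {
        try {
          session.getObject(id);        // fetch and ingest, as in the hunks above
        } catch (ObjectNotFoundException e) {
          activities.deleteDocument(id);
        }
      }
    }
  }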
diff --git a/connectors/cmis/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/cmis/tests/APISanityIT.java b/connectors/cmis/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/cmis/tests/APISanityDerbyIT.java
similarity index 99%
rename from connectors/cmis/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/cmis/tests/APISanityIT.java
rename to connectors/cmis/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/cmis/tests/APISanityDerbyIT.java
index 7558dbc..fe1f075 100644
--- a/connectors/cmis/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/cmis/tests/APISanityIT.java
+++ b/connectors/cmis/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/cmis/tests/APISanityDerbyIT.java
@@ -51,7 +51,7 @@
 /**
  * @author Piergiorgio Lucidi
  */
-public class APISanityIT extends BaseITDerby
+public class APISanityDerbyIT extends BaseITDerby
 {
   private static final String REPLACER = "?";
   private static final String CMIS_TEST_QUERY_CHANGE_DOC = "SELECT * FROM cmis:document WHERE cmis:name='"+REPLACER+"'";
diff --git a/connectors/cmis/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/cmis/tests/BaseITDerby.java b/connectors/cmis/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/cmis/tests/BaseITDerby.java
index 2484a13..5cb4de3 100644
--- a/connectors/cmis/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/cmis/tests/BaseITDerby.java
+++ b/connectors/cmis/connector/src/test/java/org/apache/manifoldcf/crawler/connectors/cmis/tests/BaseITDerby.java
@@ -58,7 +58,7 @@
   public void setUpCMIS()
     throws Exception
   {
-    String openCmisServerWarPath = "../../lib/chemistry-opencmis-server-inmemory.war";
+    String openCmisServerWarPath = "../../../lib/chemistry-opencmis-server-inmemory.war";
 
     if (System.getProperty("openCmisServerWarPath") != null)
       openCmisServerWarPath = System.getProperty("openCmisServerWarPath");
diff --git a/connectors/documentfilter/build.xml b/connectors/documentfilter/build.xml
new file mode 100644
index 0000000..f788dfe
--- /dev/null
+++ b/connectors/documentfilter/build.xml
@@ -0,0 +1,40 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project name="documentfilter" default="all">
+
+    <property environment="env"/>
+    <condition property="mcf-dist" value="${env.MCFDISTPATH}">
+        <isset property="env.MCFDISTPATH"/>
+    </condition>
+    <property name="abs-dist" location="../../dist"/>
+    <condition property="mcf-dist" value="${abs-dist}">
+        <not>
+            <isset property="env.MCFDISTPATH"/>
+        </not>
+    </condition>
+
+    <import file="${mcf-dist}/connector-build.xml"/>
+
+    <target name="deliver-connector" depends="mcf-connector-build.deliver-connector">
+        <antcall target="general-add-transformation-connector">
+            <param name="connector-label" value="Allowed documents"/>
+            <param name="connector-class" value="org.apache.manifoldcf.agents.transformation.documentfilter.DocumentFilter"/>
+        </antcall>
+    </target>
+
+</project>
diff --git a/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java b/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
new file mode 100644
index 0000000..3a87d9b
--- /dev/null
+++ b/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilter.java
@@ -0,0 +1,443 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.transformation.documentfilter;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+
+import org.apache.manifoldcf.agents.system.ManifoldCF;
+import org.apache.manifoldcf.agents.system.Logging;
+
+import org.apache.commons.io.FilenameUtils;
+
+import java.io.*;
+import java.util.*;
+
+public class DocumentFilter extends org.apache.manifoldcf.agents.transformation.BaseTransformationConnector {
+
+  /** Forward to the javascript to check the specification parameters for the job */
+  private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
+  
+  private static final String EDIT_SPECIFICATION_CONTENTS_HTML = "editSpecification_Contents.html";
+  
+  private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
+  
+  /** Constructor.
+   */
+  public DocumentFilter(){
+  }
+  
+  /** Get a pipeline version string, given a pipeline specification.  The version string is used to uniquely describe the pertinent details of
+  * the specification and the configuration, to allow the Connector Framework to determine whether a document will need to be processed again.
+  * Note that the contents of the document cannot be considered by this method, and that a different version string (defined in IRepositoryConnector)
+  * is used to describe the version of the actual document.
+  *
+  * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
+  * necessary.
+  *@param os is the current pipeline specification for the job that is doing the crawling.
+  *@return a string, of unlimited length, which uniquely describes output configuration and specification in such a way that if two such strings are equal,
+  * the document will not need to be sent again to the output data store.
+  */
+  @Override
+  public VersionContext getPipelineDescription(Specification os)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    SpecPacker sp = new SpecPacker(os);
+    return new VersionContext(sp.toPackedString(),params,os);
+  }
+
+  /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to filter out
+  * unusable documents before they are passed down the pipeline.
+  *@param outputDescription is the document's output version.
+  *@param mimeType is the mime type of the document.
+  *@return true if the mime type is indexable by this connector.
+  */
+  @Override
+  public boolean checkMimeTypeIndexable(VersionContext outputDescription, String mimeType, IOutputCheckActivity activities)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    SpecPacker sp = new SpecPacker(outputDescription.getVersionString());
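+    // Filter locally first; only an allowed type is deferred to the rest of
+    // the pipeline via the superclass.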
+    if (sp.checkMimeType(mimeType))
+      return super.checkMimeTypeIndexable(outputDescription, mimeType, activities);
+    else
+      return false;
+  }
+
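+  /** Pre-filter on document length, mirroring checkMimeTypeIndexable() above. */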
+  @Override
+  public boolean checkLengthIndexable(VersionContext outputDescription, long length, IOutputCheckActivity activities)
+    throws ManifoldCFException, ServiceInterruption {
+    SpecPacker sp = new SpecPacker(outputDescription.getVersionString());
+    if (sp.checkLengthIndexable(length))
+      return super.checkLengthIndexable(outputDescription, length, activities);
+    else
+      return false;
+  }
+
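+  /** Pre-filter on document URL; the decision is made on the file extension. */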
+  @Override
+  public boolean checkURLIndexable(VersionContext outputDescription, String url, IOutputCheckActivity activities)
+    throws ManifoldCFException, ServiceInterruption {
+    SpecPacker sp = new SpecPacker(outputDescription.getVersionString());
+    if (sp.checkURLIndexable(url))
+      return super.checkURLIndexable(outputDescription, url, activities);
+    else
+      return false;
+  }
+  
+  /** Add (or replace) a document in the output data store using the connector.
+  * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
+  * necessary.
+  * The OutputSpecification is *not* provided to this method, because any output that occurs must be consistent with the
+  * output description that was used to decide whether output should take place at all.  This method may therefore need to decode
+  * the output description string in order to determine what should be done.
+  *@param documentURI is the URI of the document.  The URI is presumed to be the unique identifier which the output data store will use to process
+  * and serve the document.  This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
+  *@param outputDescription is the description string that was constructed for this document by the getPipelineDescription() method.
+  *@param document is the document data to be processed (handed to the output data store).
+  *@param authorityNameString is the name of the authority responsible for authorizing any access tokens passed in with the repository document.  May be null.
+  *@param activities is the handle to an object that the implementer of an output connector may use to perform operations, such as logging processing activity.
+  *@return the document status (accepted or permanently rejected).
+  */
+  @Override
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+    throws ManifoldCFException, ServiceInterruption, IOException
+  {
+    return activities.sendDocument(documentURI, document);
+  }
+  
+  protected static void fillInContentsSpecificationMap(Map<String,Object> paramMap, Specification os)
+  {
+    String maxFileSize = DocumentFilterConfig.MAXLENGTH_DEFAULT;
+    String allowedMimeTypes = DocumentFilterConfig.MIMETYPES_DEFAULT;
+    String allowedFileExtensions = DocumentFilterConfig.EXTENSIONS_DEFAULT;
+    for (int i = 0; i < os.getChildCount(); i++)
+    {
+      SpecificationNode sn = os.getChild(i);
+      if (sn.getType().equals(DocumentFilterConfig.NODE_MAXLENGTH))
+        maxFileSize = sn.getAttributeValue(DocumentFilterConfig.ATTRIBUTE_VALUE);
+      else if (sn.getType().equals(DocumentFilterConfig.NODE_MIMETYPES))
+        allowedMimeTypes = sn.getValue();
+      else if (sn.getType().equals(DocumentFilterConfig.NODE_EXTENSIONS))
+        allowedFileExtensions = sn.getValue();
+    }
+    paramMap.put("MAXFILESIZE",maxFileSize);
+    paramMap.put("MIMETYPES",allowedMimeTypes);
+    paramMap.put("EXTENSIONS",allowedFileExtensions);
+  }
+  
+  /** Obtain the name of the form check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form check javascript method.
+  */
+  @Override
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecification";
+  }
+
+  /** Obtain the name of the form presave check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form presave check javascript method.
+  */
+  @Override
+  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
+  }
+
+  /** Output the specification header section.
+  * This method is called in the head section of a job page which has selected a pipeline connection of the current type.  Its purpose is to add the required tabs
+  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this connection.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
+  */
+  @Override
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, List<String> tabsArray)
+    throws ManifoldCFException, IOException
+  {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
+
+    tabsArray.add(Messages.getString(locale, "DocumentFilter.ContentsTabName"));
+
+    // Fill in the specification header map, using data from all tabs.
+    fillInContentsSpecificationMap(paramMap, os);
+
+    Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_JS,paramMap);
+  }
+  
+  /** Output the specification body section.
+  * This method is called in the body section of a job page which has selected a pipeline connection of the current type.  Its purpose is to present the required form elements for editing.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
+  * form is "editjob".
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param actualSequenceNumber is the connection within the job that has currently been selected.
+  *@param tabName is the current tab name.
+  */
+  @Override
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
+    throws ManifoldCFException, IOException
+  {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+
+    // Set the tab name
+    paramMap.put("TABNAME", tabName);
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
+    paramMap.put("SELECTEDNUM",Integer.toString(actualSequenceNumber));
+
+    // Fill in the field mapping tab data
+    fillInContentsSpecificationMap(paramMap, os);
+    Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_CONTENTS_HTML,paramMap);
+  }
+
+  /** Process a specification post.
+  * This method is called at the start of a job's edit or view page, whenever there is a possibility that form data for a connection has been
+  * posted.  Its purpose is to gather form information and modify the transformation specification accordingly.
+  * The name of the posted form is "editjob".
+  *@param variableContext contains the post data, including binary file-upload information.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
+  */
+  @Override
+  public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException {
+    String seqPrefix = "s"+connectionSequenceNumber+"_";
+
+    String x;
+        
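+    // For each posted field: remove any existing node of that type, then
+    // append a fresh node carrying the posted value.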
+    x = variableContext.getParameter(seqPrefix+"maxfilesize");
+    if (x != null)
+    {
+      int i = 0;
+      while (i < os.getChildCount())
+      {
+        SpecificationNode node = os.getChild(i);
+        if (node.getType().equals(DocumentFilterConfig.NODE_MAXLENGTH))
+          os.removeChild(i);
+        else
+          i++;
+      }
+      SpecificationNode sn = new SpecificationNode(DocumentFilterConfig.NODE_MAXLENGTH);
+      sn.setAttribute(DocumentFilterConfig.ATTRIBUTE_VALUE,x);
+      os.addChild(os.getChildCount(),sn);
+    }
+
+    x = variableContext.getParameter(seqPrefix+"mimetypes");
+    if (x != null)
+    {
+      int i = 0;
+      while (i < os.getChildCount())
+      {
+        SpecificationNode node = os.getChild(i);
+        if (node.getType().equals(DocumentFilterConfig.NODE_MIMETYPES))
+          os.removeChild(i);
+        else
+          i++;
+      }
+      SpecificationNode sn = new SpecificationNode(DocumentFilterConfig.NODE_MIMETYPES);
+      sn.setValue(x);
+      os.addChild(os.getChildCount(),sn);
+    }
+
+    x = variableContext.getParameter(seqPrefix+"extensions");
+    if (x != null)
+    {
+      int i = 0;
+      while (i < os.getChildCount())
+      {
+        SpecificationNode node = os.getChild(i);
+        if (node.getType().equals(DocumentFilterConfig.NODE_EXTENSIONS))
+          os.removeChild(i);
+        else
+          i++;
+      }
+      SpecificationNode sn = new SpecificationNode(DocumentFilterConfig.NODE_EXTENSIONS);
+      sn.setValue(x);
+      os.addChild(os.getChildCount(),sn);
+    }
+    
+    return null;
+  }
+  
+
+  /** View specification.
+  * This method is called in the body section of a job's view page.  Its purpose is to present the pipeline specification information to the user.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param os is the current pipeline specification for this job.
+  */
+  @Override
+  public void viewSpecification(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException, IOException
+  {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
+
+    // Fill in the map with data from all tabs
+    fillInContentsSpecificationMap(paramMap, os);
+
+    Messages.outputResourceWithVelocity(out,locale,VIEW_SPECIFICATION_HTML,paramMap);
+    
+  }
+  
+  protected static void fillSet(Set<String> set, String input) {
+    // Guard against a specification that never set this value.
+    if (input == null)
+      return;
+    try
+    {
+      StringReader sr = new StringReader(input);
+      BufferedReader br = new BufferedReader(sr);
+      String line = null;
+      while ((line = br.readLine()) != null)
+      {
+        line = line.trim();
+        if (line.length() > 0)
+          set.add(line.toLowerCase(Locale.ROOT));
+      }
+    }
+    catch (IOException e)
+    {
+      // Should never happen
+      throw new RuntimeException("IO exception reading strings: "+e.getMessage(),e);
+    }
+  }
+  
+  protected static class SpecPacker {
+    
+    private final Set<String> extensions = new HashSet<String>();
+    private final Set<String> mimeTypes = new HashSet<String>();
+    private final Long lengthCutoff;
+    
+    public SpecPacker(Specification os) {
+      Long lengthCutoff = null;
+      String extensions = null;
+      String mimeTypes = null;
+      for (int i = 0; i < os.getChildCount(); i++) {
+        SpecificationNode sn = os.getChild(i);
+        
+        if (sn.getType().equals(DocumentFilterConfig.NODE_MIMETYPES)) {
+          mimeTypes = sn.getValue();
+        } else if (sn.getType().equals(DocumentFilterConfig.NODE_EXTENSIONS)) {
+          extensions = sn.getValue();
+        } else if (sn.getType().equals(DocumentFilterConfig.NODE_MAXLENGTH)) {
+          String value = sn.getAttributeValue(DocumentFilterConfig.ATTRIBUTE_VALUE);
+          lengthCutoff = new Long(value);
+        }
+      }
+      this.lengthCutoff = lengthCutoff;
+      fillSet(this.extensions, extensions);
+      fillSet(this.mimeTypes, mimeTypes);
+    }
+    
+    public SpecPacker(String packedString) {
+      
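+      // Format produced by toPackedString(): '+' followed by the packed
+      // cutoff (or '-' for none), then the packed mime-type list, then the
+      // packed extension list, all delimited by '+'.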
+      int index = 0;
+      
+      // Max length
+      final StringBuilder sb = new StringBuilder();
+      if (packedString.length() > index) {
+        if (packedString.charAt(index++) == '+') {
+          index = unpack(sb,packedString,index,'+');
+          this.lengthCutoff = new Long(sb.toString());
+        } else
+          this.lengthCutoff = null;
+      } else
+        this.lengthCutoff = null;
+      
+      // Mime types
+      final List<String> mimeBuffer = new ArrayList<String>();
+      index = unpackList(mimeBuffer,packedString,index,'+');
+      for (String mimeType : mimeBuffer) {
+        this.mimeTypes.add(mimeType);
+      }
+      
+      // Extensions
+      final List<String> extensionsBuffer = new ArrayList<String>();
+      index = unpackList(extensionsBuffer,packedString,index,'+');
+      for (String extension : extensionsBuffer) {
+        this.extensions.add(extension);
+      }
+    }
+    
+    public String toPackedString() {
+      StringBuilder sb = new StringBuilder();
+      int i;
+      
+      // Max length
+      if (lengthCutoff == null)
+        sb.append('-');
+      else {
+        sb.append('+');
+        pack(sb,lengthCutoff.toString(),'+');
+      }
+      
+      // Mime types
+      String[] mimeTypes = new String[this.mimeTypes.size()];
+      i = 0;
+      for (String mimeType : this.mimeTypes) {
+        mimeTypes[i++] = mimeType;
+      }
+      java.util.Arrays.sort(mimeTypes);
+      packList(sb,mimeTypes,'+');
+      
+      // Extensions
+      String[] extensions = new String[this.extensions.size()];
+      i = 0;
+      for (String extension : this.extensions) {
+        extensions[i++] = extension;
+      }
+      java.util.Arrays.sort(extensions);
+      packList(sb,extensions,'+');
+      
+      return sb.toString();
+    }
+    
+    public boolean checkLengthIndexable(long length) {
+      if (lengthCutoff == null)
+        return true;
+      return (length <= lengthCutoff.longValue());
+    }
+    
+    public boolean checkMimeType(String mimeType) {
+      if (mimeType == null)
+        mimeType = "application/unknown";
+      return mimeTypes.contains(mimeType.toLowerCase(Locale.ROOT));
+    }
+    
+    public boolean checkURLIndexable(String url) {
+      String extension = FilenameUtils.getExtension(url);
+      if (extension == null || extension.length() == 0)
+        extension = ".";
+      return extensions.contains(extension.toLowerCase(Locale.ROOT));
+    }
+    
+  }
+  
+}
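
A short sketch of how the pieces above fit together at run time (illustrative only;
wouldIndex() is a hypothetical helper in the same package, relying on SpecPacker's
package visibility).  getPipelineDescription() packs the Specification into the
version string once; each check method then unpacks it to make its decision:

  static boolean wouldIndex(String packedVersion, long length, String mimeType, String url) {
    DocumentFilter.SpecPacker sp = new DocumentFilter.SpecPacker(packedVersion);
    return sp.checkLengthIndexable(length)  // a missing cutoff accepts any length
        && sp.checkMimeType(mimeType)       // a null mime type becomes "application/unknown"
        && sp.checkURLIndexable(url);       // decided by the lowercased file extension
  }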
diff --git a/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java b/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java
new file mode 100644
index 0000000..eea645f
--- /dev/null
+++ b/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/DocumentFilterConfig.java
@@ -0,0 +1,53 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.manifoldcf.agents.transformation.documentfilter;
+
+/** Parameters for DocumentFilter transformation connector.
+ */
+public class DocumentFilterConfig {
+
+  // Configuration parameters
+  
+  // Specification nodes and values
+  public static final String NODE_MAXLENGTH = "maxlength";
+  public static final String MAXLENGTH_DEFAULT = "16777216";
+  public static final String NODE_MIMETYPES = "mimetypes";
+  public static final String MIMETYPES_DEFAULT =
+      "application/msword\n"
+      + "application/vnd.ms-excel\n"
+      + "application/vnd.openxmlformats-officedocument.wordprocessingml.document\n"
+      + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet\n"
+      + "text/html\n"
+      + "application/pdf\n"
+      + "application/vnd.ms-powerpoint\n"
+      + "application/vnd.openxmlformats-officedocument.presentationml.presentation\n"
+      + "application/vnd.oasis.opendocument.text\n"
+      + "application/vnd.oasis.opendocument.spreadsheet\n"
+      + "application/vnd.oasis.opendocument.formula\n"
+      + "application/rtf\n" + "text/plain\n" + "audio/mpeg\n"
+      + "audio/x-wav\n" + "audio/ogg\n" + "audio/flac\n"
+      + "application/x-bittorrent";
+  public static final String NODE_EXTENSIONS = "extensions";
+  public static final String EXTENSIONS_DEFAULT =
+      "doc\n" + "docx\n" + "xls\n" + "xlsx\n" + "ppt\n" + "pptx\n"
+      + "html\n" + "pdf\n" + "odt\n" + "ods\n" + "rtf\n" + "txt\n" + "mp3\n"
+      + "mp4\n" + "wav\n" + "ogg\n" + "flac\n" + "torrent";
+  public static final String ATTRIBUTE_VALUE = "value";
+}
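
The newline-separated defaults above are consumed by DocumentFilter.fillSet(), which
trims each line, lowercases it, and skips blanks.  A small illustration, assuming
the package access the two classes share:

  Set<String> allowed = new HashSet<String>();
  DocumentFilter.fillSet(allowed, DocumentFilterConfig.EXTENSIONS_DEFAULT);
  // allowed now holds "doc", "docx", ..., "torrent", all lowercased.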
diff --git a/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/Messages.java b/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/Messages.java
new file mode 100644
index 0000000..635daa5
--- /dev/null
+++ b/connectors/documentfilter/connector/src/main/java/org/apache/manifoldcf/agents/transformation/documentfilter/Messages.java
@@ -0,0 +1,141 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.transformation.documentfilter;
+
+import java.util.Locale;
+import java.util.Map;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+
+public class Messages extends org.apache.manifoldcf.ui.i18n.Messages
+{
+  public static final String DEFAULT_BUNDLE_NAME="org.apache.manifoldcf.agents.transformation.documentfilter.common";
+  public static final String DEFAULT_PATH_NAME="org.apache.manifoldcf.agents.transformation.documentfilter";
+  
+  /** Constructor - do not instantiate.
+  */
+  protected Messages()
+  {
+  }
+  
+  public static String getString(Locale locale, String messageKey)
+  {
+    return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getAttributeString(Locale locale, String messageKey)
+  {
+    return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getBodyString(Locale locale, String messageKey)
+  {
+    return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getAttributeJavascriptString(Locale locale, String messageKey)
+  {
+    return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getBodyJavascriptString(Locale locale, String messageKey)
+  {
+    return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getString(Locale locale, String messageKey, Object[] args)
+  {
+    return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  public static String getAttributeString(Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+  
+  public static String getBodyString(Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  public static String getAttributeJavascriptString(Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  public static String getBodyJavascriptString(Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  // More general methods which allow bundlenames and class loaders to be specified.
+  
+  public static String getString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  public static String getAttributeString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  public static String getBodyString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyString(Messages.class, bundleName, locale, messageKey, args);
+  }
+  
+  public static String getAttributeJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  public static String getBodyJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  // Resource output
+  
+  public static void outputResource(IHTTPOutput output, Locale locale, String resourceKey,
+    Map<String,String> substitutionParameters, boolean mapToUpperCase)
+    throws ManifoldCFException
+  {
+    outputResource(output,Messages.class,DEFAULT_PATH_NAME,locale,resourceKey,
+      substitutionParameters,mapToUpperCase);
+  }
+  
+  public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+    Map<String,String> substitutionParameters, boolean mapToUpperCase)
+    throws ManifoldCFException
+  {
+    outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+      substitutionParameters,mapToUpperCase);
+  }
+
+  public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+    Map<String,Object> contextObjects)
+    throws ManifoldCFException
+  {
+    outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+      contextObjects);
+  }
+  
+}
+
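
As elsewhere in ManifoldCF, UI resources resolve their labels through this class; a
one-line illustration against a key defined in the bundles below:

  // Resolves "Allowed contents" from common_en_US.properties in this patch:
  String tabName = Messages.getString(Locale.US, "DocumentFilter.ContentsTabName");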
diff --git a/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_en_US.properties b/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_en_US.properties
new file mode 100644
index 0000000..12151ee
--- /dev/null
+++ b/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_en_US.properties
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+DocumentFilter.ContentsTabName=Allowed contents
+DocumentFilter.MaxFileSizeBytesColon=Max file size (bytes):
+DocumentFilter.AllowedMIMETypesColon=Allowed MIME types:
+DocumentFilter.AllowedFileExtensionsColon=Allowed file extensions:
diff --git a/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_ja_JP.properties b/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_ja_JP.properties
new file mode 100644
index 0000000..3cb092d
--- /dev/null
+++ b/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_ja_JP.properties
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+DocumentFilter.ContentsTabName=コンテンツ
+DocumentFilter.MaxFileSizeBytesColon=最大ファイルサイズ (バイト):
+DocumentFilter.AllowedMIMETypesColon=利用可能なMIMEタイプ:
+DocumentFilter.AllowedFileExtensionsColon=利用可能なファイル拡張子:
diff --git a/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_zh_CH.properties b/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_zh_CH.properties
new file mode 100644
index 0000000..12151ee
--- /dev/null
+++ b/connectors/documentfilter/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/documentfilter/common_zh_CH.properties
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+DocumentFilter.ContentsTabName=Allowed contents
+DocumentFilter.MaxFileSizeBytesColon=Max file size (bytes):
+DocumentFilter.AllowedMIMETypesColon=Allowed MIME types:
+DocumentFilter.AllowedFileExtensionsColon=Allowed file extensions:
diff --git a/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/editSpecification.js b/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/editSpecification.js
new file mode 100644
index 0000000..1d6aa8b
--- /dev/null
+++ b/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/editSpecification.js
@@ -0,0 +1,26 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<script type="text/javascript">
+<!--
+function s${SEQNUM}_checkSpecification()
+{
+  return true;
+}
+
+function s${SEQNUM}_checkSpecificationForSave()
+{
+  return true;
+}
+
+//-->
+</script>
diff --git a/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/editSpecification_Contents.html b/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/editSpecification_Contents.html
new file mode 100644
index 0000000..8a32ae4
--- /dev/null
+++ b/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/editSpecification_Contents.html
@@ -0,0 +1,50 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+#if($TABNAME == $ResourceBundle.getString('DocumentFilter.ContentsTabName') && ${SEQNUM} == ${SELECTEDNUM})
+
+<table class="displaytable">
+  <tr>
+    <td class="description">
+      <nobr>$Encoder.bodyEscape($ResourceBundle.getString('DocumentFilter.MaxFileSizeBytesColon'))</nobr>
+    </td>
+    <td class="value"><input name="s${SEQNUM}_maxfilesize" type="text"
+      value="$Encoder.attributeEscape($MAXFILESIZE)" size="24" /></td>
+  </tr>
+  <tr>
+    <td class="description">
+      <nobr>$Encoder.bodyEscape($ResourceBundle.getString('DocumentFilter.AllowedMIMETypesColon'))</nobr>
+    </td>
+    <td class="value">
+      <textarea rows="10" cols="64" name="s${SEQNUM}_mimetypes">$Encoder.bodyEscape($MIMETYPES)</textarea>
+    </td>
+  </tr>
+  <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('DocumentFilter.AllowedFileExtensionsColon'))</nobr></td>
+    <td class="value">
+      <textarea rows="10" cols="12" name="s${SEQNUM}_extensions">$Encoder.bodyEscape($EXTENSIONS)</textarea>
+    </td>
+  </tr>
+</table>
+
+#else
+
+<input type="hidden" name="s${SEQNUM}_maxfilesize" value="$Encoder.attributeEscape($MAXFILESIZE)" />
+<input type="hidden" name="s${SEQNUM}_mimetypes" value="$Encoder.attributeEscape($MIMETYPES)" />
+<input type="hidden" name="s${SEQNUM}_extensions" value="$Encoder.attributeEscape($EXTENSIONS)" />
+
+#end
diff --git a/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/viewSpecification.html b/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/viewSpecification.html
new file mode 100644
index 0000000..82c2dd3
--- /dev/null
+++ b/connectors/documentfilter/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/documentfilter/viewSpecification.html
@@ -0,0 +1,32 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<table class="displaytable">
+  <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('DocumentFilter.MaxFileSizeBytesColon'))</nobr></td>
+    <td class="value">$Encoder.bodyEscape($MAXFILESIZE)</td>
+  </tr>
+  <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('DocumentFilter.AllowedMIMETypesColon'))</nobr></td>
+    <td class="value">$Encoder.bodyEscape($MIMETYPES)</td>
+  </tr>
+  <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('DocumentFilter.AllowedFileExtensionsColon'))</nobr></td>
+    <td class="value">$Encoder.bodyEscape($EXTENSIONS)</td>
+  </tr>
+  
+</table>
diff --git a/connectors/documentfilter/pom.xml b/connectors/documentfilter/pom.xml
new file mode 100644
index 0000000..31d1470
--- /dev/null
+++ b/connectors/documentfilter/pom.xml
@@ -0,0 +1,361 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <groupId>org.apache.manifoldcf</groupId>
+    <artifactId>mcf-connectors</artifactId>
+    <version>1.7-SNAPSHOT</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+  </properties>
+
+  <artifactId>mcf-documentfilter-connector</artifactId>
+  <name>ManifoldCF - Connectors - Document Filter</name>
+
+  <build>
+    <defaultGoal>integration-test</defaultGoal>
+    <sourceDirectory>${basedir}/connector/src/main/java</sourceDirectory>
+    <testSourceDirectory>${basedir}/connector/src/test/java</testSourceDirectory>
+    <resources>
+      <resource>
+        <directory>${basedir}/connector/src/main/native2ascii</directory>
+        <includes>
+          <include>**/*.properties</include>
+        </includes>
+      </resource>
+      <resource>
+        <directory>${basedir}/connector/src/main/resources</directory>
+        <includes>
+          <include>**/*.html</include>
+          <include>**/*.js</include>
+        </includes>
+      </resource>
+    </resources> 
+    <testResources>
+      <testResource>
+        <directory>${basedir}/connector/src/test/resources</directory>
+      </testResource>
+    </testResources>
+
+    <plugins>
+
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>native2ascii-maven-plugin</artifactId>
+        <version>1.0-beta-1</version>
+        <configuration>
+            <workDir>target/classes</workDir>
+        </configuration>
+        <executions>
+            <execution>
+                <id>native2ascii-utf8</id>
+                <goals>
+                    <goal>native2ascii</goal>
+                </goals>
+                <configuration>
+                    <encoding>UTF8</encoding>
+                    <includes>
+                      <include>**/*.properties</include>
+                    </includes>
+                </configuration>
+            </execution>
+        </executions>
+      </plugin>
+
+      <!-- Test plugin configuration -->
+      <plugin>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+           <execution>
+            <id>copy-war</id>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>copy</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>target/dependency</outputDirectory>
+              <artifactItems>
+                <artifactItem>
+                  <groupId>${project.groupId}</groupId>
+                  <artifactId>mcf-api-service</artifactId>
+                  <version>${project.version}</version>
+                  <type>war</type>
+                  <overWrite>false</overWrite>
+                  <destFileName>mcf-api-service.war</destFileName>
+                </artifactItem>
+                <artifactItem>
+                  <groupId>${project.groupId}</groupId>
+                  <artifactId>mcf-authority-service</artifactId>
+                  <version>${project.version}</version>
+                  <type>war</type>
+                  <overWrite>false</overWrite>
+                  <destFileName>mcf-authority-service.war</destFileName>
+                </artifactItem>
+                <artifactItem>
+                  <groupId>${project.groupId}</groupId>
+                  <artifactId>mcf-crawler-ui</artifactId>
+                  <version>${project.version}</version>
+                  <type>war</type>
+                  <overWrite>false</overWrite>
+                  <destFileName>mcf-crawler-ui.war</destFileName>
+                </artifactItem>
+              </artifactItems>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <exclude>**/*Postgresql*.java</exclude>
+            <exclude>**/*MySQL*.java</exclude>
+          </excludes>
+          <forkMode>always</forkMode>
+          <workingDirectory>target/test-output</workingDirectory>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-failsafe-plugin</artifactId>
+        <version>2.12.3</version>
+        <configuration>
+          <skipTests>${skipITs}</skipTests>
+          <systemPropertyVariables>
+            <crawlerWarPath>../dependency/mcf-crawler-ui.war</crawlerWarPath>
+            <authorityserviceWarPath>../dependency/mcf-authority-service.war</authorityserviceWarPath>
+            <apiWarPath>../dependency/mcf-api-service.war</apiWarPath>
+          </systemPropertyVariables>
+          <excludes>
+            <exclude>**/*Postgresql*.java</exclude>
+            <exclude>**/*MySQL*.java</exclude>
+          </excludes>
+          <forkMode>always</forkMode>
+          <workingDirectory>target/test-output</workingDirectory>
+        </configuration>
+        <executions>
+          <execution>
+            <id>integration-test</id>
+            <goals>
+              <goal>integration-test</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>verify</id>
+            <goals>
+              <goal>verify</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+    </plugins>
+  </build>
+  
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-agents</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-ui-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    
+    <!-- Testing dependencies -->
+    
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-core</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-agents</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-pull-agent</artifactId>
+      <version>${project.version}</version>
+      <type>jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-pull-agent</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>postgresql</groupId>
+      <artifactId>postgresql</artifactId>
+      <version>${postgresql.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.hsqldb</groupId>
+      <artifactId>hsqldb</artifactId>
+      <version>${hsqldb.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.derby</groupId>
+      <artifactId>derby</artifactId>
+      <version>${derby.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>mysql</groupId>
+      <artifactId>mysql-connector-java</artifactId>
+      <version>${mysql.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-api-service</artifactId>
+      <version>${project.version}</version>
+      <type>war</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-authority-service</artifactId>
+      <version>${project.version}</version>
+      <type>war</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-crawler-ui</artifactId>
+      <version>${project.version}</version>
+      <type>war</type>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-server</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-webapp</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-servlet</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-http</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-io</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-security</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-continuation</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-xml</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-api-2.1-glassfish</artifactId>
+      <version>${glassfish.version}</version>
+      <scope>test</scope>
+    </dependency>    
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-2.1-glassfish</artifactId>
+      <version>${glassfish.version}</version>
+      <scope>test</scope>
+    </dependency>
+    
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>${slf4j.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-simple</artifactId>
+      <version>${slf4j.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+</project>
diff --git a/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java b/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
index dcf6aa8..b206472 100644
--- a/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
+++ b/connectors/documentum/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/DCTM/DCTM.java
@@ -1866,7 +1866,7 @@
                 }
                 
                 if (rd == null)
-                  activities.deleteDocument(documentIdentifier,versionString);
+                  activities.noDocument(documentIdentifier,versionString);
                 
                 // Abort the retry loop and go on to the next document
                 break;
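The hunk above swaps deleteDocument() for noDocument() when a fetch yields nothing indexable.  A minimal sketch of the resulting rejection idiom, in hypothetical connector code (the surrounding fetch loop and the single-argument deleteDocument() overload are assumptions, not part of this patch):

    if (documentGone)
    {
      // The document no longer exists in the repository: remove it from the index.
      activities.deleteDocument(documentIdentifier);
    }
    else if (rd == null)
    {
      // The document exists but cannot be indexed right now: record its
      // version string without sending anything, so the next incremental
      // crawl still knows this version was handled.
      activities.noDocument(documentIdentifier, versionString);
    }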
diff --git a/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchAction.java b/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchAction.java
index d9178d1..4562eff 100644
--- a/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchAction.java
+++ b/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchAction.java
@@ -22,6 +22,8 @@
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.HttpClient;
 
+import java.io.IOException;
+
 import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
 import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
 import org.apache.manifoldcf.crawler.system.Logging;
@@ -52,4 +54,21 @@
     setResult(Result.ERROR, error);
     Logging.connectors.warn("ES: Commit failed: "+getResponse());
   }
+  
+  @Override
+  protected void handleIOException(IOException e)
+    throws ManifoldCFException, ServiceInterruption {
+    // We want a quicker failure here!!
+    if (e instanceof java.io.InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
+      throw new ManifoldCFException(e.getMessage(),ManifoldCFException.INTERRUPTED);
+    setResult(Result.ERROR, e.getMessage());
+    long currentTime = System.currentTimeMillis();
+    // One notification attempt, then we're done.
+    throw new ServiceInterruption("IO exception: "+e.getMessage(),e,
+        currentTime + 60000L,
+        currentTime + 1L * 60L * 60000L,
+        1,
+        false);
+  }
+
 }
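The new handleIOException() override fails faster than the base-class behavior: one retry a minute out, with a one-hour ceiling.  An annotated sketch of the ServiceInterruption construction it uses (parameter roles are inferred from the call site; the argument comments are illustrative):

    long now = System.currentTimeMillis();
    throw new ServiceInterruption(
        "IO exception: " + e.getMessage(), // message recorded for the activity log
        e,                                 // underlying cause
        now + 60000L,                      // earliest retry time: one minute from now
        now + 60L * 60000L,                // hard deadline: give up after an hour
        1,                                 // at most one retry attempt
        false);                            // don't abort the job when retries are exhausted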
diff --git a/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnection.java b/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnection.java
index c2749a3..bee7adb 100644
--- a/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnection.java
+++ b/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnection.java
@@ -149,6 +149,26 @@
         exception = e;
       }
     }
+
+    public void finishUp()
+      throws HttpException, IOException, InterruptedException
+    {
+      join();
+      Throwable t = exception;
+      if (t != null)
+      {
+        if (t instanceof HttpException)
+          throw (HttpException)t;
+        else if (t instanceof IOException)
+          throw (IOException)t;
+        else if (t instanceof RuntimeException)
+          throw (RuntimeException)t;
+        else if (t instanceof Error)
+          throw (Error)t;
+        else
+          throw new RuntimeException("Unexpected exception thrown: "+t.getMessage(),t);
+      }
+    }
     
     public int getResultCode()
     {
@@ -178,22 +198,7 @@
       ct.start();
       try
       {
-        ct.join();
-        Throwable t = ct.getException();
-        if (t != null)
-        {
-          if (t instanceof HttpException)
-            throw (HttpException)t;
-          else if (t instanceof IOException)
-            throw (IOException)t;
-          else if (t instanceof RuntimeException)
-            throw (RuntimeException)t;
-          else if (t instanceof Error)
-            throw (Error)t;
-          else
-            throw new RuntimeException("Unexpected exception thrown: "+t.getMessage(),t);
-        }
-        
+        ct.finishUp();
         response = ct.getResponse();
         return handleResultCode(ct.getResultCode(), response);
       }
@@ -248,14 +253,16 @@
     throw new ManifoldCFException("Unexpected HTTP result code: "+code+": "+response);
   }
 
-  private void handleHttpException(HttpException e)
+  protected void handleHttpException(HttpException e)
     throws ManifoldCFException, ServiceInterruption {
     setResult(Result.ERROR, e.getMessage());
     throw new ManifoldCFException(e);
   }
   
-  private void handleIOException(IOException e)
+  protected void handleIOException(IOException e)
     throws ManifoldCFException, ServiceInterruption {
+    if (e instanceof java.io.InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
+      throw new ManifoldCFException(e.getMessage(),ManifoldCFException.INTERRUPTED);
     setResult(Result.ERROR, e.getMessage());
     long currentTime = System.currentTimeMillis();
     // All IO exceptions are treated as service interruptions, retried for an hour
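The finishUp() refactoring above centralizes a common ManifoldCF idiom: a worker thread captures any Throwable it sees, and the caller joins it and rethrows the failure with its original type.  A self-contained sketch of the idiom (class and field names are illustrative):

    import java.io.IOException;

    class CommandThread extends Thread
    {
      protected Throwable exception = null;

      public void run()
      {
        try
        {
          // ... perform the blocking HTTP call here ...
        }
        catch (Throwable e)
        {
          exception = e;  // capture for the calling thread
        }
      }

      public void finishUp()
        throws IOException, InterruptedException
      {
        join();  // wait for run() to complete
        Throwable t = exception;
        if (t == null)
          return;
        // Rethrow with the original type where possible.
        if (t instanceof IOException)
          throw (IOException)t;
        if (t instanceof RuntimeException)
          throw (RuntimeException)t;
        if (t instanceof Error)
          throw (Error)t;
        throw new RuntimeException("Unexpected exception thrown: " + t.getMessage(), t);
      }
    }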
diff --git a/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java b/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java
index 37e1d46..9a930f4 100644
--- a/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java
+++ b/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchConnector.java
@@ -26,6 +26,7 @@
 import java.util.Locale;
 import java.util.Map;
 import java.util.Iterator;
+import java.util.HashMap;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.http.conn.HttpClientConnectionManager;
@@ -45,12 +46,13 @@
 import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
 import org.apache.manifoldcf.agents.interfaces.IOutputNotifyActivity;
 import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity;
-import org.apache.manifoldcf.agents.interfaces.OutputSpecification;
+import org.apache.manifoldcf.agents.interfaces.IOutputCheckActivity;
 import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
 import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
 import org.apache.manifoldcf.agents.output.BaseOutputConnector;
 import org.apache.manifoldcf.agents.output.elasticsearch.ElasticSearchAction.CommandEnum;
 import org.apache.manifoldcf.agents.output.elasticsearch.ElasticSearchConnection.Result;
+import org.apache.manifoldcf.core.interfaces.Specification;
 import org.apache.manifoldcf.core.interfaces.ConfigParams;
 import org.apache.manifoldcf.core.interfaces.ConfigurationNode;
 import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
@@ -58,6 +60,7 @@
 import org.apache.manifoldcf.core.interfaces.IThreadContext;
 import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
 import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
 import org.json.JSONException;
 import org.json.JSONObject;
 
@@ -214,7 +217,8 @@
    * @param out
    * @throws ManifoldCFException */
   private static void outputResource(String resName, IHTTPOutput out,
-      Locale locale, ElasticSearchParam params, String tabName) throws ManifoldCFException
+      Locale locale, ElasticSearchParam params,
+      String tabName, Integer sequenceNumber, Integer currentSequenceNumber) throws ManifoldCFException
   {
     Map<String,String> paramMap = null;
     if (params != null) {
@@ -222,7 +226,16 @@
       if (tabName != null) {
         paramMap.put("TabName", tabName);
       }
+      if (currentSequenceNumber != null)
+        paramMap.put("SelectedNum",currentSequenceNumber.toString());
     }
+    else
+    {
+      paramMap = new HashMap<String,String>();
+    }
+    if (sequenceNumber != null)
+      paramMap.put("SeqNum",sequenceNumber.toString());
+
     Messages.outputResourceWithVelocity(out, locale, resName, paramMap, true);
   }
 
@@ -234,7 +247,7 @@
     super.outputConfigurationHeader(threadContext, out, locale, parameters,
         tabsArray);
     tabsArray.add(Messages.getString(locale, ELASTICSEARCH_TAB_PARAMETERS));
-    outputResource(EDIT_CONFIG_HEADER_FORWARD, out, locale, null, null);
+    outputResource(EDIT_CONFIG_HEADER_FORWARD, out, locale, null, null, null, null);
   }
 
   @Override
@@ -245,20 +258,50 @@
     super.outputConfigurationBody(threadContext, out, locale, parameters,
         tabName);
     ElasticSearchConfig config = this.getConfigParameters(parameters);
-    outputResource(EDIT_CONFIG_FORWARD_PARAMETERS, out, locale, config, tabName);
+    outputResource(EDIT_CONFIG_FORWARD_PARAMETERS, out, locale, config, tabName, null, null);
   }
 
+  /** Obtain the name of the form check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form check javascript method.
+  */
   @Override
-  public void outputSpecificationHeader(IHTTPOutput out, Locale locale,
-      OutputSpecification os, List<String> tabsArray)
-      throws ManifoldCFException, IOException
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
   {
-    super.outputSpecificationHeader(out, locale, os, tabsArray);
-    tabsArray.add(Messages.getString(locale, ELASTICSEARCH_TAB_ELASTICSEARCH));
-    outputResource(EDIT_SPEC_HEADER_FORWARD, out, locale, null, null);
+    return "s"+connectionSequenceNumber+"_checkSpecification";
   }
 
-  final private SpecificationNode getSpecNode(OutputSpecification os)
+  /** Obtain the name of the form presave check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form presave check javascript method.
+  */
+  @Override
+  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
+  }
+
+  /** Output the specification header section.
+  * This method is called in the head section of a job page which has selected a pipeline connection of the current type.  Its purpose is to add the required tabs
+  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this connection.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
+  */
+  @Override
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, List<String> tabsArray)
+    throws ManifoldCFException, IOException
+  {
+    super.outputSpecificationHeader(out, locale, os, connectionSequenceNumber, tabsArray);
+    tabsArray.add(Messages.getString(locale, ELASTICSEARCH_TAB_ELASTICSEARCH));
+    
+    outputResource(EDIT_SPEC_HEADER_FORWARD, out, locale, null, null, new Integer(connectionSequenceNumber), null);
+  }
+
+  final private SpecificationNode getSpecNode(Specification os)
   {
     int l = os.getChildCount();
     for (int i = 0; i < l; i++)
@@ -272,19 +315,42 @@
     return null;
   }
 
+  /** Output the specification body section.
+  * This method is called in the body section of a job page which has selected a pipeline connection of the current type.  Its purpose is to present the required form elements for editing.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
+  * form is "editjob".
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param actualSequenceNumber is the sequence number of the connection currently selected within the job.
+  *@param tabName is the current tab name.
+  */
   @Override
-  public void outputSpecificationBody(IHTTPOutput out, Locale locale,
-      OutputSpecification os, String tabName) throws ManifoldCFException,
-      IOException
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
+    throws ManifoldCFException, IOException
   {
-    super.outputSpecificationBody(out, locale, os, tabName);
+
     ElasticSearchSpecs specs = getSpecParameters(os);
-    outputResource(EDIT_SPEC_FORWARD_ELASTICSEARCH, out, locale, specs, tabName);
+    
+    outputResource(EDIT_SPEC_FORWARD_ELASTICSEARCH, out, locale, specs, tabName, new Integer(connectionSequenceNumber), new Integer(actualSequenceNumber));
   }
 
+  /** Process a specification post.
+  * This method is called at the start of a job's edit or view page, whenever there is a possibility that form data for a connection has been
+  * posted.  Its purpose is to gather form information and modify the pipeline specification accordingly.
+  * The name of the posted form is "editjob".
+  *@param variableContext contains the post data, including binary file-upload information.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
+  */
   @Override
-  public String processSpecificationPost(IPostParameters variableContext,
-      Locale locale, OutputSpecification os) throws ManifoldCFException
+  public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException
   {
     ConfigurationNode specNode = getSpecNode(os);
     boolean bAdd = (specNode == null);
@@ -293,12 +359,30 @@
       specNode = new SpecificationNode(
           ElasticSearchSpecs.ELASTICSEARCH_SPECS_NODE);
     }
-    ElasticSearchSpecs.contextToSpecNode(variableContext, specNode);
+    ElasticSearchSpecs.contextToSpecNode(variableContext, specNode, connectionSequenceNumber);
     if (bAdd)
       os.addChild(os.getChildCount(), specNode);
     return null;
   }
 
+  /** View specification.
+  * This method is called in the body section of a job's view page.  Its purpose is to present the pipeline specification information to the user.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param os is the current pipeline specification for this job.
+  */
+  @Override
+  public void viewSpecification(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException, IOException
+  {
+    outputResource(VIEW_SPEC_FORWARD, out, locale, getSpecParameters(os), null, new Integer(connectionSequenceNumber), null);
+  }
+
   /** Build a Set of ElasticSearch parameters. If configParams is null,
    * getConfiguration() is used.
    * 
@@ -311,7 +395,7 @@
     return new ElasticSearchConfig(configParams);
   }
 
-  final private ElasticSearchSpecs getSpecParameters(OutputSpecification os)
+  final private ElasticSearchSpecs getSpecParameters(Specification os)
       throws ManifoldCFException
   {
     return new ElasticSearchSpecs(getSpecNode(os));
@@ -330,26 +414,26 @@
   }
 
   @Override
-  public String getOutputDescription(OutputSpecification os)
+  public VersionContext getPipelineDescription(Specification os)
       throws ManifoldCFException
   {
     ElasticSearchSpecs specs = new ElasticSearchSpecs(getSpecNode(os));
-    return specs.toJson().toString();
+    return new VersionContext(specs.toJson().toString(),params,os);
   }
 
   @Override
-  public boolean checkLengthIndexable(String outputDescription, long length)
+  public boolean checkLengthIndexable(VersionContext outputDescription, long length, IOutputCheckActivity activities)
       throws ManifoldCFException, ServiceInterruption
   {
-    ElasticSearchSpecs specs = getSpecsCache(outputDescription);
+    ElasticSearchSpecs specs = getSpecsCache(outputDescription.getVersionString());
     long maxFileSize = specs.getMaxFileSize();
     if (length > maxFileSize)
       return false;
-    return super.checkLengthIndexable(outputDescription, length);
+    return super.checkLengthIndexable(outputDescription, length, activities);
   }
 
   @Override
-  public boolean checkDocumentIndexable(String outputDescription, File localFile)
+  public boolean checkDocumentIndexable(VersionContext outputDescription, File localFile, IOutputCheckActivity activities)
       throws ManifoldCFException, ServiceInterruption
   {
     // No filtering here; we don't look inside the file and don't know its extension.  That's done via the url
@@ -364,10 +448,10 @@
   *@return true if the file is indexable.
   */
   @Override
-  public boolean checkURLIndexable(String outputDescription, String url)
+  public boolean checkURLIndexable(VersionContext outputDescription, String url, IOutputCheckActivity activities)
     throws ManifoldCFException, ServiceInterruption
   {
-    ElasticSearchSpecs specs = getSpecsCache(outputDescription);
+    ElasticSearchSpecs specs = getSpecsCache(outputDescription.getVersionString());
     return specs.checkExtension(FilenameUtils.getExtension(url));
   }
 
@@ -385,14 +469,7 @@
       IOException
   {
     outputResource(VIEW_CONFIG_FORWARD, out, locale,
-        getConfigParameters(parameters), null);
-  }
-
-  @Override
-  public void viewSpecification(IHTTPOutput out, Locale locale,
-      OutputSpecification os) throws ManifoldCFException, IOException
-  {
-    outputResource(VIEW_SPEC_FORWARD, out, locale, getSpecParameters(os), null);
+        getConfigParameters(parameters), null, null, null);
   }
 
   @Override
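The revised outputResource() always publishes SeqNum and, when a specification is being edited, SelectedNum; the Velocity templates compare the two to decide between editable fields and hidden inputs (see editSpecification_ElasticSearch.html below).  A condensed sketch of the contract (hypothetical fragment):

    Map<String,String> paramMap = new HashMap<String,String>();
    paramMap.put("SeqNum", Integer.toString(connectionSequenceNumber));
    if (currentSequenceNumber != null)
      paramMap.put("SelectedNum", currentSequenceNumber.toString());
    // Template side: fields render as editable only when the tab matches and
    // ${SEQNUM} == ${SELECTEDNUM}; otherwise hidden inputs preserve the values.
    Messages.outputResourceWithVelocity(out, locale, resName, paramMap, true);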
diff --git a/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchSpecs.java b/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchSpecs.java
index 351bc5f..aca4d4c 100644
--- a/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchSpecs.java
+++ b/connectors/elasticsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/elasticsearch/ElasticSearchSpecs.java
@@ -95,11 +95,11 @@
   }
 
   public static void contextToSpecNode(IPostParameters variableContext,
-      ConfigurationNode specNode)
+      ConfigurationNode specNode, int sequenceNumber)
   {
     for (ParameterEnum param : SPECIFICATIONLIST)
     {
-      String p = variableContext.getParameter(param.name().toLowerCase());
+      String p = variableContext.getParameter("s"+sequenceNumber+"_"+param.name().toLowerCase());
       if (p != null)
         specNode.setAttribute(param.name(), p);
     }
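contextToSpecNode() now reads each posted field through a per-connection prefix, which is what lets several connections of the same type share one "editjob" form.  A sketch of the naming convention (the attribute name and values here are illustrative):

    int sequenceNumber = 0;  // assigned by the framework for this connection
    String prefix = "s" + sequenceNumber + "_";
    // A field the template emitted as name="s0_maxfilesize" is read back as:
    String p = variableContext.getParameter(prefix + "maxfilesize");
    if (p != null)
      specNode.setAttribute("MAXFILESIZE", p);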
diff --git a/connectors/elasticsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/elasticsearch/editSpecification_ElasticSearch.html b/connectors/elasticsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/elasticsearch/editSpecification_ElasticSearch.html
index b85e8b2..23d221b 100644
--- a/connectors/elasticsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/elasticsearch/editSpecification_ElasticSearch.html
+++ b/connectors/elasticsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/elasticsearch/editSpecification_ElasticSearch.html
@@ -15,31 +15,31 @@
  limitations under the License.
 -->
 
-#if($TABNAME == $ResourceBundle.getString('ElasticSearchConnector.ElasticSearch'))
+#if($TABNAME == $ResourceBundle.getString('ElasticSearchConnector.ElasticSearch') && ${SEQNUM} == ${SELECTEDNUM})
 
 <table class="displaytable">
   <tr>
     <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ElasticSearchConnector.MaxFileSizeBytesColon'))</nobr></td>
-    <td class="value"><input name="maxfilesize" type="text"
+    <td class="value"><input name="s${SEQNUM}_maxfilesize" type="text"
       value="$Encoder.attributeEscape($MAXFILESIZE)" size="24" /></td>
   </tr>
   <tr>
     <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ElasticSearchConnector.AllowedMIMETypesColon'))</nobr>
     </td>
-    <td class="value"><textarea rows="10" cols="64" name="mimetypes">$Encoder.bodyEscape($MIMETYPES)</textarea>
+    <td class="value"><textarea rows="10" cols="64" name="s${SEQNUM}_mimetypes">$Encoder.bodyEscape($MIMETYPES)</textarea>
     </td>
   </tr>
   <tr>
     <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ElasticSearchConnector.AllowedFileExtensionsColon'))</nobr></td>
-    <td class="value"><textarea rows="10" cols="12" name="extensions">$Encoder.bodyEscape($EXTENSIONS)</textarea>
+    <td class="value"><textarea rows="10" cols="12" name="s${SEQNUM}_extensions">$Encoder.bodyEscape($EXTENSIONS)</textarea>
     </td>
   </tr>
 </table>
 
 #else
 
-<input type="hidden" name="maxfilesize" value="$Encoder.attributeEscape($MAXFILESIZE)" />
-<input type="hidden" name="mimetypes" value="$Encoder.attributeEscape($MIMETYPES)" />
-<input type="hidden" name="extensions" value="$Encoder.attributeEscape($EXTENSIONS)" />
+<input type="hidden" name="s${SEQNUM}_maxfilesize" value="$Encoder.attributeEscape($MAXFILESIZE)" />
+<input type="hidden" name="s${SEQNUM}_mimetypes" value="$Encoder.attributeEscape($MIMETYPES)" />
+<input type="hidden" name="s${SEQNUM}_extensions" value="$Encoder.attributeEscape($EXTENSIONS)" />
 
 #end
diff --git a/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java b/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java
index 0ea911b..e2ed4ba 100644
--- a/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java
+++ b/connectors/filenet/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filenet/FilenetConnector.java
@@ -1267,7 +1267,7 @@
                     null,documentIdentifier,"Authorization error",e.getMessage(),null);
                   if (Logging.connectors.isDebugEnabled())
                     Logging.connectors.debug("FileNet: Removing file '"+documentIdentifier+"' because: "+e.getMessage(),e);
-                  activities.deleteDocument(documentIdentifier,documentVersion);
+                  activities.noDocument(documentIdentifier,documentVersion);
                   i++;
                   continue;
                 }
@@ -1350,7 +1350,7 @@
                 }
               }
               else
-                activities.deleteDocument(documentIdentifier,documentVersion);
+                activities.noDocument(documentIdentifier,documentVersion);
             }
             finally
             {
@@ -1384,7 +1384,7 @@
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("FileNet: Removing version '"+documentIdentifier+"' because it seems to no longer exist");
 
-            activities.deleteDocument(documentIdentifier,documentVersion);
+            activities.noDocument(documentIdentifier,documentVersion);
             i++;
             continue;
           }
@@ -1414,7 +1414,7 @@
           {
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("FileNet: Removing version '"+documentIdentifier+"' because: "+e.getMessage(),e);
-            activities.deleteDocument(documentIdentifier,documentVersion);
+            activities.noDocument(documentIdentifier,documentVersion);
             i++;
             continue;
           }
diff --git a/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java b/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
index 33d61b7..bd1bb15 100644
--- a/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
+++ b/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputConnector.java
@@ -35,14 +35,15 @@
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.HashMap;
 
 import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
 import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity;
-import org.apache.manifoldcf.agents.interfaces.OutputSpecification;
 import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
 import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
 import org.apache.manifoldcf.agents.output.BaseOutputConnector;
 import org.apache.manifoldcf.agents.system.Logging;
+import org.apache.manifoldcf.core.interfaces.Specification;
 import org.apache.manifoldcf.core.interfaces.ConfigParams;
 import org.apache.manifoldcf.core.interfaces.ConfigurationNode;
 import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
@@ -50,6 +51,7 @@
 import org.apache.manifoldcf.core.interfaces.IThreadContext;
 import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
 import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
 import org.json.JSONException;
 
 public class FileOutputConnector extends BaseOutputConnector {
@@ -143,9 +145,9 @@
    * the document will not need to be sent again to the output data store.
    */
   @Override
-  public String getOutputDescription(OutputSpecification spec) throws ManifoldCFException, ServiceInterruption {
+  public VersionContext getPipelineDescription(Specification spec) throws ManifoldCFException, ServiceInterruption {
     FileOutputSpecs specs = new FileOutputSpecs(getSpecNode(spec));
-    return specs.toJson().toString();
+    return new VersionContext(specs.toJson().toString(),params,spec);
   }
 
   /** Add (or replace) a document in the output data store using the connector.
@@ -163,7 +165,7 @@
    *@return the document status (accepted or permanently rejected).
    */
   @Override
-  public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities) throws ManifoldCFException, ServiceInterruption {
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities) throws ManifoldCFException, ServiceInterruption, IOException {
     // Establish a session
     getSession();
 
@@ -172,7 +174,7 @@
     FileOutputSpecs specs = null;
     StringBuffer path = new StringBuffer();
     try {
-      specs = new FileOutputSpecs(outputDescription);
+      specs = new FileOutputSpecs(outputDescription.getVersionString());
 
       /*
        * make file path
@@ -450,101 +452,84 @@
     activities.recordActivity(null, REMOVE_ACTIVITY, null, documentURI, "OK", null);
   }
 
-  /** Output the configuration header section.
-   * This method is called in the head section of the connector's configuration page.  Its purpose is to add the required tabs to the list, and to output any
-   * javascript methods that might be needed by the configuration editing HTML.
-   *@param threadContext is the local thread context.
-   *@param out is the output to which any HTML should be sent.
-   *@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
-   *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
-   */
+  /** Obtain the name of the form check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form check javascript method.
+  */
   @Override
-  public void outputConfigurationHeader(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters, List<String> tabsArray) throws ManifoldCFException, IOException {
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecification";
   }
 
-  /** Output the configuration body section.
-   * This method is called in the body section of the connector's configuration page.  Its purpose is to present the required form elements for editing.
-   * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
-   * form is "editconnection".
-   *@param threadContext is the local thread context.
-   *@param out is the output to which any HTML should be sent.
-   *@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
-   *@param tabName is the current tab name.
-   */
+  /** Obtain the name of the form presave check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form presave check javascript method.
+  */
   @Override
-  public void outputConfigurationBody(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters, String tabName) throws ManifoldCFException, IOException {
-  }
-
-  /** Process a configuration post.
-   * This method is called at the start of the connector's configuration page, whenever there is a possibility that form data for a connection has been
-   * posted.  Its purpose is to gather form information and modify the configuration parameters accordingly.
-   * The name of the posted form is "editconnection".
-   *@param threadContext is the local thread context.
-   *@param variableContext is the set of variables available from the post, including binary file post information.
-   *@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
-   *@return null if all is well, or a string error message if there is an error that should prevent saving of the connection (and cause a redirection to an error page).
-   */
-  @Override
-  public String processConfigurationPost(IThreadContext threadContext, IPostParameters variableContext, Locale locale, ConfigParams parameters) throws ManifoldCFException {
-    return null;
-  }
-
-  /** View configuration.
-   * This method is called in the body section of the connector's view configuration page.  Its purpose is to present the connection information to the user.
-   * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
-   *@param threadContext is the local thread context.
-   *@param out is the output to which any HTML should be sent.
-   *@param parameters are the configuration parameters, as they currently exist, for this connection being configured.
-   */
-  @Override
-  public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters) throws ManifoldCFException, IOException {
+  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
   }
 
   /** Output the specification header section.
-   * This method is called in the head section of a job page which has selected an output connection of the current type.  Its purpose is to add the required tabs
-   * to the list, and to output any javascript methods that might be needed by the job editing HTML.
-   *@param out is the output to which any HTML should be sent.
-   *@param os is the current output specification for this job.
-   *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
-   */
+  * This method is called in the head section of a job page which has selected a pipeline connection of the current type.  Its purpose is to add the required tabs
+  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this connection.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
+  */
   @Override
-  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, OutputSpecification os, List<String> tabsArray) throws ManifoldCFException, IOException {
-    super.outputSpecificationHeader(out, locale, os, tabsArray);
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, List<String> tabsArray)
+    throws ManifoldCFException, IOException {
+    super.outputSpecificationHeader(out, locale, os, connectionSequenceNumber, tabsArray);
     tabsArray.add(Messages.getString(locale, "FileConnector.PathTabName"));
-    outputResource(EDIT_SPECIFICATION_JS, out, locale, null, null);
+    outputResource(EDIT_SPECIFICATION_JS, out, locale, null, null, new Integer(connectionSequenceNumber), null);
   }
 
   /** Output the specification body section.
-   * This method is called in the body section of a job page which has selected an output connection of the current type.  Its purpose is to present the required form elements for editing.
-   * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
-   * form is "editjob".
-   *@param out is the output to which any HTML should be sent.
-   *@param os is the current output specification for this job.
-   *@param tabName is the current tab name.
-   */
+  * This method is called in the body section of a job page which has selected a pipeline connection of the current type.  Its purpose is to present the required form elements for editing.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
+  * form is "editjob".
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param actualSequenceNumber is the sequence number of the connection currently selected within the job.
+  *@param tabName is the current tab name.
+  */
   @Override
-  public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os, String tabName) throws ManifoldCFException, IOException {
-    super.outputSpecificationBody(out, locale, os, tabName);
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
+    throws ManifoldCFException, IOException {
+    super.outputSpecificationBody(out, locale, os, connectionSequenceNumber, actualSequenceNumber, tabName);
     FileOutputSpecs specs = getSpecParameters(os);
-    outputResource(EDIT_SPECIFICATION_HTML, out, locale, specs, tabName);
+    outputResource(EDIT_SPECIFICATION_HTML, out, locale, specs, tabName, new Integer(connectionSequenceNumber), new Integer(actualSequenceNumber));
   }
 
   /** Process a specification post.
-   * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
-   * posted.  Its purpose is to gather form information and modify the output specification accordingly.
-   * The name of the posted form is "editjob".
-   *@param variableContext contains the post data, including binary file-upload information.
-   *@param os is the current output specification for this job.
-   *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
-   */
+  * This method is called at the start of a job's edit or view page, whenever there is a possibility that form data for a connection has been
+  * posted.  Its purpose is to gather form information and modify the pipeline specification accordingly.
+  * The name of the posted form is "editjob".
+  *@param variableContext contains the post data, including binary file-upload information.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
+  */
   @Override
-  public String processSpecificationPost(IPostParameters variableContext, Locale locale, OutputSpecification os) throws ManifoldCFException {
+  public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException {
     ConfigurationNode specNode = getSpecNode(os);
     boolean bAdd = (specNode == null);
     if (bAdd) {
       specNode = new SpecificationNode(FileOutputConstant.PARAM_ROOTPATH);
     }
-    FileOutputSpecs.contextToSpecNode(variableContext, specNode);
+    FileOutputSpecs.contextToSpecNode(variableContext, specNode, connectionSequenceNumber);
     if (bAdd) {
       os.addChild(os.getChildCount(), specNode);
     }
@@ -553,21 +538,25 @@
   }
 
   /** View specification.
-   * This method is called in the body section of a job's view page.  Its purpose is to present the output specification information to the user.
-   * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
-   *@param out is the output to which any HTML should be sent.
-   *@param os is the current output specification for this job.
-   */
+  * This method is called in the body section of a job's view page.  Its purpose is to present the pipeline specification information to the user.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param os is the current pipeline specification for this job.
+  */
   @Override
-  public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os) throws ManifoldCFException, IOException {
-    outputResource(VIEW_SPECIFICATION_HTML, out, locale, getSpecParameters(os), null);
+  public void viewSpecification(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException, IOException {
+    outputResource(VIEW_SPECIFICATION_HTML, out, locale, getSpecParameters(os), null, new Integer(connectionSequenceNumber), null);
   }
 
   /**
    * @param os
    * @return
    */
-  final private SpecificationNode getSpecNode(OutputSpecification os)
+  final private SpecificationNode getSpecNode(Specification os)
   {
     int l = os.getChildCount();
     for (int i = 0; i < l; i++) {
@@ -584,7 +573,7 @@
    * @return
    * @throws ManifoldCFException
    */
-  final private FileOutputSpecs getSpecParameters(OutputSpecification os) throws ManifoldCFException {
+  final private FileOutputSpecs getSpecParameters(Specification os) throws ManifoldCFException {
     return new FileOutputSpecs(getSpecNode(os));
   }
 
@@ -604,14 +593,23 @@
    * @param resName
    * @param out
    * @throws ManifoldCFException */
-  private static void outputResource(String resName, IHTTPOutput out, Locale locale, FileOutputParam params, String tabName) throws ManifoldCFException {
+  private static void outputResource(String resName, IHTTPOutput out, Locale locale, FileOutputParam params, String tabName,
+    Integer sequenceNumber, Integer currentSequenceNumber) throws ManifoldCFException {
     Map<String,String> paramMap = null;
     if (params != null) {
       paramMap = params.buildMap();
       if (tabName != null) {
         paramMap.put("TabName", tabName);
       }
+      if (currentSequenceNumber != null)
+        paramMap.put("SelectedNum", currentSequenceNumber.toString());
     }
+    else
+    {
+      paramMap = new HashMap<String,String>();
+    }
+    if (sequenceNumber != null)
+      paramMap.put("SeqNum", sequenceNumber.toString());
     Messages.outputResourceWithVelocity(out, locale, resName, paramMap, true);
   }
 
diff --git a/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputSpecs.java b/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputSpecs.java
index 08dd152..d5ee5d3 100644
--- a/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputSpecs.java
+++ b/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/agents/output/filesystem/FileOutputSpecs.java
@@ -100,9 +100,9 @@
    * @param variableContext
    * @param specNode
    */
-  public static void contextToSpecNode(IPostParameters variableContext, ConfigurationNode specNode) {
+  public static void contextToSpecNode(IPostParameters variableContext, ConfigurationNode specNode, int sequenceNumber) {
     for (ParameterEnum param : SPECIFICATIONLIST) {
-      String p = variableContext.getParameter(param.name().toLowerCase());
+      String p = variableContext.getParameter("s"+sequenceNumber+"_"+param.name().toLowerCase());
       if (p != null) {
         specNode.setAttribute(param.name(), p);
       }
diff --git a/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java b/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
index 9c04a85..4ff00cc 100644
--- a/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
+++ b/connectors/filesystem/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/filesystem/FileConnector.java
@@ -220,13 +220,14 @@
       {
         if (file.isDirectory())
         {
-          // It's a directory.  The version ID will be the
-          // last modified date.
-          long lastModified = file.lastModified();
-          rval[i] = new Long(lastModified).toString();
+          // It's a directory.  The version ID would be the
+          // last modified date, except that doesn't work on Windows,
+          // where changes to children do not update the directory's modified date.
+          //long lastModified = file.lastModified();
+          //rval[i] = new Long(lastModified).toString();
 
-          // Signal that we don't have any versioning.
-          // rval[i] = "";
+          // Signal that we don't have any versioning and that we should recheck always.
+          rval[i] = "";
         }
         else
         {
@@ -289,6 +290,7 @@
       {
         if (file.isDirectory())
         {
+          // Chained connectors always scan parent nodes
           // Queue up stuff for directory
           long startTime = System.currentTimeMillis();
           String errorCode = "OK";
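The directory-versioning change above guards against a real pitfall: because a directory's lastModified() does not reliably change when its children do, using it as a version string can make incremental crawls miss new files.  Returning an empty version string marks the document as unversioned, forcing a rescan on every crawl.  A sketch of the convention (hypothetical fragment; the file-version format shown is illustrative):

    if (file.isDirectory())
    {
      // Unversioned: always rescan so newly added children are discovered.
      rval[i] = "";
    }
    else
    {
      // Ordinary files can still version on modification time.
      rval[i] = new Long(file.lastModified()).toString();
    }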
diff --git a/connectors/filesystem/connector/src/main/resources/org/apache/manifoldcf/agents/output/filesystem/editSpecification.html b/connectors/filesystem/connector/src/main/resources/org/apache/manifoldcf/agents/output/filesystem/editSpecification.html
index ca2a5e3..12c1c69 100644
--- a/connectors/filesystem/connector/src/main/resources/org/apache/manifoldcf/agents/output/filesystem/editSpecification.html
+++ b/connectors/filesystem/connector/src/main/resources/org/apache/manifoldcf/agents/output/filesystem/editSpecification.html
@@ -15,18 +15,18 @@
  limitations under the License.
 -->
 
-#if($TABNAME == $ResourceBundle.getString('FileConnector.PathTabName'))
+#if($TABNAME == $ResourceBundle.getString('FileConnector.PathTabName') && ${SEQNUM} == ${SELECTEDNUM})
 
 <table class="displaytable">
   <tr><td class="separator" colspan="2"><hr/></td></tr>
   <tr>
     <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('FileConnector.RootPath'))</nobr></td>
-    <td class="value"><input type="text" name="rootpath" size="64" value="$Encoder.attributeEscape($ROOTPATH)" /></td>
+    <td class="value"><input type="text" name="s${SEQNUM}_rootpath" size="64" value="$Encoder.attributeEscape($ROOTPATH)" /></td>
   </tr>
 </table>
 
 #else
 
-<input type="hidden" name="rootpath" value="$Encoder.attributeEscape($ROOTPATH)" />
+<input type="hidden" name="s${SEQNUM}_rootpath" value="$Encoder.attributeEscape($ROOTPATH)" />
 
 #end
diff --git a/connectors/filesystem/connector/src/main/resources/org/apache/manifoldcf/agents/output/filesystem/editSpecification.js b/connectors/filesystem/connector/src/main/resources/org/apache/manifoldcf/agents/output/filesystem/editSpecification.js
index 9ce70ec..cbc9d08 100644
--- a/connectors/filesystem/connector/src/main/resources/org/apache/manifoldcf/agents/output/filesystem/editSpecification.js
+++ b/connectors/filesystem/connector/src/main/resources/org/apache/manifoldcf/agents/output/filesystem/editSpecification.js
@@ -17,13 +17,13 @@
 
 <script type="text/javascript">
 <!--
-function checkOutputSpecificationForSave()
+function s${SEQNUM}_checkSpecificationForSave()
 {
-  if (editjob.rootpath.value == "")
+  if (editjob.s${SEQNUM}_rootpath.value == "")
   {
     alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('FileConnector.RootPathCannotBeNull'))");
-    SelectTab("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('FileConnector.PathTabName'))");
-    editjob.rootpath.focus();
+    SelectSequencedTab("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('FileConnector.PathTabName'))",${SEQNUM});
+    editjob.s${SEQNUM}_rootpath.focus();
     return false;
   }
   return true;
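
The function name emitted here, s${SEQNUM}_checkSpecificationForSave, is not arbitrary: it must match the name the connector reports to the framework so the generated form can call it before saving. A sketch of the Java side of that contract, matching the implementations added elsewhere in this patch:

    // The framework asks the connector for its presave-check function name
    // and wires it into the generated form; the template must define
    // exactly this name.
    public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
    {
      return "s" + connectionSequenceNumber + "_checkSpecificationForSave";
    }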
diff --git a/connectors/forcedmetadata/build.xml b/connectors/forcedmetadata/build.xml
index 80367b2..6c64367 100644
--- a/connectors/forcedmetadata/build.xml
+++ b/connectors/forcedmetadata/build.xml
@@ -32,7 +32,7 @@
 
     <target name="deliver-connector" depends="mcf-connector-build.deliver-connector">
         <antcall target="general-add-transformation-connector">
-            <param name="connector-label" value="Forced metadata"/>
+            <param name="connector-label" value="Metadata adjuster"/>
             <param name="connector-class" value="org.apache.manifoldcf.agents.transformation.forcedmetadata.ForcedMetadataConnector"/>
         </antcall>
     </target>
diff --git a/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java b/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java
index 2edf375..c657edb 100644
--- a/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java
+++ b/connectors/forcedmetadata/connector/src/main/java/org/apache/manifoldcf/agents/transformation/forcedmetadata/ForcedMetadataConnector.java
@@ -35,14 +35,19 @@
   // There will be node for every parameter/value pair.
   
   public static final String NODE_PAIR = "pair";
-  public static final String ATTR_PARAMETER = "parameter";
-  public static final String ATTR_VALUE = "value";
-  
+  public static final String ATTRIBUTE_PARAMETER = "parameter";
+  public static final String NODE_FIELDMAP = "fieldmap";
+  public static final String NODE_KEEPMETADATA = "keepAllMetadata";
+  public static final String ATTRIBUTE_SOURCE = "source";
+  public static final String ATTRIBUTE_TARGET = "target";
+  public static final String ATTRIBUTE_VALUE = "value";
+
   // Templates
   
   private static final String VIEW_SPEC = "viewSpecification.html";
   private static final String EDIT_SPEC_HEADER = "editSpecification.js";
   private static final String EDIT_SPEC_FORCED_METADATA = "editSpecification_ForcedMetadata.html";
+  private static final String EDIT_SPEC_FIELDMAPPING = "editSpecification_FieldMapping.html";
 
   /** Get a pipeline version string, given a pipeline specification object.  The version string is used to
   * uniquely describe the pertinent details of the specification and the configuration, to allow the Connector 
@@ -55,56 +60,12 @@
   *@return a string, of unlimited length, which uniquely describes configuration and specification in such a way that
   * if two such strings are equal, nothing that affects how or whether the document is indexed will be different.
   */
-  public String getPipelineDescription(Specification spec)
+  @Override
+  public VersionContext getPipelineDescription(Specification spec)
     throws ManifoldCFException, ServiceInterruption
   {
-    // Pull out the forced metadata, and pack them.
-    // We *could* use XML or JSON, but then we'd have to parse it later, and that's far more expensive than what we actually are going to do.
-    // Plus, these must be ordered to make strings comparable.
-    Map<String,Set<String>> parameters = new HashMap<String,Set<String>>();
-    for (int i = 0; i < spec.getChildCount(); i++)
-    {
-      SpecificationNode sn = spec.getChild(i);
-      if (sn.getType().equals(NODE_PAIR))
-      {
-        String parameter = sn.getAttributeValue(ATTR_PARAMETER);
-        String value = sn.getAttributeValue(ATTR_VALUE);
-        Set<String> params = parameters.get(parameter);
-        if (params == null)
-        {
-          params = new HashSet<String>();
-          parameters.put(parameter,params);
-        }
-        params.add(value);
-      }
-    }
-    
-    // Construct the string
-    StringBuilder sb = new StringBuilder();
-    // Get the keys and sort them
-    String[] keys = new String[parameters.size()];
-    int j = 0;
-    for (String key : parameters.keySet())
-    {
-      keys[j++] = key;
-    }
-    java.util.Arrays.sort(keys);
-    // Pack the list of keys
-    packList(sb,keys,'+');
-    // Now, go through each key and individually pack the values
-    for (String key : keys)
-    {
-      Set<String> values = parameters.get(key);
-      String[] valueArray = new String[values.size()];
-      j = 0;
-      for (String value : values)
-      {
-        valueArray[j++] = value;
-      }
-      java.util.Arrays.sort(valueArray);
-      packList(sb,valueArray,'+');
-    }
-    return sb.toString();
+    SpecPacker sp = new SpecPacker(spec);
+    return new VersionContext(sp.toPackedString(),params,spec);
   }
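
The packed version string only works for change detection if equal specifications always produce byte-identical strings, which is why SpecPacker sorts keys and values before packing them. A minimal demonstration of that invariant, with hypothetical key names:

    import java.util.Arrays;

    public class OrderingDemo {
      public static void main(String[] args) {
        // Two specs listing the same keys in different orders must pack
        // identically, so both are sorted first.
        String[] a = {"author", "source"};
        String[] b = {"source", "author"};
        Arrays.sort(a);
        Arrays.sort(b);
        System.out.println(Arrays.equals(a, b));  // true: packs will match
      }
    }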
 
   /** Add (or replace) a document in the output data store using the connector.
@@ -123,30 +84,55 @@
   *@return the document status (accepted or permanently rejected).
   *@throws IOException only if there's a stream error reading the document data.
   */
-  public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+  @Override
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption, IOException
   {
-    // Unpack the forced metadata and add it to the document
-    int index = 0;
-    List<String> keys = new ArrayList<String>();
-    index = unpackList(keys,pipelineDescription,index,'+');
-    // For each key, unpack its list of values
-    for (String key : keys)
+    // Unpack the forced metadata
+    SpecPacker sp = new SpecPacker(pipelineDescription.getVersionString());
+    // We have to create a copy of the Repository Document, since we might be rearranging things
+    RepositoryDocument docCopy = document.duplicate();
+    docCopy.clearFields();
+    // Do the mapping first!!
+    Iterator<String> fields = document.getFields();
+    while (fields.hasNext())
     {
-      List<String> values = new ArrayList<String>();
-      index = unpackList(values,pipelineDescription,index,'+');
-      String[] valueArray = (String[])values.toArray(new String[0]);
-      // Go through the value list and modify the repository document.
-      // This blows away existing values for the fields, if any.
-      // NOTE WELL: Upstream callers who set Reader metadata values (or anything that needs to be closed)
-      // are responsible for closing those resources, whether or not they remain in the RepositoryDocument
-      // object after indexing is done!!
-      document.addField(key,valueArray);
+      String field = fields.next();
+      Object[] fieldData = document.getField(field);
+      String target = sp.getMapping(field);
+      if (target != null)
+      {
+        if (fieldData instanceof Date[])
+          docCopy.addField(target,(Date[])fieldData);
+        else if (fieldData instanceof Reader[])
+          docCopy.addField(target,(Reader[])fieldData);
+        else if (fieldData instanceof String[])
+          docCopy.addField(target,(String[])fieldData);
+      }
+      else
+      {
+        if (sp.keepAllMetadata())
+        {
+          if (fieldData instanceof Date[])
+            docCopy.addField(field,(Date[])fieldData);
+          else if (fieldData instanceof Reader[])
+            docCopy.addField(field,(Reader[])fieldData);
+          else if (fieldData instanceof String[])
+            docCopy.addField(field,(String[])fieldData);
+        }
+      }
+    }
+
+    Iterator<String> keys = sp.getParameterKeys();
+    while (keys.hasNext())
+    {
+      String key = keys.next();
+      docCopy.addField(key,sp.getParameterValues(key));
     }
     // Finally, send the modified repository document onward to the next pipeline stage.
     // If we'd done anything to the stream, we'd have needed to create a new RepositoryDocument object and copied the
     // data into it, and closed the new stream after sendDocument() was called.
-    return activities.sendDocument(documentURI,document,authorityNameString);
+    return activities.sendDocument(documentURI,docCopy);
   }
 
   // UI support methods.
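
The rewritten method applies the specification in a fixed order: mapped fields are copied under their target names, unmapped fields survive only when keepAllMetadata is set, and forced parameter values are added last, so they win on name collisions. A worked example of the resulting field set, with hypothetical field names and a plain Map standing in for RepositoryDocument:

    import java.util.*;

    public class MappingDemo {
      public static void main(String[] args) {
        Map<String,String> mapping = Collections.singletonMap("author", "creator");
        boolean keepAllMetadata = false;
        Map<String,String> in = new LinkedHashMap<>();
        in.put("author", "kw");
        in.put("size", "10");               // no mapping for "size"
        Map<String,String> out = new LinkedHashMap<>();
        for (Map.Entry<String,String> e : in.entrySet()) {
          String target = mapping.get(e.getKey());
          if (target != null)
            out.put(target, e.getValue());  // renamed by the field map
          else if (keepAllMetadata)
            out.put(e.getKey(), e.getValue());
        }
        out.put("source", "web");           // forced pair, added last
        System.out.println(out);            // {creator=kw, source=web}
      }
    }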
@@ -162,6 +148,7 @@
   *@param connectionSequenceNumber is the unique number of this connection within the job.
   *@return the name of the form check javascript method.
   */
+  @Override
   public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
   {
     return "s"+connectionSequenceNumber+"_checkSpecification";
@@ -171,6 +158,7 @@
   *@param connectionSequenceNumber is the unique number of this connection within the job.
   *@return the name of the form presave check javascript method.
   */
+  @Override
   public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
   {
     return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
@@ -185,6 +173,7 @@
   *@param connectionSequenceNumber is the unique number of this connection within the job.
   *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
   */
+  @Override
   public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
     int connectionSequenceNumber, List<String> tabsArray)
     throws ManifoldCFException, IOException
@@ -192,10 +181,11 @@
     // Output specification header
     
     // Add Forced Metadata to tab array
+    tabsArray.add(Messages.getString(locale, "ForcedMetadata.FieldMappingTabName"));
     tabsArray.add(Messages.getString(locale, "ForcedMetadata.ForcedMetadata"));
 
     Map<String, Object> paramMap = new HashMap<String, Object>();
-    paramMap.put("SeqNum",Integer.toString(connectionSequenceNumber));
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
 
     Messages.outputResourceWithVelocity(out,locale,EDIT_SPEC_HEADER,paramMap);
   }
@@ -211,18 +201,22 @@
   *@param actualSequenceNumber is the connection within the job that has currently been selected.
   *@param tabName is the current tab name.
   */
+  @Override
   public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os,
     int connectionSequenceNumber, int actualSequenceNumber, String tabName)
     throws ManifoldCFException, IOException
   {
     // Output specification body
     Map<String, Object> paramMap = new HashMap<String, Object>();
-    paramMap.put("TabName", tabName);
-    paramMap.put("SeqNum",Integer.toString(connectionSequenceNumber));
-    paramMap.put("SelectedNum",Integer.toString(actualSequenceNumber));
+    paramMap.put("TABNAME", tabName);
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
+    paramMap.put("SELECTEDNUM",Integer.toString(actualSequenceNumber));
 
     fillInForcedMetadataTab(paramMap, os);
+    fillInFieldMappingSpecificationMap(paramMap, os);
+
     Messages.outputResourceWithVelocity(out,locale,EDIT_SPEC_FORCED_METADATA,paramMap);
+    Messages.outputResourceWithVelocity(out,locale,EDIT_SPEC_FIELDMAPPING,paramMap);
   }
   
   /** Process a specification post.
@@ -235,13 +229,14 @@
   *@param connectionSequenceNumber is the unique number of this connection within the job.
   *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
   */
+  @Override
   public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
     int connectionSequenceNumber)
     throws ManifoldCFException
   {
     // Process specification post
-    String prefix = "s"+connectionSequenceNumber+"_";
-    String forcedCount = variableContext.getParameter(prefix+"forcedmetadata_count");
+    String seqPrefix = "s"+connectionSequenceNumber+"_";
+    String forcedCount = variableContext.getParameter(seqPrefix+"forcedmetadata_count");
     if (forcedCount != null)
     {
       int count = Integer.parseInt(forcedCount);
@@ -258,28 +253,92 @@
       // Now, go through form data
       for (int j = 0; j < count; j++)
       {
-        String op = variableContext.getParameter(prefix+"forcedmetadata_"+j+"_op");
+        String op = variableContext.getParameter(seqPrefix+"forcedmetadata_"+j+"_op");
         if (op != null && op.equals("Delete"))
           continue;
-        String paramName = variableContext.getParameter(prefix+"forcedmetadata_"+j+"_name");
-        String paramValue = variableContext.getParameter(prefix+"forcedmetadata_"+j+"_value");
+        String paramName = variableContext.getParameter(seqPrefix+"forcedmetadata_"+j+"_name");
+        String paramValue = variableContext.getParameter(seqPrefix+"forcedmetadata_"+j+"_value");
         SpecificationNode sn = new SpecificationNode(NODE_PAIR);
-        sn.setAttribute(ATTR_PARAMETER,paramName);
-        sn.setAttribute(ATTR_VALUE,paramValue);
+        sn.setAttribute(ATTRIBUTE_PARAMETER,paramName);
+        sn.setAttribute(ATTRIBUTE_VALUE,paramValue);
         os.addChild(os.getChildCount(),sn);
       }
       // Look for add operation
-      String addOp = variableContext.getParameter(prefix+"forcedmetadata_op");
+      String addOp = variableContext.getParameter(seqPrefix+"forcedmetadata_op");
       if (addOp != null && addOp.equals("Add"))
       {
-        String paramName = variableContext.getParameter(prefix+"forcedmetadata_name");
-        String paramValue = variableContext.getParameter(prefix+"forcedmetadata_value");
+        String paramName = variableContext.getParameter(seqPrefix+"forcedmetadata_name");
+        String paramValue = variableContext.getParameter(seqPrefix+"forcedmetadata_value");
         SpecificationNode sn = new SpecificationNode(NODE_PAIR);
-        sn.setAttribute(ATTR_PARAMETER,paramName);
-        sn.setAttribute(ATTR_VALUE,paramValue);
+        sn.setAttribute(ATTRIBUTE_PARAMETER,paramName);
+        sn.setAttribute(ATTRIBUTE_VALUE,paramValue);
         os.addChild(os.getChildCount(),sn);
       }
     }
+    
+    String x = variableContext.getParameter(seqPrefix+"fieldmapping_count");
+    if (x != null && x.length() > 0)
+    {
+      // About to gather the fieldmapping nodes, so get rid of the old ones.
+      int i = 0;
+      while (i < os.getChildCount())
+      {
+        SpecificationNode node = os.getChild(i);
+        if (node.getType().equals(NODE_FIELDMAP) || node.getType().equals(NODE_KEEPMETADATA))
+          os.removeChild(i);
+        else
+          i++;
+      }
+      int count = Integer.parseInt(x);
+      i = 0;
+      while (i < count)
+      {
+        String prefix = seqPrefix+"fieldmapping_";
+        String suffix = "_"+Integer.toString(i);
+        String op = variableContext.getParameter(prefix+"op"+suffix);
+        if (op == null || !op.equals("Delete"))
+        {
+          // Gather the fieldmap etc.
+          String source = variableContext.getParameter(prefix+"source"+suffix);
+          String target = variableContext.getParameter(prefix+"target"+suffix);
+          if (target == null)
+            target = "";
+          SpecificationNode node = new SpecificationNode(NODE_FIELDMAP);
+          node.setAttribute(ATTRIBUTE_SOURCE,source);
+          node.setAttribute(ATTRIBUTE_TARGET,target);
+          os.addChild(os.getChildCount(),node);
+        }
+        i++;
+      }
+      
+      String addop = variableContext.getParameter(seqPrefix+"fieldmapping_op");
+      if (addop != null && addop.equals("Add"))
+      {
+        String source = variableContext.getParameter(seqPrefix+"fieldmapping_source");
+        String target = variableContext.getParameter(seqPrefix+"fieldmapping_target");
+        if (target == null)
+          target = "";
+        SpecificationNode node = new SpecificationNode(NODE_FIELDMAP);
+        node.setAttribute(ATTRIBUTE_SOURCE,source);
+        node.setAttribute(ATTRIBUTE_TARGET,target);
+        os.addChild(os.getChildCount(),node);
+      }
+      
+      // Gather the keep all metadata parameter to be the last one
+      SpecificationNode node = new SpecificationNode(NODE_KEEPMETADATA);
+      String keepAll = variableContext.getParameter(seqPrefix+"keepallmetadata");
+      if (keepAll != null)
+      {
+        node.setAttribute(ATTRIBUTE_VALUE, keepAll);
+      }
+      else
+      {
+        node.setAttribute(ATTRIBUTE_VALUE, "false");
+      }
+      // Add the new keepallmetadata config parameter 
+      os.addChild(os.getChildCount(), node);
+    }
+
     return null;
   }
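
The post handler above relies on a small form protocol: fieldmapping_count bounds the per-row loop, each row carries an op that may be set to Delete, a trailing fieldmapping_op of Add appends the edit row, and keepallmetadata defaults to false when absent because unchecked checkboxes are never posted. A condensed, runnable sketch of that contract, with a plain Map standing in for IPostParameters:

    import java.util.*;

    public class FormPostDemo {
      public static void main(String[] args) {
        Map<String,String> form = new HashMap<>();
        form.put("s1_fieldmapping_count", "2");
        form.put("s1_fieldmapping_op_0", "Continue");
        form.put("s1_fieldmapping_op_1", "Delete");
        // s1_keepallmetadata deliberately absent (checkbox unchecked)

        String prefix = "s1_";
        int count = Integer.parseInt(form.get(prefix + "fieldmapping_count"));
        for (int i = 0; i < count; i++) {
          if ("Delete".equals(form.get(prefix + "fieldmapping_op_" + i)))
            continue;                      // row i dropped from the spec
          System.out.println("keep row " + i);
        }
        boolean keepAll = form.get(prefix + "keepallmetadata") != null;
        System.out.println("keepAllMetadata=" + keepAll);  // false
      }
    }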
   
@@ -291,21 +350,57 @@
   *@param connectionSequenceNumber is the unique number of this connection within the job.
   *@param os is the current pipeline specification for this job.
   */
+  @Override
   public void viewSpecification(IHTTPOutput out, Locale locale, Specification os,
     int connectionSequenceNumber)
     throws ManifoldCFException, IOException
   {
     // View specification
     Map<String, Object> paramMap = new HashMap<String, Object>();
-    paramMap.put("SeqNum",Integer.toString(connectionSequenceNumber));
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
     
     // Fill in the map with data from all tabs
     fillInForcedMetadataTab(paramMap, os);
+    fillInFieldMappingSpecificationMap(paramMap, os);
 
     Messages.outputResourceWithVelocity(out,locale,VIEW_SPEC,paramMap);
   }
 
-  protected void fillInForcedMetadataTab(Map<String,Object> paramMap, Specification os)
+  protected static void fillInFieldMappingSpecificationMap(Map<String,Object> paramMap, Specification os)
+  {
+    // Prep for field mappings
+    List<Map<String,String>> fieldMappings = new ArrayList<Map<String,String>>();
+    String keepAllMetadataValue = "true";
+    for (int i = 0; i < os.getChildCount(); i++)
+    {
+      SpecificationNode sn = os.getChild(i);
+      if (sn.getType().equals(NODE_FIELDMAP)) {
+        String source = sn.getAttributeValue(ATTRIBUTE_SOURCE);
+        String target = sn.getAttributeValue(ATTRIBUTE_TARGET);
+        String targetDisplay;
+        if (target == null)
+        {
+          target = "";
+          targetDisplay = "(remove)";
+        }
+        else
+          targetDisplay = target;
+        Map<String,String> fieldMapping = new HashMap<String,String>();
+        fieldMapping.put("SOURCE",source);
+        fieldMapping.put("TARGET",target);
+        fieldMapping.put("TARGETDISPLAY",targetDisplay);
+        fieldMappings.add(fieldMapping);
+      }
+      else if (sn.getType().equals(NODE_KEEPMETADATA))
+      {
+        keepAllMetadataValue = sn.getAttributeValue(ATTRIBUTE_VALUE);
+      }
+    }
+    paramMap.put("FIELDMAPPINGS",fieldMappings);
+    paramMap.put("KEEPALLMETADATA",keepAllMetadataValue);
+  }
+
+  protected static void fillInForcedMetadataTab(Map<String,Object> paramMap, Specification os)
   {
     // First, sort everything
     Map<String,Set<String>> params = new HashMap<String,Set<String>>();
@@ -314,8 +409,8 @@
       SpecificationNode sn = os.getChild(i);
       if (sn.getType().equals(NODE_PAIR))
       {
-        String parameter = sn.getAttributeValue(ATTR_PARAMETER);
-        String value = sn.getAttributeValue(ATTR_VALUE);
+        String parameter = sn.getAttributeValue(ATTRIBUTE_PARAMETER);
+        String value = sn.getAttributeValue(ATTRIBUTE_VALUE);
         Set<String> values = params.get(parameter);
         if (values == null)
         {
@@ -358,7 +453,167 @@
       }
     }
     
-    paramMap.put("Parameters",pObject);
+    paramMap.put("PARAMETERS",pObject);
+  }
+
+  protected static class SpecPacker {
+    
+    private final Map<String,String> sourceTargets = new HashMap<String,String>();
+    private final boolean keepAllMetadata;
+    private final Map<String,Set<String>> parameters = new HashMap<String,Set<String>>();
+
+    public SpecPacker(Specification os) {
+      boolean keepAllMetadata = true;
+      for (int i = 0; i < os.getChildCount(); i++) {
+        SpecificationNode sn = os.getChild(i);
+        
+        if(sn.getType().equals(NODE_KEEPMETADATA)) {
+          String value = sn.getAttributeValue(ATTRIBUTE_VALUE);
+          keepAllMetadata = Boolean.parseBoolean(value);
+        } else if (sn.getType().equals(NODE_FIELDMAP)) {
+          String source = sn.getAttributeValue(ATTRIBUTE_SOURCE);
+          String target = sn.getAttributeValue(ATTRIBUTE_TARGET);
+          
+          if (target == null) {
+            target = "";
+          }
+          sourceTargets.put(source, target);
+        }
+        else if (sn.getType().equals(NODE_PAIR))
+        {
+          String parameter = sn.getAttributeValue(ATTRIBUTE_PARAMETER);
+          String value = sn.getAttributeValue(ATTRIBUTE_VALUE);
+          Set<String> params = parameters.get(parameter);
+          if (params == null)
+          {
+            params = new HashSet<String>();
+            parameters.put(parameter,params);
+          }
+          params.add(value);
+        }
+      }
+      this.keepAllMetadata = keepAllMetadata;
+    }
+    
+    public SpecPacker(String packedString) {
+      
+      int index = 0;
+      
+      // Mappings
+      final List<String> packedMappings = new ArrayList<String>();
+      index = unpackList(packedMappings,packedString,index,'+');
+      String[] fixedList = new String[2];
+      for (String packedMapping : packedMappings) {
+        unpackFixedList(fixedList,packedMapping,0,':');
+        sourceTargets.put(fixedList[0], fixedList[1]);
+      }
+      
+      // Keep all metadata
+      if (packedString.length() > index)
+        keepAllMetadata = (packedString.charAt(index++) == '+');
+      else
+        keepAllMetadata = true;
+      
+      List<String> keys = new ArrayList<String>();
+      index = unpackList(keys,packedString,index,'+');
+      // For each key, unpack its list of values
+      for (String key : keys)
+      {
+        List<String> values = new ArrayList<String>();
+        index = unpackList(values,packedString,index,'+');
+        Set<String> valueSet = new HashSet<String>();
+        for (String value : values)
+        {
+          valueSet.add(value);
+        }
+        parameters.put(key,valueSet);
+      }
+
+    }
+    
+    public String toPackedString() {
+      StringBuilder sb = new StringBuilder();
+      int i;
+      
+      // Mappings
+      final String[] sortArray = new String[sourceTargets.size()];
+      i = 0;
+      for (String source : sourceTargets.keySet()) {
+        sortArray[i++] = source;
+      }
+      java.util.Arrays.sort(sortArray);
+      
+      List<String> packedMappings = new ArrayList<String>();
+      String[] fixedList = new String[2];
+      for (String source : sortArray) {
+        String target = sourceTargets.get(source);
+        StringBuilder localBuffer = new StringBuilder();
+        fixedList[0] = source;
+        fixedList[1] = target;
+        packFixedList(localBuffer,fixedList,':');
+        packedMappings.add(localBuffer.toString());
+      }
+      packList(sb,packedMappings,'+');
+
+      // Keep all metadata
+      if (keepAllMetadata)
+        sb.append('+');
+      else
+        sb.append('-');
+      
+      // Get the keys and sort them
+      final String[] keys = new String[parameters.size()];
+      int j = 0;
+      for (String key : parameters.keySet())
+      {
+        keys[j++] = key;
+      }
+      java.util.Arrays.sort(keys);
+      // Pack the list of keys
+      packList(sb,keys,'+');
+      // Now, go through each key and individually pack the values
+      for (String key : keys)
+      {
+        Set<String> values = parameters.get(key);
+        String[] valueArray = new String[values.size()];
+        j = 0;
+        for (String value : values)
+        {
+          valueArray[j++] = value;
+        }
+        java.util.Arrays.sort(valueArray);
+        packList(sb,valueArray,'+');
+      }
+
+      return sb.toString();
+    }
+    
+    public String getMapping(String source) {
+      return sourceTargets.get(source);
+    }
+    
+    public boolean keepAllMetadata() {
+      return keepAllMetadata;
+    }
+    
+    public Iterator<String> getParameterKeys()
+    {
+      return parameters.keySet().iterator();
+    }
+    
+    public String[] getParameterValues(String key)
+    {
+      Set<String> values = parameters.get(key);
+      if (values == null)
+        return null;
+      String[] rval = new String[values.size()];
+      int i = 0;
+      for (String value : values)
+      {
+        rval[i++] = value;
+      }
+      return rval;
+    }
   }
 
 }
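
SpecPacker's two constructors are intended to be inverses: one packs a Specification into the version string, the other rebuilds identical state from that string inside addOrReplaceDocumentWithException, with the single '+' or '-' character between the mapping list and the parameter list carrying the keepAllMetadata flag. A hedged round-trip check (hypothetical harness; spec assumed in scope):

    // Round-trip property, sketched: for any Specification spec,
    // new SpecPacker(new SpecPacker(spec).toPackedString()) must report
    // the same mappings, keepAllMetadata flag, and parameters.
    SpecPacker packed = new SpecPacker(spec);
    SpecPacker reread = new SpecPacker(packed.toPackedString());
    assert reread.keepAllMetadata() == packed.keepAllMetadata();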
diff --git a/connectors/forcedmetadata/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/forcedmetadata/common_en_US.properties b/connectors/forcedmetadata/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/forcedmetadata/common_en_US.properties
index 2f713d1..29583fe 100644
--- a/connectors/forcedmetadata/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/forcedmetadata/common_en_US.properties
+++ b/connectors/forcedmetadata/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/forcedmetadata/common_en_US.properties
@@ -13,13 +13,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ForcedMetadata.ForcedMetadata=Forced metadata
-ForcedMetadata.ForcedMetadataNameMustNotBeNull=Forced metadata name must not be null
-ForcedMetadata.ForcedMetadataColon=Forced metadata:
+ForcedMetadata.ForcedMetadata=Add metadata
+ForcedMetadata.FieldMappingTabName=Move metadata
+
+ForcedMetadata.ForcedMetadataNameMustNotBeNull=Added metadata name must not be null
+ForcedMetadata.ForcedMetadataColon=Added metadata:
 ForcedMetadata.ParameterName=Parameter name
 ForcedMetadata.ParameterValue=Parameter value
 ForcedMetadata.Delete=Delete
-ForcedMetadata.Deleteforcedmetadatanumber=Delete forced metadata #
+ForcedMetadata.Deleteforcedmetadatanumber=Delete added metadata #
 ForcedMetadata.Add=Add
-ForcedMetadata.Addforcedmetadata=Add forced metadata item
-ForcedMetadata.NoForcedMetadataSpecified=No forced metadata specified
+ForcedMetadata.Addforcedmetadata=Add metadata item
+ForcedMetadata.NoForcedMetadataSpecified=No added metadata specified
+ForcedMetadata.FieldMappings=Metadata mappings:
+ForcedMetadata.MetadataFieldName=Incoming metadata name
+ForcedMetadata.FinalFieldName=Final metadata name
+ForcedMetadata.NoFieldMappingSpecified=No metadata mapping specified
+ForcedMetadata.KeepAllMetadata=Keep all incoming metadata
+ForcedMetadata.Add=Add
+ForcedMetadata.AddFieldMapping=Add metadata mapping
+ForcedMetadata.Delete=Delete
+ForcedMetadata.DeleteFieldMapping=Delete metadata mapping #
+ForcedMetadata.NoFieldNameSpecified=Please specify a metadata name
diff --git a/connectors/forcedmetadata/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/forcedmetadata/common_ja_JP.properties b/connectors/forcedmetadata/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/forcedmetadata/common_ja_JP.properties
index 2f713d1..29583fe 100644
--- a/connectors/forcedmetadata/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/forcedmetadata/common_ja_JP.properties
+++ b/connectors/forcedmetadata/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/forcedmetadata/common_ja_JP.properties
@@ -13,13 +13,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-ForcedMetadata.ForcedMetadata=Forced metadata
-ForcedMetadata.ForcedMetadataNameMustNotBeNull=Forced metadata name must not be null
-ForcedMetadata.ForcedMetadataColon=Forced metadata:
+ForcedMetadata.ForcedMetadata=Add metadata
+ForcedMetadata.FieldMappingTabName=Move metadata
+
+ForcedMetadata.ForcedMetadataNameMustNotBeNull=Added metadata name must not be null
+ForcedMetadata.ForcedMetadataColon=Added metadata:
 ForcedMetadata.ParameterName=Parameter name
 ForcedMetadata.ParameterValue=Parameter value
 ForcedMetadata.Delete=Delete
-ForcedMetadata.Deleteforcedmetadatanumber=Delete forced metadata #
+ForcedMetadata.Deleteforcedmetadatanumber=Delete added metadata #
 ForcedMetadata.Add=Add
-ForcedMetadata.Addforcedmetadata=Add forced metadata item
-ForcedMetadata.NoForcedMetadataSpecified=No forced metadata specified
+ForcedMetadata.Addforcedmetadata=Add metadata item
+ForcedMetadata.NoForcedMetadataSpecified=No added metadata specified
+ForcedMetadata.FieldMappings=Metadata mappings:
+ForcedMetadata.MetadataFieldName=Incoming metadata name
+ForcedMetadata.FinalFieldName=Final metadata name
+ForcedMetadata.NoFieldMappingSpecified=No metadata mapping specified
+ForcedMetadata.KeepAllMetadata=Keep all incoming metadata
+ForcedMetadata.Add=Add
+ForcedMetadata.AddFieldMapping=Add metadata mapping
+ForcedMetadata.Delete=Delete
+ForcedMetadata.DeleteFieldMapping=Delete metadata mapping #
+ForcedMetadata.NoFieldNameSpecified=Please specify a metadata name
diff --git a/connectors/forcedmetadata/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/forcedmetadata/common_zh_CN.properties b/connectors/forcedmetadata/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/forcedmetadata/common_zh_CN.properties
new file mode 100644
index 0000000..29583fe
--- /dev/null
+++ b/connectors/forcedmetadata/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/forcedmetadata/common_zh_CN.properties
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ForcedMetadata.ForcedMetadata=Add metadata
+ForcedMetadata.FieldMappingTabName=Move metadata
+
+ForcedMetadata.ForcedMetadataNameMustNotBeNull=Added metadata name must not be null
+ForcedMetadata.ForcedMetadataColon=Added metadata:
+ForcedMetadata.ParameterName=Parameter name
+ForcedMetadata.ParameterValue=Parameter value
+ForcedMetadata.Delete=Delete
+ForcedMetadata.Deleteforcedmetadatanumber=Delete added metadata #
+ForcedMetadata.Add=Add
+ForcedMetadata.Addforcedmetadata=Add metadata item
+ForcedMetadata.NoForcedMetadataSpecified=No added metadata specified
+ForcedMetadata.FieldMappings=Metadata mappings:
+ForcedMetadata.MetadataFieldName=Incoming metadata name
+ForcedMetadata.FinalFieldName=Final metadata name
+ForcedMetadata.NoFieldMappingSpecified=No metadata mapping specified
+ForcedMetadata.KeepAllMetadata=Keep all incoming metadata
+ForcedMetadata.Add=Add
+ForcedMetadata.AddFieldMapping=Add metadata mapping
+ForcedMetadata.Delete=Delete
+ForcedMetadata.DeleteFieldMapping=Delete metadata mapping #
+ForcedMetadata.NoFieldNameSpecified=Please specify a metadata name
diff --git a/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/editSpecification.js b/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/editSpecification.js
index 6145846..2558dde 100644
--- a/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/editSpecification.js
+++ b/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/editSpecification.js
@@ -18,33 +18,58 @@
 <script type="text/javascript">
 <!--
 
-function s${SeqNum}_AddForcedMetadata()
+function s${SEQNUM}_AddForcedMetadata()
 {
-  if (editjob.s${SeqNum}_forcedmetadata_name.value == "")
+  if (editjob.s${SEQNUM}_forcedmetadata_name.value == "")
   {
     alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('ForcedMetadata.ForcedMetadataNameMustNotBeNull'))");
-    editjob.s${SeqNum}_forcedmetadata_name.focus();
+    editjob.s${SEQNUM}_forcedmetadata_name.focus();
     return;
   }
-  document.editjob.s${SeqNum}_forcedmetadata_op.value="Add";
-  postFormSetAnchor("s${SeqNum}_forcedmetadata_tag");
+  document.editjob.s${SEQNUM}_forcedmetadata_op.value="Add";
+  postFormSetAnchor("s${SEQNUM}_forcedmetadata_tag");
 }
 	
-function s${SeqNum}_DeleteForcedMetadata(n)
+function s${SEQNUM}_DeleteForcedMetadata(n)
 {
-  eval("document.editjob.s${SeqNum}_forcedmetadata_"+n+"_op.value = 'Delete'");
+  eval("document.editjob.s${SEQNUM}_forcedmetadata_"+n+"_op.value = 'Delete'");
   if (n == 0)
-    postFormSetAnchor("s${SeqNum}_forcedmetadata_tag");
+    postFormSetAnchor("s${SEQNUM}_forcedmetadata_tag");
   else
-    postFormSetAnchor("s${SeqNum}_forcedmetadata_"+(n-1)+"_tag");
+    postFormSetAnchor("s${SEQNUM}_forcedmetadata_"+(n-1)+"_tag");
 }
 
-function s${SeqNum}_checkSpecificationForSave()
+function s${SEQNUM}_addFieldMapping()
+{
+  if (editjob.s${SEQNUM}_fieldmapping_source.value == "")
+  {
+    alert("$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.NoFieldNameSpecified'))");
+    editjob.s${SEQNUM}_fieldmapping_source.focus();
+    return;
+  }
+  editjob.s${SEQNUM}_fieldmapping_op.value="Add";
+  postFormSetAnchor("s${SEQNUM}_fieldmapping");
+}
+
+function s${SEQNUM}_deleteFieldMapping(i)
+{
+  // Set the operation
+  eval("editjob.s${SEQNUM}_fieldmapping_op_"+i+".value=\"Delete\"");
+  // Submit
+  if (editjob.s${SEQNUM}_fieldmapping_count.value==i)
+    postFormSetAnchor("s${SEQNUM}_fieldmapping");
+  else
+    postFormSetAnchor("s${SEQNUM}_fieldmapping_"+i)
+  // Undo, so we won't get two deletes next time
+  eval("editjob.s${SEQNUM}_fieldmapping_op_"+i+".value=\"Continue\"");
+}
+
+function s${SEQNUM}_checkSpecificationForSave()
 {
   return true;
 }
 
-function s${SeqNum}_checkSpecification()
+function s${SEQNUM}_checkSpecification()
 {
   return true;
 }
diff --git a/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/editSpecification_FieldMapping.html b/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/editSpecification_FieldMapping.html
new file mode 100644
index 0000000..56f4b67
--- /dev/null
+++ b/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/editSpecification_FieldMapping.html
@@ -0,0 +1,107 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+#if($TABNAME == $ResourceBundle.getString('ForcedMetadata.FieldMappingTabName') && ${SEQNUM} == ${SELECTEDNUM})
+
+<table class="displaytable">
+  <tr><td class="separator" colspan="2"><hr/></td></tr>
+  <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.FieldMappings'))</nobr></td>
+    <td class="boxcell">
+      <table class="formtable">
+        <tr class="formheaderrow">
+          <td class="formcolumnheader"></td>
+          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.MetadataFieldName'))</nobr></td>
+          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.FinalFieldName'))</nobr></td>
+        </tr>
+
+  #set($fieldcounter = 0)
+  #foreach($fieldmapping in $FIELDMAPPINGS)
+    #set($fieldcounterdisplay = $fieldcounter + 1)
+    #if(($fieldcounter % 2) == 0)
+        <tr class="evenformrow">
+    #else
+        <tr class="oddformrow">
+    #end
+          <td class="formcolumncell">
+            <a name="s${SEQNUM}_fieldmapping_$fieldcounter">
+              <input type="button" value="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.Delete'))" alt="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.DeleteFieldMapping'))$fieldcounterdisplay" onclick='javascript:s${SEQNUM}_deleteFieldMapping("$fieldcounter");'/>
+              <input type="hidden" name="s${SEQNUM}_fieldmapping_op_$fieldcounter" value="Continue"/>
+              <input type="hidden" name="s${SEQNUM}_fieldmapping_source_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('SOURCE'))"/>
+              <input type="hidden" name="s${SEQNUM}_fieldmapping_target_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('TARGET'))"/>
+            </a>
+          </td>
+          <td class="formcolumncell">
+            <nobr>$Encoder.bodyEscape($fieldmapping.get('SOURCE'))</nobr>
+          </td>
+          <td class="formcolumncell">
+            <nobr>$Encoder.bodyEscape($fieldmapping.get('TARGETDISPLAY'))</nobr>
+          </td>
+        </tr>
+    #set($fieldcounter = $fieldcounter + 1)
+  #end
+  
+  #if($fieldcounter == 0)
+        <tr class="formrow"><td class="formmessage" colspan="3">$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.NoFieldMappingSpecified'))</td></tr>
+  #end
+      
+        <tr class="formrow"><td class="formseparator" colspan="3"><hr/></td></tr>
+        <tr class="formrow">
+          <td class="formcolumncell">
+            <a name="fieldmapping">
+              <input type="button" value="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.Add'))" alt="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.AddFieldMapping'))" onclick="javascript:s${SEQNUM}_addFieldMapping();"/>
+            </a>
+            <input type="hidden" name="s${SEQNUM}_fieldmapping_count" value="$fieldcounter"/>
+            <input type="hidden" name="s${SEQNUM}_fieldmapping_op" value="Continue"/>
+          </td>
+          <td class="formcolumncell">
+            <nobr><input type="text" size="15" name="s${SEQNUM}_fieldmapping_source" value=""/></nobr>
+          </td>
+          <td class="formcolumncell">
+            <nobr><input type="text" size="15" name="s${SEQNUM}_fieldmapping_target" value=""/></nobr>
+          </td>
+        </tr>
+      </table>
+    </td>
+  </tr>
+  
+  <tr><td class="separator" colspan="2"><hr/></td></tr>
+  
+  <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.KeepAllMetadata'))</nobr></td>
+    <td class="value">
+  #if($KEEPALLMETADATA == 'true')
+       <input type="checkbox" checked="true" name="s${SEQNUM}_keepallmetadata" value="true"/>
+  #else
+       <input type="checkbox" name="s${SEQNUM}_keepallmetadata" value="true"/>
+  #end
+    </td>
+  </tr>
+</table>
+      
+#else
+
+  #set($fieldcounter = 0)
+  #foreach($fieldmapping in $FIELDMAPPINGS)
+<input type="hidden" name="s${SEQNUM}_fieldmapping_source_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('SOURCE'))"/>
+<input type="hidden" name="s${SEQNUM}_fieldmapping_target_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('TARGET'))"/>
+    #set($fieldcounter = $fieldcounter + 1)
+  #end
+<input type="hidden" name="s${SEQNUM}_fieldmapping_count" value="$fieldcounter"/>
+<input type="hidden" name="s${SEQNUM}_keepallmetadata" value="$Encoder.bodyEscape($KEEPALLMETADATA)"/>
+
+#end
\ No newline at end of file
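
The template renders its editable table only when both the tab name and the selected stage match; for every other stage the same values are emitted as hidden inputs, so processSpecificationPost always receives complete state regardless of which tab submitted the form. The guard compares the three Velocity variables the connector populates, as in the Java change above:

    Map<String, Object> paramMap = new HashMap<String, Object>();
    paramMap.put("TABNAME", tabName);                                    // current tab
    paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));  // this stage
    paramMap.put("SELECTEDNUM", Integer.toString(actualSequenceNumber)); // selected stage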
diff --git a/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/editSpecification_ForcedMetadata.html b/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/editSpecification_ForcedMetadata.html
index ce1f602..abe4079 100644
--- a/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/editSpecification_ForcedMetadata.html
+++ b/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/editSpecification_ForcedMetadata.html
@@ -15,7 +15,7 @@
  limitations under the License.
 -->
 
-#if($TabName == $ResourceBundle.getString('ForcedMetadata.ForcedMetadata') && $SeqNum == $SelectedNum)
+#if($TABNAME == $ResourceBundle.getString('ForcedMetadata.ForcedMetadata') && $SEQNUM == $SELECTEDNUM)
 
 <table class="displaytable">
   <tr>
@@ -31,18 +31,18 @@
           <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.ParameterValue'))</nobr></td>
         </tr>
   #set($paramcounter = 0)
-  #foreach($paramrecord in $Parameters)
+  #foreach($paramrecord in $PARAMETERS)
     #if(($paramcounter % 2) == 0)
         <tr class="evenformrow">
     #else
         <tr class="oddformrow">
     #end
           <td class="formcolumncell">
-            <a name="s${SeqNum}_forcedmetadata_${paramcounter}_tag"/>
-            <input type="button" value="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.Delete'))" alt="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.Deleteforcedmetadatanumber'))${paramcounter}" onclick='javascript:s${SeqNum}_DeleteForcedMetadata(${paramcounter});'/>
-            <input type="hidden" name="s${SeqNum}_forcedmetadata_${paramcounter}_op" value="Continue"/>
-            <input type="hidden" name="s${SeqNum}_forcedmetadata_${paramcounter}_name" value="$Encoder.attributeEscape($paramrecord.get('parameter'))"/>
-            <input type="hidden" name="s${SeqNum}_forcedmetadata_${paramcounter}_value" value="$Encoder.attributeEscape($paramrecord.get('value'))"/>
+            <a name="s${SEQNUM}_forcedmetadata_${paramcounter}_tag"/>
+            <input type="button" value="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.Delete'))" alt="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.Deleteforcedmetadatanumber'))${paramcounter}" onclick='javascript:s${SEQNUM}_DeleteForcedMetadata(${paramcounter});'/>
+            <input type="hidden" name="s${SEQNUM}_forcedmetadata_${paramcounter}_op" value="Continue"/>
+            <input type="hidden" name="s${SEQNUM}_forcedmetadata_${paramcounter}_name" value="$Encoder.attributeEscape($paramrecord.get('parameter'))"/>
+            <input type="hidden" name="s${SEQNUM}_forcedmetadata_${paramcounter}_value" value="$Encoder.attributeEscape($paramrecord.get('value'))"/>
           </td>
           <td class="formcolumncell">
             <nobr>$Encoder.bodyEscape($paramrecord.get('parameter'))</nobr>
@@ -59,16 +59,16 @@
         <tr class="formrow"><td colspan="3" class="formseparator"><hr/></td></tr>
         <tr class="formrow">
           <td class="formcolumncell">
-            <a name="s${SeqNum}_forcedmetadata_tag"/>
-            <input type="hidden" name="s${SeqNum}_forcedmetadata_op" value="Continue"/>
-            <input type="button" value="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.Add'))" alt="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.Addforcedmetadata'))" onclick="javascript:s${SeqNum}_AddForcedMetadata();"/>
-            <input type="hidden" name="s${SeqNum}_forcedmetadata_count" value="${paramcounter}"/>
+            <a name="s${SEQNUM}_forcedmetadata_tag"/>
+            <input type="hidden" name="s${SEQNUM}_forcedmetadata_op" value="Continue"/>
+            <input type="button" value="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.Add'))" alt="$Encoder.attributeEscape($ResourceBundle.getString('ForcedMetadata.Addforcedmetadata'))" onclick="javascript:s${SEQNUM}_AddForcedMetadata();"/>
+            <input type="hidden" name="s${SEQNUM}_forcedmetadata_count" value="${paramcounter}"/>
           </td>
           <td class="formcolumncell">
-            <input type="text" name="s${SeqNum}_forcedmetadata_name" size="30" value=""/>
+            <input type="text" name="s${SEQNUM}_forcedmetadata_name" size="30" value=""/>
           </td>
           <td class="formcolumncell">
-            <input type="text" name="s${SeqNum}_forcedmetadata_value" size="30" value=""/>
+            <input type="text" name="s${SEQNUM}_forcedmetadata_value" size="30" value=""/>
           </td>
         </tr>
       </table>
@@ -79,11 +79,11 @@
 #else
 
   #set($paramcounter = 0)
-  #foreach($paramrecord in $Parameters)
-<input type="hidden" name="s${SeqNum}_forcedmetadata_${paramcounter}_name" value="$Encoder.attributeEscape($paramrecord.get('parameter'))" />
-<input type="hidden" name="s${SeqNum}_forcedmetadata_${paramcounter}_value" value="$Encoder.attributeEscape($paramrecord.get('value'))" />
+  #foreach($paramrecord in $PARAMETERS)
+<input type="hidden" name="s${SEQNUM}_forcedmetadata_${paramcounter}_name" value="$Encoder.attributeEscape($paramrecord.get('parameter'))" />
+<input type="hidden" name="s${SEQNUM}_forcedmetadata_${paramcounter}_value" value="$Encoder.attributeEscape($paramrecord.get('value'))" />
     #set($paramcounter = $paramcounter + 1)
   #end
-<input type="hidden" name="s${SeqNum}_forcedmetadata_count" value="${paramcounter}"/>
+<input type="hidden" name="s${SEQNUM}_forcedmetadata_count" value="${paramcounter}"/>
 
 #end
diff --git a/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/viewSpecification.html b/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/viewSpecification.html
index 29706ad..bfa1af6 100644
--- a/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/viewSpecification.html
+++ b/connectors/forcedmetadata/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/forcedmetadata/viewSpecification.html
@@ -17,6 +17,42 @@
 
 <table class="displaytable">
   <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.FieldMappings'))</nobr></td>
+    <td class="boxcell">
+      <table class="formtable">
+        <tr class="formheaderrow">
+          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.MetadataFieldName'))</nobr></td>
+          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.FinalFieldName'))</nobr></td>
+        </tr>
+#set($fieldcounter = 0)
+#foreach($fieldmapping in $FIELDMAPPINGS)
+  #if(($fieldcounter % 2) == 0)
+        <tr class="evenformrow">
+  #else
+        <tr class="oddformrow">
+  #end
+          <td class="formcolumncell">
+            <nobr>$Encoder.bodyEscape($fieldmapping.get('SOURCE'))</nobr>
+          </td>
+          <td class="formcolumncell">
+            <nobr>$Encoder.bodyEscape($fieldmapping.get('TARGETDISPLAY'))</nobr>
+          </td>
+        </tr>
+  #set($fieldcounter = $fieldcounter + 1)
+#end
+#if($fieldcounter == 0)
+        <tr class="formrow"><td class="formmessage" colspan="2">$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.NoFieldMappingSpecified'))</td></tr>
+#end
+      </table>
+    </td>
+  </tr>
+  <tr><td class="separator" colspan="2"><hr/></td></tr>
+  <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.KeepAllMetadata'))</nobr></td>
+    <td class="value"><nobr>$Encoder.bodyEscape($KEEPALLMETADATA)</nobr></td>
+  </tr>
+
+  <tr>
     <td class="separator" colspan="4"><hr/></td>
   </tr>
   <tr>
@@ -28,7 +64,7 @@
           <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('ForcedMetadata.ParameterValue'))</nobr></td>
         </tr>
   #set($paramcounter = 0)
-  #foreach($paramrecord in $Parameters)
+  #foreach($paramrecord in $PARAMETERS)
     #if(($paramcounter % 2) == 0)
         <tr class="evenformrow">
     #else
diff --git a/connectors/generic/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/generic/GenericAuthority.java b/connectors/generic/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/generic/GenericAuthority.java
index 5599b03..141becb 100644
--- a/connectors/generic/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/generic/GenericAuthority.java
+++ b/connectors/generic/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/generic/GenericAuthority.java
@@ -597,7 +597,7 @@
 
     @Override
     public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException {
-      request.addHeader(BasicScheme.authenticate(credentials, "US-ASCII", false));
+      request.addHeader(new BasicScheme(StandardCharsets.US_ASCII).authenticate(credentials, request, context));
     }
   }
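
Both generic-connector classes move from the deprecated static BasicScheme.authenticate(Credentials, String, boolean) to the HttpClient 4.3-style instance call, which takes a Charset rather than a charset name and needs the request and context. A self-contained sketch of the new pattern (interceptor registration elided):

    import java.nio.charset.StandardCharsets;
    import org.apache.http.HttpRequest;
    import org.apache.http.auth.AuthenticationException;
    import org.apache.http.auth.Credentials;
    import org.apache.http.impl.auth.BasicScheme;
    import org.apache.http.protocol.HttpContext;

    class PreemptiveBasicAuthSketch {
      // AuthenticationException extends HttpException, so the interceptor's
      // existing throws clause still covers it.
      static void sign(HttpRequest request, HttpContext context, Credentials creds)
        throws AuthenticationException
      {
        request.addHeader(new BasicScheme(StandardCharsets.US_ASCII)
          .authenticate(creds, request, context));
      }
    }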
 
diff --git a/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/GenericConnector.java b/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/GenericConnector.java
index 2d3d733..79b8fb5 100644
--- a/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/GenericConnector.java
+++ b/connectors/generic/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/generic/GenericConnector.java
@@ -1044,13 +1044,13 @@
     public PreemptiveAuth(Credentials creds) {
       this.credentials = creds;
     }
-
-    @Override
-    public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException {
-      request.addHeader(BasicScheme.authenticate(credentials, "US-ASCII", false));
-    }
-  }
-
+
+    @Override
+    public void process(final HttpRequest request, final HttpContext context) throws HttpException, IOException {
+      request.addHeader(new BasicScheme(StandardCharsets.US_ASCII).authenticate(credentials, request, context));
+    }
+  }
+
   protected static class CheckThread extends Thread {
 
     protected HttpClient client;
diff --git a/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java b/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java
index 390bdc8..096898f 100644
--- a/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java
+++ b/connectors/gts/connector/src/main/java/org/apache/manifoldcf/agents/output/gts/GTSConnector.java
@@ -238,7 +238,7 @@
   * the document will not need to be sent again to the output data store.
   */
   @Override
-  public String getOutputDescription(OutputSpecification spec)
+  public VersionContext getPipelineDescription(Specification spec)
     throws ManifoldCFException, ServiceInterruption
   {
     // The information we want in this string is:
@@ -282,7 +282,7 @@
     // From here on down, unpacking is unnecessary.
     sb.append(ingestURI);
 
-    return sb.toString();
+    return new VersionContext(sb.toString(),params,spec);
   }
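
Switching the return type from String to VersionContext lets the framework carry the connection parameters and specification alongside the comparable version string; consumers still extract the string itself, as the ForcedMetadata change above does. A minimal sketch of the consuming side, assuming only the getVersionString() accessor that appears in this patch:

    // Hypothetical call site; only getVersionString() is taken from the patch.
    VersionContext vc = connector.getPipelineDescription(spec);
    String versionString = vc.getVersionString();  // compared for change detection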
 
   /** Add (or replace) a document in the output data store using the connector.
@@ -557,17 +557,41 @@
     );
   }
   
+  /** Obtain the name of the form check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form check javascript method.
+  */
+  @Override
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecification";
+  }
+
+  /** Obtain the name of the form presave check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form presave check javascript method.
+  */
+  @Override
+  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
+  }
+
   /** Output the specification header section.
-  * This method is called in the head section of a job page which has selected an output connection of the current type.  Its purpose is to add the required tabs
+  * This method is called in the head section of a job page which has selected a pipeline connection of the current type.  Its purpose is to add the required tabs
   * to the list, and to output any javascript methods that might be needed by the job editing HTML.
   *@param out is the output to which any HTML should be sent.
-  *@param os is the current output specification for this job.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this connection.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
   *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
   */
   @Override
-  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, OutputSpecification os, List<String> tabsArray)
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, List<String> tabsArray)
     throws ManifoldCFException, IOException
   {
+    String seqPrefix = "s"+connectionSequenceNumber+"_";
     tabsArray.add(Messages.getString(locale,"GTSConnector.GTSCollections"));
     tabsArray.add(Messages.getString(locale,"GTSConnector.GTSTemplate"));
     out.print(
@@ -575,12 +599,12 @@
 "<script type=\"text/javascript\">\n"+
 "<!--\n"+
 "\n"+
-"function checkOutputSpecification()\n"+
+"function "+seqPrefix+"checkSpecification()\n"+
 "{\n"+
-"  if (editjob.gts_collectionname.value.length > 230)\n"+
+"  if (editjob."+seqPrefix+"gts_collectionname.value.length > 230)\n"+
 "  {\n"+
 "    alert(\"" + Messages.getBodyJavascriptString(locale,"GTSConnector.CollectionNameMustBeLessThanOrEqualToCharacters") + "\");\n"+
-"    editjob.gts_collectionname.focus();\n"+
+"    editjob."+seqPrefix+"gts_collectionname.focus();\n"+
 "    return false;\n"+
 "  }\n"+
 "  return true;\n"+
@@ -592,17 +616,23 @@
   }
   
   /** Output the specification body section.
-  * This method is called in the body section of a job page which has selected an output connection of the current type.  Its purpose is to present the required form elements for editing.
+  * This method is called in the body section of a job page which has selected a pipeline connection of the current type.  Its purpose is to present the required form elements for editing.
   * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
   * form is "editjob".
   *@param out is the output to which any HTML should be sent.
-  *@param os is the current output specification for this job.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param actualSequenceNumber is the connection within the job that has currently been selected.
   *@param tabName is the current tab name.
   */
   @Override
-  public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os, String tabName)
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
     throws ManifoldCFException, IOException
   {
+    String seqPrefix = "s"+connectionSequenceNumber+"_";
+
     int i = 0;
     String collectionName = null;
     String documentTemplate = null;
@@ -624,7 +654,7 @@
       documentTemplate = "";
 
     // Collections tab
-    if (tabName.equals(Messages.getString(locale,"GTSConnector.GTSCollections")))
+    if (tabName.equals(Messages.getString(locale,"GTSConnector.GTSCollections")) && connectionSequenceNumber == actualSequenceNumber)
     {
       out.print(
 "<table class=\"displaytable\">\n"+
@@ -632,7 +662,7 @@
 "  <tr>\n"+
 "    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"GTSConnector.CollectionName") + "</nobr></td>\n"+
 "    <td class=\"value\">\n"+
-"      <input name=\"gts_collectionname\" type=\"text\" size=\"32\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(collectionName)+"\"/>\n"+
+"      <input name=\""+seqPrefix+"gts_collectionname\" type=\"text\" size=\"32\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(collectionName)+"\"/>\n"+
 "    </td>\n"+
 "  </tr>\n"+
 "</table>\n"
@@ -642,12 +672,12 @@
     {
       // Hiddens for collections
       out.print(
-"<input type=\"hidden\" name=\"gts_collectionname\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(collectionName)+"\"/>\n"
+"<input type=\"hidden\" name=\""+seqPrefix+"gts_collectionname\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(collectionName)+"\"/>\n"
       );
     }
 
     // Template tab
-    if (tabName.equals(Messages.getString(locale,"GTSConnector.GTSTemplate")))
+    if (tabName.equals(Messages.getString(locale,"GTSConnector.GTSTemplate")) && connectionSequenceNumber == actualSequenceNumber)
     {
       out.print(
 "<table class=\"displaytable\">\n"+
@@ -655,7 +685,7 @@
 "  <tr>\n"+
 "    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"GTSConnector.DocumentTemplate") + "</nobr></td>\n"+
 "    <td class=\"value\">\n"+
-"      <textarea rows=\"10\" cols=\"96\" name=\"gts_documenttemplate\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(documentTemplate)+"</textarea>\n"+
+"      <textarea rows=\"10\" cols=\"96\" name=\""+seqPrefix+"gts_documenttemplate\">"+org.apache.manifoldcf.ui.util.Encoder.bodyEscape(documentTemplate)+"</textarea>\n"+
 "    </td>\n"+
 "  </tr>\n"+
 "</table>\n"
@@ -665,25 +695,30 @@
     {
       // Hiddens for document template
       out.print(
-"<input type=\"hidden\" name=\"gts_documenttemplate\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(documentTemplate)+"\"/>\n"
+"<input type=\"hidden\" name=\""+seqPrefix+"gts_documenttemplate\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(documentTemplate)+"\"/>\n"
       );
     }
   }
   
   /** Process a specification post.
 * This method is called at the start of a job's edit or view page, whenever there is a possibility that form data for a connection has been
-  * posted.  Its purpose is to gather form information and modify the output specification accordingly.
+  * posted.  Its purpose is to gather form information and modify the transformation specification accordingly.
   * The name of the posted form is "editjob".
   *@param variableContext contains the post data, including binary file-upload information.
-  *@param os is the current output specification for this job.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
   *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
   */
   @Override
-  public String processSpecificationPost(IPostParameters variableContext, Locale locale, OutputSpecification os)
+  public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+    int connectionSequenceNumber)
     throws ManifoldCFException
   {
+    String seqPrefix = "s"+connectionSequenceNumber+"_";
+
     // Collection name
-    String collectionName = variableContext.getParameter("gts_collectionname");
+    String collectionName = variableContext.getParameter(seqPrefix+"gts_collectionname");
     if (collectionName != null)
     {
       int i = 0;
@@ -704,7 +739,7 @@
     }
 
     // Document template
-    String documentTemplate = variableContext.getParameter("gts_documenttemplate");
+    String documentTemplate = variableContext.getParameter(seqPrefix+"gts_documenttemplate");
     if (documentTemplate != null)
     {
       int i = 0;
@@ -725,13 +760,16 @@
   }
   
   /** View specification.
-  * This method is called in the body section of a job's view page.  Its purpose is to present the output specification information to the user.
+  * This method is called in the body section of a job's view page.  Its purpose is to present the pipeline specification information to the user.
   * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
   *@param out is the output to which any HTML should be sent.
-  *@param os is the current output specification for this job.
+  *@param locale is the preferred locale of the output.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param os is the current pipeline specification for this job.
   */
   @Override
-  public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os)
+  public void viewSpecification(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber)
     throws ManifoldCFException, IOException
   {
     int i = 0;
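Editorial note: a pattern repeated throughout this change set is visible in the hunks above. Because a job can now contain several connections rendering into the same "editjob" form, every form field and javascript function name is namespaced with an "s<sequence number>_" prefix, and the framework learns the per-connection check-method names through the two new getForm*JavascriptMethodName methods. A condensed sketch of that contract (the field name is illustrative):

  // The framework calls this to learn what to invoke on form submit...
  @Override
  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
  {
    return "s" + connectionSequenceNumber + "_checkSpecification";
  }

  // ...and the connector must emit a function with exactly that name,
  // referencing fields that carry the same prefix.
  String seqPrefix = "s" + connectionSequenceNumber + "_";
  out.print(
    "function " + seqPrefix + "checkSpecification()\n" +
    "{\n" +
    "  return editjob." + seqPrefix + "examplefield.value != \"\";\n" +  // illustrative field
    "}\n");
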
diff --git a/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputConnector.java b/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputConnector.java
index 9d8e559..627cdfc 100644
--- a/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputConnector.java
+++ b/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputConnector.java
@@ -30,17 +30,18 @@
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.HashMap;
 
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
 import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity;
-import org.apache.manifoldcf.agents.interfaces.OutputSpecification;
 import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
 import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
 import org.apache.manifoldcf.agents.system.Logging;
 import org.apache.manifoldcf.agents.output.BaseOutputConnector;
+import org.apache.manifoldcf.core.interfaces.Specification;
 import org.apache.manifoldcf.core.interfaces.ConfigParams;
 import org.apache.manifoldcf.core.interfaces.ConfigurationNode;
 import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
@@ -48,6 +49,7 @@
 import org.apache.manifoldcf.core.interfaces.IThreadContext;
 import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
 import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
 import org.json.JSONException;
 
 public class HDFSOutputConnector extends BaseOutputConnector {
@@ -248,9 +250,9 @@
    * the document will not need to be sent again to the output data store.
    */
   @Override
-  public String getOutputDescription(OutputSpecification spec) throws ManifoldCFException, ServiceInterruption {
+  public VersionContext getPipelineDescription(Specification spec) throws ManifoldCFException, ServiceInterruption {
     HDFSOutputSpecs specs = new HDFSOutputSpecs(getSpecNode(spec));
-    return specs.toJson().toString();
+    return new VersionContext(specs.toJson().toString(),params,spec);
   }
 
   /** Add (or replace) a document in the output data store using the connector.
@@ -343,7 +345,7 @@
   public void outputConfigurationHeader(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters, List<String> tabsArray) throws ManifoldCFException, IOException {
     super.outputConfigurationHeader(threadContext, out, locale, parameters, tabsArray);
     tabsArray.add(Messages.getString(locale,"HDFSOutputConnector.ServerTabName"));
-    outputResource(EDIT_CONFIGURATION_JS, out, locale, null, null);
+    outputResource(EDIT_CONFIGURATION_JS, out, locale, null, null, null, null);
   }
 
   /** Output the configuration body section.
@@ -359,7 +361,7 @@
   public void outputConfigurationBody(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters, String tabName) throws ManifoldCFException, IOException {
     super.outputConfigurationBody(threadContext, out, locale, parameters, tabName);
     HDFSOutputConfig config = this.getConfigParameters(parameters);
-    outputResource(EDIT_CONFIGURATION_HTML, out, locale, config, tabName);
+    outputResource(EDIT_CONFIGURATION_HTML, out, locale, config, tabName, null, null);
   }
 
   /** Process a configuration post.
@@ -386,54 +388,87 @@
    */
   @Override
   public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters) throws ManifoldCFException, IOException {
-    outputResource(VIEW_CONFIGURATION_HTML, out, locale, getConfigParameters(parameters), null);
+    outputResource(VIEW_CONFIGURATION_HTML, out, locale, getConfigParameters(parameters), null, null, null);
+  }
+
+  /** Obtain the name of the form check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form check javascript method.
+  */
+  @Override
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecification";
+  }
+
+  /** Obtain the name of the form presave check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form presave check javascript method.
+  */
+  @Override
+  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
   }
 
   /** Output the specification header section.
-   * This method is called in the head section of a job page which has selected an output connection of the current type.  Its purpose is to add the required tabs
-   * to the list, and to output any javascript methods that might be needed by the job editing HTML.
-   *@param out is the output to which any HTML should be sent.
-   *@param os is the current output specification for this job.
-   *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
-   */
+  * This method is called in the head section of a job page which has selected a pipeline connection of the current type.  Its purpose is to add the required tabs
+  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this connection.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
+  */
   @Override
-  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, OutputSpecification os, List<String> tabsArray) throws ManifoldCFException, IOException {
-    super.outputSpecificationHeader(out, locale, os, tabsArray);
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, List<String> tabsArray)
+    throws ManifoldCFException, IOException {
+    super.outputSpecificationHeader(out, locale, os, connectionSequenceNumber, tabsArray);
     tabsArray.add(Messages.getString(locale, "HDFSOutputConnector.PathTabName"));
-    outputResource(EDIT_SPECIFICATION_JS, out, locale, null, null);
+    outputResource(EDIT_SPECIFICATION_JS, out, locale, null, null, new Integer(connectionSequenceNumber), null);
   }
 
   /** Output the specification body section.
-   * This method is called in the body section of a job page which has selected an output connection of the current type.  Its purpose is to present the required form elements for editing.
-   * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
-   * form is "editjob".
-   *@param out is the output to which any HTML should be sent.
-   *@param os is the current output specification for this job.
-   *@param tabName is the current tab name.
-   */
+  * This method is called in the body section of a job page which has selected a pipeline connection of the current type.  Its purpose is to present the required form elements for editing.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
+  * form is "editjob".
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param actualSequenceNumber is the connection within the job that has currently been selected.
+  *@param tabName is the current tab name.
+  */
   @Override
-  public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os, String tabName) throws ManifoldCFException, IOException {
-    super.outputSpecificationBody(out, locale, os, tabName);
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
+    throws ManifoldCFException, IOException {
+    super.outputSpecificationBody(out, locale, os, connectionSequenceNumber, actualSequenceNumber, tabName);
     HDFSOutputSpecs specs = getSpecParameters(os);
-    outputResource(EDIT_SPECIFICATION_HTML, out, locale, specs, tabName);
+    outputResource(EDIT_SPECIFICATION_HTML, out, locale, specs, tabName, new Integer(connectionSequenceNumber), new Integer(actualSequenceNumber));
   }
 
   /** Process a specification post.
-   * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
-   * posted.  Its purpose is to gather form information and modify the output specification accordingly.
-   * The name of the posted form is "editjob".
-   *@param variableContext contains the post data, including binary file-upload information.
-   *@param os is the current output specification for this job.
-   *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
-   */
+  * This method is called at the start of a job's edit or view page, whenever there is a possibility that form data for a connection has been
+  * posted.  Its purpose is to gather form information and modify the transformation specification accordingly.
+  * The name of the posted form is "editjob".
+  *@param variableContext contains the post data, including binary file-upload information.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
+  */
   @Override
-  public String processSpecificationPost(IPostParameters variableContext, Locale locale, OutputSpecification os) throws ManifoldCFException {
+  public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException {
     ConfigurationNode specNode = getSpecNode(os);
     boolean bAdd = (specNode == null);
     if (bAdd) {
       specNode = new SpecificationNode(ParameterEnum.rootpath.name());
     }
-    HDFSOutputSpecs.contextToSpecNode(variableContext, specNode);
+    HDFSOutputSpecs.contextToSpecNode(variableContext, specNode, connectionSequenceNumber);
     if (bAdd) {
       os.addChild(os.getChildCount(), specNode);
     }
@@ -442,21 +477,25 @@
   }
 
   /** View specification.
-   * This method is called in the body section of a job's view page.  Its purpose is to present the output specification information to the user.
-   * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
-   *@param out is the output to which any HTML should be sent.
-   *@param os is the current output specification for this job.
-   */
+  * This method is called in the body section of a job's view page.  Its purpose is to present the pipeline specification information to the user.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param os is the current pipeline specification for this job.
+  */
   @Override
-  public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os) throws ManifoldCFException, IOException {
-    outputResource(VIEW_SPECIFICATION_HTML, out, locale, getSpecParameters(os), null);
+  public void viewSpecification(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException, IOException {
+    outputResource(VIEW_SPECIFICATION_HTML, out, locale, getSpecParameters(os), null, new Integer(connectionSequenceNumber), null);
   }
 
   /**
    * @param os
    * @return
    */
-  final private SpecificationNode getSpecNode(OutputSpecification os)
+  final private SpecificationNode getSpecNode(Specification os)
   {
     int l = os.getChildCount();
     for (int i = 0; i < l; i++) {
@@ -473,7 +512,7 @@
    * @return
    * @throws ManifoldCFException
    */
-  final private HDFSOutputSpecs getSpecParameters(OutputSpecification os) throws ManifoldCFException {
+  final private HDFSOutputSpecs getSpecParameters(Specification os) throws ManifoldCFException {
     return new HDFSOutputSpecs(getSpecNode(os));
   }
 
@@ -493,14 +532,22 @@
    * @param resName
    * @param out
    * @throws ManifoldCFException */
-  private static void outputResource(String resName, IHTTPOutput out, Locale locale, HDFSOutputParam params, String tabName) throws ManifoldCFException {
+  private static void outputResource(String resName, IHTTPOutput out, Locale locale, HDFSOutputParam params, String tabName, Integer sequenceNumber, Integer actualSequenceNumber) throws ManifoldCFException {
     Map<String,String> paramMap = null;
     if (params != null) {
       paramMap = params.buildMap();
       if (tabName != null) {
         paramMap.put("TabName", tabName);
       }
+      if (actualSequenceNumber != null)
+        paramMap.put("SelectedNum",actualSequenceNumber.toString());
     }
+    else
+    {
+      paramMap = new HashMap<String,String>();
+    }
+    if (sequenceNumber != null)
+      paramMap.put("SeqNum",sequenceNumber.toString());
     Messages.outputResourceWithVelocity(out, locale, resName, paramMap, true);
   }
 
diff --git a/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputSpecs.java b/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputSpecs.java
index a6b470d..a8e422c 100644
--- a/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputSpecs.java
+++ b/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/agents/output/hdfs/HDFSOutputSpecs.java
@@ -100,9 +100,9 @@
    * @param variableContext
    * @param specNode
    */
-  public static void contextToSpecNode(IPostParameters variableContext, ConfigurationNode specNode) {
+  public static void contextToSpecNode(IPostParameters variableContext, ConfigurationNode specNode, int sequenceNumber) {
     for (ParameterEnum param : SPECIFICATIONLIST) {
-      String p = variableContext.getParameter(param.name().toLowerCase());
+      String p = variableContext.getParameter("s"+sequenceNumber+"_"+param.name().toLowerCase());
       if (p != null) {
         specNode.setAttribute(param.name(), p);
       }
diff --git a/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java b/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java
index 4333d9c..c6b9e24 100644
--- a/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java
+++ b/connectors/hdfs/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/hdfs/HDFSRepositoryConnector.java
@@ -350,6 +350,10 @@
       FileStatus fileStatus = getObject(new Path(documentIdentifier));
       if (fileStatus != null) {
         if (fileStatus.isDirectory()) {
+          // If HDFS directory modify dates are transitive, as they are on Unix,
+          // then getting the modify date of the current version is sufficient
+          // to detect any downstream changes we need to be aware of.
+          // (If this turns out to be a bad assumption, this should simply set rval[i] ="").
           long lastModified = fileStatus.getModificationTime();
           rval[i] = new Long(lastModified).toString();
         } else {
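Editorial note: the new comment above documents a load-bearing assumption: on HDFS, as on Unix, adding or removing a child updates the parent directory's modification time, so the mtime alone is a sufficient version string for a directory. If that assumption were wrong, the safe fallback the comment mentions would look like this (a sketch, not committed code):

  if (fileStatus.isDirectory()) {
    // Fallback if directory mtimes turned out not to be transitive:
    // an empty version string forces the directory to be reprocessed
    // (and its children re-enumerated) on every crawl.
    rval[i] = "";
  }
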
@@ -409,26 +413,31 @@
         
       if (fileStatus == null) {
        // It is no longer there, so delete right away
-        activities.deleteDocument(documentIdentifier,version);
+        activities.deleteDocument(documentIdentifier);
         continue;
       }
         
       if (fileStatus.isDirectory()) {
-        /*
-          * Queue up stuff for directory
-          */
-        String entityReference = documentIdentifier;
-        FileStatus[] fileStatuses = getChildren(fileStatus.getPath());
-        if (fileStatuses == null) {
-          // Directory was deleted, so remove
-          activities.deleteDocument(documentIdentifier,version);
-          continue;
-        }
-        for (int j = 0; j < fileStatuses.length; j++) {
-          FileStatus fs = fileStatuses[j++];
-          String canonicalPath = fs.getPath().toString();
-          if (checkInclude(session.getUri().toString(),fs,canonicalPath,spec)) {
-            activities.addDocumentReference(canonicalPath,documentIdentifier,RELATIONSHIP_CHILD);
+        // Since we believe that downstream changes affect the current node's version string,
+        // then we only have to add references when there are detected changes.
+        if (!scanOnly[i]) {
+          activities.noDocument(documentIdentifier,version);
+          /*
+            * Queue up stuff for directory
+            */
+          String entityReference = documentIdentifier;
+          FileStatus[] fileStatuses = getChildren(fileStatus.getPath());
+          if (fileStatuses == null) {
+            // Directory was deleted, so remove
+            activities.deleteDocument(documentIdentifier);
+            continue;
+          }
+          for (int j = 0; j < fileStatuses.length; j++) {
+            FileStatus fs = fileStatuses[j];
+            String canonicalPath = fs.getPath().toString();
+            if (checkInclude(session.getUri().toString(),fs,canonicalPath,spec)) {
+              activities.addDocumentReference(canonicalPath,documentIdentifier,RELATIONSHIP_CHILD);
+            }
           }
         }
       } else {
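Editorial note: the restructured directory branch ties the pieces together. When scanOnly[i] is set, the version string was unchanged, so the children are already queued and the connector does nothing; only on a detected change does it call noDocument() for the directory itself (a directory is tracked but never indexed) and re-enumerate its children. A condensed sketch of the control flow, with null checks and include filtering elided:

  if (fileStatus.isDirectory()) {
    if (!scanOnly[i]) {
      // Version changed: record the unindexable directory's version,
      // then (re)queue its children.
      activities.noDocument(documentIdentifier, version);
      for (FileStatus fs : getChildren(fileStatus.getPath())) {
        activities.addDocumentReference(fs.getPath().toString(),
          documentIdentifier, RELATIONSHIP_CHILD);
      }
    }
    // scanOnly: version unchanged, so the references are already in place.
  }
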
diff --git a/connectors/hdfs/connector/src/main/resources/org/apache/manifoldcf/agents/output/hdfs/editSpecification.html b/connectors/hdfs/connector/src/main/resources/org/apache/manifoldcf/agents/output/hdfs/editSpecification.html
index ab9b965..2aaad58 100644
--- a/connectors/hdfs/connector/src/main/resources/org/apache/manifoldcf/agents/output/hdfs/editSpecification.html
+++ b/connectors/hdfs/connector/src/main/resources/org/apache/manifoldcf/agents/output/hdfs/editSpecification.html
@@ -15,18 +15,18 @@
  limitations under the License.
 -->
 
-#if($TABNAME == $ResourceBundle.getString('HDFSOutputConnector.PathTabName'))
+#if($TABNAME == $ResourceBundle.getString('HDFSOutputConnector.PathTabName') && ${SEQNUM} == ${SELECTEDNUM})
 
 <table class="displaytable">
   <tr><td class="separator" colspan="2"><hr/></td></tr>
   <tr>
     <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('HDFSOutputConnector.RootPath'))</nobr></td>
-    <td class="value"><input type="text" name="rootpath" size="64" value="$Encoder.attributeEscape($ROOTPATH)" /></td>
+    <td class="value"><input type="text" name="s${SEQNUM}_rootpath" size="64" value="$Encoder.attributeEscape($ROOTPATH)" /></td>
   </tr>
 </table>
 
 #else
 
-<input type="hidden" name="rootpath" value="$Encoder.attributeEscape($ROOTPATH)" />
+<input type="hidden" name="s${SEQNUM}_rootpath" value="$Encoder.attributeEscape($ROOTPATH)" />
 
 #end
diff --git a/connectors/hdfs/connector/src/main/resources/org/apache/manifoldcf/agents/output/hdfs/editSpecification.js b/connectors/hdfs/connector/src/main/resources/org/apache/manifoldcf/agents/output/hdfs/editSpecification.js
index b5d31ea..b56c6ab 100644
--- a/connectors/hdfs/connector/src/main/resources/org/apache/manifoldcf/agents/output/hdfs/editSpecification.js
+++ b/connectors/hdfs/connector/src/main/resources/org/apache/manifoldcf/agents/output/hdfs/editSpecification.js
@@ -17,13 +17,13 @@
 
 <script type="text/javascript">
 <!--
-function checkOutputSpecificationForSave()
+function s${SEQNUM}_checkSpecificationForSave()
 {
-  if (editjob.rootpath.value == "")
+  if (editjob.s${SEQNUM}_rootpath.value == "")
   {
     alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('HDFSOutputConnector.RootPathCannotBeNull'))");
-    SelectTab("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('HDFSOutputConnector.PathTabName'))");
-    editjob.rootpath.focus();
+    SelectSequencedTab("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('HDFSOutputConnector.PathTabName'))",${SEQNUM});
+    editjob.s${SEQNUM}_rootpath.focus();
     return false;
   }
   return true;
diff --git a/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jdbc/JDBCConnector.java b/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jdbc/JDBCConnector.java
index 186e5f4..a3e4e35 100644
--- a/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jdbc/JDBCConnector.java
+++ b/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jdbc/JDBCConnector.java
@@ -634,7 +634,7 @@
                         try
                         {
                           rd.setBinary(is,ci.getUtf8StreamLength());
-                          activities.ingestDocument(id, version, url, rd);
+                          activities.ingestDocumentWithException(id, version, url, rd);
                         }
                         finally
                         {
@@ -676,7 +676,7 @@
                         try
                         {
                           rd.setBinary(is,bytes.length);
-                          activities.ingestDocument(id, version, url, rd);
+                          activities.ingestDocumentWithException(id, version, url, rd);
                         }
                         finally
                         {
@@ -722,7 +722,7 @@
           if (map.get(documentIdentifier) != null)
           {
             // This means we did not see it (or data for it) in the result set.  Delete it!
-            activities.deleteDocument(documentIdentifier,versions[i]);
+            activities.deleteDocument(documentIdentifier);
           }
         }
         i++;
diff --git a/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/jdbc/JDBCConnection.java b/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/jdbc/JDBCConnection.java
index 4c4d530..fb3547f 100644
--- a/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/jdbc/JDBCConnection.java
+++ b/connectors/jdbc/connector/src/main/java/org/apache/manifoldcf/jdbc/JDBCConnection.java
@@ -512,7 +512,16 @@
           {
             InputStream bis = rs.getBinaryStream(colnum);
             if (bis != null)
-              value = new TempFileInput(bis);
+            {
+              try
+              {
+                value = new TempFileInput(bis);
+              }
+              catch (IOException e)
+              {
+                handleIOException(e,"reading binary data");
+              }
+            }
           }
           else if (isBLOB(rsmd,colnum))
           {
@@ -522,13 +531,31 @@
             // Cleanup should happen by the user of the resultset.
             // System.out.println(" Blob length = "+Long.toString(blob.length()));
             if (blob != null)
-              value = new TempFileInput(blob.getBinaryStream(),blob.length());
+            {
+              try
+              {
+                value = new TempFileInput(blob.getBinaryStream(),blob.length());
+              }
+              catch (IOException e)
+              {
+                handleIOException(e,"reading blob");
+              }
+            }
           }
           else if (isCLOB(rsmd,colnum))
           {
             Clob clob = getCLOB(rs,colnum);
             if (clob != null)
-              value = new TempFileCharacterInput(clob.getCharacterStream(),clob.length());
+            {
+              try
+              {
+                value = new TempFileCharacterInput(clob.getCharacterStream(),clob.length());
+              }
+              catch (IOException e)
+              {
+                handleIOException(e,"reading clob");
+              }
+            }
           }
           else
           {
@@ -548,6 +575,14 @@
     }
   }
 
+  protected static void handleIOException(IOException e, String context)
+    throws ManifoldCFException
+  {
+    if (e instanceof InterruptedIOException)
+      throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+    throw new ManifoldCFException("IO exception while "+context+": "+e.getMessage(),e);
+  }
+  
   protected static String[] readColumnNames(ResultSetMetaData rsmd, boolean useName)
     throws ManifoldCFException, ServiceInterruption
   {
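Editorial note on the new handleIOException helper above: java.net.SocketTimeoutException extends InterruptedIOException, so the instanceof test classifies a timeout as thread interruption. That is probably harmless for local temp-file I/O, but a stricter variant would look like this (a sketch, not part of the commit):

  protected static void handleIOException(IOException e, String context)
    throws ManifoldCFException
  {
    // Treat timeouts as ordinary I/O failures; only genuine interruption
    // becomes an INTERRUPTED ManifoldCFException.
    if (e instanceof InterruptedIOException && !(e instanceof java.net.SocketTimeoutException))
      throw new ManifoldCFException(e.getMessage(), e, ManifoldCFException.INTERRUPTED);
    throw new ManifoldCFException("IO exception while " + context + ": " + e.getMessage(), e);
  }
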
diff --git a/connectors/jira/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/jira/JiraAuthorityConnector.java b/connectors/jira/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/jira/JiraAuthorityConnector.java
index 26b62d4..c7747ba 100644
--- a/connectors/jira/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/jira/JiraAuthorityConnector.java
+++ b/connectors/jira/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/jira/JiraAuthorityConnector.java
@@ -214,9 +214,44 @@
         Logging.authorityConnectors.debug("JIRA: Clientsecret = '" + clientsecret + "'");
       }
 
-      String jiraurl = jiraprotocol + "://" + jirahost + (StringUtils.isEmpty(jiraport)?"":":"+jiraport) + jirapath;
-      session = new JiraSession(clientid, clientsecret, jiraurl,
-        jiraproxyhost, jiraproxyport, jiraproxydomain, jiraproxyusername, jiraproxypassword);
+      int portInt;
+      if (jiraport != null && jiraport.length() > 0)
+      {
+        try
+        {
+          portInt = Integer.parseInt(jiraport);
+        }
+        catch (NumberFormatException e)
+        {
+          throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
+        }
+      }
+      else
+      {
+        if (jiraprotocol.toLowerCase(Locale.ROOT).equals("http"))
+          portInt = 80;
+        else
+          portInt = 443;
+      }
+
+      int proxyPortInt;
+      if (jiraproxyport != null && jiraproxyport.length() > 0)
+      {
+        try
+        {
+          proxyPortInt = Integer.parseInt(jiraproxyport);
+        }
+        catch (NumberFormatException e)
+        {
+          throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
+        }
+      }
+      else
+        proxyPortInt = 8080;
+
+      session = new JiraSession(clientid, clientsecret,
+        jiraprotocol, jirahost, portInt, jirapath,
+        jiraproxyhost, proxyPortInt, jiraproxydomain, jiraproxyusername, jiraproxypassword);
 
     }
     lastSessionFetch = System.currentTimeMillis();
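Editorial note: the port-resolution logic above, which parses the configured port or falls back to 80/443 by protocol and to 8080 for the proxy, is duplicated verbatim in the Jira repository connector later in this diff. A shared helper would remove the duplication; a sketch under that assumption (the helper name is illustrative, not part of the commit):

  // Illustrative helper, not part of the commit.
  private static int resolvePort(String portString, int defaultPort)
    throws ManifoldCFException
  {
    if (portString == null || portString.length() == 0)
      return defaultPort;
    try
    {
      return Integer.parseInt(portString);
    }
    catch (NumberFormatException e)
    {
      throw new ManifoldCFException("Bad number: " + e.getMessage(), e);
    }
  }

  // Usage: resolvePort(jiraport, "http".equals(jiraprotocol.toLowerCase(Locale.ROOT)) ? 80 : 443)
  //        resolvePort(jiraproxyport, 8080)
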
diff --git a/connectors/jira/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/jira/JiraSession.java b/connectors/jira/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/jira/JiraSession.java
index 27edd4b..1c99dff 100644
--- a/connectors/jira/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/jira/JiraSession.java
+++ b/connectors/jira/connector/src/main/java/org/apache/manifoldcf/authorities/authorities/jira/JiraSession.java
@@ -64,6 +64,10 @@
 import org.apache.http.protocol.HttpContext;
 import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
 import org.apache.http.entity.ContentType;
+import org.apache.http.client.AuthCache;
+import org.apache.http.impl.client.BasicAuthCache;
+import org.apache.http.impl.auth.BasicScheme;
+import org.apache.http.client.protocol.HttpClientContext;
 
 import org.apache.http.ParseException;
 
@@ -77,7 +81,8 @@
  */
 public class JiraSession {
 
-  private final String URLbase;
+  private final HttpHost host;
+  private final String path;
   private final String clientId;
   private final String clientSecret;
   
@@ -104,10 +109,12 @@
   /**
    * Constructor. Create a session.
    */
-  public JiraSession(String clientId, String clientSecret, String URLbase,
-    String proxyHost, String proxyPort, String proxyDomain, String proxyUsername, String proxyPassword)
+  public JiraSession(String clientId, String clientSecret,
+    String protocol, String host, int port, String path,
+    String proxyHost, int proxyPort, String proxyDomain, String proxyUsername, String proxyPassword)
     throws ManifoldCFException {
-    this.URLbase = URLbase;
+    this.host = new HttpHost(host,port,protocol);
+    this.path = path;
     this.clientId = clientId;
     this.clientSecret = clientSecret;
 
@@ -142,21 +149,6 @@
     if (proxyHost != null && proxyHost.length() > 0)
     {
 
-      int proxyPortInt;
-      if (proxyPort != null && proxyPort.length() > 0)
-      {
-        try
-        {
-          proxyPortInt = Integer.parseInt(proxyPort);
-        }
-        catch (NumberFormatException e)
-        {
-          throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
-        }
-      }
-      else
-        proxyPortInt = 8080;
-
       // Configure proxy authentication
       if (proxyUsername != null && proxyUsername.length() > 0)
       {
@@ -166,11 +158,11 @@
           proxyDomain = "";
 
         credentialsProvider.setCredentials(
-          new AuthScope(proxyHost, proxyPortInt),
+          new AuthScope(proxyHost, proxyPort),
           new NTCredentials(proxyUsername, proxyPassword, currentHost, proxyDomain));
       }
 
-      HttpHost proxy = new HttpHost(proxyHost, proxyPortInt);
+      HttpHost proxy = new HttpHost(proxyHost, proxyPort);
       requestBuilder.setProxy(proxy);
     }
 
@@ -261,14 +253,25 @@
     return charSet;
   }
 
-  private void getRest(String rightside, JiraJSONResponse response)
+  private void getRest(String rightside, JiraJSONResponse response) 
     throws IOException, ResponseException {
 
-    final HttpRequestBase method = new HttpGet(URLbase + rightside);
+    // Create AuthCache instance
+    AuthCache authCache = new BasicAuthCache();
+    // Generate BASIC scheme object and add it to the local
+    // auth cache
+    BasicScheme basicAuth = new BasicScheme();
+    authCache.put(host, basicAuth);
+
+    // Add AuthCache to the execution context
+    HttpClientContext localContext = HttpClientContext.create();
+    localContext.setAuthCache(authCache);
+
+    final HttpRequestBase method = new HttpGet(host.toURI() + path + rightside);
     method.addHeader("Accept", "application/json");
 
     try {
-      HttpResponse httpResponse = httpClient.execute(method);
+      HttpResponse httpResponse = httpClient.execute(method,localContext);
       int resultCode = httpResponse.getStatusLine().getStatusCode();
       if (resultCode != 200)
         throw new ResponseException("Unexpected result code "+resultCode+": "+convertToString(httpResponse));
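Editorial note: the AuthCache wiring above is the standard HttpClient 4.x recipe for preemptive basic authentication. Seeding the context's auth cache with a BasicScheme for the target host makes the client send the Authorization header on the first request instead of waiting for a 401 challenge, which matters because Jira serves anonymous responses to guests rather than challenging. A self-contained sketch of the same recipe (host, path, and credentials are placeholders):

  import org.apache.http.HttpHost;
  import org.apache.http.auth.AuthScope;
  import org.apache.http.auth.UsernamePasswordCredentials;
  import org.apache.http.client.AuthCache;
  import org.apache.http.client.methods.HttpGet;
  import org.apache.http.client.protocol.HttpClientContext;
  import org.apache.http.impl.auth.BasicScheme;
  import org.apache.http.impl.client.BasicAuthCache;
  import org.apache.http.impl.client.BasicCredentialsProvider;
  import org.apache.http.impl.client.CloseableHttpClient;
  import org.apache.http.impl.client.HttpClients;

  public class PreemptiveBasicAuthSketch {
    public static void main(String[] args) throws Exception {
      HttpHost target = new HttpHost("jira.example.com", 443, "https"); // placeholder host
      BasicCredentialsProvider creds = new BasicCredentialsProvider();
      creds.setCredentials(new AuthScope(target),
        new UsernamePasswordCredentials("user", "secret"));            // placeholder credentials

      // Seeding the auth cache for the target host is what makes the
      // authentication preemptive.
      AuthCache authCache = new BasicAuthCache();
      authCache.put(target, new BasicScheme());
      HttpClientContext context = HttpClientContext.create();
      context.setAuthCache(authCache);

      try (CloseableHttpClient client =
             HttpClients.custom().setDefaultCredentialsProvider(creds).build()) {
        // Authorization header goes out with this first request.
        client.execute(new HttpGet(target.toURI() + "/rest/api/2/myself"), context).close();
      }
    }
  }
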
diff --git a/connectors/jira/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jira/JiraRepositoryConnector.java b/connectors/jira/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jira/JiraRepositoryConnector.java
index d504852..f3d6271 100644
--- a/connectors/jira/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jira/JiraRepositoryConnector.java
+++ b/connectors/jira/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jira/JiraRepositoryConnector.java
@@ -290,9 +290,44 @@
        Logging.connectors.debug("JIRA: Clientsecret = '" + clientsecret + "'");
      }

-      String jiraurl = jiraprotocol + "://" + jirahost + (StringUtils.isEmpty(jiraport)?"":":"+jiraport) + jirapath;
-      session = new JiraSession(clientid, clientsecret, jiraurl,
-        jiraproxyhost, jiraproxyport, jiraproxydomain, jiraproxyusername, jiraproxypassword);
+      int portInt;
+      if (jiraport != null && jiraport.length() > 0)
+      {
+        try
+        {
+          portInt = Integer.parseInt(jiraport);
+        }
+        catch (NumberFormatException e)
+        {
+          throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
+        }
+      }
+      else
+      {
+        if (jiraprotocol.toLowerCase(Locale.ROOT).equals("http"))
+          portInt = 80;
+        else
+          portInt = 443;
+      }
+
+      int proxyPortInt;
+      if (jiraproxyport != null && jiraproxyport.length() > 0)
+      {
+        try
+        {
+          proxyPortInt = Integer.parseInt(jiraproxyport);
+        }
+        catch (NumberFormatException e)
+        {
+          throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
+        }
+      }
+      else
+        proxyPortInt = 8080;
+
+      session = new JiraSession(clientid, clientsecret,
+        jiraprotocol, jirahost, portInt, jirapath,
+        jiraproxyhost, proxyPortInt, jiraproxydomain, jiraproxyusername, jiraproxypassword);

    }
    lastSessionFetch = System.currentTimeMillis();
@@ -927,7 +962,7 @@
            String issueKey = nodeId.substring(2);
            JiraIssue jiraFile = getIssue(issueKey);
            if (jiraFile == null) {
-              activities.deleteDocument(nodeId, version);
+              activities.deleteDocument(nodeId);
              continue;
            }

diff --git a/connectors/jira/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jira/JiraSession.java b/connectors/jira/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jira/JiraSession.java
index e25527a..0e246e6 100644
--- a/connectors/jira/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jira/JiraSession.java
+++ b/connectors/jira/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/jira/JiraSession.java
@@ -62,6 +62,10 @@
 import org.apache.http.protocol.HttpContext;
 import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
 import org.apache.http.entity.ContentType;
+import org.apache.http.client.AuthCache;
+import org.apache.http.impl.client.BasicAuthCache;
+import org.apache.http.impl.auth.BasicScheme;
+import org.apache.http.client.protocol.HttpClientContext;

 import org.apache.http.ParseException;

@@ -75,7 +79,8 @@
  */
 public class JiraSession {

-  private final String URLbase;
+  private final HttpHost host;
+  private final String path;
   private final String clientId;
   private final String clientSecret;

@@ -102,10 +107,12 @@
   /**
    * Constructor. Create a session.
    */
-  public JiraSession(String clientId, String clientSecret, String URLbase,
-    String proxyHost, String proxyPort, String proxyDomain, String proxyUsername, String proxyPassword)
+  public JiraSession(String clientId, String clientSecret,
+    String protocol, String host, int port, String path,
+    String proxyHost, int proxyPort, String proxyDomain, String proxyUsername, String proxyPassword)
     throws ManifoldCFException {
-    this.URLbase = URLbase;
+    this.host = new HttpHost(host,port,protocol);
+    this.path = path;
     this.clientId = clientId;
     this.clientSecret = clientSecret;

@@ -140,20 +147,6 @@
     if (proxyHost != null && proxyHost.length() > 0)
     {

-      int proxyPortInt;
-      if (proxyPort != null && proxyPort.length() > 0)
-      {
-        try
-        {
-          proxyPortInt = Integer.parseInt(proxyPort);
-        }
-        catch (NumberFormatException e)
-        {
-          throw new ManifoldCFException("Bad number: "+e.getMessage(),e);
-        }
-      }
-      else
-        proxyPortInt = 8080;

       // Configure proxy authentication
       if (proxyUsername != null && proxyUsername.length() > 0)
@@ -164,11 +157,11 @@
           proxyDomain = "";

         credentialsProvider.setCredentials(
-          new AuthScope(proxyHost, proxyPortInt),
+          new AuthScope(proxyHost, proxyPort),
           new NTCredentials(proxyUsername, proxyPassword, currentHost, proxyDomain));
       }

-      HttpHost proxy = new HttpHost(proxyHost, proxyPortInt);
+      HttpHost proxy = new HttpHost(proxyHost, proxyPort);
       requestBuilder.setProxy(proxy);
     }

@@ -262,11 +255,22 @@
   private void getRest(String rightside, JiraJSONResponse response)
     throws IOException, ResponseException {

-    final HttpRequestBase method = new HttpGet(URLbase + rightside);
+    // Create AuthCache instance
+    AuthCache authCache = new BasicAuthCache();
+    // Generate BASIC scheme object and add it to the local
+    // auth cache
+    BasicScheme basicAuth = new BasicScheme();
+    authCache.put(host, basicAuth);
+
+    // Add AuthCache to the execution context
+    HttpClientContext localContext = HttpClientContext.create();
+    localContext.setAuthCache(authCache);
+
+    final HttpRequestBase method = new HttpGet(host.toURI() + path + rightside);
     method.addHeader("Accept", "application/json");

     try {
-      HttpResponse httpResponse = httpClient.execute(method);
+      HttpResponse httpResponse = httpClient.execute(method,localContext);
       int resultCode = httpResponse.getStatusLine().getStatusCode();
       if (resultCode != 200)
         throw new IOException("Unexpected result code "+resultCode+": "+convertToString(httpResponse));
diff --git a/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java b/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
index d761acd..c950852 100644
--- a/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
+++ b/connectors/livelink/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/livelink/LivelinkConnector.java
@@ -1450,7 +1450,10 @@
     {
       // Since each livelink access is time-consuming, be sure that we abort if the job has gone inactive
       activities.checkJobStillActive();
+      
       String documentIdentifier = documentIdentifiers[i];
+      String version = versions[i];
+      
       boolean doScanOnly = scanOnly[i];
 
       boolean isFolder = documentIdentifier.startsWith("F");
@@ -1470,99 +1473,103 @@
 
       if (isFolder)
       {
-        if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("Livelink: Processing folder "+Integer.toString(vol)+":"+Integer.toString(objID));
-
-        // Since the identifier indicates it is a directory, then queue up all the current children which pass the filter.
-        String filterString = buildFilterString(spec);
-
-        int sanityRetryCount = FAILURE_RETRY_COUNT;
-        while (true)
+        if (doScanOnly == false)
         {
-          ListObjectsThread t = new ListObjectsThread(vol,objID,filterString);
-          try
+          activities.noDocument(documentIdentifier,version);
+          if (Logging.connectors.isDebugEnabled())
+            Logging.connectors.debug("Livelink: Processing folder "+Integer.toString(vol)+":"+Integer.toString(objID));
+
+          // Since the identifier indicates it is a directory, then queue up all the current children which pass the filter.
+          String filterString = buildFilterString(spec);
+
+          int sanityRetryCount = FAILURE_RETRY_COUNT;
+          while (true)
           {
-            t.start();
-	    LLValue childrenDocs;
-	    try
-	    {
-	      childrenDocs = t.finishUp();
-	    }
-	    catch (ManifoldCFException e)
-	    {
-	      sanityRetryCount = assessRetry(sanityRetryCount,e);
-	      continue;
-	    }
-
-            int size = 0;
-
-            if (childrenDocs.isRecord())
-              size = 1;
-            if (childrenDocs.isTable())
-              size = childrenDocs.size();
-
-            // System.out.println("Total child count = "+Integer.toString(size));
-
-            // Do the scan
-            int j = 0;
-            while (j < size)
+            ListObjectsThread t = new ListObjectsThread(vol,objID,filterString);
+            try
             {
-              int childID = childrenDocs.toInteger(j, "ID");
-
-              if (Logging.connectors.isDebugEnabled())
-                Logging.connectors.debug("Livelink: Found a child of folder "+Integer.toString(vol)+":"+Integer.toString(objID)+" : ID="+Integer.toString(childID));
-
-              int subtype = childrenDocs.toInteger(j, "SubType");
-              boolean childIsFolder = (subtype == LAPI_DOCUMENTS.FOLDERSUBTYPE || subtype == LAPI_DOCUMENTS.PROJECTSUBTYPE ||
-                subtype == LAPI_DOCUMENTS.COMPOUNDDOCUMENTSUBTYPE);
-
-              // If it's a folder, we just let it through for now
-              if (!childIsFolder && checkInclude(childrenDocs.toString(j,"Name") + "." + childrenDocs.toString(j,"FileType"), spec) == false)
+              t.start();
+              LLValue childrenDocs;
+              try
               {
-                if (Logging.connectors.isDebugEnabled())
-                  Logging.connectors.debug("Livelink: Child identifier "+Integer.toString(childID)+" was excluded by inclusion criteria");
-                j++;
+                childrenDocs = t.finishUp();
+              }
+              catch (ManifoldCFException e)
+              {
+                sanityRetryCount = assessRetry(sanityRetryCount,e);
                 continue;
               }
 
-              if (childIsFolder)
+              int size = 0;
+
+              if (childrenDocs.isRecord())
+                size = 1;
+              if (childrenDocs.isTable())
+                size = childrenDocs.size();
+
+              // System.out.println("Total child count = "+Integer.toString(size));
+
+              // Do the scan
+              int j = 0;
+              while (j < size)
               {
+                int childID = childrenDocs.toInteger(j, "ID");
+
                 if (Logging.connectors.isDebugEnabled())
-                  Logging.connectors.debug("Livelink: Child identifier "+Integer.toString(childID)+" is a folder, project, or compound document; adding a reference");
-                if (subtype == LAPI_DOCUMENTS.PROJECTSUBTYPE)
+                  Logging.connectors.debug("Livelink: Found a child of folder "+Integer.toString(vol)+":"+Integer.toString(objID)+" : ID="+Integer.toString(childID));
+
+                int subtype = childrenDocs.toInteger(j, "SubType");
+                boolean childIsFolder = (subtype == LAPI_DOCUMENTS.FOLDERSUBTYPE || subtype == LAPI_DOCUMENTS.PROJECTSUBTYPE ||
+                  subtype == LAPI_DOCUMENTS.COMPOUNDDOCUMENTSUBTYPE);
+
+                // If it's a folder, we just let it through for now
+                if (!childIsFolder && checkInclude(childrenDocs.toString(j,"Name") + "." + childrenDocs.toString(j,"FileType"), spec) == false)
                 {
-                  // If we pick up a project object, we need to describe the volume object (which
-                  // will be the root of all documents beneath)
-                  activities.addDocumentReference("F"+new Integer(childID).toString()+":"+new Integer(-childID).toString());
+                  if (Logging.connectors.isDebugEnabled())
+                    Logging.connectors.debug("Livelink: Child identifier "+Integer.toString(childID)+" was excluded by inclusion criteria");
+                  j++;
+                  continue;
+                }
+
+                if (childIsFolder)
+                {
+                  if (Logging.connectors.isDebugEnabled())
+                    Logging.connectors.debug("Livelink: Child identifier "+Integer.toString(childID)+" is a folder, project, or compound document; adding a reference");
+                  if (subtype == LAPI_DOCUMENTS.PROJECTSUBTYPE)
+                  {
+                    // If we pick up a project object, we need to describe the volume object (which
+                    // will be the root of all documents beneath)
+                    activities.addDocumentReference("F"+new Integer(childID).toString()+":"+new Integer(-childID).toString());
+                  }
+                  else
+                    activities.addDocumentReference("F"+new Integer(vol).toString()+":"+new Integer(childID).toString());
                 }
                 else
-                  activities.addDocumentReference("F"+new Integer(vol).toString()+":"+new Integer(childID).toString());
-              }
-              else
-              {
-                if (Logging.connectors.isDebugEnabled())
-                  Logging.connectors.debug("Livelink: Child identifier "+Integer.toString(childID)+" is a simple document; adding a reference");
+                {
+                  if (Logging.connectors.isDebugEnabled())
+                    Logging.connectors.debug("Livelink: Child identifier "+Integer.toString(childID)+" is a simple document; adding a reference");
 
-                activities.addDocumentReference("D"+new Integer(vol).toString()+":"+new Integer(childID).toString());
-              }
+                  activities.addDocumentReference("D"+new Integer(vol).toString()+":"+new Integer(childID).toString());
+                }
 
-              j++;
+                j++;
+              }
+              break;
             }
-            break;
+            catch (InterruptedException e)
+            {
+              t.interrupt();
+              throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+            }
+            catch (RuntimeException e)
+            {
+              sanityRetryCount = handleLivelinkRuntimeException(e,sanityRetryCount,true);
+              continue;
+            }
           }
-          catch (InterruptedException e)
-          {
-            t.interrupt();
-            throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-          }
-          catch (RuntimeException e)
-          {
-            sanityRetryCount = handleLivelinkRuntimeException(e,sanityRetryCount,true);
-            continue;
-          }
+          if (Logging.connectors.isDebugEnabled())
+            Logging.connectors.debug("Livelink: Done processing folder "+Integer.toString(vol)+":"+Integer.toString(objID));
         }
-        if (Logging.connectors.isDebugEnabled())
-          Logging.connectors.debug("Livelink: Done processing folder "+Integer.toString(vol)+":"+Integer.toString(objID));
       }
       else
       {
@@ -1579,7 +1586,7 @@
               Logging.connectors.debug("Livelink: Decided to ingest document "+Integer.toString(vol)+":"+Integer.toString(objID));
 
             // Grab the access tokens for this file from the version string, inside ingest method.
-            ingestFromLiveLink(llc,documentIdentifiers[i],versions[i],activities,desc,sDesc);
+            ingestFromLiveLink(llc,documentIdentifiers[i],version,activities,desc,sDesc);
           }
           else
           {
@@ -4440,7 +4447,7 @@
                     // Since we logged in, we should fail here if the ingestion user doesn't have access to the
                     // the document, but if we do, don't fail hard.
                     resultCode = "UNAUTHORIZED";
-                    activities.deleteDocument(documentIdentifier,version);
+                    activities.noDocument(documentIdentifier,version);
                     return;
 
                   case HttpStatus.SC_OK:
@@ -4533,7 +4540,7 @@
                     else
                     {
                       resultCode = "SESSIONLOGINFAILED";
-                      activities.deleteDocument(documentIdentifier,version);
+                      activities.noDocument(documentIdentifier,version);
                     }
                     break;
                   case HttpStatus.SC_BAD_REQUEST:
@@ -4731,7 +4738,7 @@
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its length ("+dataSize+") was rejected by output connector");
             resultCode = "DOCUMENTTOOLONG";
-            activities.deleteDocument(documentIdentifier,version);
+            activities.noDocument(documentIdentifier,version);
           }
         }
         else
@@ -4741,7 +4748,7 @@
           if (Logging.connectors.isDebugEnabled())
             Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its mime type ("+mimeType+") was rejected by output connector");
           resultCode = "MIMETYPEEXCLUSION";
-          activities.deleteDocument(documentIdentifier,version);
+          activities.noDocument(documentIdentifier,version);
         }
       }
       else
@@ -4751,7 +4758,7 @@
         if (Logging.connectors.isDebugEnabled())
           Logging.connectors.debug("Livelink: Excluding document "+documentIdentifier+" because its URL ("+viewHttpAddress+") was rejected by output connector");
         resultCode = "URLEXCLUSION";
-        activities.deleteDocument(documentIdentifier,version);
+        activities.noDocument(documentIdentifier,version);
       }
     }
     finally
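
The Livelink hunks above, and the Meridio, RSS, and SharePoint hunks below, all make the same substitution: where a repository document still exists but cannot be sent onward, deleteDocument(documentIdentifier, version) becomes noDocument(documentIdentifier, version). A minimal sketch of the pattern, assuming the IProcessActivity semantics implied by this patch (the wrapper class and the rejection flag are hypothetical; only the activity call comes from the diff):

    import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
    import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
    import org.apache.manifoldcf.crawler.interfaces.IProcessActivity;

    // Hypothetical helper showing the revised rejection pattern.
    public class RejectionSketch
    {
      public static void handle(IProcessActivity activities,
        String documentIdentifier, String version, boolean rejectedByOutput)
        throws ManifoldCFException, ServiceInterruption
      {
        if (rejectedByOutput)
        {
          // The document still exists in the repository, so its version string
          // is recorded (keeping the crawl incremental) while any previously
          // indexed copy is removed.
          activities.noDocument(documentIdentifier, version);
          return;
        }
        // ... otherwise proceed to ingestion ...
      }
    }
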
diff --git a/connectors/meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/MeridioConnector.java b/connectors/meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/MeridioConnector.java
index 935b596..d6ac0e0 100644
--- a/connectors/meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/MeridioConnector.java
+++ b/connectors/meridio/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/meridio/MeridioConnector.java
@@ -1164,7 +1164,7 @@
             if (Logging.connectors.isDebugEnabled())
               Logging.connectors.debug("Meridio: Could not retrieve document data for document id '" +
               new Long(docId).toString() + "' in processDocuments method - deleting document.");
-            activities.deleteDocument(documentIdentifier,docVersion);
+            activities.noDocument(documentIdentifier,docVersion);
             i++;
             continue;
           }
@@ -1176,7 +1176,7 @@
               Logging.connectors.debug("Meridio: Could not retrieve document owner for document id '" +
               new Long(docId).toString() + "' in processDocuments method. No information or incorrect amount " +
               "of information was returned");
-            activities.deleteDocument(documentIdentifier,docVersion);
+            activities.noDocument(documentIdentifier,docVersion);
             i++;
             continue;
           }
@@ -1336,7 +1336,7 @@
               if (Logging.connectors.isDebugEnabled())
                 Logging.connectors.debug("Meridio: Failed to get content for document '" + new Long(docId).toString() + "'");
               // No document.  Delete what's there
-              activities.deleteDocument(documentIdentifier,docVersion);
+              activities.noDocument(documentIdentifier,docVersion);
               i++;
               continue;
             }
@@ -1370,13 +1370,13 @@
                   }
                 }
                 else
-                  activities.deleteDocument(documentIdentifier, docVersion);
+                  activities.noDocument(documentIdentifier, docVersion);
               }
               else
               {
                 if (Logging.connectors.isDebugEnabled())
                   Logging.connectors.debug("Meridio: Expected temporary file was not present - skipping document '"+new Long(docId).toString() + "'");
-                activities.deleteDocument(documentIdentifier, docVersion);
+                activities.deleteDocument(documentIdentifier);
               }
             }
             finally
diff --git a/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java b/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java
index 973f0ce..0b1360d 100644
--- a/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java
+++ b/connectors/nulloutput/connector/src/main/java/org/apache/manifoldcf/agents/output/nullconnector/NullConnector.java
@@ -22,6 +22,7 @@
 import org.apache.manifoldcf.agents.interfaces.*;
 
 import java.util.*;
+import java.io.*;
 
 /** This is a null output connector.  It eats all output and simply logs the events.
 */
@@ -109,10 +110,10 @@
   * the document will not need to be sent again to the output data store.
   */
   @Override
-  public String getOutputDescription(OutputSpecification spec)
+  public VersionContext getPipelineDescription(Specification spec)
     throws ManifoldCFException, ServiceInterruption
   {
-    return "";
+    return new VersionContext("",params,spec);
   }
 
   /** Add (or replace) a document in the output data store using the connector.
@@ -130,8 +131,8 @@
   *@return the document status (accepted or permanently rejected).
   */
   @Override
-  public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
-    throws ManifoldCFException, ServiceInterruption
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+    throws ManifoldCFException, ServiceInterruption, IOException
   {
     // Establish a session
     getSession();
@@ -167,4 +168,24 @@
     activities.recordActivity(null,JOB_COMPLETE_ACTIVITY,null,"","OK",null);
   }
 
+  /** Obtain the name of the form check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form check javascript method.
+  */
+  @Override
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecification";
+  }
+
+  /** Obtain the name of the form presave check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form presave check javascript method.
+  */
+  @Override
+  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
+  }
+
 }
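
The two overrides added here follow the sequence-number naming scheme used throughout this patch: every per-connection form artifact is prefixed with "s<sequence number>_" so that several pipeline stages can coexist on one job page without name collisions. A condensed sketch (the class name is hypothetical; the method bodies mirror the change above):

    import org.apache.manifoldcf.agents.output.BaseOutputConnector;

    // Hypothetical connector showing the per-connection naming convention.
    public class SequencedNamesSketch extends BaseOutputConnector
    {
      @Override
      public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
      {
        // e.g. "s2_checkSpecification" for the third connection in the job
        return "s"+connectionSequenceNumber+"_checkSpecification";
      }

      @Override
      public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
      {
        return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
      }
    }
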
diff --git a/connectors/nulltransformation/connector/src/main/java/org/apache/manifoldcf/agents/transformation/nullconnector/NullConnector.java b/connectors/nulltransformation/connector/src/main/java/org/apache/manifoldcf/agents/transformation/nullconnector/NullConnector.java
index beb1a15..6fe24a2 100644
--- a/connectors/nulltransformation/connector/src/main/java/org/apache/manifoldcf/agents/transformation/nullconnector/NullConnector.java
+++ b/connectors/nulltransformation/connector/src/main/java/org/apache/manifoldcf/agents/transformation/nullconnector/NullConnector.java
@@ -62,7 +62,7 @@
   *@throws IOException only if there's a stream error reading the document data.
   */
   @Override
-  public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption, IOException
   {
     long startTime = System.currentTimeMillis();
@@ -72,7 +72,7 @@
     try
     {
       long binaryLength = document.getBinaryLength();
-      int rval = activities.sendDocument(documentURI,document,authorityNameString);
+      int rval = activities.sendDocument(documentURI,document);
       length =  new Long(binaryLength);
       resultCode = (rval == DOCUMENTSTATUS_ACCEPTED)?"ACCEPTED":"REJECTED";
       return rval;
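
The transformation connector now receives a VersionContext rather than a raw version string, and sendDocument() no longer takes the authority name. A pass-through stage reduces to the sketch below (the wrapper class is hypothetical; the call itself is taken from the hunk above):

    import java.io.IOException;
    import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
    import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
    import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
    import org.apache.manifoldcf.core.interfaces.ManifoldCFException;

    // Hypothetical pass-through: forward the document unchanged to the next
    // pipeline stage and report the resulting status to the caller.
    public class PassThroughSketch
    {
      public static int passAlong(IOutputAddActivity activities,
        String documentURI, RepositoryDocument document)
        throws ManifoldCFException, ServiceInterruption, IOException
      {
        return activities.sendDocument(documentURI, document);
      }
    }
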
diff --git a/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerConnector.java b/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerConnector.java
index 758bcf9..75e8f00 100644
--- a/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerConnector.java
+++ b/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerConnector.java
@@ -27,6 +27,7 @@
 import java.util.Map;
 import java.util.TreeMap;
 import java.util.Locale;
+import java.util.HashMap;
 
 import org.apache.http.conn.HttpClientConnectionManager;
 import org.apache.http.client.HttpClient;
@@ -45,12 +46,12 @@
 import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
 import org.apache.manifoldcf.agents.interfaces.IOutputNotifyActivity;
 import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity;
-import org.apache.manifoldcf.agents.interfaces.OutputSpecification;
 import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
 import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
 import org.apache.manifoldcf.agents.output.BaseOutputConnector;
 import org.apache.manifoldcf.agents.output.opensearchserver.OpenSearchServerAction.CommandEnum;
 import org.apache.manifoldcf.agents.output.opensearchserver.OpenSearchServerConnection.Result;
+import org.apache.manifoldcf.core.interfaces.Specification;
 import org.apache.manifoldcf.core.interfaces.ConfigParams;
 import org.apache.manifoldcf.core.interfaces.ConfigurationNode;
 import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
@@ -58,6 +59,7 @@
 import org.apache.manifoldcf.core.interfaces.IThreadContext;
 import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
 import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
 import org.apache.manifoldcf.core.system.Logging;
 import org.json.JSONException;
 import org.json.JSONObject;
@@ -217,14 +219,24 @@
    * @throws ManifoldCFException
    */
   private static void outputResource(String resName, IHTTPOutput out,
-    Locale locale, OpenSearchServerParam params, String tabName) throws ManifoldCFException {
+    Locale locale, OpenSearchServerParam params, String tabName,
+    Integer sequenceNumber, Integer actualSequenceNumber) throws ManifoldCFException {
     Map<String,String> paramMap = null;
     if (params != null) {
       paramMap = params.buildMap();
       if (tabName != null) {
         paramMap.put("TabName", tabName);
       }
+      if (actualSequenceNumber != null)
+        paramMap.put("SelectedNum",actualSequenceNumber.toString());
     }
+    else
+    {
+      paramMap = new HashMap<String,String>();
+    }
+    if (sequenceNumber != null)
+      paramMap.put("SeqNum",sequenceNumber.toString());
+
     Messages.outputResourceWithVelocity(out,locale,resName,paramMap,false);
   }
 
@@ -234,7 +246,7 @@
       throws ManifoldCFException, IOException {
     super.outputConfigurationHeader(threadContext, out, locale, parameters, tabsArray);
     tabsArray.add(Messages.getString(locale,PARAMETERS_TAB_MESSAGE));
-    outputResource(EDIT_CONFIG_HEADER_FORWARD, out, locale, null, null);
+    outputResource(EDIT_CONFIG_HEADER_FORWARD, out, locale, null, null, null, null);
   }
 
   @Override
@@ -243,19 +255,48 @@
       throws ManifoldCFException, IOException {
     super.outputConfigurationBody(threadContext, out, locale, parameters, tabName);
     OpenSearchServerConfig config = this.getConfigParameters(parameters);
-    outputResource(EDIT_CONFIG_FORWARD, out, locale, config, tabName);
+    outputResource(EDIT_CONFIG_FORWARD, out, locale, config, tabName, null, null);
   }
 
+  /** Obtain the name of the form check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form check javascript method.
+  */
   @Override
-  public void outputSpecificationHeader(IHTTPOutput out,
-      Locale locale, OutputSpecification os, List<String> tabsArray)
-      throws ManifoldCFException, IOException {
-    super.outputSpecificationHeader(out, locale, os, tabsArray);
-    tabsArray.add(Messages.getString(locale,OPENSEARCHSERVER_TAB_MESSAGE));
-    outputResource(EDIT_SPEC_HEADER_FORWARD, out, locale, null, null);
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecification";
   }
 
-  final private SpecificationNode getSpecNode(OutputSpecification os) {
+  /** Obtain the name of the form presave check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form presave check javascript method.
+  */
+  @Override
+  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
+  }
+
+  /** Output the specification header section.
+  * This method is called in the head section of a job page which has selected a pipeline connection of the current type.  Its purpose is to add the required tabs
+  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this connection.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
+  */
+  @Override
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, List<String> tabsArray)
+    throws ManifoldCFException, IOException {
+    super.outputSpecificationHeader(out, locale, os, connectionSequenceNumber, tabsArray);
+    tabsArray.add(Messages.getString(locale,OPENSEARCHSERVER_TAB_MESSAGE));
+    outputResource(EDIT_SPEC_HEADER_FORWARD, out, locale, null, null, new Integer(connectionSequenceNumber), null);
+  }
+
+  final private SpecificationNode getSpecNode(Specification os) {
     int l = os.getChildCount();
     for (int i = 0; i < l; i++) {
       SpecificationNode node = os.getChild(i);
@@ -267,24 +308,46 @@
     return null;
   }
 
+  /** Output the specification body section.
+  * This method is called in the body section of a job page which has selected a pipeline connection of the current type.  Its purpose is to present the required form elements for editing.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
+  * form is "editjob".
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param actualSequenceNumber is the connection within the job that has currently been selected.
+  *@param tabName is the current tab name.
+  */
   @Override
-  public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os,
-      String tabName) throws ManifoldCFException, IOException {
-    super.outputSpecificationBody(out, locale, os, tabName);
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
+    throws ManifoldCFException, IOException {
     OpenSearchServerSpecs specs = getSpecParameters(os);
-    outputResource(EDIT_SPEC_FORWARD, out, locale, specs, tabName);
+    outputResource(EDIT_SPEC_FORWARD, out, locale, specs, tabName, new Integer(connectionSequenceNumber), new Integer(actualSequenceNumber));
   }
 
+  /** Process a specification post.
+  * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
+  * posted.  Its purpose is to gather form information and modify the transformation specification accordingly.
+  * The name of the posted form is "editjob".
+  *@param variableContext contains the post data, including binary file-upload information.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
+  */
   @Override
-  public String processSpecificationPost(IPostParameters variableContext,
-      Locale locale, OutputSpecification os) throws ManifoldCFException {
+  public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException {
     ConfigurationNode specNode = getSpecNode(os);
     boolean bAdd = (specNode == null);
     if (bAdd) {
       specNode = new SpecificationNode(
           OpenSearchServerSpecs.OPENSEARCHSERVER_SPECS_NODE);
     }
-    OpenSearchServerSpecs.contextToSpecNode(variableContext, specNode);
+    OpenSearchServerSpecs.contextToSpecNode(variableContext, specNode, connectionSequenceNumber);
     if (bAdd)
       os.addChild(os.getChildCount(), specNode);
     return null;
@@ -303,7 +366,7 @@
     return new OpenSearchServerConfig(configParams);
   }
 
-  final private OpenSearchServerSpecs getSpecParameters(OutputSpecification os)
+  final private OpenSearchServerSpecs getSpecParameters(Specification os)
       throws ManifoldCFException {
     return new OpenSearchServerSpecs(getSpecNode(os));
   }
@@ -325,10 +388,10 @@
   }
 
   @Override
-  public String getOutputDescription(OutputSpecification os)
+  public VersionContext getPipelineDescription(Specification os)
       throws ManifoldCFException {
     OpenSearchServerSpecs specs = new OpenSearchServerSpecs(getSpecNode(os));
-    return specs.toJson().toString();
+    return new VersionContext(specs.toJson().toString(),params,os);
   }
 
   @Override
@@ -370,13 +433,22 @@
   @Override
   public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out,
       Locale locale, ConfigParams parameters) throws ManifoldCFException, IOException {
-    outputResource(VIEW_CONFIG_FORWARD, out, locale, getConfigParameters(parameters), null);
+    outputResource(VIEW_CONFIG_FORWARD, out, locale, getConfigParameters(parameters), null, null, null);
   }
 
+  /** View specification.
+  * This method is called in the body section of a job's view page.  Its purpose is to present the pipeline specification information to the user.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param os is the current pipeline specification for this job.
+  */
   @Override
-  public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os)
-      throws ManifoldCFException, IOException {
-    outputResource(VIEW_SPEC_FORWARD, out, locale, getSpecParameters(os), null);
+  public void viewSpecification(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException, IOException {
+    outputResource(VIEW_SPEC_FORWARD, out, locale, getSpecParameters(os), null, new Integer(connectionSequenceNumber), null);
   }
 
   @Override
diff --git a/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerSpecs.java b/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerSpecs.java
index f0ff202..15b5547 100644
--- a/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerSpecs.java
+++ b/connectors/opensearchserver/connector/src/main/java/org/apache/manifoldcf/agents/output/opensearchserver/OpenSearchServerSpecs.java
@@ -93,9 +93,9 @@
   }
 
   public static void contextToSpecNode(IPostParameters variableContext,
-      ConfigurationNode specNode) {
+      ConfigurationNode specNode, int sequenceNumber) {
     for (ParameterEnum param : SPECIFICATIONLIST) {
-      String p = variableContext.getParameter(param.name().toLowerCase());
+      String p = variableContext.getParameter("s"+sequenceNumber+"_"+param.name().toLowerCase());
       if (p != null)
         specNode.setAttribute(param.name(), p);
     }
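
contextToSpecNode() now resolves each posted value under its sequence-scoped name, matching the "s${SEQNUM}_" prefixes written into the form below. A sketch of the lookup, with a hypothetical base name (IPostParameters.getParameter() is used exactly as in the hunk above):

    import org.apache.manifoldcf.core.interfaces.IPostParameters;

    // Illustrative only: the form field "maxfilesize" for connection 0 is
    // posted as "s0_maxfilesize", for connection 1 as "s1_maxfilesize", etc.
    public class ScopedParamSketch
    {
      public static String read(IPostParameters variableContext,
        int sequenceNumber, String baseName)
      {
        return variableContext.getParameter("s"+sequenceNumber+"_"+baseName);
      }
    }
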
diff --git a/connectors/opensearchserver/connector/src/main/resources/org/apache/manifoldcf/agents/output/opensearchserver/editSpecification.html b/connectors/opensearchserver/connector/src/main/resources/org/apache/manifoldcf/agents/output/opensearchserver/editSpecification.html
index 1d0dbd5..99385e6 100644
--- a/connectors/opensearchserver/connector/src/main/resources/org/apache/manifoldcf/agents/output/opensearchserver/editSpecification.html
+++ b/connectors/opensearchserver/connector/src/main/resources/org/apache/manifoldcf/agents/output/opensearchserver/editSpecification.html
@@ -15,14 +15,14 @@
  limitations under the License.
 -->
 
-#if($TabName == $ResourceBundle.getString('OpenSearchServerConnector.OpenSearchServer'))
+#if($TabName == $ResourceBundle.getString('OpenSearchServerConnector.OpenSearchServer') && ${SEQNUM} == ${SELECTEDNUM})
 
 <table class="displaytable">
   <tr>
     <td class="description">
       <nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenSearchServerConnector.MaxFileSizeBytesColon'))</nobr>
     </td>
-    <td class="value"><input name="maxfilesize" type="text"
+    <td class="value"><input name="s${SEQNUM}_maxfilesize" type="text"
       value="$Encoder.attributeEscape($MAXFILESIZE)" size="24" /></td>
   </tr>
   <tr>
@@ -30,21 +30,21 @@
       <nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenSearchServerConnector.AllowedMIMETypesColon'))</nobr>
     </td>
     <td class="value">
-      <textarea rows="10" cols="64" name="mimetypes">$Encoder.bodyEscape($MIMETYPES)</textarea>
+      <textarea rows="10" cols="64" name="s${SEQNUM}_mimetypes">$Encoder.bodyEscape($MIMETYPES)</textarea>
     </td>
   </tr>
   <tr>
     <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('OpenSearchServerConnector.AllowedFileExtensionsColon'))</nobr></td>
     <td class="value">
-      <textarea rows="10" cols="12" name="extensions">$Encoder.bodyEscape($EXTENSIONS)</textarea>
+      <textarea rows="10" cols="12" name="s${SEQNUM}_extensions">$Encoder.bodyEscape($EXTENSIONS)</textarea>
     </td>
   </tr>
 </table>
 
 #else
 
-<input type="hidden" name="maxfilesize" value="$Encoder.attributeEscape($MAXFILESIZE)" />
-<input type="hidden" name="mimetypes" value="$Encoder.attributeEscape($MIMETYPES)" />
-<input type="hidden" name="extensions" value="$Encoder.attributeEscape($EXTENSIONS)" />
+<input type="hidden" name="s${SEQNUM}_maxfilesize" value="$Encoder.attributeEscape($MAXFILESIZE)" />
+<input type="hidden" name="s${SEQNUM}_mimetypes" value="$Encoder.attributeEscape($MIMETYPES)" />
+<input type="hidden" name="s${SEQNUM}_extensions" value="$Encoder.attributeEscape($EXTENSIONS)" />
 
 #end
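
The template renders its editable fields only when its own sequence number matches the currently selected one; every other stage gets hidden inputs so its values survive the post. Both values come from the outputResource() change earlier in this patch; a condensed sketch of that wiring (the helper class is hypothetical):

    import java.util.HashMap;
    import java.util.Map;

    // The Velocity template compares ${SEQNUM} with ${SELECTEDNUM}; these are
    // the map entries that feed those variables.
    public class TemplateParamSketch
    {
      public static Map<String,String> build(int sequenceNumber,
        Integer actualSequenceNumber)
      {
        Map<String,String> paramMap = new HashMap<String,String>();
        paramMap.put("SeqNum", Integer.toString(sequenceNumber));
        if (actualSequenceNumber != null)
          paramMap.put("SelectedNum", actualSequenceNumber.toString());
        return paramMap;
      }
    }
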
diff --git a/connectors/pom.xml b/connectors/pom.xml
index ba3f868..93540af 100644
--- a/connectors/pom.xml
+++ b/connectors/pom.xml
@@ -61,6 +61,8 @@
     <module>email</module>
     <module>amazoncloudsearch</module>
     <module>forcedmetadata</module>
+    <module>tika</module>
+    <module>documentfilter</module>
   </modules>
 
 </project>
diff --git a/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java b/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
index 979cdd1..15aedc3 100644
--- a/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
+++ b/connectors/rss/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/rss/RSSConnector.java
@@ -1239,7 +1239,7 @@
         // Leave document in jobqueue, but do NOT get rid of it, or we will wind up seeing it queued again by
         // somebody else.  We *do* have to signal the document to be removed from the index, however, or it will
         // stick around until the job is deleted.
-        activities.deleteDocument(urlValue,version);
+        activities.noDocument(urlValue,version);
         continue;
       }
 
@@ -1528,7 +1528,7 @@
         }
         else
         {
-          activities.deleteDocument(urlValue,version);
+          activities.noDocument(urlValue,version);
 
           if (Logging.connectors.isDebugEnabled())
             Logging.connectors.debug("RSS: Skipping document '"+urlValue+"' because it cannot be indexed");
diff --git a/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java b/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
index 3fb5141..27fb4e9 100644
--- a/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
+++ b/connectors/sharepoint/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/sharepoint/SharePointRepository.java
@@ -1266,6 +1266,7 @@
           // === List style identifier ===
           if (dListSeparatorIndex == documentIdentifier.length() - 3)
           {
+            // Chained connectors always scan parent nodes, so they don't bother setting a version
             String siteListPath = documentIdentifier.substring(0,documentIdentifier.length()-3);
             int listCutoff = siteListPath.lastIndexOf( "/" );
             String site = siteListPath.substring(0,listCutoff);
@@ -1278,7 +1279,7 @@
             if (listID != null)
             {
               String encodedSitePath = encodePath(site);
-              
+                
               // Get the list's fields
               Map<String,String> fieldNames = proxy.getFieldList( encodedSitePath, listID );
               if (fieldNames != null)
@@ -1289,10 +1290,10 @@
                 {
                   fields[j++] = field;
                 }
-                
+                  
                 String[] accessTokens;
                 String[] denyTokens;
-                
+                  
                 if (forcedAcls == null)
                 {
                   // Security is off
@@ -1322,28 +1323,28 @@
                     // Site/list no longer exists, so delete entry
                     if (Logging.connectors.isDebugEnabled())
                       Logging.connectors.debug("SharePoint: No list found for list '"+siteListPath+"' - deleting");
-                    activities.deleteDocument(documentIdentifier,version);
+                    activities.deleteDocument(documentIdentifier);
                   }
                 }
                 else
                 {
                   if (Logging.connectors.isDebugEnabled())
                     Logging.connectors.debug("SharePoint: Access token lookup failed for list '"+siteListPath+"' - deleting");
-                  activities.deleteDocument(documentIdentifier,version);
+                  activities.deleteDocument(documentIdentifier);
                 }
               }
               else
               {
                 if (Logging.connectors.isDebugEnabled())
                   Logging.connectors.debug("SharePoint: Field list lookup failed for list '"+siteListPath+"' - deleting");
-                activities.deleteDocument(documentIdentifier,version);
+                activities.deleteDocument(documentIdentifier);
               }
             }
             else
             {
               if (Logging.connectors.isDebugEnabled())
                 Logging.connectors.debug("SharePoint: GUID lookup failed for list '"+siteListPath+"' - deleting");
-              activities.deleteDocument(documentIdentifier,version);
+              activities.deleteDocument(documentIdentifier);
             }
           }
           else
@@ -1413,7 +1414,7 @@
               {
                 if (Logging.connectors.isDebugEnabled())
                   Logging.connectors.debug("SharePoint: List '"+decodedListPath+"' no longer exists - deleting item '"+documentIdentifier+"'");
-                activities.deleteDocument(documentIdentifier,version);
+                activities.deleteDocument(documentIdentifier);
                 i++;
                 continue;
               }
@@ -1479,7 +1480,7 @@
                     // Item has vanished
                     if (Logging.connectors.isDebugEnabled())
                       Logging.connectors.debug("SharePoint: Item metadata fetch failure indicated that item is gone: '"+documentIdentifier+"' - removing");
-                    activities.deleteDocument(documentIdentifier,version);
+                    activities.noDocument(documentIdentifier,version);
                     i++;
                     continue;
                   }
@@ -1536,7 +1537,7 @@
                 }
                 else
                   // Document too long (should never happen; length is 0)
-                  activities.deleteDocument( documentIdentifier, version );
+                  activities.noDocument( documentIdentifier, version );
               }
             }
             else
@@ -1584,7 +1585,7 @@
                     accessTokens, denyTokens, createdDate, modifiedDate, null, guid, sDesc))
                   {
                     // Document not indexed for whatever reason
-                    activities.deleteDocument(documentIdentifier,version);
+                    activities.noDocument(documentIdentifier,version);
                     i++;
                     continue;
                   }
@@ -1593,7 +1594,7 @@
                 {
                   if (Logging.connectors.isDebugEnabled())
                     Logging.connectors.debug("SharePoint: Skipping attachment '"+documentIdentifier+"' because no parent guid found");
-                  activities.deleteDocument(documentIdentifier,version);
+                  activities.noDocument(documentIdentifier,version);
                   i++;
                   continue;
                 }
@@ -1607,7 +1608,7 @@
           // === Library style identifier ===
           if (dLibSeparatorIndex == documentIdentifier.length() - 2)
           {
-            // It's a library.
+            // Chained document parents are always rescanned
             String siteLibPath = documentIdentifier.substring(0,documentIdentifier.length()-2);
             int libCutoff = siteLibPath.lastIndexOf( "/" );
             String site = siteLibPath.substring(0,libCutoff);
@@ -1631,10 +1632,10 @@
                 {
                   fields[j++] = field;
                 }
-                
+                  
                 String[] accessTokens;
                 String[] denyTokens;
-                
+                  
                 if (forcedAcls == null)
                 {
                   // Security is off
@@ -1664,28 +1665,28 @@
                     // Site/library no longer exists, so delete entry
                     if (Logging.connectors.isDebugEnabled())
                       Logging.connectors.debug("SharePoint: No list found for library '"+siteLibPath+"' - deleting");
-                    activities.deleteDocument(documentIdentifier,version);
+                    activities.deleteDocument(documentIdentifier);
                   }
                 }
                 else
                 {
                   if (Logging.connectors.isDebugEnabled())
                     Logging.connectors.debug("SharePoint: Access token lookup failed for library '"+siteLibPath+"' - deleting");
-                  activities.deleteDocument(documentIdentifier,version);
+                  activities.deleteDocument(documentIdentifier);
                 }
               }
               else
               {
                 if (Logging.connectors.isDebugEnabled())
                   Logging.connectors.debug("SharePoint: Field list lookup failed for library '"+siteLibPath+"' - deleting");
-                activities.deleteDocument(documentIdentifier,version);
+                activities.deleteDocument(documentIdentifier);
               }
             }
             else
             {
               if (Logging.connectors.isDebugEnabled())
                 Logging.connectors.debug("SharePoint: GUID lookup failed for library '"+siteLibPath+"' - deleting");
-              activities.deleteDocument(documentIdentifier,version);
+              activities.deleteDocument(documentIdentifier);
             }
           }
           else
@@ -1751,7 +1752,7 @@
                 {
                   if (Logging.connectors.isDebugEnabled())
                     Logging.connectors.debug("SharePoint: Library '"+decodedLibPath+"' no longer exists - deleting document '"+documentIdentifier+"'");
-                  activities.deleteDocument(documentIdentifier,version);
+                  activities.deleteDocument(documentIdentifier);
                   i++;
                   continue;
                 }
@@ -1763,7 +1764,7 @@
                   // Document has vanished
                   if (Logging.connectors.isDebugEnabled())
                     Logging.connectors.debug("SharePoint: Document metadata fetch failure indicated that document is gone: '"+documentIdentifier+"' - removing");
-                  activities.deleteDocument(documentIdentifier,version);
+                  activities.noDocument(documentIdentifier,version);
                   i++;
                   continue;
                 }
@@ -1774,7 +1775,7 @@
                 acls, denyAcls, createdDate, modifiedDate, metadataValues, guid, sDesc))
               {
                 // Document not indexed for whatever reason
-                activities.deleteDocument(documentIdentifier,version);
+                activities.noDocument(documentIdentifier,version);
                 i++;
                 continue;
               }
@@ -1784,6 +1785,8 @@
         else
         {
           // === Site-style identifier ===
+          activities.noDocument(documentIdentifier,version);
+
           // Strip off the trailing "/" to get the site name.
           String decodedSitePath = documentIdentifier.substring(0,documentIdentifier.length()-1);
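
Note the split this file makes between the two calls: containers (lists, libraries) that have vanished from SharePoint are hard-deleted with the one-argument deleteDocument(), while items that merely cannot be fetched or indexed this pass get noDocument() with their version string. A sketch of the decision rule as these hunks apply it (the flags are hypothetical):

    import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
    import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
    import org.apache.manifoldcf.crawler.interfaces.IProcessActivity;

    // Hypothetical illustration of the container-vs-document distinction.
    public class SharePointDisposalSketch
    {
      public static void dispose(IProcessActivity activities,
        String documentIdentifier, String version,
        boolean containerGone, boolean notIndexableThisPass)
        throws ManifoldCFException, ServiceInterruption
      {
        if (containerGone)
          activities.deleteDocument(documentIdentifier);       // queue entry removed
        else if (notIndexableThisPass)
          activities.noDocument(documentIdentifier, version);  // kept, not indexed
      }
    }
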
 
diff --git a/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java b/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
index 7392328..c480807 100644
--- a/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
+++ b/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
@@ -36,6 +36,7 @@
 import java.util.*;
 import java.util.regex.*;
 
+import org.apache.http.Consts;
 import org.apache.http.auth.AuthScope;
 import org.apache.http.auth.Credentials;
 import org.apache.http.auth.UsernamePasswordCredentials;
@@ -55,6 +56,7 @@
 import org.apache.solr.common.util.ContentStream;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.client.solrj.impl.HttpClientUtil;
+import org.apache.solr.common.SolrInputDocument;
 
 
 /**
@@ -82,25 +84,29 @@
   protected SolrServer solrServer = null;
   
   // Action URI pieces
-  private String postUpdateAction;
-  private String postRemoveAction;
-  private String postStatusAction;
+  private final String postUpdateAction;
+  private final String postRemoveAction;
+  private final String postStatusAction;
   
   // Attribute names
-  private String allowAttributeName;
-  private String denyAttributeName;
-  private String idAttributeName;
-  private String modifiedDateAttributeName;
-  private String createdDateAttributeName;
-  private String indexedDateAttributeName;
-  private String fileNameAttributeName;
-  private String mimeTypeAttributeName;
+  private final String allowAttributeName;
+  private final String denyAttributeName;
+  private final String idAttributeName;
+  private final String modifiedDateAttributeName;
+  private final String createdDateAttributeName;
+  private final String indexedDateAttributeName;
+  private final String fileNameAttributeName;
+  private final String mimeTypeAttributeName;
+  private final String contentAttributeName;
+  
+  // Whether we use extract/update handler or not
+  private final boolean useExtractUpdateHandler;
   
   // Document max length
-  private Long maxDocumentLength;
+  private final Long maxDocumentLength;
 
   // Commit-within flag
-  private String commitWithin;
+  private final String commitWithin;
 
   // Constants we need
   private static final String LITERAL = "literal.";
@@ -118,9 +124,9 @@
     String updatePath, String removePath, String statusPath,
     String allowAttributeName, String denyAttributeName, String idAttributeName,
     String modifiedDateAttributeName, String createdDateAttributeName, String indexedDateAttributeName,
-    String fileNameAttributeName, String mimeTypeAttributeName,
+    String fileNameAttributeName, String mimeTypeAttributeName, String contentAttributeName,
     Long maxDocumentLength,
-    String commitWithin)
+    String commitWithin, boolean useExtractUpdateHandler)
     throws ManifoldCFException
   {
     // These are the paths to the handlers in Solr that deal with the actions we need to do
@@ -138,6 +144,8 @@
     this.indexedDateAttributeName = indexedDateAttributeName;
     this.fileNameAttributeName = fileNameAttributeName;
     this.mimeTypeAttributeName = mimeTypeAttributeName;
+    this.contentAttributeName = contentAttributeName;
+    this.useExtractUpdateHandler = useExtractUpdateHandler;
     
     this.maxDocumentLength = maxDocumentLength;
     
@@ -164,9 +172,9 @@
     String realm, String userID, String password,
     String allowAttributeName, String denyAttributeName, String idAttributeName,
     String modifiedDateAttributeName, String createdDateAttributeName, String indexedDateAttributeName,
-    String fileNameAttributeName, String mimeTypeAttributeName,
+    String fileNameAttributeName, String mimeTypeAttributeName, String contentAttributeName,
     IKeystoreManager keystoreManager, Long maxDocumentLength,
-    String commitWithin)
+    String commitWithin, boolean useExtractUpdateHandler)
     throws ManifoldCFException
   {
     // These are the paths to the handlers in Solr that deal with the actions we need to do
@@ -184,6 +192,8 @@
     this.indexedDateAttributeName = indexedDateAttributeName;
     this.fileNameAttributeName = fileNameAttributeName;
     this.mimeTypeAttributeName = mimeTypeAttributeName;
+    this.contentAttributeName = contentAttributeName;
+    this.useExtractUpdateHandler = useExtractUpdateHandler;
     
     this.maxDocumentLength = maxDocumentLength;
 
@@ -506,12 +516,12 @@
    * @param document is the document structure to ingest.
    * @param arguments are the configuration arguments to pass in the post.  Key is argument name, value is a list of the argument values.
    * @param keepAllMetadata
-   *@param authorityNameString is the name of the governing authority for this document's acls, or null if none.
+   * @param authorityNameString is the name of the governing authority for this document's acls, or null if none.
    * @param activities is the activities object, so we can report what's happening.
    * @return true if the ingestion was successful, or false if the ingestion is illegal.
   * @throws ManifoldCFException, ServiceInterruption
   */
   public boolean indexPost(String documentURI,
-    RepositoryDocument document, Map arguments, Map<String, List<String>> sourceTargets,
+    RepositoryDocument document, Map<String,List<String>> arguments, Map<String, List<String>> sourceTargets,
     boolean keepAllMetadata, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption
   {
@@ -544,7 +554,7 @@
     try
     {
       IngestThread t = new IngestThread(documentURI,document,arguments,keepAllMetadata,sourceTargets,
-                                        aclsMap,denyAclsMap,commitWithin);
+                                        aclsMap,denyAclsMap);
       try
       {
         t.start();
@@ -803,6 +813,18 @@
     }
   }
   
+  /**
+  * Output an ACL level into a SolrInputDocument.
+  */
+  protected void writeACLsInSolrDoc( SolrInputDocument inputDoc, String aclType, String[] acl, String[] denyAcl )
+  {
+    String metadataACLName = allowAttributeName + aclType;
+    inputDoc.addField( metadataACLName, acl );
+
+    String metadataDenyACLName = denyAttributeName + aclType;
+    inputDoc.addField( metadataDenyACLName, denyAcl );
+  }
+
   /** Killable thread that does ingestions.
   * Java 1.5 stopped permitting thread interruptions to abort socket waits.  As a result, it is impossible to get threads to shutdown cleanly that are doing
   * such waits.  So, the places where this happens are segregated in their own threads so that they can be just abandoned.
@@ -817,7 +839,6 @@
     protected final Map<String,List<String>> sourceTargets;
     protected final Map<String,String[]> aclsMap;
     protected final Map<String,String[]> denyAclsMap;
-    protected final String commitWithin;
     protected final boolean keepAllMetadata;
     
     protected Long activityStart = null;
@@ -830,8 +851,7 @@
 
     public IngestThread(String documentURI, RepositoryDocument document,
       Map<String, List<String>> arguments, boolean keepAllMetadata, Map<String, List<String>> sourceTargets,
-      Map<String,String[]> aclsMap, Map<String,String[]> denyAclsMap,
-      String commitWithin)
+      Map<String,String[]> aclsMap, Map<String,String[]> denyAclsMap)
     {
       super();
       setDaemon(true);
@@ -841,7 +861,6 @@
       this.aclsMap = aclsMap;
       this.denyAclsMap = denyAclsMap;
       this.sourceTargets = sourceTargets;
-      this.commitWithin = commitWithin;
       this.keepAllMetadata=keepAllMetadata;
     }
 
@@ -860,81 +879,17 @@
         // Open a socket to ingest, and to the response stream to get the post result
         try
         {
+          SolrInputDocument currentSolrDoc = new SolrInputDocument();
           ContentStreamUpdateRequest contentStreamUpdateRequest = new ContentStreamUpdateRequest(postUpdateAction);
-          
-          ModifiableSolrParams out = new ModifiableSolrParams();
-          
-          // Write the id field
-          writeField(out,LITERAL+idAttributeName,documentURI);
-          // Write the rest of the attributes
-          if (modifiedDateAttributeName != null)
+          if ( useExtractUpdateHandler )
           {
-            Date date = document.getModifiedDate();
-            if (date != null)
-              // Write value
-              writeField(out,LITERAL+modifiedDateAttributeName,DateParser.formatISO8601Date(date));
+            buildExtractUpdateHandlerRequest( length, is, contentType, contentName,
+              contentStreamUpdateRequest );
           }
-          if (createdDateAttributeName != null)
+          else
           {
-            Date date = document.getCreatedDate();
-            if (date != null)
-              // Write value
-              writeField(out,LITERAL+createdDateAttributeName,DateParser.formatISO8601Date(date));
+            currentSolrDoc = buildSolrDocument( length, is );
           }
-          if (indexedDateAttributeName != null)
-          {
-            Date date = document.getIndexingDate();
-            if (date != null)
-              // Write value
-              writeField(out,LITERAL+indexedDateAttributeName,DateParser.formatISO8601Date(date));
-          }
-          if (fileNameAttributeName != null)
-          {
-            String fileName = document.getFileName();
-            if (fileName != null)
-              writeField(out,LITERAL+fileNameAttributeName,fileName);
-          }
-          if (mimeTypeAttributeName != null)
-          {
-            String mimeType = document.getMimeType();
-            if (mimeType != null)
-              writeField(out,LITERAL+mimeTypeAttributeName,mimeType);
-          }
-          
-          // Write the access token information
-          // Both maps have the same keys.
-          Iterator<String> typeIterator = aclsMap.keySet().iterator();
-          while (typeIterator.hasNext())
-          {
-            String aclType = typeIterator.next();
-            writeACLs(out,aclType,aclsMap.get(aclType),denyAclsMap.get(aclType));
-          }
-
-          // Write the arguments
-          for (String name : arguments.keySet())
-          {
-            List<String> values = arguments.get(name);
-            writeField(out,name,values);
-          }
-
-          // Write the metadata, each in a field by itself
-           buildSolrParamsFromMetadata(out);
-             
-          // These are unnecessary now in the case of non-solrcloud setups, because we overrode the SolrJ posting method to use multipart.
-          //writeField(out,LITERAL+"stream_size",String.valueOf(length));
-          //writeField(out,LITERAL+"stream_name",document.getFileName());
-          
-          // General hint for Tika
-          if (document.getFileName() != null)
-            writeField(out,"resource.name",document.getFileName());
-          
-          // Write the commitWithin parameter
-          if (commitWithin != null)
-            writeField(out,COMMITWITHIN_METADATA,commitWithin);
-
-          contentStreamUpdateRequest.setParams(out);
-          
-          contentStreamUpdateRequest.addContentStream(new RepositoryDocumentStream(is,length,contentType,contentName));
 
           // Fire off the request.
           // Note: I need to know whether the document has been permanently rejected or not, but we currently have
@@ -942,8 +897,16 @@
           try
           {
             readFromDocumentStreamYet = true;
-            UpdateResponse response = contentStreamUpdateRequest.process(solrServer);
-            
+            UpdateResponse response;
+            if ( useExtractUpdateHandler )
+            {
+              response = contentStreamUpdateRequest.process( solrServer );
+            }
+            else
+            {
+              response = solrServer.add( currentSolrDoc );
+            }
+
             // Successful completion
             activityStart = new Long(fullStartTime);
             activityBytes = new Long(length);
@@ -1008,6 +971,174 @@
       }
     }
 
+    private SolrInputDocument buildSolrDocument( long length, InputStream is )
+      throws IOException
+    {
+      SolrInputDocument outputDoc = new SolrInputDocument();
+
+      // Write the id field
+      outputDoc.addField( idAttributeName, documentURI );
+      
+      if (contentAttributeName != null)
+      {
+        // Copy the content into a string.  This is a bad thing to do, but we have no choice given SolrJ architecture at this time.
+        // We enforce a size limit upstream.
+        Reader r = new InputStreamReader(is, Consts.UTF_8);
+        StringBuilder sb = new StringBuilder((int)length);
+        char[] buffer = new char[65536];
+        while (true)
+        {
+          int amt = r.read(buffer,0,buffer.length);
+          if (amt == -1)
+            break;
+          sb.append(buffer,0,amt);
+        }
+        outputDoc.addField( contentAttributeName, sb.toString() );
+      }
+      
+      // Write the rest of the attributes
+      if ( modifiedDateAttributeName != null )
+      {
+        Date date = document.getModifiedDate();
+        if ( date != null )
+        {
+          outputDoc.addField( modifiedDateAttributeName, DateParser.formatISO8601Date( date ) );
+        }
+      }
+      if ( createdDateAttributeName != null )
+      {
+        Date date = document.getCreatedDate();
+        if ( date != null )
+        {
+          outputDoc.addField( createdDateAttributeName, DateParser.formatISO8601Date( date ) );
+        }
+
+      }
+      if ( indexedDateAttributeName != null )
+      {
+        Date date = document.getIndexingDate();
+        if ( date != null )
+        {
+          outputDoc.addField( indexedDateAttributeName, DateParser.formatISO8601Date( date ) );
+        }
+      }
+      if ( fileNameAttributeName != null )
+      {
+        String fileName = document.getFileName();
+        if ( fileName != null )
+        {
+          outputDoc.addField( fileNameAttributeName, fileName );
+        }
+      }
+      if ( mimeTypeAttributeName != null )
+      {
+        String mimeType = document.getMimeType();
+        if ( mimeType != null )
+        {
+          outputDoc.addField( mimeTypeAttributeName, mimeType );
+        }
+      }
+
+      Iterator<String> typeIterator = aclsMap.keySet().iterator();
+      while (typeIterator.hasNext())
+      {
+        String aclType = typeIterator.next();
+        writeACLsInSolrDoc(outputDoc,aclType,aclsMap.get(aclType),denyAclsMap.get(aclType));
+      }
+
+      // Write the arguments
+      for ( String name : arguments.keySet() )
+      {
+        List<String> values = arguments.get( name );
+        outputDoc.addField( name, values );
+      }
+
+      // Write the metadata, each in a field by itself
+      buildSolrParamsFromMetadata( outputDoc );
+
+      return outputDoc;
+    }
+
+    private void buildExtractUpdateHandlerRequest( long length, InputStream is, String contentType,
+      String contentName,
+      ContentStreamUpdateRequest contentStreamUpdateRequest )
+      throws IOException
+    {
+      ModifiableSolrParams out = new ModifiableSolrParams();
+          
+      // Write the id field
+      writeField(out,LITERAL+idAttributeName,documentURI);
+      // Write the rest of the attributes
+      if (modifiedDateAttributeName != null)
+      {
+        Date date = document.getModifiedDate();
+        if (date != null)
+          // Write value
+          writeField(out,LITERAL+modifiedDateAttributeName,DateParser.formatISO8601Date(date));
+      }
+      if (createdDateAttributeName != null)
+      {
+        Date date = document.getCreatedDate();
+        if (date != null)
+          // Write value
+          writeField(out,LITERAL+createdDateAttributeName,DateParser.formatISO8601Date(date));
+      }
+      if (indexedDateAttributeName != null)
+      {
+        Date date = document.getIndexingDate();
+        if (date != null)
+          // Write value
+          writeField(out,LITERAL+indexedDateAttributeName,DateParser.formatISO8601Date(date));
+      }
+      if (fileNameAttributeName != null)
+      {
+        String fileName = document.getFileName();
+        if (fileName != null)
+          writeField(out,LITERAL+fileNameAttributeName,fileName);
+      }
+      if (mimeTypeAttributeName != null)
+      {
+        String mimeType = document.getMimeType();
+        if (mimeType != null)
+          writeField(out,LITERAL+mimeTypeAttributeName,mimeType);
+      }
+          
+      // Write the access token information
+      // Both maps have the same keys.
+      Iterator<String> typeIterator = aclsMap.keySet().iterator();
+      while (typeIterator.hasNext())
+      {
+        String aclType = typeIterator.next();
+        writeACLs(out,aclType,aclsMap.get(aclType),denyAclsMap.get(aclType));
+      }
+
+      // Write the arguments
+      for (String name : arguments.keySet())
+      {
+        List<String> values = arguments.get(name);
+        writeField(out,name,values);
+      }
+
+      // Write the metadata, each in a field by itself
+      buildSolrParamsFromMetadata(out);
+             
+      // These are unnecessary now in the case of non-solrcloud setups, because we overrode the SolrJ posting method to use multipart.
+      //writeField(out,LITERAL+"stream_size",String.valueOf(length));
+      //writeField(out,LITERAL+"stream_name",document.getFileName());
+          
+      // General hint for Tika
+      if (document.getFileName() != null)
+        writeField(out,"resource.name",document.getFileName());
+          
+      // Write the commitWithin parameter
+      if (commitWithin != null)
+        writeField(out,COMMITWITHIN_METADATA,commitWithin);
+
+      contentStreamUpdateRequest.setParams(out);
+          
+      contentStreamUpdateRequest.addContentStream(new RepositoryDocumentStream(is,length,contentType,contentName));
+    }
+
     /**
      * Builds the Solr parameter map for the update request.
      * For each mapping expressed, the configured renaming is applied to the metadata field name.
@@ -1044,6 +1175,34 @@
       }
     }
 
+    private void buildSolrParamsFromMetadata(SolrInputDocument outputDocument) throws IOException
+    {
+      if (this.keepAllMetadata)
+      {
+        Iterator<String> iter = document.getFields();
+        while (iter.hasNext())
+        {
+          String fieldName = iter.next();
+          List<String> mappings = sourceTargets.get(fieldName);
+          if (mappings != null)
+            for (String newFieldName : mappings)
+              applySingleMapping(fieldName, outputDocument, newFieldName);
+          else // fields not mentioned in the mapping are added as-is, because keepAllMetadata is true here
+            applySingleMapping(fieldName, outputDocument, fieldName);
+        }
+      }
+      else
+      {
+        // Don't keep all the metadata; keep only the fields listed in sourceTargets
+        for (String originalFieldName : sourceTargets.keySet())
+        {
+          List<String> mapping = sourceTargets.get(originalFieldName);
+          for (String newFieldName : mapping)
+            applySingleMapping(originalFieldName, outputDocument, newFieldName);
+        }
+      }
+    }
+
     private void applySingleMapping(String originalFieldName, ModifiableSolrParams out, String newFieldName) throws IOException {
       if(newFieldName != null && !newFieldName.isEmpty()) {
         if (newFieldName.toLowerCase(Locale.ROOT).equals(idAttributeName.toLowerCase(Locale.ROOT))) {
@@ -1054,6 +1213,16 @@
       }
     }
 
+    private void applySingleMapping(String originalFieldName, SolrInputDocument outputDocument, String newFieldName) throws IOException {
+      if(newFieldName != null && !newFieldName.isEmpty()) {
+        if (newFieldName.toLowerCase(Locale.ROOT).equals(idAttributeName.toLowerCase(Locale.ROOT))) {
+          newFieldName = ID_METADATA;
+        }
+        String[] values = document.getFieldAsStrings(originalFieldName);
+        outputDocument.addField( newFieldName, values );
+      }
+    }
+
     public void finishUp()
       throws InterruptedException, SolrServerException, IOException
     {
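
When the extracting update handler is bypassed, the connector now assembles a SolrInputDocument itself and posts it with SolrServer.add(), as the new buildSolrDocument() path above shows. A standalone SolrJ sketch of that path (field names are illustrative; in the connector they come from idAttributeName and contentAttributeName):

    import java.io.IOException;
    import org.apache.solr.client.solrj.SolrServer;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.response.UpdateResponse;
    import org.apache.solr.common.SolrInputDocument;

    // Minimal non-extracting index call: the document body is already text,
    // so it is attached as an ordinary field rather than as a content stream.
    public class SolrAddSketch
    {
      public static UpdateResponse index(SolrServer solrServer,
        String documentURI, String content)
        throws SolrServerException, IOException
      {
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", documentURI);      // idAttributeName in the connector
        doc.addField("content", content);     // contentAttributeName, if set
        return solrServer.add(doc);
      }
    }
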
diff --git a/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConfig.java b/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConfig.java
index d1b35df..ba60164 100644
--- a/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConfig.java
+++ b/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConfig.java
@@ -111,6 +111,10 @@
   public static final String PARAM_INCLUDEDMIMETYPES = "Included mime types";
   /** Excluded mime types */
   public static final String PARAM_EXCLUDEDMIMETYPES="Excluded mime types";
+  /** Parameter describing the use of Extract Update handler */
+  public static final String PARAM_EXTRACTUPDATE = "Use extract update handler";
+  /** Optional content field (if not using extract update handler) */
+  public static final String PARAM_CONTENTFIELD = "Solr content field name";
   /** Node describing an argument */
   public static final String NODE_ARGUMENT = "argument";
   /** Attribute with the argument name */
@@ -132,4 +136,5 @@
    */
   public static final String NODE_KEEPMETADATA = "keepAllMetadata";
 
+
 }
diff --git a/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java b/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
index 6ccb4d4..9dd49fe 100644
--- a/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
+++ b/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/SolrConnector.java
@@ -25,13 +25,15 @@
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
+import java.util.Set;
+import java.util.HashSet;
 
 import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
 import org.apache.manifoldcf.agents.interfaces.IOutputNotifyActivity;
 import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity;
-import org.apache.manifoldcf.agents.interfaces.OutputSpecification;
 import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
 import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
+import org.apache.manifoldcf.core.interfaces.Specification;
 import org.apache.manifoldcf.core.interfaces.ConfigNode;
 import org.apache.manifoldcf.core.interfaces.ConfigParams;
 import org.apache.manifoldcf.core.interfaces.ConfigurationNode;
@@ -43,6 +45,7 @@
 import org.apache.manifoldcf.core.interfaces.KeystoreManagerFactory;
 import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
 import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.interfaces.VersionContext;
 
 
 /** This is the output connector for SOLR.  Currently, no frills.
@@ -79,6 +82,17 @@
   /** Excluded mime types */
   protected Map<String,String> excludedMimeTypes = null;
   
+  // Attributes going into Solr
+  protected String idAttributeName = null;
+  protected String modifiedDateAttributeName = null;
+  protected String createdDateAttributeName = null;
+  protected String indexedDateAttributeName = null;
+  protected String fileNameAttributeName = null;
+  protected String mimeTypeAttributeName = null;
+  protected String contentAttributeName = null;
+  /** Use extracting update handler? */
+  protected boolean useExtractUpdateHandler = true;
+  
   /** Whether or not to commit */
   protected boolean doCommits = false;
   
@@ -162,6 +176,14 @@
     includedMimeTypes = null;
     excludedMimeTypesString = null;
     excludedMimeTypes = null;
+    idAttributeName = null;
+    modifiedDateAttributeName = null;
+    createdDateAttributeName = null;
+    indexedDateAttributeName = null;
+    fileNameAttributeName = null;
+    mimeTypeAttributeName = null;
+    contentAttributeName = null;
+    useExtractUpdateHandler = true;
     super.disconnect();
   }
 
@@ -183,30 +205,42 @@
       if (statusPath == null || statusPath.length() == 0)
         statusPath = "";
 
-      String idAttributeName = params.getParameter(SolrConfig.PARAM_IDFIELD);
+      idAttributeName = params.getParameter(SolrConfig.PARAM_IDFIELD);
       if (idAttributeName == null || idAttributeName.length() == 0)
         idAttributeName = "id";
 
-      String modifiedDateAttributeName = params.getParameter(SolrConfig.PARAM_MODIFIEDDATEFIELD);
+      modifiedDateAttributeName = params.getParameter(SolrConfig.PARAM_MODIFIEDDATEFIELD);
       if (modifiedDateAttributeName == null || modifiedDateAttributeName.length() == 0)
         modifiedDateAttributeName = null;
 
-      String createdDateAttributeName = params.getParameter(SolrConfig.PARAM_CREATEDDATEFIELD);
+      createdDateAttributeName = params.getParameter(SolrConfig.PARAM_CREATEDDATEFIELD);
       if (createdDateAttributeName == null || createdDateAttributeName.length() == 0)
         createdDateAttributeName = null;
   
-      String indexedDateAttributeName = params.getParameter(SolrConfig.PARAM_INDEXEDDATEFIELD);
+      indexedDateAttributeName = params.getParameter(SolrConfig.PARAM_INDEXEDDATEFIELD);
       if (indexedDateAttributeName == null || indexedDateAttributeName.length() == 0)
         indexedDateAttributeName = null;
 
-      String fileNameAttributeName = params.getParameter(SolrConfig.PARAM_FILENAMEFIELD);
+      fileNameAttributeName = params.getParameter(SolrConfig.PARAM_FILENAMEFIELD);
       if (fileNameAttributeName == null || fileNameAttributeName.length() == 0)
         fileNameAttributeName = null;
 
-      String mimeTypeAttributeName = params.getParameter(SolrConfig.PARAM_MIMETYPEFIELD);
+      mimeTypeAttributeName = params.getParameter(SolrConfig.PARAM_MIMETYPEFIELD);
       if (mimeTypeAttributeName == null || mimeTypeAttributeName.length() == 0)
         mimeTypeAttributeName = null;
 
+      contentAttributeName = params.getParameter(SolrConfig.PARAM_CONTENTFIELD);
+      if (contentAttributeName == null || contentAttributeName.length() == 0)
+        contentAttributeName = null;
+      
+      String useExtractUpdateHandlerValue = params.getParameter(SolrConfig.PARAM_EXTRACTUPDATE);
+      if (useExtractUpdateHandlerValue == null || useExtractUpdateHandlerValue.length() == 0)
+        useExtractUpdateHandler = true;
+      else
+        useExtractUpdateHandler = !useExtractUpdateHandlerValue.equals("false");
+      if (contentAttributeName == null && !useExtractUpdateHandler)
+        throw new ManifoldCFException("Content attribute name required for non-extract-update indexing");
+
       String commits = params.getParameter(SolrConfig.PARAM_COMMITS);
       if (commits == null || commits.length() == 0)
         commits = "true";
@@ -222,6 +256,8 @@
         maxDocumentLength = null;
       else
         maxDocumentLength = new Long(docMax);
+      if (maxDocumentLength == null && !useExtractUpdateHandler)
+        throw new ManifoldCFException("Maximum document length required for non-extract-update indexing");
       
       includedMimeTypesString = params.getParameter(SolrConfig.PARAM_INCLUDEDMIMETYPES);
       if (includedMimeTypesString == null || includedMimeTypesString.length() == 0)
@@ -313,8 +349,8 @@
             updatePath,removePath,statusPath,realm,userID,password,
             allowAttributeName,denyAttributeName,idAttributeName,
             modifiedDateAttributeName,createdDateAttributeName,indexedDateAttributeName,
-            fileNameAttributeName,mimeTypeAttributeName,
-            keystoreManager,maxDocumentLength,commitWithin);
+            fileNameAttributeName,mimeTypeAttributeName,contentAttributeName,
+            keystoreManager,maxDocumentLength,commitWithin,useExtractUpdateHandler);
           
         }
         catch (NumberFormatException e)
@@ -368,8 +404,8 @@
             updatePath,removePath,statusPath,
             allowAttributeName,denyAttributeName,idAttributeName,
             modifiedDateAttributeName,createdDateAttributeName,indexedDateAttributeName,
-            fileNameAttributeName,mimeTypeAttributeName,
-            maxDocumentLength,commitWithin);
+            fileNameAttributeName,mimeTypeAttributeName,contentAttributeName,
+            maxDocumentLength,commitWithin,useExtractUpdateHandler);
           
         }
         catch (NumberFormatException e)
@@ -456,156 +492,21 @@
   * the document will not need to be sent again to the output data store.
   */
   @Override
-  public String getOutputDescription(OutputSpecification spec)
+  public VersionContext getPipelineDescription(Specification spec)
     throws ManifoldCFException, ServiceInterruption
   {
     getSession();
-    
-    StringBuilder sb = new StringBuilder();
+    SpecPacker sp = new SpecPacker(spec);
+    return new VersionContext(sp.toPackedString(),params,spec);
+  }
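
The framework compares the returned version string verbatim against the one stored for each already-indexed document; a sketch of that contract, with illustrative values only:

    public class VersionCompareSketch
    {
      public static void main(String[] args)
      {
        // Real strings come from SpecPacker.toPackedString(); these are invented.
        String storedFromLastCrawl = "old-version";
        String currentDescription = "new-version";
        boolean mustReindex = !currentDescription.equals(storedFromLastCrawl);
        System.out.println(mustReindex); // true: spec or config changed
      }
    }
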
 
-    // All the arguments need to go into this string, since they affect ingestion.
-    Map args = new HashMap();
-    int i = 0;
-    while (i < params.getChildCount())
-    {
-      ConfigNode node = params.getChild(i++);
-      if (node.getType().equals(SolrConfig.NODE_ARGUMENT))
-      {
-        String attrName = node.getAttributeValue(SolrConfig.ATTRIBUTE_NAME);
-        ArrayList list = (ArrayList)args.get(attrName);
-        if (list == null)
-        {
-          list = new ArrayList();
-          args.put(attrName,list);
-        }
-        list.add(node.getAttributeValue(SolrConfig.ATTRIBUTE_VALUE));
-      }
-    }
-    
-    String[] sortArray = new String[args.size()];
-    Iterator iter = args.keySet().iterator();
-    i = 0;
-    while (iter.hasNext())
-    {
-      sortArray[i++] = (String)iter.next();
-    }
-    
-    // Always use sorted order, because we need this to be comparable.
-    java.util.Arrays.sort(sortArray);
-    
-    String[] fixedList = new String[2];
-    ArrayList nameValues = new ArrayList();
-    i = 0;
-    while (i < sortArray.length)
-    {
-      String name = sortArray[i++];
-      ArrayList values = (ArrayList)args.get(name);
-      java.util.Collections.sort(values);
-      int j = 0;
-      while (j < values.size())
-      {
-        String value = (String)values.get(j++);
-        fixedList[0] = name;
-        fixedList[1] = value;
-        StringBuilder pairBuffer = new StringBuilder();
-        packFixedList(pairBuffer,fixedList,'=');
-        nameValues.add(pairBuffer.toString());
-      }
-    }
-    
-    packList(sb,nameValues,'+');
-    
-    // Do the source/target pairs
-    i = 0;
-    Map<String, List<String>> sourceTargets = new HashMap<String, List<String>>();
-    boolean keepAllMetadata = true;
-    while (i < spec.getChildCount()) {
-      SpecificationNode sn = spec.getChild(i++);
-      
-      if(sn.getType().equals(SolrConfig.NODE_KEEPMETADATA)) {
-        String value = sn.getAttributeValue(SolrConfig.ATTRIBUTE_VALUE);
-        keepAllMetadata = Boolean.parseBoolean(value);
-      } else if (sn.getType().equals(SolrConfig.NODE_FIELDMAP)) {
-        String source = sn.getAttributeValue(SolrConfig.ATTRIBUTE_SOURCE);
-        String target = sn.getAttributeValue(SolrConfig.ATTRIBUTE_TARGET);
-        
-        if (target == null) {
-          target = "";
-        }
-        List<String> list = (List<String>)sourceTargets.get(source);
-        if (list == null) {
-          list = new ArrayList<String>();
-          sourceTargets.put(source, list);
-        }
-        list.add(target);
-      }
-    }
-    
-    sortArray = new String[sourceTargets.size()];
-    iter = sourceTargets.keySet().iterator();
-    i = 0;
-    while (iter.hasNext()) {
-      sortArray[i++] = (String)iter.next();
-    }
-    java.util.Arrays.sort(sortArray);
-    
-    ArrayList sourceTargetsList = new ArrayList();
-    i = 0;
-    while (i < sortArray.length) {
-      String source = sortArray[i++];
-      List<String> values = (List<String>)sourceTargets.get(source);
-      java.util.Collections.sort(values);
-      int j = 0;
-      while (j < values.size()) {
-        String target = (String)values.get(j++);
-        fixedList[0] = source;
-        fixedList[1] = target;
-        StringBuilder pairBuffer = new StringBuilder();
-        packFixedList(pairBuffer,fixedList,'=');
-        sourceTargetsList.add(pairBuffer.toString());
-      }
-    }
-    
-    packList(sb,sourceTargetsList,'+');
-
-    // Keep all metadata flag
-    if (keepAllMetadata)
-      sb.append('+');
-    else
-      sb.append('-');
-
-    // Here, append things which we have no intention of unpacking.  This includes stuff that comes from
-    // the configuration information, for instance.
-    
-    if (maxDocumentLength != null || includedMimeTypesString != null || excludedMimeTypesString != null)
-    {
-      // Length limitation.  We pack this because when it is changed we want to be sure we get any previously excluded documents.
-      if (maxDocumentLength != null)
-      {
-        sb.append('+');
-        pack(sb,maxDocumentLength.toString(),'+');
-      }
-      else
-        sb.append('-');
-      // Included mime types
-      if (includedMimeTypesString != null)
-      {
-        sb.append('+');
-        pack(sb,includedMimeTypesString,'+');
-      }
-      else
-        sb.append('-');
-      // Excluded mime types
-      if (excludedMimeTypesString != null)
-      {
-        sb.append('+');
-        pack(sb,excludedMimeTypesString,'+');
-      }
-      else
-        sb.append('-');
-    }
-    
-    return sb.toString();
+  private static final Set<String> acceptableMimeTypes = new HashSet<String>();
+  static
+  {
+    acceptableMimeTypes.add("text/plain;charset=utf-8");
+    acceptableMimeTypes.add("text/plain;charset=ascii");
+    acceptableMimeTypes.add("text/plain;charset=us-ascii");
+    acceptableMimeTypes.add("text/plain");
   }
 
   /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
@@ -614,15 +515,20 @@
   *@param mimeType is the mime type of the document.
   *@return true if the mime type is indexable by this connector.
   */
+  @Override
   public boolean checkMimeTypeIndexable(String outputDescription, String mimeType)
     throws ManifoldCFException, ServiceInterruption
   {
     getSession();
-    if (includedMimeTypes != null && includedMimeTypes.get(mimeType) == null)
-      return false;
-    if (excludedMimeTypes != null && excludedMimeTypes.get(mimeType) != null)
-      return false;
-    return super.checkMimeTypeIndexable(outputDescription,mimeType);
+    if (useExtractUpdateHandler)
+    {
+      if (includedMimeTypes != null && includedMimeTypes.get(mimeType) == null)
+        return false;
+      if (excludedMimeTypes != null && excludedMimeTypes.get(mimeType) != null)
+        return false;
+      return super.checkMimeTypeIndexable(outputDescription,mimeType);
+    }
+    return acceptableMimeTypes.contains(mimeType.toLowerCase(Locale.ROOT));
   }
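
In the non-extract path the gate is a simple case-insensitive set membership test; a standalone sketch:

    import java.util.HashSet;
    import java.util.Locale;
    import java.util.Set;

    public class MimeGateSketch
    {
      static final Set<String> acceptable = new HashSet<String>();
      static
      {
        acceptable.add("text/plain");
        acceptable.add("text/plain;charset=utf-8");
      }
      public static void main(String[] args)
      {
        // Mirrors the return statement above: lower-case, then test membership.
        System.out.println(acceptable.contains("TEXT/PLAIN".toLowerCase(Locale.ROOT))); // true
        System.out.println(acceptable.contains("application/pdf")); // false
      }
    }
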
 
   /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors
@@ -631,6 +537,7 @@
   *@param length is the length of the document.
   *@return true if the file is indexable.
   */
+  @Override
   public boolean checkLengthIndexable(String outputDescription, long length)
     throws ManifoldCFException, ServiceInterruption
   {
@@ -658,59 +565,13 @@
   public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption
   {
-    // Build the argument map we'll send.
-    Map args = new HashMap();
-    Map<String, List<String>> sourceTargets = new HashMap<String, List<String>>();
-    int index = 0;
-    ArrayList nameValues = new ArrayList();
-    index = unpackList(nameValues,outputDescription,index,'+');
-    ArrayList sts = new ArrayList();
-    index = unpackList(sts,outputDescription,index,'+');
-    // extract keep all metadata Flag
-    boolean keepAllMetadata = true;
-    if (index < outputDescription.length())
-    {
-      keepAllMetadata = (outputDescription.charAt(index++) == '+');
-    }
-    String[] fixedBuffer = new String[2];
-    
-    // Do the name/value pairs
-    int i = 0;
-    while (i < nameValues.size())
-    {
-      String x = (String)nameValues.get(i++);
-      unpackFixedList(fixedBuffer,x,0,'=');
-      String attrName = fixedBuffer[0];
-      ArrayList list = (ArrayList)args.get(attrName);
-      if (list == null)
-      {
-        list = new ArrayList();
-        args.put(attrName,list);
-      }
-      list.add(fixedBuffer[1]);
-    }
-    
-    // Do the source/target pairs
-    i = 0;
-    while (i < sts.size()) {
-      String x = (String)sts.get(i++);
-      unpackFixedList(fixedBuffer,x,0,'=');
-      String source = fixedBuffer[0];
-      String target = fixedBuffer[1];
-      List<String> list = (List<String>)sourceTargets.get(source);
-      if (list == null) {
-        list = new ArrayList<String>();
-        sourceTargets.put(source, list);
-      }
-      list.add(target);
-    }
-
+    SpecPacker sp = new SpecPacker(outputDescription);
 
     // Establish a session
     getSession();
 
     // Now, go off and call the ingest API.
-    if (poster.indexPost(documentURI,document,args,sourceTargets,keepAllMetadata,authorityNameString,activities))
+    if (poster.indexPost(documentURI,document,sp.getArgs(),sp.getMappings(),sp.keepAllMetadata(),authorityNameString,activities))
       return DOCUMENTSTATUS_ACCEPTED;
     return DOCUMENTSTATUS_REJECTED;
   }
@@ -966,6 +827,20 @@
 "    editconnection.maxdocumentlength.focus();\n"+
 "    return false;\n"+
 "  }\n"+
+"  if (editconnection.maxdocumentlength.value == \"\" && ((editconnection.extractupdatecheckbox.value == \"true\" && editconnection.extractupdate.checked == false) || (editconnection.extractupdatecheckbox.value != \"true\" && editconnection.extractupdate.value != \"true\")))\n"+
+"  {\n"+
+"    alert(\""+Messages.getBodyJavascriptString(locale,"SolrConnector.MaximumDocumentLengthRequiredUnlessExtractingUpdateHandler")+"\");\n"+
+"    SelectTab(\""+Messages.getBodyJavascriptString(locale,"SolrConnector.Documents")+"\");\n"+
+"    editconnection.maxdocumentlength.focus();\n"+
+"    return false;\n"+
+"  }\n"+
+"  if (editconnection.contentfield.value == \"\" && ((editconnection.extractupdatecheckbox.value == \"true\" && editconnection.extractupdate.checked == false) || (editconnection.extractupdatecheckbox.value != \"true\" && editconnection.extractupdate.value != \"true\")))\n"+
+"  {\n"+
+"    alert(\""+Messages.getBodyJavascriptString(locale,"SolrConnector.ContentFieldNameRequiredUnlessExtractingUpdateHandler")+"\");\n"+
+"    SelectTab(\""+Messages.getBodyJavascriptString(locale,"SolrConnector.Schema")+"\");\n"+
+"    editconnection.contentfield.focus();\n"+
+"    return false;\n"+
+"  }\n"+
 "  if (editconnection.commitwithin.value != \"\" && !isInteger(editconnection.commitwithin.value))\n"+
 "  {\n"+
 "    alert(\""+Messages.getBodyJavascriptString(locale,"SolrConnector.CommitWithinValueMustBeAnInteger")+"\");\n"+
@@ -1147,7 +1022,15 @@
     String mimeTypeField = parameters.getParameter(SolrConfig.PARAM_MIMETYPEFIELD);
     if (mimeTypeField == null)
       mimeTypeField = "";
+
+    String contentField = parameters.getParameter(SolrConfig.PARAM_CONTENTFIELD);
+    if (contentField == null)
+      contentField = "";
     
+    String useExtractUpdate = parameters.getParameter(SolrConfig.PARAM_EXTRACTUPDATE);
+    if (useExtractUpdate == null || useExtractUpdate.length() == 0)
+      useExtractUpdate = "true";
+
     String realm = parameters.getParameter(SolrConfig.PARAM_REALM);
     if (realm == null)
       realm = "";
@@ -1602,6 +1485,33 @@
 "      <input name=\"mimetypefield\" type=\"text\" size=\"32\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(mimeTypeField)+"\"/>\n"+
 "    </td>\n"+
 "  </tr>\n"+
+"  <tr>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"SolrConnector.UseExtractUpdateHandler") + "</nobr></td>\n"+
+"    <td class=\"value\">\n"+
+"      <input name=\"extractupdatecheckbox\" type=\"hidden\" value=\"true\"/>\n"+
+"      <input name=\"extractupdatepresent\" type=\"hidden\" value=\"true\"/>\n"
+      );
+      if (!useExtractUpdate.equals("false"))
+      {
+        out.print(
+"      <input name=\"extractupdate\" type=\"checkbox\" value=\"true\" checked=\"true\"/>\n"
+        );
+      }
+      else
+      {
+        out.print(
+"      <input name=\"extractupdate\" type=\"checkbox\" value=\"true\"/>\n"
+        );
+      }
+      out.print(
+"    </td>\n"+
+"  </tr>\n"+
+"  <tr>\n"+
+"    <td class=\"description\"><nobr>" + Messages.getBodyString(locale,"SolrConnector.ContentFieldName") + "</nobr></td>\n"+
+"    <td class=\"value\">\n"+
+"      <input name=\"contentfield\" type=\"text\" size=\"32\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(contentField)+"\"/>\n"+
+"    </td>\n"+
+"  </tr>\n"+
 "</table>\n"
       );
     }
@@ -1613,7 +1523,11 @@
 "<input type=\"hidden\" name=\"createddatefield\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(createdDateField)+"\"/>\n"+
 "<input type=\"hidden\" name=\"indexeddatefield\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(indexedDateField)+"\"/>\n"+
 "<input type=\"hidden\" name=\"filenamefield\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(fileNameField)+"\"/>\n"+
-"<input type=\"hidden\" name=\"mimetypefield\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(mimeTypeField)+"\"/>\n"
+"<input type=\"hidden\" name=\"mimetypefield\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(mimeTypeField)+"\"/>\n"+
+"<input type=\"hidden\" name=\"contentfield\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(contentField)+"\"/>\n"+
+"<input name=\"extractupdatecheckbox\" type=\"hidden\" value=\"false\"/>\n"+
+"<input type=\"hidden\" name=\"extractupdate\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(useExtractUpdate)+"\"/>\n"+
+"<input name=\"extractupdatepresent\" type=\"hidden\" value=\"true\"/>\n"
       );
     }
     
@@ -1913,6 +1827,19 @@
     if (mimeTypeField != null)
       parameters.setParameter(SolrConfig.PARAM_MIMETYPEFIELD,mimeTypeField);
 
+    String contentField = variableContext.getParameter("contentfield");
+    if (contentField != null)
+      parameters.setParameter(SolrConfig.PARAM_CONTENTFIELD,contentField);
+
+    String extractUpdatePresent = variableContext.getParameter("extractupdatepresent");
+    if (extractUpdatePresent != null)
+    {
+      String extractUpdate = variableContext.getParameter("extractupdate");
+      if (extractUpdate == null || extractUpdate.length() == 0)
+        extractUpdate = "false";
+      parameters.setParameter(SolrConfig.PARAM_EXTRACTUPDATE,extractUpdate);
+    }
+
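Browsers omit unchecked checkboxes from a POST entirely, so the hidden "extractupdatepresent" marker is what distinguishes "unchecked" from "form never rendered". A standalone sketch of that convention:

    import java.util.HashMap;
    import java.util.Map;

    public class CheckboxPostSketch
    {
      public static void main(String[] args)
      {
        Map<String,String> post = new HashMap<String,String>();
        post.put("extractupdatepresent", "true"); // checkbox was left unchecked
        if (post.get("extractupdatepresent") != null)
        {
          String extractUpdate = post.get("extractupdate");
          if (extractUpdate == null || extractUpdate.length() == 0)
            extractUpdate = "false";
          System.out.println(extractUpdate); // false
        }
      }
    }
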
     String realm = variableContext.getParameter("realm");
     if (realm != null)
       parameters.setParameter(SolrConfig.PARAM_REALM,realm);
@@ -2250,49 +2177,73 @@
     );
   }
   
+  /** Obtain the name of the form check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form check javascript method.
+  */
+  @Override
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecification";
+  }
+
+  /** Obtain the name of the form presave check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form presave check javascript method.
+  */
+  @Override
+  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
+  }
+
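With pipelines, several connectors can share one "editjob" form, so every form field and javascript function is namespaced by the connection's sequence number; a minimal sketch:

    public class SeqPrefixSketch
    {
      public static void main(String[] args)
      {
        int connectionSequenceNumber = 2; // illustrative
        String seqPrefix = "s" + connectionSequenceNumber + "_";
        System.out.println(seqPrefix + "checkSpecification");       // s2_checkSpecification
        System.out.println(seqPrefix + "solr_fieldmapping_source"); // s2_solr_fieldmapping_source
      }
    }
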
   /** Output the specification header section.
-  * This method is called in the head section of a job page which has selected an output connection of the current type.  Its purpose is to add the required tabs
+  * This method is called in the head section of a job page which has selected a pipeline connection of the current type.  Its purpose is to add the required tabs
   * to the list, and to output any javascript methods that might be needed by the job editing HTML.
   *@param out is the output to which any HTML should be sent.
-  *@param os is the current output specification for this job.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this connection.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
   *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
   */
   @Override
-  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, OutputSpecification os, List<String> tabsArray)
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, List<String> tabsArray)
     throws ManifoldCFException, IOException
   {
+    String seqPrefix = "s"+connectionSequenceNumber+"_";
     tabsArray.add(Messages.getString(locale,"SolrConnector.SolrFieldMapping"));
     out.print(
 "<script type=\"text/javascript\">\n"+
 "<!--\n"+
-"function checkOutputSpecification()\n"+
+"function "+seqPrefix+"checkSpecification()\n"+
 "{\n"+
 "  return true;\n"+
 "}\n"+
 "\n"+
-"function addFieldMapping()\n"+
+"function "+seqPrefix+"addFieldMapping()\n"+
 "{\n"+
-"  if (editjob.solr_fieldmapping_source.value == \"\")\n"+
+"  if (editjob."+seqPrefix+"solr_fieldmapping_source.value == \"\")\n"+
 "  {\n"+
 "    alert(\""+Messages.getBodyJavascriptString(locale,"SolrConnector.FieldMapMustHaveNonNullSource")+"\");\n"+
-"    editjob.solr_fieldmapping_source.focus();\n"+
+"    editjob."+seqPrefix+"solr_fieldmapping_source.focus();\n"+
 "    return;\n"+
 "  }\n"+
-"  editjob.solr_fieldmapping_op.value=\"Add\";\n"+
-"  postFormSetAnchor(\"solr_fieldmapping\");\n"+
+"  editjob."+seqPrefix+"solr_fieldmapping_op.value=\"Add\";\n"+
+"  postFormSetAnchor(\""+seqPrefix+"+solr_fieldmapping\");\n"+
 "}\n"+
 "\n"+
-"function deleteFieldMapping(i)\n"+
+"function "+seqPrefix+"deleteFieldMapping(i)\n"+
 "{\n"+
 "  // Set the operation\n"+
-"  eval(\"editjob.solr_fieldmapping_\"+i+\"_op.value=\\\"Delete\\\"\");\n"+
+"  eval(\"editjob."+seqPrefix+"solr_fieldmapping_\"+i+\"_op.value=\\\"Delete\\\"\");\n"+
 "  // Submit\n"+
-"  if (editjob.solr_fieldmapping_count.value==i)\n"+
-"    postFormSetAnchor(\"solr_fieldmapping\");\n"+
+"  if (editjob."+seqPrefix+"solr_fieldmapping_count.value==i)\n"+
+"    postFormSetAnchor(\""+seqPrefix+"solr_fieldmapping\");\n"+
 "  else\n"+
-"    postFormSetAnchor(\"solr_fieldmapping_\"+i)\n"+
+"    postFormSetAnchor(\""+seqPrefix+"solr_fieldmapping_\"+i)\n"+
 "  // Undo, so we won't get two deletes next time\n"+
-"  eval(\"editjob.solr_fieldmapping_\"+i+\"_op.value=\\\"Continue\\\"\");\n"+
+"  eval(\"editjob."+seqPrefix+"solr_fieldmapping_\"+i+\"_op.value=\\\"Continue\\\"\");\n"+
 "}\n"+
 "\n"+
 "//-->\n"+
@@ -2301,21 +2252,27 @@
   }
   
   /** Output the specification body section.
-  * This method is called in the body section of a job page which has selected an output connection of the current type.  Its purpose is to present the required form elements for editing.
+  * This method is called in the body section of a job page which has selected a pipeline connection of the current type.  Its purpose is to present the required form elements for editing.
   * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
   * form is "editjob".
   *@param out is the output to which any HTML should be sent.
-  *@param os is the current output specification for this job.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param actualSequenceNumber is the connection within the job that has currently been selected.
   *@param tabName is the current tab name.
   */
   @Override
-  public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os, String tabName)
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
     throws ManifoldCFException, IOException
   {
+    String seqPrefix = "s"+connectionSequenceNumber+"_";
+    
     int i = 0;
     
     // Field Mapping tab
-    if (tabName.equals(Messages.getString(locale,"SolrConnector.SolrFieldMapping")))
+    if (tabName.equals(Messages.getString(locale,"SolrConnector.SolrFieldMapping")) && connectionSequenceNumber == actualSequenceNumber)
     {
       out.print(
 "<table class=\"displaytable\">\n"+
@@ -2349,12 +2306,12 @@
             targetDisplay = "(remove)";
           }
          // Its prefix will be...
-          String prefix = "solr_fieldmapping_" + Integer.toString(fieldCounter);
+          String prefix = seqPrefix+"solr_fieldmapping_" + Integer.toString(fieldCounter);
           out.print(
 "        <tr class=\""+(((fieldCounter % 2)==0)?"evenformrow":"oddformrow")+"\">\n"+
 "          <td class=\"formcolumncell\">\n"+
 "            <a name=\""+prefix+"\">\n"+
-"              <input type=\"button\" value=\"Delete\" alt=\""+Messages.getAttributeString(locale,"SolrConnector.DeleteFieldMapping")+Integer.toString(fieldCounter+1)+"\" onclick='javascript:deleteFieldMapping("+Integer.toString(fieldCounter)+");'/>\n"+
+"              <input type=\"button\" value=\"Delete\" alt=\""+Messages.getAttributeString(locale,"SolrConnector.DeleteFieldMapping")+Integer.toString(fieldCounter+1)+"\" onclick='javascript:"+seqPrefix+"deleteFieldMapping("+Integer.toString(fieldCounter)+");'/>\n"+
 "              <input type=\"hidden\" name=\""+prefix+"_op\" value=\"Continue\"/>\n"+
 "              <input type=\"hidden\" name=\""+prefix+"_source\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(source)+"\"/>\n"+
 "              <input type=\"hidden\" name=\""+prefix+"_target\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(target)+"\"/>\n"+
@@ -2392,17 +2349,17 @@
 "        <tr class=\"formrow\"><td class=\"formseparator\" colspan=\"3\"><hr/></td></tr>\n"+
 "        <tr class=\"formrow\">\n"+
 "          <td class=\"formcolumncell\">\n"+
-"            <a name=\"solr_fieldmapping\">\n"+
-"              <input type=\"button\" value=\"" + Messages.getAttributeString(locale,"SolrConnector.Add") + "\" alt=\"" + Messages.getAttributeString(locale,"SolrConnector.AddFieldMapping") + "\" onclick=\"javascript:addFieldMapping();\"/>\n"+
+"            <a name=\""+seqPrefix+"solr_fieldmapping\">\n"+
+"              <input type=\"button\" value=\"" + Messages.getAttributeString(locale,"SolrConnector.Add") + "\" alt=\"" + Messages.getAttributeString(locale,"SolrConnector.AddFieldMapping") + "\" onclick=\"javascript:"+seqPrefix+"addFieldMapping();\"/>\n"+
 "            </a>\n"+
-"            <input type=\"hidden\" name=\"solr_fieldmapping_count\" value=\""+fieldCounter+"\"/>\n"+
-"            <input type=\"hidden\" name=\"solr_fieldmapping_op\" value=\"Continue\"/>\n"+
+"            <input type=\"hidden\" name=\""+seqPrefix+"solr_fieldmapping_count\" value=\""+fieldCounter+"\"/>\n"+
+"            <input type=\"hidden\" name=\""+seqPrefix+"solr_fieldmapping_op\" value=\"Continue\"/>\n"+
 "          </td>\n"+
 "          <td class=\"formcolumncell\">\n"+
-"            <nobr><input type=\"text\" size=\"15\" name=\"solr_fieldmapping_source\" value=\"\"/></nobr>\n"+
+"            <nobr><input type=\"text\" size=\"15\" name=\""+seqPrefix+"solr_fieldmapping_source\" value=\"\"/></nobr>\n"+
 "          </td>\n"+
 "          <td class=\"formcolumncell\">\n"+
-"            <nobr><input type=\"text\" size=\"15\" name=\"solr_fieldmapping_target\" value=\"\"/></nobr>\n"+
+"            <nobr><input type=\"text\" size=\"15\" name=\""+seqPrefix+"solr_fieldmapping_target\" value=\"\"/></nobr>\n"+
 "          </td>\n"+
 "        </tr>\n"+
 "      </table>\n"+
@@ -2412,7 +2369,7 @@
 "  <tr>\n"+
 "    <td class=\"description\"><nobr>"+Messages.getBodyString(locale,"SolrConnector.KeepAllMetadata")+"</nobr></td>\n"+
 "    <td class=\"value\">\n"+
-"       <input type=\"checkbox\""+keepMetadataValue+" name=\"solr_keepallmetadata\" value=\"true\"/>\n"+
+"       <input type=\"checkbox\""+keepMetadataValue+" name=\""+seqPrefix+"solr_keepallmetadata\" value=\"true\"/>\n"+
 "    </td>\n"+
 "  </tr>\n"+
 "</table>\n"
@@ -2432,7 +2389,7 @@
           if (target == null)
             target = "";
        // Its prefix will be...
-          String prefix = "solr_fieldmapping_" + Integer.toString(fieldCounter);
+          String prefix = seqPrefix+"solr_fieldmapping_" + Integer.toString(fieldCounter);
           out.print(
 "<input type=\"hidden\" name=\""+prefix+"_source\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(source)+"\"/>\n"+
 "<input type=\"hidden\" name=\""+prefix+"_target\" value=\""+org.apache.manifoldcf.ui.util.Encoder.attributeEscape(target)+"\"/>\n"
@@ -2445,10 +2402,10 @@
         }
       }
       out.print(
-"<input type=\"hidden\" name=\"solr_keepallmetadata\" value=\""+keepMetadataValue+"\"/>\n"
+"<input type=\"hidden\" name=\""+seqPrefix+"solr_keepallmetadata\" value=\""+keepMetadataValue+"\"/>\n"
       );
       out.print(
-"<input type=\"hidden\" name=\"solr_fieldmapping_count\" value=\""+Integer.toString(fieldCounter)+"\"/>\n"
+"<input type=\"hidden\" name=\""+seqPrefix+"solr_fieldmapping_count\" value=\""+Integer.toString(fieldCounter)+"\"/>\n"
       );
     }
 
@@ -2456,17 +2413,21 @@
   
   /** Process a specification post.
   * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
-  * posted.  Its purpose is to gather form information and modify the output specification accordingly.
+  * posted.  Its purpose is to gather form information and modify the pipeline specification accordingly.
   * The name of the posted form is "editjob".
   *@param variableContext contains the post data, including binary file-upload information.
-  *@param os is the current output specification for this job.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
   *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
   */
   @Override
-  public String processSpecificationPost(IPostParameters variableContext, Locale locale, OutputSpecification os)
+  public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+    int connectionSequenceNumber)
     throws ManifoldCFException
   {
-    String x = variableContext.getParameter("solr_fieldmapping_count");
+    String seqPrefix = "s"+connectionSequenceNumber+"_";
+    String x = variableContext.getParameter(seqPrefix+"solr_fieldmapping_count");
     if (x != null && x.length() > 0)
     {
       // About to gather the fieldmapping nodes, so get rid of the old ones.
@@ -2483,7 +2444,7 @@
       i = 0;
       while (i < count)
       {
-        String prefix = "solr_fieldmapping_"+Integer.toString(i);
+        String prefix = seqPrefix+"solr_fieldmapping_"+Integer.toString(i);
         String op = variableContext.getParameter(prefix+"_op");
         if (op == null || !op.equals("Delete"))
         {
@@ -2500,11 +2461,11 @@
         i++;
       }
       
-      String addop = variableContext.getParameter("solr_fieldmapping_op");
+      String addop = variableContext.getParameter(seqPrefix+"solr_fieldmapping_op");
       if (addop != null && addop.equals("Add"))
       {
-        String source = variableContext.getParameter("solr_fieldmapping_source");
-        String target = variableContext.getParameter("solr_fieldmapping_target");
+        String source = variableContext.getParameter(seqPrefix+"solr_fieldmapping_source");
+        String target = variableContext.getParameter(seqPrefix+"solr_fieldmapping_target");
         if (target == null)
           target = "";
         SpecificationNode node = new SpecificationNode(SolrConfig.NODE_FIELDMAP);
@@ -2515,7 +2476,7 @@
       
       // Gather the keep all metadata parameter to be the last one
       SpecificationNode node = new SpecificationNode(SolrConfig.NODE_KEEPMETADATA);
-      String keepAll = variableContext.getParameter("solr_keepallmetadata");
+      String keepAll = variableContext.getParameter(seqPrefix+"solr_keepallmetadata");
       if (keepAll != null) {
         node.setAttribute(SolrConfig.ATTRIBUTE_VALUE, keepAll);
       }
@@ -2530,13 +2491,16 @@
   }
   
   /** View specification.
-  * This method is called in the body section of a job's view page.  Its purpose is to present the output specification information to the user.
+  * This method is called in the body section of a job's view page.  Its purpose is to present the pipeline specification information to the user.
   * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
   *@param out is the output to which any HTML should be sent.
-  *@param os is the current output specification for this job.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
   */
   @Override
-  public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os)
+  public void viewSpecification(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber)
     throws ManifoldCFException, IOException
   {
     // Prep for field mappings
@@ -2609,4 +2573,285 @@
 
   }
 
+  /** This class handles Solr connector version string packing/unpacking/interpretation.
+  */
+  protected class SpecPacker {
+    
+    /** Arguments, from configuration */
+    private final Map<String,List<String>> args = new HashMap<String,List<String>>();
+    /** Source/targets from specification */
+    private final Map<String, List<String>> sourceTargets = new HashMap<String, List<String>>();
+    /** Keep all metadata flag, from specification */
+    private final boolean keepAllMetadata;
+    
+    public SpecPacker(Specification spec) {
+
+      // Process arguments
+      for (int i = 0; i < params.getChildCount(); i++)
+      {
+        ConfigNode node = params.getChild(i);
+        if (node.getType().equals(SolrConfig.NODE_ARGUMENT))
+        {
+          String attrName = node.getAttributeValue(SolrConfig.ATTRIBUTE_NAME);
+          List<String> list = args.get(attrName);
+          if (list == null)
+          {
+            list = new ArrayList<String>();
+            args.put(attrName,list);
+          }
+          list.add(node.getAttributeValue(SolrConfig.ATTRIBUTE_VALUE));
+        }
+      }
+    
+      // Do the source/target pairs
+      boolean keepAllMetadata = true;
+      for (int i = 0; i < spec.getChildCount(); i++)
+      {
+        SpecificationNode sn = spec.getChild(i);
+        
+        if(sn.getType().equals(SolrConfig.NODE_KEEPMETADATA)) {
+          String value = sn.getAttributeValue(SolrConfig.ATTRIBUTE_VALUE);
+          keepAllMetadata = Boolean.parseBoolean(value);
+        } else if (sn.getType().equals(SolrConfig.NODE_FIELDMAP)) {
+          String source = sn.getAttributeValue(SolrConfig.ATTRIBUTE_SOURCE);
+          String target = sn.getAttributeValue(SolrConfig.ATTRIBUTE_TARGET);
+          
+          if (target == null) {
+            target = "";
+          }
+          List<String> list = sourceTargets.get(source);
+          if (list == null) {
+            list = new ArrayList<String>();
+            sourceTargets.put(source, list);
+          }
+          list.add(target);
+        }
+      }
+      this.keepAllMetadata = keepAllMetadata;
+    
+    }
+    
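A sketch of the Specification shape this constructor walks, built with the node API used elsewhere in this patch (illustrative values; assumes the usual no-arg Specification constructor and addChild signature):

    import org.apache.manifoldcf.agents.output.solr.SolrConfig;
    import org.apache.manifoldcf.core.interfaces.Specification;
    import org.apache.manifoldcf.core.interfaces.SpecificationNode;

    public class SpecShapeSketch
    {
      public static void main(String[] args)
      {
        Specification spec = new Specification();
        SpecificationNode keep = new SpecificationNode(SolrConfig.NODE_KEEPMETADATA);
        keep.setAttribute(SolrConfig.ATTRIBUTE_VALUE, "true");
        spec.addChild(spec.getChildCount(), keep);
        SpecificationNode map = new SpecificationNode(SolrConfig.NODE_FIELDMAP);
        map.setAttribute(SolrConfig.ATTRIBUTE_SOURCE, "author");
        map.setAttribute(SolrConfig.ATTRIBUTE_TARGET, "author_s");
        spec.addChild(spec.getChildCount(), map);
        // new SpecPacker(spec) would record author -> [author_s], keepAllMetadata = true.
      }
    }
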
+    /** Packed string parser.
+    * This method unpacks a packed version string, and makes the formerly packed data available for use.
+    * Note that it is actually *not* a requirement for this method to do the unpacking; that can happen "on demand"
+    * for performance, if deemed helpful.
+    */
+    public SpecPacker(String packedString) {
+      // Build the argument map we'll send.
+      int index = 0;
+      List<String> nameValues = new ArrayList<String>();
+      index = unpackList(nameValues,packedString,index,'+');
+      List<String> sts = new ArrayList<String>();
+      index = unpackList(sts,packedString,index,'+');
+      // Extract the keep-all-metadata flag
+      boolean keepAllMetadata = true;
+      if (index < packedString.length())
+      {
+        keepAllMetadata = (packedString.charAt(index++) == '+');
+      }
+      this.keepAllMetadata = keepAllMetadata;
+
+      String[] fixedBuffer = new String[2];
+      
+      // Do the name/value pairs
+      for (String x : nameValues)
+      {
+        unpackFixedList(fixedBuffer,x,0,'=');
+        String attrName = fixedBuffer[0];
+        List<String> list = args.get(attrName);
+        if (list == null)
+        {
+          list = new ArrayList<String>();
+          args.put(attrName,list);
+        }
+        list.add(fixedBuffer[1]);
+      }
+      
+      // Do the source/target pairs
+      for (String x : sts)
+      {
+        unpackFixedList(fixedBuffer,x,0,'=');
+        String source = fixedBuffer[0];
+        String target = fixedBuffer[1];
+        List<String> list = sourceTargets.get(source);
+        if (list == null) {
+          list = new ArrayList<String>();
+          sourceTargets.put(source, list);
+        }
+        list.add(target);
+      }
+
+    }
+    
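Because SpecPacker is an inner class, the round-trip contract is shown as a hedged fragment rather than a standalone program: unpacking a packed string must reproduce the same data, which is what keeps addOrReplaceDocument() consistent with getPipelineDescription().

    // Inside SolrConnector, given some Specification spec:
    SpecPacker packed = new SpecPacker(spec);
    SpecPacker unpacked = new SpecPacker(packed.toPackedString());
    // Same keys survive the trip (multi-valued lists come back sorted,
    // since toPackedString() sorts before packing):
    assert packed.getMappings().keySet().equals(unpacked.getMappings().keySet());
    assert packed.keepAllMetadata() == unpacked.keepAllMetadata();
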
+    public String toPackedString() {
+      StringBuilder sb = new StringBuilder();
+      String[] sortArray = new String[args.size()];
+      Iterator<String> iter = args.keySet().iterator();
+      int i = 0;
+      while (iter.hasNext())
+      {
+        sortArray[i++] = iter.next();
+      }
+      
+      // Always use sorted order, because we need this to be comparable.
+      java.util.Arrays.sort(sortArray);
+      
+      String[] fixedList = new String[2];
+      List<String> nameValues = new ArrayList<String>();
+      for (int k = 0; k < sortArray.length; k++)
+      {
+        String name = sortArray[k];
+        List<String> values = args.get(name);
+        java.util.Collections.sort(values);
+        for (String value : values)
+        {
+          fixedList[0] = name;
+          fixedList[1] = value;
+          StringBuilder pairBuffer = new StringBuilder();
+          packFixedList(pairBuffer,fixedList,'=');
+          nameValues.add(pairBuffer.toString());
+        }
+      }
+      
+      packList(sb,nameValues,'+');
+      
+      // Do the source/target pairs
+      sortArray = new String[sourceTargets.size()];
+      iter = sourceTargets.keySet().iterator();
+      i = 0;
+      while (iter.hasNext()) {
+        sortArray[i++] = iter.next();
+      }
+      java.util.Arrays.sort(sortArray);
+      
+      List<String> sourceTargetsList = new ArrayList<String>();
+      for (int k = 0; k < sortArray.length; k++)
+      {
+        String source = sortArray[k];
+        List<String> values = sourceTargets.get(source);
+        java.util.Collections.sort(values);
+        for (String target : values)
+        {
+          fixedList[0] = source;
+          fixedList[1] = target;
+          StringBuilder pairBuffer = new StringBuilder();
+          packFixedList(pairBuffer,fixedList,'=');
+          sourceTargetsList.add(pairBuffer.toString());
+        }
+      }
+      
+      packList(sb,sourceTargetsList,'+');
+
+      // Keep all metadata flag
+      if (keepAllMetadata)
+        sb.append('+');
+      else
+        sb.append('-');
+
+      // Here, append things which we have no intention of unpacking.  This includes stuff that comes from
+      // the configuration information, for instance.
+
+      if (idAttributeName != null)
+      {
+        sb.append('+');
+        pack(sb,idAttributeName,'+');
+      }
+      else
+        sb.append('-');
+
+      if (modifiedDateAttributeName != null)
+      {
+        sb.append('+');
+        pack(sb,modifiedDateAttributeName,'+');
+      }
+      else
+        sb.append('-');
+      
+      if (createdDateAttributeName != null)
+      {
+        sb.append('+');
+        pack(sb,createdDateAttributeName,'+');
+      }
+      else
+        sb.append('-');
+
+      if (indexedDateAttributeName != null)
+      {
+        sb.append('+');
+        pack(sb,indexedDateAttributeName,'+');
+      }
+      else
+        sb.append('-');
+
+      if (fileNameAttributeName != null)
+      {
+        sb.append('+');
+        pack(sb,fileNameAttributeName,'+');
+      }
+      else
+        sb.append('-');
+
+      if (mimeTypeAttributeName != null)
+      {
+        sb.append('+');
+        pack(sb,mimeTypeAttributeName,'+');
+      }
+      else
+        sb.append('-');
+
+      if (contentAttributeName != null)
+      {
+        sb.append('+');
+        pack(sb,contentAttributeName,'+');
+      }
+      else
+        sb.append('-');
+
+      if (useExtractUpdateHandler)
+        sb.append('+');
+      else
+        sb.append('-');
+
+      // Length limitation.  We pack this because when it is changed we want to be sure we get any previously excluded documents.
+      if (maxDocumentLength != null)
+      {
+        sb.append('+');
+        pack(sb,maxDocumentLength.toString(),'+');
+      }
+      else
+        sb.append('-');
+      // Included mime types
+      if (includedMimeTypesString != null)
+      {
+        sb.append('+');
+        pack(sb,includedMimeTypesString,'+');
+      }
+      else
+        sb.append('-');
+      // Excluded mime types
+      if (excludedMimeTypesString != null)
+      {
+        sb.append('+');
+        pack(sb,excludedMimeTypesString,'+');
+      }
+      else
+        sb.append('-');
+
+      return sb.toString();
+    }
+    
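The sorting above is not optional: the framework compares packed strings byte-for-byte, so two logically equal configurations must serialize identically. A tiny standalone illustration:

    import java.util.Arrays;

    public class StableOrderSketch
    {
      public static void main(String[] args)
      {
        // Two maps built in different insertion orders...
        String[] first = {"title","author"};
        String[] second = {"author","title"};
        Arrays.sort(first);
        Arrays.sort(second);
        // ...pack identically once sorted, keeping version strings comparable.
        System.out.println(Arrays.equals(first, second)); // true
      }
    }
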
+    public Map<String,List<String>> getArgs() {
+      return args;
+    }
+    
+    public Map<String,List<String>> getMappings() {
+      return sourceTargets;
+    }
+    
+    public boolean keepAllMetadata() {
+      return keepAllMetadata;
+    }
+  }
+
 }
diff --git a/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_en_US.properties b/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_en_US.properties
index 89747d6..5978338 100644
--- a/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_en_US.properties
+++ b/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_en_US.properties
@@ -15,6 +15,7 @@
 
 SolrConnector.Arguments=Arguments
 SolrConnector.KeepAllMetadata=Keep all metadata:
+SolrConnector.UseExtractUpdateHandler=Use the Extract Update Handler:
 SolrConnector.Arguments2=Arguments:
 SolrConnector.Commits=Commits
 SolrConnector.Documents=Documents
@@ -57,6 +58,7 @@
 SolrConnector.IndexedDateFieldName=Indexed date field name:
 SolrConnector.FileNameFieldName=File name field name:
 SolrConnector.MimeTypeFieldName=Mime type field name:
+SolrConnector.ContentFieldName=Content field name:
 SolrConnector.MaximumDocumentLength=Maximum document length:
 SolrConnector.IncludedMimeTypes=Included mime types:
 SolrConnector.ExcludedMimeTypes=Excluded mime types:
@@ -94,6 +96,8 @@
 SolrConnector.CommitWithinValueMustBeAnInteger=Commit-within value must be an integer
 SolrConnector.MaximumDocumentLengthMustBeAnInteger=Maximum document length must be an integer
 SolrConnector.ArgumentNameCannotBeAnEmptyString=Argument name cannot be an empty string
+SolrConnector.MaximumDocumentLengthRequiredUnlessExtractingUpdateHandler=Maximum document length required unless using extract update handler
+SolrConnector.ContentFieldNameRequiredUnlessExtractingUpdateHandler=Content field name required unless using extract update handler
 SolrConnector.DeleteCert=Delete cert 
 SolrConnector.Delete=Delete
 SolrConnector.DeleteArgument=Delete argument #
diff --git a/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_ja_JP.properties b/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_ja_JP.properties
index 0779385..8c4033a 100644
--- a/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_ja_JP.properties
+++ b/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_ja_JP.properties
@@ -15,6 +15,7 @@
 
 SolrConnector.Arguments=引数
 SolrConnector.KeepAllMetadata=Keep all metadata:
+SolrConnector.UseExtractUpdateHandler=Use the Extract Update Handler:
 SolrConnector.Arguments2=引数:
 SolrConnector.Commits=コミット
 SolrConnector.Documents=コンテンツ
@@ -57,6 +58,7 @@
 SolrConnector.IndexedDateFieldName=Indexed date field name:
 SolrConnector.FileNameFieldName=ファイル名称フィールド名:
 SolrConnector.MimeTypeFieldName=MIMEタイプフィールド名:
+SolrConnector.ContentFieldName=Content field name:
 SolrConnector.MaximumDocumentLength=最大コンテンツ長:
 SolrConnector.IncludedMimeTypes=含むMIMEタイプ:
 SolrConnector.ExcludedMimeTypes=除外するMIMEタイプ:
@@ -94,6 +96,8 @@
 SolrConnector.CommitWithinValueMustBeAnInteger=Commit-within値には整数を入力してください
 SolrConnector.MaximumDocumentLengthMustBeAnInteger=最大コンテンツ長さには整数を入力してください
 SolrConnector.ArgumentNameCannotBeAnEmptyString=引数名を入力してください
+SolrConnector.MaximumDocumentLengthRequiredUnlessExtractingUpdateHandler=Maximum document length required unless using extract update handler
+SolrConnector.ContentFieldNameRequiredUnlessExtractingUpdateHandler=Content field name required unless using extract update handler
 SolrConnector.DeleteCert=証明書を削除 
 SolrConnector.Delete=削除
 SolrConnector.DeleteArgument=引数を削除 #
diff --git a/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_zh_CN.properties b/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_zh_CN.properties
index 3f3256d..2a15360 100644
--- a/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_zh_CN.properties
+++ b/connectors/solr/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/solr/common_zh_CN.properties
@@ -15,6 +15,7 @@
 
 SolrConnector.Arguments=参数
 SolrConnector.KeepAllMetadata=保持所有元数据:
+SolrConnector.UseExtractUpdateHandler=Use the Extract Update Handler:
 SolrConnector.Arguments2=参数: 
 SolrConnector.Commits=提交
 SolrConnector.Documents=文档
@@ -57,6 +58,7 @@
 SolrConnector.IndexedDateFieldName=索引化的日期字段名:
 SolrConnector.FileNameFieldName=文件名字段名: 
 SolrConnector.MimeTypeFieldName=MIME类型字段名: 
+SolrConnector.ContentFieldName=Content field name:
 SolrConnector.MaximumDocumentLength=最大文档长度: 
 SolrConnector.IncludedMimeTypes=所包含的MIME类型: 
 SolrConnector.ExcludedMimeTypes=被排除的MIME类型: 
@@ -94,6 +96,8 @@
 SolrConnector.CommitWithinValueMustBeAnInteger=CommitWithin值必须为整数
 SolrConnector.MaximumDocumentLengthMustBeAnInteger=最大文档长度必须为整数
 SolrConnector.ArgumentNameCannotBeAnEmptyString=请输入参数名
+SolrConnector.MaximumDocumentLengthRequiredUnlessExtractingUpdateHandler=Maximum document length required unless using extract update handler
+SolrConnector.ContentFieldNameRequiredUnlessExtractingUpdateHandler=Content field name required unless using extract update handler
 SolrConnector.DeleteCert=删除证书 
 SolrConnector.Delete=删除
 SolrConnector.DeleteArgument=删除参数 #
diff --git a/connectors/tika/build.xml b/connectors/tika/build.xml
new file mode 100644
index 0000000..2e34602
--- /dev/null
+++ b/connectors/tika/build.xml
@@ -0,0 +1,134 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project name="tika" default="all">
+
+    <property environment="env"/>
+    <condition property="mcf-dist" value="${env.MCFDISTPATH}">
+        <isset property="env.MCFDISTPATH"/>
+    </condition>
+    <property name="abs-dist" location="../../dist"/>
+    <condition property="mcf-dist" value="${abs-dist}">
+        <not>
+            <isset property="env.MCFDISTPATH"/>
+        </not>
+    </condition>
+
+    <import file="${mcf-dist}/connector-build.xml"/>
+
+    <path id="connector-classpath">
+        <path refid="mcf-connector-build.connector-classpath"/>
+        <fileset dir="../../lib">
+            <include name="httpclient*.jar"/>
+            <include name="jackson-core*.jar"/>
+            <include name="jackson-databind*.jar"/>
+            <include name="jackson-annotations*.jar"/>
+            <include name="tika-core*.jar"/>
+            <include name="tika-parsers*.jar"/>
+            <include name="tagsoup*.jar"/>
+            <include name="poi*.jar"/>
+            <include name="vorbis-java-tika*.jar"/>
+            <include name="vorbis-java-core*.jar"/>
+            <include name="netcdf*.jar"/>
+            <include name="apache-mime4j-core*.jar"/>
+            <include name="apache-mime4j-dom*.jar"/>
+            <include name="commons-compress*.jar"/>
+            <include name="commons-codec*.jar"/>
+            <include name="pdfbox*.jar"/>
+            <include name="fontbox*.jar"/>
+            <include name="jempbox*.jar"/>
+            <include name="commons-logging*.jar"/>
+            <include name="bcmail-jdk15*.jar"/>
+            <include name="bcprov-jdk15*.jar"/>
+            <include name="poi-scratchpad*.jar"/>
+            <include name="poi-ooxml*.jar"/>
+            <include name="poi-ooxml-schemas*.jar"/>
+            <include name="xmlbeans*.jar"/>
+            <include name="dom4j*.jar"/>
+            <include name="geronimo-stax-api_1.0_spec*.jar"/>
+            <include name="asm-debug-all*.jar"/>
+            <include name="isoparser*.jar"/>
+            <include name="aspectjrt*.jar"/>
+            <include name="metadata-extractor*.jar"/>
+            <include name="xmpcore*.jar"/>
+            <include name="xml-apis*.jar"/>
+            <include name="boilerpipe*.jar"/>
+            <include name="rome*.jar"/>
+            <include name="jdom*.jar"/>
+            <include name="xercesImpl*.jar"/>
+            <include name="vorbis-java-core*.jar"/>
+            <include name="juniversalchardet*.jar"/>
+            <include name="jhighlight*.jar"/>
+        </fileset>
+    </path>
+
+    <target name="lib" depends="mcf-connector-build.lib,precompile-check" if="canBuild">
+        <mkdir dir="dist/lib"/>
+        <copy todir="dist/lib">
+            <fileset dir="../../lib">
+                <include name="httpclient*.jar"/>
+                <include name="jackson-core*.jar"/>
+                <include name="jackson-databind*.jar"/>
+                <include name="jackson-annotations*.jar"/>
+                <include name="tika-core*.jar"/>
+                <include name="tika-parsers*.jar"/>
+                <include name="tagsoup*.jar"/>
+                <include name="poi*.jar"/>
+                <include name="vorbis-java-tika*.jar"/>
+                <include name="vorbis-java-core*.jar"/>
+                <include name="netcdf*.jar"/>
+                <include name="apache-mime4j-core*.jar"/>
+                <include name="apache-mime4j-dom*.jar"/>
+                <include name="commons-compress*.jar"/>
+                <include name="commons-codec*.jar"/>
+                <include name="pdfbox*.jar"/>
+                <include name="fontbox*.jar"/>
+                <include name="jempbox*.jar"/>
+                <include name="commons-logging*.jar"/>
+                <include name="bcmail-jdk15*.jar"/>
+                <include name="bcprov-jdk15*.jar"/>
+                <include name="poi-scratchpad*.jar"/>
+                <include name="poi-ooxml*.jar"/>
+                <include name="poi-ooxml-schemas*.jar"/>
+                <include name="xmlbeans*.jar"/>
+                <include name="dom4j*.jar"/>
+                <include name="geronimo-stax-api_1.0_spec*.jar"/>
+                <include name="asm-debug-all*.jar"/>
+                <include name="isoparser*.jar"/>
+                <include name="aspectjrt*.jar"/>
+                <include name="metadata-extractor*.jar"/>
+                <include name="xmpcore*.jar"/>
+                <include name="xml-apis*.jar"/>
+                <include name="boilerpipe*.jar"/>
+                <include name="rome*.jar"/>
+                <include name="jdom*.jar"/>
+                <include name="xercesImpl*.jar"/>
+                <include name="vorbis-java-core*.jar"/>
+                <include name="juniversalchardet*.jar"/>
+                <include name="jhighlight*.jar"/>
+            </fileset>
+        </copy>
+    </target>
+
+    <target name="deliver-connector" depends="mcf-connector-build.deliver-connector">
+        <antcall target="general-add-transformation-connector">
+            <param name="connector-label" value="Tika content extractor"/>
+            <param name="connector-class" value="org.apache.manifoldcf.agents.transformation.tika.TikaExtractor"/>
+        </antcall>
+    </target>
+
+</project>
diff --git a/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/Messages.java b/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/Messages.java
new file mode 100644
index 0000000..4315ff6
--- /dev/null
+++ b/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/Messages.java
@@ -0,0 +1,141 @@
+/* $Id: Messages.java 1596720 2014-05-22 00:57:29Z kwright $ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.transformation.tika;
+
+import java.util.Locale;
+import java.util.Map;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+
+public class Messages extends org.apache.manifoldcf.ui.i18n.Messages
+{
+  public static final String DEFAULT_BUNDLE_NAME="org.apache.manifoldcf.agents.transformation.tika.common";
+  public static final String DEFAULT_PATH_NAME="org.apache.manifoldcf.agents.transformation.tika";
+  
+  /** Constructor - do not instantiate
+  */
+  protected Messages()
+  {
+  }
+  
+  public static String getString(Locale locale, String messageKey)
+  {
+    return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getAttributeString(Locale locale, String messageKey)
+  {
+    return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getBodyString(Locale locale, String messageKey)
+  {
+    return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getAttributeJavascriptString(Locale locale, String messageKey)
+  {
+    return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getBodyJavascriptString(Locale locale, String messageKey)
+  {
+    return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+  }
+
+  public static String getString(Locale locale, String messageKey, Object[] args)
+  {
+    return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  public static String getAttributeString(Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+  
+  public static String getBodyString(Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  public static String getAttributeJavascriptString(Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  public static String getBodyJavascriptString(Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+  }
+
+  // More general methods which allow bundlenames and class loaders to be specified.
+  
+  public static String getString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  public static String getAttributeString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  public static String getBodyString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyString(Messages.class, bundleName, locale, messageKey, args);
+  }
+  
+  public static String getAttributeJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  public static String getBodyJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+  {
+    return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+  }
+
+  // Resource output
+  
+  public static void outputResource(IHTTPOutput output, Locale locale, String resourceKey,
+    Map<String,String> substitutionParameters, boolean mapToUpperCase)
+    throws ManifoldCFException
+  {
+    outputResource(output,Messages.class,DEFAULT_PATH_NAME,locale,resourceKey,
+      substitutionParameters,mapToUpperCase);
+  }
+  
+  public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+    Map<String,String> substitutionParameters, boolean mapToUpperCase)
+    throws ManifoldCFException
+  {
+    outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+      substitutionParameters,mapToUpperCase);
+  }
+
+  public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+    Map<String,Object> contextObjects)
+    throws ManifoldCFException
+  {
+    outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+      contextObjects);
+  }
+  
+}
+
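The Messages class above is boilerplate that binds the shared ManifoldCF i18n helpers to this connector's resource bundle and path. As a rough usage sketch, mirroring calls made later in this patch by TikaExtractor (locale, out, and connectionSequenceNumber are assumed to be supplied by the framework):

    // Look up a localized label, then render a Velocity resource.
    String tabName = Messages.getString(locale, "TikaExtractor.FieldMappingTabName");
    Map<String,Object> paramMap = new HashMap<String,Object>();
    paramMap.put("SEQNUM", Integer.toString(connectionSequenceNumber));
    Messages.outputResourceWithVelocity(out, locale, "editSpecification.js", paramMap);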
diff --git a/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaConfig.java b/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaConfig.java
new file mode 100644
index 0000000..894d0ff
--- /dev/null
+++ b/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaConfig.java
@@ -0,0 +1,37 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.manifoldcf.agents.transformation.tika;
+
+/** Parameters for Tika transformation connector.
+ */
+public class TikaConfig {
+
+  // Configuration parameters
+  // None
+
+  // Specification nodes and values
+  public static final String NODE_FIELDMAP = "fieldmap";
+  public static final String NODE_KEEPMETADATA = "keepAllMetadata";
+  public static final String NODE_IGNORETIKAEXCEPTION = "ignoreException";
+  public static final String ATTRIBUTE_SOURCE = "source";
+  public static final String ATTRIBUTE_TARGET = "target";
+  public static final String ATTRIBUTE_VALUE = "value";
+  
+}
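TikaConfig just names the specification nodes and attributes the connector reads and writes. A minimal sketch of how they are assembled, following the same pattern processSpecificationPost uses in TikaExtractor below (the "Author"/"author" values are hypothetical, and a Specification instance os is assumed to be in hand):

    // Map Tika's "Author" metadata field to "author", and keep all other metadata.
    SpecificationNode map = new SpecificationNode(TikaConfig.NODE_FIELDMAP);
    map.setAttribute(TikaConfig.ATTRIBUTE_SOURCE, "Author");
    map.setAttribute(TikaConfig.ATTRIBUTE_TARGET, "author");
    os.addChild(os.getChildCount(), map);
    SpecificationNode keep = new SpecificationNode(TikaConfig.NODE_KEEPMETADATA);
    keep.setAttribute(TikaConfig.ATTRIBUTE_VALUE, "true");
    os.addChild(os.getChildCount(), keep);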
diff --git a/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java b/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java
new file mode 100644
index 0000000..9bc8a98
--- /dev/null
+++ b/connectors/tika/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tika/TikaExtractor.java
@@ -0,0 +1,857 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.transformation.tika;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+import org.apache.manifoldcf.agents.system.Logging;
+
+import java.io.*;
+import java.util.*;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.metadata.TikaMetadataKeys;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/** This connector works as a transformation connector, extracting document text and metadata with Apache Tika and logging each extraction.
+*
+*/
+public class TikaExtractor extends org.apache.manifoldcf.agents.transformation.BaseTransformationConnector
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
+  private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = "editSpecification_FieldMapping.html";
+  private static final String EDIT_SPECIFICATION_EXCEPTIONS_HTML = "editSpecification_Exceptions.html";
+  private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html";
+
+  protected static final String ACTIVITY_EXTRACT = "extract";
+
+  protected static final String[] activitiesList = new String[]{ACTIVITY_EXTRACT};
+  
+  /** We handle up to 64K in memory; after that we go to disk. */
+  protected static final long inMemoryMaximumFile = 65536;
+  
+  /** Return a list of activities that this connector generates.
+  * The connector does NOT need to be connected before this method is called.
+  *@return the set of activities.
+  */
+  @Override
+  public String[] getActivitiesList()
+  {
+    return activitiesList;
+  }
+
+  /** Get a pipeline version string, given a pipeline specification.  The version string is used to uniquely describe the pertinent details of
+  * the specification and the configuration, to allow the Connector Framework to determine whether a document will need to be processed again.
+  * Note that the contents of the document cannot be considered by this method, and that a different version string (defined in IRepositoryConnector)
+  * is used to describe the version of the actual document.
+  *
+  * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
+  * necessary.
+  *@param os is the current pipeline specification for the job that is doing the crawling.
+  *@return a string, of unlimited length, which uniquely describes configuration and specification in such a way that if two such strings are equal,
+  * the document will not need to be processed again.
+  */
+  @Override
+  public VersionContext getPipelineDescription(Specification os)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    SpecPacker sp = new SpecPacker(os);
+    return new VersionContext(sp.toPackedString(),params,os);
+  }
+
+  // We intercept checks pertaining to the document format and send modified checks further down
+  
+  /** Detect if a mime type is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
+  * in the first place.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param mimeType is the mime type of the document.
+  *@param checkActivity is an object including the activities that can be performed by this method.
+  *@return true if the mime type can be accepted by this connector.
+  */
+  @Override
+  public boolean checkMimeTypeIndexable(VersionContext pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    // We should see what Tika will transform
+    // MHL
+    // Do a downstream check
+    return checkActivity.checkMimeTypeIndexable("text/plain;charset=utf-8");
+  }
+
+  /** Pre-determine whether a document (passed here as a File object) is acceptable or not.  This method is
+  * used to determine whether a document needs to be actually transferred.  This hook is provided mainly to support
+  * search engines that only handle a small set of accepted file types.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param localFile is the local file to check.
+  *@param checkActivity is an object including the activities that can be done by this method.
+  *@return true if the file is acceptable, false if not.
+  */
+  @Override
+  public boolean checkDocumentIndexable(VersionContext pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    // Document contents are not germane anymore, unless it looks like Tika won't accept them.
+    // Not sure how to check that...
+    return true;
+  }
+
+  /** Pre-determine whether a document's length is acceptable.  This method is used
+  * to determine whether to fetch a document in the first place.
+  *@param pipelineDescription is the document's pipeline version string, for this connection.
+  *@param length is the length of the document.
+  *@param checkActivity is an object including the activities that can be done by this method.
+  *@return true if the file is acceptable, false if not.
+  */
+  @Override
+  public boolean checkLengthIndexable(VersionContext pipelineDescription, long length, IOutputCheckActivity checkActivity)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    // Always true
+    return true;
+  }
+
+  /** Add (or replace) a document in the output data store using the connector.
+  * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
+  * necessary.
+  * The OutputSpecification is *not* provided to this method, because the goal is consistency, and if output is done it must be consistent with the
+  * output description, since that was what was partly used to determine if output should be taking place.  So it may be necessary for this method to decode
+  * an output description string in order to determine what should be done.
+  *@param documentURI is the URI of the document.  The URI is presumed to be the unique identifier which the output data store will use to process
+  * and serve the document.  This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
+  *@param pipelineDescription is the pipeline version context that was constructed for this document by the getPipelineDescription() method.
+  *@param document is the document data to be processed (handed to the output data store).
+  *@param authorityNameString is the name of the authority responsible for authorizing any access tokens passed in with the repository document.  May be null.
+  *@param activities is the handle to an object that the implementer of a pipeline connector may use to perform operations, such as logging processing activity,
+  * or sending a modified document to the next stage in the pipeline.
+  *@return the document status (accepted or permanently rejected).
+  *@throws IOException only if there's a stream error reading the document data.
+  */
+  @Override
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+    throws ManifoldCFException, ServiceInterruption, IOException
+  {
+    // First, make sure downstream pipeline will now accept text/plain;charset=utf-8
+    if (!activities.checkMimeTypeIndexable("text/plain;charset=utf-8"))
+    {
+      activities.noDocument();
+      return DOCUMENTSTATUS_REJECTED;
+    }
+
+    SpecPacker sp = new SpecPacker(pipelineDescription.getVersionString());
+
+    // Tika's API reads from an input stream and writes to an output Writer.
+    // Since a RepositoryDocument includes readers and inputstreams exclusively, AND all downstream
+    // processing needs to occur in a ManifoldCF thread, we have some constraints on the architecture we need to get this done:
+    // (1) The principal worker thread must call the downstream pipeline send() method.
+    // (2) The callee of the send() method must call a reader in the Repository Document.
+    // (3) The Reader, if its databuffer is empty, must pull more data from the original input stream and hand it to Tika, which populates the Reader's databuffer.
+    // So all this can be done in one thread, with some work, and the creation of a special InputStream or Reader implementation.  Where it fails, though, is the
+    // requirement that tika-extracted metadata be included in the RepositoryDocument right from the beginning.  Effectively this means that the entire document
+    // must be parsed before it is handed downstream -- so basically a temporary file (or in-memory buffer if small enough) must be created.
+    // Instead of the elegant flow above, we have the following:
+    // (1) Create a temporary file (or in-memory buffer if file is small enough)
+    // (2) Run Tika to completion, streaming content output to temporary file
+    // (3) Modify RepositoryDocument to read from temporary file, and include Tika-extracted metadata
+    // (4) Call downstream document processing
+      
+    DestinationStorage ds;
+      
+    if (document.getBinaryLength() <= inMemoryMaximumFile)
+    {
+      ds = new MemoryDestinationStorage((int)document.getBinaryLength());
+    }
+    else
+    {
+      ds = new FileDestinationStorage();
+    }
+    try
+    {
+      Metadata metadata = new Metadata();
+      if (document.getFileName() != null)
+      {
+        metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, document.getFileName());
+        metadata.add("stream_name", document.getFileName());
+      }
+      if (document.getMimeType() != null)
+        metadata.add("Content-Type", document.getMimeType());
+      metadata.add("stream_size", new Long(document.getBinaryLength()).toString());
+
+      // We only log the extraction
+      long startTime = System.currentTimeMillis();
+      String resultCode = "OK";
+      String description = null;
+      Long length = null;
+      try
+      {
+        OutputStream os = ds.getOutputStream();
+        try
+        {
+          Writer w = new OutputStreamWriter(os,"utf-8");
+          try
+          {
+            // Use tika to parse stuff
+            Parser parser = new AutoDetectParser();
+            ContentHandler handler = new BodyContentHandler(w);
+            ParseContext pc = new ParseContext();
+            try
+            {
+              parser.parse(document.getBinaryStream(), handler, metadata, pc);
+            }
+            catch (TikaException e)
+            {
+              if (sp.ignoreTikaException())
+              {
+                resultCode = "TIKAEXCEPTION";
+                description = e.getMessage();
+              }
+              else
+              {
+                resultCode = "TIKAREJECTION";
+                description = e.getMessage();
+                int rval = handleTikaException(e);
+                if (rval == DOCUMENTSTATUS_REJECTED)
+                  activities.noDocument();
+                return rval;
+              }
+            }
+            catch (SAXException e)
+            {
+              resultCode = "SAXEXCEPTION";
+              description = e.getMessage();
+              int rval = handleSaxException(e);
+              if (rval == DOCUMENTSTATUS_REJECTED)
+                activities.noDocument();
+              return rval;
+            }
+            catch (IOException e)
+            {
+              resultCode = "IOEXCEPTION";
+              description = e.getMessage();
+              throw e;
+            }
+          }
+          finally
+          {
+            w.flush();
+          }
+        }
+        finally
+        {
+          os.close();
+          length = new Long(ds.getBinaryLength());
+        }
+      }
+      finally
+      {
+        // Log the extraction processing
+        activities.recordActivity(new Long(startTime), ACTIVITY_EXTRACT, length, documentURI,
+          resultCode, description);
+      }
+      
+      // Check to be sure downstream pipeline will accept document of specified length
+      if (!activities.checkLengthIndexable(ds.getBinaryLength()))
+      {
+        activities.noDocument();
+        return DOCUMENTSTATUS_REJECTED;
+      }
+        
+      // Parsing complete!
+      // Create a copy of Repository Document
+      RepositoryDocument docCopy = document.duplicate();
+        
+      // Get new stream length
+      long newBinaryLength = ds.getBinaryLength();
+      // Open new input stream
+      InputStream is = ds.getInputStream();
+      try
+      {
+        docCopy.setBinary(is,newBinaryLength);
+
+        // Set up all metadata from Tika.  We may want to run this through a mapper eventually...
+        String[] metaNames = metadata.names();
+        for(String mName : metaNames){
+          String value = metadata.get(mName);
+          String target = sp.getMapping(mName);
+          if(target!=null)
+          {
+            docCopy.addField(target, value);
+          }
+          else
+          {
+            if(sp.keepAllMetadata())
+            {
+              docCopy.addField(mName, value);
+            }
+          }
+        }
+
+        // Send new document downstream
+        return activities.sendDocument(documentURI,docCopy);
+      }
+      finally
+      {
+        is.close();
+      }
+    }
+    finally
+    {
+      ds.close();
+    }
+
+  }
+
+  /** Obtain the name of the form check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form check javascript method.
+  */
+  @Override
+  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecification";
+  }
+
+  /** Obtain the name of the form presave check javascript method to call.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return the name of the form presave check javascript method.
+  */
+  @Override
+  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
+  {
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
+  }
+
+  /** Output the specification header section.
+  * This method is called in the head section of a job page which has selected a pipeline connection of the current type.  Its purpose is to add the required tabs
+  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this connection.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
+  */
+  @Override
+  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, List<String> tabsArray)
+    throws ManifoldCFException, IOException
+  {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
+
+    tabsArray.add(Messages.getString(locale, "TikaExtractor.FieldMappingTabName"));
+    tabsArray.add(Messages.getString(locale, "TikaExtractor.ExceptionsTabName"));
+
+    // Fill in the specification header map, using data from all tabs.
+    fillInFieldMappingSpecificationMap(paramMap, os);
+    fillInExceptionsSpecificationMap(paramMap, os);
+    
+    Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_JS,paramMap);
+  }
+  
+  /** Output the specification body section.
+  * This method is called in the body section of a job page which has selected a pipeline connection of the current type.  Its purpose is to present the required form elements for editing.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html>, <body>, and <form> tags.  The name of the
+  * form is "editjob".
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param actualSequenceNumber is the connection within the job that has currently been selected.
+  *@param tabName is the current tab name.
+  */
+  @Override
+  public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
+    throws ManifoldCFException, IOException
+  {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+
+    // Set the tab name
+    paramMap.put("TABNAME", tabName);
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
+    paramMap.put("SELECTEDNUM",Integer.toString(actualSequenceNumber));
+
+    // Fill in the field mapping tab data
+    fillInFieldMappingSpecificationMap(paramMap, os);
+    fillInExceptionsSpecificationMap(paramMap, os);
+
+    Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_FIELDMAPPING_HTML,paramMap);
+    Messages.outputResourceWithVelocity(out,locale,EDIT_SPECIFICATION_EXCEPTIONS_HTML,paramMap);
+  }
+
+  /** Process a specification post.
+  * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
+  * posted.  Its purpose is to gather form information and modify the transformation specification accordingly.
+  * The name of the posted form is "editjob".
+  *@param variableContext contains the post data, including binary file-upload information.
+  *@param locale is the preferred locale of the output.
+  *@param os is the current pipeline specification for this job.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
+  */
+  @Override
+  public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException {
+    String seqPrefix = "s"+connectionSequenceNumber+"_";
+
+    String x;
+        
+    x = variableContext.getParameter(seqPrefix+"fieldmapping_count");
+    if (x != null && x.length() > 0)
+    {
+      // About to gather the fieldmapping nodes, so get rid of the old ones.
+      int i = 0;
+      while (i < os.getChildCount())
+      {
+        SpecificationNode node = os.getChild(i);
+        if (node.getType().equals(TikaConfig.NODE_FIELDMAP) || node.getType().equals(TikaConfig.NODE_KEEPMETADATA))
+          os.removeChild(i);
+        else
+          i++;
+      }
+      int count = Integer.parseInt(x);
+      i = 0;
+      while (i < count)
+      {
+        String prefix = seqPrefix+"fieldmapping_";
+        String suffix = "_"+Integer.toString(i);
+        String op = variableContext.getParameter(prefix+"op"+suffix);
+        if (op == null || !op.equals("Delete"))
+        {
+          // Gather the fieldmap etc.
+          String source = variableContext.getParameter(prefix+"source"+suffix);
+          String target = variableContext.getParameter(prefix+"target"+suffix);
+          if (target == null)
+            target = "";
+          SpecificationNode node = new SpecificationNode(TikaConfig.NODE_FIELDMAP);
+          node.setAttribute(TikaConfig.ATTRIBUTE_SOURCE,source);
+          node.setAttribute(TikaConfig.ATTRIBUTE_TARGET,target);
+          os.addChild(os.getChildCount(),node);
+        }
+        i++;
+      }
+      
+      String addop = variableContext.getParameter(seqPrefix+"fieldmapping_op");
+      if (addop != null && addop.equals("Add"))
+      {
+        String source = variableContext.getParameter(seqPrefix+"fieldmapping_source");
+        String target = variableContext.getParameter(seqPrefix+"fieldmapping_target");
+        if (target == null)
+          target = "";
+        SpecificationNode node = new SpecificationNode(TikaConfig.NODE_FIELDMAP);
+        node.setAttribute(TikaConfig.ATTRIBUTE_SOURCE,source);
+        node.setAttribute(TikaConfig.ATTRIBUTE_TARGET,target);
+        os.addChild(os.getChildCount(),node);
+      }
+      
+      // Gather the "keep all metadata" parameter last
+      SpecificationNode node = new SpecificationNode(TikaConfig.NODE_KEEPMETADATA);
+      String keepAll = variableContext.getParameter(seqPrefix+"keepallmetadata");
+      if (keepAll != null)
+      {
+        node.setAttribute(TikaConfig.ATTRIBUTE_VALUE, keepAll);
+      }
+      else
+      {
+        node.setAttribute(TikaConfig.ATTRIBUTE_VALUE, "false");
+      }
+      // Add the new keepallmetadata config parameter 
+      os.addChild(os.getChildCount(), node);
+    }
+    
+    if (variableContext.getParameter(seqPrefix+"ignoretikaexceptions_present") != null)
+    {
+      int i = 0;
+      while (i < os.getChildCount())
+      {
+        SpecificationNode node = os.getChild(i);
+        if (node.getType().equals(TikaConfig.NODE_IGNORETIKAEXCEPTION))
+          os.removeChild(i);
+        else
+          i++;
+      }
+
+      String value = variableContext.getParameter(seqPrefix+"ignoretikaexceptions");
+      if (value == null)
+        value = "false";
+
+      SpecificationNode node = new SpecificationNode(TikaConfig.NODE_IGNORETIKAEXCEPTION);
+      node.setAttribute(TikaConfig.ATTRIBUTE_VALUE, value);
+      os.addChild(os.getChildCount(), node);
+    }
+    
+    return null;
+  }
+  
+
+  /** View specification.
+  * This method is called in the body section of a job's view page.  Its purpose is to present the pipeline specification information to the user.
+  * The coder can presume that the HTML that is output from this configuration will be within appropriate <html> and <body> tags.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the preferred locale of the output.
+  *@param connectionSequenceNumber is the unique number of this connection within the job.
+  *@param os is the current pipeline specification for this job.
+  */
+  @Override
+  public void viewSpecification(IHTTPOutput out, Locale locale, Specification os,
+    int connectionSequenceNumber)
+    throws ManifoldCFException, IOException
+  {
+    Map<String, Object> paramMap = new HashMap<String, Object>();
+    paramMap.put("SEQNUM",Integer.toString(connectionSequenceNumber));
+
+    // Fill in the map with data from all tabs
+    fillInFieldMappingSpecificationMap(paramMap, os);
+    fillInExceptionsSpecificationMap(paramMap, os);
+
+    Messages.outputResourceWithVelocity(out,locale,VIEW_SPECIFICATION_HTML,paramMap);
+    
+  }
+
+  protected static void fillInFieldMappingSpecificationMap(Map<String,Object> paramMap, Specification os)
+  {
+    // Prep for field mappings
+    List<Map<String,String>> fieldMappings = new ArrayList<Map<String,String>>();
+    String keepAllMetadataValue = "true";
+    for (int i = 0; i < os.getChildCount(); i++)
+    {
+      SpecificationNode sn = os.getChild(i);
+      if (sn.getType().equals(TikaConfig.NODE_FIELDMAP)) {
+        String source = sn.getAttributeValue(TikaConfig.ATTRIBUTE_SOURCE);
+        String target = sn.getAttributeValue(TikaConfig.ATTRIBUTE_TARGET);
+        String targetDisplay;
+        if (target == null)
+        {
+          target = "";
+          targetDisplay = "(remove)";
+        }
+        else
+          targetDisplay = target;
+        Map<String,String> fieldMapping = new HashMap<String,String>();
+        fieldMapping.put("SOURCE",source);
+        fieldMapping.put("TARGET",target);
+        fieldMapping.put("TARGETDISPLAY",targetDisplay);
+        fieldMappings.add(fieldMapping);
+      }
+      else if (sn.getType().equals(TikaConfig.NODE_KEEPMETADATA))
+      {
+        keepAllMetadataValue = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
+      }
+    }
+    paramMap.put("FIELDMAPPINGS",fieldMappings);
+    paramMap.put("KEEPALLMETADATA",keepAllMetadataValue);
+  }
+
+  protected static void fillInExceptionsSpecificationMap(Map<String,Object> paramMap, Specification os)
+  {
+    String ignoreTikaExceptions = "true";
+    for (int i = 0; i < os.getChildCount(); i++)
+    {
+      SpecificationNode sn = os.getChild(i);
+      if (sn.getType().equals(TikaConfig.NODE_IGNORETIKAEXCEPTION))
+      {
+        ignoreTikaExceptions = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
+      }
+    }
+    paramMap.put("IGNORETIKAEXCEPTIONS",ignoreTikaExceptions);
+  }
+
+  protected static int handleTikaException(TikaException e)
+    throws IOException, ManifoldCFException, ServiceInterruption
+  {
+    // MHL - what does Tika throw if it gets an IOException reading the stream??
+    Logging.ingest.warn("Tika: Tika exception extracting: "+e.getMessage(),e);
+    return DOCUMENTSTATUS_REJECTED;
+  }
+  
+  protected static int handleSaxException(SAXException e)
+    throws IOException, ManifoldCFException, ServiceInterruption
+  {
+    // MHL - what does this mean?
+    Logging.ingest.warn("Tika: SAX exception extracting: "+e.getMessage(),e);
+    return DOCUMENTSTATUS_REJECTED;
+  }
+  
+  protected static int handleIOException(IOException e)
+    throws ManifoldCFException
+  {
+    // IOException reading from our local storage...
+    if (e instanceof InterruptedIOException)
+      throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+    throw new ManifoldCFException(e.getMessage(),e);
+  }
+  
+  protected static interface DestinationStorage
+  {
+    /** Get the output stream to write to.  Caller should explicitly close this stream when done writing.
+    */
+    public OutputStream getOutputStream()
+      throws ManifoldCFException;
+    
+    /** Get new binary length.
+    */
+    public long getBinaryLength()
+      throws ManifoldCFException;
+
+    /** Get the input stream to read from.  Caller should explicitly close this stream when done reading.
+    */
+    public InputStream getInputStream()
+      throws ManifoldCFException;
+    
+    /** Close the object and clean up everything.
+    * This should be called when the data is no longer needed.
+    */
+    public void close()
+      throws ManifoldCFException;
+  }
+  
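+  /** Temporary-file-backed storage, used when the incoming document exceeds inMemoryMaximumFile. */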
+  protected static class FileDestinationStorage implements DestinationStorage
+  {
+    protected final File outputFile;
+    protected final OutputStream outputStream;
+
+    public FileDestinationStorage()
+      throws ManifoldCFException
+    {
+      File outputFile;
+      OutputStream outputStream;
+      try
+      {
+        outputFile = File.createTempFile("mcftika","tmp");
+        outputStream = new FileOutputStream(outputFile);
+      }
+      catch (IOException e)
+      {
+        handleIOException(e);
+        outputFile = null;
+        outputStream = null;
+      }
+      this.outputFile = outputFile;
+      this.outputStream = outputStream;
+    }
+    
+    @Override
+    public OutputStream getOutputStream()
+      throws ManifoldCFException
+    {
+      return outputStream;
+    }
+    
+    /** Get new binary length.
+    */
+    @Override
+    public long getBinaryLength()
+      throws ManifoldCFException
+    {
+      return outputFile.length();
+    }
+
+    /** Get the input stream to read from.  Caller should explicitly close this stream when done reading.
+    */
+    @Override
+    public InputStream getInputStream()
+      throws ManifoldCFException
+    {
+      try
+      {
+        return new FileInputStream(outputFile);
+      }
+      catch (IOException e)
+      {
+        handleIOException(e);
+        return null;
+      }
+    }
+    
+    /** Close the object and clean up everything.
+    * This should be called when the data is no longer needed.
+    */
+    @Override
+    public void close()
+      throws ManifoldCFException
+    {
+      outputFile.delete();
+    }
+
+  }
+  
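+  /** In-memory storage, used when the incoming document fits within inMemoryMaximumFile. */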
+  protected static class MemoryDestinationStorage implements DestinationStorage
+  {
+    protected final ByteArrayOutputStream outputStream;
+    
+    public MemoryDestinationStorage(int sizeHint)
+    {
+      outputStream = new ByteArrayOutputStream(sizeHint);
+    }
+    
+    @Override
+    public OutputStream getOutputStream()
+      throws ManifoldCFException
+    {
+      return outputStream;
+    }
+
+    /** Get new binary length.
+    */
+    @Override
+    public long getBinaryLength()
+      throws ManifoldCFException
+    {
+      return outputStream.size();
+    }
+    
+    /** Get the input stream to read from.  Caller should explicitly close this stream when done reading.
+    */
+    @Override
+    public InputStream getInputStream()
+      throws ManifoldCFException
+    {
+      return new ByteArrayInputStream(outputStream.toByteArray());
+    }
+    
+    /** Close the object and clean up everything.
+    * This should be called when the data is no longer needed.
+    */
+    @Override
+    public void close()
+      throws ManifoldCFException
+    {
+    }
+
+  }
+
+  protected static class SpecPacker {
+    
+    private final Map<String,String> sourceTargets = new HashMap<String,String>();
+    private final boolean keepAllMetadata;
+    private final boolean ignoreTikaException;
+    
+    public SpecPacker(Specification os) {
+      boolean keepAllMetadata = true;
+      boolean ignoreTikaException = true;
+      for (int i = 0; i < os.getChildCount(); i++) {
+        SpecificationNode sn = os.getChild(i);
+        
+        if(sn.getType().equals(TikaConfig.NODE_KEEPMETADATA)) {
+          String value = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
+          keepAllMetadata = Boolean.parseBoolean(value);
+        } else if (sn.getType().equals(TikaConfig.NODE_FIELDMAP)) {
+          String source = sn.getAttributeValue(TikaConfig.ATTRIBUTE_SOURCE);
+          String target = sn.getAttributeValue(TikaConfig.ATTRIBUTE_TARGET);
+          
+          if (target == null) {
+            target = "";
+          }
+          sourceTargets.put(source, target);
+        } else if (sn.getType().equals(TikaConfig.NODE_IGNORETIKAEXCEPTION)) {
+          String value = sn.getAttributeValue(TikaConfig.ATTRIBUTE_VALUE);
+          ignoreTikaException = Boolean.parseBoolean(value);
+        }
+      }
+      this.keepAllMetadata = keepAllMetadata;
+      this.ignoreTikaException = ignoreTikaException;
+    }
+    
+    public SpecPacker(String packedString) {
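+      // Format (mirroring toPackedString below): a packed list of source:target
+      // pairs, then '+'/'-' for keepAllMetadata, then '+'/'-' for ignoreTikaException.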
+      
+      int index = 0;
+      
+      // Mappings
+      final List<String> packedMappings = new ArrayList<String>();
+      index = unpackList(packedMappings,packedString,index,'+');
+      String[] fixedList = new String[2];
+      for (String packedMapping : packedMappings) {
+        unpackFixedList(fixedList,packedMapping,0,':');
+        sourceTargets.put(fixedList[0], fixedList[1]);
+      }
+      
+      // Keep all metadata
+      if (packedString.length() > index)
+        keepAllMetadata = (packedString.charAt(index++) == '+');
+      else
+        keepAllMetadata = true;
+
+      // Ignore tika exception
+      if (packedString.length() > index)
+        ignoreTikaException = (packedString.charAt(index++) == '+');
+      else
+        ignoreTikaException = true;
+      
+    }
+    
+    public String toPackedString() {
+      StringBuilder sb = new StringBuilder();
+      int i;
+      
+      // Mappings
+      final String[] sortArray = new String[sourceTargets.size()];
+      i = 0;
+      for (String source : sourceTargets.keySet()) {
+        sortArray[i++] = source;
+      }
+      java.util.Arrays.sort(sortArray);
+      
+      List<String> packedMappings = new ArrayList<String>();
+      String[] fixedList = new String[2];
+      for (String source : sortArray) {
+        String target = sourceTargets.get(source);
+        StringBuilder localBuffer = new StringBuilder();
+        fixedList[0] = source;
+        fixedList[1] = target;
+        packFixedList(localBuffer,fixedList,':');
+        packedMappings.add(localBuffer.toString());
+      }
+      packList(sb,packedMappings,'+');
+
+      // Keep all metadata
+      if (keepAllMetadata)
+        sb.append('+');
+      else
+        sb.append('-');
+      
+      if (ignoreTikaException)
+        sb.append('+');
+      else
+        sb.append('-');
+
+      return sb.toString();
+    }
+    
+    public String getMapping(String source) {
+      return sourceTargets.get(source);
+    }
+    
+    public boolean keepAllMetadata() {
+      return keepAllMetadata;
+    }
+    
+    public boolean ignoreTikaException() {
+      return ignoreTikaException;
+    }
+  }
+
+}
+
+
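The SpecPacker at the bottom of TikaExtractor is the glue between getPipelineDescription() and addOrReplaceDocumentWithException(): the first packs the Specification into the version string, the second rebuilds the same answers from it. A quick sketch of the round trip, assuming a Specification os supplied by the framework ("Author" is a hypothetical source field):

    // Pack the specification into a version string, then rebuild it,
    // as the framework does across the two entry points.
    SpecPacker packed = new SpecPacker(os);
    String version = packed.toPackedString();
    SpecPacker unpacked = new SpecPacker(version);
    boolean keep = unpacked.keepAllMetadata();        // same answer as packed.keepAllMetadata()
    boolean ignore = unpacked.ignoreTikaException();  // same answer as packed.ignoreTikaException()
    String target = unpacked.getMapping("Author");    // mapped target, or null if unmapped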
diff --git a/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_en_US.properties b/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_en_US.properties
new file mode 100644
index 0000000..6218be5
--- /dev/null
+++ b/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_en_US.properties
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+TikaExtractor.FieldMappingTabName=Field mapping
+TikaExtractor.ExceptionsTabName=Exceptions
+TikaExtractor.FieldMappings=Field mappings:
+TikaExtractor.MetadataFieldName=Metadata field name
+TikaExtractor.FinalFieldName=Final field name
+TikaExtractor.NoFieldMappingSpecified=No field mapping specified
+TikaExtractor.KeepAllMetadata=Keep all metadata
+TikaExtractor.Add=Add
+TikaExtractor.AddFieldMapping=Add field mapping
+TikaExtractor.Delete=Delete
+TikaExtractor.DeleteFieldMapping=Delete field mapping
+TikaExtractor.NoFieldNameSpecified=Please specify a field name
+TikaExtractor.IgnoreTikaExceptions=Ignore Tika exceptions
\ No newline at end of file
diff --git a/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_ja_JP.properties b/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_ja_JP.properties
new file mode 100644
index 0000000..6218be5
--- /dev/null
+++ b/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_ja_JP.properties
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+TikaExtractor.FieldMappingTabName=Field mapping
+TikaExtractor.ExceptionsTabName=Exceptions
+TikaExtractor.FieldMappings=Field mappings:
+TikaExtractor.MetadataFieldName=Metadata field name
+TikaExtractor.FinalFieldName=Final field name
+TikaExtractor.NoFieldMappingSpecified=No field mapping specified
+TikaExtractor.KeepAllMetadata=Keep all metadata
+TikaExtractor.Add=Add
+TikaExtractor.AddFieldMapping=Add field mapping
+TikaExtractor.Delete=Delete
+TikaExtractor.DeleteFieldMapping=Delete field mapping
+TikaExtractor.NoFieldNameSpecified=Please specify a field name
+TikaExtractor.IgnoreTikaExceptions=Ignore Tika exceptions
\ No newline at end of file
diff --git a/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_zh_CN.properties b/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_zh_CN.properties
new file mode 100644
index 0000000..6218be5
--- /dev/null
+++ b/connectors/tika/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tika/common_zh_CN.properties
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+TikaExtractor.FieldMappingTabName=Field mapping
+TikaExtractor.ExceptionsTabName=Exceptions
+TikaExtractor.FieldMappings=Field mappings:
+TikaExtractor.MetadataFieldName=Metadata field name
+TikaExtractor.FinalFieldName=Final field name
+TikaExtractor.NoFieldMappingSpecified=No field mapping specified
+TikaExtractor.KeepAllMetadata=Keep all metadata
+TikaExtractor.Add=Add
+TikaExtractor.AddFieldMapping=Add field mapping
+TikaExtractor.Delete=Delete
+TikaExtractor.DeleteFieldMapping=Delete field mapping
+TikaExtractor.NoFieldNameSpecified=Please specify a field name
+TikaExtractor.IgnoreTikaExceptions=Ignore Tika exceptions
\ No newline at end of file
diff --git a/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification.js b/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification.js
new file mode 100644
index 0000000..a2a9ae4
--- /dev/null
+++ b/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification.js
@@ -0,0 +1,51 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<script type="text/javascript">
+<!--
+function s${SEQNUM}_checkSpecification()
+{
+  return true;
+}
+
+function s${SEQNUM}_addFieldMapping()
+{
+  if (editjob.s${SEQNUM}_fieldmapping_source.value == "")
+  {
+    alert("$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.NoFieldNameSpecified'))");
+    editjob.s${SEQNUM}_fieldmapping_source.focus();
+    return;
+  }
+  editjob.s${SEQNUM}_fieldmapping_op.value="Add";
+  postFormSetAnchor("s${SEQNUM}_fieldmapping");
+}
+
+function s${SEQNUM}_deleteFieldMapping(i)
+{
+  // Set the operation
+  eval("editjob.s${SEQNUM}_fieldmapping_op_"+i+".value=\"Delete\"");
+  // Submit
+  if (editjob.s${SEQNUM}_fieldmapping_count.value==i)
+    postFormSetAnchor("s${SEQNUM}_fieldmapping");
+  else
+    postFormSetAnchor("s${SEQNUM}_fieldmapping_"+i)
+  // Undo, so we won't get two deletes next time
+  eval("editjob.s${SEQNUM}_fieldmapping_op_"+i+".value=\"Continue\"");
+}
+
+//-->
+</script>
diff --git a/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification_Exceptions.html b/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification_Exceptions.html
new file mode 100644
index 0000000..1ca4e4b
--- /dev/null
+++ b/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification_Exceptions.html
@@ -0,0 +1,40 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+#if($TABNAME == $ResourceBundle.getString('TikaExtractor.ExceptionsTabName') && ${SEQNUM} == ${SELECTEDNUM})
+
+<table class="displaytable">
+  <tr><td class="separator" colspan="2"><hr/></td></tr>
+  <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.IgnoreTikaExceptions'))</nobr></td>
+    <td class="value">
+        <input type="hidden" name="s${SEQNUM}_ignoretikaexceptions_present" value="true"/>
+  #if($IGNORETIKAEXCEPTIONS == 'true')
+       <input type="checkbox" checked="true" name="s${SEQNUM}_ignoretikaexceptions" value="true"/>
+  #else
+       <input type="checkbox" name="s${SEQNUM}_ignoretikaexceptions" value="true"/>
+  #end
+    </td>
+  </tr>
+</table>
+      
+#else
+
+<input type="hidden" name="s${SEQNUM}_ignoretikaexceptions_present" value="true"/>
+<input type="hidden" name="s${SEQNUM}_ignoretikaexceptions" value="$Encoder.bodyEscape($IGNORETIKAEXCEPTIONS)"/>
+
+#end
\ No newline at end of file
diff --git a/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification_FieldMapping.html b/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification_FieldMapping.html
new file mode 100644
index 0000000..3ecb711
--- /dev/null
+++ b/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/editSpecification_FieldMapping.html
@@ -0,0 +1,107 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+#if($TABNAME == $ResourceBundle.getString('TikaExtractor.FieldMappingTabName') && ${SEQNUM} == ${SELECTEDNUM})
+
+<table class="displaytable">
+  <tr><td class="separator" colspan="2"><hr/></td></tr>
+  <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.FieldMappings'))</nobr></td>
+    <td class="boxcell">
+      <table class="formtable">
+        <tr class="formheaderrow">
+          <td class="formcolumnheader"></td>
+          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.MetadataFieldName'))</nobr></td>
+          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.FinalFieldName'))</nobr></td>
+        </tr>
+
+  #set($fieldcounter = 0)
+  #foreach($fieldmapping in $FIELDMAPPINGS)
+    #set($fieldcounterdisplay = $fieldcounter + 1)
+    #if(($fieldcounter % 2) == 0)
+        <tr class="evenformrow">
+    #else
+        <tr class="oddformrow">
+    #end
+          <td class="formcolumncell">
+            <a name="s${SEQNUM}_fieldmapping_$fieldcounter">
+              <input type="button" value="$Encoder.attributeEscape($ResourceBundle.getString('TikaExtractor.Delete'))" alt="$Encoder.attributeEscape($ResourceBundle.getString('TikaExtractor.DeleteFieldMapping'))$fieldcounterdisplay" onclick='javascript:s${SEQNUM}_deleteFieldMapping("$fieldcounter");'/>
+              <input type="hidden" name="s${SEQNUM}_fieldmapping_op_$fieldcounter" value="Continue"/>
+              <input type="hidden" name="s${SEQNUM}_fieldmapping_source_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('SOURCE'))"/>
+              <input type="hidden" name="s${SEQNUM}_fieldmapping_target_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('TARGET'))"/>
+            </a>
+          </td>
+          <td class="formcolumncell">
+            <nobr>$Encoder.bodyEscape($fieldmapping.get('SOURCE'))</nobr>
+          </td>
+          <td class="formcolumncell">
+            <nobr>$Encoder.bodyEscape($fieldmapping.get('TARGETDISPLAY'))</nobr>
+          </td>
+        </tr>
+    #set($fieldcounter = $fieldcounter + 1)
+  #end
+  
+  #if($fieldcounter == 0)
+        <tr class="formrow"><td class="formmessage" colspan="3">$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.NoFieldMappingSpecified'))</td></tr>
+  #end
+      
+        <tr class="formrow"><td class="formseparator" colspan="3"><hr/></td></tr>
+        <tr class="formrow">
+          <td class="formcolumncell">
+            <a name="fieldmapping">
+              <input type="button" value="$Encoder.attributeEscape($ResourceBundle.getString('TikaExtractor.Add'))" alt="$Encoder.attributeEscape($ResourceBundle.getString('TikaExtractor.AddFieldMapping'))" onclick="javascript:s${SEQNUM}_addFieldMapping();"/>
+            </a>
+            <input type="hidden" name="s${SEQNUM}_fieldmapping_count" value="$fieldcounter"/>
+            <input type="hidden" name="s${SEQNUM}_fieldmapping_op" value="Continue"/>
+          </td>
+          <td class="formcolumncell">
+            <nobr><input type="text" size="15" name="s${SEQNUM}_fieldmapping_source" value=""/></nobr>
+          </td>
+          <td class="formcolumncell">
+            <nobr><input type="text" size="15" name="s${SEQNUM}_fieldmapping_target" value=""/></nobr>
+          </td>
+        </tr>
+      </table>
+    </td>
+  </tr>
+  
+  <tr><td class="separator" colspan="2"><hr/></td></tr>
+  
+  <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.KeepAllMetadata'))</nobr></td>
+    <td class="value">
+  #if($KEEPALLMETADATA == 'true')
+       <input type="checkbox" checked="true" name="s${SEQNUM}_keepallmetadata" value="true"/>
+  #else
+       <input type="checkbox" name="s${SEQNUM}_keepallmetadata" value="true"/>
+  #end
+    </td>
+  </tr>
+</table>
+      
+#else
+
+  #set($fieldcounter = 0)
+  #foreach($fieldmapping in $FIELDMAPPINGS)
+<input type="hidden" name="s${SEQNUM}_fieldmapping_source_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('SOURCE'))"/>
+<input type="hidden" name="s${SEQNUM}_fieldmapping_target_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('TARGET'))"/>
+    #set($fieldcounter = $fieldcounter + 1)
+  #end
+<input type="hidden" name="s${SEQNUM}_fieldmapping_count" value="$fieldcounter"/>
+<input type="hidden" name="s${SEQNUM}_keepallmetadata" value="$Encoder.bodyEscape($KEEPALLMETADATA)"/>
+
+#end
\ No newline at end of file
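Note on the template above: the #else branch is the standard ManifoldCF tab round-trip. When the Tika tab is not the one being rendered, the specification is still posted via indexed hidden inputs (s${SEQNUM}_fieldmapping_source_N, s${SEQNUM}_fieldmapping_target_N) plus an s${SEQNUM}_fieldmapping_count field, so nothing is lost when the user switches tabs. A minimal readback sketch follows, assuming a plain parameter map in place of the real post-parameter API, and assuming the deleteFieldMapping script (not shown here) flips a row's op field to "Delete":

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;

    public class FieldMappingReadback
    {
      // Rebuild (source,target) pairs from the hidden fields the template posts.
      // The Map and seqPrefix arguments are stand-ins for the servlet plumbing.
      public static List<String[]> readMappings(Map<String,String> params, String seqPrefix)
      {
        List<String[]> mappings = new ArrayList<String[]>();
        String countValue = params.get(seqPrefix + "fieldmapping_count");
        if (countValue == null)
          return mappings;                  // tab never posted; leave spec alone
        int count = Integer.parseInt(countValue);
        for (int i = 0; i < count; i++)
        {
          // Skip rows whose op field was (presumably) set to "Delete" in the UI;
          // the hidden-only #else branch posts no op fields, so get() is null there.
          if ("Delete".equals(params.get(seqPrefix + "fieldmapping_op_" + i)))
            continue;
          mappings.add(new String[]{
            params.get(seqPrefix + "fieldmapping_source_" + i),
            params.get(seqPrefix + "fieldmapping_target_" + i)});
        }
        return mappings;
      }
    }

Rows are addressed purely by index, which is why the count field is rewritten on every post.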
diff --git a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewSpecification.html b/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/viewSpecification.html
similarity index 65%
rename from connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewSpecification.html
rename to connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/viewSpecification.html
index a38e893..88c80c7 100644
--- a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewSpecification.html
+++ b/connectors/tika/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tika/viewSpecification.html
@@ -17,27 +17,12 @@
 
 <table class="displaytable">
   <tr>
-    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.MaxFileSizeBytesColon'))</nobr></td>
-    <td class="value">$Encoder.bodyEscape($MAXFILESIZE)</td>
-  </tr>
-  <tr>
-    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.AllowedMIMETypesColon'))</nobr></td>
-    <td class="value">$Encoder.bodyEscape($MIMETYPES)</td>
-  </tr>
-  <tr>
-    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.AllowedFileExtensionsColon'))</nobr></td>
-    <td class="value">$Encoder.bodyEscape($EXTENSIONS)</td>
-  </tr>
-  
-  <tr><td class="separator" colspan="2"><hr/></td></tr>
-
-  <tr>
-    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.FieldMappings'))</nobr></td>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.FieldMappings'))</nobr></td>
     <td class="boxcell">
       <table class="formtable">
         <tr class="formheaderrow">
-          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.MetadataFieldName'))</nobr></td>
-          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.CloudSearchFieldName'))</nobr></td>
+          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.MetadataFieldName'))</nobr></td>
+          <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.FinalFieldName'))</nobr></td>
         </tr>
 #set($fieldcounter = 0)
 #foreach($fieldmapping in $FIELDMAPPINGS)
@@ -56,15 +41,20 @@
   #set($fieldcounter = $fieldcounter + 1)
 #end
 #if($fieldcounter == 0)
-        <tr class="formrow"><td class="formmessage" colspan="2">$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.NoFieldMappingSpecified'))</td></tr>
+        <tr class="formrow"><td class="formmessage" colspan="2">$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.NoFieldMappingSpecified'))</td></tr>
 #end
       </table>
     </td>
   </tr>
   <tr><td class="separator" colspan="2"><hr/></td></tr>
   <tr>
-    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.KeepAllMetadata'))</nobr></td>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.KeepAllMetadata'))</nobr></td>
     <td class="value"><nobr>$Encoder.bodyEscape($KEEPALLMETADATA)</nobr></td>
   </tr>
+  <tr><td class="separator" colspan="2"><hr/></td></tr>
+  <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.IgnoreTikaExceptions'))</nobr></td>
+    <td class="value"><nobr>$Encoder.bodyEscape($IGNORETIKAEXCEPTIONS)</nobr></td>
+  </tr>
 
 </table>
diff --git a/connectors/tika/pom.xml b/connectors/tika/pom.xml
new file mode 100644
index 0000000..6648c74
--- /dev/null
+++ b/connectors/tika/pom.xml
@@ -0,0 +1,391 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <groupId>org.apache.manifoldcf</groupId>
+    <artifactId>mcf-connectors</artifactId>
+    <version>1.7-SNAPSHOT</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+  </properties>
+
+  <artifactId>mcf-tika-connector</artifactId>
+  <name>ManifoldCF - Connectors - Tika Transformer</name>
+
+  <build>
+    <defaultGoal>integration-test</defaultGoal>
+    <sourceDirectory>${basedir}/connector/src/main/java</sourceDirectory>
+    <testSourceDirectory>${basedir}/connector/src/test/java</testSourceDirectory>
+    <resources>
+      <resource>
+        <directory>${basedir}/connector/src/main/native2ascii</directory>
+        <includes>
+          <include>**/*.properties</include>
+        </includes>
+      </resource>
+      <resource>
+        <directory>${basedir}/connector/src/main/resources</directory>
+        <includes>
+          <include>**/*.html</include>
+          <include>**/*.js</include>
+        </includes>
+      </resource>
+    </resources> 
+    <testResources>
+      <testResource>
+        <directory>${basedir}/connector/src/test/resources</directory>
+      </testResource>
+    </testResources>
+
+    <plugins>
+
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>native2ascii-maven-plugin</artifactId>
+        <version>1.0-beta-1</version>
+        <configuration>
+            <workDir>target/classes</workDir>
+        </configuration>
+        <executions>
+            <execution>
+                <id>native2ascii-utf8</id>
+                <goals>
+                    <goal>native2ascii</goal>
+                </goals>
+                <configuration>
+                    <encoding>UTF8</encoding>
+                    <includes>
+                      <include>**/*.properties</include>
+                    </includes>
+                </configuration>
+            </execution>
+        </executions>
+      </plugin>
+
+      <!-- Test plugin configuration -->
+      <plugin>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+           <execution>
+            <id>copy-war</id>
+            <phase>generate-resources</phase>
+            <goals>
+              <goal>copy</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>target/dependency</outputDirectory>
+              <artifactItems>
+                <artifactItem>
+                  <groupId>${project.groupId}</groupId>
+                  <artifactId>mcf-api-service</artifactId>
+                  <version>${project.version}</version>
+                  <type>war</type>
+                  <overWrite>false</overWrite>
+                  <destFileName>mcf-api-service.war</destFileName>
+                </artifactItem>
+                <artifactItem>
+                  <groupId>${project.groupId}</groupId>
+                  <artifactId>mcf-authority-service</artifactId>
+                  <version>${project.version}</version>
+                  <type>war</type>
+                  <overWrite>false</overWrite>
+                  <destFileName>mcf-authority-service.war</destFileName>
+                </artifactItem>
+                <artifactItem>
+                  <groupId>${project.groupId}</groupId>
+                  <artifactId>mcf-crawler-ui</artifactId>
+                  <version>${project.version}</version>
+                  <type>war</type>
+                  <overWrite>false</overWrite>
+                  <destFileName>mcf-crawler-ui.war</destFileName>
+                </artifactItem>
+              </artifactItems>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <excludes>
+            <exclude>**/*Postgresql*.java</exclude>
+            <exclude>**/*MySQL*.java</exclude>
+          </excludes>
+          <forkMode>always</forkMode>
+          <workingDirectory>target/test-output</workingDirectory>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-failsafe-plugin</artifactId>
+        <version>2.12.3</version>
+        <configuration>
+          <skipTests>${skipITs}</skipTests>
+          <systemPropertyVariables>
+            <crawlerWarPath>../dependency/mcf-crawler-ui.war</crawlerWarPath>
+            <authorityserviceWarPath>../dependency/mcf-authority-service.war</authorityserviceWarPath>
+            <apiWarPath>../dependency/mcf-api-service.war</apiWarPath>
+          </systemPropertyVariables>
+          <excludes>
+            <exclude>**/*Postgresql*.java</exclude>
+            <exclude>**/*MySQL*.java</exclude>
+          </excludes>
+          <forkMode>always</forkMode>
+          <workingDirectory>target/test-output</workingDirectory>
+        </configuration>
+        <executions>
+          <execution>
+            <id>integration-test</id>
+            <goals>
+              <goal>integration-test</goal>
+            </goals>
+          </execution>
+          <execution>
+            <id>verify</id>
+            <goals>
+              <goal>verify</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+
+    </plugins>
+  </build>
+  
+  <dependencies>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-agents</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-ui-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpclient</artifactId>
+      <version>${httpcomponent.httpclient.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-core</artifactId>
+      <version>2.1.3</version>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-databind</artifactId>
+      <version>2.3.2</version>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-annotations</artifactId>
+      <version>2.3.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>1.5</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-parsers</artifactId>
+      <version>1.5</version>
+    </dependency>
+    
+    <!-- Testing dependencies -->
+    
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>${junit.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-core</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-agents</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-pull-agent</artifactId>
+      <version>${project.version}</version>
+      <type>jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-pull-agent</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>postgresql</groupId>
+      <artifactId>postgresql</artifactId>
+      <version>${postgresql.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.hsqldb</groupId>
+      <artifactId>hsqldb</artifactId>
+      <version>${hsqldb.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.derby</groupId>
+      <artifactId>derby</artifactId>
+      <version>${derby.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>mysql</groupId>
+      <artifactId>mysql-connector-java</artifactId>
+      <version>${mysql.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-api-service</artifactId>
+      <version>${project.version}</version>
+      <type>war</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-authority-service</artifactId>
+      <version>${project.version}</version>
+      <type>war</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mcf-crawler-ui</artifactId>
+      <version>${project.version}</version>
+      <type>war</type>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-server</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-util</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-webapp</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-servlet</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-http</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-io</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-security</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-continuation</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.eclipse.jetty</groupId>
+      <artifactId>jetty-xml</artifactId>
+      <version>${jetty.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-api-2.1-glassfish</artifactId>
+      <version>${glassfish.version}</version>
+      <scope>test</scope>
+    </dependency>    
+    <dependency>
+      <groupId>org.mortbay.jetty</groupId>
+      <artifactId>jsp-2.1-glassfish</artifactId>
+      <version>${glassfish.version}</version>
+      <scope>test</scope>
+    </dependency>
+    
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>${slf4j.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-simple</artifactId>
+      <version>${slf4j.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+</project>
diff --git a/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java b/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
index 949e847..e341c68 100644
--- a/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
+++ b/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/WebcrawlerConnector.java
@@ -34,6 +34,7 @@
 import org.apache.http.HttpException;
 
 import java.io.*;
+import java.nio.charset.StandardCharsets;
 import java.util.*;
 import java.net.*;
 import java.util.regex.*;
@@ -788,7 +789,7 @@
                         String contentType = extractContentType(connection.getResponseHeader("Content-Type"));
                         String encoding = extractEncoding(contentType);
                         if (encoding == null)
-                          encoding = "utf-8";
+                          encoding = StandardCharsets.UTF_8.name();
                         String decodedResponse = "undecodable";
                         try
                         {
@@ -1331,7 +1332,7 @@
         // Leave document in jobqueue, but do NOT get rid of it, or we will wind up seeing it queued again by
         // somebody else.  We *do* have to signal the document to be removed from the index, however, or it will
         // stick around until the job is deleted.
-        activities.deleteDocument(documentIdentifier,version);
+        activities.noDocument(documentIdentifier,version);
         continue;
       }
 
@@ -1465,7 +1466,7 @@
           // We do this by using a null url and a null repository document.  If a document with this identifier was
           // previously indexed, it will be removed.
           
-          activities.deleteDocument(documentIdentifier,version);
+          activities.noDocument(documentIdentifier,version);
           
           if (Logging.connectors.isDebugEnabled())
             Logging.connectors.debug("WEB: Decided not to ingest '"+documentIdentifier+"' because it did not match ingestability criteria");
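The two hunks above switch from activities.deleteDocument() to activities.noDocument(): noDocument() records the document's version string (so the next incremental crawl can skip an unchanged document) while still removing any previously indexed copy; the old deleteDocument() call gave up the version information, defeating incremental behavior. A compilable sketch of the contract difference, with a stand-in interface rather than the real IProcessActivity:

    public class NoDocumentSketch
    {
      // Stand-in for the two IProcessActivity calls involved; only the
      // behavioral difference matters here.
      interface Activities
      {
        // Record the version and remove the document from the index; on the
        // next crawl the document can be skipped if its version is unchanged.
        void noDocument(String documentIdentifier, String version) throws Exception;

        // Old call: discard the version record as well, so the document is
        // re-fetched on the next crawl even when nothing changed.
        void deleteDocument(String documentIdentifier, String version) throws Exception;
      }

      // A connector that decides a fetched document is not ingestable keeps
      // incrementality by calling noDocument() instead of deleteDocument().
      static void handleUningestable(Activities activities, String documentIdentifier, String version)
        throws Exception
      {
        activities.noDocument(documentIdentifier, version);
      }
    }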
@@ -6993,7 +6994,7 @@
       String contentType = extractContentType(cache.getContentType(documentURI));
       String encoding = extractEncoding(contentType);
       if (encoding == null)
-        encoding = "utf-8";
+        encoding = StandardCharsets.UTF_8.name();
       
       // Search for A HREF tags in the document stream.  This is brain-dead link location
       InputStream is = cache.getData(documentURI);
@@ -7149,9 +7150,9 @@
             String nextString = is.readLine();
             if (nextString == null)
               break;
+            nextString = nextString.trim();
             if (nextString.length() == 0)
               continue;
-            nextString.trim();
             if (nextString.startsWith("#"))
               continue;
             list.add(nextString);
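Two small but real fixes ride along in this file. The charset hunks replace the "utf-8" literal with StandardCharsets.UTF_8.name(), trading a magic string for a named constant (both resolve to "UTF-8"). The last hunk fixes a no-op: Java strings are immutable, so a bare nextString.trim() discards its result; the fix assigns the trimmed value back and runs before the empty-line check, so whitespace-only lines are now skipped too. A tiny runnable demonstration:

    public class TrimDemo
    {
      public static void main(String[] args)
      {
        String raw = "   # comment   ";
        raw.trim();                                   // no-op: result discarded
        System.out.println(raw.startsWith("#"));      // false - spaces remain
        String trimmed = raw.trim();                  // the fix: keep the result
        System.out.println(trimmed.startsWith("#"));  // true
      }
    }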
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
index 9dca5be..f834da4 100644
--- a/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
+++ b/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/IncrementalIngester.java
@@ -42,6 +42,7 @@
 * <tr><td>id</td><td>BIGINT</td><td>Primary Key</td></tr>
 * <tr><td>connectionname</td><td>VARCHAR(32)</td><td>Reference:outputconnections.connectionname</td></tr>
 * <tr><td>dockey</td><td>VARCHAR(73)</td><td></td></tr>
+* <tr><td>componenthash</td><td>VARCHAR(40)</td><td></td></tr>
 * <tr><td>docuri</td><td>LONGTEXT</td><td></td></tr>
 * <tr><td>urihash</td><td>VARCHAR(40)</td><td></td></tr>
 * <tr><td>lastversion</td><td>LONGTEXT</td><td></td></tr>
@@ -64,6 +65,7 @@
   protected final static String idField = "id";
   protected final static String outputConnNameField = "connectionname";
   protected final static String docKeyField = "dockey";
+  protected final static String componentHashField = "componenthash";
   protected final static String docURIField = "docuri";
   protected final static String uriHashField = "urihash";
   protected final static String lastVersionField = "lastversion";
@@ -122,6 +124,7 @@
         map.put(idField,new ColumnDescription("BIGINT",true,false,null,null,false));
         map.put(outputConnNameField,new ColumnDescription("VARCHAR(32)",false,false,outputConnectionTableName,outputConnectionNameField,false));
         map.put(docKeyField,new ColumnDescription("VARCHAR(73)",false,false,null,null,false));
+        map.put(componentHashField,new ColumnDescription("VARCHAR(40)",false,true,null,null,false));
         // The document URI field, if null, indicates that the document was not actually ingested!
         // This happens when a connector wishes to keep track of a version string, but not actually ingest the doc.
         map.put(docURIField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
@@ -156,10 +159,18 @@
           performAlter(addMap,null,null,null);
         }
 
+        cd = (ColumnDescription)existing.get(componentHashField);
+        if (cd == null)
+        {
+          Map<String,ColumnDescription> addMap = new HashMap<String,ColumnDescription>();
+          addMap.put(componentHashField,new ColumnDescription("VARCHAR(40)",false,true,null,null,false));
+          performAlter(addMap,null,null,null);
+        }
+
       }
 
       // Now, do indexes
-      IndexDescription keyIndex = new IndexDescription(true,new String[]{docKeyField,outputConnNameField});
+      IndexDescription keyIndex = new IndexDescription(true,new String[]{docKeyField,outputConnNameField,componentHashField});
       IndexDescription uriHashIndex = new IndexDescription(false,new String[]{uriHashField,outputConnNameField});
       IndexDescription outputConnIndex = new IndexDescription(false,new String[]{outputConnNameField});
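The hunk above shows the in-place schema upgrade idiom: at install time the code inspects the live table map and, if the new componenthash column is absent, ALTERs it in as a nullable VARCHAR(40), so pre-upgrade rows remain valid with a NULL component. The unique key index is also widened from (dockey, connectionname) to (dockey, connectionname, componenthash), which is what lets one repository document own several indexed rows, one per sub-component. A condensed sketch of the idiom, with stand-in types for the ManifoldCF database layer:

    import java.util.HashMap;
    import java.util.Map;

    public class AddColumnIfMissing
    {
      static class ColumnDescription
      {
        final String type;
        final boolean nullable;
        ColumnDescription(String type, boolean nullable) { this.type = type; this.nullable = nullable; }
      }

      interface Alter { void apply(Map<String,ColumnDescription> addMap); }

      // Upgrade-in-place: consult the live table map and add the column only
      // when it is absent.  Nullable, so existing rows stay valid.
      static void ensureComponentHashColumn(Map<String,ColumnDescription> existing, Alter performAlter)
      {
        if (existing.get("componenthash") == null)
        {
          Map<String,ColumnDescription> addMap = new HashMap<String,ColumnDescription>();
          addMap.put("componenthash", new ColumnDescription("VARCHAR(40)", true));
          performAlter.apply(addMap);   // issues the ALTER TABLE ... ADD COLUMN
        }
      }
    }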
 
@@ -228,26 +239,22 @@
     int count = pipelineSpecificationBasic.getOutputCount();
     if (count == 0)
       return null;
-    return pipelineSpecificationBasic.getStageConnectionName(count-1);
+    return pipelineSpecificationBasic.getStageConnectionName(pipelineSpecificationBasic.getOutputStage(count-1));
   }
 
-  /** Check if a mime type is indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param mimeType is the mime type to check.
-  *@return true if the mimeType is indexable.
+  /** From a pipeline specification, get the name of the output connection that will be indexed first
+  * in the pipeline.
+  *@param pipelineSpecificationBasic is the basic pipeline specification.
+  *@return the first indexed output connection name.
   */
   @Override
-  @Deprecated
-  public boolean checkMimeTypeIndexable(String outputConnectionName, String outputDescription, String mimeType)
-    throws ManifoldCFException, ServiceInterruption
+  public String getFirstIndexedOutputConnectionName(IPipelineSpecificationBasic pipelineSpecificationBasic)
   {
-    return checkMimeTypeIndexable(
-      new RuntPipelineSpecification(outputConnectionName,outputDescription),
-      mimeType,null);
+    if (pipelineSpecificationBasic.getOutputCount() == 0)
+      return null;
+    return pipelineSpecificationBasic.getStageConnectionName(pipelineSpecificationBasic.getOutputStage(0));
   }
 
-  
   /** Check if a mime type is indexable.
   *@param pipelineSpecification is the pipeline specification.
   *@param mimeType is the mime type to check.
@@ -277,22 +284,6 @@
   }
 
   /** Check if a file is indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param localFile is the local file to check.
-  *@return true if the local file is indexable.
-  */
-  @Override
-  @Deprecated
-  public boolean checkDocumentIndexable(String outputConnectionName, String outputDescription, File localFile)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    return checkDocumentIndexable(
-      new RuntPipelineSpecification(outputConnectionName,outputDescription),
-      localFile,null);
-  }
-  
-  /** Check if a file is indexable.
   *@param pipelineSpecification is the pipeline specification.
   *@param localFile is the local file to check.
   *@param activity are the activities available to this method.
@@ -322,23 +313,6 @@
 
   /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors
   * to help filter out documents that are too long to be indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param length is the length of the document.
-  *@return true if the file is indexable.
-  */
-  @Override
-  @Deprecated
-  public boolean checkLengthIndexable(String outputConnectionName, String outputDescription, long length)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    return checkLengthIndexable(
-      new RuntPipelineSpecification(outputConnectionName,outputDescription),
-      length,null);
-  }
-  
-  /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors
-  * to help filter out documents that are too long to be indexable.
   *@param pipelineSpecification is the pipeline specification.
   *@param length is the length of the document.
   *@param activity are the activities available to this method.
@@ -368,23 +342,6 @@
 
   /** Pre-determine whether a document's URL is indexable by this connector.  This method is used by participating repository connectors
  * to help filter out documents that are not indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param url is the url of the document.
-  *@return true if the file is indexable.
-  */
-  @Override
-  @Deprecated
-  public boolean checkURLIndexable(String outputConnectionName, String outputDescription, String url)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    return checkURLIndexable(
-      new RuntPipelineSpecification(outputConnectionName,outputDescription),
-      url,null);
-  }
-  
-  /** Pre-determine whether a document's URL is indexable by this connector.  This method is used by participating repository connectors
-  * to help filter out documents that not indexable.
   *@param pipelineSpecification is the pipeline specification.
   *@param url is the url of the document.
   *@param activity are the activities available to this method.
@@ -518,7 +475,7 @@
   *@return the description string.
   */
   @Override
-  public String getOutputDescription(String outputConnectionName, OutputSpecification spec)
+  public VersionContext getOutputDescription(String outputConnectionName, Specification spec)
     throws ManifoldCFException, ServiceInterruption
   {
     IOutputConnection connection = connectionManager.load(outputConnectionName);
@@ -542,7 +499,8 @@
   *@param spec is the transformation specification.
   *@return the description string.
   */
-  public String getTransformationDescription(String transformationConnectionName, OutputSpecification spec)
+  @Override
+  public VersionContext getTransformationDescription(String transformationConnectionName, Specification spec)
     throws ManifoldCFException, ServiceInterruption
   {
     ITransformationConnection connection = transformationConnectionManager.load(transformationConnectionName);
@@ -577,12 +535,11 @@
     String newParameterVersion,
     String newAuthorityNameString)
   {
+    if (newAuthorityNameString == null)
+      newAuthorityNameString = "";
     IPipelineSpecification pipelineSpecification = pipelineSpecificationWithVersions.getPipelineSpecification();
     IPipelineSpecificationBasic basicSpecification = pipelineSpecification.getBasicPipelineSpecification();
-    // Empty document version has a special meaning....
-    if (newDocumentVersion.length() == 0)
-      return true;
-    // Otherwise, cycle through the outputs
+    // Cycle through the outputs
     for (int i = 0; i < basicSpecification.getOutputCount(); i++)
     {
       int stage = basicSpecification.getOutputStage(i);
@@ -597,7 +554,7 @@
       if (!oldDocumentVersion.equals(newDocumentVersion) ||
         !oldParameterVersion.equals(newParameterVersion) ||
         !oldAuthorityName.equals(newAuthorityNameString) ||
-        !oldOutputVersion.equals(pipelineSpecification.getStageDescriptionString(stage)))
+        !oldOutputVersion.equals(pipelineSpecification.getStageDescriptionString(stage).getVersionString()))
         return true;
       
       // Everything matches so far.  Next step is to compute a transformation path and a corresponding version string.
@@ -639,7 +596,7 @@
       if (newStage == -1)
         break;
       stageNames[stageCount] = basicSpecification.getStageConnectionName(newStage);
-      stageDescriptions[stageCount] = pipelineSpecification.getStageDescriptionString(newStage);
+      stageDescriptions[stageCount] = pipelineSpecification.getStageDescriptionString(newStage).getVersionString();
       stageCount++;
       currentStage = newStage;
     }
@@ -675,335 +632,101 @@
   }
 
   /** Record a document version, but don't ingest it.
-  * The purpose of this method is to keep track of the frequency at which ingestion "attempts" take place.
-  * ServiceInterruption is thrown if this action must be rescheduled.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hashed document identifier.
-  *@param documentVersion is the document version.
-  *@param recordTime is the time at which the recording took place, in milliseconds since epoch.
-  *@param activities is the object used in case a document needs to be removed from the output index as the result of this operation.
-  */
-  @Override
-  @Deprecated
-  public void documentRecord(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    String documentVersion,
-    long recordTime, IOutputActivity activities)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    documentRecord(
-      new RuntPipelineSpecificationBasic(outputConnectionName),
-      identifierClass, identifierHash,
-      documentVersion,
-      recordTime, activities);
-  }
-  
-  /** Record a document version, but don't ingest it.
-  * The purpose of this method is to keep track of the frequency at which ingestion "attempts" take place.
-  * ServiceInterruption is thrown if this action must be rescheduled.
+  * The purpose of this method is to update document version information without reindexing the document.
   *@param pipelineSpecificationBasic is the basic pipeline specification needed.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
   *@param identifierHash is the hashed document identifier.
+  *@param componentHash is the hashed component identifier, if any.
   *@param documentVersion is the document version.
   *@param recordTime is the time at which the recording took place, in milliseconds since epoch.
-  *@param activities is the object used in case a document needs to be removed from the output index as the result of this operation.
   */
   @Override
   public void documentRecord(
     IPipelineSpecificationBasic pipelineSpecificationBasic,
-    String identifierClass, String identifierHash,
-    String documentVersion, long recordTime,
-    IOutputActivity activities)
-    throws ManifoldCFException, ServiceInterruption
+    String identifierClass, String identifierHash, String componentHash,
+    String documentVersion, long recordTime)
+    throws ManifoldCFException
   {
+    // This method is called when a connector decides that the last indexed version of the document is in fact just fine,
+    // but the document version information should be updated.
+    // The code pathway is therefore similar to that of document indexing, EXCEPT that no indexing will ever
+    // take place.  This has some interesting side effects.  For example:
+    // (1) In the case of a document collision with another job using the same repository connection, the last document
+    //    indexed cannot be changed.  Updating the version string for the document would therefore be misleading.  This
+    //    case should be detected and prevented from occurring, by refusing to perform the update.
+    //    On the other hand, only one thread can be processing a given document at a time, and therefore
+    //    since the connector detected "no change", we are safe to presume we can just update the version info.
+    // (2) In the case of a URL conflict with another job, since nothing changes and no new URL is recorded, no cleanup
+    //    of conflicting records sharing the same URL should be needed.
+    
     String docKey = makeKey(identifierClass,identifierHash);
 
     String[] outputConnectionNames = extractOutputConnectionNames(pipelineSpecificationBasic);
-    IOutputConnection[] outputConnections = connectionManager.loadMultiple(outputConnectionNames);
 
     if (Logging.ingest.isDebugEnabled())
     {
-      Logging.ingest.debug("Recording document '"+docKey+"' for output connections '"+outputConnectionNames+"'");
+      Logging.ingest.debug("Recording document '"+docKey+"' component hash "+((componentHash==null)?"(None)":("'"+componentHash+"'"))+" for output connections '"+outputConnectionNames+"'");
     }
 
     for (int k = 0; k < outputConnectionNames.length; k++)
     {
       String outputConnectionName = outputConnectionNames[k];
-      IOutputConnection connection = outputConnections[k];
 
-      String oldURI = null;
-      String oldURIHash = null;
-      String oldOutputVersion = null;
-
-      // Repeat if needed
-      while (true)
-      {
-        long sleepAmt = 0L;
-        try
-        {
-          // See what uri was used before for this doc, if any
-          ArrayList list = new ArrayList();
-          String query = buildConjunctionClause(list,new ClauseDescription[]{
-            new UnitaryClause(docKeyField,docKey),
-            new UnitaryClause(outputConnNameField,outputConnectionName)});
-            
-          IResultSet set = performQuery("SELECT "+docURIField+","+uriHashField+","+lastOutputVersionField+" FROM "+getTableName()+
-            " WHERE "+query,list,null,null);
-
-          if (set.getRowCount() > 0)
-          {
-            IResultRow row = set.getRow(0);
-            oldURI = (String)row.getValue(docURIField);
-            oldURIHash = (String)row.getValue(uriHashField);
-            oldOutputVersion = (String)row.getValue(lastOutputVersionField);
-          }
-          
-          break;
-        }
-        catch (ManifoldCFException e)
-        {
-          // Look for deadlock and retry if so
-          if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT)
-          {
-            if (Logging.perf.isDebugEnabled())
-              Logging.perf.debug("Aborted select looking for status: "+e.getMessage());
-            sleepAmt = getSleepAmt();
-            continue;
-          }
-          throw e;
-        }
-        finally
-        {
-          sleepFor(sleepAmt);
-        }
-      }
-
-      // If uri hashes collide, then we must be sure to eliminate only the *correct* records from the table, or we will leave
-      // dangling documents around.  So, all uri searches and comparisons MUST compare the actual uri as well.
-
-      // But, since we need to insure that any given URI is only worked on by one thread at a time, use critical sections
-      // to block the rare case that multiple threads try to work on the same URI.
-      
-      String[] lockArray = computeLockArray(null,oldURI,outputConnectionName);
-      lockManager.enterLocks(null,null,lockArray);
-      try
-      {
-
-        ArrayList list = new ArrayList();
-        
-        if (oldURI != null)
-        {
-          IOutputConnector connector = outputConnectorPool.grab(connection);
-          if (connector == null)
-            // The connector is not installed; treat this as a service interruption.
-            throw new ServiceInterruption("Output connector not installed",0L);
-          try
-          {
-            connector.removeDocument(oldURI,oldOutputVersion,new OutputRemoveActivitiesWrapper(activities,outputConnectionName));
-          }
-          finally
-          {
-            outputConnectorPool.release(connection,connector);
-          }
-          // Delete all records from the database that match the old URI, except for THIS record.
-          list.clear();
-          String query = buildConjunctionClause(list,new ClauseDescription[]{
-            new UnitaryClause(uriHashField,"=",oldURIHash),
-            new UnitaryClause(outputConnNameField,outputConnectionName)});
-          list.add(docKey);
-          performDelete("WHERE "+query+" AND "+docKeyField+"!=?",list,null);
-        }
-
-        // If we get here, it means we are noting that the document was examined, but that no change was required.  This is signaled
-        // to noteDocumentIngest by having the null documentURI.
-        noteDocumentIngest(outputConnectionName,docKey,documentVersion,null,null,null,null,recordTime,null,null);
-      }
-      finally
-      {
-        lockManager.leaveLocks(null,null,lockArray);
-      }
+      // If we get here, it means we are noting that the document was examined, but that no change was required.  This is signaled
+      // to noteDocumentIngest by passing a null documentURI.
+      noteDocumentIngest(outputConnectionName,docKey,componentHash,documentVersion,null,null,null,null,recordTime,null,null);
     }
   }
 
-  /** Ingest a document.
-  * This ingests the document, and notes it.  If this is a repeat ingestion of the document, this
-  * method also REMOVES ALL OLD METADATA.  When complete, the index will contain only the metadata
-  * described by the RepositoryDocument object passed to this method.
-  * ServiceInterruption is thrown if the document ingestion must be rescheduled.
-  *@param outputConnectionName is the name of the output connection associated with this action.
+  /** Remove a document from the specified indexes, just as if an empty document
+  * was indexed, and record the necessary version information.
+  * This method is conceptually similar to documentIngest(), but does not actually take
+  * a document or allow it to be transformed.  If there is a document already
+  * indexed, it is removed from the index.
+  *@param pipelineSpecificationWithVersions is the pipeline specification with already-fetched output versioning information.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
   *@param identifierHash is the hashed document identifier.
+  *@param componentHash is the hashed component identifier, if any.
   *@param documentVersion is the document version.
-  *@param outputVersion is the output version string constructed from the output specification by the output connector.
+  *@param parameterVersion is the version string for the forced parameters.
   *@param authorityName is the name of the authority associated with the document, if any.
-  *@param data is the document data.  The data is closed after ingestion is complete.
-  *@param ingestTime is the time at which the ingestion took place, in milliseconds since epoch.
-  *@param documentURI is the URI of the document, which will be used as the key of the document in the index.
+  *@param recordTime is the time at which the recording took place, in milliseconds since epoch.
   *@param activities is an object providing a set of methods that the implementer can use to perform the operation.
-  *@return true if the ingest was ok, false if the ingest is illegal (and should not be repeated).
   */
   @Override
-  @Deprecated
-  public boolean documentIngest(String outputConnectionName,
-    String identifierClass, String identifierHash,
+  public void documentNoData(
+    IPipelineSpecificationWithVersions pipelineSpecificationWithVersions,
+    String identifierClass, String identifierHash, String componentHash,
     String documentVersion,
-    String outputVersion,
-    String authorityName,
-    RepositoryDocument data,
-    long ingestTime, String documentURI,
-    IOutputActivity activities)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    return documentIngest(outputConnectionName,
-      identifierClass,
-      identifierHash,
-      documentVersion,
-      outputVersion,
-      null,
-      authorityName,
-      data,
-      ingestTime,
-      documentURI,
-      activities);
-  }
-  
-  /** Ingest a document.
-  * This ingests the document, and notes it.  If this is a repeat ingestion of the document, this
-  * method also REMOVES ALL OLD METADATA.  When complete, the index will contain only the metadata
-  * described by the RepositoryDocument object passed to this method.
-  * ServiceInterruption is thrown if the document ingestion must be rescheduled.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hashed document identifier.
-  *@param documentVersion is the document version.
-  *@param parameterVersion is the forced parameter version.
-  *@param outputVersion is the output version string constructed from the output specification by the output connector.
-  *@param authorityName is the name of the authority associated with the document, if any.
-  *@param data is the document data.  The data is closed after ingestion is complete.
-  *@param ingestTime is the time at which the ingestion took place, in milliseconds since epoch.
-  *@param documentURI is the URI of the document, which will be used as the key of the document in the index.
-  *@param activities is an object providing a set of methods that the implementer can use to perform the operation.
-  *@return true if the ingest was ok, false if the ingest is illegal (and should not be repeated).
-  */
-  @Override
-  @Deprecated
-  public boolean documentIngest(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    String documentVersion,
-    String outputVersion,
     String parameterVersion,
     String authorityName,
-    RepositoryDocument data,
-    long ingestTime, String documentURI,
+    long recordTime,
     IOutputActivity activities)
     throws ManifoldCFException, ServiceInterruption
   {
+    PipelineConnectionsWithVersions pipelineConnectionsWithVersions = new PipelineConnectionsWithVersions(pipelineSpecificationWithVersions);
+    
+    String docKey = makeKey(identifierClass,identifierHash);
+
+    if (Logging.ingest.isDebugEnabled())
+    {
+      Logging.ingest.debug("Logging empty document '"+docKey+"' component hash "+((componentHash==null)?"(None)":("'"+componentHash+"'"))+" into output connections '"+extractOutputConnectionNames(pipelineSpecificationWithVersions.getPipelineSpecification().getBasicPipelineSpecification())+"'");
+    }
+
+    // Set up a pipeline
+    PipelineObjectWithVersions pipeline = pipelineGrabWithVersions(pipelineConnectionsWithVersions);
+    if (pipeline == null)
+      // A connector is not installed; treat this as a service interruption.
+      throw new ServiceInterruption("Pipeline connector not installed",0L);
     try
     {
-      return documentIngest(
-        new RuntPipelineSpecificationWithVersions(outputConnectionName,outputVersion,
-          "","","","",""),
-        identifierClass, identifierHash,
-        documentVersion,
-        parameterVersion,
-        authorityName,
-        data,
-        ingestTime, documentURI,
-        activities);
+      pipeline.noDocument(docKey,componentHash,documentVersion,parameterVersion,authorityName,activities,recordTime);
     }
-    catch (IOException e)
+    finally
     {
-      handleIOException(e,"fetching");
-      return false;
+      pipeline.release();
     }
   }
-  
-  // Standard handling for IOExceptions from reading data
-  protected final static long interruptionRetryTime = 5L*60L*1000L;
-  protected static void handleIOException(IOException e, String context)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    if ((e instanceof InterruptedIOException) && (!(e instanceof java.net.SocketTimeoutException)))
-      throw new ManifoldCFException(e.getMessage(), ManifoldCFException.INTERRUPTED);
-
-    long currentTime = System.currentTimeMillis();
-    
-    if (e instanceof java.net.ConnectException)
-    {
-      // Server isn't up at all.  Try for a brief time then give up.
-      String message = "Server could not be contacted during "+context+": "+e.getMessage();
-      Logging.ingest.warn(message,e);
-      throw new ServiceInterruption(message,
-        e,
-        currentTime + interruptionRetryTime,
-        -1L,
-        3,
-        true);
-    }
-    
-    if (e instanceof java.net.SocketTimeoutException)
-    {
-      String message2 = "Socket timeout exception during "+context+": "+e.getMessage();
-      Logging.ingest.warn(message2,e);
-      throw new ServiceInterruption(message2,
-        e,
-        currentTime + interruptionRetryTime,
-        currentTime + 20L * 60000L,
-        -1,
-        false);
-    }
-      
-    if (e.getClass().getName().equals("java.net.SocketException"))
-    {
-      // In the past we would have treated this as a straight document rejection, and
-      // treated it in the same manner as a 400.  The reasoning is that the server can
-      // perfectly legally send out a 400 and drop the connection immediately thereafter,
-      // this a race condition.
-      // However, Solr 4.0 (or the Jetty version that the example runs on) seems
-      // to have a bug where it drops the connection when two simultaneous documents come in
-      // at the same time.  This is the final version of Solr 4.0 so we need to deal with
-      // this.
-      if (e.getMessage().toLowerCase(Locale.ROOT).indexOf("broken pipe") != -1 ||
-        e.getMessage().toLowerCase(Locale.ROOT).indexOf("connection reset") != -1 ||
-        e.getMessage().toLowerCase(Locale.ROOT).indexOf("target server failed to respond") != -1)
-      {
-        // Treat it as a service interruption, but with a limited number of retries.
-        // In that way we won't burden the user with a huge retry interval; it should
-        // give up fairly quickly, and yet NOT give up if the error was merely transient
-        String message = "Server dropped connection during "+context+": "+e.getMessage();
-        Logging.ingest.warn(message,e);
-        throw new ServiceInterruption(message,
-          e,
-          currentTime + interruptionRetryTime,
-          -1L,
-          3,
-          false);
-      }
-      
-      // Other socket exceptions are service interruptions - but if we keep getting them, it means 
-      // that a socket timeout is probably set too low to accept this particular document.  So
-      // we retry for a while, then skip the document.
-      String message2 = "Socket exception during "+context+": "+e.getMessage();
-      Logging.ingest.warn(message2,e);
-      throw new ServiceInterruption(message2,
-        e,
-        currentTime + interruptionRetryTime,
-        currentTime + 20L * 60000L,
-        -1,
-        false);
-    }
-
-    // Otherwise, no idea what the trouble is, so presume that retries might fix it.
-    String message3 = "IO exception during "+context+": "+e.getMessage();
-    Logging.ingest.warn(message3,e);
-    throw new ServiceInterruption(message3,
-      e,
-      currentTime + interruptionRetryTime,
-      currentTime + 2L * 60L * 60000L,
-      -1,
-      true);
-  }
 
   /** Ingest a document.
   * This ingests the document, and notes it.  If this is a repeat ingestion of the document, this
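With handleIOException() and the deprecated single-output entry points removed, the two "no content" paths above are now cleanly separated: documentRecord() is pure bookkeeping (refresh the stored version string; no locks, no output-connector calls, no index changes), while documentNoData() pushes a "no document" through the whole pipeline so any previously indexed copy is removed. A sketch of how a caller chooses between them, using the signatures defined above; the wrapper method, flag, and imports are illustrative:

    // Fragment assuming the same imports as IncrementalIngester itself.
    static void recordWithoutContent(IIncrementalIngester ingester,
      IPipelineSpecificationWithVersions specWithVersions,
      String identifierClass, String identifierHash, String componentHash,
      String documentVersion, String parameterVersion, String authorityName,
      boolean keepIndexedCopy, IOutputActivity activities)
      throws ManifoldCFException, ServiceInterruption
    {
      long now = System.currentTimeMillis();
      if (keepIndexedCopy)
        // Version bookkeeping only; whatever is in the index stays there.
        ingester.documentRecord(
          specWithVersions.getPipelineSpecification().getBasicPipelineSpecification(),
          identifierClass, identifierHash, componentHash, documentVersion, now);
      else
        // Record the version but index "nothing", removing any existing copy.
        ingester.documentNoData(specWithVersions,
          identifierClass, identifierHash, componentHash,
          documentVersion, parameterVersion, authorityName, now, activities);
    }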
@@ -1013,6 +736,7 @@
   *@param pipelineSpecificationWithVersions is the pipeline specification with already-fetched output versioning information.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
   *@param identifierHash is the hashed document identifier.
+  *@param componentHash is the hashed component identifier, if any.
   *@param documentVersion is the document version.
   *@param parameterVersion is the version string for the forced parameters.
   *@param authorityName is the name of the authority associated with the document, if any.
@@ -1026,11 +750,11 @@
   @Override
   public boolean documentIngest(
     IPipelineSpecificationWithVersions pipelineSpecificationWithVersions,
-    String identifierClass, String identifierHash,
+    String identifierClass, String identifierHash, String componentHash,
     String documentVersion,
     String parameterVersion,
     String authorityName,
-    RepositoryDocument document,
+    RepositoryDocument data,
     long ingestTime, String documentURI,
     IOutputActivity activities)
     throws ManifoldCFException, ServiceInterruption, IOException
@@ -1041,11 +765,11 @@
 
     if (Logging.ingest.isDebugEnabled())
     {
-      Logging.ingest.debug("Ingesting document '"+docKey+"' into output connections '"+extractOutputConnectionNames(pipelineSpecificationWithVersions.getPipelineSpecification().getBasicPipelineSpecification())+"'");
+      Logging.ingest.debug("Ingesting document '"+docKey+"' component hash "+((componentHash==null)?"(None)":("'"+componentHash+"'"))+" into output connections '"+extractOutputConnectionNames(pipelineSpecificationWithVersions.getPipelineSpecification().getBasicPipelineSpecification())+"'");
     }
 
     // Set indexing date
-    document.setIndexingDate(new Date());
+    data.setIndexingDate(new Date());
     
     // Set up a pipeline
     PipelineObjectWithVersions pipeline = pipelineGrabWithVersions(pipelineConnectionsWithVersions);
@@ -1054,7 +778,7 @@
       throw new ServiceInterruption("Pipeline connector not installed",0L);
     try
     {
-      return pipeline.addOrReplaceDocumentWithException(docKey,documentURI,document,documentVersion,parameterVersion,authorityName,activities,ingestTime) == IPipelineConnector.DOCUMENTSTATUS_ACCEPTED;
+      return pipeline.addOrReplaceDocumentWithException(docKey,componentHash,documentURI,data,documentVersion,parameterVersion,authorityName,activities,ingestTime) == IPipelineConnector.DOCUMENTSTATUS_ACCEPTED;
     }
     finally
     {
@@ -1062,24 +786,27 @@
     }
   }
 
-  /** Note the fact that we checked a document (and found that it did not need to be ingested, because the
-  * versions agreed).
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes are the set of document identifier hashes.
-  *@param checkTime is the time at which the check took place, in milliseconds since epoch.
+  /** Remove a document component from the search engine index.
+  *@param pipelineSpecificationBasic is the basic pipeline specification.
+  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
+  *@param identifierHash is the hash of the id of the document.
+  *@param componentHash is the hashed component identifier, if any.
+  *@param activities is the object to use to log the details of the ingestion attempt.  May be null.
   */
   @Override
-  @Deprecated
-  public void documentCheckMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes,
-    long checkTime)
-    throws ManifoldCFException
+  public void documentRemove(
+    IPipelineSpecificationBasic pipelineSpecificationBasic,
+    String identifierClass, String identifierHash, String componentHash,
+    IOutputRemoveActivity activities)
+    throws ManifoldCFException, ServiceInterruption
   {
-    documentCheckMultiple(new RuntPipelineSpecificationBasic(outputConnectionName),
-      identifierClasses,identifierHashes,checkTime);
+    documentRemoveMultiple(pipelineSpecificationBasic,
+      new String[]{identifierClass},
+      new String[]{identifierHash},
+      componentHash,
+      activities);
   }
-  
+
   protected static String[] extractOutputConnectionNames(IPipelineSpecificationBasic pipelineSpecificationBasic)
   {
     String[] rval = new String[pipelineSpecificationBasic.getOutputCount()];
@@ -1178,24 +905,6 @@
 
   /** Note the fact that we checked a document (and found that it did not need to be ingested, because the
   * versions agreed).
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hashed document identifier.
-  *@param checkTime is the time at which the check took place, in milliseconds since epoch.
-  */
-  @Override
-  @Deprecated
-  public void documentCheck(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    long checkTime)
-    throws ManifoldCFException
-  {
-    documentCheck(new RuntPipelineSpecificationBasic(outputConnectionName),
-      identifierClass,identifierHash,checkTime);
-  }
-  
-  /** Note the fact that we checked a document (and found that it did not need to be ingested, because the
-  * versions agreed).
   *@param pipelineSpecificationBasic is a basic pipeline specification.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
   *@param identifierHash is the hashed document identifier.
@@ -1234,28 +943,6 @@
 
 
   /** Delete multiple documents from the search engine index.
-  *@param outputConnectionNames are the names of the output connections associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is tha array of document identifier hashes if the documents.
-  *@param activities is the object to use to log the details of the ingestion attempt.  May be null.
-  */
-  @Override
-  @Deprecated
-  public void documentDeleteMultiple(String[] outputConnectionNames,
-    String[] identifierClasses, String[] identifierHashes,
-    IOutputRemoveActivity activities)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    IPipelineSpecificationBasic[] pipelineSpecs = new IPipelineSpecificationBasic[outputConnectionNames.length];
-    for (int i = 0; i < pipelineSpecs.length; i++)
-    {
-      pipelineSpecs[i] = new RuntPipelineSpecificationBasic(outputConnectionNames[i]);
-    }
-    documentDeleteMultiple(pipelineSpecs,
-      identifierClasses,identifierHashes,activities);
-  }
-  
-  /** Delete multiple documents from the search engine index.
   *@param pipelineSpecificationBasics are the pipeline specifications associated with the documents.
   *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
   *@param identifierHashes is the array of document identifier hashes of the documents.
@@ -1302,23 +989,6 @@
   }
 
   /** Delete multiple documents from the search engine index.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is tha array of document identifier hashes if the documents.
-  *@param activities is the object to use to log the details of the ingestion attempt.  May be null.
-  */
-  @Override
-  @Deprecated
-  public void documentDeleteMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes,
-    IOutputRemoveActivity activities)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    documentDeleteMultiple(new RuntPipelineSpecificationBasic(outputConnectionName),
-      identifierClasses,identifierHashes,activities);
-  }
-  
-  /** Delete multiple documents from the search engine index.
   *@param pipelineSpecificationBasic is the basic pipeline specification.
   *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
   *@param identifierHashes is the array of document identifier hashes of the documents.
@@ -1529,6 +1199,218 @@
     }
   }
 
+  /** Remove multiple document components from the search engine index.
+  *@param pipelineSpecificationBasic is the basic pipeline specification.
+  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
+  *@param identifierHashes are the hashes of the ids of the documents.
+  *@param componentHash is the hashed component identifier, if any.
+  *@param activities is the object to use to log the details of the removal attempt.  May be null.
+  */
+  @Override
+  public void documentRemoveMultiple(
+    IPipelineSpecificationBasic pipelineSpecificationBasic,
+    String[] identifierClasses, String[] identifierHashes, String componentHash,
+    IOutputRemoveActivity activities)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    String[] outputConnectionNames = extractOutputConnectionNames(pipelineSpecificationBasic);
+    // Load connection managers up front to save time
+    IOutputConnection[] outputConnections = connectionManager.loadMultiple(outputConnectionNames);
+    
+    // No transactions here, so we can cycle through the connection names one at a time
+    for (int z = 0; z < outputConnectionNames.length; z++)
+    {
+      String outputConnectionName = outputConnectionNames[z];
+      IOutputConnection connection = outputConnections[z];
+
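+      // Wrap the activities object so that removal activity is attributed to this specific output connection.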
+      activities = new OutputRemoveActivitiesWrapper(activities,outputConnectionName);
+
+      if (Logging.ingest.isDebugEnabled())
+      {
+        for (int i = 0; i < identifierHashes.length; i++)
+        {
+          Logging.ingest.debug("Request to remove document '"+makeKey(identifierClasses[i],identifierHashes[i])+"' component hash "+((componentHash==null)?"(None)":("'"+componentHash+"'"))+" from output connection '"+outputConnectionName+"'");
+        }
+      }
+
+      // No transactions.  Time for the operation may exceed transaction timeout.
+
+      // Obtain the current URIs of all of these.
+      DeleteInfo[] uris = getDocumentURIMultiple(outputConnectionName,identifierClasses,identifierHashes,componentHash);
+
+      // Grab critical section locks so that we can't attempt to ingest at the same time we are deleting.
+      // (This guarantees that when this operation is complete the database reflects reality.)
+      int validURIcount = 0;
+      for (int i = 0; i < uris.length; i++)
+      {
+        if (uris[i] != null && uris[i].getURI() != null)
+          validURIcount++;
+      }
+      String[] lockArray = new String[validURIcount];
+      String[] validURIArray = new String[validURIcount];
+      validURIcount = 0;
+      for (int i = 0; i < uris.length; i++)
+      {
+        if (uris[i] != null && uris[i].getURI() != null)
+        {
+          validURIArray[validURIcount] = uris[i].getURI();
+          lockArray[validURIcount] = outputConnectionName+":"+validURIArray[validURIcount];
+          validURIcount++;
+        }
+      }
+
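+      // Lock keys take the form "<outputConnectionName>:<uri>", serializing ingestion against deletion for each URI on each connection.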
+      lockManager.enterLocks(null,null,lockArray);
+      try
+      {
+        // Remove the documents from the index, using the URIs fetched above
+        for (int i = 0; i < uris.length; i++)
+        {
+          if (uris[i] != null && uris[i].getURI() != null)
+            removeDocument(connection,uris[i].getURI(),uris[i].getOutputVersion(),activities);
+        }
+
+        // Now, get rid of all rows that match the given uris.
+        // Do the queries together, then the deletes
+        beginTransaction();
+        try
+        {
+          // The basic process is this:
+          // 1) Come up with a set of urihash values
+          // 2) Find the matching, corresponding id values
+          // 3) Delete the rows corresponding to the id values, in sequence
+
+          // Process (1 & 2) has to be broken down into chunks that contain the maximum
+          // number of doc hash values each.  We need to avoid repeating doc hash values,
+          // so the first step is to come up with ALL the doc hash values before looping
+          // over them.
+
+          int maxClauses;
+          
+          // Find all the documents that match this set of URIs
+          Set<String> docURIHashValues = new HashSet<String>();
+          Set<String> docURIValues = new HashSet<String>();
+          for (String docDBString : validURIArray)
+          {
+            String docDBHashString = ManifoldCF.hash(docDBString);
+            docURIValues.add(docDBString);
+            docURIHashValues.add(docDBHashString);
+          }
+
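+          // The hashed URI forms drive the indexed lookup; the raw URI values are retained so exact matches can be confirmed if hashes collide.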
+          // Now, perform n queries, each of them no larger than maxInClause in length.
+          // Create a list of row id's from this.
+          Set<Long> rowIDSet = new HashSet<Long>();
+          Iterator<String> iter = docURIHashValues.iterator();
+          int j = 0;
+          List<String> hashList = new ArrayList<String>();
+          maxClauses = maxClausesRowIdsForURIs(outputConnectionName);
+          while (iter.hasNext())
+          {
+            if (j == maxClauses)
+            {
+              findRowIdsForURIs(outputConnectionName,rowIDSet,docURIValues,hashList);
+              hashList.clear();
+              j = 0;
+            }
+            hashList.add(iter.next());
+            j++;
+          }
+
+          if (j > 0)
+            findRowIdsForURIs(outputConnectionName,rowIDSet,docURIValues,hashList);
+
+          // Next, go through the list of row IDs, and delete them in chunks
+          j = 0;
+          List<Long> list = new ArrayList<Long>();
+          Iterator<Long> iter2 = rowIDSet.iterator();
+          maxClauses = maxClausesDeleteRowIds();
+          while (iter2.hasNext())
+          {
+            if (j == maxClauses)
+            {
+              deleteRowIds(list);
+              list.clear();
+              j = 0;
+            }
+            list.add(iter2.next());
+            j++;
+          }
+
+          if (j > 0)
+            deleteRowIds(list);
+
+          // Now, find the set of remaining rows that match the document identifiers.
+          Set<String> docIdValues = new HashSet<String>();
+          for (int i = 0; i < identifierHashes.length; i++)
+          {
+            String docDBString = makeKey(identifierClasses[i],identifierHashes[i]);
+            docIdValues.add(docDBString);
+          }
+
+          // Now, perform n queries, each of them no larger than maxInClause in length.
+          // Create a list of row id's from this.
+          rowIDSet.clear();
+          iter = docIdValues.iterator();
+          j = 0;
+          List<String> list2 = new ArrayList<String>();
+          maxClauses = maxClausesRowIdsForDocIds(outputConnectionName,componentHash);
+          while (iter.hasNext())
+          {
+            if (j == maxClauses)
+            {
+              findRowIdsForDocIds(outputConnectionName,rowIDSet,list2,componentHash);
+              list2.clear();
+              j = 0;
+            }
+            list2.add(iter.next());
+            j++;
+          }
+
+          if (j > 0)
+            findRowIdsForDocIds(outputConnectionName,rowIDSet,list2,componentHash);
+
+          // Next, go through the list of row IDs, and delete them in chunks
+          j = 0;
+          list.clear();
+          iter2 = rowIDSet.iterator();
+          maxClauses = maxClausesDeleteRowIds();
+          while (iter2.hasNext())
+          {
+            if (j == maxClauses)
+            {
+              deleteRowIds(list);
+              list.clear();
+              j = 0;
+            }
+            list.add(iter2.next());
+            j++;
+          }
+
+          if (j > 0)
+            deleteRowIds(list);
+
+        }
+        catch (ManifoldCFException e)
+        {
+          signalRollback();
+          throw e;
+        }
+        catch (Error e)
+        {
+          signalRollback();
+          throw e;
+        }
+        finally
+        {
+          endTransaction();
+        }
+      }
+      finally
+      {
+        lockManager.leaveLocks(null,null,lockArray);
+      }
+    }
+  }
+
   /** Calculate the clauses.
   */
   protected int maxClausesRowIdsForURIs(String outputConnectionName)
@@ -1574,6 +1456,15 @@
       new UnitaryClause(outputConnNameField,outputConnectionName)});
   }
 
+  /** Calculate the maximum number of doc ids we should use, given a component hash.
+  */
+  protected int maxClausesRowIdsForDocIds(String outputConnectionName, String componentHash)
+  {
+    return findConjunctionClauseMax(new ClauseDescription[]{
+      new UnitaryClause(outputConnNameField,outputConnectionName),
+      new UnitaryClause(componentHashField,componentHash)});
+  }
+
   /** Calculate the maximum number of doc ids we should use.
   */
   protected int maxClausesRowIdsForDocIds(String[] outputConnectionNames)
@@ -1607,6 +1498,29 @@
   /** Given values and parameters corresponding to a set of hash values, add corresponding
   * table row id's to the output map.
   */
+  protected void findRowIdsForDocIds(String outputConnectionName, Set<Long> rowIDSet, List<String> paramValues, String componentHash)
+    throws ManifoldCFException
+  {
+    ArrayList list = new ArrayList();
+    String query = buildConjunctionClause(list,new ClauseDescription[]{
+      new MultiClause(docKeyField,paramValues),
+      new UnitaryClause(outputConnNameField,outputConnectionName),
+      new UnitaryClause(componentHashField,componentHash)});
+      
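+    // Note: componentHash is matched with a plain UnitaryClause here (not NullCheckClause), so a null component hash will not match rows stored with SQL NULL.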
+    IResultSet set = performQuery("SELECT "+idField+" FROM "+
+      getTableName()+" WHERE "+query,list,null,null);
+    
+    for (int i = 0; i < set.getRowCount(); i++)
+    {
+      IResultRow row = set.getRow(i);
+      Long rowID = (Long)row.getValue(idField);
+      rowIDSet.add(rowID);
+    }
+  }
+
+  /** Given values and parameters corresponding to a set of hash values, add corresponding
+  * table row id's to the output set.
+  */
   protected void findRowIdsForDocIds(String[] outputConnectionNames, Set<Long> rowIDSet, List<String> paramValues)
     throws ManifoldCFException
   {
@@ -1645,23 +1559,6 @@
   }
 
   /** Delete a document from the search engine index.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hash of the id of the document.
-  *@param activities is the object to use to log the details of the ingestion attempt.  May be null.
-  */
-  @Override
-  @Deprecated
-  public void documentDelete(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    IOutputRemoveActivity activities)
-    throws ManifoldCFException, ServiceInterruption
-  {
-    documentDelete(new RuntPipelineSpecificationBasic(outputConnectionName),
-      identifierClass,identifierHash,activities);
-  }
-  
-  /** Delete a document from the search engine index.
   *@param pipelineSpecificationBasic is the basic pipeline specification.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
   *@param identifierHash is the hash of the id of the document.
@@ -1731,6 +1628,63 @@
     }
   }
 
+  /** Find out under what URIs a SET of documents is currently ingested.
+  *@param outputConnectionName is the output connection name.
+  *@param identifierClasses is the array of identifier classes.
+  *@param identifierHashes is the array of document id's to check.
+  *@param componentHash is the component hash to check.
+  *@return the array of current document URIs.  Null is returned for identifiers
+  * that don't exist in the index.
+  */
+  protected DeleteInfo[] getDocumentURIMultiple(String outputConnectionName, String[] identifierClasses, String[] identifierHashes, String componentHash)
+    throws ManifoldCFException
+  {
+    DeleteInfo[] rval = new DeleteInfo[identifierHashes.length];
+    Map<String,Integer> map = new HashMap<String,Integer>();
+    for (int i = 0; i < identifierHashes.length; i++)
+    {
+      map.put(makeKey(identifierClasses[i],identifierHashes[i]),new Integer(i));
+      rval[i] = null;
+    }
+
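+    // Look up the URIs in chunks, each no larger than the database's maximum IN-clause size.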
+    beginTransaction();
+    try
+    {
+      List<String> list = new ArrayList<String>();
+      int maxCount = maxClauseDocumentURIChunk(outputConnectionName,componentHash);
+      int j = 0;
+      Iterator<String> iter = map.keySet().iterator();
+      while (iter.hasNext())
+      {
+        if (j == maxCount)
+        {
+          getDocumentURIChunk(rval,map,outputConnectionName,list,componentHash);
+          j = 0;
+          list.clear();
+        }
+        list.add(iter.next());
+        j++;
+      }
+      if (j > 0)
+        getDocumentURIChunk(rval,map,outputConnectionName,list,componentHash);
+      return rval;
+    }
+    catch (ManifoldCFException e)
+    {
+      signalRollback();
+      throw e;
+    }
+    catch (Error e)
+    {
+      signalRollback();
+      throw e;
+    }
+    finally
+    {
+      endTransaction();
+    }
+  }
+
   /** Look up ingestion data for a set of documents.
   *@param rval is a map of output key to document data, in no particular order, which will be loaded with all matching results.
   *@param pipelineSpecificationBasics are the pipeline specifications corresponding to the identifier classes and hashes.
@@ -1739,7 +1693,7 @@
   */
   @Override
   public void getPipelineDocumentIngestDataMultiple(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic[] pipelineSpecificationBasics,
     String[] identifierClasses, String[] identifierHashes)
     throws ManifoldCFException
@@ -1784,7 +1738,7 @@
   */
   @Override
   public void getPipelineDocumentIngestDataMultiple(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic pipelineSpecificationBasic,
     String[] identifierClasses, String[] identifierHashes)
     throws ManifoldCFException
@@ -1842,7 +1796,7 @@
   *@param outputConnectionNames are the names of the output connections for the query.
   *@param list is the parameter list for the query.
   */
-  protected void getPipelineDocumentIngestDataChunk(Map<OutputKey,DocumentIngestStatus> rval, Map<String,Integer> map, String[] outputConnectionNames, List<String> list,
+  protected void getPipelineDocumentIngestDataChunk(IngestStatuses rval, Map<String,Integer> map, String[] outputConnectionNames, List<String> list,
     String[] identifierClasses, String[] identifierHashes)
     throws ManifoldCFException
   {
@@ -1852,7 +1806,7 @@
       new MultiClause(outputConnNameField,outputConnectionNames)});
       
     // Get the primary records associated with this hash value
-    IResultSet set = performQuery("SELECT "+idField+","+outputConnNameField+","+docKeyField+","+lastVersionField+","+lastOutputVersionField+","+authorityNameField+","+forcedParamsField+","+lastTransformationVersionField+
+    IResultSet set = performQuery("SELECT "+idField+","+outputConnNameField+","+docKeyField+","+componentHashField+","+lastVersionField+","+lastOutputVersionField+","+authorityNameField+","+forcedParamsField+","+lastTransformationVersionField+
       " FROM "+getTableName()+" WHERE "+query,newList,null,null);
 
     // Now, go through the original request once more, this time building the result
@@ -1865,6 +1819,7 @@
       {
         Long id = (Long)row.getValue(idField);
         String outputConnectionName = (String)row.getValue(outputConnNameField);
+        String componentHash = (String)row.getValue(componentHashField);
         String lastVersion = (String)row.getValue(lastVersionField);
         if (lastVersion == null)
           lastVersion = "";
@@ -1881,8 +1836,8 @@
         if (authorityName == null)
           authorityName = "";
         int indexValue = position.intValue();
-        rval.put(new OutputKey(identifierClasses[indexValue],identifierHashes[indexValue],outputConnectionName),
-          new DocumentIngestStatus(lastVersion,lastTransformationVersion,lastOutputVersion,paramVersion,authorityName));
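+        // IngestStatuses keys each status by component hash as well, since a single document may now have multiple indexed components.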
+        rval.addStatus(identifierClasses[indexValue],identifierHashes[indexValue],outputConnectionName,
+          componentHash,new DocumentIngestStatus(lastVersion,lastTransformationVersion,lastOutputVersion,paramVersion,authorityName));
       }
     }
   }
@@ -1895,7 +1850,7 @@
   */
   @Override
   public void getPipelineDocumentIngestData(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic pipelineSpecificationBasic,
     String identifierClass, String identifierHash)
     throws ManifoldCFException
@@ -1904,135 +1859,6 @@
       new String[]{identifierClass},new String[]{identifierHash});
   }
 
-  /** Look up ingestion data for a SET of documents.
-  *@param outputConnectionNames are the names of the output connections associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is the array of document identifier hashes to look up.
-  *@return the array of document data.  Null will come back for any identifier that doesn't
-  * exist in the index.
-  */
-  @Override
-  @Deprecated
-  public DocumentIngestStatus[] getDocumentIngestDataMultiple(String[] outputConnectionNames,
-    String[] identifierClasses, String[] identifierHashes)
-    throws ManifoldCFException
-  {
-    // Segregate request by connection names
-    Map<String,List<Integer>> keyMap = new HashMap<String,List<Integer>>();
-    for (int i = 0; i < outputConnectionNames.length; i++)
-    {
-      String outputConnectionName = outputConnectionNames[i];
-      List<Integer> list = keyMap.get(outputConnectionName);
-      if (list == null)
-      {
-        list = new ArrayList<Integer>();
-        keyMap.put(outputConnectionName,list);
-      }
-      list.add(new Integer(i));
-    }
-
-    // Create the return array.
-    DocumentIngestStatus[] rval = new DocumentIngestStatus[outputConnectionNames.length];
-    Iterator<String> iter = keyMap.keySet().iterator();
-    while (iter.hasNext())
-    {
-      String outputConnectionName = iter.next();
-      List<Integer> list = keyMap.get(outputConnectionName);
-      String[] localIdentifierClasses = new String[list.size()];
-      String[] localIdentifierHashes = new String[list.size()];
-      for (int i = 0; i < localIdentifierClasses.length; i++)
-      {
-        int index = list.get(i).intValue();
-        localIdentifierClasses[i] = identifierClasses[index];
-        localIdentifierHashes[i] = identifierHashes[index];
-      }
-      DocumentIngestStatus[] localRval = getDocumentIngestDataMultiple(outputConnectionName,localIdentifierClasses,localIdentifierHashes);
-      for (int i = 0; i < localRval.length; i++)
-      {
-        int index = list.get(i).intValue();
-        rval[index] = localRval[i];
-      }
-    }
-    return rval;
-  }
-
-  /** Look up ingestion data for a SET of documents.
-  *@param outputConnectionName is the names of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is the array of document identifier hashes to look up.
-  *@return the array of document data.  Null will come back for any identifier that doesn't
-  * exist in the index.
-  */
-  @Override
-  @Deprecated
-  public DocumentIngestStatus[] getDocumentIngestDataMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes)
-    throws ManifoldCFException
-  {
-    // Build the return array
-    DocumentIngestStatus[] rval = new DocumentIngestStatus[identifierHashes.length];
-
-    // Build a map, so we can convert an identifier into an array index.
-    Map<String,Integer> indexMap = new HashMap<String,Integer>();
-    for (int i = 0; i < identifierHashes.length; i++)
-    {
-      indexMap.put(makeKey(identifierClasses[i],identifierHashes[i]),new Integer(i));
-      rval[i] = null;
-    }
-
-    beginTransaction();
-    try
-    {
-      List<String> list = new ArrayList<String>();
-      int maxCount = maxClauseDocumentIngestDataChunk(outputConnectionName);
-      int j = 0;
-      Iterator<String> iter = indexMap.keySet().iterator();
-      while (iter.hasNext())
-      {
-        if (j == maxCount)
-        {
-          getDocumentIngestDataChunk(rval,indexMap,outputConnectionName,list);
-          j = 0;
-          list.clear();
-        }
-        list.add(iter.next());
-        j++;
-      }
-      if (j > 0)
-        getDocumentIngestDataChunk(rval,indexMap,outputConnectionName,list);
-      return rval;
-    }
-    catch (ManifoldCFException e)
-    {
-      signalRollback();
-      throw e;
-    }
-    catch (Error e)
-    {
-      signalRollback();
-      throw e;
-    }
-    finally
-    {
-      endTransaction();
-    }
-  }
-
-  /** Look up ingestion data for a documents.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hash of the id of the document.
-  *@return the current document's ingestion data, or null if the document is not currently ingested.
-  */
-  @Override
-  @Deprecated
-  public DocumentIngestStatus getDocumentIngestData(String outputConnectionName,
-    String identifierClass, String identifierHash)
-    throws ManifoldCFException
-  {
-    return getDocumentIngestDataMultiple(outputConnectionName,new String[]{identifierClass},new String[]{identifierHash})[0];
-  }
-
   /** Calculate the average time interval between changes for a document.
   * This is based on the data gathered for the document.
   *@param pipelineSpecificationBasic is the basic pipeline specification.
@@ -2040,6 +1866,7 @@
   *@param identifierHashes is the hashes of the ids of the documents.
   *@return the number of milliseconds between changes, or 0 if this cannot be calculated.
   */
+  @Override
   public long[] getDocumentUpdateIntervalMultiple(
     IPipelineSpecificationBasic pipelineSpecificationBasic,
     String[] identifierClasses, String[] identifierHashes)
@@ -2114,39 +1941,6 @@
       new String[]{identifierClass},new String[]{identifierHash})[0];
   }
 
-  /** Calculate the average time interval between changes for a document.
-  * This is based on the data gathered for the document.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hash of the id of the document.
-  *@return the number of milliseconds between changes, or 0 if this cannot be calculated.
-  */
-  @Override
-  @Deprecated
-  public long getDocumentUpdateInterval(String outputConnectionName,
-    String identifierClass, String identifierHash)
-    throws ManifoldCFException
-  {
-    return getDocumentUpdateIntervalMultiple(outputConnectionName,new String[]{identifierClass},new String[]{identifierHash})[0];
-  }
-
-  /** Calculate the average time interval between changes for a document.
-  * This is based on the data gathered for the document.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is the hashes of the ids of the documents.
-  *@return the number of milliseconds between changes, or 0 if this cannot be calculated.
-  */
-  @Override
-  @Deprecated
-  public long[] getDocumentUpdateIntervalMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes)
-    throws ManifoldCFException
-  {
-    return getDocumentUpdateIntervalMultiple(new RuntPipelineSpecificationBasic(outputConnectionName),
-      identifierClasses,identifierHashes);
-  }
-  
   /** Calculate the number of clauses.
   */
   protected int maxClauseGetIntervals(String[] outputConnectionNames)
@@ -2244,6 +2038,7 @@
   /** Note the ingestion of a document, or the "update" of a document.
   *@param outputConnectionName is the name of the output connection.
   *@param docKey is the key string describing the document.
+  *@param componentHash is the component identifier hash for this document.
   *@param documentVersion is a string describing the new version of the document.
   *@param transformationVersion is a string describing all current transformations for the document.
   *@param outputVersion is the version string calculated for the output connection.
@@ -2255,7 +2050,7 @@
   *@param documentURIHash is the hash of the document uri.
   */
   protected void noteDocumentIngest(String outputConnectionName,
-    String docKey, String documentVersion, String transformationVersion,
+    String docKey, String componentHash, String documentVersion, String transformationVersion,
     String outputVersion, String packedForcedParameters,
     String authorityNameString,
     long ingestTime, String documentURI, String documentURIHash)
@@ -2284,6 +2079,8 @@
 
       // Try the update first.  Typically this succeeds except in the case where a doc is indexed for the first time.
       map.clear();
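+      // Component hash is only written when present; a null componentHash is stored as SQL NULL and matched via NullCheckClause in the WHERE clause below.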
+      if (componentHash != null)
+        map.put(componentHashField,componentHash);
       map.put(lastVersionField,documentVersion);
       map.put(lastTransformationVersionField,transformationVersion);
       map.put(lastOutputVersionField,outputVersion);
@@ -2311,7 +2108,8 @@
           ArrayList list = new ArrayList();
           String query = buildConjunctionClause(list,new ClauseDescription[]{
             new UnitaryClause(docKeyField,docKey),
-            new UnitaryClause(outputConnNameField,outputConnectionName)});
+            new UnitaryClause(outputConnNameField,outputConnectionName),
+            ((componentHash==null)?new NullCheckClause(componentHashField,true):new UnitaryClause(componentHashField,componentHash))});
           IResultSet set = performQuery("SELECT "+idField+","+changeCountField+" FROM "+getTableName()+" WHERE "+
             query+" FOR UPDATE",list,null,null);
           IResultRow row = null;
@@ -2363,6 +2161,8 @@
 
       // Set up for insert
       map.clear();
+      if (componentHash != null)
+        map.put(componentHashField,componentHash);
       map.put(lastVersionField,documentVersion);
       map.put(lastTransformationVersionField,transformationVersion);
       map.put(lastOutputVersionField,outputVersion);
@@ -2426,9 +2226,10 @@
   *@param rval is the string array where the uris should be put.
   *@param map is the map from id to index.
   *@param outputConnectionName is the output connection name.
-  *@param list is the parameter list for the query.
+  *@param list is the list of doc keys for the query.
   */
-  protected void getDocumentURIChunk(DeleteInfo[] rval, Map<String,Integer> map, String outputConnectionName, List<String> list)
+  protected void getDocumentURIChunk(DeleteInfo[] rval, Map<String,Integer> map, String outputConnectionName,
+    List<String> list)
     throws ManifoldCFException
   {
     ArrayList newList = new ArrayList();
@@ -2456,6 +2257,52 @@
     }
   }
 
+  /** Calculate how many clauses at a time
+  */
+  protected int maxClauseDocumentURIChunk(String outputConnectionName, String componentHash)
+  {
+    return findConjunctionClauseMax(new ClauseDescription[]{
+      new UnitaryClause(outputConnNameField,outputConnectionName),
+      ((componentHash==null)?new NullCheckClause(componentHashField,true):new UnitaryClause(componentHashField,componentHash))});
+  }
+
+  /** Get a chunk of document uris.
+  *@param rval is the string array where the uris should be put.
+  *@param map is the map from id to index.
+  *@param outputConnectionName is the output connection name.
+  *@param list is the list of doc keys for the query.
+  *@param componentHash is the component hash, if any, for the query.
+  */
+  protected void getDocumentURIChunk(DeleteInfo[] rval, Map<String,Integer> map, String outputConnectionName,
+    List<String> list, String componentHash)
+    throws ManifoldCFException
+  {
+    ArrayList newList = new ArrayList();
+    String query = buildConjunctionClause(newList,new ClauseDescription[]{
+      new MultiClause(docKeyField,list),
+      new UnitaryClause(outputConnNameField,outputConnectionName),
+      ((componentHash==null)?new NullCheckClause(componentHashField,true):new UnitaryClause(componentHashField,componentHash))});
+      
+    IResultSet set = performQuery("SELECT "+docKeyField+","+docURIField+","+lastOutputVersionField+" FROM "+getTableName()+" WHERE "+
+      query,newList,null,null);
+
+    // Go through list and put into buckets.
+    for (int i = 0; i < set.getRowCount(); i++)
+    {
+      IResultRow row = set.getRow(i);
+      String docHash = row.getValue(docKeyField).toString();
+      Integer position = (Integer)map.get(docHash);
+      if (position != null)
+      {
+        String lastURI = (String)row.getValue(docURIField);
+        if (lastURI != null && lastURI.length() == 0)
+          lastURI = null;
+        String lastOutputVersion = (String)row.getValue(lastOutputVersionField);
+        rval[position.intValue()] = new DeleteInfo(lastURI,lastOutputVersion);
+      }
+    }
+  }
+
   /** Count the clauses
   */
   protected int maxClauseDocumentIngestDataChunk(String outputConnectionName)
@@ -2472,54 +2319,6 @@
       new MultiClause(outputConnNameField,outputConnectionNames)});
   }
   
-  /** Get a chunk of document ingest data records.
-  *@param rval is the document ingest status array where the data should be put.
-  *@param map is the map from id to index.
-  *@param clause is the in clause for the query.
-  *@param list is the parameter list for the query.
-  */
-  protected void getDocumentIngestDataChunk(DocumentIngestStatus[] rval, Map<String,Integer> map, String outputConnectionName, List<String> list)
-    throws ManifoldCFException
-  {
-    ArrayList newList = new ArrayList();
-    String query = buildConjunctionClause(newList,new ClauseDescription[]{
-      new MultiClause(docKeyField,list),
-      new UnitaryClause(outputConnNameField,outputConnectionName)});
-      
-    // Get the primary records associated with this hash value
-    IResultSet set = performQuery("SELECT "+idField+","+docKeyField+","+lastVersionField+","+lastOutputVersionField+","+authorityNameField+","+forcedParamsField+
-      " FROM "+getTableName()+" WHERE "+query,newList,null,null);
-
-    // Now, go through the original request once more, this time building the result
-    for (int i = 0; i < set.getRowCount(); i++)
-    {
-      IResultRow row = set.getRow(i);
-      String docHash = row.getValue(docKeyField).toString();
-      Integer position = map.get(docHash);
-      if (position != null)
-      {
-        Long id = (Long)row.getValue(idField);
-        String lastVersion = (String)row.getValue(lastVersionField);
-        if (lastVersion == null)
-          lastVersion = "";
-        String lastTransformationVersion = (String)row.getValue(lastTransformationVersionField);
-        if (lastTransformationVersion == null)
-          lastTransformationVersion = "";
-        String lastOutputVersion = (String)row.getValue(lastOutputVersionField);
-        if (lastOutputVersion == null)
-          lastOutputVersion = "";
-        String paramVersion = (String)row.getValue(forcedParamsField);
-        if (paramVersion == null)
-          paramVersion = "";
-        String authorityName = (String)row.getValue(authorityNameField);
-        if (authorityName == null)
-          authorityName = "";
-        int indexValue = position.intValue();
-        rval[indexValue] = new DocumentIngestStatus(
-          lastVersion,lastTransformationVersion,lastOutputVersion,paramVersion,authorityName);
-      }
-    }
-  }
 
   // Protected methods
 
@@ -2687,14 +2486,24 @@
     /** Send a document via the pipeline to the next output connection.
     *@param documentURI is the document's URI.
     *@param document is the document data to be processed (handed to the output data store).
-    *@param authorityNameString is the authority name string that should be used to qualify the document's access tokens.
     *@return the document status (accepted or permanently rejected); return codes are listed in IPipelineConnector.
     *@throws IOException only if there's an IO error reading the data from the document.
     */
-    public int sendDocument(String documentURI, RepositoryDocument document, String authorityNameString)
+    @Override
+    public int sendDocument(String documentURI, RepositoryDocument document)
       throws ManifoldCFException, ServiceInterruption, IOException
     {
-      return addActivities.sendDocument(documentURI,document,authorityNameString);
+      return addActivities.sendDocument(documentURI,document);
+    }
+
+    /** Send NO document via the pipeline to the next output connection.  This is equivalent
+    * to sending an empty document placeholder.
+    */
+    @Override
+    public void noDocument()
+      throws ManifoldCFException, ServiceInterruption
+    {
+      addActivities.noDocument();
     }
 
     /** Detect if a mime type is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
@@ -2896,16 +2705,23 @@
       this.pipelineConnectionsWithVersions = pipelineConnectionsWithVersions;
     }
 
-    public int addOrReplaceDocumentWithException(String docKey, String documentURI, RepositoryDocument document, String newDocumentVersion, String newParameterVersion, String authorityNameString, IOutputActivity finalActivity, long ingestTime)
+    public int addOrReplaceDocumentWithException(String docKey, String componentHash, String documentURI, RepositoryDocument document, String newDocumentVersion, String newParameterVersion, String authorityNameString, IOutputActivity finalActivity, long ingestTime)
       throws ManifoldCFException, ServiceInterruption, IOException
     {
-      PipelineAddFanout entryPoint = buildAddPipeline(finalActivity,newDocumentVersion,newParameterVersion,authorityNameString,ingestTime,docKey);
-      return entryPoint.sendDocument(documentURI,document,authorityNameString);
+      PipelineAddFanout entryPoint = buildAddPipeline(finalActivity,newDocumentVersion,newParameterVersion,authorityNameString,ingestTime,docKey,componentHash);
+      return entryPoint.sendDocument(documentURI,document);
     }
-    
+
+    public void noDocument(String docKey, String componentHash, String newDocumentVersion, String newParameterVersion, String authorityNameString, IOutputActivity finalActivity, long ingestTime)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      PipelineAddFanout entryPoint = buildAddPipeline(finalActivity,newDocumentVersion,newParameterVersion,authorityNameString,ingestTime,docKey,componentHash);
+      entryPoint.noDocument();
+    }
+
     protected PipelineAddFanout buildAddPipeline(IOutputActivity finalActivity,
       String newDocumentVersion, String newParameterVersion, String newAuthorityNameString,
-      long ingestTime, String docKey)
+      long ingestTime, String docKey, String componentHash)
     {
       // Algorithm for building a pipeline:
       // (1) We start with the set of final output connection stages, and build an entry point for each one.  That's our "current set".
@@ -2960,7 +2776,9 @@
           ingestTime,
           newDocumentVersion,
           newParameterVersion,
-          docKey);
+          docKey,
+          componentHash,
+          newAuthorityNameString);
         currentSet.put(new Integer(outputStage), outputStageEntryPoint);
       }
       // Cycle through the "current set"
@@ -3010,7 +2828,7 @@
           return pcf;
         PipelineAddEntryPoint newEntry = new PipelineAddEntryPoint(
           transformationConnectors[pipelineConnections.getTransformationConnectionIndex(parent).intValue()],
-          pipelineSpec.getStageDescriptionString(parent),pcf,pcf.checkNeedToReindex());
+          pipelineSpec.getStageDescriptionString(parent),newAuthorityNameString,pcf,pcf.checkNeedToReindex());
         currentSet.put(new Integer(parent), newEntry);
       }
 
@@ -3087,12 +2905,12 @@
   public static class PipelineCheckEntryPoint
   {
     protected final IPipelineConnector pipelineConnector;
-    protected final String pipelineDescriptionString;
+    protected final VersionContext pipelineDescriptionString;
     protected final IOutputCheckActivity checkActivity;
     
     public PipelineCheckEntryPoint(
       IPipelineConnector pipelineConnector,
-      String pipelineDescriptionString,
+      VersionContext pipelineDescriptionString,
       IOutputCheckActivity checkActivity)
     {
       this.pipelineConnector= pipelineConnector;
@@ -3208,11 +3026,11 @@
     /** Send a document via the pipeline to the next output connection.
     *@param documentURI is the document's URI.
     *@param document is the document data to be processed (handed to the output data store).
-    *@param authorityNameString is the authority name string that should be used to qualify the document's access tokens.
     *@return the document status (accepted or permanently rejected); return codes are listed in IPipelineConnector.
     *@throws IOException only if there's an IO error reading the data from the document.
     */
-    public int sendDocument(String documentURI, RepositoryDocument document, String authorityNameString)
+    @Override
+    public int sendDocument(String documentURI, RepositoryDocument document)
       throws ManifoldCFException, ServiceInterruption, IOException
     {
       // First, count the number of active entry points.
@@ -3230,7 +3048,7 @@
         {
           if (!p.isActive())
             continue;
-          if (p.addOrReplaceDocumentWithException(documentURI,document,authorityNameString) == IPipelineConnector.DOCUMENTSTATUS_ACCEPTED)
+          if (p.addOrReplaceDocumentWithException(documentURI,document) == IPipelineConnector.DOCUMENTSTATUS_ACCEPTED)
             rval = IPipelineConnector.DOCUMENTSTATUS_ACCEPTED;
         }
         return rval;
@@ -3247,7 +3065,7 @@
           {
             if (!p.isActive())
               continue;
-            if (p.addOrReplaceDocumentWithException(documentURI,factory.createDocument(),authorityNameString) == IPipelineConnector.DOCUMENTSTATUS_ACCEPTED)
+            if (p.addOrReplaceDocumentWithException(documentURI,factory.createDocument()) == IPipelineConnector.DOCUMENTSTATUS_ACCEPTED)
               rval = IPipelineConnector.DOCUMENTSTATUS_ACCEPTED;
           }
           return rval;
@@ -3259,6 +3077,23 @@
       }
     }
 
+    /** Send NO document via the pipeline to the next output connection.  This is equivalent
+    * to sending an empty document placeholder.
+    */
+    @Override
+    public void noDocument()
+      throws ManifoldCFException, ServiceInterruption
+    {
+      for (PipelineAddEntryPoint p : entryPoints)
+      {
+        if (p.isActive())
+        {
+          // Invoke the addEntryPoint method for handling "noDocument"
+          p.noDocument();
+        }
+      }
+    }
+
     /** Qualify an access token appropriately, to match access tokens as returned by mod_aa.  This method
     * includes the authority name with the access token, if any, so that each authority may establish its own token space.
     *@param authorityNameString is the name of the authority to use to qualify the access token.
@@ -3304,17 +3139,20 @@
   public static class PipelineAddEntryPoint
   {
     protected final IPipelineConnector pipelineConnector;
-    protected final String pipelineDescriptionString;
+    protected final VersionContext pipelineDescriptionString;
+    protected final String authorityNameString;
     protected final IOutputAddActivity addActivity;
     protected final boolean isActive;
     
     public PipelineAddEntryPoint(IPipelineConnector pipelineConnector,
-      String pipelineDescriptionString,
+      VersionContext pipelineDescriptionString,
+      String authorityNameString,
       IOutputAddActivity addActivity,
       boolean isActive)
     {
       this.pipelineConnector = pipelineConnector;
       this.pipelineDescriptionString = pipelineDescriptionString;
+      this.authorityNameString = authorityNameString;
       this.addActivity = addActivity;
       this.isActive = isActive;
     }
@@ -3348,12 +3186,27 @@
       return pipelineConnector.checkURLIndexable(pipelineDescriptionString,uri,addActivity);
     }
 
-    public int addOrReplaceDocumentWithException(String documentURI, RepositoryDocument document, String authorityNameString)
+    public int addOrReplaceDocumentWithException(String documentURI, RepositoryDocument document)
       throws ManifoldCFException, ServiceInterruption, IOException
     {
-      return pipelineConnector.addOrReplaceDocumentWithException(
+      // If the transformation connector doesn't do what it should, compensate!
+      MonitoredAddActivityWrapper wrapper = new MonitoredAddActivityWrapper(addActivity);
+      int rval = pipelineConnector.addOrReplaceDocumentWithException(
         documentURI,pipelineDescriptionString,
-        document,authorityNameString,addActivity);
+        document,authorityNameString,wrapper);
+      // The wrapper detects activity by the connector, so if we don't see either sendDocument() or
+      // noDocument(), we issue noDocument() ourselves.  If the connector was an output connector,
+      // this will wind up being a no-op, but otherwise it will guarantee that recording takes place.
+      if (!wrapper.wasDocumentActedUpon())
+        addActivity.noDocument();
+      return rval;
+    }
+    
+    public void noDocument()
+      throws ManifoldCFException, ServiceInterruption
+    {
+      // Call the addActivity method for handling no document
+      addActivity.noDocument();
     }
   }
   
@@ -3366,10 +3219,11 @@
     protected final String documentVersion;
     protected final String parameterVersion;
     protected final String docKey;
+    protected final String componentHash;
     protected final IOutputActivity activity;
     
     public OutputAddEntryPoint(IOutputConnector outputConnector,
-      String outputDescriptionString,
+      VersionContext outputDescriptionString,
       IOutputActivity activity,
       boolean isActive,
       String outputConnectionName,
@@ -3377,9 +3231,11 @@
       long ingestTime,
       String documentVersion,
       String parameterVersion,
-      String docKey)
+      String docKey,
+      String componentHash,
+      String authorityNameString)
     {
-      super(outputConnector,outputDescriptionString,activity,isActive);
+      super(outputConnector,outputDescriptionString,authorityNameString,activity,isActive);
       this.outputConnector = outputConnector;
       this.outputConnectionName = outputConnectionName;
       this.transformationVersion = transformationVersion;
@@ -3387,11 +3243,26 @@
       this.documentVersion = documentVersion;
       this.parameterVersion = parameterVersion;
       this.docKey = docKey;
+      this.componentHash = componentHash;
       this.activity = activity;
     }
     
     @Override
-    public int addOrReplaceDocumentWithException(String documentURI, RepositoryDocument document, String authorityNameString)
+    public void noDocument()
+      throws ManifoldCFException, ServiceInterruption
+    {
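+      // A null documentURI and document signal the "record status only" path in addOrReplaceDocumentWithException below.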
+      try
+      {
+        addOrReplaceDocumentWithException(null,null);
+      }
+      catch (IOException e)
+      {
+        throw new RuntimeException("Unexpected IOException: "+e.getMessage(),e);
+      }
+    }
+    
+    @Override
+    public int addOrReplaceDocumentWithException(String documentURI, RepositoryDocument document)
       throws ManifoldCFException, ServiceInterruption, IOException
     {
       // No transactions; not safe because post may take too much time
@@ -3502,15 +3373,15 @@
           // This is a marker that says "something is there"; it has an empty version, which indicates
           // that we don't know anything about it.  That means it will be reingested when the
           // next version comes along, and will be deleted if called for also.
-          noteDocumentIngest(outputConnectionName,docKey,null,null,null,null,null,ingestTime,documentURI,documentURIHash);
-          int result = super.addOrReplaceDocumentWithException(documentURI, document, authorityNameString);
-          noteDocumentIngest(outputConnectionName,docKey,documentVersion,transformationVersion,pipelineDescriptionString,parameterVersion,authorityNameString,ingestTime,documentURI,documentURIHash);
+          noteDocumentIngest(outputConnectionName,docKey,componentHash,null,null,null,null,null,ingestTime,documentURI,documentURIHash);
+          int result = super.addOrReplaceDocumentWithException(documentURI, document);
+          noteDocumentIngest(outputConnectionName,docKey,componentHash,documentVersion,transformationVersion,pipelineDescriptionString.getVersionString(),parameterVersion,authorityNameString,ingestTime,documentURI,documentURIHash);
           return result;
         }
 
         // If we get here, it means we are noting that the document was examined, but that no change was required.  This is signaled
         // to noteDocumentIngest by having the null documentURI.
-        noteDocumentIngest(outputConnectionName,docKey,documentVersion,transformationVersion,pipelineDescriptionString,parameterVersion,authorityNameString,ingestTime,null,null);
+        noteDocumentIngest(outputConnectionName,docKey,componentHash,documentVersion,transformationVersion,pipelineDescriptionString.getVersionString(),parameterVersion,authorityNameString,ingestTime,null,null);
         return IPipelineConnector.DOCUMENTSTATUS_ACCEPTED;
       }
       finally
@@ -3623,9 +3494,9 @@
   /** Pipeline specification for backwards-compatible methods without pipelines */
   protected static class RuntPipelineSpecification extends RuntPipelineSpecificationBasic implements IPipelineSpecification
   {
-    protected final String outputDescriptionString;
+    protected final VersionContext outputDescriptionString;
     
-    public RuntPipelineSpecification(String outputConnectionName, String outputDescriptionString)
+    public RuntPipelineSpecification(String outputConnectionName, VersionContext outputDescriptionString)
     {
       super(outputConnectionName);
       this.outputDescriptionString = outputDescriptionString;
@@ -3645,7 +3516,7 @@
     *@return the description string that stage.
     */
     @Override
-    public String getStageDescriptionString(int stage)
+    public VersionContext getStageDescriptionString(int stage)
     {
       if (stage == 0)
         return outputDescriptionString;
@@ -3663,7 +3534,7 @@
     protected final String oldTransformationVersion;
     protected final String oldAuthorityNameString;
     
-    public RuntPipelineSpecificationWithVersions(String outputConnectionName, String outputDescriptionString,
+    public RuntPipelineSpecificationWithVersions(String outputConnectionName, VersionContext outputDescriptionString,
       String oldDocumentVersion, String oldParameterVersion, String oldOutputVersion, String oldTransformationVersion,
       String oldAuthorityNameString)
     {
@@ -3866,4 +3737,137 @@
     }
     
   }
+  
+  /** This class passes everything through, and monitors what happens so that the
+  * framework can compensate for any transformation connector coding errors.
+  */
+  protected static class MonitoredAddActivityWrapper implements IOutputAddActivity
+  {
+    protected final IOutputAddActivity activities;
+    
+    protected boolean documentProcessed = false;
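+    // Set to true as soon as the wrapped connector either sends the document onward or explicitly records "no document".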
+    
+    public MonitoredAddActivityWrapper(IOutputAddActivity activities)
+    {
+      this.activities = activities;
+    }
+    
+    public boolean wasDocumentActedUpon()
+    {
+      return documentProcessed;
+    }
+
+    /** Send a document via the pipeline to the next output connection.
+    *@param documentURI is the document's URI.
+    *@param document is the document data to be processed (handed to the output data store).
+    *@return the document status (accepted or permanently rejected); return codes are listed in IPipelineConnector.
+    *@throws IOException only if there's an IO error reading the data from the document.
+    */
+    @Override
+    public int sendDocument(String documentURI, RepositoryDocument document)
+      throws ManifoldCFException, ServiceInterruption, IOException
+    {
+      int rval = activities.sendDocument(documentURI,document);
+      documentProcessed = true;
+      return rval;
+    }
+
+    /** Send NO document via the pipeline to the next output connection.  This is equivalent
+    * to sending an empty document placeholder.
+    */
+    @Override
+    public void noDocument()
+      throws ManifoldCFException, ServiceInterruption
+    {
+      activities.noDocument();
+      documentProcessed = true;
+    }
+
+    /** Qualify an access token appropriately, to match access tokens as returned by mod_aa.  This method
+    * includes the authority name with the access token, if any, so that each authority may establish its own token space.
+    *@param authorityNameString is the name of the authority to use to qualify the access token.
+    *@param accessToken is the raw, repository access token.
+    *@return the properly qualified access token.
+    */
+    @Override
+    public String qualifyAccessToken(String authorityNameString, String accessToken)
+      throws ManifoldCFException
+    {
+      return activities.qualifyAccessToken(authorityNameString,accessToken);
+    }
+
+    /** Record time-stamped information about the activity of the output connector.
+    *@param startTime is either null or the time since the start of epoch in milliseconds (Jan 1, 1970).  Every
+    *       activity has an associated time; the startTime field records when the activity began.  A null value
+    *       indicates that the start time and the finishing time are the same.
+    *@param activityType is a string which is fully interpretable only in the context of the connector involved, which is
+    *       used to categorize what kind of activity is being recorded.  For example, a web connector might record a
+    *       "fetch document" activity.  Cannot be null.
+    *@param dataSize is the number of bytes of data involved in the activity, or null if not applicable.
+    *@param entityURI is a (possibly long) string which identifies the object involved in the history record.
+    *       The interpretation of this field will differ from connector to connector.  May be null.
+    *@param resultCode contains a terse description of the result of the activity.  The description is limited in
+    *       size to 255 characters, and can be interpreted only in the context of the current connector.  May be null.
+    *@param resultDescription is a (possibly long) human-readable string which adds detail, if required, to the result
+    *       described in the resultCode field.  This field is not meant to be queried on.  May be null.
+    */
+    @Override
+    public void recordActivity(Long startTime, String activityType, Long dataSize,
+      String entityURI, String resultCode, String resultDescription)
+      throws ManifoldCFException
+    {
+      activities.recordActivity(startTime,activityType,dataSize,entityURI,resultCode,resultDescription);
+    }
+
+    /** Detect if a mime type is acceptable downstream or not.  This method is used to determine whether it makes sense to fetch a document
+    * in the first place.
+    *@param mimeType is the mime type of the document.
+    *@return true if the mime type can be accepted by the downstream connection.
+    */
+    @Override
+    public boolean checkMimeTypeIndexable(String mimeType)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return activities.checkMimeTypeIndexable(mimeType);
+    }
+
+    /** Pre-determine whether a document (passed here as a File object) is acceptable downstream.  This method is
+    * used to determine whether a document needs to be actually transferred.  This hook is provided mainly to support
+    * search engines that only handle a small set of accepted file types.
+    *@param localFile is the local file to check.
+    *@return true if the file is acceptable by the downstream connection.
+    */
+    @Override
+    public boolean checkDocumentIndexable(File localFile)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return activities.checkDocumentIndexable(localFile);
+    }
+
+    /** Pre-determine whether a document's length is acceptable downstream.  This method is used
+    * to determine whether to fetch a document in the first place.
+    *@param length is the length of the document.
+    *@return true if the file is acceptable by the downstream connection.
+    */
+    @Override
+    public boolean checkLengthIndexable(long length)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return activities.checkLengthIndexable(length);
+    }
+
+    /** Pre-determine whether a document's URL is acceptable downstream.  This method is used
+    * to help filter out documents that cannot be indexed in advance.
+    *@param url is the URL of the document.
+    *@return true if the file is acceptable by the downstream connection.
+    */
+    @Override
+    public boolean checkURLIndexable(String url)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      return activities.checkURLIndexable(url);
+    }
+
+  }
+  
 }
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java
index 861935d..fd7d46a 100644
--- a/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java
+++ b/framework/agents/src/main/java/org/apache/manifoldcf/agents/incrementalingest/RepositoryDocumentFactory.java
@@ -40,10 +40,10 @@
   protected final RepositoryDocument original;
 
   // The binary stream file and stream (if any)
-  protected BinaryTracker binaryTracker;
+  protected BinaryInput binaryTracker;
   
   // Readers (organized by metadata)
-  protected Map<String,ReaderTracker[]> metadataReaders = new HashMap<String,ReaderTracker[]>();
+  protected Map<String,CharacterInput[]> metadataReaders = new HashMap<String,CharacterInput[]>();
   
   /** Constructor.
   * Pass a RepositoryDocument.  This constructor reads all streams and stores them in
@@ -59,7 +59,7 @@
     
     try
     {
-      this.binaryTracker = new BinaryTracker(document.getBinaryStream());
+      this.binaryTracker = new TempFileInput(document.getBinaryStream());
       // Copy all reader streams
       Iterator<String> iter = document.getFields();
       if (iter.hasNext())
@@ -68,12 +68,12 @@
         Object[] objects = document.getField(fieldName);
         if (objects instanceof Reader[])
         {
-          ReaderTracker[] newValues = new ReaderTracker[objects.length];
+          CharacterInput[] newValues = new CharacterInput[objects.length];
           metadataReaders.put(fieldName,newValues);
           // Populate newValues
           for (int i = 0; i < newValues.length; i++)
           {
-            newValues[i] = new ReaderTracker((Reader)objects[i]);
+            newValues[i] = new TempFileCharacterInput((Reader)objects[i]);
           }
         }
       }
@@ -82,14 +82,14 @@
     {
       // Clean up everything we've done so far.
       if (this.binaryTracker != null)
-        this.binaryTracker.close();
+        this.binaryTracker.discard();
       for (String key : metadataReaders.keySet())
       {
-        ReaderTracker[] rt = metadataReaders.get(key);
-        for (ReaderTracker r : rt)
+        CharacterInput[] rt = metadataReaders.get(key);
+        for (CharacterInput r : rt)
         {
           if (r != null)
-            r.close();
+            r.discard();
         }
       }
       if (e instanceof IOException)
@@ -129,7 +129,8 @@
     }
     
     // Copy binary
-    rd.setBinary(binaryTracker.createNewInputStream(),original.getBinaryLength());
+    binaryTracker.doneWithStream();
+    rd.setBinary(binaryTracker.getStream(),original.getBinaryLength());
     // Copy metadata fields (including minting new Readers where needed)
     Iterator<String> iter = original.getFields();
     if (iter.hasNext())
@@ -138,11 +139,12 @@
       Object[] objects = original.getField(fieldName);
       if (objects instanceof Reader[])
       {
-        ReaderTracker[] rts = metadataReaders.get(fieldName);
+        CharacterInput[] rts = metadataReaders.get(fieldName);
         Reader[] newReaders = new Reader[rts.length];
         for (int i = 0; i < rts.length; i++)
         {
-          newReaders[i] = rts[i].createNewReader();
+          rts[i].doneWithStream();
+          newReaders[i] = rts[i].getStream();
         }
         rd.addField(fieldName,newReaders);
       }
@@ -166,199 +168,15 @@
   public void close()
     throws ManifoldCFException
   {
-    binaryTracker.close();
+    binaryTracker.discard();
     for (String key : metadataReaders.keySet())
     {
-      ReaderTracker[] rt = metadataReaders.get(key);
-      for (ReaderTracker r : rt)
+      CharacterInput[] rt = metadataReaders.get(key);
+      for (CharacterInput r : rt)
       {
-        r.close();
+        r.discard();
       }
     }
   }
   
-  protected static class ReaderTracker
-  {
-    protected File readerFile;
-    protected Reader reader = null;
-    
-    public ReaderTracker(Reader r)
-      throws IOException
-    {
-      // Make a local copy
-      readerFile = File.createTempFile("mcfrdr","tmp");
-      try
-      {
-        FileOutputStream os = new FileOutputStream(readerFile);
-        try
-        {
-          OutputStreamWriter ow = new OutputStreamWriter(os,"utf-8");
-          try
-          {
-            char[] byteArray = new char[65536];
-            while (true)
-            {
-              int amt = r.read(byteArray,0,byteArray.length);
-              if (amt == -1)
-                break;
-              ow.write(byteArray,0,amt);
-            }
-          }
-          finally
-          {
-            ow.flush();
-          }
-        }
-        finally
-        {
-          os.close();
-        }
-      }
-      catch (Throwable e)
-      {
-        readerFile.delete();
-        if (e instanceof IOException)
-          throw (IOException)e;
-        else if (e instanceof RuntimeException)
-          throw (RuntimeException)e;
-        else if (e instanceof Error)
-          throw (Error)e;
-        else
-          throw new RuntimeException("Unknown error type: "+e.getClass().getName()+": "+e.getMessage(),e);
-      }
-    }
-    
-    public Reader createNewReader()
-      throws ManifoldCFException
-    {
-      try
-      {
-        // Close existing inputstream and create a new one.
-        if (reader != null)
-        {
-          reader.close();
-          reader = null;
-        }
-        reader = new InputStreamReader(new FileInputStream(readerFile),"utf-8");
-        return reader;
-      }
-      catch (IOException e)
-      {
-        handleIOException(e);
-        return null;
-      }
-    }
-    
-    public void close()
-      throws ManifoldCFException
-    {
-      try
-      {
-        // Close all streams and delete file
-        if (reader != null)
-        {
-          reader.close();
-          reader = null;
-        }
-        readerFile.delete();
-      }
-      catch (IOException e)
-      {
-        handleIOException(e);
-      }
-    }
-  }
-  
-  protected static class BinaryTracker
-  {
-    protected File binaryFile;
-    protected InputStream inputStream = null;
-    
-    public BinaryTracker(InputStream is)
-      throws IOException
-    {
-      // Make a local copy
-      binaryFile = File.createTempFile("mcfbin","tmp");
-      try
-      {
-        FileOutputStream os = new FileOutputStream(binaryFile);
-        try
-        {
-          byte[] byteArray = new byte[65536];
-          while (true)
-          {
-            int amt = is.read(byteArray,0,byteArray.length);
-            if (amt == -1)
-              break;
-            os.write(byteArray,0,amt);
-          }
-        }
-        finally
-        {
-          os.close();
-        }
-      }
-      catch (Throwable e)
-      {
-        binaryFile.delete();
-        if (e instanceof IOException)
-          throw (IOException)e;
-        else if (e instanceof RuntimeException)
-          throw (RuntimeException)e;
-        else if (e instanceof Error)
-          throw (Error)e;
-        else
-          throw new RuntimeException("Unknown error type: "+e.getClass().getName()+": "+e.getMessage(),e);
-      }
-    }
-    
-    public InputStream createNewInputStream()
-      throws ManifoldCFException
-    {
-      try
-      {
-        // Close existing inputstream and create a new one.
-        if (inputStream != null)
-        {
-          inputStream.close();
-          inputStream = null;
-        }
-        inputStream = new FileInputStream(binaryFile);
-        return inputStream;
-      }
-      catch (IOException e)
-      {
-        handleIOException(e);
-        return null;
-      }
-    }
-    
-    public void close()
-      throws ManifoldCFException
-    {
-      try
-      {
-        // Close all streams and delete file
-        if (inputStream != null)
-        {
-          inputStream.close();
-          inputStream = null;
-        }
-        binaryFile.delete();
-      }
-      catch (IOException e)
-      {
-        handleIOException(e);
-      }
-    }
-  }
-
-  protected static void handleIOException(IOException e)
-    throws ManifoldCFException
-  {
-    if (e instanceof InterruptedIOException)
-      throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-    throw new ManifoldCFException(e.getMessage(),e);
-  }
-  
 }
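
A minimal sketch of the stream lifecycle that the core framework's TempFileInput and
TempFileCharacterInput impose, as used in the hunks above: doneWithStream() before each
getStream(), and discard() to release the backing temp file.  BinaryInput comes from
org.apache.manifoldcf.core.interfaces; writeToOutput() and the loop bound are hypothetical.

  static void replayBinary(InputStream sourceStream, int passes)
    throws Exception
  {
    BinaryInput tracker = new TempFileInput(sourceStream); // copies the source to a temp file
    try
    {
      for (int pass = 0; pass < passes; pass++)
      {
        tracker.doneWithStream();             // close any stream handed out earlier
        InputStream is = tracker.getStream(); // fresh stream over the same temp file
        writeToOutput(is);                    // hypothetical consumer
      }
    }
    finally
    {
      tracker.discard();                      // delete the backing temp file
    }
  }
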
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/DocumentIngestStatusSet.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/DocumentIngestStatusSet.java
new file mode 100644
index 0000000..bf50771
--- /dev/null
+++ b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/DocumentIngestStatusSet.java
@@ -0,0 +1,76 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.interfaces;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import java.util.*;
+
+/** This object contains statuses for the primary document and all component documents.
+*/
+public class DocumentIngestStatusSet
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  protected DocumentIngestStatus primary = null;
+  protected final Map<String,DocumentIngestStatus> components = new HashMap<String,DocumentIngestStatus>();
+  
+  /** Constructor */
+  public DocumentIngestStatusSet()
+  {
+  }
+  
+  /** Add document status.
+  *@param componentHash is the component identifier hash, or null.
+  *@param status is the document ingest status.
+  */
+  public void addDocumentStatus(String componentHash, DocumentIngestStatus status)
+  {
+    if (componentHash == null)
+      primary = status;
+    else
+      components.put(componentHash,status);
+  }
+  
+  /** Get primary status.
+  *@return the primary status.
+  */
+  public DocumentIngestStatus getPrimary()
+  {
+    return primary;
+  }
+  
+  /** Get component status.
+  *@param componentHash is the component identifier hash, or null.
+  *@return the component status.
+  */
+  public DocumentIngestStatus getComponent(String componentHash)
+  {
+    if (componentHash == null)
+      return primary;
+    return components.get(componentHash);
+  }
+  
+  /** Iterate over components.
+  *@return an iterator over component hashes.
+  */
+  public Iterator<String> componentIterator()
+  {
+    return components.keySet().iterator();
+  }
+}
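
A brief usage sketch exercising only the methods above; the status objects and component
hash are assumed to be in scope.

  DocumentIngestStatusSet set = new DocumentIngestStatusSet();
  set.addDocumentStatus(null, primaryStatus);            // null component hash == primary document
  set.addDocumentStatus(componentHash, componentStatus); // one entry per component
  DocumentIngestStatus primary = set.getComponent(null); // equivalent to set.getPrimary()
  Iterator<String> componentHashes = set.componentIterator(); // components only; primary excluded
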
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
index 4d33ac4..1c6c82c 100644
--- a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
+++ b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IIncrementalIngester.java
@@ -66,13 +66,20 @@
   *@return the last indexed output connection name.
   */
   public String getLastIndexedOutputConnectionName(IPipelineSpecificationBasic pipelineSpecificationBasic);
-  
+
+  /** From a pipeline specification, get the name of the output connection that will be indexed first
+  * in the pipeline.
+  *@param pipelineSpecificationBasic is the basic pipeline specification.
+  *@return the first indexed output connection name.
+  */
+  public String getFirstIndexedOutputConnectionName(IPipelineSpecificationBasic pipelineSpecificationBasic);
+
   /** Get an output version string for a document.
   *@param outputConnectionName is the name of the output connection associated with this action.
   *@param spec is the output specification.
   *@return the description string.
   */
-  public String getOutputDescription(String outputConnectionName, OutputSpecification spec)
+  public VersionContext getOutputDescription(String outputConnectionName, Specification spec)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Get transformation version string for a document.
@@ -80,7 +87,7 @@
   *@param spec is the transformation specification.
   *@return the description string.
   */
-  public String getTransformationDescription(String transformationConnectionName, OutputSpecification spec)
+  public VersionContext getTransformationDescription(String transformationConnectionName, Specification spec)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Check if a mime type is indexable.
@@ -95,16 +102,6 @@
     IOutputCheckActivity activity)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Check if a mime type is indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param mimeType is the mime type to check.
-  *@return true if the mimeType is indexable.
-  */
-  @Deprecated
-  public boolean checkMimeTypeIndexable(String outputConnectionName, String outputDescription, String mimeType)
-    throws ManifoldCFException, ServiceInterruption;
-
   /** Check if a file is indexable.
   *@param pipelineSpecification is the pipeline specification.
   *@param localFile is the local file to check.
@@ -117,16 +114,6 @@
     IOutputCheckActivity activity)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Check if a file is indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param localFile is the local file to check.
-  *@return true if the local file is indexable.
-  */
-  @Deprecated
-  public boolean checkDocumentIndexable(String outputConnectionName, String outputDescription, File localFile)
-    throws ManifoldCFException, ServiceInterruption;
-
   /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors
   * to help filter out documents that are too long to be indexable.
   *@param pipelineSpecification is the pipeline specification.
@@ -140,17 +127,6 @@
     IOutputCheckActivity activity)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors
-  * to help filter out documents that are too long to be indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param length is the length of the document.
-  *@return true if the file is indexable.
-  */
-  @Deprecated
-  public boolean checkLengthIndexable(String outputConnectionName, String outputDescription, long length)
-    throws ManifoldCFException, ServiceInterruption;
-
   /** Pre-determine whether a document's URL is indexable by this connector.  This method is used by participating repository connectors
  * to help filter out documents that are not indexable.
   *@param pipelineSpecification is the pipeline specification.
@@ -164,17 +140,6 @@
     IOutputCheckActivity activity)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Pre-determine whether a document's URL is indexable by this connector.  This method is used by participating repository connectors
-  * to help filter out documents that not indexable.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param outputDescription is the output description string.
-  *@param url is the url of the document.
-  *@return true if the file is indexable.
-  */
-  @Deprecated
-  public boolean checkURLIndexable(String outputConnectionName, String outputDescription, String url)
-    throws ManifoldCFException, ServiceInterruption;
-
   /** Determine whether we need to fetch or refetch a document.
   * Pass in information including the pipeline specification with existing version info, plus new document and parameter version strings.
   * If no outputs need to be updated, then this method will return false.  If any outputs need updating, then true is returned.
@@ -192,94 +157,42 @@
     String newAuthorityNameString);
 
   /** Record a document version, but don't ingest it.
-  * The purpose of this method is to keep track of the frequency at which ingestion "attempts" take place.
-  * ServiceInterruption is thrown if this action must be rescheduled.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hashed document identifier.
-  *@param documentVersion is the document version.
-  *@param recordTime is the time at which the recording took place, in milliseconds since epoch.
-  *@param activities is the object used in case a document needs to be removed from the output index as the result of this operation.
-  */
-  @Deprecated
-  public void documentRecord(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    String documentVersion, long recordTime,
-    IOutputActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Record a document version, but don't ingest it.
-  * The purpose of this method is to keep track of the frequency at which ingestion "attempts" take place.
-  * ServiceInterruption is thrown if this action must be rescheduled.
+  * The purpose of this method is to update document version information without reindexing the document.
   *@param pipelineSpecificationBasic is the basic pipeline specification needed.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
   *@param identifierHash is the hashed document identifier.
+  *@param componentHash is the hashed component identifier, if any.
   *@param documentVersion is the document version.
   *@param recordTime is the time at which the recording took place, in milliseconds since epoch.
-  *@param activities is the object used in case a document needs to be removed from the output index as the result of this operation.
   */
   public void documentRecord(
     IPipelineSpecificationBasic pipelineSpecificationBasic,
-    String identifierClass, String identifierHash,
-    String documentVersion, long recordTime,
-    IOutputActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
+    String identifierClass, String identifierHash, String componentHash,
+    String documentVersion, long recordTime)
+    throws ManifoldCFException;
 
-  /** Ingest a document.
-  * This ingests the document, and notes it.  If this is a repeat ingestion of the document, this
-  * method also REMOVES ALL OLD METADATA.  When complete, the index will contain only the metadata
-  * described by the RepositoryDocument object passed to this method.
-  * ServiceInterruption is thrown if the document ingestion must be rescheduled.
-  *@param outputConnectionName is the name of the output connection associated with this action.
+  /** Remove a document from specified indexes, just as if an empty document
+  * were indexed, and record the necessary version information.
+  * This method is conceptually similar to documentIngest(), but does not actually take
+  * a document or allow it to be transformed.  If there is a document already
+  * indexed, it is removed from the index.
+  *@param pipelineSpecificationWithVersions is the pipeline specification with already-fetched output versioning information.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
   *@param identifierHash is the hashed document identifier.
+  *@param componentHash is the hashed component identifier, if any.
   *@param documentVersion is the document version.
-  *@param outputVersion is the output version string constructed from the output specification by the output connector.
+  *@param parameterVersion is the version string for the forced parameters.
   *@param authorityName is the name of the authority associated with the document, if any.
-  *@param data is the document data.  The data is closed after ingestion is complete.
-  *@param ingestTime is the time at which the ingestion took place, in milliseconds since epoch.
-  *@param documentURI is the URI of the document, which will be used as the key of the document in the index.
+  *@param recordTime is the time at which the recording took place, in milliseconds since epoch.
   *@param activities is an object providing a set of methods that the implementer can use to perform the operation.
-  *@return true if the ingest was ok, false if the ingest is illegal (and should not be repeated).
   */
-  @Deprecated
-  public boolean documentIngest(String outputConnectionName,
-    String identifierClass, String identifierHash,
+  public void documentNoData(
+    IPipelineSpecificationWithVersions pipelineSpecificationWithVersions,
+    String identifierClass, String identifierHash, String componentHash,
     String documentVersion,
-    String outputVersion,
-    String authorityName,
-    RepositoryDocument data,
-    long ingestTime, String documentURI,
-    IOutputActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Ingest a document.
-  * This ingests the document, and notes it.  If this is a repeat ingestion of the document, this
-  * method also REMOVES ALL OLD METADATA.  When complete, the index will contain only the metadata
-  * described by the RepositoryDocument object passed to this method.
-  * ServiceInterruption is thrown if the document ingestion must be rescheduled.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hashed document identifier.
-  *@param documentVersion is the document version.
-  *@param parameterVersion is the forced parameter version.
-  *@param outputVersion is the output version string constructed from the output specification by the output connector.
-  *@param authorityName is the name of the authority associated with the document, if any.
-  *@param data is the document data.  The data is closed after ingestion is complete.
-  *@param ingestTime is the time at which the ingestion took place, in milliseconds since epoch.
-  *@param documentURI is the URI of the document, which will be used as the key of the document in the index.
-  *@param activities is an object providing a set of methods that the implementer can use to perform the operation.
-  *@return true if the ingest was ok, false if the ingest is illegal (and should not be repeated).
-  */
-  @Deprecated
-  public boolean documentIngest(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    String documentVersion,
-    String outputVersion,
     String parameterVersion,
     String authorityName,
-    RepositoryDocument data,
-    long ingestTime, String documentURI,
+    long recordTime,
     IOutputActivity activities)
     throws ManifoldCFException, ServiceInterruption;
 
@@ -291,6 +204,7 @@
   *@param pipelineSpecificationWithVersions is the pipeline specification with already-fetched output versioning information.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
   *@param identifierHash is the hashed document identifier.
+  *@param componentHash is the hashed component identifier, if any.
   *@param documentVersion is the document version.
   *@param parameterVersion is the version string for the forced parameters.
   *@param authorityName is the name of the authority associated with the document, if any.
@@ -303,7 +217,7 @@
   */
   public boolean documentIngest(
     IPipelineSpecificationWithVersions pipelineSpecificationWithVersions,
-    String identifierClass, String identifierHash,
+    String identifierClass, String identifierHash, String componentHash,
     String documentVersion,
     String parameterVersion,
     String authorityName,
@@ -312,31 +226,31 @@
     IOutputActivity activities)
     throws ManifoldCFException, ServiceInterruption, IOException;
 
-  /** Note the fact that we checked a document (and found that it did not need to be ingested, because the
-  * versions agreed).
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes are the set of document identifier hashes.
-  *@param checkTime is the time at which the check took place, in milliseconds since epoch.
-  */
-  @Deprecated
-  public void documentCheckMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes,
-    long checkTime)
-    throws ManifoldCFException;
-
-  /** Note the fact that we checked a document (and found that it did not need to be ingested, because the
-  * versions agreed).
-  *@param outputConnectionName is the name of the output connection associated with this action.
+  /** Remove a document component from the search engine index.
+  *@param pipelineSpecificationBasic is the basic pipeline specification.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hashed document identifier.
-  *@param checkTime is the time at which the check took place, in milliseconds since epoch.
+  *@param identifierHash is the hash of the id of the document.
+  *@param componentHash is the hashed component identifier, if any.
+  *@param activities is the object to use to log the details of the removal attempt.  May be null.
   */
-  @Deprecated
-  public void documentCheck(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    long checkTime)
-    throws ManifoldCFException;
+  public void documentRemove(
+    IPipelineSpecificationBasic pipelineSpecificationBasic,
+    String identifierClass, String identifierHash, String componentHash,
+    IOutputRemoveActivity activities)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Remove multiple document components from the search engine index.
+  *@param pipelineSpecificationBasic is the basic pipeline specification.
+  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
+  *@param identifierHashes are the hashes of the ids of the documents.
+  *@param componentHash is the hashed component identifier, if any.
+  *@param activities is the object to use to log the details of the removal attempt.  May be null.
+  */
+  public void documentRemoveMultiple(
+    IPipelineSpecificationBasic pipelineSpecificationBasic,
+    String[] identifierClasses, String[] identifierHashes, String componentHash,
+    IOutputRemoveActivity activities)
+    throws ManifoldCFException, ServiceInterruption;
 
   /** Note the fact that we checked a document (and found that it did not need to be ingested, because the
   * versions agreed).
@@ -364,43 +278,7 @@
     long checkTime)
     throws ManifoldCFException;
 
-  /** Delete multiple documents from the search engine index.
-  *@param outputConnectionNames are the names of the output connections associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is tha array of document identifier hashes if the documents.
-  *@param activities is the object to use to log the details of the ingestion attempt.  May be null.
-  */
-  @Deprecated
-  public void documentDeleteMultiple(String[] outputConnectionNames,
-    String[] identifierClasses, String[] identifierHashes,
-    IOutputRemoveActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Delete multiple documents from the search engine index.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is tha array of document identifier hashes if the documents.
-  *@param activities is the object to use to log the details of the ingestion attempt.  May be null.
-  */
-  @Deprecated
-  public void documentDeleteMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes,
-    IOutputRemoveActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Delete a document from the search engine index.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hash of the id of the document.
-  *@param activities is the object to use to log the details of the ingestion attempt.  May be null.
-  */
-  @Deprecated
-  public void documentDelete(String outputConnectionName,
-    String identifierClass, String identifierHash,
-    IOutputRemoveActivity activities)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Delete multiple documents from the search engine index.
+  /** Delete multiple documents, and their components, from the search engine index.
   *@param pipelineSpecificationBasics are the pipeline specifications associated with the documents.
   *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
  *@param identifierHashes is the array of document identifier hashes of the documents.
@@ -412,7 +290,7 @@
     IOutputRemoveActivity activities)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Delete multiple documents from the search engine index.
+  /** Delete multiple documents, and their components, from the search engine index.
   *@param pipelineSpecificationBasic is the basic pipeline specification.
   *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
  *@param identifierHashes is the array of document identifier hashes of the documents.
@@ -424,7 +302,7 @@
     IOutputRemoveActivity activities)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Delete a document from the search engine index.
+  /** Delete a document, and all its components, from the search engine index.
   *@param pipelineSpecificationBasic is the basic pipeline specification.
   *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
   *@param identifierHash is the hash of the id of the document.
@@ -436,41 +314,6 @@
     IOutputRemoveActivity activities)
     throws ManifoldCFException, ServiceInterruption;
 
-  /** Look up ingestion data for a SET of documents.
-  *@param outputConnectionNames are the names of the output connections associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is the array of document identifier hashes to look up.
-  *@return the array of document data.  Null will come back for any identifier that doesn't
-  * exist in the index.
-  */
-  @Deprecated
-  public DocumentIngestStatus[] getDocumentIngestDataMultiple(String[] outputConnectionNames,
-    String[] identifierClasses, String[] identifierHashes)
-    throws ManifoldCFException;
-
-  /** Look up ingestion data for a SET of documents.
-  *@param outputConnectionName is the names of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is the array of document identifier hashes to look up.
-  *@return the array of document data.  Null will come back for any identifier that doesn't
-  * exist in the index.
-  */
-  @Deprecated
-  public DocumentIngestStatus[] getDocumentIngestDataMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes)
-    throws ManifoldCFException;
-
-  /** Look up ingestion data for a documents.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hash of the id of the document.
-  *@return the current document's ingestion data, or null if the document is not currently ingested.
-  */
-  @Deprecated
-  public DocumentIngestStatus getDocumentIngestData(String outputConnectionName,
-    String identifierClass, String identifierHash)
-    throws ManifoldCFException;
-
   /** Look up ingestion data for a set of documents.
  *@param rval is the IngestStatuses object which will be loaded with all matching results, in no particular order.
   *@param pipelineSpecificationBasics are the pipeline specifications corresponding to the identifier classes and hashes.
@@ -478,7 +321,7 @@
   *@param identifierHashes is the array of document identifier hashes to look up.
   */
   public void getPipelineDocumentIngestDataMultiple(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic[] pipelineSpecificationBasics,
     String[] identifierClasses, String[] identifierHashes)
     throws ManifoldCFException;
@@ -490,7 +333,7 @@
   *@param identifierHashes is the array of document identifier hashes to look up.
   */
   public void getPipelineDocumentIngestDataMultiple(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic pipelineSpecificationBasic,
     String[] identifierClasses, String[] identifierHashes)
     throws ManifoldCFException;
@@ -502,37 +345,13 @@
   *@param identifierHash is the hash of the id of the document.
   */
   public void getPipelineDocumentIngestData(
-    Map<OutputKey,DocumentIngestStatus> rval,
+    IngestStatuses rval,
     IPipelineSpecificationBasic pipelineSpecificationBasic,
     String identifierClass, String identifierHash)
     throws ManifoldCFException;
 
   /** Calculate the average time interval between changes for a document.
   * This is based on the data gathered for the document.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
-  *@param identifierHashes is the hashes of the ids of the documents.
-  *@return the number of milliseconds between changes, or 0 if this cannot be calculated.
-  */
-  @Deprecated
-  public long[] getDocumentUpdateIntervalMultiple(String outputConnectionName,
-    String[] identifierClasses, String[] identifierHashes)
-    throws ManifoldCFException;
-
-  /** Calculate the average time interval between changes for a document.
-  * This is based on the data gathered for the document.
-  *@param outputConnectionName is the name of the output connection associated with this action.
-  *@param identifierClass is the name of the space in which the identifier hash should be interpreted.
-  *@param identifierHash is the hash of the id of the document.
-  *@return the number of milliseconds between changes, or 0 if this cannot be calculated.
-  */
-  @Deprecated
-  public long getDocumentUpdateInterval(String outputConnectionName,
-    String identifierClass, String identifierHash)
-    throws ManifoldCFException;
-
-  /** Calculate the average time interval between changes for a document.
-  * This is based on the data gathered for the document.
   *@param pipelineSpecificationBasic is the basic pipeline specification.
   *@param identifierClasses are the names of the spaces in which the identifier hashes should be interpreted.
  *@param identifierHashes are the hashes of the ids of the documents.
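
Taken together, documentRecord(), documentNoData(), and documentIngest() now address a
(document, component) pair, with a null componentHash denoting the primary document.  A
hedged sketch of how a caller might choose between the latter two; the ingester, pipeline
specification, identifier, and version variables are assumed to be in scope, and the
trailing documentIngest() parameters follow the unchanged remainder of its signature.

  if (document == null)
  {
    // Nothing to index for this component: record version info and clear any stale entry.
    ingester.documentNoData(pipelineSpecWithVersions, identifierClass, identifierHash,
      componentHash, documentVersion, parameterVersion, authorityName,
      System.currentTimeMillis(), activities);
  }
  else
  {
    ingester.documentIngest(pipelineSpecWithVersions, identifierClass, identifierHash,
      componentHash, documentVersion, parameterVersion, authorityName, document,
      System.currentTimeMillis(), documentURI, activities);
  }
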
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java
index 3545896..7626c15 100644
--- a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java
+++ b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IOutputAddActivity.java
@@ -33,11 +33,16 @@
   /** Send a document via the pipeline to the next output connection.
   *@param documentURI is the document's URI.
   *@param document is the document data to be processed (handed to the output data store).
-  *@param authorityNameString is the authority name string that should be used to qualify the document's access tokens.
   *@return the document status (accepted or permanently rejected); return codes are listed in IPipelineConnector.
   *@throws IOException only if there's an IO error reading the data from the document.
   */
-  public int sendDocument(String documentURI, RepositoryDocument document, String authorityNameString)
+  public int sendDocument(String documentURI, RepositoryDocument document)
     throws ManifoldCFException, ServiceInterruption, IOException;
 
+  /** Send NO document via the pipeline to the next output connection.  This is equivalent
+  * to sending an empty document placeholder.
+  */
+  public void noDocument()
+    throws ManifoldCFException, ServiceInterruption;
+
 }
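
The new noDocument() hook lets a transformation connector suppress a document while still
allowing downstream version records to be written.  A hedged sketch of a connector using
it; the isAcceptable() predicate is hypothetical.

  @Override
  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription,
    RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
    throws ManifoldCFException, ServiceInterruption, IOException
  {
    if (!isAcceptable(document))    // hypothetical filtering predicate
    {
      // No document goes onward, but version information can still be recorded.
      activities.noDocument();
      return DOCUMENTSTATUS_REJECTED;
    }
    return activities.sendDocument(documentURI, document);
  }
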
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
index 004c108..fee5042 100644
--- a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
+++ b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineConnector.java
@@ -43,7 +43,7 @@
   /** Document permanently rejected */
   public final static int DOCUMENTSTATUS_REJECTED = 1;
 
-  /** Get a pipeline version string, given a pipeline specification object.  The version string is used to
+  /** Get a pipeline version object, given a pipeline specification object.  The version string is used to
   * uniquely describe the pertinent details of the specification and the configuration, to allow the Connector 
   * Framework to determine whether a document will need to be processed again.
   * Note that the contents of any document cannot be considered by this method; only configuration and specification information
@@ -51,10 +51,10 @@
   *
   * This method presumes that the underlying connector object has been configured.
   *@param spec is the current pipeline specification object for this connection for the job that is doing the crawling.
-  *@return a string, of unlimited length, which uniquely describes configuration and specification in such a way that
+  *@return a version object, including a string of unlimited length, which uniquely describes configuration and specification in such a way that
   * if two such strings are equal, nothing that affects how or whether the document is indexed will be different.
   */
-  public String getPipelineDescription(Specification spec)
+  public VersionContext getPipelineDescription(Specification spec)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Detect if a mime type is acceptable or not.  This method is used to determine whether it makes sense to fetch a document
@@ -64,7 +64,7 @@
   *@param checkActivity is an object including the activities that can be performed by this method.
   *@return true if the mime type can be accepted by this connector.
   */
-  public boolean checkMimeTypeIndexable(String pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
+  public boolean checkMimeTypeIndexable(VersionContext pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Pre-determine whether a document (passed here as a File object) is acceptable or not.  This method is
@@ -75,7 +75,7 @@
   *@param checkActivity is an object including the activities that can be done by this method.
   *@return true if the file is acceptable, false if not.
   */
-  public boolean checkDocumentIndexable(String pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
+  public boolean checkDocumentIndexable(VersionContext pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Pre-determine whether a document's length is acceptable.  This method is used
@@ -85,7 +85,7 @@
   *@param checkActivity is an object including the activities that can be done by this method.
   *@return true if the file is acceptable, false if not.
   */
-  public boolean checkLengthIndexable(String pipelineDescription, long length, IOutputCheckActivity checkActivity)
+  public boolean checkLengthIndexable(VersionContext pipelineDescription, long length, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Pre-determine whether a document's URL is acceptable.  This method is used
@@ -95,7 +95,7 @@
   *@param checkActivity is an object including the activities that can be done by this method.
   *@return true if the file is acceptable, false if not.
   */
-  public boolean checkURLIndexable(String pipelineDescription, String url, IOutputCheckActivity checkActivity)
+  public boolean checkURLIndexable(VersionContext pipelineDescription, String url, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Add (or replace) a document in the output data store using the connector.
@@ -114,7 +114,7 @@
   *@return the document status (accepted or permanently rejected).
   *@throws IOException only if there's a stream error reading the document data.
   */
-  public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption, IOException;
 
   // UI support methods.
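
A VersionContext bundles the version string with the configuration parameters and
Specification it was derived from; getVersionString() recovers the plain string, as the
base-class hunks later in this patch show.  A hedged sketch of a connector adopting the
new signature; computeVersionString() is a hypothetical helper.

  @Override
  public VersionContext getPipelineDescription(Specification spec)
    throws ManifoldCFException, ServiceInterruption
  {
    // Carry the params and spec along with the string they were derived from.
    return new VersionContext(computeVersionString(spec), params, spec);
  }
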
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineSpecification.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineSpecification.java
index 6bcef9d..e46dabc 100644
--- a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineSpecification.java
+++ b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IPipelineSpecification.java
@@ -36,6 +36,6 @@
   *@param stage is the stage to get the connection name for.
  *@return the description string for that stage.
   */
-  public String getStageDescriptionString(int stage);
+  public VersionContext getStageDescriptionString(int stage);
   
 }
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java
new file mode 100644
index 0000000..ccc91dd
--- /dev/null
+++ b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/IngestStatuses.java
@@ -0,0 +1,116 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.interfaces;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import java.util.*;
+
+/** This object is part of the IIncrementalIngester API.
+* It is an accumulator and organizer of DocumentIngestStatus records
+*/
+public class IngestStatuses
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  protected final Map<OutputKey,DocumentIngestStatusSet> statuses = new HashMap<OutputKey,DocumentIngestStatusSet>();
+  
+  public IngestStatuses()
+  {
+  }
+  
+  /** Add a status record.
+  *@param documentClass is the document class.
+  *@param documentIDHash is the document id's hash value.
+  *@param outputConnectionName is the output connection name.
+  *@param componentIDHash is the component ID hash value.
+  *@param status is the status.
+  */
+  public void addStatus(String documentClass, String documentIDHash, String outputConnectionName,
+    String componentIDHash, DocumentIngestStatus status)
+  {
+    OutputKey ok = new OutputKey(documentClass,documentIDHash,outputConnectionName);
+    DocumentIngestStatusSet set = statuses.get(ok);
+    if (set == null)
+    {
+      set = new DocumentIngestStatusSet();
+      statuses.put(ok,set);
+    }
+    set.addDocumentStatus(componentIDHash,status);
+  }
+  
+  /** Retrieve a status record.
+  *@param documentClass is the document class.
+  *@param documentIDHash is the document id's hash value.
+  *@param outputConnectionName is the output connection name.
+  *@return the status record, if it exists, or null if not.
+  */
+  public DocumentIngestStatusSet getStatus(String documentClass, String documentIDHash, String outputConnectionName)
+  {
+    return statuses.get(new OutputKey(documentClass,documentIDHash,outputConnectionName));
+  }
+
+  protected static class OutputKey
+  {
+    protected final String documentClass;
+    protected final String documentIDHash;
+    protected final String outputConnectionName;
+    
+    /** Constructor */
+    public OutputKey(String documentClass, String documentIDHash, String outputConnectionName)
+    {
+      // Identifying information
+      this.documentClass = documentClass;
+      this.documentIDHash = documentIDHash;
+      this.outputConnectionName = outputConnectionName;
+    }
+
+    /** Get the document class */
+    public String getDocumentClass()
+    {
+      return documentClass;
+    }
+    
+    /** Get the document ID hash */
+    public String getDocumentIDHash()
+    {
+      return documentIDHash;
+    }
+    
+    /** Get the output connection name */
+    public String getOutputConnectionName()
+    {
+      return outputConnectionName;
+    }
+    
+    public int hashCode()
+    {
+      return documentClass.hashCode() + documentIDHash.hashCode() + outputConnectionName.hashCode();
+    }
+    
+    public boolean equals(Object o)
+    {
+      if (!(o instanceof OutputKey))
+        return false;
+      OutputKey dis = (OutputKey)o;
+      return dis.documentClass.equals(documentClass) &&
+        dis.documentIDHash.equals(documentIDHash) &&
+        dis.outputConnectionName.equals(outputConnectionName);
+    }
+  }
+}
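
A short sketch tying this accumulator to the revised lookup methods on IIncrementalIngester;
it assumes the document class passed to getStatus() matches the identifier class used for
the lookup, and that the surrounding variables are in scope.

  IngestStatuses statuses = new IngestStatuses();
  ingester.getPipelineDocumentIngestData(statuses, pipelineSpecificationBasic,
    identifierClass, identifierHash);
  DocumentIngestStatusSet set = statuses.getStatus(identifierClass, identifierHash,
    outputConnectionName);
  if (set != null)
  {
    DocumentIngestStatus primary = set.getPrimary();        // null if only components were indexed
    Iterator<String> componentHashes = set.componentIterator();
  }
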
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/OutputKey.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/OutputKey.java
deleted file mode 100644
index 0c8fcb2..0000000
--- a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/OutputKey.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/* $Id$ */
-
-/**
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements. See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-package org.apache.manifoldcf.agents.interfaces;
-
-import org.apache.manifoldcf.core.interfaces.*;
-import java.util.*;
-
-/** This object functions as a key describing a unique output, consisting of:
-* - a document class
-* - a document ID hash
-* - an output connection name
-*/
-public class OutputKey
-{
-  public static final String _rcsid = "@(#)$Id$";
-
-  protected final String documentClass;
-  protected final String documentIDHash;
-  protected final String outputConnectionName;
-  
-  /** Constructor */
-  public OutputKey(String documentClass, String documentIDHash, String outputConnectionName)
-  {
-    // Identifying information
-    this.documentClass = documentClass;
-    this.documentIDHash = documentIDHash;
-    this.outputConnectionName = outputConnectionName;
-  }
-
-  /** Get the document class */
-  public String getDocumentClass()
-  {
-    return documentClass;
-  }
-  
-  /** Get the document ID hash */
-  public String getDocumentIDHash()
-  {
-    return documentIDHash;
-  }
-  
-  /** Get the output connection name */
-  public String getOutputConnectionName()
-  {
-    return outputConnectionName;
-  }
-  
-  public int hashCode()
-  {
-    return documentClass.hashCode() + documentIDHash.hashCode() + outputConnectionName.hashCode();
-  }
-  
-  public boolean equals(Object o)
-  {
-    if (!(o instanceof OutputKey))
-      return false;
-    OutputKey dis = (OutputKey)o;
-    return dis.documentClass.equals(documentClass) &&
-      dis.documentIDHash.equals(documentIDHash) &&
-      dis.outputConnectionName.equals(outputConnectionName);
-  }
-      
-}
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java
index 41f19f6..a31c531 100644
--- a/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java
+++ b/framework/agents/src/main/java/org/apache/manifoldcf/agents/interfaces/RepositoryDocument.java
@@ -63,6 +63,55 @@
   {
   }
 
+  /** Create an exact duplicate of this Repository Document.  This is how you are expected to write
+  * transformation connectors: you create a duplicate, and override the fields you want to change.
+  * For streams etc., only the overridden fields need to be explicitly managed by the transformation
+  * connector, since the original fields will be handled by the connector's caller.
+  *@return the exact duplicate.
+  */
+  public RepositoryDocument duplicate()
+  {
+    RepositoryDocument rval = new RepositoryDocument();
+    rval.binaryFieldData = binaryFieldData;
+    rval.binaryLength = binaryLength;
+    rval.fileName = fileName;
+    rval.contentMimeType = contentMimeType;
+    rval.createdDate = createdDate;
+    rval.modifiedDate = modifiedDate;
+    rval.indexingDate = indexingDate;
+    for (String key : fields.keySet())
+    {
+      rval.fields.put(key,fields.get(key));
+    }
+    for (String key : stringFields.keySet())
+    {
+      rval.stringFields.put(key,stringFields.get(key));
+    }
+    for (String key : readerFields.keySet())
+    {
+      rval.readerFields.put(key,readerFields.get(key));
+    }
+    for (String key : dateFields.keySet())
+    {
+      rval.dateFields.put(key,dateFields.get(key));
+    }
+    for (String key : securityLevels.keySet())
+    {
+      rval.securityLevels.put(key,securityLevels.get(key));
+    }
+    return rval;
+  }
+  
+  /** Clear all fields.
+  */
+  public void clearFields()
+  {
+    fields.clear();
+    stringFields.clear();
+    dateFields.clear();
+    readerFields.clear();
+  }
+  
   /** Set the document's created date.  Use null to indicate that the date is unknown.
   *@param date is the date.
   */
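
A hedged sketch of the duplicate-and-override pattern described above; the field name,
value, and surrounding activity object are illustrative only.

  RepositoryDocument outgoing = incoming.duplicate();
  outgoing.addField("processed_by", "my-transformer");   // override or add only what changed
  return activities.sendDocument(documentURI, outgoing); // untouched fields stay with the caller
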
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java
index 6ca16e4..bdf7a5d 100644
--- a/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java
+++ b/framework/agents/src/main/java/org/apache/manifoldcf/agents/output/BaseOutputConnector.java
@@ -89,10 +89,10 @@
   *@return true if the mime type can be accepted by this connector.
   */
   @Override
-  public boolean checkMimeTypeIndexable(String pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
+  public boolean checkMimeTypeIndexable(VersionContext pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
-    return checkMimeTypeIndexable(pipelineDescription, mimeType);
+    return checkMimeTypeIndexable(pipelineDescription.getVersionString(), mimeType);
   }
 
   /** Detect if a mime type is indexable or not.  This method is used by participating repository connectors to pre-filter the number of
@@ -127,10 +127,10 @@
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkDocumentIndexable(String pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
+  public boolean checkDocumentIndexable(VersionContext pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
-    return checkDocumentIndexable(pipelineDescription, localFile);
+    return checkDocumentIndexable(pipelineDescription.getVersionString(), localFile);
   }
 
   /** Pre-determine whether a document (passed here as a File object) is indexable by this connector.  This method is used by participating
@@ -166,10 +166,10 @@
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkLengthIndexable(String pipelineDescription, long length, IOutputCheckActivity checkActivity)
+  public boolean checkLengthIndexable(VersionContext pipelineDescription, long length, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
-    return checkLengthIndexable(pipelineDescription, length);
+    return checkLengthIndexable(pipelineDescription.getVersionString(), length);
   }
 
   /** Pre-determine whether a document's length is indexable by this connector.  This method is used by participating repository connectors
@@ -192,10 +192,10 @@
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkURLIndexable(String pipelineDescription, String url, IOutputCheckActivity checkActivity)
+  public boolean checkURLIndexable(VersionContext pipelineDescription, String url, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
-    return checkURLIndexable(pipelineDescription, url);
+    return checkURLIndexable(pipelineDescription.getVersionString(), url);
   }
 
   /** Pre-determine whether a document's URL is indexable by this connector.  This method is used by participating repository connectors
@@ -222,10 +222,10 @@
   * if two such strings are equal, nothing that affects how or whether the document is indexed will be different.
   */
   @Override
-  public String getPipelineDescription(Specification spec)
+  public VersionContext getPipelineDescription(Specification spec)
     throws ManifoldCFException, ServiceInterruption
   {
-    return getOutputDescription((OutputSpecification)spec);
+    return new VersionContext(getOutputDescription((OutputSpecification)spec),params,spec);
   }
 
   /** Get an output version string, given an output specification.  The output version string is used to uniquely describe the pertinent details of
@@ -263,10 +263,10 @@
   *@throws IOException only if there's a stream error reading the document data.
   */
   @Override
-  public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption, IOException
   {
-    return addOrReplaceDocument(documentURI, pipelineDescription, document, authorityNameString, activities);
+    return addOrReplaceDocument(documentURI, pipelineDescription.getVersionString(), document, authorityNameString, activities);
   }
 
   /** Add (or replace) a document in the output data store using the connector.
diff --git a/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java b/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
index 80cfdc8..c982fab 100644
--- a/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
+++ b/framework/agents/src/main/java/org/apache/manifoldcf/agents/transformation/BaseTransformationConnector.java
@@ -78,7 +78,7 @@
   *@return true if the mime type can be accepted by this connector.
   */
   @Override
-  public boolean checkMimeTypeIndexable(String pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
+  public boolean checkMimeTypeIndexable(VersionContext pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
     return checkActivity.checkMimeTypeIndexable(mimeType);
@@ -93,7 +93,7 @@
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkDocumentIndexable(String pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
+  public boolean checkDocumentIndexable(VersionContext pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
     return checkActivity.checkDocumentIndexable(localFile);
@@ -107,7 +107,7 @@
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkLengthIndexable(String pipelineDescription, long length, IOutputCheckActivity checkActivity)
+  public boolean checkLengthIndexable(VersionContext pipelineDescription, long length, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
     return checkActivity.checkLengthIndexable(length);
@@ -121,7 +121,7 @@
   *@return true if the file is acceptable, false if not.
   */
   @Override
-  public boolean checkURLIndexable(String pipelineDescription, String url, IOutputCheckActivity checkActivity)
+  public boolean checkURLIndexable(VersionContext pipelineDescription, String url, IOutputCheckActivity checkActivity)
     throws ManifoldCFException, ServiceInterruption
   {
     return checkActivity.checkURLIndexable(url);
@@ -139,10 +139,10 @@
   * if two such strings are equal, nothing that affects how or whether the document is indexed will be different.
   */
   @Override
-  public String getPipelineDescription(Specification spec)
+  public VersionContext getPipelineDescription(Specification spec)
     throws ManifoldCFException, ServiceInterruption
   {
-    return "";
+    return new VersionContext("",params,spec);
   }
 
   /** Add (or replace) a document in the output data store using the connector.
@@ -162,7 +162,7 @@
   *@throws IOException only if there's a stream error reading the document data.
   */
   @Override
-  public int addOrReplaceDocumentWithException(String documentURI, String pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+  public int addOrReplaceDocumentWithException(String documentURI, VersionContext pipelineDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
     throws ManifoldCFException, ServiceInterruption, IOException
   {
     return DOCUMENTSTATUS_REJECTED;
@@ -183,7 +183,7 @@
   @Override
   public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
   {
-    return "checkSpecification_"+connectionSequenceNumber;
+    return "s"+connectionSequenceNumber+"_checkSpecification";
   }
 
   /** Obtain the name of the form presave check javascript method to call.
@@ -193,7 +193,7 @@
   @Override
   public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
   {
-    return "checkSpecificationForSave_"+connectionSequenceNumber;
+    return "s"+connectionSequenceNumber+"_checkSpecificationForSave";
   }
 
   /** Output the specification header section.
diff --git a/framework/build.xml b/framework/build.xml
index 1a33d65..631d34c 100644
--- a/framework/build.xml
+++ b/framework/build.xml
@@ -100,7 +100,9 @@
     
     <target name="doc">
         <mkdir dir="dist/doc"/>
-        <javadoc destdir="dist/doc" maxmemory="256M" source="1.7" useexternalfile="true">
+        <javadoc destdir="dist/doc" maxmemory="256M" source="1.7" useexternalfile="true" use="true" locale="en_US">
+            <link href="http://docs.oracle.com/javase/7/docs/api/"/>
+            <link href="http://manifoldcf.apache.org/release/trunk/api/framework/"/>
             <classpath>
                 <path refid="framework-classpath"/>
             </classpath>
@@ -1657,6 +1659,7 @@
 
             <test name="org.apache.manifoldcf.core.common.DateTest" todir="test-output"/>
             <test name="org.apache.manifoldcf.core.fuzzyml.TestFuzzyML" todir="test-output"/>
+            <test name="org.apache.manifoldcf.core.jsongen.TestJsonGen" todir="test-output"/>
             <test name="org.apache.manifoldcf.core.lockmanager.TestZooKeeperLocks" todir="test-output"/>
             <test name="org.apache.manifoldcf.core.throttler.TestThrottler" todir="test-output"/>
 
@@ -1678,6 +1681,7 @@
             <formatter type="brief" usefile="false"/>
 
             <test name="org.apache.manifoldcf.crawler.tests.SchedulerHSQLDBTest" todir="test-output"/>
+            <test name="org.apache.manifoldcf.crawler.tests.InterruptionHSQLDBTest" todir="test-output"/>
 
         </junit>
     </target>
diff --git a/framework/buildfiles/connector-build.xml b/framework/buildfiles/connector-build.xml
index 3b5c66a..8bf78aa 100644
--- a/framework/buildfiles/connector-build.xml
+++ b/framework/buildfiles/connector-build.xml
@@ -428,7 +428,9 @@
 
     <target name="doc-rmi" depends="compile-wsdls,compile-xsds,has-RMI-check" if="hasRMI">
         <mkdir dir="dist/doc"/>
-        <javadoc destdir="dist/doc" source="1.7">
+        <javadoc destdir="dist/doc" source="1.7" use="true" locale="en_US">
+            <link href="http://docs.oracle.com/javase/7/docs/api/"/>
+            <link href="http://manifoldcf.apache.org/release/trunk/api/framework/"/>
             <classpath>
                 <path refid="connector-classpath"/>
             </classpath>
@@ -439,7 +441,9 @@
 
     <target name="doc" depends="compile-stubs,doc-rmi,compile-wsdls,compile-xsds,has-RMI-check,precompile-check" if="canBuild" unless="hasRMI">
         <mkdir dir="dist/doc"/>
-        <javadoc destdir="dist/doc" source="1.7">
+        <javadoc destdir="dist/doc" source="1.7" use="true" locale="en_US">
+            <link href="http://docs.oracle.com/javase/7/docs/api/"/>
+            <link href="http://manifoldcf.apache.org/release/trunk/api/framework/"/>
             <classpath>
                 <path refid="connector-classpath"/>
             </classpath>
@@ -733,21 +737,27 @@
         </copy>
     </target>
 
-    <target name="general-connector-proprietary-runnable-check" depends="lib-proprietary-instructions">
+    <target name="general-connector-proprietary-runnable-check" depends="lib-proprietary-instructions,has-RMI-check">
         <available file="dist/lib-proprietary-only/${ant.project.name}-PLACEHOLDER.txt" property="has-placeholder"/>
         <condition property="is-proprietary-runnable">
-            <not>
-                <isset property="has-placeholder"/>
-            </not>
+            <or>
+                <not>
+                    <isset property="has-placeholder"/>
+                </not>
+                <isset property="hasRMI"/>
+            </or>
         </condition>
     </target>
 
-    <target name="general-connector-runnable-check" depends="general-connector-proprietary-runnable-check">
+    <target name="general-connector-runnable-check" depends="general-connector-proprietary-runnable-check,has-RMI-check">
         <available file="lib-proprietary" type="dir" property="has-lib-proprietary"/>
         <condition property="is-runnable">
-            <not>
-                <isset property="has-lib-proprietary"/>
-            </not>
+            <or>
+                <not>
+                    <isset property="has-lib-proprietary"/>
+                </not>
+                <isset property="hasRMI"/>
+            </or>
         </condition>
     </target>
     
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMEncodingDetector.java b/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMEncodingDetector.java
index b965979..23594a0 100644
--- a/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMEncodingDetector.java
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/BOMEncodingDetector.java
@@ -19,6 +19,8 @@
 package org.apache.manifoldcf.core.fuzzyml;
 
 import org.apache.manifoldcf.core.interfaces.*;
+
+import java.nio.charset.StandardCharsets;
 import java.util.*;
 import java.io.*;
 
@@ -139,7 +141,7 @@
       {
         // Encoding detected as utf-8
         mark();
-        return establishEncoding("UTF-8");
+        return establishEncoding(StandardCharsets.UTF_8.name());
       }
       else
         return replay();
@@ -153,7 +155,7 @@
       {
         // Encoding detected as UTF-16LE.  Do NOT re-mark, we need this
         // character for later.
-        return establishEncoding("UTF-16LE");
+        return establishEncoding(StandardCharsets.UTF_16LE.name());
       }
       break;
 
@@ -173,7 +175,7 @@
       else
       {
         // Leave mark alone.
-        return establishEncoding("UTF-16LE");
+        return establishEncoding(StandardCharsets.UTF_16LE.name());
       }
 
     case BOM_SEEN_0000FE:
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/Parser.java b/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/Parser.java
index 23d21fc..055f188 100644
--- a/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/Parser.java
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/fuzzyml/Parser.java
@@ -20,6 +20,7 @@
 
 import org.apache.manifoldcf.core.interfaces.*;
 import java.io.*;
+import java.nio.charset.StandardCharsets;
 
 /** This is the main parser class.
 * This class has an entry point for both parsing XML and HTML.  The way the
@@ -61,7 +62,7 @@
     // Update our notion of what the character set is
     startingCharset = bomEncodingDetector.getEncoding();
     if (startingCharset == null)
-      startingCharset = "utf-8";
+      startingCharset = StandardCharsets.UTF_8.name();
     // Reset the stream
     replayableInputStream.restart(false);
     // Set up a detection chain that includes the XML detector.
@@ -92,7 +93,7 @@
     throws IOException, ManifoldCFException
   {
     if (startingCharset == null)
-      startingCharset = "utf-8";
+      startingCharset = StandardCharsets.UTF_8.name();
     ByteReceiver byteReceiver = new DecodingByteReceiver(65536, startingCharset, characterReceiver);
     // Process to completion
     if (byteReceiver.dealWithBytes(inputStream) == false)
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/BinaryInput.java b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/BinaryInput.java
index ceef1ef..09c9188 100644
--- a/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/BinaryInput.java
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/BinaryInput.java
@@ -95,14 +95,18 @@
       stream.close();
       stream = null;
     }
-    catch (InterruptedIOException e)
-    {
-      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-    }
     catch (IOException e)
     {
-      throw new ManifoldCFException("IO exception closing stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+      handleIOException(e,"closing stream");
     }
   }
 
+  protected static void handleIOException(IOException e, String context)
+    throws ManifoldCFException
+  {
+    if (e instanceof InterruptedIOException)
+      throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+    throw new ManifoldCFException("IO exception while "+context+": "+e.getMessage(),e);
+  }
+
 }
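
A minimal sketch of the pattern this helper centralizes (hypothetical class and method names, not part of the patch): callers funnel every IOException through one place, and InterruptedIOException keeps the INTERRUPTED error code so an agent shutdown is never misreported as a stream failure.

    import java.io.IOException;
    import java.io.InterruptedIOException;
    import java.io.InputStream;
    import org.apache.manifoldcf.core.interfaces.ManifoldCFException;

    public class HandleIOExceptionSketch
    {
      // Count the bytes in a stream, mapping IO failures the same way as above.
      public static long countBytes(InputStream in)
        throws ManifoldCFException
      {
        long count = 0L;
        try
        {
          while (in.read() != -1)
            count++;
        }
        catch (IOException e)
        {
          // Same mapping as the helper above: interruption is not a stream error.
          if (e instanceof InterruptedIOException)
            throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
          throw new ManifoldCFException("IO exception while counting bytes: "+e.getMessage(),e);
        }
        return count;
      }
    }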
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/CharacterInput.java b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/CharacterInput.java
index 6d6564c..778a32a 100644
--- a/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/CharacterInput.java
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/CharacterInput.java
@@ -107,13 +107,9 @@
       stream.close();
       stream = null;
     }
-    catch (InterruptedIOException e)
-    {
-      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-    }
     catch (IOException e)
     {
-      throw new ManifoldCFException("Error closing stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+      handleIOException(e, "closing stream");
     }
   }
 
@@ -125,4 +121,12 @@
   protected abstract void calculateHashValue()
     throws ManifoldCFException;
 
+  protected static void handleIOException(IOException e, String context)
+    throws ManifoldCFException
+  {
+    if (e instanceof InterruptedIOException)
+      throw new ManifoldCFException(e.getMessage(),e,ManifoldCFException.INTERRUPTED);
+    throw new ManifoldCFException("IO exception while "+context+": "+e.getMessage(),e);
+  }
+
 }
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/Specification.java b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/Specification.java
index a53b781..2b5e8fa 100644
--- a/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/Specification.java
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/Specification.java
@@ -68,4 +68,22 @@
     return (SpecificationNode)findChild(index);
   }
 
+  /** Duplicate this specification.
+  *@param readOnly is true if the duplicate (and its children) should be marked read-only.
+  *@return an exact duplicate
+  */
+  public Specification duplicate(boolean readOnly)
+  {
+    if (readOnly && this.readOnly)
+      return this;
+    Specification rval = new Specification();
+    int i = 0;
+    while (i < children.size())
+    {
+      SpecificationNode node = (SpecificationNode)children.get(i++);
+      rval.children.add(node.duplicate(readOnly));
+    }
+    rval.readOnly = readOnly;
+    return rval;
+  }
+
 }
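
A sketch of the copy-on-write behavior duplicate() enables (hypothetical usage, not part of the patch; assumes the no-argument Specification constructor is accessible to the caller): a read-only instance asked for another read-only duplicate hands back itself, so widely shared specifications avoid repeated deep copies.

    import org.apache.manifoldcf.core.interfaces.Specification;

    public class DuplicateSketch
    {
      public static void main(String[] args)
      {
        Specification original = new Specification();
        Specification frozen = original.duplicate(true);    // deep copy, marked read-only
        Specification shared = frozen.duplicate(true);      // short-circuit: same instance back
        Specification editable = frozen.duplicate(false);   // fresh, mutable deep copy
        System.out.println(shared == frozen);               // true
        System.out.println(editable == frozen);             // false
      }
    }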
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java
index 306dd7e..8614e06 100644
--- a/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileCharacterInput.java
@@ -26,25 +26,29 @@
 /** This class represents a temporary file character input
 * stream.  Call the "done" method to clean up the
 * file when done.
-* NOTE: The implied flow of this method is to be handled
+* NOTE: The implied flow of this method is to be handed
 * a file that has already been created by some means.  The
 * file must be a dedicated temporary file, which can be
-* destroyed when the data has been used.
+* destroyed when the data has been used.  However, this class can also
+* buffer data in memory if the data is not too large (that is, less than a
+* supplied cutoff value).
 */
 public class TempFileCharacterInput extends CharacterInput
 {
   public static final String _rcsid = "@(#)$Id: TempFileCharacterInput.java 988245 2010-08-23 18:39:35Z kwright $";
 
   protected File file;
+  protected byte[] inMemoryBuffer;
 
   protected final static int CHUNK_SIZE = 65536;
-
+  protected final static int DEFAULT_MAX_MEM_SIZE = 8192;
+  
   /** Construct from a non-length-delimited reader.
   *@param is is a reader to transfer from, to the end of the data.  This will, as a side effect, also calculate the character length
   *          and hash value for the data.
   */
   public TempFileCharacterInput(Reader is)
-    throws ManifoldCFException
+    throws ManifoldCFException, IOException
   {
     this(is,-1L);
   }
@@ -55,13 +59,93 @@
   *@param length is the length limit to transfer, or -1 if no limit
   */
   public TempFileCharacterInput(Reader is, long length)
-    throws ManifoldCFException
+    throws ManifoldCFException, IOException
+  {
+    this(is,length,DEFAULT_MAX_MEM_SIZE);
+  }
+
+  /** Construct from a length-delimited reader.
+  *@param is is a reader to transfer from, to the end of the data.  This will, as a side effect, also calculate the character length
+  *          and hash value for the data.
+  *@param length is the length limit to transfer, or -1 if no limit
+  *@param maxInMemoryLength is the maximum size to keep in memory before falling back to a backing File object.  Any data
+  *        kept in memory is guaranteed to be smaller than this size.
+  */
+  public TempFileCharacterInput(Reader is, long length, int maxInMemoryLength)
+    throws ManifoldCFException, IOException
   {
     super();
-    try
+    
+
+    // Before we do anything else, we read the first chunk.  This will allow
+    // us to determine if we're going to buffer the data in memory or not.  However,
+    // it may need to be read in pieces, since there's no guarantee the reader will
+    // return the requested amount in a single call.
+    int chunkSize = CHUNK_SIZE;
+
+    char[] buffer = new char[chunkSize];
+    int chunkTotal = 0;
+    boolean eofSeen = false;
+    while (true)
     {
+      int chunkAmount;
+      if (length == -1L || length > chunkSize)
+        chunkAmount = chunkSize-chunkTotal;
+      else
+      {
+        chunkAmount = (int)(length-chunkTotal);
+        eofSeen = true;
+      }
+      if (chunkAmount == 0)
+        break;
+      int readsize = is.read(buffer,chunkTotal,chunkAmount);
+      if (readsize == -1)
+      {
+        eofSeen = true;
+        break;
+      }
+      chunkTotal += readsize;
+    }
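+    // At this point chunkTotal holds the number of characters actually read, and
+    // eofSeen is true only when the transfer is known to end inside this first
+    // chunk (declared length reached, or end of stream hit).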
+    
+    // Set up hash digest, and calculate the initial hash.
+    java.security.MessageDigest md = ManifoldCF.startHash();
+    String chunkString = new String(buffer,0,chunkTotal);
+    ManifoldCF.addToHash(md,chunkString);
+
+    // In order to compute the byte length, we need to convert to a byte array, which is
+    // also our final form for in-memory storage.  But we don't want to  do the work
+    // unless there's a chance it will be needed.
+    byte[] byteBuffer;
+    if (eofSeen)
+      byteBuffer = chunkString.getBytes(StandardCharsets.UTF_8);
+    else
+      byteBuffer = null;
+
+    if (eofSeen && byteBuffer.length <= maxInMemoryLength)
+    {
+      // Buffer locally; don't create a temp file
+      file = null;
+      inMemoryBuffer = byteBuffer;
+      charLength = chunkTotal;
+      hashValue = ManifoldCF.getHashValue(md);
+    }
+    else
+    {
+      inMemoryBuffer = null;
+      // Create a temporary file!
+      long totalMoved = 0;
+      
       // Create a temporary file to put the stuff in
-      File outfile = File.createTempFile("_MC_","");
+      File outfile;
+      try
+      {
+        outfile = File.createTempFile("_MC_","");
+      }
+      catch (IOException e)
+      {
+        handleIOException(e,"creating backing file");
+        outfile = null;
+      }
       try
       {
         // Register the file for autodeletion, using our infrastructure.
@@ -69,21 +153,38 @@
         // deleteOnExit() causes memory leakage!
         // outfile.deleteOnExit();
 
-        // Set up hash digest and character length counter before we start anything.
-        java.security.MessageDigest md = ManifoldCF.startHash();
-
-        FileOutputStream outStream = new FileOutputStream(outfile);
-        // Create a Writer corresponding to the file output stream, and encode using utf-8
-        OutputStreamWriter outWriter = new OutputStreamWriter(outStream,StandardCharsets.UTF_8);
+        FileOutputStream outStream;
+        OutputStreamWriter outWriter;
         try
         {
-          char[] buffer = new char[CHUNK_SIZE];
-          long totalMoved = 0;
+          outStream = new FileOutputStream(outfile);
+          // Create a Writer corresponding to the file output stream, and encode using utf-8
+          outWriter = new OutputStreamWriter(outStream,StandardCharsets.UTF_8);
+        }
+        catch (IOException e)
+        {
+          handleIOException(e,"opening backing file");
+          outStream = null;
+          outWriter = null;
+        }
+        try
+        {
+          // Transfer what we've already read.
+          try
+          {
+            outWriter.write(buffer,0,chunkTotal);
+          }
+          catch (IOException e)
+          {
+            handleIOException(e,"writing backing file");
+          }
+          totalMoved += chunkTotal;
+          // Now, transfer the remainder
           while (true)
           {
             int moveAmount;
-            if (length == -1L || length-totalMoved > CHUNK_SIZE)
-              moveAmount = CHUNK_SIZE;
+            if (length == -1L || length-totalMoved > chunkSize)
+              moveAmount = chunkSize;
             else
               moveAmount = (int)(length-totalMoved);
             if (moveAmount == 0)
@@ -92,22 +193,36 @@
             int readsize = is.read(buffer,0,moveAmount);
             if (readsize == -1)
               break;
-            outWriter.write(buffer,0,readsize);
+            try
+            {
+              outWriter.write(buffer,0,readsize);
+            }
+            catch (IOException e)
+            {
+              handleIOException(e,"writing backing file");
+            }
             ManifoldCF.addToHash(md,new String(buffer,0,readsize));
             totalMoved += readsize;
           }
 
-          charLength = totalMoved;
-          hashValue = ManifoldCF.getHashValue(md);
         }
         finally
         {
-          outWriter.close();
+          try
+          {
+            outWriter.close();
+          }
+          catch (IOException e)
+          {
+            handleIOException(e,"closing backing file");
+          }
         }
 
         // Now, create the input stream.
         // Save the file name
         file = outfile;
+        charLength = totalMoved;
+        hashValue = ManifoldCF.getHashValue(md);
 
       }
       catch (Throwable e)
@@ -119,20 +234,14 @@
           throw (Error)e;
         if (e instanceof RuntimeException)
           throw (RuntimeException)e;
-        if (e instanceof Exception)
-          throw (Exception)e;
-        throw new Exception("Unexpected throwable: "+e.getMessage(),e);
+        if (e instanceof ManifoldCFException)
+          throw (ManifoldCFException)e;
+        if (e instanceof IOException)
+          throw (IOException)e;
+        throw new RuntimeException("Unexpected throwable of type "+e.getClass().getName()+": "+e.getMessage(),e);
       }
     }
-    catch (InterruptedIOException e)
-    {
-      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-    }
-    catch (Exception e)
-    {
-      throw new ManifoldCFException("Cannot write temporary file: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
-    }
-
+    
   }
 
   /** Construct from an existing temporary file.
@@ -141,6 +250,7 @@
   public TempFileCharacterInput(File tempFile)
   {
     super();
+    inMemoryBuffer = null;
     file = tempFile;
     ManifoldCF.addFile(file);
     // deleteOnExit() causes memory leakage; better to leak files on hard shutdown than memory.
@@ -168,6 +278,10 @@
         throw new ManifoldCFException("No such file: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
       }
     }
+    else if (inMemoryBuffer != null)
+    {
+      return new ByteArrayInputStream(inMemoryBuffer);
+    }
     return null;
   }
 
@@ -178,6 +292,8 @@
   {
     if (file != null)
       return file.length();
+    else if (inMemoryBuffer != null)
+      return inMemoryBuffer.length;
     return 0L;
   }
 
@@ -185,15 +301,22 @@
   protected void openStream()
     throws ManifoldCFException
   {
-    try
+    if (file != null)
     {
-      // Open the file and create a stream.
-      InputStream binaryStream = new FileInputStream(file);
-      stream = new InputStreamReader(binaryStream, StandardCharsets.UTF_8);
+      try
+      {
+        // Open the file and create a stream.
+        InputStream binaryStream = new FileInputStream(file);
+        stream = new InputStreamReader(binaryStream, StandardCharsets.UTF_8);
+      }
+      catch (FileNotFoundException e)
+      {
+        throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+      }
     }
-    catch (FileNotFoundException e)
+    else if (inMemoryBuffer != null)
     {
-      throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+      stream = new InputStreamReader(new ByteArrayInputStream(inMemoryBuffer),StandardCharsets.UTF_8);
     }
   }
 
@@ -204,10 +327,12 @@
     // Create a new TempFileCharacterInput object, and fill it with our current stuff
     TempFileCharacterInput rval = new TempFileCharacterInput();
     rval.file = file;
+    rval.inMemoryBuffer = inMemoryBuffer;
     rval.stream = stream;
     rval.charLength = charLength;
     rval.hashValue = hashValue;
     file = null;
+    inMemoryBuffer = null;
     stream = null;
     charLength = -1L;
     hashValue = null;
@@ -250,7 +375,13 @@
     try
     {
       // Open the file and create a stream.
-      InputStream binaryStream = new FileInputStream(file);
+      InputStream binaryStream;
+      if (file != null)
+        binaryStream = new FileInputStream(file);
+      else if (inMemoryBuffer != null)
+        binaryStream = new ByteArrayInputStream(inMemoryBuffer);
+      else
+        binaryStream = null;
       Reader reader = new InputStreamReader(binaryStream,StandardCharsets.UTF_8);
       try
       {
@@ -277,13 +408,9 @@
         reader.close();
       }
     }
-    catch (InterruptedIOException e)
-    {
-      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-    }
     catch (IOException e)
     {
-      throw new ManifoldCFException("Can't scan file: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+      handleIOException(e,"scanning file");
     }
   }
 
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java
index b422b56..47b1cee 100644
--- a/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/TempFileInput.java
@@ -34,15 +34,17 @@
   public static final String _rcsid = "@(#)$Id: TempFileInput.java 988245 2010-08-23 18:39:35Z kwright $";
 
   protected File file;
+  protected byte[] inMemoryBuffer;
 
   protected final static int CHUNK_SIZE = 65536;
+  protected final static int DEFAULT_MAX_MEM_SIZE = 8192;
 
   /** Construct from an input stream.
   * This will also create a temporary, backing file.
   *@param is is the input stream to use to construct the temporary file.
   */
   public TempFileInput(InputStream is)
-    throws ManifoldCFException
+    throws ManifoldCFException, IOException
   {
     this(is,-1L);
   }
@@ -52,29 +54,112 @@
   *@param length is the maximum number of bytes to transfer, or -1 if no limit.
   */
   public TempFileInput(InputStream is, long length)
-    throws ManifoldCFException
+    throws ManifoldCFException, IOException
+  {
+    this(is,length,DEFAULT_MAX_MEM_SIZE);
+  }
+  
+  /** Construct from a length-delimited input stream.
+  *@param is is the input stream.
+  *@param length is the maximum number of bytes to transfer, or -1 if no limit.
+  *@param maxMemSize is the maximum number of bytes to keep in memory in lieu of using a file.
+  */
+  public TempFileInput(InputStream is, long length, int maxMemSize)
+    throws ManifoldCFException, IOException
   {
     super();
-    try
+    
+    // Before we do anything else, we read the first chunk.  This will allow
+    // us to determine if we're going to buffer the data in memory or not.  However,
+    // it may need to be read in pieces, since there's no guarantee the stream will
+    // return the requested amount in a single call.
+    int chunkSize = CHUNK_SIZE;
+
+    byte[] buffer = new byte[chunkSize];
+    int chunkTotal = 0;
+    boolean eofSeen = false;
+    while (true)
     {
+      int chunkAmount;
+      if (length == -1L || length > chunkSize)
+        chunkAmount = chunkSize-chunkTotal;
+      else
+      {
+        chunkAmount = (int)(length-chunkTotal);
+        eofSeen = true;
+      }
+      if (chunkAmount == 0)
+        break;
+      int readsize = is.read(buffer,chunkTotal,chunkAmount);
+      if (readsize == -1)
+      {
+        eofSeen = true;
+        break;
+      }
+      chunkTotal += readsize;
+    }
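+    // chunkTotal now holds the number of bytes actually read; eofSeen is true only
+    // when the transfer is known to end inside this first chunk (declared length
+    // reached, or end of stream hit).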
+
+    if (eofSeen && chunkTotal < maxMemSize)
+    {
+      // In memory!!
+      file = null;
+      inMemoryBuffer = new byte[chunkTotal];
+      System.arraycopy(buffer,0,inMemoryBuffer,0,chunkTotal);
+      this.length = chunkTotal;
+    }
+    else
+    {
+      inMemoryBuffer = null;
       // Create a temporary file to put the stuff in
-      File outfile = File.createTempFile("_MC_","");
+      File outfile;
+      try
+      {
+        outfile = File.createTempFile("_MC_","");
+      }
+      catch (IOException e)
+      {
+        handleIOException(e,"creating backing file");
+        outfile = null;
+      }
       try
       {
         // Register the file for autodeletion, using our infrastructure.
         ManifoldCF.addFile(outfile);
         // deleteOnExit() causes memory leakage!
         // outfile.deleteOnExit();
-        FileOutputStream outStream = new FileOutputStream(outfile);
+        FileOutputStream outStream;
         try
         {
-          byte[] buffer = new byte[CHUNK_SIZE];
+          outStream = new FileOutputStream(outfile);
+        }
+        catch (IOException e)
+        {
+          handleIOException(e,"opening backing file");
+          outStream = null;
+        }
+        try
+        {
           long totalMoved = 0;
+            
+          // Transfer what we've already read.
+          try
+          {
+            outStream.write(buffer,0,chunkTotal);
+          }
+          catch (IOException e)
+          {
+            handleIOException(e,"writing backing file");
+          }
+          totalMoved += chunkTotal;
+
           while (true)
           {
             int moveAmount;
-            if (length == -1L || length-totalMoved > CHUNK_SIZE)
-              moveAmount = CHUNK_SIZE;
+            if (length == -1L || length-totalMoved > chunkSize)
+              moveAmount = chunkSize;
             else
               moveAmount = (int)(length-totalMoved);
             if (moveAmount == 0)
@@ -83,14 +168,28 @@
             int readsize = is.read(buffer,0,moveAmount);
             if (readsize == -1)
               break;
-            outStream.write(buffer,0,readsize);
+            try
+            {
+              outStream.write(buffer,0,readsize);
+            }
+            catch (IOException e)
+            {
+              handleIOException(e,"writing backing file");
+            }
             totalMoved += readsize;
           }
           // System.out.println(" Moved "+Long.toString(totalMoved));
         }
         finally
         {
-          outStream.close();
+          try
+          {
+            outStream.close();
+          }
+          catch (IOException e)
+          {
+            handleIOException(e,"closing backing file");
+          }
         }
 
         // Now, create the input stream.
@@ -108,20 +207,13 @@
           throw (Error)e;
         if (e instanceof RuntimeException)
           throw (RuntimeException)e;
-        if (e instanceof Exception)
-          throw (Exception)e;
-        throw new Exception("Unexpected throwable: "+e.getMessage(),e);
+        if (e instanceof ManifoldCFException)
+          throw (ManifoldCFException)e;
+        if (e instanceof IOException)
+          throw (IOException)e;
+        throw new RuntimeException("Unexpected throwable of type "+e.getClass().getName()+": "+e.getMessage(),e);
       }
     }
-    catch (InterruptedIOException e)
-    {
-      throw new ManifoldCFException("Interrupted: "+e.getMessage(),e,ManifoldCFException.INTERRUPTED);
-    }
-    catch (Exception e)
-    {
-      throw new ManifoldCFException("Cannot write temporary file",e,ManifoldCFException.GENERAL_ERROR);
-    }
-
   }
 
   /** Construct from an existing temporary file.
@@ -130,6 +222,7 @@
   public TempFileInput(File tempFile)
   {
     super();
+    inMemoryBuffer = null;
     file = tempFile;
     ManifoldCF.addFile(file);
     // deleteOnExit() causes memory leakage; better to leak files on hard shutdown than memory.
@@ -146,9 +239,11 @@
   {
     TempFileInput rval = new TempFileInput();
     rval.file = file;
+    rval.inMemoryBuffer = inMemoryBuffer;
     rval.stream = stream;
     rval.length = length;
     file = null;
+    inMemoryBuffer = null;
     stream = null;
     length = -1L;
     return rval;
@@ -168,21 +263,31 @@
   protected void openStream()
     throws ManifoldCFException
   {
-    try
+    if (file != null)
     {
-      // Open the file and create a stream.
-      stream = new FileInputStream(file);
+      try
+      {
+        // Open the file and create a stream.
+        stream = new FileInputStream(file);
+      }
+      catch (FileNotFoundException e)
+      {
+        throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+      }
     }
-    catch (FileNotFoundException e)
+    else if (inMemoryBuffer != null)
     {
-      throw new ManifoldCFException("Can't create stream: "+e.getMessage(),e,ManifoldCFException.GENERAL_ERROR);
+      stream = new ByteArrayInputStream(inMemoryBuffer);
     }
   }
 
   protected void calculateLength()
     throws ManifoldCFException
   {
-    this.length = file.length();
+    if (file != null)
+      this.length = file.length();
+    else if (inMemoryBuffer != null)
+      this.length = inMemoryBuffer.length;
   }
 
 }
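
A usage sketch of the new in-memory cutoff (hypothetical, not part of the patch; TempFileCharacterInput behaves analogously for character data). Only constructors shown above are used:

    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;
    import org.apache.manifoldcf.core.interfaces.TempFileInput;

    public class BufferingSketch
    {
      public static void main(String[] args) throws Exception
      {
        byte[] data = "hello world".getBytes(StandardCharsets.UTF_8);

        // 11 bytes < DEFAULT_MAX_MEM_SIZE (8192) and EOF is seen in the first chunk,
        // so the bytes stay in inMemoryBuffer and no temporary file is created.
        TempFileInput inMemory = new TempFileInput(new ByteArrayInputStream(data));

        // Forcing a 1-byte cutoff spools the same data to a registered temp file.
        TempFileInput spooled = new TempFileInput(new ByteArrayInputStream(data), -1L, 1);
      }
    }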
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/VersionContext.java b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/VersionContext.java
new file mode 100644
index 0000000..391c1ee
--- /dev/null
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/interfaces/VersionContext.java
@@ -0,0 +1,67 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.interfaces;
+
+/** An instance of this class represents a version string, in combination with the configuration parameters
+* and specification that produced it.  Some clients will use the version string (e.g. the database), while others
+* may find it more convenient to use the parameters or the specification.  However:
+* (1) It is ALWAYS wrong to use data from configuration or specification that is NOT represented in some
+*     way in the version string, either by exact representation, or by some proxy value;
+* (2) Configuration and Specification are guaranteed to be the identical ones which were used during creation
+*     of the version string;
+* (3) Configuration and Specification are provided as CONVENIENCES; they are not to be considered primary
+*     data for these objects.
+*/
+public class VersionContext
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  // Member variables
+  protected final String versionString;
+  protected final ConfigParams params;
+  protected final Specification specification;
+
+  /** Constructor.
+  */
+  public VersionContext(String versionString, ConfigParams params, Specification specification)
+  {
+    this.versionString = versionString;
+    this.params = params;
+    this.specification = specification;
+  }
+
+  /** Retrieve the version String */
+  public String getVersionString()
+  {
+    return versionString;
+  }
+  
+  /** Retrieve the configuration parameters */
+  public ConfigParams getParams()
+  {
+    return params;
+  }
+  
+  /** Retrieve the specification */
+  public Specification getSpecification()
+  {
+    return specification;
+  }
+  
+}
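
A sketch of how a connector might build and consume a VersionContext (hypothetical usage, not part of the patch; assumes ConfigParams has an accessible no-argument constructor, and previousVersionString stands in for whatever the framework recorded on the prior crawl):

    import org.apache.manifoldcf.core.interfaces.ConfigParams;
    import org.apache.manifoldcf.core.interfaces.Specification;
    import org.apache.manifoldcf.core.interfaces.VersionContext;

    public class VersionContextSketch
    {
      public static boolean needsReindex(String previousVersionString)
      {
        ConfigParams params = new ConfigParams();
        Specification spec = new Specification();
        // Anything that affects indexing must be encoded in the version string;
        // params and spec travel along purely as conveniences.
        VersionContext ctx = new VersionContext("fieldmap=title:dc_title", params, spec);
        return !ctx.getVersionString().equals(previousVersionString);
      }
    }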
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONArrayReader.java b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONArrayReader.java
new file mode 100644
index 0000000..9f669d2
--- /dev/null
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONArrayReader.java
@@ -0,0 +1,97 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.jsongen;
+
+import java.io.*;
+import java.util.*;
+
+/** This class describes a JSON array reader. */
+public class JSONArrayReader extends JSONReader
+{
+  protected final static int STATE_PREBRACKET = 0;
+  protected final static int STATE_ELEMENT = 1;
+  protected final static int STATE_PREEND = 2;
+  protected final static int STATE_DONE = 3;
+  
+  protected int state = STATE_PREBRACKET;
+  protected final List<JSONReader> elements = new ArrayList<JSONReader>();
+  protected int elementIndex;
+  
+  public JSONArrayReader()
+  {
+  }
+  
+  public JSONArrayReader(JSONReader[] elements)
+  {
+    for (JSONReader element : elements)
+    {
+      addArrayElement(element);
+    }
+  }
+  
+  public JSONArrayReader addArrayElement(JSONReader element)
+  {
+    elements.add(element);
+    return this;
+  }
+
+  @Override
+  public int read()
+    throws IOException
+  {
+    int newState;
+    switch (state)
+    {
+    case STATE_PREBRACKET:
+      if (elements.size() == 0)
+        state = STATE_PREEND;
+      else
+      {
+        state = STATE_ELEMENT;
+        elementIndex = 0;
+      }
+      return '[';
+    case STATE_PREEND:
+      state = STATE_DONE;
+      return ']';
+    case STATE_DONE:
+      return -1;
+    case STATE_ELEMENT:
+      int x = elements.get(elementIndex).read();
+      if (x == -1)
+      {
+        elementIndex++;
+        if (elementIndex == elements.size())
+        {
+          state = STATE_DONE;
+          return ']';
+        }
+        else
+          return ',';
+      }
+      else
+        return x;
+    default:
+      throw new IllegalStateException("Unknown state: "+state);
+    }
+  }
+
+}
+
+
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONDoubleReader.java b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONDoubleReader.java
new file mode 100644
index 0000000..0d47615
--- /dev/null
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONDoubleReader.java
@@ -0,0 +1,33 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.jsongen;
+
+import java.io.*;
+
+/** This class describes a JSON double reader. */
+public class JSONDoubleReader extends JSONValueReader
+{
+  public JSONDoubleReader(double value)
+  {
+    super(new StringReader(Double.toString(value)));
+  }
+  
+}
+
+
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONIntegerReader.java b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONIntegerReader.java
new file mode 100644
index 0000000..fc831a2
--- /dev/null
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONIntegerReader.java
@@ -0,0 +1,33 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.jsongen;
+
+import java.io.*;
+
+/** This class describes a JSON integer reader. */
+public class JSONIntegerReader extends JSONValueReader
+{
+  public JSONIntegerReader(int value)
+  {
+    super(new StringReader(Integer.toString(value)));
+  }
+  
+}
+
+
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONNameValueReader.java b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONNameValueReader.java
new file mode 100644
index 0000000..deb240a
--- /dev/null
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONNameValueReader.java
@@ -0,0 +1,73 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.jsongen;
+
+import java.io.*;
+
+/** This class describes a JSON name/value object reader. */
+public class JSONNameValueReader extends JSONReader
+{
+  protected final static int STATE_NAME = 0;
+  protected final static int STATE_VALUE = 1;
+  protected final static int STATE_DONE = 2;
+  
+  protected final JSONReader name;
+  protected final JSONReader value;
+  
+  protected int state = STATE_NAME;
+  
+  public JSONNameValueReader(JSONStringReader name, JSONReader value)
+  {
+    this.name = name;
+    this.value = value;
+  }
+
+  @Override
+  public int read()
+    throws IOException
+  {
+    int x;
+    switch (state)
+    {
+    case STATE_NAME:
+      x = name.read();
+      if (x == -1)
+      {
+        state = STATE_VALUE;
+        return ':';
+      }
+      return x;
+    case STATE_VALUE:
+      x = value.read();
+      if (x == -1)
+      {
+        state = STATE_DONE;
+        return -1;
+      }
+      return x;
+    case STATE_DONE:
+      return -1;
+    default:
+      throw new IllegalStateException("Unknown state: "+state);
+    }
+  }
+
+}
+
+
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONObjectReader.java b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONObjectReader.java
new file mode 100644
index 0000000..0023140
--- /dev/null
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONObjectReader.java
@@ -0,0 +1,88 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.jsongen;
+
+import java.io.*;
+import java.util.*;
+
+/** This class describes a JSON object reader. */
+public class JSONObjectReader extends JSONReader
+{
+  protected final static int STATE_PREBRACE = 0;
+  protected final static int STATE_PAIRBEGIN = 1;
+  protected final static int STATE_PREEND = 2;
+  protected final static int STATE_DONE = 3;
+  
+  protected int state = STATE_PREBRACE;
+  protected final List<JSONReader> pairs = new ArrayList<JSONReader>();
+  protected int readerIndex;
+
+  public JSONObjectReader()
+  {
+  }
+  
+  public JSONObjectReader addNameValuePair(JSONNameValueReader pair)
+  {
+    pairs.add(pair);
+    return this;
+  }
+  
+  @Override
+  public int read()
+    throws IOException
+  {
+    switch (state)
+    {
+    case STATE_PREBRACE:
+      if (pairs.size() == 0)
+        state = STATE_PREEND;
+      else
+      {
+        state = STATE_PAIRBEGIN;
+        readerIndex = 0;
+      }
+      return '{';
+    case STATE_PREEND:
+      state = STATE_DONE;
+      return '}';
+    case STATE_DONE:
+      return -1;
+    case STATE_PAIRBEGIN:
+      int x = pairs.get(readerIndex).read();
+      if (x == -1)
+      {
+        readerIndex++;
+        if (readerIndex == pairs.size())
+        {
+          state = STATE_DONE;
+          return '}';
+        }
+        else
+          return ',';
+      }
+      else
+        return x;
+    default:
+      throw new IllegalStateException("Unknown state: "+state);
+    }
+  }
+
+}
+
+
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONReader.java b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONReader.java
new file mode 100644
index 0000000..0811bda
--- /dev/null
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONReader.java
@@ -0,0 +1,60 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.jsongen;
+
+import java.io.*;
+
+/** This base class describes a JSON reader. */
+public abstract class JSONReader extends Reader
+{
+
+  @Override
+  public int read(char[] cbuf, int off, int len)
+    throws IOException
+  {
+    int amt = 0;
+    while (true)
+    {
+      if (len == 0)
+        return amt;
+      int theChar = read();
+      if (theChar == -1)
+      {
+        if (amt == 0)
+          return -1;
+        return amt;
+      }
+      cbuf[off++] = (char)theChar;
+      len--;
+    }
+  }
+  
+  @Override
+  public abstract int read()
+    throws IOException;
+  
+  @Override
+  public void close()
+    throws IOException
+  {
+  }
+  
+}
+
+
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONStringReader.java b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONStringReader.java
new file mode 100644
index 0000000..bc7e87d
--- /dev/null
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONStringReader.java
@@ -0,0 +1,123 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.jsongen;
+
+import java.io.*;
+
+/** This class describes a JSON string reader. */
+public class JSONStringReader extends JSONReader
+{
+  // Strings need to be escaped, therefore we have our own.
+  /*
+   All Unicode characters may be placed within the
+   quotation marks except for the characters that must be escaped:
+   quotation mark, reverse solidus, and the control characters (U+0000
+   through U+001F).
+
+   Any character may be escaped.  If the character is in the Basic
+   Multilingual Plane (U+0000 through U+FFFF), then it may be
+   represented as a six-character sequence: a reverse solidus, followed
+   by the lowercase letter u, followed by four hexadecimal digits that
+   encode the character's code point.  The hexadecimal letters A through
+   F can be upper or lowercase.  So, for example, a string containing
+   only a single reverse solidus character may be represented as
+   "\u005C".
+
+   Alternatively, there are two-character sequence escape
+   representations of some popular characters.  So, for example, a
+   string containing only a single reverse solidus character may be
+   represented more compactly as "\\".
+  */
+  
+  protected final static int STATE_PREQUOTE = 0;
+  protected final static int STATE_U = 1;
+  protected final static int STATE_1ST = 2;
+  protected final static int STATE_2ND = 3;
+  protected final static int STATE_3RD = 4;
+  protected final static int STATE_4TH = 5;
+  protected final static int STATE_NEXTCHAR = 6;
+  protected final static int STATE_DONE = 7;
+
+  protected final Reader inputReader;
+  
+  protected int state = STATE_PREQUOTE;
+  protected String escapedChar;
+
+  public JSONStringReader(String value)
+  {
+    inputReader = new StringReader(value);
+  }
+  
+  public JSONStringReader(Reader value)
+  {
+    inputReader = value;
+  }
+
+  @Override
+  public int read()
+    throws IOException
+  {
+    int x;
+    switch (state)
+    {
+    case STATE_PREQUOTE:
+      state = STATE_NEXTCHAR;
+      return '"';
+    case STATE_NEXTCHAR:
+      x = inputReader.read();
+      if (x == -1)
+      {
+        state = STATE_DONE;
+        return '"';
+      }
+      else
+      {
+        if (x < ' ' || x == '"' || x == '\\')
+        {
+          escapedChar = "000" + Integer.toHexString(x);
+          escapedChar = escapedChar.substring(escapedChar.length()-4);
+          state = STATE_U;
+          return '\\';
+        }
+        return x;
+      }
+    case STATE_U:
+      state = STATE_1ST;
+      return 'u';
+    case STATE_1ST:
+      state = STATE_2ND;
+      return escapedChar.charAt(0);
+    case STATE_2ND:
+      state = STATE_3RD;
+      return escapedChar.charAt(1);
+    case STATE_3RD:
+      state = STATE_4TH;
+      return escapedChar.charAt(2);
+    case STATE_4TH:
+      state = STATE_NEXTCHAR;
+      return escapedChar.charAt(3);
+    case STATE_DONE:
+      return -1;
+    default:
+      throw new IllegalStateException("Unknown state: "+state);
+    }
+  }
+}
+
+
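
Putting the jsongen pieces together, a sketch of the pull model (hypothetical usage; the TestJsonGen unit test added later in this patch exercises the same classes): composed readers emit JSON one character at a time, with JSONStringReader escaping quotes, backslashes, and control characters on the fly, so the full JSON text is never materialized as a single String.

    import java.io.Reader;
    import java.io.StringWriter;
    import org.apache.manifoldcf.core.jsongen.*;

    public class JsonGenSketch
    {
      public static void main(String[] args) throws Exception
      {
        Reader json = new JSONObjectReader()
          .addNameValuePair(new JSONNameValueReader(new JSONStringReader("id"),
            new JSONIntegerReader(42)))
          .addNameValuePair(new JSONNameValueReader(new JSONStringReader("note"),
            new JSONStringReader("say \"hi\"")));
        StringWriter out = new StringWriter();
        char[] buf = new char[16];
        int amt;
        while ((amt = json.read(buf, 0, buf.length)) != -1)
          out.write(buf, 0, amt);
        System.out.println(out.toString());  // {"id":42,"note":"say \u0022hi\u0022"}
      }
    }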
diff --git a/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONValueReader.java b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONValueReader.java
new file mode 100644
index 0000000..ed12b70
--- /dev/null
+++ b/framework/core/src/main/java/org/apache/manifoldcf/core/jsongen/JSONValueReader.java
@@ -0,0 +1,42 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.jsongen;
+
+import java.io.*;
+
+/** This class describes a JSON value reader, which can be any terminal value (e.g. string, integer, float). */
+public class JSONValueReader extends JSONReader
+{
+  /** Wrapped reader */
+  protected final Reader value;
+  
+  public JSONValueReader(Reader value)
+  {
+    this.value = value;
+  }
+
+  @Override
+  public int read()
+    throws IOException
+  {
+    return value.read();
+  }
+}
+
+
diff --git a/framework/core/src/test/java/org/apache/manifoldcf/core/common/DateTest.java b/framework/core/src/test/java/org/apache/manifoldcf/core/common/DateTest.java
index 24e23f3..ff3ef3b 100644
--- a/framework/core/src/test/java/org/apache/manifoldcf/core/common/DateTest.java
+++ b/framework/core/src/test/java/org/apache/manifoldcf/core/common/DateTest.java
@@ -43,6 +43,9 @@
     assertNotNull(d);
     d = DateParser.parseISO8601Date("2012-11-15T01:32:33.001-04:00");
     assertNotNull(d);
+    // Microsoft variation
+    d = DateParser.parseISO8601Date("2014-06-03 11:21:37");
+    assertNotNull(d);
   }
 
 
diff --git a/framework/core/src/test/java/org/apache/manifoldcf/core/jsongen/TestJsonGen.java b/framework/core/src/test/java/org/apache/manifoldcf/core/jsongen/TestJsonGen.java
new file mode 100644
index 0000000..085e5f6
--- /dev/null
+++ b/framework/core/src/test/java/org/apache/manifoldcf/core/jsongen/TestJsonGen.java
@@ -0,0 +1,72 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.core.jsongen;
+
+import java.io.*;
+import org.junit.*;
+import static org.junit.Assert.*;
+
+public class TestJsonGen
+{
+  
+  @Test
+  public void testArrayFormation()
+    throws IOException
+  {
+    JSONArrayReader jr = new JSONArrayReader();
+    jr.addArrayElement(new JSONStringReader("hello"))
+      .addArrayElement(new JSONStringReader("world"));
+    compare("[\"hello\",\"world\"]",jr);
+    compare("[]",new JSONArrayReader());
+  }
+
+  @Test
+  public void testObjectFormation()
+    throws IOException
+  {
+    JSONObjectReader jr = new JSONObjectReader();
+    jr.addNameValuePair(new JSONNameValueReader(new JSONStringReader("hi"),new JSONIntegerReader(1)))
+      .addNameValuePair(new JSONNameValueReader(new JSONStringReader("there"),new JSONDoubleReader(1.0)));
+    compare("{\"hi\":1,\"there\":1.0}",jr);
+    compare("{}",new JSONObjectReader());
+  }
+  
+  @Test
+  public void testStringEscaping()
+    throws IOException
+  {
+    compare("\"t1\\u000da\"",new JSONStringReader("t1\ra"));
+    compare("\"t2\\u0009\\u0022\\u005c\"",new JSONStringReader("t2\t\"\\"));
+  }
+  
+  protected void compare(String value, Reader reader)
+    throws IOException
+  {
+    StringBuilder sb = new StringBuilder();
+    while (true)
+    {
+      int character = reader.read();
+      if (character == -1)
+        break;
+      sb.append((char)character);
+    }
+    assertEquals(value,sb.toString());
+  }
+
+}
diff --git a/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/Javascript.py b/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/Javascript.py
index 8953c11..6bf6014 100644
--- a/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/Javascript.py
+++ b/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/Javascript.py
@@ -53,12 +53,18 @@
         # an error
         raise Exception("Attempt to construct a non-class object: %s" % unicode(self))
 
+    def get_type( self, member_name ):
+        # Return a plain type name string, consistent with type_value( )
+        return "undefined"
+
     def get_value( self, member_name ):
         raise Exception("Object %s has no such property '%s'" % (unicode(self), member_name) )
 
     def set_value( self, member_name, value ):
         raise Exception("Object %s has no such property '%s'" % (unicode(self), member_name) )
 
+    def type_value( self ):
+        raise Exception("Object %s has no type value" % unicode(self) )
+
     def str_value( self ):
         raise Exception("Object %s has no string value" % unicode(self) )
 
@@ -78,6 +84,11 @@
         # return self.
         return self
 
+    def dereference_type( self ):
+        # For objects that are references, this operation returns the referent's type.
+        # All others return their own type value.
+        return self.type_value( )
+
 # Array object.
 class JSArray( JSObject ):
 
@@ -87,6 +98,15 @@
         self.array_size = array_size
         self.array = { }
 
+    def get_type( self, member_name ):
+        if member_name == "length":
+            return "number"
+        index = int(member_name)
+        assert index >= 0 and index < self.array_size
+        if index >= len(self.array):
+            return JSObject.get_type( self, member_name )
+        return self.array[ index ].type_value( )
+
     def get_value( self, member_name ):
         if member_name == "length":
             return JSNumber( self.array_size )
@@ -274,6 +294,9 @@
         JSObject.__init__( self )
         self.value = value
 
+    def type_value( self ):
+        return unicode( "number" )
+
     def num_value( self ):
         return self.value
 
@@ -326,6 +349,9 @@
         self.is_global = is_global
         self.is_insensitive = is_insensitive
 
+    def type_value( self ):
+        return unicode( "regexp" )
+
     def get_value( self, member_name ):
         # A regexp has a method property for the test method (which is the only one
         # we currently support)
@@ -511,6 +537,9 @@
             return JSSubstring( self )
         return JSObject.get_value( self, member_name )
 
+    def type_value( self ):
+        return unicode( "string" )
+
     def str_value( self ):
         return unicode( self.value )
 
@@ -552,12 +581,18 @@
     def call( self, argset, context ):
         return self.dereference().call(argset,context)
 
+    def get_type( self, member_name ):
+        return self.dereference().get_type(member_name)
+
     def get_value( self, member_name ):
         return self.dereference().get_value(member_name)
 
     def set_value( self, member_name, value ):
         self.dereference().set_value(member_name,value)
 
+    def type_value( self ):
+        return self.dereference_type()
+
     def str_value( self ):
         return self.dereference().str_value()
 
@@ -582,6 +617,9 @@
     def dereference( self ):
         return self.object.get_value( self.member )
 
+    def dereference_type( self ):
+        return self.object.get_type( self.member )
+
     def set_reference( self, newobject ):
         self.object.set_value( self.member, newobject )
 
@@ -1508,6 +1546,15 @@
             if parse_only:
                 return JSNull()
             return self.pre_plusplus ( nextvalue )
+        elif token != None and token.get_symbol( ) == "typeof":
+            # typeof operator
+            self.advance( )
+            nextvalue = self.evaluate_expr9( context, place, parse_only )
+            if nextvalue == None:
+                raise Exception("Missing expression after 'typeof' in %s" % place)
+            if parse_only:
+                return JSNull()
+            return self.typeof( nextvalue )
         elif token != None and token.get_symbol( ) == "new":
             self.advance( )
             token = self.peek( )
@@ -1576,6 +1623,9 @@
     def positive( self, value1 ):
         return JSNumber( +value1.num_value( ) )
 
+    def typeof( self, value1 ):
+        return JSString( value1.type_value( ) )
+
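+    # Note: "typeof" wraps the operand's type_value( ) in a JSString, so evaluating
+    # e.g. "typeof 3" under this interpreter yields JSString( "number" ).
+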
     def post_minusminus( self, value1 ):
         rval = value1.dereference( )
         value1.set_reference( self.minus( value1, JSNumber( 1 ) ).dereference( ) )
diff --git a/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/VirtualBrowser.py b/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/VirtualBrowser.py
index 19f5416..57ccb12 100755
--- a/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/VirtualBrowser.py
+++ b/framework/core/src/test/resources/org/apache/manifoldcf/core/tests/VirtualBrowser.py
@@ -88,6 +88,9 @@
     def get_property( self, property_name ):
         raise Exception("Form element '%s' has no such property '%s'" % (self.element_name, property_name))
 
+    # Get the type of a property (via javascript)
+    def get_property_type( self, property_name ):
+        return "undefined"
 
 # Button base class
 class VirtualButton( VirtualFormElement ):
@@ -217,6 +220,14 @@
         else:
             return VirtualFormDataElement.get_property( self, property_name )
 
+    def get_property_type( self, property_name ):
+        if property_name == "type":
+            return "string"
+        if property_name == "value":
+            return "string"
+        else:
+            return VirtualFormDataElement.get_property_type( self, property_name )
+
 # File
 class VirtualFileBrowser( VirtualFormDataElement ):
 
@@ -249,6 +260,12 @@
         else:
             return VirtualFormDataElement.get_property( self, property_name )
 
+    # Get a property type (via javascript)
+    def get_property_type( self, property_name ):
+        if property_name == "value":
+            return "string"
+        else:
+            return VirtualFormDataElement.get_property_type( self, property_name )
 
 # Read a specified file entirely into a "string"
 def read_file( filename ):
@@ -301,6 +318,11 @@
             return Javascript.JSBoolean(self.selected)
         return VirtualFormDataElement.get_property( self, property_name )
 
+    def get_property_type( self, property_name ):
+        if property_name == "checked":
+            return "boolean"
+        return VirtualFormDataElement.get_property_type( self, property_name )
+
 # Radio
 class VirtualRadiobutton( VirtualFormDataElement ):
 
@@ -512,6 +534,23 @@
         else:
             return VirtualFormDataElement.get_property( self, property_name )
 
+    # Get a property type (via javascript)
+    def get_property_type( self, property_name ):
+        if property_name == "type":
+            return "string"
+        elif property_name == "value":
+            return "string"
+        elif property_name == "options":
+            # The options property is an array of option objects underlying this selectbox
+            return "array"
+        elif property_name == "length":
+            # The length property is the numeric length of the options array
+            return "number"
+        elif property_name == "selectedIndex":
+            return "number"
+        else:
+            return VirtualFormDataElement.get_property_type( self, property_name )
+
     # Get an option object
     def get_option_object( self, index ):
         assert index < len(self.option_value_list)
@@ -575,6 +614,13 @@
         else:
             return VirtualFormDataElement.get_property( self, property_name )
 
+    # Get a property type (via javascript)
+    def get_property_type( self, property_name ):
+        if property_name == "value":
+            return "string"
+        else:
+            return VirtualFormDataElement.get_property_type( self, property_name )
+
 # Class that describes a virtual form.  Each form has an identifier (the form name), plus form elements
 # that live in the form.
 class VirtualForm:
@@ -1489,6 +1535,12 @@
         assert isinstance( element_object, VirtualFormDataElement )
         self.element_object = element_object
 
+    def get_type( self, member_name ):
+        # We need to return the proper types of the javascript object
+        # properties
+        value = self.element_object.get_property_type( member_name )
+        return value
+        
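+
+    # Illustrative note: with this hook, a script expression such as
+    # "typeof editjob.elementname.value" (hypothetical form and element names)
+    # resolves through JSMember.dereference_type( ) to get_property_type( )
+    # on the underlying form element.
+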
     def get_value( self, member_name ):
         # The object itself knows what its javascript properties are, so call the right
         # method inside.  All properties are currently strings.
@@ -1502,6 +1554,9 @@
         # as strings, though.
         self.element_object.set_property( member_name, value )
 
+    def type_value( self ):
+        return "formelement"
+
     def bool_value( self ):
         # Return true because the object clearly exists
         return True
diff --git a/framework/crawler-ui/src/main/webapp/editjob.jsp b/framework/crawler-ui/src/main/webapp/editjob.jsp
index 77c5221..d934e75 100644
--- a/framework/crawler-ui/src/main/webapp/editjob.jsp
+++ b/framework/crawler-ui/src/main/webapp/editjob.jsp
@@ -87,7 +87,7 @@
 	String[] pipelineDescriptions = new String[0];
 	boolean[] pipelineIsOutputs = new boolean[0];
 	int[] pipelinePrerequisites = new int[0];
-	OutputSpecification[] pipelineSpecifications = new OutputSpecification[0];
+	Specification[] pipelineSpecifications = new Specification[0];
 	
 	ArrayList scheduleRecords = new ArrayList();
 
@@ -132,7 +132,7 @@
 		pipelineDescriptions = new String[job.countPipelineStages()];
 		pipelineIsOutputs = new boolean[job.countPipelineStages()];
 		pipelinePrerequisites = new int[job.countPipelineStages()];
-		pipelineSpecifications = new OutputSpecification[job.countPipelineStages()];
+		pipelineSpecifications = new Specification[job.countPipelineStages()];
 		for (int j = 0; j < job.countPipelineStages(); j++)
 		{
 			pipelineConnectionNames[j] = job.getPipelineStageConnectionName(j);
diff --git a/framework/crawler-ui/src/main/webapp/viewjob.jsp b/framework/crawler-ui/src/main/webapp/viewjob.jsp
index ca9b62a..116b2fc 100644
--- a/framework/crawler-ui/src/main/webapp/viewjob.jsp
+++ b/framework/crawler-ui/src/main/webapp/viewjob.jsp
@@ -719,7 +719,7 @@
 			<tr>
 				<td colspan="4">
 <%
-			OutputSpecification os = job.getPipelineStageSpecification(j);
+			Specification os = job.getPipelineStageSpecification(j);
 			if (job.getPipelineStageIsOutputConnection(j))
 			{
 				IOutputConnection thisConnection = outputManager.load(job.getPipelineStageConnectionName(j));
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java
index e127a71..8db45f0 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/connectors/BaseRepositoryConnector.java
@@ -132,6 +132,50 @@
   * It is not a big problem if the connector chooses to create more seeds than are
   * strictly necessary; it is merely a question of overall work required.
   *
+  * The end time and seeding version string passed to this method may be used by the connector in whatever
+  * way is most efficient.  For continuous crawling jobs, this method will
+  * be called once, when the job starts, and at various periodic intervals as the job executes.
+  *
+  * When a job's specification is changed, the framework automatically resets the seeding version string to null.  The
+  * seeding version string may also be set to null on each job run, depending on the connector model returned by
+  * getConnectorModel().
+  *
+  * Note that it is always ok to send MORE documents rather than fewer to this method.
+  * The connector will be connected before this method can be called.
+  *@param activities is the interface this method should use to perform whatever framework actions are desired.
+  *@param spec is a document specification (that comes from the job).
+  *@param lastSeedVersion is the last seeding version string for this job, or null if the job has no previous seeding version string.
+  *@param seedTime is the end of the time range of documents to consider, exclusive.
+  *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
+  *@return an updated seeding version string, to be stored with the job.
+  */
+  @Override
+  public String addSeedDocuments(ISeedingActivity activities, Specification spec,
+    String lastSeedVersion, long seedTime, int jobMode)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    long startTime;
+    if (lastSeedVersion == null)
+      startTime = 0L;
+    else
+    {
+      // Unpack seed time from seed version string
+      startTime = Long.parseLong(lastSeedVersion);
+    }
+    addSeedDocuments(activities,spec,startTime,seedTime,jobMode);
+    return Long.toString(seedTime);
+  }
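+
+  // Note on the default above: the returned seeding version string is simply the
+  // packed end time of this seeding pass (e.g. "1404000000000"), which preserves
+  // the old start-time/end-time contract for connectors that have not been upgraded.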
+
+  /** Queue "seed" documents.  Seed documents are the starting places for crawling activity.  Documents
+  * are seeded when this method calls appropriate methods in the passed-in ISeedingActivity object.
+  *
+  * This method can choose to find repository changes that happen only during the specified time interval.
+  * The seeds recorded by this method will be interpreted by the framework according to what the
+  * getConnectorModel() method returns.
+  *
+  * It is not a big problem if the connector chooses to create more seeds than are
+  * strictly necessary; it is merely a question of overall work required.
+  *
   * The times passed to this method may be interpreted for greatest efficiency.  The time ranges
   * any given job uses with this connector will not overlap, but will proceed starting at 0 and going
   * to the "current time", each time the job is run.  For continuous crawling jobs, this method will
@@ -148,7 +192,6 @@
   *@param endTime is the end of the time range to consider, exclusive.
   *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
   */
-  @Override
   public void addSeedDocuments(ISeedingActivity activities, Specification spec,
     long startTime, long endTime, int jobMode)
     throws ManifoldCFException, ServiceInterruption
@@ -281,6 +324,128 @@
     return null;
   }
 
+  /** Process a set of documents.
+  * This is the method that should cause each document to be fetched, processed, and the results either added
+  * to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
+  * The document specification allows this class to filter what is done based on the job.
+  * The connector will be connected before this method can be called.
+  *@param documentIdentifiers is the set of document identifiers to process.
+  *@param statuses are the currently-stored document versions for each document in the set of document identifiers
+  * passed in above.
+  *@param activities is the interface this method should use to queue up new document references
+  * and ingest documents.
+  *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
+  *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
+  */
+  @Override
+  public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
+    IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    // The backwards-compatible base implementation does the following:
+    // (1) Uses the deprecated methods to obtain a set of version information
+    // (2) Based on the provided version information, determines whether processing is required
+    // (3) Uses deprecated methods to process documents
+    // (4) Releases document versions
+
+    // We need to get the old version strings together in order to use the deprecated methods
+    String[] oldVersions = new String[documentIdentifiers.length];
+    for (int i = 0; i < oldVersions.length; i++)
+    {
+      oldVersions[i] = statuses.getIndexedVersionString(documentIdentifiers[i]);
+    }
+    DocumentVersions dv = new DocumentVersions();
+    getDocumentVersions(dv,documentIdentifiers,oldVersions,activities,spec,jobMode,usesDefaultAuthority);
+    try
+    {
+      // Next, we determine which documents are unchanged, and which need to be refetched.
+      Set<String> fetchDocuments = new HashSet<String>();
+      Set<String> scanDocuments = new HashSet<String>();
+      for (int i = 0; i < documentIdentifiers.length; i++)
+      {
+        String documentIdentifier = documentIdentifiers[i];
+        VersionContext vc = dv.getDocumentVersion(documentIdentifier);
+        if (vc != null)
+        {
+          if (dv.isAlwaysRefetch(documentIdentifier) || activities.checkDocumentNeedsReindexing(documentIdentifier,vc.getVersionString()))
+          {
+            // These documents need reprocessing
+            fetchDocuments.add(documentIdentifier);
+          }
+          else
+          {
+            // Document is unchanged.  We leave it up to the framework to decide what that means.
+          }
+          scanDocuments.add(documentIdentifier);
+        }
+        else
+        {
+          // These documents must go away permanently
+          // MHL to collect these and do them as a group
+          activities.deleteDocument(documentIdentifier);
+        }
+      }
+
+      // Construct the appropriate data to call processDocuments() with
+      String[] processIDs = new String[scanDocuments.size()];
+      boolean[] scanOnly = new boolean[scanDocuments.size()];
+      int index = 0;
+      for (int i = 0; i < documentIdentifiers.length; i++)
+      {
+        String documentIdentifier = documentIdentifiers[i];
+        if (scanDocuments.contains(documentIdentifier))
+        {
+          processIDs[index] = documentIdentifier;
+          scanOnly[index] = !fetchDocuments.contains(documentIdentifier);
+          index++;
+        }
+      }
+      processDocuments(processIDs,dv,activities,scanOnly,jobMode);
+      
+    }
+    finally
+    {
+      // Release document versions
+      releaseDocumentVersions(documentIdentifiers,dv);
+    }
+  }
+
+  /** Get document versions given an array of document identifiers.
+  * This method is called for EVERY document that is considered. It is therefore important to perform
+  * as little work as possible here.
+  * The connector will be connected before this method can be called.
+  *@param documentVersions is the versions object, to be filled in by this method.
+  *@param documentIdentifiers is the array of local document identifiers, as understood by this connector.
+  *@param oldVersions is the corresponding array of version strings that have been saved for the document identifiers.
+  *   A null value indicates that this is a first-time fetch, while an empty string indicates that the previous document
+  *   had an empty version string.
+  *@param activities is the interface this method should use to perform whatever framework actions are desired.
+  *@param spec is the current document specification for the current job.  If there is a dependency on this
+  * specification, then the version string should include the pertinent data, so that reingestion will occur
+  * when the specification changes.  This is primarily useful for metadata.
+  *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
+  *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
+  */
+  public void getDocumentVersions(
+    DocumentVersions documentVersions,
+    String[] documentIdentifiers, String[] oldVersions,
+    IVersionActivity activities,
+    Specification spec, int jobMode, boolean usesDefaultAuthority)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    String[] rval = getDocumentVersions(documentIdentifiers,oldVersions,activities,
+      spec,jobMode,usesDefaultAuthority);
+    for (int i = 0; i < rval.length; i++)
+    {
+      if (rval[i] != null)
+      {
+        documentVersions.setDocumentVersion(documentIdentifiers[i],new VersionContext(rval[i],params,spec));
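+        // An empty version string has historically meant "no versioning ability;
+        // always process", so preserve that contract via alwaysRefetch().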
+        if (rval[i].length() == 0)
+          documentVersions.alwaysRefetch(documentIdentifiers[i]);
+      }
+    }
+  }
+
   /** Get document versions given an array of document identifiers.
   * This method is called for EVERY document that is considered. It is
   * therefore important to perform as little work as possible here.
@@ -298,7 +463,6 @@
   * Empty version strings indicate that there is no versioning ability for the corresponding document, and the document
   * will always be processed.
   */
-  @Override
   public String[] getDocumentVersions(String[] documentIdentifiers, String[] oldVersions, IVersionActivity activities,
     Specification spec, int jobMode, boolean usesDefaultAuthority)
     throws ManifoldCFException, ServiceInterruption
@@ -405,10 +569,35 @@
   * the getDocumentVersions() method, including those that returned null versions.  It may be used to free resources
   * committed during the getDocumentVersions() method.  It is guaranteed to be called AFTER any calls to
   * processDocuments() for the documents in question.
+  * The connector will be connected before this method can be called.
+  *@param documentIdentifiers is the set of document identifiers.
+  *@param versions is the corresponding set of version strings (individual identifiers may have no version).
+  */
+  public void releaseDocumentVersions(String[] documentIdentifiers, DocumentVersions versions)
+    throws ManifoldCFException
+  {
+    String[] versionStrings = new String[documentIdentifiers.length];
+    for (int i = 0; i < versionStrings.length; i++)
+    {
+      VersionContext vc = versions.getDocumentVersion(documentIdentifiers[i]);
+      boolean alwaysFetch = versions.isAlwaysRefetch(documentIdentifiers[i]);
+      if (alwaysFetch)
+        versionStrings[i] = "";
+      else if (vc == null)
+        versionStrings[i] = null;
+      else
+        versionStrings[i] = vc.getVersionString();
+    }
+    releaseDocumentVersions(documentIdentifiers,versionStrings);
+  }
+
+  /** Free a set of documents.  This method is called for all documents whose versions have been fetched using
+  * the getDocumentVersions() method, including those that returned null versions.  It may be used to free resources
+  * committed during the getDocumentVersions() method.  It is guaranteed to be called AFTER any calls to
+  * processDocuments() for the documents in question.
   *@param documentIdentifiers is the set of document identifiers.
   *@param versions is the corresponding set of version identifiers (individual identifiers may be null).
   */
-  @Override
   public void releaseDocumentVersions(String[] documentIdentifiers, String[] versions)
     throws ManifoldCFException
   {
@@ -429,6 +618,41 @@
   * This is the method that should cause each document to be fetched, processed, and the results either added
   * to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
   * The document specification allows this class to filter what is done based on the job.
+  * The connector will be connected before this method can be called.
+  *@param documentIdentifiers is the set of document identifiers to process.
+  *@param versions are the version strings returned by getDocumentVersions() above.
+  *@param activities is the interface this method should use to queue up new document references
+  * and ingest documents.
+  *@param scanOnly is an array corresponding to the document identifiers.  It is set to true to indicate when the processing
+  * should only find other references, and should not actually call the ingestion methods.
+  *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
+  */
+  public void processDocuments(String[] documentIdentifiers, DocumentVersions versions, IProcessActivity activities,
+    boolean[] scanOnly, int jobMode)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    Specification spec = null;
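+    // All documents in a batch come from the same job, so the job Specification can
+    // be recovered from any non-null VersionContext in the set.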
+    String[] versionStrings = new String[documentIdentifiers.length];
+    for (int i = 0; i < versionStrings.length; i++)
+    {
+      VersionContext vc = versions.getDocumentVersion(documentIdentifiers[i]);
+      if (vc != null)
+        spec = vc.getSpecification();
+      boolean alwaysFetch = versions.isAlwaysRefetch(documentIdentifiers[i]);
+      if (alwaysFetch)
+        versionStrings[i] = "";
+      else if (vc == null)
+        versionStrings[i] = null;
+      else
+        versionStrings[i] = vc.getVersionString();
+    }
+    processDocuments(documentIdentifiers,versionStrings,activities,spec,scanOnly,jobMode);
+  }
+
+  /** Process a set of documents.
+  * This is the method that should cause each document to be fetched, processed, and the results either added
+  * to the queue of documents for the current job, and/or entered into the incremental ingestion manager.
+  * The document specification allows this class to filter what is done based on the job.
   *@param documentIdentifiers is the set of document identifiers to process.
   *@param versions is the corresponding document versions to process, as returned by getDocumentVersions() above.
   *       The implementation may choose to ignore this parameter and always process the current version.
@@ -439,7 +663,6 @@
   * should only find other references, and should not actually call the ingestion methods.
   *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
   */
-  @Override
   public void processDocuments(String[] documentIdentifiers, String[] versions, IProcessActivity activities,
     Specification spec, boolean[] scanOnly, int jobMode)
     throws ManifoldCFException, ServiceInterruption
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/DocumentVersions.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/DocumentVersions.java
new file mode 100644
index 0000000..29d3279
--- /dev/null
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/DocumentVersions.java
@@ -0,0 +1,74 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.crawler.interfaces;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import java.util.*;
+
+/** This class represents a set of document versions, organized by document identifier.
+* It's part of the IRepositoryConnector API.
+*/
+public class DocumentVersions
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  protected final Map<String,VersionContext> documentVersions = new HashMap<String,VersionContext>();
+  protected final Set<String> alwaysRefetch = new HashSet<String>();
+  
+  /** Constructor */
+  public DocumentVersions()
+  {
+  }
+  
+  /** Set a non-special document version.
+  *@param documentIdentifier is the document identifier.
+  *@param documentVersion is the document version.
+  */
+  public void setDocumentVersion(String documentIdentifier, VersionContext documentVersion)
+  {
+    documentVersions.put(documentIdentifier,documentVersion);
+  }
+  
+  /** Signal to always refetch document.
+  *@param documentIdentifier is the document identifier.
+  */
+  public void alwaysRefetch(String documentIdentifier)
+  {
+    alwaysRefetch.add(documentIdentifier);
+  }
+  
+  /** Get the document version, if any.
+  *@param documentIdentifier is the document identifier.
+  *@return the document version, if any.  Null indicates that no such document was found.
+  */
+  public VersionContext getDocumentVersion(String documentIdentifier)
+  {
+    return documentVersions.get(documentIdentifier);
+  }
+  
+  /** Check whether we should always refetch a specified document.
+  *@param documentIdentifier is the document identifier.
+  *@return true if we are directed to always refetch; false by default.
+  */
+  public boolean isAlwaysRefetch(String documentIdentifier)
+  {
+    return alwaysRefetch.contains(documentIdentifier);
+  }
+  
+}
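+
+// Illustrative usage (hypothetical connector code): inside getDocumentVersions(),
+// a connector records what it knows about each document, e.g.:
+//
+//   documentVersions.setDocumentVersion(id, new VersionContext(versionString, params, spec));
+//   if (versionString.length() == 0)
+//     documentVersions.alwaysRefetch(id);  // no versioning ability; always process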
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IExistingVersions.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IExistingVersions.java
new file mode 100644
index 0000000..af7196c
--- /dev/null
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IExistingVersions.java
@@ -0,0 +1,46 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.crawler.interfaces;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import java.util.*;
+
+/** This interface describes functionality designed to allow retrieval of existing
+* version information from previous crawls.  It is part of the IRepositoryConnector API.
+*/
+public interface IExistingVersions
+{
+  public static final String _rcsid = "@(#)$Id$";
+
+  /** Retrieve the primary existing version string given a document identifier.
+  *@param documentIdentifier is the document identifier.
+  *@return the document version string, or null if the document was never previously indexed.
+  */
+  public String getIndexedVersionString(String documentIdentifier)
+    throws ManifoldCFException;
+
+  /** Retrieve a component existing version string given a document identifier.
+  *@param documentIdentifier is the document identifier.
+  *@param componentIdentifier is the component identifier, if any.
+  *@return the document version string, or null if the document component was never previously indexed.
+  */
+  public String getIndexedVersionString(String documentIdentifier, String componentIdentifier)
+    throws ManifoldCFException;
+  
+}
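+
+// Illustrative usage (hypothetical connector code): inside processDocuments(),
+//
+//   String previous = statuses.getIndexedVersionString(documentIdentifier);
+//   boolean neverIndexed = (previous == null);  // null means never previously indexed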
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobDescription.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobDescription.java
index 88a4ee1..231f373 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobDescription.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobDescription.java
@@ -92,7 +92,7 @@
   *@param pipelineStageDescription is a description of the pipeline stage being added.
   *@return the empty output specification for this pipeline stage.
   */
-  public OutputSpecification addPipelineStage(int prerequisiteStage, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription);
+  public Specification addPipelineStage(int prerequisiteStage, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription);
   
   /** Get a count of pipeline connections.
   *@return the current number of pipeline connections.
@@ -127,7 +127,7 @@
   *@param index is the index of the pipeline stage whose specification is needed.
   *@return the specification for the connection.
   */
-  public OutputSpecification getPipelineStageSpecification(int index);
+  public Specification getPipelineStageSpecification(int index);
 
   /** Delete a pipeline stage.
   *@param index is the index of the pipeline stage to delete.
@@ -140,7 +140,7 @@
   *@param pipelineStageDescription is the description.
   *@return the newly-created output specification.
   */
-  public OutputSpecification insertPipelineStage(int index, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription);
+  public Specification insertPipelineStage(int index, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription);
   
   /** Set the job type.
   *@param type is the type (as an integer).
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java
index b02a501..30cc854 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IJobManager.java
@@ -548,7 +548,15 @@
   */
   public void retryNotification(JobNotifyRecord jobNotifyRecord, long failTime, int failRetryCount)
     throws ManifoldCFException;
-  
+
+  /** Retry delete notification.
+  *@param jnr is the current job notification record.
+  *@param failTime is the new fail time (-1L if none).
+  *@param failCount is the new fail retry count (-1 if none).
+  */
+  public void retryDeleteNotification(JobNotifyRecord jnr, long failTime, int failCount)
+    throws ManifoldCFException;
+
   /** Add an initial set of documents to the queue.
   * This method is called during job startup, when the queue is being loaded.
   * A set of document references is passed to this method, which updates the status of the document
@@ -709,6 +717,18 @@
     String[] parentIdentifierHashes, int hopcountMethod)
     throws ManifoldCFException;
 
+  /** Undo the addition of child documents to the queue, for a set of documents.
+  * This method is called at the end of document processing, to back out any incomplete additions to the queue and restore
+  * the queue to its state prior to the incomplete additions.  Call this method instead of finishDocuments() if the
+  * addition of documents was not completed.
+  *@param jobID is the job identifier.
+  *@param legalLinkTypes is the set of legal link types that this connector generates.
+  *@param parentIdentifierHashes are the hashes of the document identifiers for which child link extraction just took place.
+  */
+  public void revertDocuments(Long jobID, String[] legalLinkTypes,
+    String[] parentIdentifierHashes)
+    throws ManifoldCFException;
+
   /** Retrieve specific parent data for a given document.
   *@param jobID is the job identifier.
   *@param docIDHash is the hash of the document identifier.
@@ -825,11 +845,11 @@
   public void resetSeedJob(Long jobID)
     throws ManifoldCFException;
 
-  /** Get the list of jobs that are ready for deletion.
+  /** Get the list of jobs that are ready for delete cleanup.
   *@param processID is the current process ID.
   *@return jobs that were in the "readyfordelete" state.
   */
-  public JobDeleteRecord[] getJobsReadyForDelete(String processID)
+  public JobDeleteRecord[] getJobsReadyForDeleteCleanup(String processID)
     throws ManifoldCFException;
     
   /** Get the list of jobs that are ready for startup.
@@ -846,12 +866,25 @@
   public JobNotifyRecord[] getJobsReadyForInactivity(String processID)
     throws ManifoldCFException;
 
+  /** Find the list of jobs that need to have their connectors notified of job deletion.
+  *@param processID is the process ID.
+  *@return the IDs of jobs that need their output connectors notified in order to be removed.
+  */
+  public JobNotifyRecord[] getJobsReadyForDelete(String processID)
+    throws ManifoldCFException;
+
   /** Inactivate a job, from the notification state.
   *@param jobID is the ID of the job to inactivate.
   */
   public void inactivateJob(Long jobID)
     throws ManifoldCFException;
 
+  /** Remove a job, from the notification state.
+  *@param jobID is the ID of the job to remove.
+  */
+  public void removeJob(Long jobID)
+    throws ManifoldCFException;
+
   /** Reset a job starting for delete back to "ready for delete"
   * state.
   *@param jobID is the job id.
@@ -866,6 +899,13 @@
   public void resetNotifyJob(Long jobID)
     throws ManifoldCFException;
 
+  /** Reset a job that is delete notifying back to "ready for delete notify"
+  * state.
+  *@param jobID is the job id.
+  */
+  public void resetDeleteNotifyJob(Long jobID)
+    throws ManifoldCFException;
+
   /** Reset a starting job back to "ready for startup" state.
   *@param jobID is the job id.
   */
@@ -905,15 +945,16 @@
   /** Note job started.
   *@param jobID is the job id.
   *@param startTime is the job start time.
+  *@param seedingVersion is the seeding version to record with the job start.
   */
-  public void noteJobStarted(Long jobID, long startTime)
+  public void noteJobStarted(Long jobID, long startTime, String seedingVersion)
     throws ManifoldCFException;
 
   /** Note job seeded.
   *@param jobID is the job id.
-  *@param startTime is the job seed time.
+  *@param seedingVersion is the seeding version string to record.
   */
-  public void noteJobSeeded(Long jobID, long startTime)
+  public void noteJobSeeded(Long jobID, String seedingVersion)
     throws ManifoldCFException;
 
   /**  Note the deregistration of a connector used by the specified connections.
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IProcessActivity.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IProcessActivity.java
index 0cb8913..b20a95e 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IProcessActivity.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IProcessActivity.java
@@ -23,15 +23,54 @@
 import org.apache.manifoldcf.core.interfaces.*;
 import org.apache.manifoldcf.agents.interfaces.*;
 
-/** This interface abstracts from the activities that a fetched document processor can do.
+/** This interface abstracts from the activities that a connector's processDocuments() method can do.
+* The processing flow for a document is expected to go something like this:
+* (1) The connector's processDocuments() method is called with a set of documents to be processed.
+* (2) The connector computes a version string for each document in the set as part of determining
+*    whether the document indeed needs to be refetched.
+* (3) For each document processed, there can be one of several dispositions:
+*   (a) There is no such document (anymore): deleteDocument() called for the document.
+*   (b) The document is (re)indexed: ingestDocumentWithException() is called for the document.
+*   (c) The document is determined to be unchanged and no updates are needed: nothing needs to be called
+*     for the document.
+*   (d) The document is determined to be unchanged BUT the version string needs to be updated: recordDocument()
+*     is called for the document.
+*   (e) The document is determined to be unindexable BUT it still exists in the repository: noDocument()
+*    is called for the document.
+*   (f) There was a service interruption: ServiceInterruption is thrown.
+* (4) In order to determine whether a document needs to be reindexed, the method checkDocumentNeedsReindexing()
+*    is available to return an opinion on that matter.
 */
-public interface IProcessActivity extends IHistoryActivity, IEventActivity, IAbortActivity, IFingerprintActivity,
-    ICarrydownActivity
+public interface IProcessActivity extends IVersionActivity
 {
   public static final String _rcsid = "@(#)$Id: IProcessActivity.java 988245 2010-08-23 18:39:35Z kwright $";
 
+  /** Check if a document needs to be reindexed, based on a computed version string.
+  * Call this method to determine whether reindexing is necessary.  Pass in a newly-computed version
+  * string.  This method will return "true" if the document needs to be re-indexed.
+  *@param documentIdentifier is the document identifier.
+  *@param newVersionString is the newly-computed version string.
+  *@return true if the document needs to be reindexed.
+  */
+  public boolean checkDocumentNeedsReindexing(String documentIdentifier,
+    String newVersionString)
+    throws ManifoldCFException;
+
+  /** Check if a document needs to be reindexed, based on a computed version string.
+  * Call this method to determine whether reindexing is necessary.  Pass in a newly-computed version
+  * string.  This method will return "true" if the document needs to be re-indexed.
+  *@param documentIdentifier is the document identifier.
+  *@param componentIdentifier is the component document identifier, if any.
+  *@param newVersionString is the newly-computed version string.
+  *@return true if the document needs to be reindexed.
+  */
+  public boolean checkDocumentNeedsReindexing(String documentIdentifier,
+    String componentIdentifier,
+    String newVersionString)
+    throws ManifoldCFException;
+
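+  // Illustrative sketch (hypothetical connector code) of the dispositions described
+  // in the interface comment above; computeVersion(), fetch(), isIndexable(), and
+  // uriOf() are assumed connector-side helpers:
+  //
+  //   for (String id : documentIdentifiers) {
+  //     String newVersion = computeVersion(id);
+  //     if (!activities.checkDocumentNeedsReindexing(id, newVersion))
+  //       continue;                                        // disposition (c): unchanged
+  //     RepositoryDocument doc = fetch(id);
+  //     if (doc == null)
+  //       activities.deleteDocument(id);                   // disposition (a): gone
+  //     else if (!isIndexable(doc))
+  //       activities.noDocument(id, newVersion);           // disposition (e): keep version
+  //     else
+  //       activities.ingestDocumentWithException(id, newVersion, uriOf(id), doc);  // (b)
+  //   }
+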
   /** Add a document description to the current job's queue.
-  *@param localIdentifier is the local document identifier to add (for the connector that
+  *@param documentIdentifier is the local document identifier to add (for the connector that
   * fetched the document).
   *@param parentIdentifier is the document identifier that is considered to be the "parent"
   * of this identifier.  May be null, if no hopcount filtering desired for this kind of relationship.
@@ -45,12 +84,12 @@
   *@param originationTime is the time, in ms since epoch, that the document originated.  Pass null if none or unknown.
   *@param prereqEventNames are the names of the prerequisite events which this document requires prior to processing.  Pass null if none.
   */
-  public void addDocumentReference(String localIdentifier, String parentIdentifier, String relationshipType,
+  public void addDocumentReference(String documentIdentifier, String parentIdentifier, String relationshipType,
     String[] dataNames, Object[][] dataValues, Long originationTime, String[] prereqEventNames)
     throws ManifoldCFException;
 
   /** Add a document description to the current job's queue.
-  *@param localIdentifier is the local document identifier to add (for the connector that
+  *@param documentIdentifier is the document identifier to add (for the connector that
   * fetched the document).
   *@param parentIdentifier is the document identifier that is considered to be the "parent"
   * of this identifier.  May be null, if no hopcount filtering desired for this kind of relationship.
@@ -63,13 +102,12 @@
   *          The type of each object must either be a String, or a CharacterInput.
   *@param originationTime is the time, in ms since epoch, that the document originated.  Pass null if none or unknown.
   */
-  public void addDocumentReference(String localIdentifier, String parentIdentifier, String relationshipType,
+  public void addDocumentReference(String documentIdentifier, String parentIdentifier, String relationshipType,
     String[] dataNames, Object[][] dataValues, Long originationTime)
     throws ManifoldCFException;
 
-
   /** Add a document description to the current job's queue.
-  *@param localIdentifier is the local document identifier to add (for the connector that
+  *@param documentIdentifier is the document identifier to add (for the connector that
   * fetched the document).
   *@param parentIdentifier is the document identifier that is considered to be the "parent"
   * of this identifier.  May be null, if no hopcount filtering desired for this kind of relationship.
@@ -81,12 +119,12 @@
   *@param dataValues are the values that correspond to the data names in the dataNames parameter.  May be null only if dataNames is null.
   *          The type of each object must either be a String, or a CharacterInput.
   */
-  public void addDocumentReference(String localIdentifier, String parentIdentifier, String relationshipType,
+  public void addDocumentReference(String documentIdentifier, String parentIdentifier, String relationshipType,
     String[] dataNames, Object[][] dataValues)
     throws ManifoldCFException;
 
   /** Add a document description to the current job's queue.
-  *@param localIdentifier is the local document identifier to add (for the connector that
+  *@param documentIdentifier is the document identifier to add (for the connector that
   * fetched the document).
   *@param parentIdentifier is the document identifier that is considered to be the "parent"
   * of this identifier.  May be null, if no hopcount filtering desired for this kind of relationship.
@@ -94,27 +132,19 @@
   * reference.  This must be one of the strings returned by the IRepositoryConnector method
   * "getRelationshipTypes()".  May be null.
   */
-  public void addDocumentReference(String localIdentifier, String parentIdentifier, String relationshipType)
+  public void addDocumentReference(String documentIdentifier, String parentIdentifier, String relationshipType)
     throws ManifoldCFException;
 
   /** Add a document description to the current job's queue.  This method is equivalent to
   * addDocumentReference(localIdentifier,null,null).
-  *@param localIdentifier is the local document identifier to add (for the connector that
+  *@param documentIdentifier is the document identifier to add (for the connector that
   * fetched the document).
   */
-  public void addDocumentReference(String localIdentifier)
+  public void addDocumentReference(String documentIdentifier)
     throws ManifoldCFException;
 
-
-  /** Record a document version, but don't ingest it.
-  *@param localIdentifier is the document identifier.
-  *@param version is the document version.
-  */
-  public void recordDocument(String localIdentifier, String version)
-    throws ManifoldCFException, ServiceInterruption;
-
   /** Ingest the current document.
-  *@param localIdentifier is the document's local identifier.
+  *@param documentIdentifier is the document's identifier.
   *@param version is the version of the document, as reported by the getDocumentVersions() method of the
   *       corresponding repository connector.
   *@param documentURI is the URI to use to retrieve this document from the search interface (and is
@@ -122,11 +152,27 @@
   *@param data is the document data.  The data is closed after ingestion is complete.
   *@throws IOException only when data stream reading fails.
   */
-  public void ingestDocumentWithException(String localIdentifier, String version, String documentURI, RepositoryDocument data)
+  public void ingestDocumentWithException(String documentIdentifier,
+    String version, String documentURI, RepositoryDocument data)
     throws ManifoldCFException, ServiceInterruption, IOException;
 
   /** Ingest the current document.
-  *@param localIdentifier is the document's local identifier.
+  *@param documentIdentifier is the document's identifier.
+  *@param componentIdentifier is the component document identifier, if any.
+  *@param version is the version of the document, as reported by the getDocumentVersions() method of the
+  *       corresponding repository connector.
+  *@param documentURI is the URI to use to retrieve this document from the search interface (and is
+  *       also the unique key in the index).
+  *@param data is the document data.  The data is closed after ingestion is complete.
+  *@throws IOException only when data stream reading fails.
+  */
+  public void ingestDocumentWithException(String documentIdentifier,
+    String componentIdentifier,
+    String version, String documentURI, RepositoryDocument data)
+    throws ManifoldCFException, ServiceInterruption, IOException;
+
+  /** Ingest the current document.
+  *@param documentIdentifier is the document's identifier.
   *@param version is the version of the document, as reported by the getDocumentVersions() method of the
   *       corresponding repository connector.
   *@param documentURI is the URI to use to retrieve this document from the search interface (and is
@@ -136,48 +182,114 @@
   * according to standard best practices.
   */
   @Deprecated
-  public void ingestDocument(String localIdentifier, String version, String documentURI, RepositoryDocument data)
+  public void ingestDocument(String documentIdentifier, String version, String documentURI, RepositoryDocument data)
     throws ManifoldCFException, ServiceInterruption;
 
+  /** Remove the specified document from the search engine index, and update the
+  * recorded version information for the document.
+  *@param documentIdentifier is the document's local identifier.
+  *@param version is the version string to be recorded for the document.
+  */
+  public void noDocument(String documentIdentifier,
+    String version)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Remove the specified document from the search engine index, and update the
+  * recorded version information for the document.
+  *@param documentIdentifier is the document's local identifier.
+  *@param componentIdentifier is the component document identifier, if any.
+  *@param version is the version string to be recorded for the document.
+  */
+  public void noDocument(String documentIdentifier,
+    String componentIdentifier,
+    String version)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Remove the specified document primary component permanently from the search engine index,
+  * and from the status table.  Use this method when your document has components and
+  * now also has a primary document, but will not have a primary document again for the foreseeable
+  * future.  This is a rare situation.
+  *@param documentIdentifier is the document's identifier.
+  */
+  public void removeDocument(String documentIdentifier)
+    throws ManifoldCFException, ServiceInterruption;
+
+  /** Retain existing document component.  Use this method to signal that an already-existing
+  * document component does not need to be reindexed.  The default behavior is to remove
+  * components that are not mentioned during processing.
+  *@param documentIdentifier is the document's identifier.
+  *@param componentIdentifier is the component document identifier, which cannot be null.
+  */
+  public void retainDocument(String documentIdentifier,
+    String componentIdentifier)
+    throws ManifoldCFException;
+
+  /** Record a document version, WITHOUT reindexing or removing it.  (Other
+  * documents with the same URL, however, will still be removed.)  This is
+  * useful if the version string changes but the document contents are known not
+  * to have changed.
+  *@param documentIdentifier is the document identifier.
+  *@param version is the document version.
+  */
+  public void recordDocument(String documentIdentifier,
+    String version)
+    throws ManifoldCFException;
+
+  /** Record a document version, WITHOUT reindexing or removing it.  (Other
+  * documents with the same URL, however, will still be removed.)  This is
+  * useful if the version string changes but the document contents are known not
+  * to have changed.
+  *@param documentIdentifier is the document identifier.
+  *@param componentIdentifier is the component document identifier, if any.
+  *@param version is the document version.
+  */
+  public void recordDocument(String documentIdentifier,
+    String componentIdentifier,
+    String version)
+    throws ManifoldCFException;
+
+  /** Delete the specified document permanently from the search engine index, and from the status table,
+  * along with all its components.
+  * This method does NOT keep track of any document version information for the document and thus can
+  * lead to "churn", whereby the same document is queued, processed,
+  * and removed on subsequent crawls.  It is therefore preferable to use noDocument() instead,
+  * in any case where the same decision will need to be made over and over.
+  *@param documentIdentifier is the document's identifier.
+  */
+  public void deleteDocument(String documentIdentifier)
+    throws ManifoldCFException;
+
   /** Delete the current document from the search engine index, while keeping track of the version information
   * for it (to reduce churn).
-  *@param localIdentifier is the document's local identifier.
-  *@param version is the version of the document, as reported by the getDocumentVersions() method of the
-  *       corresponding repository connector.
+  * Deprecated; use noDocument() above instead.
+  *@param documentIdentifier is the document's local identifier.
+  *@param version is the version string to be recorded for the document.
   */
-  public void deleteDocument(String localIdentifier, String version)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Delete the current document from the search engine index.  This method does NOT keep track of version
-  * information for the document and thus can lead to "churn", whereby the same document is queued, versioned,
-  * and removed on subsequent crawls.  It therefore should be considered to be deprecated, in favor of
-  * deleteDocument(String localIdentifier, String version).
-  *@param localIdentifier is the document's local identifier.
-  */
-  public void deleteDocument(String localIdentifier)
+  @Deprecated
+  public void deleteDocument(String documentIdentifier, String version)
     throws ManifoldCFException, ServiceInterruption;
 
   /** Override the schedule for the next time a document is crawled.
   * Calling this method allows you to set an upper recrawl bound, lower recrawl bound, upper expire bound, lower expire bound,
   * or a combination of these, on a specific document.  This method is only effective if the job is a continuous one, and if the
   * identifier you pass in is being processed.
-  *@param localIdentifier is the document's local identifier.
+  *@param documentIdentifier is the document's identifier.
   *@param lowerRecrawlBoundTime is the time in ms since epoch that the reschedule time should not fall BELOW, or null if none.
   *@param upperRecrawlBoundTime is the time in ms since epoch that the reschedule time should not rise ABOVE, or null if none.
   *@param lowerExpireBoundTime is the time in ms since epoch that the expire time should not fall BELOW, or null if none.
   *@param upperExpireBoundTime is the time in ms since epoch that the expire time should not rise ABOVE, or null if none.
   */
-  public void setDocumentScheduleBounds(String localIdentifier,
+  public void setDocumentScheduleBounds(String documentIdentifier,
     Long lowerRecrawlBoundTime, Long upperRecrawlBoundTime,
     Long lowerExpireBoundTime, Long upperExpireBoundTime)
     throws ManifoldCFException;
 
   /** Override a document's origination time.
   * Use this method to signal the framework that a document's origination time is something other than the first time it was crawled.
-  *@param localIdentifier is the document's local identifier.
+  *@param documentIdentifier is the document's identifier.
   *@param originationTime is the document's origination time, or null if unknown.
   */
-  public void setDocumentOriginationTime(String localIdentifier,
+  public void setDocumentOriginationTime(String documentIdentifier,
     Long originationTime)
     throws ManifoldCFException;
 
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IRepositoryConnector.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IRepositoryConnector.java
index 04b5c32..ee31ee6 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IRepositoryConnector.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/IRepositoryConnector.java
@@ -48,34 +48,37 @@
 * It therefore establishes a space of document identifiers.  Each connector will only ever be
 * asked to deal with identifiers that have in some way originated from the connector.
 *
-* Documents are fetched by ManifoldCF in three stages.  First, the addSeedDocuments() method is called in the connector
+* Documents are fetched by ManifoldCF in two stages.  First, the addSeedDocuments() method is called in the connector
 * implementation.  This method is meant to add a set of document identifiers to the queue.  When ManifoldCF is ready
-* to process a document, the document identifier is used to obtain a current document version string, using the
-* getDocumentVersions() method (the second stage).  This version string is used to decide whether or not the
-* third stage need be called for the document or not.  The third stage is responsible for sending document content
-* to the output, and for extracting any references to additional documents, and consists of the processDocuments() method.
+* to process a document, the document identifier is used to build a version string for the document, to check whether
+* the document needs to be (re)indexed, and to index it if needed (the second stage).  The second stage
+* consists of the processDocuments() method.
 *
-* All of these methods interact with ManifoldCF by means of an "activity" interface.  For example, an IVersionActivity object
-* is passed to the getDocumentVersions() method, and that object contains methods that are necessary for getDocumentVersions()
-* to do its job.  A similar architecture is used throughout the connector framework.
+* Both of these methods interact with ManifoldCF by means of an "activity" interface.
+*
+* A note on connector models:
+*
+* These values describe the behavior of the connector's addSeedDocuments() method.  The framework
+* uses these to figure out how to most efficiently use the connector.  It is desirable to pick the most
+* restrictive model that is still accurate.  For example, if MODEL_ADD_CHANGE_DELETE applies, you would
+* return that value rather than MODEL_ADD.
+*
+* For the CHAINED models, what the connector is describing are the documents that will be processed IF the seeded
+* documents are followed to their leaves.  For instance, imagine a hierarchy where the root document is the only one ever
+* seeded, but if that document is processed, and its discovered changed children are processed as well, then all documents
+* that have been added, changed, or deleted will eventually be discovered.  In that case, model
+* MODEL_CHAINED_ADD_CHANGE_DELETE would be appropriate.  But, if a changed node can only discover child
+* additions and changes, then MODEL_CHAINED_ADD_CHANGE would be the right choice.
+*
+* A CHAINED model also requires cooperation on the part of the connector during processing.  Specifically,
+* even when a document is unchanged, its references are still expected to be extracted in order for a CHAINED
+* model to do the right thing.  For non-CHAINED models, references do NOT need to be re-extracted
+* when a document has no reference changes.
 */
 public interface IRepositoryConnector extends IConnector
 {
   public static final String _rcsid = "@(#)$Id: IRepositoryConnector.java 996524 2010-09-13 13:38:01Z kwright $";
 
-  // Connector models.
-  // These values describe what the connector returns for the getDocumentIdentifiers() method.  The framework
-  // uses these to figure out how to most efficiently use the connector.  It is desirable to pick a model that
-  // is the most restrictive that is still accurate.  For example, if MODEL_ADD_CHANGE_DELETE applies, you would
-  // return that value rather than MODEL_ADD.
-
-  // For the CHAINED models, what the connector is describing are the documents that will be processed IF the seeded
-  // documents are followed to their leaves.  For instance, imagine a hierarchy where the root document is the only one ever
-  // seeded, but if that document is processed, and its discovered changed children are processed as well, then all documents
-  // that have been added, changed, or deleted will eventually be discovered.  In that case, model
-  // MODEL_CHAINED_ADD_CHANGE_DELETE would be appropriate.  But, if a changed node can only discover child
-  // additions and changes, then MODEL_CHAINED_ADD_CHANGE would be the right choice.
-
   /** This is the legacy ManifoldCF catch-all crawling model.  All existing documents will be rechecked when a crawl
   * is done, every time.  This model was typically used for models where seeds were essentially fixed and all
   * real documents were discovered during crawling. */
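As a concrete illustration of model selection (a sketch, not taken from any shipped connector):
a repository whose change feed reports additions, changes, and deletions directly can declare
the most restrictive model.  The constant and getConnectorModel() come from IRepositoryConnector;
the scenario is hypothetical:

  @Override
  public int getConnectorModel()
  {
    // Seeding enumerates adds, changes, AND deletes, so the framework never needs a
    // full re-scan of previously-seen documents to detect deletions.
    return MODEL_ADD_CHANGE_DELETE;
  }
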
@@ -161,47 +164,25 @@
   * It is not a big problem if the connector chooses to create more seeds than are
   * strictly necessary; it is merely a question of overall work required.
   *
-  * The times passed to this method may be interpreted for greatest efficiency.  The time ranges
-  * any given job uses with this connector will not overlap, but will proceed starting at 0 and going
-  * to the "current time", each time the job is run.  For continuous crawling jobs, this method will
+  * The end time and seeding version string passed to this method may be interpreted by the connector in
+  * whatever way yields the most efficient seeding.  For continuous crawling jobs, this method will
   * be called once, when the job starts, and at various periodic intervals as the job executes.
   *
-  * When a job's specification is changed, the framework automatically resets the seeding start time to 0.  The
-  * seeding start time may also be set to 0 on each job run, depending on the connector model returned by
+  * When a job's specification is changed, the framework automatically resets the seeding version string to null.  The
+  * seeding version string may also be set to null on each job run, depending on the connector model returned by
   * getConnectorModel().
   *
   * Note that it is always ok to send MORE documents rather than less to this method.
   * The connector will be connected before this method can be called.
   *@param activities is the interface this method should use to perform whatever framework actions are desired.
   *@param spec is a document specification (that comes from the job).
-  *@param startTime is the beginning of the time range to consider, inclusive.
-  *@param endTime is the end of the time range to consider, exclusive.
+  *@param lastSeedVersion is the last seeding version string for this job, or null if the job has no previous seeding version string.
+  *@param seedTime is the end of the time range of documents to consider, exclusive.
   *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
+  *@return an updated seeding version string, to be stored with the job.
   */
-  public void addSeedDocuments(ISeedingActivity activities, Specification spec,
-    long startTime, long endTime, int jobMode)
-    throws ManifoldCFException, ServiceInterruption;
-
-  /** Get document versions given an array of document identifiers.
-  * This method is called for EVERY document that is considered. It is therefore important to perform
-  * as little work as possible here.
-  * The connector will be connected before this method can be called.
-  *@param documentIdentifiers is the array of local document identifiers, as understood by this connector.
-  *@param oldVersions is the corresponding array of version strings that have been saved for the document identifiers.
-  *   A null value indicates that this is a first-time fetch, while an empty string indicates that the previous document
-  *   had an empty version string.
-  *@param activities is the interface this method should use to perform whatever framework actions are desired.
-  *@param spec is the current document specification for the current job.  If there is a dependency on this
-  * specification, then the version string should include the pertinent data, so that reingestion will occur
-  * when the specification changes.  This is primarily useful for metadata.
-  *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
-  *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
-  *@return the corresponding version strings, with null in the places where the document no longer exists.
-  * Empty version strings indicate that there is no versioning ability for the corresponding document, and the document
-  * will always be processed.
-  */
-  public String[] getDocumentVersions(String[] documentIdentifiers, String[] oldVersions, IVersionActivity activities,
-    Specification spec, int jobMode, boolean usesDefaultAuthority)
+  public String addSeedDocuments(ISeedingActivity activities, Specification spec,
+    String lastSeedVersion, long seedTime, int jobMode)
     throws ManifoldCFException, ServiceInterruption;
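A minimal sketch of the new seeding contract, assuming a repository that exposes a monotonically
increasing change token.  The repositoryClient calls and the ChangeSet type are hypothetical;
addSeedDocument() is the standard ISeedingActivity method:

  @Override
  public String addSeedDocuments(ISeedingActivity activities, Specification spec,
    String lastSeedVersion, long seedTime, int jobMode)
    throws ManifoldCFException, ServiceInterruption
  {
    // null means no previous seeding version: seed from the beginning of time.
    String startToken = (lastSeedVersion == null) ? "0" : lastSeedVersion;
    ChangeSet changes = repositoryClient.changesSince(startToken, seedTime);  // assumed API
    for (String documentIdentifier : changes.getDocumentIdentifiers())
      activities.addSeedDocument(documentIdentifier);
    // Stored with the job; handed back as lastSeedVersion on the next seeding pass.
    return changes.getLatestToken();
  }
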
 
   /** Process a set of documents.
@@ -210,30 +191,17 @@
   * The document specification allows this class to filter what is done based on the job.
   * The connector will be connected before this method can be called.
   *@param documentIdentifiers is the set of document identifiers to process.
-  *@param versions is the corresponding document versions to process, as returned by getDocumentVersions() above.
-  *       The implementation may choose to ignore this parameter and always process the current version.
+  *@param statuses are the currently-stored document versions for each document in the set of document identifiers
+  * passed in above.
+  *@param spec is a document specification (that comes from the job).
   *@param activities is the interface this method should use to queue up new document references
   * and ingest documents.
-  *@param spec is the document specification.
-  *@param scanOnly is an array corresponding to the document identifiers.  It is set to true to indicate when the processing
-  * should only find other references, and should not actually call the ingestion methods.
   *@param jobMode is an integer describing how the job is being run, whether continuous or once-only.
+  *@param usesDefaultAuthority will be true only if the authority in use for these documents is the default one.
   */
-  public void processDocuments(String[] documentIdentifiers, String[] versions, IProcessActivity activities,
-    Specification spec, boolean[] scanOnly, int jobMode)
+  public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec,
+    IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
     throws ManifoldCFException, ServiceInterruption;
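Under the merged contract, version computation and indexing happen in one pass.  A minimal
sketch, where fetchVersion() and fetchAndIngest() are hypothetical connector internals and
the IExistingVersions accessor is assumed to expose the previously indexed version string:

  @Override
  public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses,
    Specification spec, IProcessActivity activities, int jobMode, boolean usesDefaultAuthority)
    throws ManifoldCFException, ServiceInterruption
  {
    for (String documentIdentifier : documentIdentifiers)
    {
      String newVersion = fetchVersion(documentIdentifier);                     // assumed helper
      String oldVersion = statuses.getIndexedVersionString(documentIdentifier); // assumed accessor
      if (newVersion == null)
        activities.deleteDocument(documentIdentifier);              // gone from the repository
      else if (oldVersion == null || !oldVersion.equals(newVersion))
        fetchAndIngest(documentIdentifier, newVersion, activities); // assumed helper
      // else: unchanged; leave the recorded version alone and do no work
    }
  }
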
 
-  /** Free a set of documents.  This method is called for all documents whose versions have been fetched using
-  * the getDocumentVersions() method, including those that returned null versions.  It may be used to free resources
-  * committed during the getDocumentVersions() method.  It is guaranteed to be called AFTER any calls to
-  * processDocuments() for the documents in question.
-  * The connector will be connected before this method can be called.
-  *@param documentIdentifiers is the set of document identifiers.
-  *@param versions is the corresponding set of version identifiers (individual identifiers may be null).
-  */
-  public void releaseDocumentVersions(String[] documentIdentifiers, String[] versions)
-    throws ManifoldCFException;
-
   /** Get the maximum number of documents to amalgamate together into one batch, for this connector.
   * The connector does not need to be connected for this method to be called.
   *@return the maximum number. 0 indicates "unlimited".
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/JobSeedingRecord.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/JobSeedingRecord.java
index d5314ef..248f287 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/JobSeedingRecord.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/JobSeedingRecord.java
@@ -25,8 +25,8 @@
 {
   public static final String _rcsid = "@(#)$Id$";
 
-  /** The last synch time */
-  protected final long synchTime;
+  /** The last seeding version */
+  protected final String seedingVersionString;
   /** The fail time, or -1L if none */
   protected final long failTime;
   /** The fail count, or -1 if none */
@@ -34,20 +34,20 @@
 
   /** Constructor.
   */
-  public JobSeedingRecord(Long jobID, long synchTime, long failTime, int failRetryCount)
+  public JobSeedingRecord(Long jobID, String seedingVersionString, long failTime, int failRetryCount)
   {
     super(jobID);
-    this.synchTime = synchTime;
+    this.seedingVersionString = seedingVersionString;
     this.failTime = failTime;
     this.failRetryCount = failRetryCount;
   }
 
-  /** Get the synch time.
-  *@return the time.
+  /** Get the seeding version string.
+  *@return the string.
   */
-  public long getSynchTime()
+  public String getSeedingVersionString()
   {
-    return synchTime;
+    return seedingVersionString;
   }
 
   /** Get the hard fail time.
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/JobStartRecord.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/JobStartRecord.java
index cec0c26..eec5e26 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/JobStartRecord.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/JobStartRecord.java
@@ -25,8 +25,8 @@
 {
   public static final String _rcsid = "@(#)$Id: JobStartRecord.java 988245 2010-08-23 18:39:35Z kwright $";
 
-  /** The last synch time */
-  protected final long synchTime;
+  /** The last seeding version */
+  protected final String seedingVersionString;
   /** The requestMinimum flag */
   protected final boolean requestMinimum;
   /** The fail time, or -1L if none */
@@ -36,21 +36,21 @@
 
   /** Constructor.
   */
-  public JobStartRecord(Long jobID, long synchTime, boolean requestMinimum, long failTime, int failRetryCount)
+  public JobStartRecord(Long jobID, String seedingVersionString, boolean requestMinimum, long failTime, int failRetryCount)
   {
     super(jobID);
-    this.synchTime = synchTime;
+    this.seedingVersionString = seedingVersionString;
     this.requestMinimum = requestMinimum;
     this.failTime = failTime;
     this.failRetryCount = failRetryCount;
   }
 
-  /** Get the synch time.
-  *@return the time.
+  /** Get the seeding version string.
+  *@return the string.
   */
-  public long getSynchTime()
+  public String getSeedingVersionString()
   {
-    return synchTime;
+    return seedingVersionString;
   }
 
   /** Get the requestMinimum flag.
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Carrydown.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Carrydown.java
index 034a477..229a99a 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Carrydown.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Carrydown.java
@@ -457,57 +457,102 @@
       presentMap.put(vr,vr);
     }
   }
+  
+  /** Revert all records belonging to the specified parent documents to their original,
+  * pre-modified state.
+  */
+  public void revertRecords(Long jobID, String[] parentDocumentIDHashes)
+    throws ManifoldCFException
+  {
+    int maxClause = getMaxInClause();
+    StringBuilder sb = new StringBuilder();
+    List<String> list = new ArrayList<String>();
+    int k = 0;
+    for (String parentDocumentIDHash : parentDocumentIDHashes)
+    {
+      if (k == maxClause)
+      {
+        performRevertRecords(sb.toString(),jobID,list);
+        sb.setLength(0);
+        list.clear();
+        k = 0;
+      }
+      if (k > 0)
+        sb.append(",");
+      sb.append("?");
+      list.add(parentDocumentIDHash);
+      k++;
+    }
+
+    if (k > 0)
+      performRevertRecords(sb.toString(),jobID,list);
+  }
+  
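+  The loop above is the IN-clause batching idiom used throughout these job classes: parameters
+  accumulate until getMaxInClause() is reached (databases cap the number of IN-clause values),
+  the batch is flushed, and a trailing flush handles the remainder.  The skeleton, with
+  hypothetical names:
+
+    int k = 0;
+    for (String item : items)
+    {
+      if (k == maxClause)
+      {
+        flush(batch);    // e.g. performRevertRecords(clause, jobID, batch)
+        batch.clear();
+        k = 0;
+      }
+      batch.add(item);
+      k++;
+    }
+    if (k > 0)
+      flush(batch);      // flush the final partial batch
+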
+  protected void performRevertRecords(String query, Long jobID, List<String> list)
+    throws ManifoldCFException
+  {
+    // Delete new records
+    StringBuilder sb = new StringBuilder("WHERE ");
+    ArrayList newList = new ArrayList();
+    
+    sb.append(buildConjunctionClause(newList,new ClauseDescription[]{
+      new UnitaryClause(jobIDField,jobID),
+      new MultiClause(parentIDHashField,list)})).append(" AND ");
+      
+    sb.append(newField).append("=?");
+    newList.add(statusToString(ISNEW_NEW));
+    performDelete(sb.toString(),newList,null);
+
+    // Restore old values
+    sb = new StringBuilder("WHERE ");
+    newList.clear();
+
+    sb.append(buildConjunctionClause(newList,new ClauseDescription[]{
+      new UnitaryClause(jobIDField,jobID),
+      new MultiClause(parentIDHashField,list)})).append(" AND ");
+
+    sb.append(newField).append("=?");
+    newList.add(statusToString(ISNEW_EXISTING));
+    
+    HashMap map = new HashMap();
+    map.put(newField,statusToString(ISNEW_BASE));
+    map.put(processIDField,null);
+    performUpdate(map,sb.toString(),newList,null);
+    
+    noteModifications(0,list.size(),0);
+  }
+
   /** Return all records belonging to the specified parent documents to the base state,
   * and delete the old (eliminated) child records.
   */
   public void restoreRecords(Long jobID, String[] parentDocumentIDHashes)
     throws ManifoldCFException
   {
-    beginTransaction();
-    try
+    int maxClause = getMaxInClause();
+    StringBuilder sb = new StringBuilder();
+    List<String> list = new ArrayList<String>();
+    int k = 0;
+    for (String parentDocumentIDHash : parentDocumentIDHashes)
     {
-      int maxClause = getMaxInClause();
-      StringBuilder sb = new StringBuilder();
-      ArrayList list = new ArrayList();
-      int i = 0;
-      int k = 0;
-      while (i < parentDocumentIDHashes.length)
+      if (k == maxClause)
       {
-        if (k == maxClause)
-        {
-          performRestoreRecords(sb.toString(),jobID,list);
-          sb.setLength(0);
-          list.clear();
-          k = 0;
-        }
-        if (k > 0)
-          sb.append(",");
-        sb.append("?");
-        String parentDocumentIDHash = parentDocumentIDHashes[i++];
-        list.add(parentDocumentIDHash);
-        k++;
-      }
-
-      if (k > 0)
         performRestoreRecords(sb.toString(),jobID,list);
+        sb.setLength(0);
+        list.clear();
+        k = 0;
+      }
+      if (k > 0)
+        sb.append(",");
+      sb.append("?");
+      list.add(parentDocumentIDHash);
+      k++;
     }
-    catch (ManifoldCFException e)
-    {
-      signalRollback();
-      throw e;
-    }
-    catch (Error e)
-    {
-      signalRollback();
-      throw e;
-    }
-    finally
-    {
-      endTransaction();
-    }
+
+    if (k > 0)
+      performRestoreRecords(sb.toString(),jobID,list);
   }
 
-  protected void performRestoreRecords(String query, Long jobID, ArrayList list)
+  protected void performRestoreRecords(String query, Long jobID, List<String> list)
     throws ManifoldCFException
   {
     // Delete
@@ -547,45 +592,23 @@
   public void deleteRecords(Long jobID, String[] documentIDHashes)
     throws ManifoldCFException
   {
-    beginTransaction();
-    try
+    int maxClause = maxClausePerformDeleteRecords(jobID);
+    List<String> list = new ArrayList<String>();
+    int k = 0;
+    for (String documentIDHash : documentIDHashes)
     {
-      int maxClause = maxClausePerformDeleteRecords(jobID);
-      ArrayList list = new ArrayList();
-      int i = 0;
-      int k = 0;
-      while (i < documentIDHashes.length)
+      if (k == maxClause)
       {
-        if (k == maxClause)
-        {
-          performDeleteRecords(jobID,list);
-          list.clear();
-          k = 0;
-        }
-        list.add(documentIDHashes[i++]);
-        k++;
-      }
-
-      if (k > 0)
         performDeleteRecords(jobID,list);
-
-
-    }
-    catch (ManifoldCFException e)
-    {
-      signalRollback();
-      throw e;
-    }
-    catch (Error e)
-    {
-      signalRollback();
-      throw e;
-    }
-    finally
-    {
-      endTransaction();
+        list.clear();
+        k = 0;
+      }
+      list.add(documentIDHash);
+      k++;
     }
 
+    if (k > 0)
+      performDeleteRecords(jobID,list);
   }
 
   protected int maxClausePerformDeleteRecords(Long jobID)
@@ -594,7 +617,7 @@
       new UnitaryClause(jobIDField,jobID)});
   }
     
-  protected void performDeleteRecords(Long jobID, ArrayList list)
+  protected void performDeleteRecords(Long jobID, List<String> list)
     throws ManifoldCFException
   {
     StringBuilder sb = new StringBuilder("WHERE ");
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
index 0682fb3..2016f4f 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
@@ -370,14 +370,25 @@
     doFinish(jobID,legalLinkTypes,sourceDocumentHashes,hopcountMethod);
   }
 
+  /** Revert newly-added links, because of a possibly incomplete document processing phase.
+  * All child links marked as "new" will be removed, and all links marked as "existing" will be
+  * reset to be "base".
+  */
+  public void revertParents(Long jobID, String[] sourceDocumentHashes)
+    throws ManifoldCFException
+  {
+    intrinsicLinkManager.revertLinks(jobID,sourceDocumentHashes);
+  }
+  
   /** Do the work of recording source-target references. */
   protected boolean[] doRecord(Long jobID, String[] legalLinkTypes, String sourceDocumentIDHash, String[] targetDocumentIDHashes, String linkType,
     int hopcountMethod, String processID)
     throws ManifoldCFException
   {
-
-    // We have to both add the reference, AND invalidate appropriate cached hopcounts (if it is a NEW
-    // link.)
+    // NOTE: In order for the revertParents() call above to be correct in its current form,
+    // this method would need to be revised to not process any additions until the finishParents() call
+    // is made.  At the moment, revertParents() is not used by any thread.
+    // TBD, MHL
     boolean[] rval = new boolean[targetDocumentIDHashes.length];
     for (int i = 0; i < rval.length; i++)
     {
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/IntrinsicLink.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/IntrinsicLink.java
index 331db8b..b1e2a88 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/IntrinsicLink.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/IntrinsicLink.java
@@ -232,19 +232,17 @@
     String[] targetDocumentIDHashes, String linkType, String processID)
     throws ManifoldCFException
   {
-    HashMap duplicateRemoval = new HashMap();
+    Set<String> duplicateRemoval = new HashSet<String>();
     int maxClause = maxClausePerformExistsCheck(jobID,linkType,sourceDocumentIDHash);
-    ArrayList list = new ArrayList();
+    List<String> list = new ArrayList<String>();
     int i = 0;
-    int k = 0;
     // Keep track of the document identifiers that have been seen vs. those that were unseen.
-    HashMap presentMap = new HashMap();
-    while (k < targetDocumentIDHashes.length)
+    Set<String> presentMap = new HashSet<String>();
+    for (String targetDocumentIDHash : targetDocumentIDHashes)
     {
-      String targetDocumentIDHash = targetDocumentIDHashes[k++];
-      if (duplicateRemoval.get(targetDocumentIDHash) != null)
+      if (duplicateRemoval.contains(targetDocumentIDHash))
         continue;
-      duplicateRemoval.put(targetDocumentIDHash,targetDocumentIDHash);
+      duplicateRemoval.add(targetDocumentIDHash);
       if (i == maxClause)
       {
         // Do the query and record the results
@@ -262,22 +260,22 @@
     // an update.
     // We have to count these by hand, in case there are duplicates in the array.
     int count = 0;
-    Iterator iter = duplicateRemoval.keySet().iterator();
+    Iterator<String> iter = duplicateRemoval.iterator();
     while (iter.hasNext())
     {
-      String targetDocumentIDHash = (String)iter.next();
-      if (presentMap.get(targetDocumentIDHash) == null)
+      String targetDocumentIDHash = iter.next();
+      if (!presentMap.contains(targetDocumentIDHash))
         count++;
     }
     String[] newReferences = new String[count];
     int j = 0;
     // Note: May be able to make this more efficient if we update things in batches...
-    iter = duplicateRemoval.keySet().iterator();
+    iter = duplicateRemoval.iterator();
     while (iter.hasNext())
     {
-      String targetDocumentIDHash = (String)iter.next();
+      String targetDocumentIDHash = iter.next();
 
-      if (presentMap.get(targetDocumentIDHash) == null)
+      if (!presentMap.contains(targetDocumentIDHash))
       {
         newReferences[j++] = targetDocumentIDHash;
         HashMap map = new HashMap();
@@ -319,7 +317,7 @@
   }
     
   /** Do the exists check, in batch. */
-  protected void performExistsCheck(Map presentMap, Long jobID, String linkType, String childIDHash, ArrayList list)
+  protected void performExistsCheck(Set<String> presentMap, Long jobID, String linkType, String childIDHash, List<String> list)
     throws ManifoldCFException
   {
     ArrayList newList = new ArrayList();
@@ -330,12 +328,11 @@
       new UnitaryClause(childIDHashField,childIDHash)});
 
     IResultSet result = performQuery("SELECT "+parentIDHashField+" FROM "+getTableName()+" WHERE "+query+" FOR UPDATE",newList,null,null);
-    int i = 0;
-    while (i < result.getRowCount())
+    for (int i = 0; i < result.getRowCount(); i++)
     {
-      IResultRow row = result.getRow(i++);
+      IResultRow row = result.getRow(i);
       String parentIDHash = (String)row.getValue(parentIDHashField);
-      presentMap.put(parentIDHash,parentIDHash);
+      presentMap.add(parentIDHash);
     }
   }
 
@@ -375,10 +372,9 @@
     throws ManifoldCFException
   {
     int maxClause = maxClausePerformRemoveDocumentLinks(jobID);
-    ArrayList list = new ArrayList();
-    int i = 0;
+    List<String> list = new ArrayList<String>();
     int k = 0;
-    while (i < documentIDHashes.length)
+    for (String documentIDHash : documentIDHashes)
     {
       if (k == maxClause)
       {
@@ -386,7 +382,7 @@
         list.clear();
         k = 0;
       }
-      list.add(documentIDHashes[i++]);
+      list.add(documentIDHash);
       k++;
     }
 
@@ -401,7 +397,7 @@
       new UnitaryClause(jobIDField,jobID)});
   }
     
-  protected void performRemoveDocumentLinks(ArrayList list, Long jobID)
+  protected void performRemoveDocumentLinks(List<String> list, Long jobID)
     throws ManifoldCFException
   {
     StringBuilder sb = new StringBuilder("WHERE ");
@@ -424,10 +420,9 @@
     throws ManifoldCFException
   {
     int maxClause = maxClausePerformRemoveLinks(jobID);
-    ArrayList list = new ArrayList();
-    int i = 0;
+    List<String> list = new ArrayList<String>();
     int k = 0;
-    while (i < sourceDocumentIDHashes.length)
+    for (String sourceDocumentIDHash : sourceDocumentIDHashes)
     {
       if (k == maxClause)
       {
@@ -435,7 +430,7 @@
         list.clear();
         k = 0;
       }
-      list.add(sourceDocumentIDHashes[i++]);
+      list.add(sourceDocumentIDHash);
       k++;
     }
 
@@ -450,7 +445,7 @@
       new UnitaryClause(jobIDField,jobID)});
   }
     
-  protected void performRemoveLinks(ArrayList list, Long jobID, String commonNewExpression,
+  protected void performRemoveLinks(List<String> list, Long jobID, String commonNewExpression,
     ArrayList commonNewParams)
     throws ManifoldCFException
   {
@@ -474,10 +469,9 @@
     throws ManifoldCFException
   {
     int maxClause = maxClausesPerformRestoreLinks(jobID);
-    ArrayList list = new ArrayList();
-    int i = 0;
+    List<String> list = new ArrayList<String>();
     int k = 0;
-    while (i < sourceDocumentIDHashes.length)
+    for (String sourceDocumentIDHash : sourceDocumentIDHashes)
     {
       if (k == maxClause)
       {
@@ -485,7 +479,7 @@
         list.clear();
         k = 0;
       }
-      list.add(sourceDocumentIDHashes[i++]);
+      list.add(sourceDocumentIDHash);
       k++;
     }
 
@@ -500,7 +494,7 @@
       new UnitaryClause(jobIDField,jobID)});
   }
   
-  protected void performRestoreLinks(Long jobID, ArrayList list)
+  protected void performRestoreLinks(Long jobID, List<String> list)
     throws ManifoldCFException
   {
     HashMap map = new HashMap();
@@ -519,6 +513,67 @@
     performUpdate(map,sb.toString(),newList,null);
   }
 
+  /** Throw away links added during (aborted) processing.
+  */
+  public void revertLinks(Long jobID, String[] sourceDocumentIDHashes)
+    throws ManifoldCFException
+  {
+    int maxClause = maxClausesPerformRevertLinks(jobID);
+    List<String> list = new ArrayList<String>();
+    int k = 0;
+    for (String sourceDocumentIDHash : sourceDocumentIDHashes)
+    {
+      if (k == maxClause)
+      {
+        performRevertLinks(jobID,list);
+        list.clear();
+        k = 0;
+      }
+      list.add(sourceDocumentIDHash);
+      k++;
+    }
+
+    if (k > 0)
+      performRevertLinks(jobID,list);
+    noteModifications(0,sourceDocumentIDHashes.length,0);
+  }
+
+  protected int maxClausesPerformRevertLinks(Long jobID)
+  {
+    return findConjunctionClauseMax(new ClauseDescription[]{
+      new UnitaryClause(jobIDField,jobID)});
+  }
+  
+  protected void performRevertLinks(Long jobID, List<String> list)
+    throws ManifoldCFException
+  {
+    // First, delete everything marked as "new"
+    StringBuilder sb = new StringBuilder("WHERE ");
+    ArrayList newList = new ArrayList();
+
+    sb.append(buildConjunctionClause(newList,new ClauseDescription[]{
+      new UnitaryClause(jobIDField,jobID),
+      new MultiClause(childIDHashField,list)})).append(" AND ")
+      .append(newField).append("=?");
+    newList.add(statusToString(LINKSTATUS_NEW));
+    performDelete(sb.toString(),newList,null);
+
+    // Now map everything marked as "EXISTING" back to "BASE".
+    HashMap map = new HashMap();
+    map.put(newField,statusToString(LINKSTATUS_BASE));
+    map.put(processIDField,null);
+    
+    sb = new StringBuilder();
+    newList.clear();
+    
+    sb.append(buildConjunctionClause(newList,new ClauseDescription[]{
+      new UnitaryClause(jobIDField,jobID),
+      new MultiClause(childIDHashField,list)})).append(" AND ")
+      .append(newField).append("=?");
+    newList.add(statusToString(LINKSTATUS_EXISTING));
+    performUpdate(map,sb.toString(),newList,null);
+  }
+
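+  The two statements above implement the table's three-way link lifecycle.  A summary sketch of
+  the transitions (the LINKSTATUS_* constants are the table's own; the arrow notation is
+  illustrative):
+
+    // On revert of an aborted processing pass:
+    //   LINKSTATUS_NEW      -> row deleted       (link discovered only during the aborted pass)
+    //   LINKSTATUS_EXISTING -> LINKSTATUS_BASE   (re-confirmed link returns to its prior state;
+    //                                             processIDField is cleared)
+    //   LINKSTATUS_BASE     -> untouched         (links never touched by the aborted pass)
+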
   /** Get document's children.
   *@return rows that contain the children.  Column names are 'linktype','childidentifier'.
   */
@@ -547,11 +602,10 @@
     IResultSet set = performQuery("SELECT DISTINCT "+parentIDHashField+" FROM "+
       getTableName()+" WHERE "+query,list,null,null);
     String[] rval = new String[set.getRowCount()];
-    int i = 0;
-    while (i < rval.length)
+    for (int i = 0; i < rval.length; i++)
     {
       IResultRow row = set.getRow(i);
-      rval[i++] = (String)row.getValue(parentIDHashField);
+      rval[i] = (String)row.getValue(parentIDHashField);
     }
     return rval;
   }
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobDescription.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobDescription.java
index 2cfa6ab..3207b36 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobDescription.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobDescription.java
@@ -101,7 +101,7 @@
         pipelineStage.getIsOutput(),
         pipelineStage.getConnectionName(),
         pipelineStage.getDescription(),
-        pipelineStage.getSpecification().duplicate(readOnly)));
+        ((OutputSpecification)pipelineStage.getSpecification()).duplicate(readOnly)));
     }
     rval.description = description;
     rval.type = type;
@@ -244,7 +244,7 @@
   *@return the empty output specification for this pipeline stage.
   */
   @Override
-  public OutputSpecification addPipelineStage(int prerequisiteStage, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription)
+  public Specification addPipelineStage(int prerequisiteStage, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription)
   {
     if (readOnly)
       throw new IllegalStateException("Attempt to change read-only object");
@@ -267,7 +267,7 @@
   *@return the newly-created output specification.
   */
   @Override
-  public OutputSpecification insertPipelineStage(int index, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription)
+  public Specification insertPipelineStage(int index, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription)
   {
     if (readOnly)
       throw new IllegalStateException("Attempt to change read-only object");
@@ -333,7 +333,7 @@
   *@return the specification for the connection.
   */
   @Override
-  public OutputSpecification getPipelineStageSpecification(int index)
+  public Specification getPipelineStageSpecification(int index)
   {
     return pipelineStages.get(index).getSpecification();
   }
@@ -652,7 +652,7 @@
     protected final boolean isOutput;
     protected final String connectionName;
     protected final String description;
-    protected final OutputSpecification specification;
+    protected final Specification specification;
     
     public PipelineStage(int prerequisiteStage, boolean isOutput, String connectionName, String description)
     {
@@ -663,7 +663,7 @@
       this.specification = new OutputSpecification();
     }
 
-    public PipelineStage(int prerequisiteStage, boolean isOutput, String connectionName, String description, OutputSpecification spec)
+    public PipelineStage(int prerequisiteStage, boolean isOutput, String connectionName, String description, Specification spec)
     {
       this.prerequisiteStage = prerequisiteStage;
       this.isOutput = isOutput;
@@ -696,7 +696,7 @@
         prerequisiteStage = prerequisite;
     }
     
-    public OutputSpecification getSpecification()
+    public Specification getSpecification()
     {
       return specification;
     }
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
index 10aa855..1b29b3f 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
@@ -3071,16 +3071,14 @@
     // order the "select for update" operations appropriately.
     //
 
-    HashMap indexMap = new HashMap();
+    Map<String,Integer> indexMap = new HashMap<String,Integer>();
     String[] docIDHashes = new String[documentDescriptions.length];
 
-    int i = 0;
-    while (i < documentDescriptions.length)
+    for (int i = 0; i < documentDescriptions.length; i++)
     {
       String documentIDHash = documentDescriptions[i].getDocumentIdentifierHash() + ":" + documentDescriptions[i].getJobID();
       docIDHashes[i] = documentIDHash;
       indexMap.put(documentIDHash,new Integer(i));
-      i++;
     }
 
     java.util.Arrays.sort(docIDHashes);
@@ -3095,13 +3093,10 @@
       try
       {
         // Do one row at a time, to avoid deadlocking things
-        i = 0;
-        while (i < docIDHashes.length)
+        for (String docIDHash : docIDHashes)
         {
-          String docIDHash = docIDHashes[i];
-
           // Get the DocumentDescription object
-          DocumentDescription dd = documentDescriptions[((Integer)indexMap.get(docIDHash)).intValue()];
+          DocumentDescription dd = documentDescriptions[indexMap.get(docIDHash).intValue()];
 
           // Query for the status
           ArrayList list = new ArrayList();
@@ -3119,7 +3114,6 @@
             // Update the jobqueue table
             jobQueue.updateCompletedRecord(dd.getID(),status);
           }
-          i++;
         }
         TrackerClass.notePrecommit();
         database.performCommit();
@@ -3146,6 +3140,12 @@
         TrackerClass.noteRollback();
         throw e;
       }
+      catch (RuntimeException e)
+      {
+        database.signalRollback();
+        TrackerClass.noteRollback();
+        throw e;
+      }
       finally
       {
         database.endTransaction();
@@ -4299,7 +4299,68 @@
     }
 
   }
-  
+
+  /** Retry delete notification.
+  *@param jnr is the current job notification record.
+  *@param failTime is the new fail time (-1L if none).
+  *@param failCount is the new fail retry count (-1 if none).
+  */
+  @Override
+  public void retryDeleteNotification(JobNotifyRecord jnr, long failTime, int failCount)
+    throws ManifoldCFException
+  {
+    Long jobID = jnr.getJobID();
+    long oldFailTime = jnr.getFailTime();
+    if (oldFailTime == -1L)
+      oldFailTime = failTime;
+    failTime = oldFailTime;
+    int oldFailCount = jnr.getFailRetryCount();
+    if (oldFailCount == -1)
+      oldFailCount = failCount;
+    else
+    {
+      oldFailCount--;
+      if (failCount != -1 && oldFailCount > failCount)
+        oldFailCount = failCount;
+    }
+    failCount = oldFailCount;
+
+    while (true)
+    {
+      long sleepAmt = 0L;
+      database.beginTransaction();
+      try
+      {
+        jobs.retryDeleteNotification(jobID,failTime,failCount);
+        database.performCommit();
+        break;
+      }
+      catch (Error e)
+      {
+        database.signalRollback();
+        throw e;
+      }
+      catch (ManifoldCFException e)
+      {
+        database.signalRollback();
+        if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT)
+        {
+          if (Logging.perf.isDebugEnabled())
+            Logging.perf.debug("Aborted transaction resetting job notification: "+e.getMessage());
+          sleepAmt = getRandomAmount();
+          continue;
+        }
+        throw e;
+      }
+      finally
+      {
+        database.endTransaction();
+        sleepFor(sleepAmt);
+      }
+    }
+
+  }
+
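+  The method above uses the transaction-retry idiom that recurs throughout JobManager: roll back
+  and retry with a randomized delay on DATABASE_TRANSACTION_ABORT, rethrow anything else.  Its
+  skeleton, with doWork() as a hypothetical stand-in for the transactional body (the Error
+  handling shown in the real methods is omitted here for brevity):
+
+    while (true)
+    {
+      long sleepAmt = 0L;
+      database.beginTransaction();
+      try
+      {
+        doWork();                       // hypothetical transactional body
+        database.performCommit();
+        break;                          // success: leave the retry loop
+      }
+      catch (ManifoldCFException e)
+      {
+        database.signalRollback();
+        if (e.getErrorCode() == ManifoldCFException.DATABASE_TRANSACTION_ABORT)
+        {
+          sleepAmt = getRandomAmount(); // back off with a randomized delay, then retry
+          continue;
+        }
+        throw e;
+      }
+      finally
+      {
+        database.endTransaction();
+        sleepFor(sleepAmt);             // no-op when sleepAmt == 0
+      }
+    }
+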
   // Add documents methods
   
   /** Add an initial set of documents to the queue.
@@ -4976,7 +5037,7 @@
           " docs and hopcounts for job "+jobID.toString()+" parent identifier hash "+parentIdentifierHash);
 
         // Go through document id's one at a time, in order - mainly to prevent deadlock as much as possible.  Search for any existing row in jobqueue first (for update)
-        HashMap existingRows = new HashMap();
+        Map<String,JobqueueRecord> existingRows = new HashMap<String,JobqueueRecord>();
 
         for (int z = 0; z < reorderedDocIDHashes.length; z++)
         {
@@ -5030,7 +5091,7 @@
         for (int z = 0; z < reorderedDocIDHashes.length; z++)
         {
           String docIDHash = reorderedDocIDHashes[z];
-          JobqueueRecord jr = (JobqueueRecord)existingRows.get(docIDHash);
+          JobqueueRecord jr = existingRows.get(docIDHash);
           if (jr != null)
           {
             // It was an existing row; do the update logic
@@ -5127,6 +5188,85 @@
       new Object[][][]{dataValues},currentTime,new IPriorityCalculator[]{priority},new String[][]{prereqEventNames});
   }
 
+  /** Undo the addition of child documents to the queue, for a set of documents.
+  * This method is called at the end of document processing, to back out any incomplete additions to the queue, and restore
+  * the status quo ante prior to the incomplete additions.  Call this method instead of finishDocuments() if the
+  * addition of documents was not completed.
+  *@param jobID is the job identifier.
+  *@param legalLinkTypes is the set of legal link types that this connector generates.
+  *@param parentIdentifierHashes are the hashes of the document identifiers for whom child link extraction just took place.
+  */
+  @Override
+  public void revertDocuments(Long jobID, String[] legalLinkTypes,
+    String[] parentIdentifierHashes)
+    throws ManifoldCFException
+  {
+    if (parentIdentifierHashes.length == 0)
+      return;
+    
+    if (legalLinkTypes.length == 0)
+    {
+      while (true)
+      {
+        long sleepAmt = 0L;
+        database.beginTransaction(database.TRANSACTION_SERIALIZED);
+        try
+        {
+          // Revert carrydown records
+          carryDown.revertRecords(jobID,parentIdentifierHashes);
+          database.performCommit();
+          break;
+        }
+        catch (Error e)
+        {
+          database.signalRollback();
+          throw e;
+        }
+        catch (RuntimeException e)
+        {
+          database.signalRollback();
+          throw e;
+        }
+        finally
+        {
+          database.endTransaction();
+          sleepFor(sleepAmt);
+        }
+      }
+    }
+    else
+    {
+      // Revert both hopcount and carrydown
+      while (true)
+      {
+        long sleepAmt = 0L;
+        database.beginTransaction(database.TRANSACTION_SERIALIZED);
+        try
+        {
+          carryDown.revertRecords(jobID,parentIdentifierHashes);
+          hopCount.revertParents(jobID,parentIdentifierHashes);
+          database.performCommit();
+          break;
+        }
+        catch (Error e)
+        {
+          database.signalRollback();
+          throw e;
+        }
+        catch (RuntimeException e)
+        {
+          database.signalRollback();
+          throw e;
+        }
+        finally
+        {
+          database.endTransaction();
+          sleepFor(sleepAmt);
+        }
+      }
+    }
+  }
+
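+  A caller-side sketch of when revertDocuments() applies.  The surrounding worker logic is
+  hypothetical, and the finishDocuments() parameter list is assumed to mirror revertDocuments()
+  plus the hopcount method:
+
+    try
+    {
+      processChildren(jobID, parentIdentifierHashes);   // assumed processing step
+      // Normal completion: let hopcount bookkeeping finish.
+      jobManager.finishDocuments(jobID, legalLinkTypes, parentIdentifierHashes, hopcountMethod);
+    }
+    catch (ServiceInterruption e)
+    {
+      // Processing did not complete: back out partial carrydown/hopcount additions,
+      // restoring the status quo ante.
+      jobManager.revertDocuments(jobID, legalLinkTypes, parentIdentifierHashes);
+      throw e;
+    }
+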
   /** Complete adding child documents to the queue, for a set of documents.
   * This method is called at the end of document processing, to help the hopcount tracking engine do its bookkeeping.
   *@param jobID is the job identifier.
@@ -5179,6 +5319,11 @@
           database.signalRollback();
           throw e;
         }
+        catch (RuntimeException e)
+        {
+          database.signalRollback();
+          throw e;
+        }
         finally
         {
           database.endTransaction();
@@ -5238,6 +5383,11 @@
           database.signalRollback();
           throw e;
         }
+        catch (RuntimeException e)
+        {
+          database.signalRollback();
+          throw e;
+        }
         finally
         {
           database.endTransaction();
@@ -6245,23 +6395,23 @@
   *@param startTime is the job start time.
   */
   @Override
-  public void noteJobStarted(Long jobID, long startTime)
+  public void noteJobStarted(Long jobID, long startTime, String seedingVersion)
     throws ManifoldCFException
   {
-    jobs.noteJobStarted(jobID,startTime);
+    jobs.noteJobStarted(jobID,startTime,seedingVersion);
     if (Logging.jobs.isDebugEnabled())
       Logging.jobs.debug("Job "+jobID+" is now started");
   }
 
   /** Note job seeded.
   *@param jobID is the job id.
-  *@param seedTime is the job seed time.
+  *@param seedingVersion is the job seeding version string to record.
   */
   @Override
-  public void noteJobSeeded(Long jobID, long seedTime)
+  public void noteJobSeeded(Long jobID, String seedingVersion)
     throws ManifoldCFException
   {
-    jobs.noteJobSeeded(jobID,seedTime);
+    jobs.noteJobSeeded(jobID,seedingVersion);
     if (Logging.jobs.isDebugEnabled())
       Logging.jobs.debug("Job "+jobID+" has been successfully reseeded");
   }
@@ -6780,7 +6930,7 @@
         ArrayList list = new ArrayList();
         
         sb.append(jobs.idField).append(",")
-          .append(jobs.lastCheckTimeField).append(",")
+          .append(jobs.seedingVersionField).append(",")
           .append(jobs.failTimeField).append(",")
           .append(jobs.failCountField).append(",")
           .append(jobs.reseedIntervalField)
@@ -6802,10 +6952,7 @@
         {
           IResultRow row = set.getRow(i);
           Long jobID = (Long)row.getValue(jobs.idField);
-          Long x = (Long)row.getValue(jobs.lastCheckTimeField);
-          long synchTime = 0;
-          if (x != null)
-            synchTime = x.longValue();
+          String seedingVersionString = (String)row.getValue(jobs.seedingVersionField);
 
           Long r = (Long)row.getValue(jobs.reseedIntervalField);
           Long reseedTime;
@@ -6834,8 +6981,7 @@
           {
             Logging.jobs.debug("Marked job "+jobID+" for seeding");
           }
-
-          rval[i] = new JobSeedingRecord(jobID,synchTime,failTime,failRetryCount);
+          rval[i] = new JobSeedingRecord(jobID,seedingVersionString,failTime,failRetryCount);
           i++;
         }
         database.performCommit();
@@ -6866,12 +7012,12 @@
     }
   }
 
-  /** Get the list of jobs that are ready for deletion.
+  /** Get the list of jobs that are ready for delete cleanup.
   *@param processID is the current process ID.
   *@return jobs that were in the "readyfordelete" state.
   */
   @Override
-  public JobDeleteRecord[] getJobsReadyForDelete(String processID)
+  public JobDeleteRecord[] getJobsReadyForDeleteCleanup(String processID)
     throws ManifoldCFException
   {
     while (true)
@@ -6957,7 +7103,7 @@
         sb.append(jobs.idField).append(",")
           .append(jobs.failTimeField).append(",")
           .append(jobs.failCountField).append(",")
-          .append(jobs.lastCheckTimeField).append(",")
+          .append(jobs.seedingVersionField).append(",")
           .append(jobs.statusField)
           .append(" FROM ").append(jobs.getTableName()).append(" WHERE ")
           .append(database.buildConjunctionClause(list,new ClauseDescription[]{
@@ -6974,7 +7120,7 @@
         {
           IResultRow row = set.getRow(i);
           Long jobID = (Long)row.getValue(jobs.idField);
-          Long x = (Long)row.getValue(jobs.lastCheckTimeField);
+          String seedingVersionString = (String)row.getValue(jobs.seedingVersionField);
           int status = jobs.stringToStatus((String)row.getValue(jobs.statusField));
           Long failTimeLong = (Long)row.getValue(jobs.failTimeField);
           Long failRetryCountLong = (Long)row.getValue(jobs.failCountField);
@@ -6991,10 +7137,6 @@
 
           boolean requestMinimum = (status == jobs.STATUS_READYFORSTARTUPMINIMAL);
           
-          long synchTime = 0;
-          if (x != null)
-            synchTime = x.longValue();
-
           // Mark status of job as "starting"
           jobs.writeTransientStatus(jobID,requestMinimum?jobs.STATUS_STARTINGUPMINIMAL:jobs.STATUS_STARTINGUP,processID);
           if (Logging.jobs.isDebugEnabled())
@@ -7002,7 +7144,7 @@
             Logging.jobs.debug("Marked job "+jobID+" for startup");
           }
 
-          rval[i] = new JobStartRecord(jobID,synchTime,requestMinimum,failTime,failRetryCount);
+          rval[i] = new JobStartRecord(jobID,seedingVersionString,requestMinimum,failTime,failRetryCount);
           i++;
         }
         database.performCommit();
@@ -7099,6 +7241,84 @@
     }
   }
 
+  /** Remove a job, from the notification state.
+  *@param jobID is the ID of the job to remove.
+  */
+  @Override
+  public void removeJob(Long jobID)
+    throws ManifoldCFException
+  {
+    // While there is no flow that can cause a job to be in the wrong state when this gets called, as a precaution
+    // it might be a good idea to put this in a transaction and have the state get checked first.
+    while (true)
+    {
+      long sleepAmt = 0L;
+      database.beginTransaction();
+      try
+      {
+        // Check job status
+        StringBuilder sb = new StringBuilder("SELECT ");
+        ArrayList list = new ArrayList();
+        
+        sb.append(jobs.statusField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ")
+          .append(database.buildConjunctionClause(list,new ClauseDescription[]{
+            new UnitaryClause(jobs.idField,jobID)}))
+          .append(" FOR UPDATE");
+            
+        IResultSet set = database.performQuery(sb.toString(),list,null,null);
+        if (set.getRowCount() == 0)
+          // Presume already removed!
+          return;
+        IResultRow row = set.getRow(0);
+        int status = jobs.stringToStatus((String)row.getValue(jobs.statusField));
+
+        switch (status)
+        {
+        case Jobs.STATUS_NOTIFYINGOFDELETION:
+          ManifoldCF.noteConfigurationChange();
+          // Remove documents from job queue
+          jobQueue.deleteAllJobRecords(jobID);
+          // Remove carrydowns for the job
+          carryDown.deleteOwner(jobID);
+          // Nothing is in a critical section - so this should be OK.
+          hopCount.deleteOwner(jobID);
+          jobs.delete(jobID);
+          if (Logging.jobs.isDebugEnabled())
+          {
+            Logging.jobs.debug("Removed job "+jobID);
+          }
+          break;
+        default:
+          throw new ManifoldCFException("Unexpected job status: "+Integer.toString(status));
+        }
+        database.performCommit();
+        return;
+      }
+      catch (ManifoldCFException e)
+      {
+        database.signalRollback();
+        if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT)
+        {
+          if (Logging.perf.isDebugEnabled())
+            Logging.perf.debug("Aborted clearing delete notification state for job: "+e.getMessage());
+          sleepAmt = getRandomAmount();
+          continue;
+        }
+        throw e;
+      }
+      catch (Error e)
+      {
+        database.signalRollback();
+        throw e;
+      }
+      finally
+      {
+        database.endTransaction();
+        sleepFor(sleepAmt);
+      }
+    }
+  }
+
   /** Reset a job starting for delete back to "ready for delete"
   * state.
   *@param jobID is the job id.
@@ -7237,6 +7457,75 @@
     }
   }
 
+  /** Reset a job that is delete notifying back to "ready for delete notify"
+  * state.
+  *@param jobID is the job id.
+  */
+  @Override
+  public void resetDeleteNotifyJob(Long jobID)
+    throws ManifoldCFException
+  {
+    while (true)
+    {
+      long sleepAmt = 0L;
+      database.beginTransaction();
+      try
+      {
+        // Check job status
+        StringBuilder sb = new StringBuilder("SELECT ");
+        ArrayList list = new ArrayList();
+        
+        sb.append(jobs.statusField).append(" FROM ").append(jobs.getTableName()).append(" WHERE ")
+          .append(database.buildConjunctionClause(list,new ClauseDescription[]{
+            new UnitaryClause(jobs.idField,jobID)}))
+          .append(" FOR UPDATE");
+            
+        IResultSet set = database.performQuery(sb.toString(),list,null,null);
+        if (set.getRowCount() == 0)
+          throw new ManifoldCFException("No such job: "+jobID);
+        IResultRow row = set.getRow(0);
+        int status = jobs.stringToStatus((String)row.getValue(jobs.statusField));
+
+        switch (status)
+        {
+        case Jobs.STATUS_NOTIFYINGOFDELETION:
+          if (Logging.jobs.isDebugEnabled())
+            Logging.jobs.debug("Setting job "+jobID+" back to 'ReadyForDeleteNotify' state");
+
+        // Set the state of the job back to "ReadyForDeleteNotify"
+          jobs.writePermanentStatus(jobID,jobs.STATUS_READYFORDELETENOTIFY,true);
+          break;
+        default:
+          throw new ManifoldCFException("Unexpected job status: "+Integer.toString(status));
+        }
+        database.performCommit();
+        return;
+      }
+      catch (ManifoldCFException e)
+      {
+        database.signalRollback();
+        if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT)
+        {
+          if (Logging.perf.isDebugEnabled())
+            Logging.perf.debug("Aborted resetting delete notify job: "+e.getMessage());
+          sleepAmt = getRandomAmount();
+          continue;
+        }
+        throw e;
+      }
+      catch (Error e)
+      {
+        database.signalRollback();
+        throw e;
+      }
+      finally
+      {
+        database.endTransaction();
+        sleepFor(sleepAmt);
+      }
+    }
+  }
+
   /** Reset a starting job back to "ready for startup" state.
   *@param jobID is the job id.
   */
@@ -7516,18 +7805,12 @@
           if (confirmSet.getRowCount() > 0)
             continue;
 
-          ManifoldCF.noteConfigurationChange();
-          // Remove documents from job queue
-          jobQueue.deleteAllJobRecords(jobID);
-          // Remove carrydowns for the job
-          carryDown.deleteOwner(jobID);
-          // Nothing is in a critical section - so this should be OK.
-          hopCount.deleteOwner(jobID);
-          jobs.delete(jobID);
+          jobs.finishJobCleanup(jobID);
           if (Logging.jobs.isDebugEnabled())
           {
-            Logging.jobs.debug("Removed job "+jobID);
+            Logging.jobs.debug("Job "+jobID+" cleanup is now completed");
           }
+
         }
         database.performCommit();
         return;
@@ -7740,7 +8023,88 @@
       }
     }
   }
-  
+
+  /** Find the list of jobs that need to have their connectors notified of job deletion.
+  *@param processID is the process ID.
+  *@return the IDs of jobs that need their output connectors notified in order to be removed.
+  */
+  @Override
+  public JobNotifyRecord[] getJobsReadyForDelete(String processID)
+    throws ManifoldCFException
+  {
+    while (true)
+    {
+      long sleepAmt = 0L;
+      database.beginTransaction();
+      try
+      {
+        // Do the query
+        StringBuilder sb = new StringBuilder("SELECT ");
+        ArrayList list = new ArrayList();
+        
+        sb.append(jobs.idField).append(",").append(jobs.failTimeField).append(",").append(jobs.failCountField)
+          .append(" FROM ").append(jobs.getTableName()).append(" WHERE ")
+          .append(database.buildConjunctionClause(list,new ClauseDescription[]{
+            new UnitaryClause(jobs.statusField,jobs.statusToString(jobs.STATUS_READYFORDELETENOTIFY))}))
+          .append(" FOR UPDATE");
+            
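+        // FOR UPDATE locks the matching rows for the duration of the transaction, so
+        // competing agents processes cannot claim the same jobs before we mark them below.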
+        IResultSet set = database.performQuery(sb.toString(),list,null,null);
+        // Return them all
+        JobNotifyRecord[] rval = new JobNotifyRecord[set.getRowCount()];
+        int i = 0;
+        while (i < rval.length)
+        {
+          IResultRow row = set.getRow(i);
+          Long jobID = (Long)row.getValue(jobs.idField);
+          Long failTimeLong = (Long)row.getValue(jobs.failTimeField);
+          Long failRetryCountLong = (Long)row.getValue(jobs.failCountField);
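+          // Null fail time/count columns mean no failure has been recorded yet; -1 is
+          // the in-memory sentinel for "none" (see JobNotifyRecord).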
+          long failTime;
+          if (failTimeLong == null)
+            failTime = -1L;
+          else
+            failTime = failTimeLong.longValue();
+          int failRetryCount;
+          if (failRetryCountLong == null)
+            failRetryCount = -1;
+          else
+            failRetryCount = (int)failRetryCountLong.longValue();
+      
+          // Mark the job status as transient "notifying of deletion", claimed by this process
+          jobs.writeTransientStatus(jobID,jobs.STATUS_NOTIFYINGOFDELETION,processID);
+          if (Logging.jobs.isDebugEnabled())
+          {
+            Logging.jobs.debug("Found job "+jobID+" in need of delete notification");
+          }
+          rval[i++] = new JobNotifyRecord(jobID,failTime,failRetryCount);
+        }
+        database.performCommit();
+        return rval;
+      }
+      catch (ManifoldCFException e)
+      {
+        database.signalRollback();
+        if (e.getErrorCode() == e.DATABASE_TRANSACTION_ABORT)
+        {
+          if (Logging.perf.isDebugEnabled())
+            Logging.perf.debug("Aborted getting jobs ready for delete notify: "+e.getMessage());
+          sleepAmt = getRandomAmount();
+          continue;
+        }
+        throw e;
+      }
+      catch (Error e)
+      {
+        database.signalRollback();
+        throw e;
+      }
+      finally
+      {
+        database.endTransaction();
+        sleepFor(sleepAmt);
+      }
+    }
+  }
+
   /** Complete the sequence that resumes jobs, either from a pause or from a scheduling window
   * wait.  The logic will restore the job to an active state (many possibilities depending on
   * connector status), and will record the jobs that have been so modified.
@@ -8264,6 +8628,8 @@
         break;
       case Jobs.STATUS_READYFORNOTIFY:
       case Jobs.STATUS_NOTIFYINGOFCOMPLETION:
+      case Jobs.STATUS_READYFORDELETENOTIFY:
+      case Jobs.STATUS_NOTIFYINGOFDELETION:
         rstatus = JobStatus.JOBSTATUS_JOBENDNOTIFICATION;
         break;
       case Jobs.STATUS_ABORTING:
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java
index bdf0af6..ec36348 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/Jobs.java
@@ -38,6 +38,7 @@
  * <tr><td>lasttime</td><td>BIGINT</td><td>operational field</td></tr>
  * <tr><td>starttime</td><td>BIGINT</td><td>operational field</td></tr>
  * <tr><td>lastchecktime</td><td>BIGINT</td><td>operational field</td></tr>
+ * <tr><td>seedingversion</td><td>LONGTEXT</td><td>operational field</td></tr>
  * <tr><td>endtime</td><td>BIGINT</td><td>operational field</td></tr>
  * <tr><td>docspec</td><td>LONGTEXT</td><td></td></tr>
  * <tr><td>connectionname</td><td>VARCHAR(32)</td><td>Reference:repoconnections.connectionname</td></tr>
@@ -111,6 +112,8 @@
   public static final int STATUS_DELETING = 35;                         // The job is deleting.
   public static final int STATUS_DELETESTARTINGUP = 36;         // The delete is starting up.
   public static final int STATUS_ABORTINGSHUTTINGDOWN = 37;     // Aborting the cleanup phase.
+  public static final int STATUS_READYFORDELETENOTIFY = 38;     // Job is ready for delete notification
+  public static final int STATUS_NOTIFYINGOFDELETION = 39;      // Notifying connector of job deletion
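+  // Note: these take the numeric slots formerly used by the "uninstalled" states,
+  // which are renumbered to 40-42 below; the integers are in-memory only (the database
+  // stores one-character codes), so renumbering is safe.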
   
   // These statuses have to do with whether a job has an installed underlying connector or not.
   // There are two reasons to have a special state here: (1) if the behavior of the crawler differs, or (2) if the
@@ -121,9 +124,9 @@
   // But, since there is no indication in the jobs table of an uninstalled connector for such jobs, the code which starts
   // jobs up (or otherwise would enter any state that has a corresponding special state) must check to see if the underlying
   // connector exists before deciding what state to put the job into.
-  public static final int STATUS_ACTIVE_UNINSTALLED = 38;               // Active, but repository connector not installed
-  public static final int STATUS_ACTIVESEEDING_UNINSTALLED = 39;   // Active and seeding, but repository connector not installed
-  public static final int STATUS_DELETING_NOOUTPUT = 40;                // Job is being deleted but there's no output connector installed
+  public static final int STATUS_ACTIVE_UNINSTALLED = 40;               // Active, but repository connector not installed
+  public static final int STATUS_ACTIVESEEDING_UNINSTALLED = 41;   // Active and seeding, but repository connector not installed
+  public static final int STATUS_DELETING_NOOUTPUT = 42;                // Job is being deleted but there's no output connector installed
 
   // Deprecated states.  These states should never be used; they're defined only for upgrade purposes
   public static final int STATUS_ACTIVE_NOOUTPUT = 100;                  // Active, but output connector not installed
@@ -175,9 +178,8 @@
   public final static String lastTimeField = "lasttime";
   /** If active, paused, activewait, or pausedwait, the start time of the current session, else null. */
   public final static String startTimeField = "starttime";
-  /** The time of the LAST session, if any.  This is the place where the "last repository change check time"
-  * is gotten from. */
-  public final static String lastCheckTimeField = "lastchecktime";
+  /** The seeding version string; for many connectors this is simply the last time seeding was done. */
+  public final static String seedingVersionField = "seedingversion";
   /** If inactive, the end time of the LAST session, if any. */
   public final static String endTimeField = "endtime";
   /** If non-null, this is the time that the current execution window closes, in ms since epoch. */
@@ -213,6 +215,8 @@
     statusMap.put("S",new Integer(STATUS_SHUTTINGDOWN));
     statusMap.put("s",new Integer(STATUS_READYFORNOTIFY));
     statusMap.put("n",new Integer(STATUS_NOTIFYINGOFCOMPLETION));
+    statusMap.put("d",new Integer(STATUS_READYFORDELETENOTIFY));
+    statusMap.put("j",new Integer(STATUS_NOTIFYINGOFDELETION));
     statusMap.put("W",new Integer(STATUS_ACTIVEWAIT));
     statusMap.put("Z",new Integer(STATUS_PAUSEDWAIT));
     statusMap.put("X",new Integer(STATUS_ABORTING));
@@ -245,15 +249,18 @@
     statusMap.put("I",new Integer(STATUS_RESUMING));
     statusMap.put("i",new Integer(STATUS_RESUMINGSEEDING));
 
+
     // These are the uninstalled states.  The values, I'm afraid, are pretty random.
     statusMap.put("R",new Integer(STATUS_ACTIVE_UNINSTALLED));
     statusMap.put("r",new Integer(STATUS_ACTIVESEEDING_UNINSTALLED));
+    statusMap.put("D",new Integer(STATUS_DELETING_NOOUTPUT));
+
+    // These are deprecated states; we may be able to reclaim them
     statusMap.put("O",new Integer(STATUS_ACTIVE_NOOUTPUT));
     statusMap.put("o",new Integer(STATUS_ACTIVESEEDING_NOOUTPUT));
     statusMap.put("U",new Integer(STATUS_ACTIVE_NEITHER));
     statusMap.put("u",new Integer(STATUS_ACTIVESEEDING_NEITHER));
-    statusMap.put("D",new Integer(STATUS_DELETING_NOOUTPUT));
-    
+
     typeMap = new HashMap<String,Integer>();
     typeMap.put("C",new Integer(TYPE_CONTINUOUS));
     typeMap.put("S",new Integer(TYPE_SPECIFIED));
@@ -289,6 +296,7 @@
   * STATUS_PAUSEDWAIT
   * STATUS_PAUSED
   * STATUS_READYFORNOTIFY
+  * STATUS_READYFORDELETENOTIFY
   * STATUS_READYFORDELETE
   * STATUS_DELETING
   * STATUS_READYFORSTARTUP
@@ -310,6 +318,7 @@
   * These are the process-transient states:
   * STATUS_DELETESTARTINGUP
   * STATUS_NOTIFYINGOFCOMPLETION
+  * STATUS_NOTIFYINGOFDELETION
   * STATUS_STARTINGUP
   * STATUS_STARTINGUPMINIMAL
   * STATUS_ABORTINGSTARTINGUPFORRESTART
@@ -377,6 +386,7 @@
       // These are fields we want to get rid of.
       String oldOutputSpecField = "outputspec";
       String oldOutputNameField = "outputname";
+      String oldLastCheckTimeField = "lastchecktime";
 
       // A place to keep the outputs we find, so we can add them into the pipeline at the end.
       IResultSet outputSet = null;
@@ -390,7 +400,7 @@
         map.put(statusField,new ColumnDescription("CHAR(1)",false,false,null,null,false));
         map.put(lastTimeField,new ColumnDescription("BIGINT",false,false,null,null,false));
         map.put(startTimeField,new ColumnDescription("BIGINT",false,true,null,null,false));
-        map.put(lastCheckTimeField,new ColumnDescription("BIGINT",false,true,null,null,false));
+        map.put(seedingVersionField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
         map.put(endTimeField,new ColumnDescription("BIGINT",false,true,null,null,false));
         map.put(documentSpecField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
         map.put(this.connectionNameField,new ColumnDescription("VARCHAR(32)",false,false,connectionTableName,connectionNameField,false));
@@ -461,6 +471,33 @@
           map.put(statusField,statusToString(STATUS_ACTIVESEEDING_UNINSTALLED));
           performUpdate(map,"WHERE "+query,list,null);
         }
+        if (existing.get(seedingVersionField) == null)
+        {
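+          // Upgrade path: add the new seedingversion column, populate it from the old
+          // lastchecktime values (rendered as decimal strings), then drop the old column.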
+          Map insertMap = new HashMap();
+          insertMap.put(seedingVersionField,new ColumnDescription("LONGTEXT",false,true,null,null,false));
+          performAlter(insertMap,null,null,null);
+          // Populate it with data from the old last check version field
+          IResultSet set = performQuery("SELECT "+idField+","+oldLastCheckTimeField+" FROM "+getTableName(),null,null,null);
+          for (int i = 0; i < set.getRowCount(); i++)
+          {
+            IResultRow row = set.getRow(i);
+            Long jobID = (Long)row.getValue(idField);
+            Long oldTime = (Long)row.getValue(oldLastCheckTimeField);
+            if (oldTime != null)
+            {
+              HashMap map = new HashMap();
+              map.put(seedingVersionField,oldTime.toString());
+              ArrayList list = new ArrayList();
+              String query = buildConjunctionClause(list,new ClauseDescription[]{
+                new UnitaryClause(idField,jobID)});
+              performUpdate(map,"WHERE "+query,list,null);
+            }
+          }
+          List<String> deleteList = new ArrayList<String>();
+          deleteList.add(oldLastCheckTimeField);
+          performAlter(null,null,deleteList,null);
+        }
+        
         if (existing.get(oldOutputNameField) != null)
         {
           // Remove output name and spec fields, but first read them so we can put them into the pipeline manager
@@ -471,6 +508,8 @@
           deleteList.add(oldOutputNameField);
           performAlter(null,null,deleteList,null);
         }
       }
 
       // Handle related tables
@@ -937,7 +976,7 @@
             if (set.getRowCount() > 0)
             {
               // Update
-              // We need to reset the lastCheckTimeField if there are any changes that
+              // We need to reset the seedingVersionField if there are any changes that
               // could affect what set of documents we allow!!!
 
               IResultRow row = set.getRow(0);
@@ -969,7 +1008,7 @@
                 isSame = forcedParamManager.compareRows(id,jobDescription);
 
               if (!isSame)
-                values.put(lastCheckTimeField,null);
+                values.put(seedingVersionField,null);
 
               params.clear();
               query = buildConjunctionClause(params,new ClauseDescription[]{
@@ -984,7 +1023,7 @@
             {
               // Insert
               values.put(startTimeField,null);
-              values.put(lastCheckTimeField,null);
+              values.put(seedingVersionField,null);
               values.put(endTimeField,null);
               values.put(statusField,statusToString(STATUS_INACTIVE));
               values.put(lastTimeField,new Long(System.currentTimeMillis()));
@@ -1045,7 +1084,7 @@
     throws ManifoldCFException
   {
     Map values = new HashMap();
-    values.put(lastCheckTimeField,null);
+    values.put(seedingVersionField,null);
     ArrayList params = new ArrayList();
     String query = buildConjunctionClause(params,new ClauseDescription[]{
       new UnitaryClause(idField,jobID)});
@@ -1082,6 +1121,17 @@
     map.put(failCountField,null);
     performUpdate(map,"WHERE "+query,list,invKey);
 
+    // Notifying of deletion goes back to just being ready for delete notify
+    list.clear();
+    query = buildConjunctionClause(list,new ClauseDescription[]{
+      new UnitaryClause(statusField,statusToString(STATUS_NOTIFYINGOFDELETION)),
+      new UnitaryClause(processIDField,processID)});
+    map.put(statusField,statusToString(STATUS_READYFORDELETENOTIFY));
+    map.put(processIDField,null);
+    map.put(failTimeField,null);
+    map.put(failCountField,null);
+    performUpdate(map,"WHERE "+query,list,invKey);
+
     // Starting up or aborting starting up goes back to just being ready
     list.clear();
     query = buildConjunctionClause(list,new ClauseDescription[]{
@@ -1229,6 +1279,14 @@
     map.put(processIDField,null);
     performUpdate(map,"WHERE "+query,list,invKey);
 
+    // Notifying of deletion goes back to just being ready for delete notify
+    list.clear();
+    query = buildConjunctionClause(list,new ClauseDescription[]{
+      new UnitaryClause(statusField,statusToString(STATUS_NOTIFYINGOFDELETION))});
+    map.put(statusField,statusToString(STATUS_READYFORDELETENOTIFY));
+    map.put(processIDField,null);
+    performUpdate(map,"WHERE "+query,list,invKey);
+
     // Starting up or aborting starting up goes back to just being ready
     list.clear();
     query = buildConjunctionClause(list,new ClauseDescription[]{
@@ -1539,7 +1597,7 @@
   {
     // No cache keys need invalidation, since we're changing the start time, not the status.
     HashMap newValues = new HashMap();
-    newValues.put(lastCheckTimeField,null);
+    newValues.put(seedingVersionField,null);
     ArrayList list = new ArrayList();
     String query = buildConjunctionClause(list,new ClauseDescription[]{
       new UnitaryClause(connectionNameField,connectionName)});
@@ -1555,7 +1613,7 @@
   {
     // No cache keys need invalidation, since we're changing the start time, not the status.
     HashMap newValues = new HashMap();
-    newValues.put(lastCheckTimeField,null);
+    newValues.put(seedingVersionField,null);
     ArrayList list = new ArrayList();
     String query = buildConjunctionClause(list,new ClauseDescription[]{
       new JoinClause(getTableName()+"."+idField,pipelineManager.ownerIDField),
@@ -1571,7 +1629,7 @@
   {
     // No cache keys need invalidation, since we're changing the start time, not the status.
     HashMap newValues = new HashMap();
-    newValues.put(lastCheckTimeField,null);
+    newValues.put(seedingVersionField,null);
     ArrayList list = new ArrayList();
     String query = buildConjunctionClause(list,new ClauseDescription[]{
       new JoinClause(getTableName()+"."+idField,pipelineManager.ownerIDField),
@@ -1636,6 +1694,17 @@
     map.put(failCountField,null);
     performUpdate(map,"WHERE "+query,list,new StringSet(getJobStatusKey()));
 
+    list.clear();
+    map.clear();
+    query = buildConjunctionClause(list,new ClauseDescription[]{
+      new UnitaryClause(statusField,statusToString(STATUS_NOTIFYINGOFDELETION)),
+      new UnitaryClause(processIDField,processID)});
+    map.put(statusField,statusToString(STATUS_READYFORDELETENOTIFY));
+    map.put(processIDField,null);
+    map.put(failTimeField,null);
+    map.put(failCountField,null);
+    performUpdate(map,"WHERE "+query,list,new StringSet(getJobStatusKey()));
+
   }
   
   /** Reset startup worker thread status.
@@ -1918,7 +1987,32 @@
     map.put(processIDField,null);
     performUpdate(map,"WHERE "+query,list,new StringSet(getJobStatusKey()));
   }
-  
+
+  /** Retry delete notification.
+  *@param jobID is the job identifier.
+  *@param failTime is the fail time; -1 == none.
+  *@param failCount is the fail count to use; -1 == none.
+  */
+  public void retryDeleteNotification(Long jobID, long failTime, int failCount)
+    throws ManifoldCFException
+  {
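+    // Put the job back into the "ready for delete notify" state, recording fail
+    // time/count so that repeated service interruptions eventually become hard failures.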
+    ArrayList list = new ArrayList();
+    String query = buildConjunctionClause(list,new ClauseDescription[]{
+      new UnitaryClause(idField,jobID)});
+    HashMap map = new HashMap();
+    map.put(statusField,statusToString(STATUS_READYFORDELETENOTIFY));
+    if (failTime == -1L)
+      map.put(failTimeField,null);
+    else
+      map.put(failTimeField,new Long(failTime));
+    if (failCount == -1)
+      map.put(failCountField,null);
+    else
+      map.put(failCountField,new Long(failCount));
+    map.put(processIDField,null);
+    performUpdate(map,"WHERE "+query,list,new StringSet(getJobStatusKey()));
+  }
+
   /** Write job status and window end, and clear the endtime field.  (The start time will be written
   * when the job enters the "active" state.)
   *@param jobID is the job identifier.
@@ -2129,7 +2223,8 @@
       {
         map.put(startTimeField,new Long(startTime));
       }
-      map.put(lastCheckTimeField,new Long(startTime));
+      // Clear out seeding version, in case we wind up keeping the job and rerunning it
+      map.put(seedingVersionField,null);
       performUpdate(map,"WHERE "+query,list,new StringSet(getJobStatusKey()));
     }
     catch (ManifoldCFException e)
@@ -2156,8 +2251,9 @@
   /** Make job active, and set the start time field.
   *@param jobID is the job identifier.
   *@param startTime is the current time in milliseconds from start of epoch.
+  *@param seedVersionString is the version string to record for the seeding.
   */
-  public void noteJobStarted(Long jobID, long startTime)
+  public void noteJobStarted(Long jobID, long startTime, String seedVersionString)
     throws ManifoldCFException
   {
     beginTransaction();
@@ -2208,7 +2304,7 @@
         map.put(startTimeField,new Long(startTime));
       }
       // The seeding was complete or we wouldn't have gotten called, so at least note that.
-      map.put(lastCheckTimeField,new Long(startTime));
+      map.put(seedingVersionField,seedVersionString);
       // Clear out the retry fields we might have set
       map.put(failTimeField,null);
       map.put(failCountField,null);
@@ -2238,9 +2334,9 @@
 
   /** Note job seeded.
   *@param jobID is the job id.
-  *@param seedTime is the job seed time.
+  *@param seedVersionString is the job seed version string.
   */
-  public void noteJobSeeded(Long jobID, long seedTime)
+  public void noteJobSeeded(Long jobID, String seedVersionString)
     throws ManifoldCFException
   {
     // We have to convert the current status to the non-seeding equivalent
@@ -2295,7 +2391,7 @@
       HashMap map = new HashMap();
       map.put(statusField,statusToString(newStatus));
       map.put(processIDField,null);
-      map.put(lastCheckTimeField,new Long(seedTime));
+      map.put(seedingVersionField,seedVersionString);
       map.put(failTimeField,null);
       map.put(failCountField,null);
       performUpdate(map,"WHERE "+query,list,new StringSet(getJobStatusKey()));
@@ -2726,6 +2822,24 @@
     performUpdate(map,"WHERE "+query,list,new StringSet(getJobStatusKey()));
   }
 
+  /** Finish job cleanup.
+  * Move the job into the "ready for delete notify" state, and clear the error field.
+  *@param jobID is the job id.
+  */
+  public void finishJobCleanup(Long jobID)
+    throws ManifoldCFException
+  {
+    ArrayList list = new ArrayList();
+    String query = buildConjunctionClause(list,new ClauseDescription[]{
+      new UnitaryClause(idField,jobID)});
+    HashMap map = new HashMap();
+    map.put(statusField,statusToString(STATUS_READYFORDELETENOTIFY));
+    map.put(errorField,null);
+    // Anything else?
+    // MHL
+    performUpdate(map,"WHERE "+query,list,new StringSet(getJobStatusKey()));
+  }
+
   /** Resume a stopped job (from a pause or activewait).
   * Updates the job record in a manner consistent with the job's state.
   */
@@ -3081,6 +3195,10 @@
       return "n";
     case STATUS_READYFORNOTIFY:
       return "s";
+    case STATUS_NOTIFYINGOFDELETION:
+      return "j";
+    case STATUS_READYFORDELETENOTIFY:
+      return "d";
     case STATUS_ACTIVEWAIT:
       return "W";
     case STATUS_PAUSEDWAIT:
@@ -3125,14 +3243,6 @@
       return "R";
     case STATUS_ACTIVESEEDING_UNINSTALLED:
       return "r";
-    case STATUS_ACTIVE_NOOUTPUT:
-      return "O";
-    case STATUS_ACTIVESEEDING_NOOUTPUT:
-      return "o";
-    case STATUS_ACTIVE_NEITHER:
-      return "U";
-    case STATUS_ACTIVESEEDING_NEITHER:
-      return "u";
     case STATUS_DELETING_NOOUTPUT:
       return "D";
     
@@ -3156,6 +3266,16 @@
     case STATUS_ABORTINGSHUTTINGDOWN:
       return "v";
 
+    // These are deprecated
+    case STATUS_ACTIVE_NOOUTPUT:
+      return "O";
+    case STATUS_ACTIVESEEDING_NOOUTPUT:
+      return "o";
+    case STATUS_ACTIVE_NEITHER:
+      return "U";
+    case STATUS_ACTIVESEEDING_NEITHER:
+      return "u";
+
     default:
       throw new ManifoldCFException("Bad status value: "+Integer.toString(status));
     }
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/PipelineManager.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/PipelineManager.java
index 08812af..8d3f85d 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/PipelineManager.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/PipelineManager.java
@@ -263,15 +263,18 @@
     {
       IResultRow row = set.getRow(i);
       String outputConnectionName = (String)row.getValue(outputNameField);
-      if (!outputSet.contains(outputConnectionName))
-        throw new ManifoldCFException("Output name '"+outputConnectionName+"' removed from job; not allowed");
+      boolean isOutputConnection = outputConnectionName != null && outputConnectionName.length() > 0;
+      if (isOutputConnection)
+      {
+        if (!outputSet.contains(outputConnectionName))
+          throw new ManifoldCFException("Output name '"+outputConnectionName+"' removed from job; not allowed");
+      }
       String transformationConnectionName = (String)row.getValue(transformationNameField);
       Long prerequisite = (Long)row.getValue(prerequisiteField);
       String spec = (String)row.getValue(connectionSpecField);
       if (spec == null)
         spec = "";
       int prerequisiteValue = (prerequisite==null)?-1:(int)prerequisite.longValue();
-      boolean isOutputConnection = outputConnectionName != null && outputConnectionName.length() > 0;
       if (job.getPipelineStagePrerequisite(i) != prerequisiteValue)
         return false;
       if (job.getPipelineStageIsOutputConnection(i) != isOutputConnection)
@@ -314,7 +317,7 @@
         int prerequisite = job.getPipelineStagePrerequisite(i);
         String pipelineConnectionName = job.getPipelineStageConnectionName(i);
         String pipelineStageDescription = job.getPipelineStageDescription(i);
-        OutputSpecification os = job.getPipelineStageSpecification(i);
+        Specification os = job.getPipelineStageSpecification(i);
         map.clear();
         map.put(ownerIDField,ownerID);
         map.put(ordinalField,new Long((long)i));
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobNotificationThread.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobNotificationThread.java
index f25f6a9..700b552 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobNotificationThread.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/JobNotificationThread.java
@@ -249,6 +249,188 @@
               throw exception;
           }
 
+          // Now handle jobs that need their output connectors notified of job deletion.
+          JobNotifyRecord[] jobsNeedingDeleteNotification = jobManager.getJobsReadyForDelete(processID);
+          try
+          {
+            Set<OutputAndRepositoryConnection> connectionNames = new HashSet<OutputAndRepositoryConnection>();
+            
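+            // First pass: collect the distinct (output, repository) connection pairs
+            // referenced by the jobs awaiting delete notification.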
+            int k = 0;
+            while (k < jobsNeedingDeleteNotification.length)
+            {
+              JobNotifyRecord jsr = jobsNeedingDeleteNotification[k++];
+              Long jobID = jsr.getJobID();
+              IJobDescription job = jobManager.load(jobID,true);
+              if (job != null)
+              {
+                // Get the connection name
+                String repositoryConnectionName = job.getConnectionName();
+                IPipelineSpecificationBasic basicSpec = new PipelineSpecificationBasic(job);
+                for (int i = 0; i < basicSpec.getOutputCount(); i++)
+                {
+                  String outputConnectionName = basicSpec.getStageConnectionName(basicSpec.getOutputStage(i));
+                  OutputAndRepositoryConnection c = new OutputAndRepositoryConnection(outputConnectionName, repositoryConnectionName);
+                  connectionNames.add(c);
+                }
+              }
+            }
+            
+            // Attempt to notify the specified connections
+            Map<OutputAndRepositoryConnection,Disposition> notifiedConnections = new HashMap<OutputAndRepositoryConnection,Disposition>();
+            
+            for (OutputAndRepositoryConnection connections : connectionNames)
+            {
+              String outputConnectionName = connections.getOutputConnectionName();
+              String repositoryConnectionName = connections.getRepositoryConnectionName();
+              
+              OutputNotifyActivity activity = new OutputNotifyActivity(repositoryConnectionName,repositoryConnectionManager,outputConnectionName);
+              
+              IOutputConnection connection = connectionManager.load(outputConnectionName);
+              if (connection != null)
+              {
+                // Grab an appropriate connection instance
+                IOutputConnector connector = outputConnectorPool.grab(connection);
+                if (connector != null)
+                {
+                  try
+                  {
+                    // Do the notification itself
+                    try
+                    {
+                      connector.noteJobComplete(activity);
+                      notifiedConnections.put(connections,new Disposition());
+                    }
+                    catch (ServiceInterruption e)
+                    {
+                      notifiedConnections.put(connections,new Disposition(e));
+                    }
+                    catch (ManifoldCFException e)
+                    {
+                      if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
+                        throw e;
+                      if (e.getErrorCode() == ManifoldCFException.DATABASE_CONNECTION_ERROR)
+                        throw e;
+                      if (e.getErrorCode() == ManifoldCFException.SETUP_ERROR)
+                        throw e;
+                      // Nothing special; report the error and keep going.
+                      Logging.threads.error(e.getMessage(),e);
+                    }
+                  }
+                  finally
+                  {
+                    outputConnectorPool.release(connection,connector);
+                  }
+                }
+              }
+            }
+            
+            // Go through jobs again, and put the notified ones into the inactive state.
+            k = 0;
+            while (k < jobsNeedingDeleteNotification.length)
+            {
+              JobNotifyRecord jsr = jobsNeedingDeleteNotification[k++];
+              Long jobID = jsr.getJobID();
+              IJobDescription job = jobManager.load(jobID,true);
+              if (job != null)
+              {
+                // Get the connection name
+                String repositoryConnectionName = job.getConnectionName();
+                IPipelineSpecificationBasic basicSpec = new PipelineSpecificationBasic(job);
+                boolean allOK = true;
+                for (int i = 0; i < basicSpec.getOutputCount(); i++)
+                {
+                  String outputConnectionName = basicSpec.getStageConnectionName(basicSpec.getOutputStage(i));
+
+                  OutputAndRepositoryConnection c = new OutputAndRepositoryConnection(outputConnectionName, repositoryConnectionName);
+                  
+                  Disposition d = notifiedConnections.get(c);
+                  if (d != null)
+                  {
+                    ServiceInterruption e = d.getServiceInterruption();
+                    if (e == null)
+                    {
+                      break;
+                    }
+                    else
+                    {
+                      if (!e.jobInactiveAbort())
+                      {
+                        Logging.jobs.warn("Delete notification service interruption reported for job "+
+                          jobID+" output connection '"+outputConnectionName+"': "+
+                          e.getMessage(),e);
+                      }
+
+                      // If either we are going to be requeuing beyond the fail time, OR
+                      // the number of retries available has hit 0, THEN we treat this
+                      // as either an "ignore" or a hard error.
+                      ///System.out.println("jsr.getFailTime()="+jsr.getFailTime()+"; e.getRetryTime()="+e.getRetryTime()+"; jsr.getFailRetryCount()="+jsr.getFailRetryCount());
+                      if (!e.jobInactiveAbort() && (jsr.getFailTime() != -1L && jsr.getFailTime() < e.getRetryTime() ||
+                        jsr.getFailRetryCount() == 0))
+                      {
+                        // Treat this as a hard failure.
+                        if (e.isAbortOnFail())
+                        {
+                          // Note the error in the job, and transition to inactive state
+                          String message = e.jobInactiveAbort()?"":"Repeated service interruptions during delete notification"+((e.getCause()!=null)?": "+e.getCause().getMessage():"");
+                          if (message.length() > 0)
+                            Logging.jobs.error(message,e.getCause());
+                          // Can't abort a delete!!
+                          jobManager.removeJob(jobID);
+                          jsr.noteStarted();
+                        }
+                        else
+                        {
+                          // Not sure this can happen -- but just transition silently to inactive state
+                          jobManager.removeJob(jobID);
+                          jsr.noteStarted();
+                        }
+                      }
+                      else
+                      {
+                        // Reset the job to the READYFORDELETENOTIFY state, updating the failtime and failcount fields
+                        //System.out.println("Retrying... e.getFailTime()="+e.getFailTime()+"; e.getFailRetryCount()="+e.getFailRetryCount());
+                        jobManager.retryDeleteNotification(jsr,e.getFailTime(),e.getFailRetryCount());
+                        jsr.noteStarted();
+                      }
+                      allOK = false;
+                      break;
+                    }
+                  }
+                }
+                if (allOK)
+                {
+                  jobManager.removeJob(jobID);
+                  jsr.noteStarted();
+                }
+
+              }
+            }
+          }
+          finally
+          {
+            // Clean up all jobs that did not start
+            ManifoldCFException exception = null;
+            int i = 0;
+            while (i < jobsNeedingDeleteNotification.length)
+            {
+              JobNotifyRecord jsr = jobsNeedingDeleteNotification[i++];
+              if (!jsr.wasStarted())
+              {
+                // Clean up from failed start.
+                try
+                {
+                  jobManager.resetDeleteNotifyJob(jsr.getJobID());
+                }
+                catch (ManifoldCFException e)
+                {
+                  exception = e;
+                }
+              }
+            }
+            if (exception != null)
+              throw exception;
+          }
+
           ManifoldCF.sleep(10000L);
         }
         catch (ManifoldCFException e)
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java
index 597c750..b2a6aaf 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/ManifoldCF.java
@@ -4119,7 +4119,7 @@
       PipelineStage ps = pipelineStages.get(stageName);
       ps.ordinal = k++;
       int prerequisite = (ps.prerequisite == null)?-1:pipelineStages.get(ps.prerequisite).ordinal;
-      OutputSpecification os = jobDescription.addPipelineStage(prerequisite,ps.isOutput,ps.connectionName,ps.description);
+      Specification os = jobDescription.addPipelineStage(prerequisite,ps.isOutput,ps.connectionName,ps.description);
       os.clearChildren();
       if (ps.specification != null)
       {
@@ -4238,7 +4238,7 @@
         stage.setValue(description);
         child.addChild(child.getChildCount(),stage);
       }
-      OutputSpecification spec = job.getPipelineStageSpecification(j);
+      Specification spec = job.getPipelineStageSpecification(j);
       stage = new ConfigurationNode(JOBNODE_STAGESPECIFICATION);
       for (int k = 0; k < spec.getChildCount(); k++)
       {
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecification.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecification.java
index efb6134..421dd7d 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecification.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecification.java
@@ -27,17 +27,17 @@
 public class PipelineSpecification implements IPipelineSpecification
 {
   protected final IPipelineSpecificationBasic basicSpecification;
-  protected final String[] pipelineDescriptionStrings;
+  protected final VersionContext[] pipelineDescriptionStrings;
     
   public PipelineSpecification(IPipelineSpecificationBasic basicSpecification, IJobDescription job, IIncrementalIngester ingester)
     throws ManifoldCFException, ServiceInterruption
   {
     this.basicSpecification = basicSpecification;
-    this.pipelineDescriptionStrings = new String[basicSpecification.getStageCount()];
+    this.pipelineDescriptionStrings = new VersionContext[basicSpecification.getStageCount()];
     for (int i = 0; i < pipelineDescriptionStrings.length; i++)
     {
       // Note: this needs to change when output connections become part of the pipeline
-      String descriptionString;
+      VersionContext descriptionString;
       if (basicSpecification.checkStageOutputConnection(i))
       {
         descriptionString = ingester.getOutputDescription(basicSpecification.getStageConnectionName(i),job.getPipelineStageSpecification(i));
@@ -64,7 +64,7 @@
  *@return the description string for that stage.
   */
   @Override
-  public String getStageDescriptionString(int stage)
+  public VersionContext getStageDescriptionString(int stage)
   {
     return pipelineDescriptionStrings[stage];
   }
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecificationWithVersions.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecificationWithVersions.java
index 53a6a1c..4387ff7 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecificationWithVersions.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/PipelineSpecificationWithVersions.java
@@ -28,13 +28,14 @@
 {
   protected final IPipelineSpecification pipelineSpecification;
   protected final QueuedDocument queuedDocument;
+  protected final String componentIDHash;
     
   public PipelineSpecificationWithVersions(IPipelineSpecification pipelineSpecification,
-    QueuedDocument queuedDocument)
-    throws ManifoldCFException, ServiceInterruption
+    QueuedDocument queuedDocument, String componentIDHash)
   {
     this.pipelineSpecification = pipelineSpecification;
     this.queuedDocument = queuedDocument;
+    this.componentIDHash = componentIDHash;
   }
   
   /** Get pipeline specification.
@@ -49,7 +50,10 @@
   protected DocumentIngestStatus getStatus(int index)
   {
     IPipelineSpecificationBasic basic = pipelineSpecification.getBasicPipelineSpecification();
-    return queuedDocument.getLastIngestedStatus(basic.getStageConnectionName(basic.getOutputStage(index)));
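+    // Statuses are now grouped per document component in a DocumentIngestStatusSet;
+    // select the entry for our component hash (null if never indexed).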
+    DocumentIngestStatusSet set = queuedDocument.getLastIngestedStatus(basic.getStageConnectionName(basic.getOutputStage(index)));
+    if (set == null)
+      return null;
+    return set.getComponent(componentIDHash);
   }
   
   /** For a given output index, return a document version string.
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocument.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocument.java
index a4a05f0..c36c3d6 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocument.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocument.java
@@ -39,7 +39,7 @@
   /** The document description. */
   protected final DocumentDescription documentDescription;
   /** The last ingested status, null meaning "never ingested". */
-  protected final Map<String,DocumentIngestStatus> lastIngestedStatus;
+  protected final Map<String,DocumentIngestStatusSet> lastIngestedStatus;
   /** The binnames for the document, according to the connector */
   protected final String[] binNames;
   /** This flag indicates whether the document has been processed or not. */
@@ -50,7 +50,7 @@
   *@param lastIngestedStatus is the document's last ingested status.
   *@param binNames are the bins associated with the document.
   */
-  public QueuedDocument(DocumentDescription documentDescription, Map<String,DocumentIngestStatus> lastIngestedStatus, String[] binNames)
+  public QueuedDocument(DocumentDescription documentDescription, Map<String,DocumentIngestStatusSet> lastIngestedStatus, String[] binNames)
   {
     this.documentDescription = documentDescription;
     this.lastIngestedStatus = lastIngestedStatus;
@@ -69,7 +69,7 @@
   *@param outputConnectionName is the name of the output connection.
   *@return the last ingested status for that output, or null if not found.
   */
-  public DocumentIngestStatus getLastIngestedStatus(String outputConnectionName)
+  public DocumentIngestStatusSet getLastIngestedStatus(String outputConnectionName)
   {
     if (lastIngestedStatus == null)
       return null;
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingThread.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingThread.java
index 6ffff5a..93513c6 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingThread.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/SeedingThread.java
@@ -116,7 +116,7 @@
               Long jobID = jsr.getJobID();
               try
               {
-                long lastJobTime = jsr.getSynchTime();
+                String lastSeedingVersion = jsr.getSeedingVersionString();
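+                // A null seeding version means this job has never been successfully seeded.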
                 IJobDescription jobDescription = jobManager.load(jobID,true);
                 int jobType = jobDescription.getType();
 
@@ -127,8 +127,11 @@
                 // Null will come back if the connector instance could not be obtained, so just skip in that case.
                 if (connector == null)
                   continue;
+
+                String newSeedingVersion = null;
                 try
                 {
+                  
                   // Get the number of link types.
                   String[] legalLinkTypes = connector.getRelationshipTypes();
 
@@ -144,7 +147,7 @@
                     if (Logging.threads.isDebugEnabled())
                       Logging.threads.debug("Seeding thread: Getting seeds for job "+jobID.toString());
 
-                    connector.addSeedDocuments(activity,jobDescription.getSpecification(),lastJobTime,currentTime,jobType);
+                    newSeedingVersion = connector.addSeedDocuments(activity,jobDescription.getSpecification(),lastSeedingVersion,currentTime,jobType);
 
                     activity.doneSeeding(model==connector.MODEL_PARTIAL);
 
@@ -180,7 +183,7 @@
                       else
                       {
                         // Not sure this can happen -- but just transition silently to active state
-                        jobManager.noteJobSeeded(jobID,currentTime);
+                        jobManager.noteJobSeeded(jobID,newSeedingVersion);
                         jsr.noteStarted();
                       }
                     }
@@ -204,7 +207,7 @@
                   Logging.threads.debug("Seeding thread: Successfully reseeded job "+jobID.toString());
 
                 // Note that this job has been seeded!
-                jobManager.noteJobSeeded(jobID,currentTime);
+                jobManager.noteJobSeeded(jobID,newSeedingVersion);
                 jsr.noteStarted();
 
               }
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartDeleteThread.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartDeleteThread.java
index 05b49cb..7887e6f 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartDeleteThread.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartDeleteThread.java
@@ -84,7 +84,7 @@
 
           // See if there are any starting jobs.
           // Note: Since this following call changes the job state, we must be careful to reset it on any kind of failure.
-          JobDeleteRecord[] deleteJobs = jobManager.getJobsReadyForDelete(processID);
+          JobDeleteRecord[] deleteJobs = jobManager.getJobsReadyForDeleteCleanup(processID);
           try
           {
 
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
index c18b626..30fa94b 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StartupThread.java
@@ -100,14 +100,13 @@
 
 
             // Loop through jobs
-            int i = 0;
-            while (i < startupJobs.length)
+            for (int i = 0; i < startupJobs.length; i++)
             {
-              JobStartRecord jsr = startupJobs[i++];
+              JobStartRecord jsr = startupJobs[i];
               Long jobID = jsr.getJobID();
               try
               {
-                long lastJobTime = jsr.getSynchTime();
+                String lastSeedingVersion = jsr.getSeedingVersionString();
                 IJobDescription jobDescription = jobManager.load(jobID,true);
 
                 int jobType = jobDescription.getType();
@@ -120,6 +119,7 @@
                 if (connector == null)
                   continue;
 
+                String newSeedingVersion = null;
                 try
                 {
                   // Only now record the fact that we are trying to start the job.
@@ -136,7 +136,7 @@
                   if (Logging.threads.isDebugEnabled())
                     Logging.threads.debug("Preparing job "+jobID.toString()+" for execution...");
                   jobManager.prepareJobScan(jobID,legalLinkTypes,hopcountMethod,
-                    model,jobType == IJobDescription.TYPE_CONTINUOUS,lastJobTime == 0L,
+                    model,jobType == IJobDescription.TYPE_CONTINUOUS,lastSeedingVersion == null,
                     requestMinimum);
                   if (Logging.threads.isDebugEnabled())
                     Logging.threads.debug("Prepared job "+jobID.toString()+" for execution.");
@@ -150,7 +150,7 @@
                     if (Logging.threads.isDebugEnabled())
                       Logging.threads.debug("Adding initial seed documents for job "+jobID.toString()+"...");
                     // Get the initial seed documents, and make sure those are added
-                    connector.addSeedDocuments(activity,jobDescription.getSpecification(),lastJobTime,currentTime,jobType);
+                    newSeedingVersion = connector.addSeedDocuments(activity,jobDescription.getSpecification(),lastSeedingVersion,currentTime,jobType);
                     // Flush anything left
                     activity.doneSeeding(model==connector.MODEL_PARTIAL);
                     if (Logging.threads.isDebugEnabled())
@@ -183,7 +183,7 @@
                       else
                       {
                         // Not sure this can happen -- but just transition silently to active state
-                        jobManager.noteJobStarted(jobID,currentTime);
+                        jobManager.noteJobStarted(jobID,currentTime,newSeedingVersion);
                         jsr.noteStarted();
                       }
                     }
@@ -203,7 +203,7 @@
                 }
 
                 // Start this job!
-                jobManager.noteJobStarted(jobID,currentTime);
+                jobManager.noteJobStarted(jobID,currentTime,newSeedingVersion);
                 jsr.noteStarted();
               }
               catch (ManifoldCFException e)
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
index 0354b87..84e4b8c 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
@@ -248,7 +248,7 @@
 
           }
 
-          Map<OutputKey,DocumentIngestStatus> statuses = new HashMap<OutputKey,DocumentIngestStatus>();
+          IngestStatuses statuses = new IngestStatuses();
           ingester.getPipelineDocumentIngestDataMultiple(statuses,pipelineSpecifications,documentClasses,documentIDHashes);
           // Break apart the result.
           for (int i = 0; i < descs.length; i++)
@@ -257,10 +257,9 @@
             for (int j = 0; j < pipelineSpecifications[i].getOutputCount(); j++)
             {
               String outputName = pipelineSpecifications[i].getStageConnectionName(pipelineSpecifications[i].getOutputStage(j));
-              OutputKey key = new OutputKey(documentClasses[i],documentIDHashes[i],outputName);
-              DocumentIngestStatus status = statuses.get(key);
-              if (status != null)
-                versions[i].put(outputName,status);
+              DocumentIngestStatusSet statusSet = statuses.getStatus(documentClasses[i],documentIDHashes[i],outputName);
+              if (statusSet != null)
+                versions[i].put(outputName,statusSet);
             }
           }
 
@@ -335,7 +334,7 @@
               binNames = new String[]{""};
             }
 
-            QueuedDocument qd = new QueuedDocument(descs[i],(Map<String,DocumentIngestStatus>)versions[i],binNames);
+            QueuedDocument qd = new QueuedDocument(descs[i],(Map<String,DocumentIngestStatusSet>)versions[i],binNames);
 
             // Grab the arraylist that's there, or create it.
             List<QueuedDocument> set = documentSets.get(jobID);
diff --git a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
index a445991..1d9b6c8 100644
--- a/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
+++ b/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
@@ -76,15 +76,12 @@
       IJobManager jobManager = JobManagerFactory.make(threadContext);
       IBinManager binManager = BinManagerFactory.make(threadContext);
       IRepositoryConnectionManager connMgr = RepositoryConnectionManagerFactory.make(threadContext);
-      IOutputConnectionManager outputMgr = OutputConnectionManagerFactory.make(threadContext);
       IReprioritizationTracker rt = ReprioritizationTrackerFactory.make(threadContext);
 
       IRepositoryConnectorPool repositoryConnectorPool = RepositoryConnectorPoolFactory.make(threadContext);
       
-      List<DocumentToProcess> fetchList = new ArrayList<DocumentToProcess>();
-      Map<String,String> versionMap = new HashMap<String,String>();
+      // This is the set of documents that we will either be marking as complete, or requeued, depending on the kind of crawl.
       List<QueuedDocument> finishList = new ArrayList<QueuedDocument>();
-      Map<String,Integer> idHashIndexMap = new HashMap<String,Integer>();
 
       // This is where we accumulate the document QueuedDocuments to be deleted from the job queue.
       List<QueuedDocument> deleteList = new ArrayList<QueuedDocument>();
@@ -174,9 +171,7 @@
             }
 
             // Clear out all of our disposition lists
-            fetchList.clear();
             finishList.clear();
-            versionMap.clear();
             deleteList.clear();
             ingesterCheckList.clear();
             hopcountremoveList.clear();
@@ -287,504 +282,409 @@
                   // Check for interruption before we start fetching
                   if (Thread.currentThread().isInterrupted())
                     throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);
+                  
+                  // We need first to assemble an IPipelineSpecificationWithVersions object for each document we're going to process.
+                  // We put this in a map so it can be looked up by document identifier.
+                  // Create a full PipelineSpecification, including description strings.  (This is per-job still, but can throw ServiceInterruptions, so we do it in here.)
+                  IPipelineSpecification pipelineSpecification;
+                  try
+                  {
+                    pipelineSpecification = new PipelineSpecification(pipelineSpecificationBasic,job,ingester);
+                  }
+                  catch (ServiceInterruption e)
+                  {
+                    // Handle service interruption from pipeline
+                    if (!e.jobInactiveAbort())
+                      Logging.jobs.warn("Service interruption reported for job "+
+                      job.getID()+" connection '"+job.getConnectionName()+"': "+
+                      e.getMessage());
+
+                    // All documents get requeued, because we never got far enough to make distinctions.  All we have to decide
+                    // is whether to requeue or abort.
+                    List<QueuedDocument> requeueList = new ArrayList<QueuedDocument>();
+
+                    for (QueuedDocument qd : activeDocuments)
+                    {
+                      DocumentDescription dd = qd.getDocumentDescription();
+                      // Check for hard failure.  But no hard failure possible if it's a job inactive abort.
+                      if (!e.jobInactiveAbort() && (dd.getFailTime() != -1L && dd.getFailTime() < e.getRetryTime() ||
+                        dd.getFailRetryCount() == 0))
+                      {
+                        // Treat this as a hard failure.
+                        if (e.isAbortOnFail())
+                        {
+                          rescanList.add(qd);
+                          abortOnFail = new ManifoldCFException("Repeated service interruptions - failure processing document"+((e.getCause()!=null)?": "+e.getCause().getMessage():""),e.getCause());
+                        }
+                        else
+                        {
+                          requeueList.add(qd);
+                        }
+                      }
+                      else
+                      {
+                        requeueList.add(qd);
+                      }
+                    }
+                      
+                    requeueDocuments(jobManager,requeueList,e.getRetryTime(),e.getFailTime(),
+                      e.getFailRetryCount());
+                      
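+                    // Nothing in this batch survives a pipeline-level interruption; clearing
+                    // the active list turns the processing block below into a no-op.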
+                    activeDocuments.clear();
+                    pipelineSpecification = null;
+                  }
 
                   if (activeDocuments.size() > 0)
                   {
-                    // === Fetch document versions ===
-                    String[] currentDocIDHashArray = new String[activeDocuments.size()];
-                    String[] currentDocIDArray = new String[activeDocuments.size()];
-                    // We used to feed the old document version back to the repository connector so that it could
-                    // make decisions about whether to fetch, or just to call documentRecord().  The problem in a
-                    // multi-output world is that we may have had an error, and successfully output a document to
-                    // some outputs but not to others.  But we do this in a specific order.  It should be always safe
-                    // to get the document version from the *last* output in the sequence.  The problem is, we need
-                    // to be able to figure out what that is, and it is currently an implementation detail of
-                    // IncrementalIngester.  We solve this by allowing IncrementalIngester to make the decision.
-
-                    String[] oldVersionStringArray = new String[activeDocuments.size()];
-
-                    for (int i = 0; i < activeDocuments.size(); i++)
-                    {
-                      QueuedDocument qd = activeDocuments.get(i);
-                      currentDocIDHashArray[i] = qd.getDocumentDescription().getDocumentIdentifierHash();
-                      currentDocIDArray[i] = qd.getDocumentDescription().getDocumentIdentifier();
-                      DocumentIngestStatus dis = qd.getLastIngestedStatus(lastIndexedOutputConnectionName);
-                      if (dis == null)
-                        oldVersionStringArray[i] = null;
-                      else
-                      {
-                        oldVersionStringArray[i] = dis.getDocumentVersion();
-                        if (oldVersionStringArray[i] == null)
-                          oldVersionStringArray[i] = "";
-                      }
-                    }
-
-                    // Create a full PipelineSpecification, including description strings.  (This is per-job still, but can throw ServiceInterruptions, so we do it in here.)
-                    IPipelineSpecification pipelineSpecification = new PipelineSpecification(pipelineSpecificationBasic,job,ingester);
                     
-                    Set<String> abortSet = new HashSet<String>();
-                    VersionActivity versionActivity = new VersionActivity(job.getID(),processID,connectionName,pipelineSpecification,connMgr,jobManager,ingester,abortSet,ingestLogger);
-
+                    // **** New worker thread code starts here!!! ****
+                    
+                    IExistingVersions existingVersions = new ExistingVersions(lastIndexedOutputConnectionName,activeDocuments);
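+                    // ExistingVersions exposes the version strings last indexed on the final
+                    // output, so the connector itself can decide what needs reprocessing.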
                     String aclAuthority = connection.getACLAuthority();
                     if (aclAuthority == null)
                       aclAuthority = "";
                     boolean isDefaultAuthority = (aclAuthority.length() == 0);
 
-                    if (Logging.threads.isDebugEnabled())
-                      Logging.threads.debug("Worker thread getting versions for "+Integer.toString(currentDocIDArray.length)+" documents");
-
-                    // === Fetch documents ===
-                    // We start by getting the document version string.
-                    String[] newVersionStringArray = null;
+                    // Build the processActivity object
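+                    // previousDocuments maps each document identifier hash to its queue record, giving ProcessActivity
+                    // access to the last ingested status of every document in the batch.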
+                    Map<String,QueuedDocument> previousDocuments = new HashMap<String,QueuedDocument>();
+                    
+                    String[] documentIDs = new String[activeDocuments.size()];
+                    int k = 0;
+                    for (QueuedDocument qd : activeDocuments)
+                    {
+                      previousDocuments.put(qd.getDocumentDescription().getDocumentIdentifierHash(),qd);
+                      documentIDs[k++] = qd.getDocumentDescription().getDocumentIdentifier();
+                    }
+                    
+                    ProcessActivity activity = new ProcessActivity(job.getID(),processID,
+                      threadContext,rt,jobManager,ingester,
+                      connectionName,pipelineSpecification,
+                      previousDocuments,
+                      currentTime,
+                      job.getExpiration(),
+                      job.getForcedMetadata(),
+                      job.getInterval(),
+                      job.getMaxInterval(),
+                      job.getHopcountMode(),
+                      connection,connector,connMgr,legalLinkTypes,ingestLogger,
+                      newParameterVersion);
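+                    // ProcessActivity records per-document state (touched, aborted, deleted) as the connector works;
+                    // the framework consults that state below to decide each document's disposition.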
                     try
                     {
-                      newVersionStringArray = connector.getDocumentVersions(currentDocIDArray,oldVersionStringArray,
-                        versionActivity,spec,jobType,isDefaultAuthority);
-
                       if (Logging.threads.isDebugEnabled())
-                        Logging.threads.debug("Worker thread done getting versions for "+Integer.toString(currentDocIDArray.length)+" documents");
+                        Logging.threads.debug("Worker thread about to process "+Integer.toString(documentIDs.length)+" documents");
 
-                    }
-                    catch (ServiceInterruption e)
-                    {
-                      // This service interruption comes from a point where we
-                      // know that no documents were ingested.
-                      // Therefore, active -> pending and activepurgatory -> pendingpurgatory
-
-                      if (!e.jobInactiveAbort())
+                      // Now, process in bulk -- catching and handling ServiceInterruptions
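+                      // A single processDocuments() call now both detects changes (via existingVersions) and fetches/indexes;
+                      // the connector decides, per document, whether a refetch is warranted.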
+                      ServiceInterruption serviceInterruption = null;
+                      try
                       {
-                        Logging.jobs.warn("Pre-ingest service interruption reported for job "+
+                        connector.processDocuments(documentIDs,existingVersions,job.getSpecification(),activity,jobType,isDefaultAuthority);
+                        
+                        // Now do everything that the connector would otherwise have had to do itself.
+
+                        // Right now, that's just getting rid of untouched components.
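+                        // A component the connector did not re-index this pass is presumed to no longer exist in the
+                        // repository document, so its indexed copy must be removed.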
+                        for (QueuedDocument qd : activeDocuments)
+                        {
+                          String documentIdentifier = qd.getDocumentDescription().getDocumentIdentifier();
+                          if (!activity.wasDocumentAborted(documentIdentifier) && !activity.wasDocumentDeleted(documentIdentifier))
+                          {
+                            String documentIdentifierHash = qd.getDocumentDescription().getDocumentIdentifierHash();
+                            // In order to be able to loop over all the components that the incremental ingester knows about, we need to know
+                            // what the FIRST output is.
+                            DocumentIngestStatusSet set = qd.getLastIngestedStatus(ingester.getFirstIndexedOutputConnectionName(pipelineSpecificationBasic));
+                            if (set != null)
+                            {
+                              Iterator<String> componentHashes = set.componentIterator();
+                              while (componentHashes.hasNext())
+                              {
+                                String componentHash = componentHashes.next();
+                                // Check whether we've indexed this component or not
+                                if (!activity.wasDocumentComponentTouched(documentIdentifier,
+                                  componentHash))
+                                {
+                                  // This component must be removed.
+                                  ingester.documentRemove(
+                                    pipelineSpecificationBasic,
+                                    connectionName,documentIdentifierHash,componentHash,
+                                    ingestLogger);
+                                }
+                              }
+                            }
+                          }
+                        }
+
+                        // Done with connector functionality!
+                      }
+                      catch (ServiceInterruption e)
+                      {
+                        serviceInterruption = e;
+                        if (!e.jobInactiveAbort())
+                          Logging.jobs.warn("Service interruption reported for job "+
                           job.getID()+" connection '"+job.getConnectionName()+"': "+
                           e.getMessage());
                       }
 
-                      if (!e.jobInactiveAbort() && e.isAbortOnFail())
-                        abortOnFail = new ManifoldCFException("Repeated service interruptions - failure getting document version"+((e.getCause()!=null)?": "+e.getCause().getMessage():""),e.getCause());
-                        
-                      // Mark the current documents to be recrawled at the
-                      // time specified, with the proper error handling.
-                      List<QueuedDocument> newActiveList = new ArrayList<QueuedDocument>(activeDocuments.size());
-                      for (int i = 0; i < activeDocuments.size(); i++)
+                      // Flush remaining references into the database!
+                      activity.flush();
+
+                      if (Logging.threads.isDebugEnabled())
+                        Logging.threads.debug("Worker thread done processing "+Integer.toString(documentIDs.length)+" documents");
+
+                      // Either way, handle the documents we were supposed to process.  But if there was a service interruption,
+                      // and the disposition of the document was unclear, then the document will need to be requeued instead of handled normally.
+                      List<QueuedDocument> requeueList = new ArrayList<QueuedDocument>();
+
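+                      // Disposition per document: aborted -> finishList (unconditional requeue); deleted -> deleteList;
+                      // unclear due to service interruption -> requeueList (or deleteList/rescanList on hard failure);
+                      // otherwise -> finishList for normal completion handling.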
+                      for (QueuedDocument qd : activeDocuments)
                       {
-                        QueuedDocument qd = activeDocuments.get(i);
-                        DocumentDescription dd = qd.getDocumentDescription();
-                        // If either we are going to be requeuing beyond the fail time, OR
-                        // the number of retries available has hit 0, THEN we treat this
-                        // as either an "ignore" or a hard error.
-                        if (!e.jobInactiveAbort() && (dd.getFailTime() != -1L && dd.getFailTime() < e.getRetryTime() ||
-                          dd.getFailRetryCount() == 0))
+                        // If this document was aborted, then treat it specially.
+                        if (activity.wasDocumentAborted(qd.getDocumentDescription().getDocumentIdentifier()))
                         {
-                          // Treat this as a hard failure.
-                          if (e.isAbortOnFail())
-                          {
-                            rescanList.add(qd);
-                          }
-                          // We want this particular document to be not included in the
-                          // reprocessing.  Therefore, we do the same thing as we would
-                          // if we got back a null version.
+                          // Special treatment for aborted documents.
+                          // We ignore the returned version string completely, since it's presumed that processing was not completed for this doc.
+                          // We want to give up immediately on this one, and just requeue it for immediate reprocessing (pending its prereqs being all met).
+                          // Add to the finish list, so it gets requeued.  Because the document is already marked as aborted, this should be enough to cause an
+                          // unconditional requeue.
+                          finishList.add(qd);
+                        }
+                        else if (activity.wasDocumentDeleted(qd.getDocumentDescription().getDocumentIdentifier()))
+                        {
                           deleteList.add(qd);
                         }
-                        else
+                        else if (serviceInterruption != null)
                         {
-                          // Retry this document according to the parameters provided.
-                          jobManager.resetDocument(dd,e.getRetryTime(),
-                            IJobManager.ACTION_RESCAN,e.getFailTime(),e.getFailRetryCount());
-                          qd.setProcessed();
-                        }
-                      }
-                      
-                      // All active documents have been removed from the list
-                      activeDocuments.clear();
-                      
-                    }
 
-                    // If version fetch was successful, the go on to processing phase
-                    if (newVersionStringArray != null)
-                    {
-                      // This try{ } is for releasing document versions at the connector level.
-                      try
-                      {
-
-                        // Loop through documents now, and amass what we need to fetch.
-                        // We also need to tally: (1) what needs to be marked as deleted via
-                        //   jobManager.markDocumentDeleted();
-                        // (2) what needs to be noted as a deletion to ingester
-                        // (3) what needs to be noted as a check for the ingester
-                        for (int i = 0; i < activeDocuments.size(); i++)
-                        {
-                          QueuedDocument qd = activeDocuments.get(i);
+                          // A service interruption takes precedence over the "unchanged" disposition, because we might have
+                          // been interrupted while scanning the document for references.
                           DocumentDescription dd = qd.getDocumentDescription();
-                          // If this document was aborted, then treat it specially; we never go on to fetch it, for one thing.
-                          if (abortSet.contains(dd.getDocumentIdentifier()))
+                          // Check for hard failure.  But no hard failure is possible if it's a job inactive abort.
+                          if (!serviceInterruption.jobInactiveAbort() && (dd.getFailTime() != -1L && dd.getFailTime() < serviceInterruption.getRetryTime() ||
+                            dd.getFailRetryCount() == 0))
                           {
-                            // Special treatment for aborted documents.
-                            // We ignore the returned version string completely, since it's presumed that processing was not completed for this doc.
-                            // We want to give up immediately on this one, and just requeue it for immediate reprocessing (pending its prereqs being all met).
-                            // Add to the finish list, so it gets requeued.  Because the document is already marked as aborted, this should be enough to cause an
-                            // unconditional requeue.
-                            finishList.add(qd);
-                          }
-                          else
-                          {
-                            // Compare against old version.
-                            // We call the incremental ingester to make the decision for us as to whether we refetch a document or not.
-                            
-                            String documentIDHash = dd.getDocumentIdentifierHash();
-                            String newDocVersion = newVersionStringArray[i];
-                            versionMap.put(documentIDHash,newDocVersion);
-
-                            if (newDocVersion == null)
+                            // Treat this as a hard failure.
+                            if (serviceInterruption.isAbortOnFail())
                             {
-                              deleteList.add(qd);
+                              // Make sure that the job aborts.
+                              abortOnFail = new ManifoldCFException("Repeated service interruptions - failure processing document"+((serviceInterruption.getCause()!=null)?": "+serviceInterruption.getCause().getMessage():""),serviceInterruption.getCause());
+                              rescanList.add(qd);
                             }
                             else
                             {
-                              // Not getting deleted, so we must do the finish processing (i.e. conditionally requeue), so note that.
-                              finishList.add(qd);
-
-                              // See if we need to add, or update.
-                              IPipelineSpecificationWithVersions specWithVersions = new PipelineSpecificationWithVersions(pipelineSpecification,qd);
-                              boolean allowIngest = ingester.checkFetchDocument(specWithVersions,
-                                newDocVersion,
-                                newParameterVersion,
-                                aclAuthority);
-
-                              fetchList.add(new DocumentToProcess(qd,!allowIngest));
-                              if (!allowIngest)
-                                ingesterCheckList.add(documentIDHash);
+                              // Skip the document, rather than failing.
+                              // We want this particular document not to be included in the
+                              // reprocessing.  Therefore, we do the same thing as we would
+                              // if we got back a null version.
+                              deleteList.add(qd);
                             }
                           }
-
-                        }
-                        activeDocuments.clear();
-
-                        // We are done transfering activeDocuments documents to the other lists for processing.
-                        // Those lists will all need to be processed, but the processList is special because it
-                        // must be processed in the same context as the version fetch.
-
-                        // Note the documents that have been checked but not reingested.  This should happen BEFORE we need
-                        // the statistics (which are calculated during the finishlist step below)
-                        if (ingesterCheckList.size() > 0)
-                        {
-                          String[] checkClasses = new String[ingesterCheckList.size()];
-                          String[] checkIDs = new String[ingesterCheckList.size()];
-                          for (int i = 0; i < checkIDs.length; i++)
+                          else
                           {
-                            checkClasses[i] = connectionName;
-                            checkIDs[i] = ingesterCheckList.get(i);
+                            // Not a hard failure.  Requeue.
+                            requeueList.add(qd);
                           }
-                          ingester.documentCheckMultiple(pipelineSpecificationBasic,checkClasses,checkIDs,currentTime);
                         }
-
-                        // First, make the things we will need for all subsequent steps.
-                        // We need first to assemble an IPipelineSpecificationWithVersions object for each document we're going to process.
-                        // We put this in a map so it can be looked up by document identifier.
-                        Map<String,IPipelineSpecificationWithVersions> fetchPipelineSpecifications = new HashMap<String,IPipelineSpecificationWithVersions>();
-                        for (int i = 0; i < fetchList.size(); i++)
-                        {
-                          QueuedDocument qd = fetchList.get(i).getDocument();
-                          fetchPipelineSpecifications.put(qd.getDocumentDescription().getDocumentIdentifierHash(),
-                            new PipelineSpecificationWithVersions(pipelineSpecification,qd));
-                        }
+                        else
+                          finishList.add(qd);
                         
-                        ProcessActivity activity = new ProcessActivity(job.getID(),processID,
-                          threadContext,rt,jobManager,ingester,
-                          connectionName,pipelineSpecification,
-                          fetchPipelineSpecifications,
-                          currentTime,
-                          job.getExpiration(),
-                          job.getForcedMetadata(),
-                          job.getInterval(),
-                          job.getMaxInterval(),
-                          job.getHopcountMode(),
-                          connection,connector,connMgr,legalLinkTypes,ingestLogger,abortSet,
-                          newParameterVersion);
-                        try
+                        // If the connector never touched the document, note it so that its check time and statistics can be updated below
+                        if (!activity.wasDocumentTouched(qd.getDocumentDescription().getDocumentIdentifier()))
                         {
+                          ingesterCheckList.add(qd.getDocumentDescription().getDocumentIdentifierHash());
+                        }
+                      }
+                      
 
-                          // Finishlist and Fetchlist are parallel.  Fetchlist contains what we need to process.
-                          if (fetchList.size() > 0)
+                      if (serviceInterruption != null)
+                      {
+                        // Requeue the documents we've identified as needing to be repeated
+                        requeueDocuments(jobManager,requeueList,serviceInterruption.getRetryTime(),serviceInterruption.getFailTime(),
+                          serviceInterruption.getFailRetryCount());
+                      }
+                      
+                      // Note the documents that have been checked but not reingested.  This should happen BEFORE we need
+                      // the statistics (which are calculated during the finishlist step below)
+                      if (ingesterCheckList.size() > 0)
+                      {
+                        String[] checkClasses = new String[ingesterCheckList.size()];
+                        String[] checkIDs = new String[ingesterCheckList.size()];
+                        for (int i = 0; i < checkIDs.length; i++)
+                        {
+                          checkClasses[i] = connectionName;
+                          checkIDs[i] = ingesterCheckList.get(i);
+                        }
+                        // This method should exercise reasonable intelligence.  If the document has never been indexed, it should detect that
+                        // and stop.  Otherwise, it should update the statistics accordingly.
+                        ingester.documentCheckMultiple(pipelineSpecificationBasic,checkClasses,checkIDs,currentTime);
+                      }
+
+                      // Process the finish list!
+                      if (finishList.size() > 0)
+                      {
+                        // "Finish" the documents (removing unneeded carrydown info and computing hopcounts).
+                        // This can ONLY be done on fully-completed documents; everything else should be left in a dangling
+                        // state (which we know is OK because it will be fixed the next time the document is attempted).
+                        String[] documentIDHashes = new String[finishList.size()];
+                        k = 0;
+                        for (QueuedDocument qd : finishList)
+                        {
+                          documentIDHashes[k++] = qd.getDocumentDescription().getDocumentIdentifierHash();
+                        }
+                        DocumentDescription[] requeueCandidates = jobManager.finishDocuments(job.getID(),legalLinkTypes,documentIDHashes,job.getHopcountMode());
+                        ManifoldCF.requeueDocumentsDueToCarrydown(jobManager,requeueCandidates,connector,connection,rt,currentTime);
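+                        // finishDocuments() returns candidates whose carrydown data changed; those documents must
+                        // themselves be requeued for reprocessing, which requeueDocumentsDueToCarrydown() handles.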
+                        
+                        // In both job types, we have to go through the finishList to figure out what to do with the documents.
+                        // In the case of a document that was aborted, we must requeue it for immediate reprocessing in BOTH job types.
+                        switch (job.getType())
+                        {
+                        case IJobDescription.TYPE_CONTINUOUS:
                           {
-                            // Build a list of id's and flags
-                            String[] processIDs = new String[fetchList.size()];
-                            String[] processIDHashes = new String[fetchList.size()];
-                            String[] versions = new String[fetchList.size()];
-                            boolean[] scanOnly = new boolean[fetchList.size()];
-
-                            for (int i = 0; i < fetchList.size(); i++)
+                            // We need to populate timeArray
+                            String[] timeIDClasses = new String[finishList.size()];
+                            String[] timeIDHashes = new String[finishList.size()];
+                            for (int i = 0; i < timeIDHashes.length; i++)
                             {
-                              DocumentToProcess dToP = fetchList.get(i);
-                              DocumentDescription dd = dToP.getDocument().getDocumentDescription();
-                              processIDs[i] = dd.getDocumentIdentifier();
-                              processIDHashes[i] = dd.getDocumentIdentifierHash();
-                              versions[i] = versionMap.get(dd.getDocumentIdentifierHash());
-                              scanOnly[i] = dToP.getScanOnly();
+                              QueuedDocument qd = finishList.get(i);
+                              DocumentDescription dd = qd.getDocumentDescription();
+                              String documentIDHash = dd.getDocumentIdentifierHash();
+                              timeIDClasses[i] = connectionName;
+                              timeIDHashes[i] = documentIDHash;
                             }
-
-                            if (Thread.currentThread().isInterrupted())
-                              throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);
-
-                            if (Logging.threads.isDebugEnabled())
-                              Logging.threads.debug("Worker thread about to process "+Integer.toString(processIDs.length)+" documents");
-
-                            // Now, process in bulk
-                            try
-                            {
-
-                              connector.processDocuments(processIDs,versions,activity,job.getSpecification(),scanOnly,jobType);
-
-                              // Flush remaining references into the database!
-                              activity.flush();
-
-                              // "Finish" the documents (removing unneeded carrydown info, etc.)
-                              DocumentDescription[] requeueCandidates = jobManager.finishDocuments(job.getID(),legalLinkTypes,processIDHashes,job.getHopcountMode());
-
-                              ManifoldCF.requeueDocumentsDueToCarrydown(jobManager,
-                                requeueCandidates,connector,connection,rt,currentTime);
-
-                              if (Logging.threads.isDebugEnabled())
-                                Logging.threads.debug("Worker thread done processing "+Integer.toString(processIDs.length)+" documents");
-
-                            }
-                            catch (ServiceInterruption e)
-                            {
-                              // This service interruption could have resulted
-                              // after some or all of the documents ingested.
-                              // They will therefore need to go into the PENDINGPURGATORY
-                              // state.
-
-                              if (!e.jobInactiveAbort())
-                                Logging.jobs.warn("Service interruption reported for job "+
-                                  job.getID()+" connection '"+job.getConnectionName()+"': "+
-                                  e.getMessage());
-
-                              if (!e.jobInactiveAbort() && e.isAbortOnFail())
-                                abortOnFail = new ManifoldCFException("Repeated service interruptions - failure processing document"+((e.getCause()!=null)?": "+e.getCause().getMessage():""),e.getCause());
-
-                              // Mark the current documents to be recrawled in the
-                              // time specified, except for the ones beyond their limits.
-                              // Those will either be deleted, or an exception will be thrown that
-                              // will abort the current job.
-
-                              deleteList.clear();
-                              ArrayList requeueList = new ArrayList();
-
-                              Set<String> fetchDocuments = new HashSet<String>();
-                              for (int i = 0; i < fetchList.size(); i++)
-                              {
-                                fetchDocuments.add(fetchList.get(i).getDocument().getDocumentDescription().getDocumentIdentifierHash());
-                              }
-                              List<QueuedDocument> newFinishList = new ArrayList<QueuedDocument>();
-                              for (int i = 0; i < finishList.size(); i++)
-                              {
-                                QueuedDocument qd = finishList.get(i);
-                                if (fetchDocuments.contains(qd.getDocumentDescription().getDocumentIdentifierHash()))
-                                {
-                                  DocumentDescription dd = qd.getDocumentDescription();
-                                  // Check for hard failure.  But no hard failure possible of it's a job inactive abort.
-                                  if (!e.jobInactiveAbort() && (dd.getFailTime() != -1L && dd.getFailTime() < e.getRetryTime() ||
-                                    dd.getFailRetryCount() == 0))
-                                  {
-                                    // Treat this as a hard failure.
-                                    if (e.isAbortOnFail())
-                                    {
-                                      rescanList.add(qd);
-                                    }
-                                    else
-                                    {
-                                      // We want this particular document to be not included in the
-                                      // reprocessing.  Therefore, we do the same thing as we would
-                                      // if we got back a null version.
-                                      deleteList.add(qd);
-                                    }
-                                  }
-                                  else
-                                  {
-                                    requeueList.add(qd);
-                                  }
-                                }
-                                else
-                                  newFinishList.add(qd);
-                              }
-
-                              // Requeue the documents we've identified
-                              requeueDocuments(jobManager,requeueList,e.getRetryTime(),e.getFailTime(),
-                                e.getFailRetryCount());
-
-                              // We've disposed of all the documents, so finishlist is now clear
-                              finishList = newFinishList;
-                            }
-                          } // End of fetching
-
-                          if (finishList.size() > 0)
-                          {
-                            // In both job types, we have to go through the finishList to figure out what to do with the documents.
-                            // In the case of a document that was aborted, we must requeue it for immediate reprocessing in BOTH job types.
-                            switch (job.getType())
-                            {
-                            case IJobDescription.TYPE_CONTINUOUS:
-                              {
-                                // We need to populate timeArray
-                                String[] timeIDClasses = new String[finishList.size()];
-                                String[] timeIDHashes = new String[finishList.size()];
-                                for (int i = 0; i < timeIDHashes.length; i++)
-                                {
-                                  QueuedDocument qd = (QueuedDocument)finishList.get(i);
-                                  DocumentDescription dd = qd.getDocumentDescription();
-                                  String documentIDHash = dd.getDocumentIdentifierHash();
-                                  timeIDClasses[i] = connectionName;
-                                  timeIDHashes[i] = documentIDHash;
-                                }
-                                long[] timeArray = ingester.getDocumentUpdateIntervalMultiple(pipelineSpecificationBasic,timeIDClasses,timeIDHashes);
-                                Long[] recheckTimeArray = new Long[timeArray.length];
-                                int[] actionArray = new int[timeArray.length];
-                                DocumentDescription[] recrawlDocs = new DocumentDescription[finishList.size()];
-                                for (int i = 0; i < finishList.size(); i++)
-                                {
-                                  QueuedDocument qd = finishList.get(i);
-                                  recrawlDocs[i] = qd.getDocumentDescription();
-                                  String documentID = recrawlDocs[i].getDocumentIdentifier();
-
-                                  // If aborted due to sequencing issue, then requeue for reprocessing immediately, ignoring everything else.
-                                  boolean wasAborted = abortSet.contains(documentID);
-                                  if (wasAborted)
-                                  {
-                                    // Requeue for immediate reprocessing
-                                    if (Logging.scheduling.isDebugEnabled())
-                                      Logging.scheduling.debug("Document '"+documentID+"' will be RESCANNED as soon as prerequisites are met");
-
-                                    actionArray[i] = IJobManager.ACTION_RESCAN;
-                                    recheckTimeArray[i] = new Long(0L);     // Must not use null; that means 'never'.
-                                  }
-                                  else
-                                  {
-                                    // Calculate the next time to run, or time to expire.
-
-                                    // For run time, the formula is to calculate the running avg interval between changes,
-                                    // add an additional interval (which comes from the job description),
-                                    // and add that to the current time.
-                                    // One caveat: we really want to calculate the interval from the last
-                                    // time change was detected, but this is not implemented yet.
-                                    long timeAmt = timeArray[i];
-                                    // null value indicates never to schedule
-
-                                    Long recrawlTime = activity.calculateDocumentRescheduleTime(currentTime,timeAmt,documentID);
-                                    Long expireTime = activity.calculateDocumentExpireTime(currentTime,documentID);
-
-
-                                    // Merge the two times together.  We decide on the action based on the action with the lowest time.
-                                    if (expireTime == null || (recrawlTime != null && recrawlTime.longValue() < expireTime.longValue()))
-                                    {
-                                      if (Logging.scheduling.isDebugEnabled())
-                                        Logging.scheduling.debug("Document '"+documentID+"' will be RESCANNED at "+recrawlTime.toString());
-                                      recheckTimeArray[i] = recrawlTime;
-                                      actionArray[i] = IJobManager.ACTION_RESCAN;
-                                    }
-                                    else if (recrawlTime == null || (expireTime != null && recrawlTime.longValue() > expireTime.longValue()))
-                                    {
-                                      if (Logging.scheduling.isDebugEnabled())
-                                        Logging.scheduling.debug("Document '"+documentID+"' will be REMOVED at "+expireTime.toString());
-                                      recheckTimeArray[i] = expireTime;
-                                      actionArray[i] = IJobManager.ACTION_REMOVE;
-                                    }
-                                    else
-                                    {
-                                      // Default activity if conflict will be rescan
-                                      if (Logging.scheduling.isDebugEnabled() && recrawlTime != null)
-                                        Logging.scheduling.debug("Document '"+documentID+"' will be RESCANNED at "+recrawlTime.toString());
-                                      recheckTimeArray[i] = recrawlTime;
-                                      actionArray[i] = IJobManager.ACTION_RESCAN;
-                                    }
-                                  }
-                                }
-
-                                jobManager.requeueDocumentMultiple(recrawlDocs,recheckTimeArray,actionArray);
-
-                              }
-                              break;
-                            case IJobDescription.TYPE_SPECIFIED:
-                              {
-                                // Separate the ones we actually finished from the ones we need to requeue because they were aborted
-                                List<DocumentDescription> completedList = new ArrayList<DocumentDescription>();
-                                List<DocumentDescription> abortedList = new ArrayList<DocumentDescription>();
-                                for (int i = 0; i < finishList.size(); i++)
-                                {
-                                  QueuedDocument qd = finishList.get(i);
-                                  DocumentDescription dd = qd.getDocumentDescription();
-                                  if (abortSet.contains(dd.getDocumentIdentifier()))
-                                  {
-                                    // The document was aborted, so put it into the abortedList
-                                    abortedList.add(dd);
-                                  }
-                                  else
-                                  {
-                                    // The document was completed.
-                                    completedList.add(dd);
-                                  }
-                                }
-
-                                // Requeue the ones that must be repeated
-                                if (abortedList.size() > 0)
-                                {
-                                  DocumentDescription[] docDescriptions = new DocumentDescription[abortedList.size()];
-                                  Long[] recheckTimeArray = new Long[docDescriptions.length];
-                                  int[] actionArray = new int[docDescriptions.length];
-                                  for (int i = 0; i < docDescriptions.length; i++)
-                                  {
-                                    docDescriptions[i] = abortedList.get(i);
-                                    recheckTimeArray[i] = new Long(0L);
-                                    actionArray[i] = IJobManager.ACTION_RESCAN;
-                                  }
-
-                                  jobManager.requeueDocumentMultiple(docDescriptions,recheckTimeArray,actionArray);
-                                }
-
-                                // Mark the ones completed that were actually completed.
-                                if (completedList.size() > 0)
-                                {
-                                  DocumentDescription[] docDescriptions = new DocumentDescription[completedList.size()];
-                                  for (int i = 0; i < docDescriptions.length; i++)
-                                  {
-                                    docDescriptions[i] = (DocumentDescription)completedList.get(i);
-                                  }
-
-                                  jobManager.markDocumentCompletedMultiple(docDescriptions);
-                                }
-                              }
-                              break;
-                            default:
-                              throw new ManifoldCFException("Unexpected value for job type: '"+Integer.toString(job.getType())+"'");
-                            }
-
-                            // Finally, if we're still alive, mark everything as "processed".
+                            long[] timeArray = ingester.getDocumentUpdateIntervalMultiple(pipelineSpecificationBasic,timeIDClasses,timeIDHashes);
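+                            // timeArray contains per-document average update intervals, from which we compute the next
+                            // recrawl or expiration time below.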
+                            Long[] recheckTimeArray = new Long[timeArray.length];
+                            int[] actionArray = new int[timeArray.length];
+                            DocumentDescription[] recrawlDocs = new DocumentDescription[finishList.size()];
                             for (int i = 0; i < finishList.size(); i++)
                             {
                               QueuedDocument qd = finishList.get(i);
-                              qd.setProcessed();
+                              recrawlDocs[i] = qd.getDocumentDescription();
+                              String documentID = recrawlDocs[i].getDocumentIdentifier();
+
+                              // If aborted due to sequencing issue, then requeue for reprocessing immediately, ignoring everything else.
+                              boolean wasAborted = activity.wasDocumentAborted(documentID);
+                              if (wasAborted)
+                              {
+                                // Requeue for immediate reprocessing
+                                if (Logging.scheduling.isDebugEnabled())
+                                  Logging.scheduling.debug("Document '"+documentID+"' will be RESCANNED as soon as prerequisites are met");
+
+                                actionArray[i] = IJobManager.ACTION_RESCAN;
+                                recheckTimeArray[i] = new Long(0L);     // Must not use null; that means 'never'.
+                              }
+                              else
+                              {
+                                // Calculate the next time to run, or time to expire.
+
+                                // For run time, the formula is to calculate the running avg interval between changes,
+                                // add an additional interval (which comes from the job description),
+                                // and add that to the current time.
+                                // One caveat: we really want to calculate the interval from the last
+                                // time a change was detected, but this is not implemented yet.
+                                long timeAmt = timeArray[i];
+                                // A null value from the calculations below indicates never to schedule.
+
+                                Long recrawlTime = activity.calculateDocumentRescheduleTime(currentTime,timeAmt,documentID);
+                                Long expireTime = activity.calculateDocumentExpireTime(currentTime,documentID);
+
+
+                                // Merge the two times together.  We decide on the action based on the action with the lowest time.
+                                if (expireTime == null || (recrawlTime != null && recrawlTime.longValue() < expireTime.longValue()))
+                                {
+                                  if (Logging.scheduling.isDebugEnabled())
+                                    Logging.scheduling.debug("Document '"+documentID+"' will be RESCANNED at "+recrawlTime.toString());
+                                  recheckTimeArray[i] = recrawlTime;
+                                  actionArray[i] = IJobManager.ACTION_RESCAN;
+                                }
+                                else if (recrawlTime == null || (expireTime != null && recrawlTime.longValue() > expireTime.longValue()))
+                                {
+                                  if (Logging.scheduling.isDebugEnabled())
+                                    Logging.scheduling.debug("Document '"+documentID+"' will be REMOVED at "+expireTime.toString());
+                                  recheckTimeArray[i] = expireTime;
+                                  actionArray[i] = IJobManager.ACTION_REMOVE;
+                                }
+                                else
+                                {
+                                  // Default action if the times conflict will be rescan
+                                  if (Logging.scheduling.isDebugEnabled() && recrawlTime != null)
+                                    Logging.scheduling.debug("Document '"+documentID+"' will be RESCANNED at "+recrawlTime.toString());
+                                  recheckTimeArray[i] = recrawlTime;
+                                  actionArray[i] = IJobManager.ACTION_RESCAN;
+                                }
+                              }
                             }
 
-                          }
-                        
-                        }
-                        finally
-                        {
-                          // Make sure we don't leave any dangling carrydown files
-                          activity.discard();
-                        }
-                          
-                        // Successful processing of the set
-                        // We count 'get version' time in the average, so even if we decide not to process a doc
-                        // it still counts.
-                        queueTracker.noteConnectionPerformance(qds.getCount(),connectionName,System.currentTimeMillis() - processingStartTime);
+                            jobManager.requeueDocumentMultiple(recrawlDocs,recheckTimeArray,actionArray);
 
+                          }
+                          break;
+                        case IJobDescription.TYPE_SPECIFIED:
+                          {
+                            // Separate the ones we actually finished from the ones we need to requeue because they were aborted
+                            List<DocumentDescription> completedList = new ArrayList<DocumentDescription>();
+                            List<DocumentDescription> abortedList = new ArrayList<DocumentDescription>();
+                            for (QueuedDocument qd : finishList)
+                            {
+                              DocumentDescription dd = qd.getDocumentDescription();
+                              if (activity.wasDocumentAborted(dd.getDocumentIdentifier()))
+                              {
+                                // The document was aborted, so put it into the abortedList
+                                abortedList.add(dd);
+                              }
+                              else
+                              {
+                                // The document was completed.
+                                completedList.add(dd);
+                              }
+                            }
+
+                            // Requeue the ones that must be repeated
+                            if (abortedList.size() > 0)
+                            {
+                              DocumentDescription[] docDescriptions = new DocumentDescription[abortedList.size()];
+                              Long[] recheckTimeArray = new Long[docDescriptions.length];
+                              int[] actionArray = new int[docDescriptions.length];
+                              for (int i = 0; i < docDescriptions.length; i++)
+                              {
+                                docDescriptions[i] = abortedList.get(i);
+                                recheckTimeArray[i] = new Long(0L);
+                                actionArray[i] = IJobManager.ACTION_RESCAN;
+                              }
+
+                              jobManager.requeueDocumentMultiple(docDescriptions,recheckTimeArray,actionArray);
+                            }
+
+                            // Mark the ones completed that were actually completed.
+                            if (completedList.size() > 0)
+                            {
+                              DocumentDescription[] docDescriptions = new DocumentDescription[completedList.size()];
+                              for (int i = 0; i < docDescriptions.length; i++)
+                              {
+                                docDescriptions[i] = completedList.get(i);
+                              }
+
+                              jobManager.markDocumentCompletedMultiple(docDescriptions);
+                            }
+                          }
+                          break;
+                        default:
+                          throw new ManifoldCFException("Unexpected value for job type: '"+Integer.toString(job.getType())+"'");
+                        }
+
+                        // Finally, if we're still alive, mark everything we finished as "processed".
+                        for (QueuedDocument qd : finishList)
+                        {
+                          qd.setProcessed();
+                        }
                       }
-                      finally
-                      {
-                        // Release any document temporary storage held by the connector
-                        connector.releaseDocumentVersions(currentDocIDArray,newVersionStringArray);
-                      }
-                    
                     }
+                    finally
+                    {
+                      // Make sure we don't leave any dangling carrydown files
+                      activity.discard();
+                    }
+                    
+                    // Successful processing of the set
+                    // We count 'get version' time in the average, so even if we decide not to process a doc
+                    // it still counts.
+                    queueTracker.noteConnectionPerformance(qds.getCount(),connectionName,System.currentTimeMillis() - processingStartTime);
+
                   }
                   
                   // Now, handle the delete list
@@ -806,9 +706,8 @@
               }
               
               // Handle rescanning
-              for (int i = 0; i < rescanList.size(); i++)
+              for (QueuedDocument qd : rescanList)
               {
-                QueuedDocument qd = rescanList.get(i);
                 jobManager.resetDocument(qd.getDocumentDescription(),0L,IJobManager.ACTION_RESCAN,-1L,-1);
                 qd.setProcessed();
               }
@@ -845,10 +744,9 @@
           {
             // Go through qds and requeue any that aren't closed out in one way or another.  This allows the job
             // to be aborted; no dangling entries are left around.
-            int i = 0;
-            while (i < qds.getCount())
+            for (int i = 0; i < qds.getCount(); i++)
             {
-              QueuedDocument qd = qds.getDocument(i++);
+              QueuedDocument qd = qds.getDocument(i);
               if (!qd.wasProcessed())
               {
                 jobManager.resetDocument(qd.getDocumentDescription(),0L,IJobManager.ACTION_RESCAN,-1L,-1);
@@ -1114,9 +1012,8 @@
         requeueCandidates,connector,connection,rt,currentTime);
 
       // Mark all these as done
-      for (int i = 0; i < jobmanagerRemovalList.size(); i++)
+      for (QueuedDocument qd : jobmanagerRemovalList)
       {
-        QueuedDocument qd = jobmanagerRemovalList.get(i);
         qd.setProcessed();
       }
     }
@@ -1136,34 +1033,22 @@
     {
       DocumentDescription[] requeueDocs = new DocumentDescription[requeueList.size()];
 
-      int i = 0;
-      while (i < requeueDocs.length)
+      for (int i = 0; i < requeueDocs.length; i++)
       {
         QueuedDocument qd = requeueList.get(i);
         DocumentDescription dd = qd.getDocumentDescription();
         requeueDocs[i] = dd;
-        i++;
       }
 
       jobManager.resetDocumentMultiple(requeueDocs,retryTime,IJobManager.ACTION_RESCAN,failTime,failCount);
 
-      i = 0;
-      while (i < requeueList.size())
+      for (QueuedDocument qd : requeueList)
       {
-        QueuedDocument qd = requeueList.get(i++);
         qd.setProcessed();
       }
     }
   }
 
-  protected static String packTransformations(String[] transformationNames, String[] transformationDescriptionStrings)
-  {
-    StringBuilder sb = new StringBuilder();
-    packList(sb,transformationNames,'+');
-    packList(sb,transformationDescriptionStrings,'!');
-    return sb.toString();
-  }
-  
   /** Another stuffer for packing lists of variable length */
   protected static void packList(StringBuilder output, String[] values, char delimiter)
   {
@@ -1223,231 +1108,6 @@
 
   // Nested classes
 
-  /** Version activity class wraps access to activity history.
-  */
-  protected static class VersionActivity implements IVersionActivity
-  {
-    protected final Long jobID;
-    protected final String processID;
-    protected final String connectionName;
-    protected final IPipelineSpecification pipelineSpecification;
-    protected final IRepositoryConnectionManager connMgr;
-    protected final IJobManager jobManager;
-    protected final IIncrementalIngester ingester;
-    protected final Set<String> abortSet;
-    protected final CheckActivity checkActivity;
-    /** Constructor.
-    */
-    public VersionActivity(Long jobID, String processID,
-      String connectionName, IPipelineSpecification pipelineSpecification,
-      IRepositoryConnectionManager connMgr,
-      IJobManager jobManager, IIncrementalIngester ingester, Set<String> abortSet,
-      CheckActivity checkActivity)
-    {
-      this.jobID = jobID;
-      this.processID = processID;
-      this.connectionName = connectionName;
-      this.pipelineSpecification = pipelineSpecification;
-      this.connMgr = connMgr;
-      this.jobManager = jobManager;
-      this.ingester = ingester;
-      this.abortSet = abortSet;
-      this.checkActivity = checkActivity;
-    }
-
-    /** Check whether a mime type is indexable by the currently specified output connector.
-    *@param mimeType is the mime type to check, not including any character set specification.
-    *@return true if the mime type is indexable.
-    */
-    @Override
-    public boolean checkMimeTypeIndexable(String mimeType)
-      throws ManifoldCFException, ServiceInterruption
-    {
-      return ingester.checkMimeTypeIndexable(
-        pipelineSpecification,
-        mimeType,
-        checkActivity);
-    }
-
-    /** Check whether a document is indexable by the currently specified output connector.
-    *@param localFile is the local copy of the file to check.
-    *@return true if the document is indexable.
-    */
-    @Override
-    public boolean checkDocumentIndexable(File localFile)
-      throws ManifoldCFException, ServiceInterruption
-    {
-      return ingester.checkDocumentIndexable(
-        pipelineSpecification,
-        localFile,
-        checkActivity);
-    }
-
-    /** Check whether a document of a specified length is indexable by the currently specified output connector.
-    *@param length is the length to check.
-    *@return true if the document is indexable.
-    */
-    @Override
-    public boolean checkLengthIndexable(long length)
-      throws ManifoldCFException, ServiceInterruption
-    {
-      return ingester.checkLengthIndexable(
-        pipelineSpecification,
-        length,
-        checkActivity);
-    }
-
-    /** Pre-determine whether a document's URL is indexable by this connector.  This method is used by participating repository connectors
-    * to help filter out documents that are not worth indexing.
-    *@param url is the URL of the document.
-    *@return true if the file is indexable.
-    */
-    @Override
-    public boolean checkURLIndexable(String url)
-      throws ManifoldCFException, ServiceInterruption
-    {
-      return ingester.checkURLIndexable(
-        pipelineSpecification,
-        url,
-        checkActivity);
-    }
-
-    /** Record time-stamped information about the activity of the connector.
-    *@param startTime is either null or the time since the start of epoch in milliseconds (Jan 1, 1970).  Every
-    *       activity has an associated time; the startTime field records when the activity began.  A null value
-    *       indicates that the start time and the finishing time are the same.
-    *@param activityType is a string which is fully interpretable only in the context of the connector involved, which is
-    *       used to categorize what kind of activity is being recorded.  For example, a web connector might record a
-    *       "fetch document" activity.  Cannot be null.
-    *@param dataSize is the number of bytes of data involved in the activity, or null if not applicable.
-    *@param entityIdentifier is a (possibly long) string which identifies the object involved in the history record.
-    *       The interpretation of this field will differ from connector to connector.  May be null.
-    *@param resultCode contains a terse description of the result of the activity.  The description is limited in
-    *       size to 255 characters, and can be interpreted only in the context of the current connector.  May be null.
-    *@param resultDescription is a (possibly long) human-readable string which adds detail, if required, to the result
-    *       described in the resultCode field.  This field is not meant to be queried on.  May be null.
-    *@param childIdentifiers is a set of child entity identifiers associated with this activity.  May be null.
-    */
-    @Override
-    public void recordActivity(Long startTime, String activityType, Long dataSize,
-      String entityIdentifier, String resultCode, String resultDescription, String[] childIdentifiers)
-      throws ManifoldCFException
-    {
-      connMgr.recordHistory(connectionName,startTime,activityType,dataSize,entityIdentifier,resultCode,
-        resultDescription,childIdentifiers);
-    }
-
-    /** Retrieve data passed from parents to a specified child document.
-    *@param localIdentifier is the document identifier of the document we want the recorded data for.
-    *@param dataName is the name of the data items to retrieve.
-    *@return an array containing the unique data values passed from ALL parents.  Note that these are in no particular order, and there will not be any duplicates.
-    */
-    @Override
-    public String[] retrieveParentData(String localIdentifier, String dataName)
-      throws ManifoldCFException
-    {
-      return jobManager.retrieveParentData(jobID,ManifoldCF.hash(localIdentifier),dataName);
-    }
-
-    /** Retrieve data passed from parents to a specified child document.
-    *@param localIdentifier is the document identifier of the document we want the recorded data for.
-    *@param dataName is the name of the data items to retrieve.
-    *@return an array containing the unique data values passed from ALL parents.  Note that these are in no particular order, and there will not be any duplicates.
-    */
-    @Override
-    public CharacterInput[] retrieveParentDataAsFiles(String localIdentifier, String dataName)
-      throws ManifoldCFException
-    {
-      return jobManager.retrieveParentDataAsFiles(jobID,ManifoldCF.hash(localIdentifier),dataName);
-    }
-
-    /** Check whether current job is still active.
-    * This method is provided to allow an individual connector that needs to wait on some long-term condition to give up waiting due to the job
-    * itself being aborted.  If the connector should abort, this method will raise a properly-formed ServiceInterruption, which if thrown to the
-    * caller, will signal that the current versioning activity remains incomplete and must be retried when the job is resumed.
-    */
-    @Override
-    public void checkJobStillActive()
-      throws ManifoldCFException, ServiceInterruption
-    {
-      if (jobManager.checkJobActive(jobID) == false)
-        throw new ServiceInterruption("Job no longer active",System.currentTimeMillis(),true);
-    }
-
-    /** Begin an event sequence.
-    * This method should be called by a connector when a sequencing event should enter the "pending" state.  If the event is already in that state,
-    * this method will return false, otherwise true.  The connector has the responsibility of appropriately managing sequencing given the response
-    * status.
-    *@param eventName is the event name.
-    *@return false if the event is already in the "pending" state.
-    */
-    @Override
-    public boolean beginEventSequence(String eventName)
-      throws ManifoldCFException
-    {
-      return jobManager.beginEventSequence(processID,eventName);
-    }
-
-    /** Complete an event sequence.
-    * This method should be called to signal that an event is no longer in the "pending" state.  This can mean that the prerequisite processing is
-    * completed, but it can also mean that prerequisite processing was aborted or cannot be completed.
-    * Note well: This method should not be called unless the connector is CERTAIN that an event is in progress, and that the current thread has
-    * the sole right to complete it.  Otherwise, race conditions can develop which would be difficult to diagnose.
-    *@param eventName is the event name.
-    */
-    @Override
-    public void completeEventSequence(String eventName)
-      throws ManifoldCFException
-    {
-      jobManager.completeEventSequence(eventName);
-    }
-
-    /** Abort processing a document (for sequencing reasons).
-    * This method should be called in order to cause the specified document to be requeued for later processing.  While this is similar in some respects
-    * to the semantics of a ServiceInterruption, it is applicable to only one document at a time, and also does not specify any delay period, since it is
-    * presumed that the reason for the requeue is because of sequencing issues synchronized around an underlying event.
-    *@param localIdentifier is the document identifier to requeue
-    */
-    @Override
-    public void retryDocumentProcessing(String localIdentifier)
-      throws ManifoldCFException
-    {
-      // Accumulate aborts
-      abortSet.add(localIdentifier);
-    }
-
-    /** Create a global string from a simple string.
-    *@param simpleString is the simple string.
-    *@return a global string.
-    */
-    @Override
-    public String createGlobalString(String simpleString)
-    {
-      return ManifoldCF.createGlobalString(simpleString);
-    }
-
-    /** Create a connection-specific string from a simple string.
-    *@param simpleString is the simple string.
-    *@return a connection-specific string.
-    */
-    @Override
-    public String createConnectionSpecificString(String simpleString)
-    {
-      return ManifoldCF.createConnectionSpecificString(connectionName,simpleString);
-    }
-
-    /** Create a job-based string from a simple string.
-    *@param simpleString is the simple string.
-    *@return a job-specific string.
-    */
-    @Override
-    public String createJobSpecificString(String simpleString)
-    {
-      return ManifoldCF.createJobSpecificString(jobID,simpleString);
-    }
-
-  }
-
   /** Process activity class wraps access to the ingester and job queue.
   */
   protected static class ProcessActivity implements IProcessActivity
@@ -1460,7 +1120,7 @@
     protected final IIncrementalIngester ingester;
     protected final String connectionName;
     protected final IPipelineSpecification pipelineSpecification;
-    protected final Map<String,IPipelineSpecificationWithVersions> fetchPipelineSpecifications;
+    protected final Map<String,QueuedDocument> previousDocuments;
     protected final long currentTime;
     protected final Long expireInterval;
     protected final Map<String,Set<String>> forcedMetadata;
@@ -1473,7 +1133,6 @@
     protected final String[] legalLinkTypes;
     protected final OutputActivity ingestLogger;
     protected final IReprioritizationTracker rt;
-    protected final Set<String> abortSet;
     protected final String parameterVersion;
     
     // We submit references in bulk, because that's way more efficient.
@@ -1488,6 +1147,19 @@
     // Origination times
     protected final Map<String,Long> originationTimes = new HashMap<String,Long>();
 
+    // Whether the document was aborted or not
+    protected final Set<String> abortSet = new HashSet<String>();
+
+    // Whether the document was touched or not
+    protected final Set<String> touchedSet = new HashSet<String>();
+    
+    // Whether document was deleted
+    protected final Set<String> documentDeletedSet = new HashSet<String>();
+    
+    // Whether a component was touched or not, keyed by document identifier.
+    // This does not include primary document.  The set is keyed by component id hash.
+    protected final Map<String,Set<String>> touchedComponentSet = new HashMap<String,Set<String>>();
+    
     /** Constructor.
     *@param jobManager is the job manager
     *@param ingester is the ingester
@@ -1498,7 +1170,7 @@
       IIncrementalIngester ingester,
       String connectionName,
       IPipelineSpecification pipelineSpecification,
-      Map<String,IPipelineSpecificationWithVersions> fetchPipelineSpecifications,
+      Map<String,QueuedDocument> previousDocuments,
       long currentTime,
       Long expireInterval,
       Map<String,Set<String>> forcedMetadata,
@@ -1507,7 +1179,6 @@
       int hopcountMode,
       IRepositoryConnection connection, IRepositoryConnector connector,
       IRepositoryConnectionManager connMgr, String[] legalLinkTypes, OutputActivity ingestLogger,
-      Set<String> abortSet,
       String parameterVersion)
     {
       this.jobID = jobID;
@@ -1518,7 +1189,7 @@
       this.ingester = ingester;
       this.connectionName = connectionName;
       this.pipelineSpecification = pipelineSpecification;
-      this.fetchPipelineSpecifications = fetchPipelineSpecifications;
+      this.previousDocuments = previousDocuments;
       this.currentTime = currentTime;
       this.expireInterval = expireInterval;
       this.forcedMetadata = forcedMetadata;
@@ -1530,7 +1201,6 @@
       this.connMgr = connMgr;
       this.legalLinkTypes = legalLinkTypes;
       this.ingestLogger = ingestLogger;
-      this.abortSet = abortSet;
       this.parameterVersion = parameterVersion;
     }
 
@@ -1545,6 +1215,73 @@
       referenceList.clear();
     }
 
+    /** Check whether a document (and its version string) was touched or not.
+    */
+    public boolean wasDocumentTouched(String documentIdentifier)
+    {
+      return touchedSet.contains(documentIdentifier);
+    }
+
+    /** Check whether a document component was touched or not.
+    */
+    public boolean wasDocumentComponentTouched(String documentIdentifier,
+      String componentIdentifierHash)
+    {
+      Set<String> components = touchedComponentSet.get(documentIdentifier);
+      if (components == null)
+        return false;
+      return components.contains(componentIdentifierHash);
+    }
+    
+    /** Check whether document was deleted or not.
+    */
+    public boolean wasDocumentDeleted(String documentIdentifier)
+    {
+      return documentDeletedSet.contains(documentIdentifier);
+    }
+    
+    /** Check whether a document was aborted or not.
+    */
+    public boolean wasDocumentAborted(String documentIdentifier)
+    {
+      return abortSet.contains(documentIdentifier);
+    }
+    
+    /** Check if a document needs to be reindexed, based on a computed version string.
+    * Call this method to determine whether reindexing is necessary.  Pass in a newly-computed version
+    * string.  This method will return "true" if the document needs to be re-indexed.
+    *@param documentIdentifier is the document identifier.
+    *@param newVersionString is the newly-computed version string.
+    *@return true if the document needs to be reindexed.
+    */
+    @Override
+    public boolean checkDocumentNeedsReindexing(String documentIdentifier,
+      String newVersionString)
+      throws ManifoldCFException
+    {
+      return checkDocumentNeedsReindexing(documentIdentifier,null,newVersionString);
+    }
+
+    /** Check if a document needs to be reindexed, based on a computed version string.
+    * Call this method to determine whether reindexing is necessary.  Pass in a newly-computed version
+    * string.  This method will return "true" if the document needs to be re-indexed.
+    *@param documentIdentifier is the document identifier.
+    *@param componentIdentifier is the component document identifier, if any.
+    *@param newVersionString is the newly-computed version string.
+    *@return true if the document needs to be reindexed.
+    */
+    @Override
+    public boolean checkDocumentNeedsReindexing(String documentIdentifier,
+      String componentIdentifier,
+      String newVersionString)
+      throws ManifoldCFException
+    {
+      String documentIdentifierHash = ManifoldCF.hash(documentIdentifier);
+      String componentIdentifierHash = computeComponentIDHash(componentIdentifier);
+      IPipelineSpecificationWithVersions spec = computePipelineSpecification(documentIdentifierHash,componentIdentifierHash);
+      return ingester.checkFetchDocument(spec,newVersionString,parameterVersion,connection.getACLAuthority());
+    }
+
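
As a usage sketch, a repository connector working against this revamped interface would typically call this method before fetching anything; calcVersionString() and doFetchAndIngest() below are hypothetical repository-specific helpers, not part of the API:

    // Inside processDocuments(): skip the fetch when the indexed version is current.
    for (String documentIdentifier : documentIdentifiers)
    {
      String newVersionString = calcVersionString(documentIdentifier);   // hypothetical helper
      if (!activities.checkDocumentNeedsReindexing(documentIdentifier, newVersionString))
        continue;  // version string matches what was last indexed; nothing to do
      doFetchAndIngest(documentIdentifier, newVersionString, activities);  // hypothetical helper
    }
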
     /** Add a document description to the current job's queue.
     *@param localIdentifier is the local document identifier to add (for the connector that
     * fetched the document).
@@ -1738,19 +1475,38 @@
     }
 
     /** Record a document version, but don't ingest it.
-    * ServiceInterruption is thrown if this action must be rescheduled.
     *@param documentIdentifier is the document identifier.
     *@param version is the document version.
     */
     @Override
     public void recordDocument(String documentIdentifier, String version)
-      throws ManifoldCFException, ServiceInterruption
+      throws ManifoldCFException
+    {
+      recordDocument(documentIdentifier,null,version);
+    }
+
+    /** Record a document version, WITHOUT reindexing it, or removing it.  (Other
+    * documents with the same URL, however, will still be removed.)  This is
+    * useful if the version string changes but the document contents are known not
+    * to have changed.
+    *@param documentIdentifier is the document identifier.
+    *@param componentIdentifier is the component document identifier, if any.
+    *@param version is the document version.
+    */
+    @Override
+    public void recordDocument(String documentIdentifier,
+      String componentIdentifier,
+      String version)
+      throws ManifoldCFException
     {
       String documentIdentifierHash = ManifoldCF.hash(documentIdentifier);
+      String componentIdentifierHash = computeComponentIDHash(componentIdentifier);
       ingester.documentRecord(
         pipelineSpecification.getBasicPipelineSpecification(),
-        connectionName,documentIdentifierHash,
-        version,currentTime,ingestLogger);
+        connectionName,documentIdentifierHash,componentIdentifierHash,
+        version,currentTime);
+      touchedSet.add(documentIdentifier);
+      touchComponentSet(documentIdentifier,componentIdentifierHash);
     }
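
A hedged sketch of when recordDocument() applies: the repository's change counter (and hence the version string) has moved, but the connector knows the indexed content is unaffected; contentUnchanged() is a hypothetical repository-specific test:

    // Record the new version string without re-sending the document downstream.
    if (contentUnchanged(documentIdentifier, newVersionString))
      activities.recordDocument(documentIdentifier, newVersionString);
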
 
     /** Ingest the current document.
@@ -1792,11 +1548,31 @@
     public void ingestDocumentWithException(String documentIdentifier, String version, String documentURI, RepositoryDocument data)
       throws ManifoldCFException, ServiceInterruption, IOException
     {
+      ingestDocumentWithException(documentIdentifier,null,version,documentURI,data);
+    }
+
+    /** Ingest the current document.
+    *@param documentIdentifier is the document's identifier.
+    *@param componentIdentifier is the component document identifier, if any.
+    *@param version is the version of the document, as reported by the getDocumentVersions() method of the
+    *       corresponding repository connector.
+    *@param documentURI is the URI to use to retrieve this document from the search interface (and is
+    *       also the unique key in the index).
+    *@param data is the document data.  The data is closed after ingestion is complete.
+    *@throws IOException only when data stream reading fails.
+    */
+    @Override
+    public void ingestDocumentWithException(String documentIdentifier,
+      String componentIdentifier,
+      String version, String documentURI, RepositoryDocument data)
+      throws ManifoldCFException, ServiceInterruption, IOException
+    {
       // We should not get called here if versions agree, unless the repository
       // connector cannot distinguish between versions - in which case it must
       // always ingest (essentially)
 
       String documentIdentifierHash = ManifoldCF.hash(documentIdentifier);
+      String componentIdentifierHash = computeComponentIDHash(componentIdentifier);
 
       if (data != null)
       {
@@ -1818,55 +1594,120 @@
         
       // First, we need to add into the metadata the stuff from the job description.
       ingester.documentIngest(
-        fetchPipelineSpecifications.get(documentIdentifierHash),
-        connectionName,documentIdentifierHash,
+        computePipelineSpecification(documentIdentifierHash,componentIdentifierHash),
+        connectionName,documentIdentifierHash,componentIdentifierHash,
         version,parameterVersion,
         connection.getACLAuthority(),
         data,currentTime,
         documentURI,
         ingestLogger);
+      
+      touchedSet.add(documentIdentifier);
+      touchComponentSet(documentIdentifier,componentIdentifierHash);
     }
 
-    /** Delete the current document from the search engine index, while keeping track of the version information
+    /** Remove the specified document from the search engine index, while keeping track of the version information
     * for it (to reduce churn).
     *@param documentIdentifier is the document's local identifier.
-    *@param version is the version of the document, as reported by the getDocumentVersions() method of the
-    *       corresponding repository connector.
+    *@param version is the version string to be recorded for the document.
     */
     @Override
-    public void deleteDocument(String documentIdentifier, String version)
+    public void noDocument(String documentIdentifier, String version)
       throws ManifoldCFException, ServiceInterruption
     {
-      if (version.length() == 0)
-        deleteDocument(documentIdentifier);
-      else
-      {
-        try
-        {
-          ingestDocumentWithException(documentIdentifier,version,null,null);
-        }
-        catch (IOException e)
-        {
-          // Should never occur, since we passed in no data
-          throw new IllegalStateException("IngestDocumentWithException threw an illegal IOException: "+e.getMessage(),e);
-        }
-      }
+      noDocument(documentIdentifier,null,version);
     }
 
-    /** Delete the current document from the search engine index.  This method does NOT keep track of version
-    * information for the document and thus can lead to "churn", whereby the same document is queued, versioned,
-    * and removed on subsequent crawls.  It therefore should be considered to be deprecated, in favor of
-    * deleteDocument(String localIdentifier, String version).
+    /** Remove the specified document from the search engine index, and update the
+    * recorded version information for the document.
     *@param documentIdentifier is the document's local identifier.
+    *@param componentIdentifier is the component document identifier, if any.
+    *@param version is the version string to be recorded for the document.
     */
     @Override
-    public void deleteDocument(String documentIdentifier)
+    public void noDocument(String documentIdentifier,
+      String componentIdentifier,
+      String version)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      // Special interpretation for empty version string; treat as if the document doesn't exist
+      // (by ignoring it and allowing it to be deleted later)
+      String documentIdentifierHash = ManifoldCF.hash(documentIdentifier);
+      String componentIdentifierHash = computeComponentIDHash(componentIdentifier);
+
+      ingester.documentNoData(
+        computePipelineSpecification(documentIdentifierHash,componentIdentifierHash),
+        connectionName,documentIdentifierHash,componentIdentifierHash,
+        version,parameterVersion,
+        connection.getACLAuthority(),
+        currentTime,
+        ingestLogger);
+      
+      touchedSet.add(documentIdentifier);
+      touchComponentSet(documentIdentifier,componentIdentifierHash);
+    }
+
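
A minimal sketch of the intended use, assuming a hypothetical isDirectory() test: connectors that encounter non-indexable documents such as directories call noDocument() so that a version string is still recorded and incremental crawling keeps working:

    // No content goes down the pipeline, but the version string is recorded.
    if (isDirectory(documentIdentifier))   // hypothetical repository-specific test
    {
      activities.noDocument(documentIdentifier, versionString);
      continue;
    }
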
+    /** Remove the specified document primary component permanently from the search engine index,
+    * and from the status table.  Use this method when your document has components and
+    * now also has no primary document, but will not have a primary document again for the foreseeable
+    * future.  This is a rare situation.
+    *@param documentIdentifier is the document's identifier.
+    */
+    @Override
+    public void removeDocument(String documentIdentifier)
       throws ManifoldCFException, ServiceInterruption
     {
       String documentIdentifierHash = ManifoldCF.hash(documentIdentifier);
-      ingester.documentDelete(pipelineSpecification.getBasicPipelineSpecification(),
-        connectionName,documentIdentifierHash,
+      ingester.documentRemove(
+        pipelineSpecification.getBasicPipelineSpecification(),
+        connectionName,documentIdentifierHash,null,
         ingestLogger);
+        
+      // Note that we touched it, so it won't get checked
+      touchedSet.add(documentIdentifier);
+    }
+
+    /** Retain existing document component.  Use this method to signal that an already-existing
+    * document component does not need to be reindexed.  The default behavior is to remove
+    * components that are not mentioned during processing.
+    *@param documentIdentifier is the document's identifier.
+    *@param componentIdentifier is the component document identifier, which cannot be null.
+    */
+    @Override
+    public void retainDocument(String documentIdentifier,
+      String componentIdentifier)
+      throws ManifoldCFException
+    {
+      touchComponentSet(documentIdentifier,computeComponentIDHash(componentIdentifier));
+    }
+
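
A hedged sketch of the component lifecycle this method supports: components not mentioned during processing are removed by default, so an unchanged component must be explicitly retained. Component, listComponents(), and ingestComponent() below are hypothetical:

    // Reindex changed components; explicitly keep the unchanged ones.
    for (Component c : listComponents(documentIdentifier))
    {
      if (activities.checkDocumentNeedsReindexing(documentIdentifier, c.identifier, c.versionString))
        ingestComponent(activities, documentIdentifier, c);
      else
        activities.retainDocument(documentIdentifier, c.identifier);
    }
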
+    
+    /** Delete the current document from the search engine index, while keeping track of the version information
+    * for it (to reduce churn).
+    * Use noDocument() above instead.
+    *@param documentIdentifier is the document's local identifier.
+    *@param version is the version string to be recorded for the document.
+    */
+    @Override
+    @Deprecated
+    public void deleteDocument(String documentIdentifier, String version)
+      throws ManifoldCFException, ServiceInterruption
+    {
+      noDocument(documentIdentifier,version);
+    }
+
+    /** Delete the specified document from the search engine index, and from the status table.
+    * This method does NOT keep track of version information for the document and thus can lead
+    * to "churn", whereby the same document is queued, processed, and removed on subsequent crawls.
+    * It is therefore preferable to use noDocument() instead, in any case where the same decision
+    * will need to be made over and over.
+    *@param documentIdentifier is the document's identifier.
+    */
+    @Override
+    public void deleteDocument(String documentIdentifier)
+      throws ManifoldCFException
+    {
+      documentDeletedSet.add(documentIdentifier);
     }
 
     /** Override the schedule for the next time a document is crawled.
@@ -2255,8 +2096,36 @@
       return ManifoldCF.createJobSpecificString(jobID,simpleString);
     }
 
+    protected void touchComponentSet(String documentIdentifier, String componentIdentifierHash)
+    {
+      if (componentIdentifierHash == null)
+        return;
+      Set<String> components = touchedComponentSet.get(documentIdentifier);
+      if (components == null)
+      {
+        components = new HashSet<String>();
+        touchedComponentSet.put(documentIdentifier,components);
+      }
+      components.add(componentIdentifierHash);
+    }
+    
+    protected IPipelineSpecificationWithVersions computePipelineSpecification(String documentIdentifierHash,
+      String componentIdentifierHash)
+    {
+      return new PipelineSpecificationWithVersions(pipelineSpecification,previousDocuments.get(documentIdentifierHash),componentIdentifierHash);
+    }
+
   }
 
+  protected static String computeComponentIDHash(String componentIdentifier)
+    throws ManifoldCFException
+  {
+    if (componentIdentifier != null)
+      return ManifoldCF.hash(componentIdentifier);
+    else
+      return null;
+  }
+    
   /** DocumentBin class */
   protected static class DocumentBin
   {
@@ -2311,6 +2180,7 @@
       }
       return true;
     }
+    
   }
 
   /** Class describing document reference.
@@ -2578,6 +2448,60 @@
     
   }
   
+  /** The implementation of the IExistingVersions interface.
+  */
+  protected static class ExistingVersions implements IExistingVersions
+  {
+    protected final Map<String,QueuedDocument> map;
+    protected final String lastOutputConnectionName;
+    
+    public ExistingVersions(String lastOutputConnectionName, List<QueuedDocument> list)
+    {
+      this.lastOutputConnectionName = lastOutputConnectionName;
+      this.map = new HashMap<String,QueuedDocument>();
+      for (QueuedDocument qd : list)
+      {
+        map.put(qd.getDocumentDescription().getDocumentIdentifier(),qd);
+      }
+    }
+    
+    /** Retrieve an existing version string given a document identifier.
+    *@param documentIdentifier is the document identifier.
+    *@return the document version string, or null if the document was never previously indexed.
+    */
+    @Override
+    public String getIndexedVersionString(String documentIdentifier)
+      throws ManifoldCFException
+    {
+      return getIndexedVersionString(documentIdentifier,null);
+    }
+
+    /** Retrieve the existing version string of a document component, given a document identifier.
+    *@param documentIdentifier is the document identifier.
+    *@param componentIdentifier is the component identifier, if any.
+    *@return the document version string, or null if the document component was never previously indexed.
+    */
+    @Override
+    public String getIndexedVersionString(String documentIdentifier, String componentIdentifier)
+      throws ManifoldCFException
+    {
+      QueuedDocument qd = map.get(documentIdentifier);
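+      // Every QueuedDocument handed to the constructor was entered into the map,
+      // so qd is expected to be non-null for any identifier being looked up here.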
+      DocumentIngestStatusSet status = qd.getLastIngestedStatus(lastOutputConnectionName);
+      if (status == null)
+        return null;
+      String componentIdentifierHash;
+      if (componentIdentifier == null)
+        componentIdentifierHash = null;
+      else
+        componentIdentifierHash = ManifoldCF.hash(componentIdentifier);
+      DocumentIngestStatus s = status.getComponent(componentIdentifierHash);
+      if (s == null)
+        return null;
+      return s.getDocumentVersion();
+    }
+
+  }
+  
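
A minimal sketch of how a connector might consult this interface, assuming "statuses" is the IExistingVersions instance it was handed:

    // Compare a freshly computed version against what the last output connection saw.
    String previousVersion = statuses.getIndexedVersionString(documentIdentifier);
    boolean needsIndexing = (previousVersion == null) || !previousVersion.equals(newVersionString);
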
   /** The ingest logger class */
   protected static class OutputActivity extends CheckActivity implements IOutputActivity
   {
@@ -2637,17 +2561,26 @@
     /** Send a document via the pipeline to the next output connection.
     *@param documentURI is the document's URI.
     *@param document is the document data to be processed (handed to the output data store).
-    *@param authorityNameString is the authority name string that should be used to qualify the document's access tokens.
     *@return the document status (accepted or permanently rejected); return codes are listed in IPipelineConnector.
     *@throws IOException only if there's an IO error reading the data from the document.
     */
-    public int sendDocument(String documentURI, RepositoryDocument document, String authorityNameString)
+    @Override
+    public int sendDocument(String documentURI, RepositoryDocument document)
       throws ManifoldCFException, ServiceInterruption, IOException
     {
       // No downstream connection at output connection level.
       return IPipelineConnector.DOCUMENTSTATUS_REJECTED;
     }
 
+    /** Send NO document via the pipeline to the next output connection.  This is equivalent
+    * to sending an empty document placeholder.
+    */
+    @Override
+    public void noDocument()
+      throws ManifoldCFException, ServiceInterruption
+    {
+    }
+
   }
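
For transformation connectors, the corresponding obligation is to call noDocument() on the downstream activity when rejecting a document, rather than silently dropping it; a hedged sketch, where isAcceptable() is a hypothetical filtering test and "activities" is the downstream IOutputAddActivity:

    // Inside a transformation connector's document-processing code (sketch only).
    if (!isAcceptable(document))
    {
      activities.noDocument();  // record the rejection so version tracking stays intact
      return IPipelineConnector.DOCUMENTSTATUS_REJECTED;
    }
    return activities.sendDocument(documentURI, document);
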
 
   protected final static long interruptionRetryTime = 5L*60L*1000L;
diff --git a/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/InterruptionHSQLDBTest.java b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/InterruptionHSQLDBTest.java
new file mode 100644
index 0000000..9067448
--- /dev/null
+++ b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/InterruptionHSQLDBTest.java
@@ -0,0 +1,128 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.crawler.tests;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+import org.apache.manifoldcf.agents.system.ManifoldCF;
+
+import java.io.*;
+import java.util.*;
+import org.junit.*;
+
+/** This is a test of service interruptions */
+public class InterruptionHSQLDBTest extends ConnectorBaseHSQLDB
+{
+  protected final ManifoldCFInstance mcfInstance;
+  protected InterruptionTester tester;
+
+  public InterruptionHSQLDBTest()
+  {
+    super();
+    mcfInstance = new ManifoldCFInstance("A",false,false);
+    tester = new InterruptionTester(mcfInstance);
+  }
+  
+  @Override
+  protected String[] getConnectorClasses()
+  {
+    return new String[]{"org.apache.manifoldcf.crawler.tests.InterruptionRepositoryConnector"};
+  }
+  
+  @Override
+  protected String[] getConnectorNames()
+  {
+    return new String[]{"InterruptionConnector"};
+  }
+
+  @Override
+  protected String[] getOutputClasses()
+  {
+    return new String[]{"org.apache.manifoldcf.agents.tests.TestingOutputConnector"};
+  }
+  
+  @Override
+  protected String[] getOutputNames()
+  {
+    return new String[]{"NullOutput"};
+  }
+
+  @Test
+  public void interruptionTestRun()
+    throws Exception
+  {
+    tester.executeTest();
+  }
+  
+  @Before
+  public void setUp()
+    throws Exception
+  {
+    initializeSystem();
+    try
+    {
+      localReset();
+    }
+    catch (Exception e)
+    {
+      System.out.println("Warning: Preclean failed: "+e.getMessage());
+    }
+    try
+    {
+      localSetUp();
+    }
+    catch (Exception e)
+    {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+  
+  @After
+  public void cleanUp()
+    throws Exception
+  {
+    Exception currentException = null;
+    // Last, shut down the web applications.
+    // If this is done too soon it closes the database before the rest of the cleanup happens.
+    try
+    {
+      mcfInstance.unload();
+    }
+    catch (Exception e)
+    {
+      if (currentException == null)
+        currentException = e;
+    }
+    try
+    {
+      localCleanUp();
+    }
+    catch (Exception e)
+    {
+      e.printStackTrace();
+      throw e;
+    }
+    if (currentException != null)
+      throw currentException;
+    cleanupSystem();
+  }
+  
+
+}
diff --git a/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/InterruptionRepositoryConnector.java b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/InterruptionRepositoryConnector.java
new file mode 100644
index 0000000..48a3ddc
--- /dev/null
+++ b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/InterruptionRepositoryConnector.java
@@ -0,0 +1,103 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.crawler.tests;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+import org.apache.manifoldcf.crawler.interfaces.*;
+import org.apache.manifoldcf.crawler.system.ManifoldCF;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+
+/** Connector class to be used by general integration tests that need documents */
+public class InterruptionRepositoryConnector extends org.apache.manifoldcf.crawler.connectors.BaseRepositoryConnector
+{
+  public InterruptionRepositoryConnector()
+  {
+  }
+
+  @Override
+  public void addSeedDocuments(ISeedingActivity activities, DocumentSpecification spec,
+    long startTime, long endTime, int jobMode)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    String docCount = "10";
+    for (int i = 0; i < spec.getChildCount(); i++)
+    {
+      SpecificationNode sn = spec.getChild(i);
+      if (sn.getType().equals("documentcount"))
+        docCount = sn.getAttributeValue("count");
+    }
+    int count = Integer.parseInt(docCount);
+    
+    for (int i = 0; i < count; i++)
+    {
+      String doc = "test"+i+".txt";
+      activities.addSeedDocument(doc,null);
+    }
+  }
+  
+  @Override
+  public String[] getDocumentVersions(String[] documentIdentifiers, String[] oldVersions, IVersionActivity activities,
+    DocumentSpecification spec, int jobMode, boolean usesDefaultAuthority)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    String[] rval = new String[documentIdentifiers.length];
+    for (int i = 0; i < rval.length; i++)
+    {
+      rval[i] = "";
+    }
+    return rval;
+  }
+
+  @Override
+  public void processDocuments(String[] documentIdentifiers, String[] versions, IProcessActivity activities,
+    DocumentSpecification spec, boolean[] scanOnly, int jobMode)
+    throws ManifoldCFException, ServiceInterruption
+  {
+    for (int i = 0; i < documentIdentifiers.length; i++)
+    {
+      String documentIdentifier = documentIdentifiers[i];
+      String version = versions[i];
+      if (!scanOnly[i])
+      {
+        if (documentIdentifier.equals("test0.txt"))
+        {
+          // This will emulate one particular document failing (and being skipped)
+          long currentTime = System.currentTimeMillis();
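+          // Constructor arguments here: message, underlying cause (none), earliest retry
+          // time (+1 second), hard-fail time (+5 seconds), maximum retry count (10), and
+          // whether to abort the job when retries are exhausted (false, so the document is skipped).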
+          throw new ServiceInterruption("Pretending there's a service interruption",
+            null,currentTime+1000L,currentTime+5000L,10,false);
+        }
+        RepositoryDocument rd = new RepositoryDocument();
+        byte[] bytes = documentIdentifier.getBytes(StandardCharsets.UTF_8);
+        rd.setBinary(new ByteArrayInputStream(bytes),bytes.length);
+        try
+        {
+          activities.ingestDocumentWithException(documentIdentifier,version,"http://"+documentIdentifier,rd);
+        }
+        catch (IOException e)
+        {
+          throw new RuntimeException("Shouldn't be seeing IOException from binary array input stream: "+e.getMessage(),e);
+        }
+      }
+    }
+  }
+
+}
diff --git a/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/InterruptionTester.java b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/InterruptionTester.java
new file mode 100644
index 0000000..73a94e0
--- /dev/null
+++ b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/InterruptionTester.java
@@ -0,0 +1,99 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.crawler.tests;
+
+import org.apache.manifoldcf.core.interfaces.*;
+import org.apache.manifoldcf.agents.interfaces.*;
+import org.apache.manifoldcf.crawler.interfaces.*;
+import org.apache.manifoldcf.crawler.system.ManifoldCF;
+
+import java.io.*;
+import java.util.*;
+
+/** This is a test whether we can handle service interruptions */
+public class InterruptionTester
+{
+  protected final ManifoldCFInstance instance;
+  
+  public InterruptionTester(ManifoldCFInstance instance)
+  {
+    this.instance = instance;
+  }
+  
+  public void executeTest()
+    throws Exception
+  {
+    instance.start();
+    
+    // Hey, we were able to install the interruption test connector etc.
+    // Now, create a local test job and run it.
+    IThreadContext tc = ThreadContextFactory.make();
+      
+    // Create a basic repository connection, and save it.
+    IRepositoryConnectionManager mgr = RepositoryConnectionManagerFactory.make(tc);
+    IRepositoryConnection conn = mgr.create();
+    conn.setName("InterruptionTest Connection");
+    conn.setDescription("InterruptionTest Connection");
+    conn.setClassName("org.apache.manifoldcf.crawler.tests.InterruptionRepositoryConnector");
+    conn.setMaxConnections(100);
+    // Now, save
+    mgr.save(conn);
+      
+    // Create a basic null output connection, and save it.
+    IOutputConnectionManager outputMgr = OutputConnectionManagerFactory.make(tc);
+    IOutputConnection outputConn = outputMgr.create();
+    outputConn.setName("Null Connection");
+    outputConn.setDescription("Null Connection");
+    outputConn.setClassName("org.apache.manifoldcf.agents.tests.TestingOutputConnector");
+    outputConn.setMaxConnections(100);
+    // Now, save
+    outputMgr.save(outputConn);
+
+    // Create a job.
+    IJobManager jobManager = JobManagerFactory.make(tc);
+    IJobDescription job = jobManager.createJob();
+    job.setDescription("Test Job");
+    job.setConnectionName("InterruptionTest Connection");
+    job.addPipelineStage(-1,true,"Null Connection","");
+    //job.setOutputConnectionName("Null Connection");
+    job.setType(job.TYPE_SPECIFIED);
+    job.setStartMethod(job.START_DISABLE);
+    job.setHopcountMode(job.HOPCOUNT_ACCURATE);
+      
+    // Save the job.
+    jobManager.save(job);
+
+    // Now, start the job, and wait until it is running.
+    jobManager.manualStart(job.getID());
+    instance.waitJobRunningNative(jobManager,job.getID(),30000L);
+    
+    // Wait for the job to become inactive.  The time should not exceed 10 seconds for the actual crawl.
+    instance.waitJobInactiveNative(jobManager,job.getID(),30000L);
+    // The document will be skipped in the end.
+    if (jobManager.getStatus(job.getID()).getDocumentsProcessed() != 9)
+      throw new Exception("Expected 9 documents, saw "+jobManager.getStatus(job.getID()).getDocumentsProcessed());
+    
+    // Now, delete the job.
+    jobManager.deleteJob(job.getID());
+    instance.waitJobDeletedNative(jobManager,job.getID(),30000L);
+
+    // Shut down the instance
+    instance.stop();
+  }
+}
diff --git a/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/ManifoldCFInstance.java b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/ManifoldCFInstance.java
index 683a0f6..e3007c8 100644
--- a/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/ManifoldCFInstance.java
+++ b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/ManifoldCFInstance.java
@@ -442,7 +442,7 @@
     HttpPut method = new HttpPut(apiURL);
     try
     {
-      method.setEntity(new StringEntity(input,ContentType.create("text/plain","UTF-8")));
+      method.setEntity(new StringEntity(input,ContentType.create("text/plain",StandardCharsets.UTF_8)));
       HttpResponse response = client.execute(method);
       int responseCode = response.getStatusLine().getStatusCode();
       String responseString = convertToString(response);
@@ -470,7 +470,7 @@
     HttpPost method = new HttpPost(apiURL);
     try
     {
-      method.setEntity(new StringEntity(input,ContentType.create("text/plain","UTF-8")));
+      method.setEntity(new StringEntity(input,ContentType.create("text/plain",StandardCharsets.UTF_8)));
       HttpResponse response = client.execute(method);
       int responseCode = response.getStatusLine().getStatusCode();
       String responseString = convertToString(response);
diff --git a/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/SchedulerTester.java b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/SchedulerTester.java
index ffcf35e..87305ee 100644
--- a/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/SchedulerTester.java
+++ b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/SchedulerTester.java
@@ -95,13 +95,15 @@
     // Wait for the job to become inactive.  The time should be at least long enough to handle
     // 100 documents per bin, but not significantly greater than that.  Let's say 120 seconds.
     long startTime = System.currentTimeMillis();
-    instance2.waitJobInactiveNative(jobManager,job.getID(),150000L);
+    instance2.waitJobInactiveNative(jobManager,job.getID(),1200000L);
     long endTime = System.currentTimeMillis();
+    System.out.println("Crawl took "+(endTime-startTime)+" milliseconds");
     if (jobManager.getStatus(job.getID()).getDocumentsProcessed() != 10+10*200)
       throw new Exception("Expected 2010 documents, saw "+jobManager.getStatus(job.getID()).getDocumentsProcessed());
+    if (endTime - startTime > 150000L)
+      throw new Exception("Expected crawl to complete in less than 150 seconds; took "+(endTime-startTime)+" ms");
     if (endTime-startTime < 96000L)
       throw new Exception("Job finished too quickly; throttling clearly failed");
-    System.out.println("Crawl took "+(endTime-startTime)+" milliseconds");
     
     // Now, delete the job.
     jobManager.deleteJob(job.getID());
diff --git a/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/TestingRepositoryConnector.java b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/TestingRepositoryConnector.java
index 004f3b2..8cbdf44 100644
--- a/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/TestingRepositoryConnector.java
+++ b/framework/pull-agent/src/test/java/org/apache/manifoldcf/crawler/tests/TestingRepositoryConnector.java
@@ -81,7 +81,14 @@
         RepositoryDocument rd = new RepositoryDocument();
         byte[] bytes = documentIdentifier.getBytes(StandardCharsets.UTF_8);
         rd.setBinary(new ByteArrayInputStream(bytes),bytes.length);
-        activities.ingestDocument(documentIdentifier,version,"http://"+documentIdentifier,rd);
+        try
+        {
+          activities.ingestDocumentWithException(documentIdentifier,version,"http://"+documentIdentifier,rd);
+        }
+        catch (IOException e)
+        {
+          throw new RuntimeException("Shouldn't be seeing IOException from binary array input stream: "+e.getMessage(),e);
+        }
       }
     }
   }
diff --git a/framework/ui-core/src/main/java/org/apache/manifoldcf/ui/i18n/Messages.java b/framework/ui-core/src/main/java/org/apache/manifoldcf/ui/i18n/Messages.java
index 9572438..e163546 100644
--- a/framework/ui-core/src/main/java/org/apache/manifoldcf/ui/i18n/Messages.java
+++ b/framework/ui-core/src/main/java/org/apache/manifoldcf/ui/i18n/Messages.java
@@ -156,7 +156,7 @@
       String resourcePath = localizeResourceName(pathName, resourceKey, locale);
           
       Writer outputWriter = new OutputWriter(output);
-      engine.mergeTemplate(resourcePath, "UTF-8", context, outputWriter);
+      engine.mergeTemplate(resourcePath, StandardCharsets.UTF_8.name(), context, outputWriter);
       outputWriter.flush();
     } catch (IOException e) {
       throw new ManifoldCFException(e.getMessage(),e);
diff --git a/pom.xml b/pom.xml
index 95eb5e0..d64e8ed 100644
--- a/pom.xml
+++ b/pom.xml
@@ -42,7 +42,7 @@
     <postgresql.version>9.1-901.jdbc4</postgresql.version>
     <mysql.version>5.1.18</mysql.version>
     <hsqldb.version>2.3.1</hsqldb.version>
-    <derby.version>10.10.1.1</derby.version>
+    <derby.version>10.10.2.0</derby.version>
     <jetty.version>7.5.4.v20111024</jetty.version>
     <mail.version>1.4.5</mail.version>
     <activation.version>1.1.1</activation.version>
diff --git a/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml b/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml
index 91d16cc..0506264 100644
--- a/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml
+++ b/site/src/documentation/content/xdocs/en_US/end-user-documentation.xml
@@ -696,11 +696,28 @@
         <section id="outputconnectiontypes">
             <title>Output Connection Types</title>
 
+            <section id="amazoncloudsearchoutputconnector">
+                <title>Amazon Cloud Search Output Connection</title>
+                <p>The Amazon Cloud Search Output Connection type sends documents to a specific path within a specified Amazon Cloud Search instance.  The
+                      connection type furthermore "batches" documents to reduce cost as much as is reasonable.  As a result, some specified documents may be sent at the
+                      end of a job run, rather than at the time they would typically be indexed.</p>
+                <p>The connection configuration information for the Amazon Cloud Search Output Connection type includes one additional tab: the "Server" tab.
+                      This tab looks like this:</p>
+                <br/><br/>
+                <figure src="images/en_US/amazon-configure-server.PNG" alt="Amazon Output Configuration, Server tab" width="80%"/>
+                <br/><br/>
+                <p>You must supply the "Server host" field in order for the connection to work.</p>
+                <p>The Amazon Cloud Search Output Connection type does not contribute any tabs to a job definition.</p>
+                <p>The Amazon Cloud Search Output Connection type can only accept text content that is encoded in a UTF-8-compatible manner.  It is highly
+                      recommended to use the Tika Content Extractor in the pipeline prior to the Amazon Cloud Search Output Connection type in order to
+                      convert documents to an indexable form.</p>
+            </section>
+            
             <section id="elasticsearchoutputconnector">
                 <title>ElasticSearch Output Connection</title>
-                <p>The ElasticSearch Output Connection allow ManifoldCF to submit documents to an ElasticSearch instance, via the XML over HTTP API. The connector has been designed
+                <p>The ElasticSearch Output Connection type allows ManifoldCF to submit documents to an ElasticSearch instance, via its HTTP API. The connector has been designed
             	to be as easy to use as possible.</p>
-                <p>After creating an ElasticSearch ouput connection, you have to populate the parameters tab. Fill in the fields according your ElasticSearch configuration. Each
+                <p>After creating an ElasticSearch output connection, you have to populate the parameters tab. Fill in the fields according to your ElasticSearch configuration. Each
             	ElasticSearch output connector instance works with one index. To work with multiple indexes, just create one output connector for each index.</p>
                 <figure src="images/en_US/elasticsearch-connection-parameters.png" alt="ElasticSearch, parameters tab" width="80%"/>
                 <br />
@@ -908,6 +925,76 @@
 
         </section>
 
+        <section id="transformationconnectiontypes">
+            <title>Transformation Connection Types</title>
+
+            <section id="alloweddocuments">
+                <title>Allowed Documents</title>
+                <p>The Allowed Documents transformation filter is used to limit the documents that will be fetched and passed down the pipeline for indexing.  The
+                      filter allows documents to be restricted by mime type, by extension, and by length.</p>
+                <p>It is important to note that these various methods of filtering rely on the upstream repository connection type to implement them.  Some repository connection
+                      types do not implement all of the available methods of filtering.  For example, filtering by URL (and hence file extension) makes little sense in the
+                      context of a repository connection type whose URLs do not include a full file name.</p>
+                <p>As with all document transformers, more than one Allowed Documents transformation filter can be used in a single pipeline.  This may be useful
+                      if other document transformers (such as the Tika Content Extractor, below) change the characteristics of the document being processed.</p>
+                <p>The Allowed Documents transformation connection type does not require anything other than standard configuration information.</p>
+                <p>The Allowed Documents transformation connection type contributes a single tab to a job definition.  This is the "Allowed Contents" tab, which looks
+                      like this:</p>
+                <br/><br/>
+                <figure src="images/en_US/alloweddocuments-job-allowed-contents.PNG" alt="Allowed Documents specification, Allowed Contents tab" width="80%"/>
+                <br/><br/>
+                <p>Fill in the maximum desired document length, the set of extensions that are allowed, and the set of mime types that are allowed.  All extensions and
+                      mime types are case insensitive.  For extensions, the special value "." matches a missing or empty extension.</p>
+            </section>
+
+            <section id="metadataadjuster">
+                <title>Metadata Adjuster</title>
+                <p>The Metadata Adjuster transformation filter optionally changes the name of incoming metadata, and then optionally adds additional metadata values.
+                      This can be very helpful in many contexts.  For example, you might use the Metadata Adjuster to label all documents from a particular job with a
+                      particular tag in an index.  Or, you might need to map metadata from (say) SharePoint's schema to your final output connection type's schema.
+                      The Metadata Adjuster permits you to handle both of these scenarios.</p>
+                <p>As with all document transformers, more than one Metadata Adjuster transformation filter can be used in a single pipeline.  This may be useful
+                      if other document transformers (such as the Tika Content Extractor, below) change the metadata of the document being processed.</p>
+                <p>The Metadata Adjuster transformation connection type does not require anything other than standard configuration information.</p>
+                <p>The Metadata Adjuster transformation connection type contributes two tabs to a job definition.  These are the "Move metadata" and "Add metadata"
+                      tabs.  The "Move metadata" tab looks like this:</p>
+                <br/><br/>
+                <figure src="images/en_US/metadataadjuster-job-move-metadata.PNG" alt="Metadata Adjuster specification, Move Metadata tab" width="80%"/>
+                <br/><br/>
+                <p>Enter an input metadata name and a target metadata name, and click the "Add" button to add the mapping to the list.  Uncheck the "Keep all metadata"
+                      checkbox in order to prevent unspecified metadata fields from being passed through.</p>
+                <p>The "Add metadata" tab looks like this:</p>
+                <br/><br/>
+                <figure src="images/en_US/metadataadjuster-job-add-metadata.PNG" alt="Metadata Adjuster specification, Add Metadata tab" width="80%"/>
+                <br/><br/>
+                <p>Enter a parameter name and a value, and then click the "Add" button to add the new metadata field and value to the list.  You may add more than
+                      one value with the same field name.</p>
+            </section>
+
+            <section id="nulltransformer">
+                <title>Null Transformer</title>
+            </section>
+
+            <section id="tikaextractor">
+                <title>Tika Content Extractor</title>
+                <p>The Tika Content Extractor transformation filter converts a binary document into a UTF-8 text stream, plus metadata.  This transformation filter
+                      is used primarily when incoming binary content is a possibility, or content that is not binary but has a non-standard encoding such as Shift-JIS.
+                      The Tika Content Extractor extracts metadata from the incoming stream as well.  This metadata can be mapped within the Tika Content Extractor
+                      to metadata field names appropriate for further use downstream in the pipeline.</p>
+                <p>As with all document transformers, more than one Tika Content Extractor transformation filter can be used in a single pipeline.  In the case
+                      of the Tika Content Extractor, this does not seem to be of much utility.</p>
+                <p>The Tika Content Extractor transformation connection type does not require anything other than standard configuration information.</p>
+                <p>The Tika Content Extractor transformation connection type contributes a single tab to a job definition.  This is the "Field mapping" tab, which
+                      looks like this:</p>
+                <br/><br/>
+                <figure src="images/en_US/tika-job-field-mapping.PNG" alt="Tika Content Extractor specification, Field Mapping tab" width="80%"/>
+                <br/><br/>
+                <p>Enter a Tika-generated metadata field name, and a final field name, and click the "Add" button to add the mapping to the list.  Uncheck the
+                      "Keep all metadata" checkbox if you want unspecified Tika metadata to be excluded from the final document.</p>
+            </section>
+
+        </section>
+        
         <section id="mappingconnectiontypes">
             <title>User Mapping Connection Types</title>
             
@@ -2743,14 +2830,17 @@
                 <br/><br/>
                 <figure src="images/en_US/web-job-inclusions.PNG" alt="Web Job, Inclusions tab" width="80%"/>
                 <br/><br/>
-                <p>You will need to provide a series of zero or more regular expressions, separated by newlines.</p>
+                <p>You will need to provide a series of zero or more regular expressions, separated by newlines.  The regular expressions are considered to match if they are
+                      found anywhere within the URL.  They do not need to match the entire URL.</p>
                 <p>Remember that, by default, a web job includes <b>all</b> documents in the world that are linked to your seeds in any way that the web connection type can determine.</p>
                 <p>If you wish to restrict which documents are actually processed within your overall set of included documents, you may want to supply some regular expressions on the
                        "Exclusions" tab, which looks like this:</p>
                 <br/><br/>
                 <figure src="images/en_US/web-job-exclusions.PNG" alt="Web Job, Exclusions tab" width="80%"/>
                 <br/><br/>
-                <p>Once again you will need to provide a series of zero or more regular expressions, separated by newlines.  It is typical to use the "Exclusions" tab to remove documents from
+                <p>Once again you will need to provide a series of zero or more regular expressions, separated by newlines.  The regular expressions are considered to match if they are
+                      found anywhere within the URL.  They do not need to match the entire URL.</p>
+                <p>It is typical to use the "Exclusions" tab to remove documents from
                        consideration which are suspected to contain content that both has no extractable links, and is not useful to the index you are trying to build, e.g. movie files.</p>
                 <p>The "Security" tab allows you to specify the access tokens that the documents in the web job get indexed with, and looks like this:</p>
                 <br/><br/>
diff --git a/site/src/documentation/content/xdocs/en_US/writing-output-connectors.xml b/site/src/documentation/content/xdocs/en_US/writing-output-connectors.xml
index 2cecac1..db007c0 100644
--- a/site/src/documentation/content/xdocs/en_US/writing-output-connectors.xml
+++ b/site/src/documentation/content/xdocs/en_US/writing-output-connectors.xml
@@ -79,6 +79,7 @@
             <tr><td><strong>getPipelineDescription()</strong></td><td>Use the supplied output specification to come up with an output version string</td></tr>
             <tr><td><strong>addOrReplaceDocument()</strong></td><td>Add or replace the specified document within the target repository, or signal if the document cannot be handled</td></tr>
             <tr><td><strong>removeDocument()</strong></td><td>Remove the specified document from the target repository</td></tr>
+            <tr><td><strong>noteJobComplete()</strong></td><td>Called at the end of a job run or job deletion, so that the index can be updated in batch</td></tr>
             <tr><td><strong>outputConfigurationHeader()</strong></td><td>Output the head-section part of an output connection <em>ConfigParams</em> editing page</td></tr>
             <tr><td><strong>outputConfigurationBody()</strong></td><td>Output the body-section part of an output connection <em>ConfigParams</em> editing page</td></tr>
             <tr><td><strong>processConfigurationPost()</strong></td><td>Receive and process form data from an output connection <em>ConfigParams</em> editing page</td></tr>
diff --git a/site/src/documentation/content/xdocs/en_US/writing-repository-connectors.xml b/site/src/documentation/content/xdocs/en_US/writing-repository-connectors.xml
index af25100..5aee1a2 100644
--- a/site/src/documentation/content/xdocs/en_US/writing-repository-connectors.xml
+++ b/site/src/documentation/content/xdocs/en_US/writing-repository-connectors.xml
@@ -48,6 +48,7 @@
           <tr><td>Configuration parameters</td><td>A hierarchical structure, internally represented as an XML document, which describes a specific configuration of a specific repository connector, i.e. <strong>how</strong> the connector should do its job; see <em>org.apache.manifoldcf.core.interfaces.ConfigParams</em></td></tr>
           <tr><td>Repository connection</td><td>A repository connector instance that has been furnished with configuration data</td></tr>
           <tr><td>Document identifier</td><td>An arbitrary identifier, whose meaning is determined only within the context of a specific repository connector, which the connector uses to describe a document within a repository</td></tr>
+          <tr><td>Component identifier</td><td>An arbitrary identifier, whose meaning is determined only within the context of a specific document, which the connector uses to describe a component of a document within a repository</td></tr>
           <tr><td>Document URI</td><td>The unique URI (or, in some cases, file IRI) of a document, which is meant to be displayed in search engine results as the link to the document</td></tr>
           <tr><td>Repository document</td><td>An object that describes a document's contents, including raw document data (as a stream), metadata (as either strings or streams), and access tokens; see <em>org.apache.manifoldcf.agents.interfaces.RepositoryDocument</em></td></tr>
           <tr><td>Access token</td><td>A string, which is only meaningful in the context of a specific authority, that describes a quantum of authorization for a user</td></tr>
@@ -70,6 +71,7 @@
           <li>Documentum (uses RMI to segregate native code, etc.)</li>
           <li>FileNet (also uses RMI, but because it is picky about its open-source jar versions)</li>
           <li>File system (a good, but simple, example)</li>
+          <li>Jira (demonstrates good use of session management)</li>
           <li>LiveLink (demonstrates use of local keystore infrastructure)</li>
           <li>Meridio (local keystore, web services, result sets)</li>
           <li>SharePoint (local keystore, web services)</li>
@@ -88,8 +90,7 @@
           <table>
             <tr><th>Method</th><th>What it should do</th></tr>
             <tr><td><strong>addSeedDocuments()</strong></td><td>Use the supplied document specification to come up with an initial set of document identifiers</td></tr>
-            <tr><td><strong>getDocumentVersions()</strong></td><td>Come up with a version string for each of the documents described by the supplied set of document identifiers, or signal if the document is no longer present</td></tr>
-            <tr><td><strong>processDocuments()</strong></td><td>Take the appropriate action (e.g. ingest, or extract references from, or whatever) for a given set of documents described by document identifier and version string</td></tr>
+            <tr><td><strong>processDocuments()</strong></td><td>For each document in the supplied set, compute a version string and take the appropriate action (e.g. ingest, or extract references from, or whatever)</td></tr>
             <tr><td><strong>outputConfigurationHeader()</strong></td><td>Output the head-section part of a repository connection <em>ConfigParams</em> editing page</td></tr>
             <tr><td><strong>outputConfigurationBody()</strong></td><td>Output the body-section part of a repository connection <em>ConfigParams</em> editing page</td></tr>
             <tr><td><strong>processConfigurationPost()</strong></td><td>Receive and process form data from a repository connection <em>ConfigParams</em> editing page</td></tr>
@@ -140,7 +141,7 @@
           <ul>
             <li>Calculate a version string for the document</li>
             <li>Find child references for the document</li>
-            <li>Get the document's content, metadata, and access tokens</li>
+            <li>Get the document's content, metadata, and access tokens, and/or those of its components</li>
           </ul>
           <p></p>
           <p>We highly recommend that no additional information be included in the document identifier, other than what is needed for the above, as that will almost certainly cause problems.</p>
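+          <p></p>
+          <p>For concreteness, here is a schematic sketch of how these responsibilities typically land in <strong>processDocuments()</strong>.  It is illustrative only:
+            the helper methods <em>fetchVersion()</em>, <em>fetchChildren()</em>, <em>fetchContent()</em>, and <em>documentUriFor()</em> and the constant
+            <em>RELATIONSHIP_CHILD</em> are hypothetical, and exact method signatures may vary across ManifoldCF versions.</p>
+          <source>
+public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses,
+  Specification spec, IProcessActivity activities, int jobMode, boolean usePartialCrawl)
+  throws ManifoldCFException, ServiceInterruption
+{
+  for (String documentIdentifier : documentIdentifiers)
+  {
+    // (1) Calculate a version string for the document (fetchVersion() is a hypothetical helper)
+    String versionString = fetchVersion(documentIdentifier);
+    // Skip the document if its version string agrees with what ManifoldCF has stored
+    if (!activities.checkDocumentNeedsReindexing(documentIdentifier, versionString))
+      continue;
+    // (2) Find child references for the document (hypothetical helper and relationship constant)
+    for (String childIdentifier : fetchChildren(documentIdentifier))
+      activities.addDocumentReference(childIdentifier, documentIdentifier, RELATIONSHIP_CHILD);
+    // (3) Get the document's content, metadata, and access tokens, and hand them to the framework
+    RepositoryDocument rd = fetchContent(documentIdentifier);
+    activities.ingestDocumentWithException(documentIdentifier, versionString,
+      documentUriFor(documentIdentifier), rd);
+  }
+}
+</source>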
@@ -150,22 +151,23 @@
           <title>Choosing the form of the document version string</title>
           <p></p>
           <p>The document version string is used by ManifoldCF to determine whether or not the document or configuration changed in such a way as to require that the document
-            be reprocessed.  ManifoldCF therefore requests the version string for any document that is ready for processing, and usually does not process the document again if the
+            be reprocessed.  ManifoldCF therefore requires a version string for any document that is to be indexed, and connectors usually do not process the document again if the
             returned version string agrees with the version string ManifoldCF has stored.</p>
           <p></p>
-          <p>Thinking about it more carefully, it is clear that what a connector writer needs to do is include everything in the version string that could potentially affect how the
+          <p>Clearly, then, a connector writer needs to include in the version string everything that could potentially affect how the
             document gets processed.  That may include the version of the document in the repository, bits of configuration information, metadata, and even access tokens (if the
             underlying repository versions these things independently from the document itself).  Storing all of that information in the version string seems like a lot - but the string
-            is unlimited in length, and it actually serves another useful purpose to do it that way.  Specifically, when it comes time to do the actual processing, it's often the correct
-            thing to do to obtain the necessary data out of the version string, rather than calculating it or fetching it anew.  That way of working guarantees that the document
-            processing was done in a manner that agrees with its recorded version string, thus eliminating any chance of ManifoldCF getting confused.</p>
-          <p></p>
-          <p>For longer data that needs to persist between the <strong>getDocumentVersions()</strong> method call and the <strong>processDocuments()</strong> method
-            call, the connector is welcome to save this information in a temporary disk file.  To help make sure nothing leaks which this approach is used, the IRepositoryConnector
-            interface has a method that will be called to clean up any temporary files that might have been created in the handling of a given document identifier.</p>
+            is unlimited in length, and it is the only means ManifoldCF has of determining whether something has changed in the repository.</p>
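+          <p></p>
+          <p>For example (an illustrative sketch only; all variable names are hypothetical), a connector whose processing depends on the repository modification
+            stamp, part of the job specification, and independently versioned access tokens might build its version string like this:</p>
+          <source>
+// Illustration only: pack everything that affects processing into the version string.
+StringBuilder sb = new StringBuilder();
+sb.append(lastModifiedDate.getTime());          // the document's modification stamp in the repository
+sb.append("+").append(specificationSignature);  // the relevant bits of the job specification
+sb.append("+").append(aclSignature);            // access tokens, if versioned independently of the document
+String versionString = sb.toString();
+</source>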
           <p></p>
         </section>
         <section>
+          <title>Document components</title>
+          <p></p>
+          <p>ManifoldCF considers all documents to consist of zero or more components.  A component is what is actually indexed, which means that each component has its own
+            identifier, data, metadata, access tokens, and URI.  It is up to your repository connector to break documents into components, if needed.  Most of the time, a repository document
+            consists of a single component.</p>
+          <p></p>
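+          <p>A minimal sketch of indexing a document as multiple components follows.  It is illustrative only: the helper methods are hypothetical, and the
+            component-aware <em>IProcessActivity</em> signature shown here may differ in your version of ManifoldCF.</p>
+          <source>
+// Illustration only: index each record of a hypothetical multi-record document separately.
+for (String componentIdentifier : fetchComponentIdentifiers(documentIdentifier))
+{
+  RepositoryDocument rd = fetchComponentContent(documentIdentifier, componentIdentifier);
+  activities.ingestDocumentWithException(documentIdentifier, componentIdentifier,
+    versionString, componentUriFor(documentIdentifier, componentIdentifier), rd);
+}
+</source>
+          <p></p>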
+        </section>
+        <section>
           <title>Notes on connector UI methods</title>
           <p></p>
           <p>The crawler UI uses a tabbed layout structure, and thus each of these elements must properly implement the tabbed model.  This means that the "header" methods
diff --git a/site/src/documentation/resources/images/en_US/alloweddocuments-job-allowed-contents.PNG b/site/src/documentation/resources/images/en_US/alloweddocuments-job-allowed-contents.PNG
new file mode 100644
index 0000000..e7ab984
--- /dev/null
+++ b/site/src/documentation/resources/images/en_US/alloweddocuments-job-allowed-contents.PNG
Binary files differ
diff --git a/site/src/documentation/resources/images/en_US/amazon-configure-server.PNG b/site/src/documentation/resources/images/en_US/amazon-configure-server.PNG
new file mode 100644
index 0000000..f448e3d
--- /dev/null
+++ b/site/src/documentation/resources/images/en_US/amazon-configure-server.PNG
Binary files differ
diff --git a/site/src/documentation/resources/images/en_US/metadataadjuster-job-add-metadata.PNG b/site/src/documentation/resources/images/en_US/metadataadjuster-job-add-metadata.PNG
new file mode 100644
index 0000000..bb5310d
--- /dev/null
+++ b/site/src/documentation/resources/images/en_US/metadataadjuster-job-add-metadata.PNG
Binary files differ
diff --git a/site/src/documentation/resources/images/en_US/metadataadjuster-job-move-metadata.PNG b/site/src/documentation/resources/images/en_US/metadataadjuster-job-move-metadata.PNG
new file mode 100644
index 0000000..a1f4736
--- /dev/null
+++ b/site/src/documentation/resources/images/en_US/metadataadjuster-job-move-metadata.PNG
Binary files differ
diff --git a/site/src/documentation/resources/images/en_US/tika-job-field-mapping.PNG b/site/src/documentation/resources/images/en_US/tika-job-field-mapping.PNG
new file mode 100644
index 0000000..0348a87
--- /dev/null
+++ b/site/src/documentation/resources/images/en_US/tika-job-field-mapping.PNG
Binary files differ