blob: d95ce2fece254458357c8c90ab43ce0ea741e200 [file] [log] [blame]
/* $Id: BaseOutputConnector.java 998081 2010-09-17 11:33:15Z kwright $ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.agents.output;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import java.io.*;
import java.util.*;
/** This base class describes an instance of a connection between an output pipeline and the Connector Framework.
*
* Each instance of this class is used in only one thread at a time. Connection pooling
* on these kinds of objects is performed by the factory which instantiates output connectors
* from symbolic names and config parameters; instances are pooled by these parameters. That is, a pooled connector
* handle is used only if all the connection parameters for the handle match.
*
* Implementers of this interface should provide a default constructor which has this signature:
*
* xxx();
*
* Connectors are either configured or not. If configured, they will persist in a pool, and be
* reused multiple times. Certain methods of a connector may be called before the connector is
* configured. This includes basically all methods that permit inspection of the connector's
* capabilities.
*
*/
public abstract class BaseOutputConnector extends org.apache.manifoldcf.core.connector.BaseConnector implements IOutputConnector
{
  public static final String _rcsid = "@(#)$Id: BaseOutputConnector.java 998081 2010-09-17 11:33:15Z kwright $";

  /** Return the list of activities that this connector supports (i.e. writes into the log).
  * The base implementation reports no activities.
  *@return the list; an empty array in the base class.
  */
  @Override
  public String[] getActivitiesList()
  {
    return new String[0];
  }

  /** Request arbitrary connector information.
  * This method is called directly from the API in order to allow API users to perform any one of several connector-specific
  * queries. The base implementation recognizes no commands: it leaves output untouched and returns false.
  *@param output is the response object, to be filled in by this method.
  *@param command is the command, which is taken directly from the API request.
  *@return true if the resource is found, false if not. In either case, output may be filled in.
  */
  @Override
  public boolean requestInfo(Configuration output, String command)
    throws ManifoldCFException
  {
    return false;
  }

  /** Notify the connector of a completed job.
  * This is meant to allow the connector to flush any internal data structures it has been keeping around, or to tell the output repository that this
  * is a good time to synchronize things. It is called whenever a job is either completed or aborted.
  *@param activities is the handle to an object that the implementer of an output connector may use to perform operations, such as logging processing activity.
  */
  @Override
  public void noteJobComplete(IOutputNotifyActivity activities)
    throws ManifoldCFException, ServiceInterruption
  {
    // The base implementation does nothing here.
  }

  /** Detect if a mime type is acceptable or not. This method is used to determine whether it makes sense to fetch a document
  * in the first place.
  * The base implementation ignores checkActivity and delegates to {@link #checkMimeTypeIndexable(String, String)}.
  *@param pipelineDescription is the document's pipeline version string, for this connection.
  *@param mimeType is the mime type of the document.
  *@param checkActivity is an object including the activities that can be performed by this method.
  *@return true if the mime type can be accepted by this connector.
  */
  @Override
  public boolean checkMimeTypeIndexable(String pipelineDescription, String mimeType, IOutputCheckActivity checkActivity)
    throws ManifoldCFException, ServiceInterruption
  {
    return checkMimeTypeIndexable(pipelineDescription, mimeType);
  }

  /** Detect if a mime type is indexable or not. This method is used by participating repository connectors to pre-filter the number of
  * unusable documents that will be passed to this output connector.
  * The base implementation ignores outputDescription and delegates to {@link #checkMimeTypeIndexable(String)}.
  *@param outputDescription is the document's output version.
  *@param mimeType is the mime type of the document.
  *@return true if the mime type is indexable by this connector.
  */
  public boolean checkMimeTypeIndexable(String outputDescription, String mimeType)
    throws ManifoldCFException, ServiceInterruption
  {
    return checkMimeTypeIndexable(mimeType);
  }

  /** Detect if a mime type is indexable or not. This method is used by participating repository connectors to pre-filter the number of
  * unusable documents that will be passed to this output connector.
  * The base implementation accepts every mime type.
  *@param mimeType is the mime type of the document.
  *@return true if the mime type is indexable by this connector; always true in the base class.
  */
  public boolean checkMimeTypeIndexable(String mimeType)
    throws ManifoldCFException, ServiceInterruption
  {
    return true;
  }

  /** Pre-determine whether a document (passed here as a File object) is acceptable or not. This method is
  * used to determine whether a document needs to be actually transferred. This hook is provided mainly to support
  * search engines that only handle a small set of accepted file types.
  * The base implementation ignores checkActivity and delegates to {@link #checkDocumentIndexable(String, File)}.
  *@param pipelineDescription is the document's pipeline version string, for this connection.
  *@param localFile is the local file to check.
  *@param checkActivity is an object including the activities that can be done by this method.
  *@return true if the file is acceptable, false if not.
  */
  @Override
  public boolean checkDocumentIndexable(String pipelineDescription, File localFile, IOutputCheckActivity checkActivity)
    throws ManifoldCFException, ServiceInterruption
  {
    return checkDocumentIndexable(pipelineDescription, localFile);
  }

  /** Pre-determine whether a document (passed here as a File object) is indexable by this connector. This method is used by participating
  * repository connectors to help reduce the number of unmanageable documents that are passed to this output connector in advance of an
  * actual transfer. This hook is provided mainly to support search engines that only handle a small set of accepted file types.
  * The base implementation ignores outputDescription and delegates to {@link #checkDocumentIndexable(File)}.
  *@param outputDescription is the document's output version.
  *@param localFile is the local file to check.
  *@return true if the file is indexable.
  */
  public boolean checkDocumentIndexable(String outputDescription, File localFile)
    throws ManifoldCFException, ServiceInterruption
  {
    return checkDocumentIndexable(localFile);
  }

  /** Pre-determine whether a document (passed here as a File object) is indexable by this connector. This method is used by participating
  * repository connectors to help reduce the number of unmanageable documents that are passed to this output connector in advance of an
  * actual transfer. This hook is provided mainly to support search engines that only handle a small set of accepted file types.
  * The base implementation accepts every file without inspecting it.
  *@param localFile is the local file to check.
  *@return true if the file is indexable; always true in the base class.
  */
  public boolean checkDocumentIndexable(File localFile)
    throws ManifoldCFException, ServiceInterruption
  {
    return true;
  }

  /** Pre-determine whether a document's length is acceptable. This method is used
  * to determine whether to fetch a document in the first place.
  * The base implementation ignores checkActivity and delegates to {@link #checkLengthIndexable(String, long)}.
  *@param pipelineDescription is the document's pipeline version string, for this connection.
  *@param length is the length of the document.
  *@param checkActivity is an object including the activities that can be done by this method.
  *@return true if the file is acceptable, false if not.
  */
  @Override
  public boolean checkLengthIndexable(String pipelineDescription, long length, IOutputCheckActivity checkActivity)
    throws ManifoldCFException, ServiceInterruption
  {
    return checkLengthIndexable(pipelineDescription, length);
  }

  /** Pre-determine whether a document's length is indexable by this connector. This method is used by participating repository connectors
  * to help filter out documents that are too long to be indexable.
  * The base implementation accepts every length.
  *@param outputDescription is the document's output version.
  *@param length is the length of the document.
  *@return true if the file is indexable; always true in the base class.
  */
  public boolean checkLengthIndexable(String outputDescription, long length)
    throws ManifoldCFException, ServiceInterruption
  {
    return true;
  }

  /** Pre-determine whether a document's URL is acceptable. This method is used
  * to help filter out documents that cannot be indexed in advance.
  * The base implementation ignores checkActivity and delegates to {@link #checkURLIndexable(String, String)}.
  *@param pipelineDescription is the document's pipeline version string, for this connection.
  *@param url is the URL of the document.
  *@param checkActivity is an object including the activities that can be done by this method.
  *@return true if the file is acceptable, false if not.
  */
  @Override
  public boolean checkURLIndexable(String pipelineDescription, String url, IOutputCheckActivity checkActivity)
    throws ManifoldCFException, ServiceInterruption
  {
    return checkURLIndexable(pipelineDescription, url);
  }

  /** Pre-determine whether a document's URL is indexable by this connector. This method is used by participating repository connectors
  * to help filter out documents that are not worth indexing.
  * The base implementation accepts every URL.
  *@param outputDescription is the document's output version.
  *@param url is the URL of the document.
  *@return true if the file is indexable; always true in the base class.
  */
  public boolean checkURLIndexable(String outputDescription, String url)
    throws ManifoldCFException, ServiceInterruption
  {
    return true;
  }

  /** Get a pipeline version string, given a pipeline specification object. The version string is used to
  * uniquely describe the pertinent details of the specification and the configuration, to allow the Connector
  * Framework to determine whether a document will need to be processed again.
  * Note that the contents of any document cannot be considered by this method; only configuration and specification information
  * can be considered.
  *
  * This method presumes that the underlying connector object has been configured.
  * The base implementation casts spec to OutputSpecification (unchecked, so any other type fails with a
  * ClassCastException) and delegates to {@link #getOutputDescription(OutputSpecification)}.
  *@param spec is the current pipeline specification object for this connection for the job that is doing the crawling.
  *@return a string, of unlimited length, which uniquely describes configuration and specification in such a way that
  * if two such strings are equal, nothing that affects how or whether the document is indexed will be different.
  */
  @Override
  public String getPipelineDescription(Specification spec)
    throws ManifoldCFException, ServiceInterruption
  {
    return getOutputDescription((OutputSpecification)spec);
  }

  /** Get an output version string, given an output specification. The output version string is used to uniquely describe the pertinent details of
  * the output specification and the configuration, to allow the Connector Framework to determine whether a document will need to be output again.
  * Note that the contents of the document cannot be considered by this method, and that a different version string (defined in IRepositoryConnector)
  * is used to describe the version of the actual document.
  *
  * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
  * necessary.
  *@param spec is the current output specification for the job that is doing the crawling.
  *@return a string, of unlimited length, which uniquely describes output configuration and specification in such a way that if two such strings are equal,
  * the document will not need to be sent again to the output data store. The base class returns the empty string.
  */
  public String getOutputDescription(OutputSpecification spec)
    throws ManifoldCFException, ServiceInterruption
  {
    // Empty string from the base class.
    return "";
  }

  /** Add (or replace) a document in the output data store using the connector.
  * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
  * necessary.
  * The OutputSpecification is *not* provided to this method, because the goal is consistency, and if output is done it must be consistent with the
  * output description, since that was what was partly used to determine if output should be taking place. So it may be necessary for this method to decode
  * an output description string in order to determine what should be done.
  * The base implementation sends nothing anywhere and always returns DOCUMENTSTATUS_REJECTED.
  *@param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output data store will use to process
  * and serve the document. This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
  *@param outputDescription is the description string that was constructed for this document by the getOutputDescription() method.
  *@param document is the document data to be processed (handed to the output data store).
  *@param authorityNameString is the name of the authority responsible for authorizing any access tokens passed in with the repository document. May be null.
  *@param activities is the handle to an object that the implementer of an output connector may use to perform operations, such as logging processing activity.
  *@return the document status (accepted or permanently rejected).
  */
  @Override
  public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
    throws ManifoldCFException, ServiceInterruption
  {
    return DOCUMENTSTATUS_REJECTED;
  }

  /** Remove a document using the connector.
  * Note that the last outputDescription is included, since it may be necessary for the connector to use such information to know how to properly remove the document.
  *@param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output data store will use to process
  * and serve the document. This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
  *@param outputDescription is the last description string that was constructed for this document by the getOutputDescription() method above.
  *@param activities is the handle to an object that the implementer of an output connector may use to perform operations, such as logging processing activity.
  */
  @Override
  public void removeDocument(String documentURI, String outputDescription, IOutputRemoveActivity activities)
    throws ManifoldCFException, ServiceInterruption
  {
    // Does nothing in the base class
  }

  /** Notify the connector that all records associated with this connection have been removed.
  * This method allows the connector to remove any internal data storage that is associated with records sent to the index on
  * behalf of a connection. It should not attempt to communicate with the output index.
  */
  public void noteAllRecordsRemoved()
    throws ManifoldCFException
  {
    // Does nothing in the base class
  }

  // UI support methods.
  //
  // These support methods come in two varieties.  The first bunch is involved in setting up connection configuration information.  The second bunch
  // is involved in presenting and editing output specification information for a job.  The two kinds of methods are accordingly treated differently,
  // in that the first bunch cannot assume that the current connector object is connected, while the second bunch can.  That is why the first bunch
  // receives a thread context argument for all UI methods, while the second bunch does not need one (since it has already been applied via the connect()
  // method, which is not declared in this class).
  //
  // Each specification method below exists in several overloads.  The framework calls the @Override variant; every variant delegates to the
  // next-simpler signature, so a connector may override whichever signature it was written against.

  /** Obtain the name of the form check javascript method to call.
  * The base implementation returns a fixed name that does not depend on connectionSequenceNumber.
  *@param connectionSequenceNumber is the unique number of this connection within the job.
  *@return the name of the form check javascript method.
  */
  @Override
  public String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
  {
    return "checkOutputSpecification";
  }

  /** Obtain the name of the form presave check javascript method to call.
  * The base implementation returns a fixed name that does not depend on connectionSequenceNumber.
  *@param connectionSequenceNumber is the unique number of this connection within the job.
  *@return the name of the form presave check javascript method.
  */
  @Override
  public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
  {
    return "checkOutputSpecificationForSave";
  }

  /** Output the specification header section.
  * This method is called in the head section of a job page which has selected an output connection of the current type.  Its purpose is to add the required tabs
  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
  * The base implementation casts os to OutputSpecification (unchecked) and delegates to the overload that takes an
  * OutputSpecification together with the connection sequence number, so that connectors overriding that overload are honored.
  *@param out is the output to which any HTML should be sent.
  *@param locale is the preferred locale of the output.
  *@param os is the current output specification for this job.
  *@param connectionSequenceNumber is the unique number of this connection within the job.
  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
  */
  @Override
  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification os,
    int connectionSequenceNumber, List<String> tabsArray)
    throws ManifoldCFException, IOException
  {
    // The argument's static type is OutputSpecification, so this resolves to the overload below
    // rather than recursing into this method.
    outputSpecificationHeader(out,locale,(OutputSpecification)os,connectionSequenceNumber,tabsArray);
  }

  /** Output the specification header section.
  * Variant that receives an already-typed OutputSpecification plus the connection sequence number.
  * The base implementation drops connectionSequenceNumber and delegates to
  * {@link #outputSpecificationHeader(IHTTPOutput, Locale, OutputSpecification, List)}.
  *@param out is the output to which any HTML should be sent.
  *@param locale is the preferred locale of the output.
  *@param os is the current output specification for this job.
  *@param connectionSequenceNumber is the unique number of this connection within the job.
  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
  */
  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, OutputSpecification os,
    int connectionSequenceNumber, List<String> tabsArray)
    throws ManifoldCFException, IOException
  {
    outputSpecificationHeader(out,locale,os,tabsArray);
  }

  /** Output the specification header section.
  * This method is called in the head section of a job page which has selected an output connection of the current type.  Its purpose is to add the required tabs
  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
  * The base implementation ignores locale and delegates to
  * {@link #outputSpecificationHeader(IHTTPOutput, OutputSpecification, List)}.
  *@param out is the output to which any HTML should be sent.
  *@param locale is the preferred locale of the output.
  *@param os is the current output specification for this job.
  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
  */
  public void outputSpecificationHeader(IHTTPOutput out, Locale locale, OutputSpecification os, List<String> tabsArray)
    throws ManifoldCFException, IOException
  {
    outputSpecificationHeader(out,os,tabsArray);
  }

  /** Output the specification header section.
  * This method is called in the head section of a job page which has selected an output connection of the current type.  Its purpose is to add the required tabs
  * to the list, and to output any javascript methods that might be needed by the job editing HTML.
  * The base implementation collects tab names through the old ArrayList-based signature and then appends them to tabsArray.
  * Each collected entry is cast to String, so an old-style override that adds a non-String entry causes a ClassCastException here.
  *@param out is the output to which any HTML should be sent.
  *@param os is the current output specification for this job.
  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
  */
  public void outputSpecificationHeader(IHTTPOutput out, OutputSpecification os, List<String> tabsArray)
    throws ManifoldCFException, IOException
  {
    // Call the old method signature, for backwards compatibility.
    // The local must be declared as ArrayList<Object> (not List) so that overload resolution picks the old signature.
    ArrayList<Object> localTabsArray = new ArrayList<Object>();
    outputSpecificationHeader(out,os,localTabsArray);
    for (Object o : localTabsArray)
    {
      tabsArray.add((String)o);
    }
  }

  /** Output the specification header section (old signature, kept for backwards compatibility).
  * The base implementation writes nothing and adds no tabs.
  *@param out is the output to which any HTML should be sent.
  *@param os is the current output specification for this job.
  *@param tabsArray is an array of tab names.  Add to this array any tab names that are specific to the connector.
  */
  public void outputSpecificationHeader(IHTTPOutput out, OutputSpecification os, ArrayList<Object> tabsArray)
    throws ManifoldCFException, IOException
  {
  }

  /** Output the specification body section.
  * This method is called in the body section of a job page which has selected an output connection of the current type.  Its purpose is to present the required form elements for editing.
  * The coder can presume that the HTML that is output from this configuration will be within appropriate {@code <html>}, {@code <body>}, and {@code <form>} tags.  The name of the
  * form is "editjob".
  * The base implementation casts os to OutputSpecification (unchecked), drops both sequence numbers, and delegates to
  * {@link #outputSpecificationBody(IHTTPOutput, Locale, OutputSpecification, String)}.
  *@param out is the output to which any HTML should be sent.
  *@param locale is the preferred locale of the output.
  *@param os is the current output specification for this job.
  *@param connectionSequenceNumber is the unique number of this connection within the job.
  *@param actualSequenceNumber is the connection within the job that has currently been selected.
  *@param tabName is the current tab name.
  */
  @Override
  public void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification os,
    int connectionSequenceNumber, int actualSequenceNumber, String tabName)
    throws ManifoldCFException, IOException
  {
    outputSpecificationBody(out,locale,(OutputSpecification)os,tabName);
  }

  /** Output the specification body section.
  * This method is called in the body section of a job page which has selected an output connection of the current type.  Its purpose is to present the required form elements for editing.
  * The coder can presume that the HTML that is output from this configuration will be within appropriate {@code <html>}, {@code <body>}, and {@code <form>} tags.  The name of the
  * form is "editjob".
  * The base implementation ignores locale and delegates to
  * {@link #outputSpecificationBody(IHTTPOutput, OutputSpecification, String)}.
  *@param out is the output to which any HTML should be sent.
  *@param locale is the preferred locale of the output.
  *@param os is the current output specification for this job.
  *@param tabName is the current tab name.
  */
  public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os, String tabName)
    throws ManifoldCFException, IOException
  {
    outputSpecificationBody(out,os,tabName);
  }

  /** Output the specification body section.
  * This method is called in the body section of a job page which has selected an output connection of the current type.  Its purpose is to present the required form elements for editing.
  * The coder can presume that the HTML that is output from this configuration will be within appropriate {@code <html>}, {@code <body>}, and {@code <form>} tags.  The name of the
  * form is "editjob".
  * The base implementation writes nothing.
  *@param out is the output to which any HTML should be sent.
  *@param os is the current output specification for this job.
  *@param tabName is the current tab name.
  */
  public void outputSpecificationBody(IHTTPOutput out, OutputSpecification os, String tabName)
    throws ManifoldCFException, IOException
  {
  }

  /** Process a specification post.
  * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
  * posted.  Its purpose is to gather form information and modify the output specification accordingly.
  * The name of the posted form is "editjob".
  * The base implementation casts os to OutputSpecification (unchecked), drops connectionSequenceNumber, and delegates to
  * {@link #processSpecificationPost(IPostParameters, Locale, OutputSpecification)}.
  *@param variableContext contains the post data, including binary file-upload information.
  *@param locale is the preferred locale of the output.
  *@param os is the current output specification for this job.
  *@param connectionSequenceNumber is the unique number of this connection within the job.
  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
  */
  @Override
  public String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification os,
    int connectionSequenceNumber)
    throws ManifoldCFException
  {
    return processSpecificationPost(variableContext,locale,(OutputSpecification)os);
  }

  /** Process a specification post.
  * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
  * posted.  Its purpose is to gather form information and modify the output specification accordingly.
  * The name of the posted form is "editjob".
  * The base implementation ignores locale and delegates to
  * {@link #processSpecificationPost(IPostParameters, OutputSpecification)}.
  *@param variableContext contains the post data, including binary file-upload information.
  *@param locale is the preferred locale of the output.
  *@param os is the current output specification for this job.
  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
  */
  public String processSpecificationPost(IPostParameters variableContext, Locale locale, OutputSpecification os)
    throws ManifoldCFException
  {
    return processSpecificationPost(variableContext,os);
  }

  /** Process a specification post.
  * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
  * posted.  Its purpose is to gather form information and modify the output specification accordingly.
  * The name of the posted form is "editjob".
  * The base implementation reads no post data, leaves os unmodified, and reports success.
  *@param variableContext contains the post data, including binary file-upload information.
  *@param os is the current output specification for this job.
  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
  * The base class always returns null.
  */
  public String processSpecificationPost(IPostParameters variableContext, OutputSpecification os)
    throws ManifoldCFException
  {
    return null;
  }

  /** View specification.
  * This method is called in the body section of a job's view page.  Its purpose is to present the output specification information to the user.
  * The coder can presume that the HTML that is output from this configuration will be within appropriate {@code <html>} and {@code <body>} tags.
  * The base implementation casts os to OutputSpecification (unchecked), drops connectionSequenceNumber, and delegates to
  * {@link #viewSpecification(IHTTPOutput, Locale, OutputSpecification)}.
  *@param out is the output to which any HTML should be sent.
  *@param locale is the preferred locale of the output.
  *@param os is the current output specification for this job.
  *@param connectionSequenceNumber is the unique number of this connection within the job.
  */
  @Override
  public void viewSpecification(IHTTPOutput out, Locale locale, Specification os,
    int connectionSequenceNumber)
    throws ManifoldCFException, IOException
  {
    viewSpecification(out,locale,(OutputSpecification)os);
  }

  /** View specification.
  * This method is called in the body section of a job's view page.  Its purpose is to present the output specification information to the user.
  * The coder can presume that the HTML that is output from this configuration will be within appropriate {@code <html>} and {@code <body>} tags.
  * The base implementation ignores locale and delegates to {@link #viewSpecification(IHTTPOutput, OutputSpecification)}.
  *@param out is the output to which any HTML should be sent.
  *@param locale is the preferred locale of the output.
  *@param os is the current output specification for this job.
  */
  public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os)
    throws ManifoldCFException, IOException
  {
    viewSpecification(out,os);
  }

  /** View specification.
  * This method is called in the body section of a job's view page.  Its purpose is to present the output specification information to the user.
  * The coder can presume that the HTML that is output from this configuration will be within appropriate {@code <html>} and {@code <body>} tags.
  * The base implementation writes nothing.
  *@param out is the output to which any HTML should be sent.
  *@param os is the current output specification for this job.
  */
  public void viewSpecification(IHTTPOutput out, OutputSpecification os)
    throws ManifoldCFException, IOException
  {
  }
}