Pull up CONNECTORS-916 changes
git-svn-id: https://svn.apache.org/repos/asf/manifoldcf/branches/CONNECTORS-916-rebased@1596720 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/build.xml b/build.xml
index 93ae6ec..3ceddbf 100644
--- a/build.xml
+++ b/build.xml
@@ -1023,7 +1023,7 @@
<mkdir dir="lib"/>
<antcall target="download-via-maven"><param name="target" value="lib"/>
<param name="project-path" value="org/apache/poi"/>
- <param name="artifact-version" value="3.7"/>
+ <param name="artifact-version" value="3.10-beta2"/>
<param name="artifact-name" value="poi"/>
<param name="artifact-type" value="jar"/>
</antcall>
@@ -1579,7 +1579,213 @@
</antcall>
</target>
- <target name="make-core-deps" depends="download-jira-client,download-google-api-client,download-dropbox-client,download-solrj,download-zookeeper,download-httpcomponents,download-json,download-hsqldb,download-xerces,download-commons,download-elasticsearch-plugin,download-solr-plugins,download-sharepoint-plugins,download-jstl,download-xmlgraphics-commons,download-wstx-asl,download-xmlsec,download-xml-apis,download-wss4j,download-velocity,download-streambuffer,download-stax,download-servlet-api,download-xml-resolver,download-osgi,download-opensaml,download-mimepull,download-mail,download-log4j,download-junit,download-jaxws,download-glassfish,download-jaxb,download-tomcat,download-h2,download-h2-support,download-geronimo-specs,download-fop,download-derby,download-postgresql,download-axis,download-saaj,download-wsdl4j,download-castor,download-jetty,download-slf4j,download-xalan,download-activation,download-avalon-framework,download-poi,download-chemistry,download-ecj,download-hadoop,download-protobuf">
+ <target name="download-tika"> <!-- Downloads tika-core and tika-parsers 1.5 plus the third-party jars listed below into lib -->
+ <mkdir dir="lib"/>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/tika"/>
+ <param name="artifact-version" value="1.5"/>
+ <param name="artifact-name" value="tika-core"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/tika"/>
+ <param name="artifact-version" value="1.5"/>
+ <param name="artifact-name" value="tika-parsers"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/ccil/cowan/tagsoup"/>
+ <param name="artifact-version" value="1.2.1"/>
+ <param name="artifact-name" value="tagsoup"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/james"/>
+ <param name="artifact-version" value="0.7.2"/>
+ <param name="artifact-name" value="apache-mime4j-core"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/james"/>
+ <param name="artifact-version" value="0.7.2"/>
+ <param name="artifact-name" value="apache-mime4j-dom"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/gagravarr"/>
+ <param name="artifact-version" value="0.1"/>
+ <param name="artifact-name" value="vorbis-java-tika"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/poi"/>
+ <param name="artifact-version" value="3.10-beta2"/>
+ <param name="artifact-name" value="poi-scratchpad"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="com/drewnoakes"/>
+ <param name="artifact-version" value="2.6.2"/>
+ <param name="artifact-name" value="metadata-extractor"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/aspectj"/>
+ <param name="artifact-version" value="1.6.11"/>
+ <param name="artifact-name" value="aspectjrt"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="com/uwyn"/>
+ <param name="artifact-version" value="1.0"/>
+ <param name="artifact-name" value="jhighlight"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/xmlbeans"/>
+ <param name="artifact-version" value="2.3.0"/>
+ <param name="artifact-name" value="xmlbeans"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/bouncycastle"/>
+ <param name="artifact-version" value="1.45"/>
+ <param name="artifact-name" value="bcprov-jdk15"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/tukaani"/>
+ <param name="artifact-version" value="1.2"/>
+ <param name="artifact-name" value="xz"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/pdfbox"/>
+ <param name="artifact-version" value="1.8.4"/>
+ <param name="artifact-name" value="jempbox"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/pdfbox"/>
+ <param name="artifact-version" value="1.8.4"/>
+ <param name="artifact-name" value="pdfbox"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/pdfbox"/>
+ <param name="artifact-version" value="1.8.4"/>
+ <param name="artifact-name" value="fontbox"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/commons"/>
+ <param name="artifact-version" value="1.5"/>
+ <param name="artifact-name" value="commons-compress"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="com/googlecode/juniversalchardet"/>
+ <param name="artifact-version" value="1.0.3"/>
+ <param name="artifact-name" value="juniversalchardet"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="edu/ucar"/>
+ <param name="artifact-version" value="4.2-min"/>
+ <param name="artifact-name" value="netcdf"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="de/l3s/boilerpipe"/>
+ <param name="artifact-version" value="1.1.0"/>
+ <param name="artifact-name" value="boilerpipe"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="dom4j"/>
+ <param name="artifact-version" value="1.6.1"/>
+ <param name="artifact-name" value="dom4j"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/ow2/asm"/>
+ <param name="artifact-version" value="4.1"/>
+ <param name="artifact-name" value="asm-debug-all"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="com/adobe/xmp"/>
+ <param name="artifact-version" value="5.1.2"/>
+ <param name="artifact-name" value="xmpcore"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/gagravarr"/>
+ <param name="artifact-version" value="0.1"/>
+ <param name="artifact-name" value="vorbis-java-core"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/poi"/>
+ <param name="artifact-version" value="3.10-beta2"/>
+ <param name="artifact-name" value="poi-ooxml"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/poi"/>
+ <param name="artifact-version" value="3.10-beta2"/>
+ <param name="artifact-name" value="poi-ooxml-schemas"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/bouncycastle"/>
+ <param name="artifact-version" value="1.45"/>
+ <param name="artifact-name" value="bcmail-jdk15"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="jdom"/>
+ <param name="artifact-version" value="1.0"/>
+ <param name="artifact-name" value="jdom"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="org/apache/geronimo/specs"/>
+ <param name="artifact-version" value="1.0.1"/>
+ <param name="artifact-name" value="geronimo-stax-api_1.0_spec"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="rome"/>
+ <param name="artifact-version" value="0.9"/>
+ <param name="artifact-name" value="rome"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="com/googlecode/mp4parser"/>
+ <param name="artifact-version" value="1.0-RC-1"/>
+ <param name="artifact-name" value="isoparser"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ </target>
+
+ <target name="download-jackson"> <!-- Downloads jackson-databind and jackson-annotations into lib -->
+ <mkdir dir="lib"/>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="com/fasterxml/jackson/core"/>
+ <param name="artifact-version" value="2.1.3"/>
+ <param name="artifact-name" value="jackson-databind"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ <antcall target="download-via-maven"><param name="target" value="lib"/>
+ <param name="project-path" value="com/fasterxml/jackson/core"/>
+ <param name="artifact-version" value="2.1.2"/> <!-- NOTE(review): databind above is 2.1.3; confirm the differing version is intended -->
+ <param name="artifact-name" value="jackson-annotations"/>
+ <param name="artifact-type" value="jar"/>
+ </antcall>
+ </target> <!-- NOTE(review): jackson-core is not fetched by this target; presumably another download target supplies it - verify -->
+
+ <target name="make-core-deps" depends="download-jira-client,download-google-api-client,download-dropbox-client,download-solrj,download-zookeeper,download-httpcomponents,download-json,download-hsqldb,download-xerces,download-commons,download-elasticsearch-plugin,download-solr-plugins,download-sharepoint-plugins,download-jstl,download-xmlgraphics-commons,download-wstx-asl,download-xmlsec,download-xml-apis,download-wss4j,download-velocity,download-streambuffer,download-stax,download-servlet-api,download-xml-resolver,download-osgi,download-opensaml,download-mimepull,download-mail,download-log4j,download-junit,download-jaxws,download-glassfish,download-jaxb,download-tomcat,download-h2,download-h2-support,download-geronimo-specs,download-fop,download-derby,download-postgresql,download-axis,download-saaj,download-wsdl4j,download-castor,download-jetty,download-slf4j,download-xalan,download-activation,download-avalon-framework,download-poi,download-chemistry,download-ecj,download-hadoop,download-protobuf,download-tika,download-jackson">
<copy todir="lib">
<fileset dir="lib-license" includes="*.txt"/>
</copy>
diff --git a/connectors/amazoncloudsearch/build.xml b/connectors/amazoncloudsearch/build.xml
new file mode 100644
index 0000000..189e0ca
--- /dev/null
+++ b/connectors/amazoncloudsearch/build.xml
@@ -0,0 +1,134 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project name="amazoncloudsearch" default="all">
+ <!-- Locate the ManifoldCF dist directory: MCFDISTPATH from the environment if set, otherwise ../../dist. -->
+ <property environment="env"/>
+ <condition property="mcf-dist" value="${env.MCFDISTPATH}">
+ <isset property="env.MCFDISTPATH"/>
+ </condition>
+ <property name="abs-dist" location="../../dist"/>
+ <condition property="mcf-dist" value="${abs-dist}">
+ <not>
+ <isset property="env.MCFDISTPATH"/>
+ </not>
+ </condition>
+
+ <import file="${mcf-dist}/connector-build.xml"/>
+ <!-- Jars from ../../lib added to the connector classpath; the "lib" target below copies the same set. -->
+ <path id="connector-classpath">
+ <path refid="mcf-connector-build.connector-classpath"/>
+ <fileset dir="../../lib">
+ <include name="httpclient*.jar"/>
+ <include name="jackson-core*.jar"/>
+ <include name="jackson-databind*.jar"/>
+ <include name="jackson-annotations*.jar"/>
+ <include name="tika-core*.jar"/>
+ <include name="tika-parsers*.jar"/>
+ <include name="tagsoup*.jar"/>
+ <include name="poi*.jar"/>
+ <include name="vorbis-java-tika*.jar"/>
+ <include name="vorbis-java-core*.jar"/>
+ <include name="netcdf*.jar"/>
+ <include name="apache-mime4j-core*.jar"/>
+ <include name="apache-mime4j-dom*.jar"/>
+ <include name="commons-compress*.jar"/>
+ <include name="commons-codec*.jar"/>
+ <include name="pdfbox*.jar"/>
+ <include name="fontbox*.jar"/>
+ <include name="jempbox*.jar"/>
+ <include name="commons-logging*.jar"/>
+ <include name="bcmail-jdk15*.jar"/>
+ <include name="bcprov-jdk15*.jar"/>
+ <include name="poi-scratchpad*.jar"/>
+ <include name="poi-ooxml*.jar"/>
+ <include name="poi-ooxml-schemas*.jar"/>
+ <include name="xmlbeans*.jar"/>
+ <include name="dom4j*.jar"/>
+ <include name="geronimo-stax-api_1.0_spec*.jar"/>
+ <include name="asm-debug-all*.jar"/>
+ <include name="isoparser*.jar"/>
+ <include name="aspectjrt*.jar"/>
+ <include name="metadata-extractor*.jar"/>
+ <include name="xmpcore*.jar"/>
+ <include name="xml-apis*.jar"/>
+ <include name="boilerpipe*.jar"/>
+ <include name="rome*.jar"/>
+ <include name="jdom*.jar"/>
+ <include name="xercesImpl*.jar"/>
+ <include name="xz*.jar"/> <!-- replaces a second vorbis-java-core entry; xz was the one download-tika jar not listed -->
+ <include name="juniversalchardet*.jar"/>
+ <include name="jhighlight*.jar"/>
+ </fileset>
+ </path>
+ <!-- Copy the connector's third-party jars into dist/lib; the target runs only when canBuild is set. -->
+ <target name="lib" depends="mcf-connector-build.lib,precompile-check" if="canBuild">
+ <mkdir dir="dist/lib"/>
+ <copy todir="dist/lib">
+ <fileset dir="../../lib">
+ <include name="httpclient*.jar"/>
+ <include name="jackson-core*.jar"/>
+ <include name="jackson-databind*.jar"/>
+ <include name="jackson-annotations*.jar"/>
+ <include name="tika-core*.jar"/>
+ <include name="tika-parsers*.jar"/>
+ <include name="tagsoup*.jar"/>
+ <include name="poi*.jar"/>
+ <include name="vorbis-java-tika*.jar"/>
+ <include name="vorbis-java-core*.jar"/>
+ <include name="netcdf*.jar"/>
+ <include name="apache-mime4j-core*.jar"/>
+ <include name="apache-mime4j-dom*.jar"/>
+ <include name="commons-compress*.jar"/>
+ <include name="commons-codec*.jar"/>
+ <include name="pdfbox*.jar"/>
+ <include name="fontbox*.jar"/>
+ <include name="jempbox*.jar"/>
+ <include name="commons-logging*.jar"/>
+ <include name="bcmail-jdk15*.jar"/>
+ <include name="bcprov-jdk15*.jar"/>
+ <include name="poi-scratchpad*.jar"/>
+ <include name="poi-ooxml*.jar"/>
+ <include name="poi-ooxml-schemas*.jar"/>
+ <include name="xmlbeans*.jar"/>
+ <include name="dom4j*.jar"/>
+ <include name="geronimo-stax-api_1.0_spec*.jar"/>
+ <include name="asm-debug-all*.jar"/>
+ <include name="isoparser*.jar"/>
+ <include name="aspectjrt*.jar"/>
+ <include name="metadata-extractor*.jar"/>
+ <include name="xmpcore*.jar"/>
+ <include name="xml-apis*.jar"/>
+ <include name="boilerpipe*.jar"/>
+ <include name="rome*.jar"/>
+ <include name="jdom*.jar"/>
+ <include name="xercesImpl*.jar"/>
+ <include name="xz*.jar"/> <!-- replaces a second vorbis-java-core entry; xz was the one download-tika jar not listed -->
+ <include name="juniversalchardet*.jar"/>
+ <include name="jhighlight*.jar"/>
+ </fileset>
+ </copy>
+ </target>
+ <!-- Calls general-add-output-connector with this connector's label and implementation class. -->
+ <target name="deliver-connector" depends="mcf-connector-build.deliver-connector">
+ <antcall target="general-add-output-connector">
+ <param name="connector-label" value="AmazonCloudSearch"/>
+ <param name="connector-class" value="org.apache.manifoldcf.agents.output.amazoncloudsearch.AmazonCloudSearchConnector"/>
+ </antcall>
+ </target>
+
+</project>
diff --git a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConfig.java b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConfig.java
new file mode 100644
index 0000000..80a9af7
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConfig.java
@@ -0,0 +1,52 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.manifoldcf.agents.output.amazoncloudsearch;
+
+/** Names and default values of the settings used by the AmazonCloudSearch output connector. */
+public class AmazonCloudSearchConfig {
+
+ // Connection configuration: each parameter name is followed by its default value
+ public static final String SERVER_HOST="serverhost";
+ public static final String SERVER_HOST_DEFAULT = "";
+ public static final String SERVER_PATH="serverpath";
+ public static final String SERVER_PATH_DEFAULT = "/2013-01-01/documents/batch";
+ // Proxy configuration, same name-then-default layout
+ public static final String PROXY_PROTOCOL="proxyprotocol";
+ public static final String PROXY_PROTOCOL_DEFAULT = "http";
+ public static final String PROXY_HOST="proxyhost";
+ public static final String PROXY_HOST_DEFAULT = "";
+ public static final String PROXY_PORT="proxyport";
+ public static final String PROXY_PORT_DEFAULT = "";
+
+ // Specification node names, each followed by its default value where one is defined
+ public static final String NODE_MAXLENGTH = "maxlength";
+ public static final String MAXLENGTH_DEFAULT = "";
+ public static final String NODE_MIMETYPES = "mimetypes";
+ public static final String MIMETYPES_DEFAULT = "";
+ public static final String NODE_EXTENSIONS = "extensions";
+ public static final String EXTENSIONS_DEFAULT = "";
+ public static final String NODE_FIELDMAP = "fieldmap";
+ public static final String NODE_KEEPMETADATA = "keepAllMetadata";
+ // Attribute names used on specification nodes
+ public static final String ATTRIBUTE_SOURCE = "source";
+ public static final String ATTRIBUTE_TARGET = "target";
+ public static final String ATTRIBUTE_VALUE = "value";
+
+}
diff --git a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
new file mode 100644
index 0000000..cd0b44c
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/AmazonCloudSearchConnector.java
@@ -0,0 +1,1126 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.output.amazoncloudsearch;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InterruptedIOException;
+import java.io.StringReader;
+import java.io.BufferedReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Locale;
+import java.util.Set;
+import java.util.HashSet;
+
+import org.apache.commons.io.FilenameUtils;
+import org.apache.http.Consts;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpHost;
+import org.apache.http.HttpResponse;
+import org.apache.http.client.ClientProtocolException;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.HttpPost;
+import org.apache.http.entity.StringEntity;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+import org.apache.manifoldcf.agents.interfaces.IOutputAddActivity;
+import org.apache.manifoldcf.agents.interfaces.IOutputRemoveActivity;
+import org.apache.manifoldcf.agents.interfaces.OutputSpecification;
+import org.apache.manifoldcf.agents.interfaces.RepositoryDocument;
+import org.apache.manifoldcf.agents.interfaces.ServiceInterruption;
+import org.apache.manifoldcf.agents.output.BaseOutputConnector;
+import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel.Document;
+import org.apache.manifoldcf.core.interfaces.ConfigParams;
+import org.apache.manifoldcf.core.interfaces.ConfigurationNode;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.IThreadContext;
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+import org.apache.manifoldcf.core.interfaces.IPostParameters;
+import org.apache.manifoldcf.core.interfaces.IPasswordMapperActivity;
+import org.apache.manifoldcf.core.interfaces.SpecificationNode;
+import org.apache.manifoldcf.core.system.ManifoldCF;
+import org.apache.manifoldcf.crawler.system.Logging;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+public class AmazonCloudSearchConnector extends BaseOutputConnector {
+
+ /** Activity name reported by getActivitiesList() for document ingestion */
+ public final static String INGEST_ACTIVITY = "document ingest";
+ /** Activity name reported by getActivitiesList() for document removal */
+ public final static String REMOVE_ACTIVITY = "document deletion";
+
+ /** Forward to the javascript to check the configuration parameters */
+ private static final String EDIT_CONFIGURATION_JS = "editConfiguration.js";
+
+ /** Forward to the HTML template to edit the configuration parameters */
+ private static final String EDIT_CONFIGURATION_HTML = "editConfiguration.html";
+
+ /** Forward to the HTML template to view the configuration parameters */
+ private static final String VIEW_CONFIGURATION_HTML = "viewConfiguration.html";
+
+ /** Forward to the javascript to check the specification parameters for the job */
+ private static final String EDIT_SPECIFICATION_JS = "editSpecification.js";
+
+ private static final String EDIT_SPECIFICATION_CONTENTS_HTML = "editSpecification_Contents.html"; // presumably the template for the "Contents" part of the specification editor
+ private static final String EDIT_SPECIFICATION_FIELDMAPPING_HTML = "editSpecification_FieldMapping.html"; // presumably the template for the field-mapping part
+
+ private static final String VIEW_SPECIFICATION_HTML = "viewSpecification.html"; // presumably the read-only specification template
+
+ /** POST request prepared by getSession(); null while the connector is not connected */
+ protected HttpPost poster = null;
+
+ /** CloudSearch field name for file body text. */
+ private static final String FILE_BODY_TEXT_FIELDNAME = "f_bodytext";
+
+ /** Constructor. Does nothing itself; the POST request held in <code>poster</code> is built later by getSession().
+ */
+ public AmazonCloudSearchConnector(){
+ }
+
+ /** Report the activity names this connector supports (i.e. writes into the log).
+ *@return a new array holding the ingest activity name followed by the removal activity name. */
+ @Override
+ public String[] getActivitiesList()
+ {
+ final String[] supportedActivities = {INGEST_ACTIVITY, REMOVE_ACTIVITY};
+ return supportedActivities;
+ }
+
+ /** Connect. Only delegates to the base class; no request is built until getSession() runs.
+ *@param configParameters is the set of configuration parameters. getSession() later reads the
+ * server host, server path and proxy settings through <code>params</code>, which is presumably
+ * populated from these values by the base class (not visible here; verify).
+ */
+ @Override
+ public void connect(ConfigParams configParameters)
+ {
+ super.connect(configParameters);
+ }
+
+ /** This method is called to assess whether this connector instance should
+ * actually be counted as being connected.
+ *@return true if a POST request has been prepared by getSession() and not yet cleared by disconnect().
+ */
+ @Override
+ public boolean isConnected()
+ {
+ return poster != null;
+ }
+
+ /** Close the connection. Call this before discarding the connection.
+ * Drops the prepared POST request, so isConnected() reports false afterwards,
+ * and then hands over to the base class disconnect().
+ *@throws ManifoldCFException as declared by the base class disconnect() that is called last.
+ */
+ @Override
+ public void disconnect()
+ throws ManifoldCFException
+ {
+ poster = null; // unconditional: clearing an already-null reference changes nothing
+ super.disconnect();
+ }
+
+ /** Set up a session: build the HTTPS POST request and store it in <code>poster</code>.
+ * The request is published only once fully configured, so a bad proxy port cannot leave a half-built one behind.
+ *@throws ManifoldCFException if the server host or path is missing, or the proxy port is not an integer.
+ */
+ protected void getSession()
+ throws ManifoldCFException
+ {
+ String serverHost = params.getParameter(AmazonCloudSearchConfig.SERVER_HOST);
+ if (serverHost == null)
+ throw new ManifoldCFException("Server host parameter required");
+ String serverPath = params.getParameter(AmazonCloudSearchConfig.SERVER_PATH);
+ if (serverPath == null)
+ throw new ManifoldCFException("Server path parameter required");
+ String proxyProtocol = params.getParameter(AmazonCloudSearchConfig.PROXY_PROTOCOL);
+ String proxyHost = params.getParameter(AmazonCloudSearchConfig.PROXY_HOST);
+ String proxyPort = params.getParameter(AmazonCloudSearchConfig.PROXY_PORT);
+
+ // Https is OK here without a custom trust store because we know we are talking to an Amazon instance, which has certs that
+ // are presumably non-custom.
+ HttpPost request = new HttpPost("https://" + serverHost + serverPath);
+ request.addHeader("Content-Type", "application/json");
+ // Route through a proxy only when a proxy host is configured.
+ if (proxyHost != null && proxyHost.length() > 0)
+ {
+ try
+ {
+ HttpHost proxy = new HttpHost(proxyHost, Integer.parseInt(proxyPort), proxyProtocol);
+ request.setConfig(RequestConfig.custom().setProxy(proxy).build());
+ }
+ catch (NumberFormatException e)
+ {
+ throw new ManifoldCFException("Number format exception: "+e.getMessage(),e);
+ }
+ }
+ poster = request; // publish last: isConnected() must never see a partially configured request
+ }
+
+ /** Test the connection. Returns a string describing the connection integrity.
+ * An empty batch ("[]") is posted; an "error" status whose message is "batch must contain at least one
+ * operation" is the reply that counts as a working connection.
+ *@return the connection's status as a displayable string.
+ */
+ @Override
+ public String check() throws ManifoldCFException {
+ try {
+ getSession();
+ String responsbody = postData("[]");
+ String status;
+ try
+ {
+ status = getStatusFromJsonResponse(responsbody);
+ } catch (ManifoldCFException e)
+ {
+ Logging.connectors.debug(e);
+ return "Could not get status from response body. Check Access Policy setting of your domain of Amazon CloudSearch.: " + e.getMessage();
+ }
+ // Anything other than an error status (now compared case-insensitively throughout) is not the expected reply.
+ if (!"error".equalsIgnoreCase(status))
+ return "Connection NOT working.";
+
+ // Look for the "errors" array; stop at end of input too, since nextToken() then returns null forever.
+ String message = null;
+ JsonParser parser = new JsonFactory().createJsonParser(responsbody);
+ for (JsonToken token = parser.nextToken(); token != null && token != JsonToken.END_OBJECT; token = parser.nextToken()) {
+ if ("errors".equalsIgnoreCase(parser.getCurrentName())) {
+ message = parseMessage(parser);
+ break;
+ }
+ }
+ if ("batch must contain at least one operation".equals(message)) {
+ return "Connection working.";
+ }
+ return "Connection NOT working.";
+ } catch (ClientProtocolException e) {
+ Logging.connectors.debug(e);
+ return "Protocol exception: "+e.getMessage();
+ } catch (IOException e) {
+ Logging.connectors.debug(e);
+ return "IO exception: "+e.getMessage();
+ } catch (ServiceInterruption e) {
+ Logging.connectors.debug(e);
+ return "Transient exception: "+e.getMessage();
+ }
+ }
+
+ /** Extract the value of the first "status" field found in a JSON response body.
+ * nextToken() returns null at end of input, so the scan stops there as well as at the first END_OBJECT;
+ * without that guard an empty or non-object body would loop forever.
+ *@return the status text, or null if no "status" field was seen before the scan stopped. */
+ private String getStatusFromJsonResponse(String responsbody) throws ManifoldCFException {
+ try {
+ JsonParser parser = new JsonFactory().createJsonParser(responsbody);
+ for (JsonToken token = parser.nextToken(); token != null && token != JsonToken.END_OBJECT; token = parser.nextToken()) {
+ if ("status".equalsIgnoreCase(parser.getCurrentName())) {
+ parser.nextToken();
+ return parser.getText();
+ }
+ }
+ } catch (IOException e) { // JsonParseException is an IOException, so this one handler covers both original catches
+ throw new ManifoldCFException(e);
+ }
+ return null;
+ }
+
+ /** Return the first "message" value found before END_ARRAY or end of input (null token, which used to loop forever); null if none. */
+ private String parseMessage(JsonParser parser) throws JsonParseException, IOException {
+ for (JsonToken token = parser.nextToken(); token != null && token != JsonToken.END_ARRAY; token = parser.nextToken()) {
+ if ("message".equalsIgnoreCase(parser.getCurrentName())) {
+ parser.nextToken();
+ return parser.getText();
+ }
+ }
+ return null;
+ }
+
+ /** Get an output version string, given an output specification. The Connector Framework uses this string to
+ * determine whether a document will need to be output again, so it must uniquely describe the pertinent
+ * details of the output specification and the configuration.
+ * The contents of the document cannot be considered by this method; a different version string (defined in
+ * IRepositoryConnector) is used to describe the version of the actual document.
+ *
+ * This method presumes that the connector object has been configured, and it is thus able to communicate with
+ * the output data store should that be necessary. Here the string is simply the packed form of the specification.
+ *@param os is the current output specification for the job that is doing the crawling.
+ *@return a string, of unlimited length, such that if two such strings are equal the document will not need
+ * to be sent again to the output data store.
+ */
+ @Override
+ public String getOutputDescription(OutputSpecification os)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ return new SpecPacker(os).toPackedString();
+ }
+
+ /** Detect if a mime type is indexable or not. This method is used by participating repository connectors to pre-filter the number of
+ * unusable documents that will be passed to this output connector.
+ * A type is accepted only if both the specification packed into the output description and the base connector accept it.
+ *@param outputDescription is the document's output version.
+ *@param mimeType is the mime type of the document.
+ *@return true if the mime type is indexable by this connector.
+ */
+ @Override
+ public boolean checkMimeTypeIndexable(String outputDescription, String mimeType)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ SpecPacker sp = new SpecPacker(outputDescription);
+ // Short-circuit keeps the original order: the base class is consulted only after the specification accepts the type.
+ return sp.checkMimeType(mimeType) && super.checkMimeTypeIndexable(outputDescription, mimeType);
+ }
+
+ @Override
+ public boolean checkLengthIndexable(String outputDescription, long length)
+ throws ManifoldCFException, ServiceInterruption {
+ SpecPacker sp = new SpecPacker(outputDescription);
+ if (sp.checkLengthIndexable(length))
+ return super.checkLengthIndexable(outputDescription, length);
+ else
+ return false;
+ }
+
+ @Override
+ public boolean checkURLIndexable(String outputDescription, String url)
+ throws ManifoldCFException, ServiceInterruption {
+ SpecPacker sp = new SpecPacker(outputDescription);
+ if (sp.checkURLIndexable(url))
+ return super.checkURLIndexable(outputDescription, url);
+ else
+ return false;
+ }
+
+ /** Add (or replace) a document in the output data store using the connector.
+ * This method presumes that the connector object has been configured, and it is thus able to communicate with the output data store should that be
+ * necessary.
+ * The OutputSpecification is *not* provided to this method, because the goal is consistency, and if output is done it must be consistent with the
+ * output description, since that was what was partly used to determine if output should be taking place. So it may be necessary for this method to decode
+ * an output description string in order to determine what should be done.
+ *@param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output data store will use to process
+ * and serve the document. This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
+ *@param outputDescription is the description string that was constructed for this document by the getOutputDescription() method.
+ *@param document is the document data to be processed (handed to the output data store).
+ *@param authorityNameString is the name of the authority responsible for authorizing any access tokens passed in with the repository document. May be null.
+ *@param activities is the handle to an object that the implementer of an output connector may use to perform operations, such as logging processing activity.
+ *@return the document status (accepted or permanently rejected).
+ */
+ @Override
+ public int addOrReplaceDocument(String documentURI, String outputDescription, RepositoryDocument document, String authorityNameString, IOutputAddActivity activities)
+ throws ManifoldCFException, ServiceInterruption
+ {
+ // Establish a session
+ getSession();
+
+ SpecPacker sp = new SpecPacker(outputDescription);
+
+ String jsondata = "";
+ try {
+ //build json..
+ SDFModel model = new SDFModel();
+ Document doc = model.new Document();
+ doc.setType("add");
+ doc.setId(ManifoldCF.hash(documentURI));
+
+ HashMap fields = new HashMap();
+ Metadata metadata = extractBinaryFile(document, fields);
+
+ Iterator<String> itr = document.getFields();
+ while(itr.hasNext())
+ {
+ String fName = itr.next();
+ Object[] value = document.getField(fName);
+ String target = sp.getMapping(fName);
+ if(target!=null)
+ {
+ fields.put(target, value);
+ }
+ else
+ {
+ if(sp.keepAllMetadata())
+ {
+ fields.put(fName, value);
+ }
+ }
+ }
+
+ //metadata of binary files.
+ String[] metaNames = metadata.names();
+ for(String mName : metaNames){
+ String value = metadata.get(mName);
+ String target = sp.getMapping(mName);
+ if(target!=null)
+ {
+ fields.put(target, value);
+ }
+ else
+ {
+ if(sp.keepAllMetadata())
+ {
+ fields.put(mName, value);
+ }
+ }
+ }
+ doc.setFields(fields);
+ model.addDocument(doc);
+
+ //generate json data.
+ jsondata = model.toJSON();
+ }
+ catch (SAXException e) {
+ // if document data could not be converted to JSON by jackson.
+ Logging.connectors.debug(e);
+ throw new ManifoldCFException(e);
+ } catch (JsonProcessingException e) {
+ // if document data could not be converted to JSON by jackson.
+ Logging.connectors.debug(e);
+ throw new ManifoldCFException(e);
+ } catch (TikaException e) {
+ // if document could not be parsed by tika.
+ Logging.connectors.debug(e);
+ return DOCUMENTSTATUS_REJECTED;
+ } catch (IOException e) {
+ // if document data could not be read when the document parsing by tika.
+ Logging.connectors.debug(e);
+ throw new ManifoldCFException(e);
+ }
+
+ //post data..
+ String responsbody = postData(jsondata);
+
+ // check status
+ String status = getStatusFromJsonResponse(responsbody);
+ if("success".equals(status))
+ {
+ activities.recordActivity(null,INGEST_ACTIVITY,new Long(document.getBinaryLength()),documentURI,"OK",null);
+ return DOCUMENTSTATUS_ACCEPTED;
+ }
+ else {
+ throw new ManifoldCFException("recieved error status from service after feeding document. response body : " + responsbody);
+ }
+ }
+
+ /** Run the document's binary stream through Tika's auto-detecting parser.
+  * The extracted body text, with every newline and tab character removed, is stored in the
+  * supplied field map under FILE_BODY_TEXT_FIELDNAME.
+  *@param document is the repository document whose binary stream is parsed.
+  *@param fields is the field map that receives the body text.
+  *@return the metadata collected by the parser.
+  */
+ private Metadata extractBinaryFile(RepositoryDocument document, HashMap fields)
+   throws IOException, SAXException, TikaException {
+
+   InputStream binaryStream = document.getBinaryStream();
+   Parser tikaParser = new AutoDetectParser();
+   ContentHandler bodyHandler = new BodyContentHandler();
+   Metadata extracted = new Metadata();
+   tikaParser.parse(binaryStream, bodyHandler, extracted, new ParseContext());
+
+   String bodyText = bodyHandler.toString();
+   if (bodyText != null) {
+     String flattened = bodyText.replaceAll("\\n", "").replaceAll("\\t", "");
+     fields.put(FILE_BODY_TEXT_FIELDNAME, flattened);
+   }
+   return extracted;
+ }
+
+ /** Remove a document using the connector.
+  * Note that the last outputDescription is included, since it may be necessary for the connector to use such information to know how to properly remove the document.
+  * A "delete" operation is posted whose id is ManifoldCF.hash(documentURI), the same id under which addOrReplaceDocument() stores the document.
+  *@param documentURI is the URI of the document. The URI is presumed to be the unique identifier which the output data store will use to process
+  * and serve the document. This URI is constructed by the repository connector which fetches the document, and is thus universal across all output connectors.
+  *@param outputDescription is the last description string that was constructed for this document by the getOutputDescription() method above.
+  *@param activities is the handle to an object that the implementer of an output connector may use to perform operations, such as logging processing activity.
+  */
+ @Override
+ public void removeDocument(String documentURI, String outputDescription, IOutputRemoveActivity activities)
+   throws ManifoldCFException, ServiceInterruption
+ {
+   // Establish a session
+   getSession();
+
+   String jsonData = "";
+   try {
+     SDFModel model = new SDFModel();
+     SDFModel.Document doc = model.new Document();
+     doc.setType("delete");
+     // Must match the id used when the document was added, otherwise nothing is deleted.
+     doc.setId(ManifoldCF.hash(documentURI));
+     model.addDocument(doc);
+     jsonData = model.toJSON();
+   } catch (JsonProcessingException e) {
+     throw new ManifoldCFException(e);
+   }
+   String responsbody = postData(jsonData);
+
+   // Check status.
+   String status = getStatusFromJsonResponse(responsbody);
+   if ("success".equals(status))
+   {
+     activities.recordActivity(null,REMOVE_ACTIVITY,null,documentURI,"OK",null);
+   }
+   else {
+     throw new ManifoldCFException("received error status from service after removing document. response body : " + responsbody);
+   }
+ }
+
+ /**
+ * Fill in a Server tab configuration parameter map for calling a Velocity
+ * template.
+ *
+ * @param newMap is the map to fill in
+ * @param parameters is the current set of configuration parameters
+ */
+ private static void fillInServerConfigurationMap(Map<String, Object> newMap, IPasswordMapperActivity mapper, ConfigParams parameters) {
+ String serverhost = parameters.getParameter(AmazonCloudSearchConfig.SERVER_HOST);
+ String serverpath = parameters.getParameter(AmazonCloudSearchConfig.SERVER_PATH);
+ String proxyprotocol = parameters.getParameter(AmazonCloudSearchConfig.PROXY_PROTOCOL);
+ String proxyhost = parameters.getParameter(AmazonCloudSearchConfig.PROXY_HOST);
+ String proxyport = parameters.getParameter(AmazonCloudSearchConfig.PROXY_PORT);
+
+ if (serverhost == null)
+ serverhost = AmazonCloudSearchConfig.SERVER_HOST_DEFAULT;
+ if (serverpath == null)
+ serverpath = AmazonCloudSearchConfig.SERVER_PATH_DEFAULT;
+ if (proxyprotocol == null)
+ proxyprotocol = AmazonCloudSearchConfig.PROXY_PROTOCOL_DEFAULT;
+ if (proxyhost == null)
+ proxyhost = AmazonCloudSearchConfig.PROXY_HOST_DEFAULT;
+ if (proxyport == null)
+ proxyport = AmazonCloudSearchConfig.PROXY_PORT_DEFAULT;
+
+ newMap.put("SERVERHOST", serverhost);
+ newMap.put("SERVERPATH", serverpath);
+ newMap.put("PROXYPROTOCOL", proxyprotocol);
+ newMap.put("PROXYHOST", proxyhost);
+ newMap.put("PROXYPORT", proxyport);
+ }
+
+ /**
+  * View configuration. Called in the body section of the connector's view configuration page
+  * to present the connection information to the user. The emitted HTML can be presumed to sit
+  * inside appropriate html and body tags.
+  * The server parameters are gathered into a map and rendered with the view template.
+  *
+  * @param threadContext is the local thread context.
+  * @param out is the output to which any HTML should be sent.
+  * @param locale is the locale passed to the template renderer.
+  * @param parameters are the configuration parameters, as they currently
+  *        exist, for this connection being configured.
+  */
+ @Override
+ public void viewConfiguration(IThreadContext threadContext, IHTTPOutput out,
+   Locale locale, ConfigParams parameters) throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<String, Object>();
+   fillInServerConfigurationMap(velocityContext, out, parameters);
+   Messages.outputResourceWithVelocity(out, locale, VIEW_CONFIGURATION_HTML, velocityContext);
+ }
+
+ /**
+  * Output the configuration header section. Called in the head section of the connector's
+  * configuration page to register the connector's tabs and to emit any javascript needed by
+  * the configuration editing HTML.
+  * The Server tab is appended to the tab list, then the single javascript template shared by
+  * all tabs is rendered with the server parameters.
+  *
+  * @param threadContext is the local thread context.
+  * @param out is the output to which any HTML should be sent.
+  * @param locale is the locale used for the tab name and the template.
+  * @param parameters are the configuration parameters, as they currently
+  *        exist, for this connection being configured.
+  * @param tabsArray is an array of tab names. Add to this array any tab
+  *        names that are specific to the connector.
+  */
+ @Override
+ public void outputConfigurationHeader(IThreadContext threadContext,
+   IHTTPOutput out, Locale locale, ConfigParams parameters, List<String> tabsArray)
+   throws ManifoldCFException, IOException {
+   final String serverTabName = Messages.getString(locale, "AmazonCloudSearchOutputConnector.ServerTabName");
+   tabsArray.add(serverTabName);
+
+   final Map<String, Object> velocityContext = new HashMap<String, Object>();
+   fillInServerConfigurationMap(velocityContext, out, parameters);
+
+   Messages.outputResourceWithVelocity(out, locale, EDIT_CONFIGURATION_JS, velocityContext);
+ }
+
+ /**
+  * Output the configuration body section: renders the configuration editing template with the
+  * current tab name and the server parameters.
+  *
+  * @param threadContext is the local thread context.
+  * @param out is the output to which any HTML should be sent.
+  * @param locale is the locale passed to the template renderer.
+  * @param parameters are the configuration parameters for this connection.
+  * @param tabName is the current tab name.
+  */
+ @Override
+ public void outputConfigurationBody(IThreadContext threadContext,
+   IHTTPOutput out, Locale locale, ConfigParams parameters, String tabName)
+   throws ManifoldCFException, IOException {
+   final Map<String, Object> velocityContext = new HashMap<String, Object>();
+   velocityContext.put("TABNAME", tabName);
+   fillInServerConfigurationMap(velocityContext, out, parameters);
+
+   // One template serves the Server tab.
+   Messages.outputResourceWithVelocity(out, locale, EDIT_CONFIGURATION_HTML, velocityContext);
+ }
+
+ /**
+  * Process a configuration post. Called at the start of the connector's configuration page
+  * whenever form data for a connection may have been posted (the form is named
+  * "editconnection"). Every Server tab value present in the post is copied into the
+  * configuration parameters; values absent from the post leave the parameters untouched.
+  *
+  * @param threadContext is the local thread context.
+  * @param variableContext is the set of variables available from the post,
+  *        including binary file post information.
+  * @param parameters are the configuration parameters, as they currently
+  *        exist, for this connection being configured.
+  * @return null if all is well, or a string error message if there is an
+  *         error that should prevent saving of the connection (and cause a
+  *         redirection to an error page). This implementation always returns null.
+  */
+ @Override
+ public String processConfigurationPost(IThreadContext threadContext,
+   IPostParameters variableContext, ConfigParams parameters)
+   throws ManifoldCFException {
+
+   // Posted form variable -> configuration parameter name, in processing order.
+   final String[][] serverTabFields = {
+     { "serverhost", AmazonCloudSearchConfig.SERVER_HOST },
+     { "serverpath", AmazonCloudSearchConfig.SERVER_PATH },
+     { "proxyprotocol", AmazonCloudSearchConfig.PROXY_PROTOCOL },
+     { "proxyhost", AmazonCloudSearchConfig.PROXY_HOST },
+     { "proxyport", AmazonCloudSearchConfig.PROXY_PORT }
+   };
+
+   for (String[] field : serverTabFields) {
+     final String posted = variableContext.getParameter(field[0]);
+     if (posted != null) {
+       parameters.setParameter(field[1], posted);
+     }
+   }
+
+   return null;
+ }
+
+ /** Post a JSON document batch using the shared poster request and return the response body.
+  * A new HTTP client is created for the call and closed before returning.
+  *@param jsonData is the JSON text to send, encoded as UTF-8.
+  *@return the response body as a string.
+  *@throws ManifoldCFException on a protocol error, or when handleIOException() reports an interruption.
+  *@throws ServiceInterruption when handleIOException() converts an I/O failure into a retry request.
+  */
+ private String postData(String jsonData) throws ServiceInterruption, ManifoldCFException {
+   CloseableHttpClient httpclient = HttpClients.createDefault();
+   try {
+     poster.setEntity(new StringEntity(jsonData, Consts.UTF_8));
+     HttpResponse res = httpclient.execute(poster);
+
+     HttpEntity resEntity = res.getEntity();
+     // Decode as UTF-8 unless the response names a charset itself; the one-argument
+     // overload would fall back to ISO-8859-1 and corrupt non-ASCII JSON text.
+     return EntityUtils.toString(resEntity, Consts.UTF_8);
+   } catch (ClientProtocolException e) {
+     throw new ManifoldCFException(e);
+   } catch (IOException e) {
+     // Always throws; the trailing return below only satisfies the compiler.
+     handleIOException(e);
+   } finally {
+     try {
+       httpclient.close();
+     } catch (IOException e) {
+       // Nothing useful can be done if closing the client fails.
+     }
+   }
+   return null;
+ }
+
+ private static void handleIOException(IOException e)
+ throws ManifoldCFException, ServiceInterruption {
+ if (!(e instanceof java.net.SocketTimeoutException)
+ && (e instanceof InterruptedIOException)) {
+ throw new ManifoldCFException("Interrupted: " + e.getMessage(), e,
+ ManifoldCFException.INTERRUPTED);
+ }
+ Logging.connectors.warn(
+ "Amazon CloudSearch: IO exception: " + e.getMessage(), e);
+ long currentTime = System.currentTimeMillis();
+ throw new ServiceInterruption("IO exception: " + e.getMessage(), e,
+ currentTime + 300000L, currentTime + 3 * 60 * 60000L, -1, false);
+ }
+
+ /** Populate the Velocity parameter map for the field mapping tab.
+  * "FIELDMAPPINGS" receives one map per field mapping node, with keys SOURCE, TARGET and
+  * TARGETDISPLAY. "KEEPALLMETADATA" receives the value of the keep-metadata node, or "true"
+  * when the specification has no such node.
+  *@param paramMap is the map to fill in.
+  *@param os is the current output specification for this job.
+  */
+ protected static void fillInFieldMappingSpecificationMap(Map<String,Object> paramMap, OutputSpecification os)
+ {
+   // Prep for field mappings
+   List<Map<String,String>> fieldMappings = new ArrayList<Map<String,String>>();
+   String keepAllMetadataValue = "true";
+   for (int i = 0; i < os.getChildCount(); i++)
+   {
+     SpecificationNode sn = os.getChild(i);
+     if (sn.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP)) {
+       String source = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE);
+       String target = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_TARGET);
+       String targetDisplay;
+       // processSpecificationPost() stores a missing target as "", so an empty target has to be
+       // shown as "(remove)" as well; otherwise this branch never applies to saved mappings.
+       if (target == null || target.length() == 0)
+       {
+         target = "";
+         targetDisplay = "(remove)";
+       }
+       else
+         targetDisplay = target;
+       Map<String,String> fieldMapping = new HashMap<String,String>();
+       fieldMapping.put("SOURCE",source);
+       fieldMapping.put("TARGET",target);
+       fieldMapping.put("TARGETDISPLAY",targetDisplay);
+       fieldMappings.add(fieldMapping);
+     }
+     else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA))
+     {
+       keepAllMetadataValue = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+     }
+   }
+   paramMap.put("FIELDMAPPINGS",fieldMappings);
+   paramMap.put("KEEPALLMETADATA",keepAllMetadataValue);
+ }
+
+ protected static void fillInContentsSpecificationMap(Map<String,Object> paramMap, OutputSpecification os)
+ {
+ String maxFileSize = AmazonCloudSearchConfig.MAXLENGTH_DEFAULT;
+ String allowedMimeTypes = AmazonCloudSearchConfig.MIMETYPES_DEFAULT;
+ String allowedFileExtensions = AmazonCloudSearchConfig.EXTENSIONS_DEFAULT;
+ for (int i = 0; i < os.getChildCount(); i++)
+ {
+ SpecificationNode sn = os.getChild(i);
+ if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MAXLENGTH))
+ maxFileSize = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+ else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES))
+ allowedMimeTypes = sn.getValue();
+ else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_EXTENSIONS))
+ allowedFileExtensions = sn.getValue();
+ }
+ paramMap.put("MAXFILESIZE",maxFileSize);
+ paramMap.put("MIMETYPES",allowedMimeTypes);
+ paramMap.put("EXTENSIONS",allowedFileExtensions);
+ }
+
+ /**
+  * Output the specification header section. Called in the head section of a job page that has
+  * selected an output connection of this type, to register the connector's tabs and to emit
+  * any javascript needed by the job editing HTML.
+  * The field mapping and contents tabs are appended to the tab list, then the javascript
+  * template is rendered with the data of both tabs.
+  *
+  * @param out is the output to which any HTML should be sent.
+  * @param locale is the locale used for the tab names and the template.
+  * @param os is the current output specification for this job.
+  * @param tabsArray is an array of tab names. Add to this array any tab names
+  *        that are specific to the connector.
+  */
+ @Override
+ public void outputSpecificationHeader(IHTTPOutput out, Locale locale,
+   OutputSpecification os, List<String> tabsArray)
+   throws ManifoldCFException, IOException
+ {
+   final Map<String, Object> velocityContext = new HashMap<String, Object>();
+
+   final String fieldMappingTab = Messages.getString(locale, "AmazonCloudSearchOutputConnector.FieldMappingTabName");
+   tabsArray.add(fieldMappingTab);
+   final String contentsTab = Messages.getString(locale, "AmazonCloudSearchOutputConnector.ContentsTabName");
+   tabsArray.add(contentsTab);
+
+   // The one javascript template needs the data of every tab.
+   fillInFieldMappingSpecificationMap(velocityContext, os);
+   fillInContentsSpecificationMap(velocityContext, os);
+
+   Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_JS, velocityContext);
+ }
+
+ /** Output the specification body section.
+  * Called in the body section of a job page that has selected an output connection of this type, to present the form elements for editing.
+  * The emitted HTML can be presumed to sit inside appropriate html, body and form tags; the form is named "editjob".
+  * The contents template is rendered first, followed by the field mapping template; both receive the same parameter map.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the locale passed to the template renderer.
+  *@param os is the current output specification for this job.
+  *@param tabName is the current tab name.
+  */
+ @Override
+ public void outputSpecificationBody(IHTTPOutput out, Locale locale, OutputSpecification os, String tabName)
+   throws ManifoldCFException, IOException
+ {
+   final Map<String, Object> velocityContext = new HashMap<String, Object>();
+   velocityContext.put("TABNAME", tabName);
+
+   // Both templates read from one map, so gather the data of both tabs up front.
+   fillInFieldMappingSpecificationMap(velocityContext, os);
+   fillInContentsSpecificationMap(velocityContext, os);
+
+   Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_CONTENTS_HTML, velocityContext);
+   Messages.outputResourceWithVelocity(out, locale, EDIT_SPECIFICATION_FIELDMAPPING_HTML, velocityContext);
+ }
+
+ /** Process a specification post.
+  * This method is called at the start of job's edit or view page, whenever there is a possibility that form data for a connection has been
+  * posted. Its purpose is to gather form information and modify the output specification accordingly.
+  * The name of the posted form is "editjob".
+  * Each posted value replaces the existing node(s) of its type; values absent from the post leave the specification untouched.
+  * The field mappings and the keep-all-metadata flag are rewritten together, and only when "cloudsearch_fieldmapping_count" is posted.
+  *@param variableContext contains the post data, including binary file-upload information.
+  *@param locale is the locale of the request; it is not used by this method.
+  *@param os is the current output specification for this job.
+  *@return null if all is well, or a string error message if there is an error that should prevent saving of the job (and cause a redirection to an error page).
+  */
+ @Override
+ public String processSpecificationPost(IPostParameters variableContext,
+   Locale locale, OutputSpecification os) throws ManifoldCFException {
+
+   // Contents tab: maximum file size, kept as an attribute of its node.
+   String maxFileSize = variableContext.getParameter("maxfilesize");
+   if (maxFileSize != null)
+   {
+     removeNodesOfType(os, AmazonCloudSearchConfig.NODE_MAXLENGTH);
+     SpecificationNode sn = new SpecificationNode(AmazonCloudSearchConfig.NODE_MAXLENGTH);
+     sn.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE,maxFileSize);
+     os.addChild(os.getChildCount(),sn);
+   }
+
+   // Contents tab: allowed mime types, kept as the node value.
+   String mimeTypes = variableContext.getParameter("mimetypes");
+   if (mimeTypes != null)
+   {
+     removeNodesOfType(os, AmazonCloudSearchConfig.NODE_MIMETYPES);
+     SpecificationNode sn = new SpecificationNode(AmazonCloudSearchConfig.NODE_MIMETYPES);
+     sn.setValue(mimeTypes);
+     os.addChild(os.getChildCount(),sn);
+   }
+
+   // Contents tab: allowed file extensions, kept as the node value.
+   String extensions = variableContext.getParameter("extensions");
+   if (extensions != null)
+   {
+     removeNodesOfType(os, AmazonCloudSearchConfig.NODE_EXTENSIONS);
+     SpecificationNode sn = new SpecificationNode(AmazonCloudSearchConfig.NODE_EXTENSIONS);
+     sn.setValue(extensions);
+     os.addChild(os.getChildCount(),sn);
+   }
+
+   // Field mapping tab.
+   String mappingCount = variableContext.getParameter("cloudsearch_fieldmapping_count");
+   if (mappingCount != null && mappingCount.length() > 0)
+   {
+     // Validate before touching the specification, so a bad post cannot leave it half-rewritten.
+     final int count;
+     try
+     {
+       count = Integer.parseInt(mappingCount);
+     }
+     catch (NumberFormatException e)
+     {
+       return "Field mapping count must be an integer";
+     }
+
+     // About to gather the fieldmapping nodes, so get rid of the old ones.
+     removeNodesOfType(os, AmazonCloudSearchConfig.NODE_FIELDMAP, AmazonCloudSearchConfig.NODE_KEEPMETADATA);
+
+     final String prefix = "cloudsearch_fieldmapping_";
+     for (int i = 0; i < count; i++)
+     {
+       String suffix = "_"+Integer.toString(i);
+       String op = variableContext.getParameter(prefix+"op"+suffix);
+       if (op == null || !op.equals("Delete"))
+       {
+         // Keep every existing row that is not being deleted.
+         addFieldMapping(os,
+           variableContext.getParameter(prefix+"source"+suffix),
+           variableContext.getParameter(prefix+"target"+suffix));
+       }
+     }
+
+     String addop = variableContext.getParameter("cloudsearch_fieldmapping_op");
+     if (addop != null && addop.equals("Add"))
+     {
+       addFieldMapping(os,
+         variableContext.getParameter("cloudsearch_fieldmapping_source"),
+         variableContext.getParameter("cloudsearch_fieldmapping_target"));
+     }
+
+     // Gather the keep all metadata parameter to be the last one
+     SpecificationNode node = new SpecificationNode(AmazonCloudSearchConfig.NODE_KEEPMETADATA);
+     String keepAll = variableContext.getParameter("cloudsearch_keepallmetadata");
+     // A value missing from the post is stored as "false".
+     node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_VALUE, (keepAll != null) ? keepAll : "false");
+     // Add the new keepallmetadata config parameter
+     os.addChild(os.getChildCount(), node);
+   }
+
+   return null;
+ }
+
+ /** Remove every child of the specification whose type equals one of the given types.
+  *@param os is the output specification to modify.
+  *@param types are the node types to remove.
+  */
+ private static void removeNodesOfType(OutputSpecification os, String... types)
+ {
+   int i = 0;
+   while (i < os.getChildCount())
+   {
+     String nodeType = os.getChild(i).getType();
+     boolean matches = false;
+     for (String type : types)
+     {
+       if (nodeType.equals(type))
+       {
+         matches = true;
+         break;
+       }
+     }
+     // After a removal the next child slides into position i, so only advance when nothing was removed.
+     if (matches)
+       os.removeChild(i);
+     else
+       i++;
+   }
+ }
+
+ /** Append one field mapping node to the specification.
+  * A mapping without a source name is ignored: it cannot match any field, and a null source would
+  * later break the sorting of mappings when the specification is packed. A null target is stored
+  * as the empty string.
+  *@param os is the output specification to modify.
+  *@param source is the posted source field name, possibly null.
+  *@param target is the posted target field name, possibly null.
+  */
+ private static void addFieldMapping(OutputSpecification os, String source, String target)
+ {
+   if (source == null || source.length() == 0)
+     return;
+   if (target == null)
+     target = "";
+   SpecificationNode node = new SpecificationNode(AmazonCloudSearchConfig.NODE_FIELDMAP);
+   node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE,source);
+   node.setAttribute(AmazonCloudSearchConfig.ATTRIBUTE_TARGET,target);
+   os.addChild(os.getChildCount(),node);
+ }
+
+
+ /** View specification.
+  * Called in the body section of a job's view page to present the output specification to the user.
+  * The emitted HTML can be presumed to sit inside appropriate html and body tags.
+  * The data of the field mapping and contents tabs is gathered into one map and rendered with the view template.
+  *@param out is the output to which any HTML should be sent.
+  *@param locale is the locale passed to the template renderer.
+  *@param os is the current output specification for this job.
+  */
+ @Override
+ public void viewSpecification(IHTTPOutput out, Locale locale, OutputSpecification os)
+   throws ManifoldCFException, IOException
+ {
+   final Map<String, Object> velocityContext = new HashMap<String, Object>();
+   fillInFieldMappingSpecificationMap(velocityContext, os);
+   fillInContentsSpecificationMap(velocityContext, os);
+   Messages.outputResourceWithVelocity(out, locale, VIEW_SPECIFICATION_HTML, velocityContext);
+ }
+
+ /** Add every non-blank line of a multi-line string to a set.
+  * Each line is trimmed before it is added; lines that are empty after trimming are skipped.
+  *@param set is the set that receives the lines.
+  *@param input is the text to split into lines. A null input adds nothing.
+  */
+ protected static void fillSet(Set<String> set, String input) {
+   // StringReader rejects null with a NullPointerException; treat a missing value as "no entries".
+   if (input == null)
+     return;
+   try
+   {
+     BufferedReader br = new BufferedReader(new StringReader(input));
+     String line;
+     while ((line = br.readLine()) != null)
+     {
+       line = line.trim();
+       if (line.length() > 0)
+         set.add(line);
+     }
+   }
+   catch (IOException e)
+   {
+     // Should never happen
+     throw new RuntimeException("IO exception reading strings: "+e.getMessage(),e);
+   }
+ }
+
+ protected static class SpecPacker {
+
+ private final Map<String,String> sourceTargets = new HashMap<String,String>();
+ private final boolean keepAllMetadata;
+ private final Set<String> extensions = new HashSet<String>();
+ private final Set<String> mimeTypes = new HashSet<String>();
+ private final Long lengthCutoff;
+
+ public SpecPacker(OutputSpecification os) {
+ boolean keepAllMetadata = true;
+ Long lengthCutoff = null;
+ String extensions = null;
+ String mimeTypes = null;
+ for (int i = 0; i < os.getChildCount(); i++) {
+ SpecificationNode sn = os.getChild(i);
+
+ if(sn.getType().equals(AmazonCloudSearchConfig.NODE_KEEPMETADATA)) {
+ String value = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+ keepAllMetadata = Boolean.parseBoolean(value);
+ } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_FIELDMAP)) {
+ String source = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_SOURCE);
+ String target = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_TARGET);
+
+ if (target == null) {
+ target = "";
+ }
+ sourceTargets.put(source, target);
+ } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MIMETYPES)) {
+ mimeTypes = sn.getValue();
+ } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_EXTENSIONS)) {
+ extensions = sn.getValue();
+ } else if (sn.getType().equals(AmazonCloudSearchConfig.NODE_MAXLENGTH)) {
+ String value = sn.getAttributeValue(AmazonCloudSearchConfig.ATTRIBUTE_VALUE);
+ lengthCutoff = new Long(value);
+ }
+ }
+ this.keepAllMetadata = keepAllMetadata;
+ this.lengthCutoff = lengthCutoff;
+ fillSet(this.extensions, extensions);
+ fillSet(this.mimeTypes, mimeTypes);
+ }
+
+ public SpecPacker(String packedString) {
+
+ int index = 0;
+
+ // Mappings
+ final List<String> packedMappings = new ArrayList<String>();
+ index = unpackList(packedMappings,packedString,index,'+');
+ String[] fixedList = new String[2];
+ for (String packedMapping : packedMappings) {
+ unpackFixedList(fixedList,packedMapping,0,':');
+ sourceTargets.put(fixedList[0], fixedList[1]);
+ }
+
+ // Keep all metadata
+ if (packedString.length() > index)
+ keepAllMetadata = (packedString.charAt(index++) == '+');
+ else
+ keepAllMetadata = true;
+
+ // Max length
+ final StringBuilder sb = new StringBuilder();
+ if (packedString.length() > index) {
+ if (packedString.charAt(index++) == '+') {
+ index = unpack(sb,packedString,index,'+');
+ this.lengthCutoff = new Long(sb.toString());
+ } else
+ this.lengthCutoff = null;
+ } else
+ this.lengthCutoff = null;
+
+ // Mime types
+ final List<String> mimeBuffer = new ArrayList<String>();
+ index = unpackList(mimeBuffer,packedString,index,'+');
+ for (String mimeType : mimeBuffer) {
+ this.mimeTypes.add(mimeType);
+ }
+
+ // Extensions
+ final List<String> extensionsBuffer = new ArrayList<String>();
+ index = unpackList(extensionsBuffer,packedString,index,'+');
+ for (String extension : extensionsBuffer) {
+ this.extensions.add(extension);
+ }
+ }
+
+ public String toPackedString() {
+ StringBuilder sb = new StringBuilder();
+ int i;
+
+ // Mappings
+ final String[] sortArray = new String[sourceTargets.size()];
+ i = 0;
+ for (String source : sourceTargets.keySet()) {
+ sortArray[i++] = source;
+ }
+ java.util.Arrays.sort(sortArray);
+
+ List<String> packedMappings = new ArrayList<String>();
+ String[] fixedList = new String[2];
+ for (String source : sortArray) {
+ String target = sourceTargets.get(source);
+ StringBuilder localBuffer = new StringBuilder();
+ fixedList[0] = source;
+ fixedList[1] = target;
+ packFixedList(localBuffer,fixedList,':');
+ packedMappings.add(localBuffer.toString());
+ }
+ packList(sb,packedMappings,'+');
+
+ // Keep all metadata
+ if (keepAllMetadata)
+ sb.append('+');
+ else
+ sb.append('-');
+
+ // Max length
+ if (lengthCutoff == null)
+ sb.append('-');
+ else {
+ sb.append('+');
+ pack(sb,lengthCutoff.toString(),'+');
+ }
+
+ // Mime types
+ String[] mimeTypes = new String[this.mimeTypes.size()];
+ i = 0;
+ for (String mimeType : this.mimeTypes) {
+ mimeTypes[i++] = mimeType;
+ }
+ java.util.Arrays.sort(mimeTypes);
+ packList(sb,mimeTypes,'+');
+
+ // Extensions
+ String[] extensions = new String[this.extensions.size()];
+ i = 0;
+ for (String extension : this.extensions) {
+ extensions[i++] = extension;
+ }
+ java.util.Arrays.sort(extensions);
+ packList(sb,extensions,'+');
+
+ return sb.toString();
+ }
+
+ public boolean checkLengthIndexable(long length) {
+ if (lengthCutoff == null)
+ return true;
+ return (length <= lengthCutoff.longValue());
+ }
+
+ public boolean checkMimeType(String mimeType) {
+ if (mimeType == null)
+ mimeType = "application/unknown";
+ return mimeTypes.contains(mimeType);
+ }
+
+ public boolean checkURLIndexable(String url) {
+ String extension = FilenameUtils.getExtension(url);
+ if (extension == null || extension.length() == 0)
+ extension = ".";
+ return extensions.contains(extension);
+ }
+
+ public String getMapping(String source) {
+ return sourceTargets.get(source);
+ }
+
+ public boolean keepAllMetadata() {
+ return keepAllMetadata;
+ }
+ }
+
+}
diff --git a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/Messages.java b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/Messages.java
new file mode 100644
index 0000000..b1e1cbc
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/Messages.java
@@ -0,0 +1,141 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.output.amazoncloudsearch;
+
+import java.util.Locale;
+import java.util.Map;
+import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
+import org.apache.manifoldcf.core.interfaces.IHTTPOutput;
+
+public class Messages extends org.apache.manifoldcf.ui.i18n.Messages
+{
+ public static final String DEFAULT_BUNDLE_NAME="org.apache.manifoldcf.agents.output.amazoncloudsearch.common";
+ public static final String DEFAULT_PATH_NAME="org.apache.manifoldcf.agents.output.amazoncloudsearch";
+ // DEFAULT_BUNDLE_NAME feeds the message lookups below; DEFAULT_PATH_NAME feeds the resource output methods.
+ /** Constructor - do not instantiate
+ */
+ protected Messages()
+ {
+ }
+ // Convenience lookups: DEFAULT_BUNDLE_NAME is used and null is passed for the message arguments.
+ public static String getString(Locale locale, String messageKey)
+ {
+ return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getAttributeString(Locale locale, String messageKey)
+ {
+ return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getBodyString(Locale locale, String messageKey)
+ {
+ return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getAttributeJavascriptString(Locale locale, String messageKey)
+ {
+ return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+
+ public static String getBodyJavascriptString(Locale locale, String messageKey)
+ {
+ return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, null);
+ }
+ // Same lookups against DEFAULT_BUNDLE_NAME, with caller-supplied message arguments.
+ public static String getString(Locale locale, String messageKey, Object[] args)
+ {
+ return getString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getAttributeString(Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getBodyString(Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getAttributeJavascriptString(Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ public static String getBodyJavascriptString(Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyJavascriptString(DEFAULT_BUNDLE_NAME, locale, messageKey, args);
+ }
+
+ // More general methods: the caller names the bundle, and Messages.class is handed to the five-argument lookup.
+
+ public static String getString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getAttributeString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getBodyString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getAttributeJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getAttributeJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ public static String getBodyJavascriptString(String bundleName, Locale locale, String messageKey, Object[] args)
+ {
+ return getBodyJavascriptString(Messages.class, bundleName, locale, messageKey, args);
+ }
+
+ // Resource output: each method adds Messages.class and the default names, then forwards to the longer overload.
+
+ public static void outputResource(IHTTPOutput output, Locale locale, String resourceKey,
+ Map<String,String> substitutionParameters, boolean mapToUpperCase)
+ throws ManifoldCFException
+ {
+ outputResource(output,Messages.class,DEFAULT_PATH_NAME,locale,resourceKey,
+ substitutionParameters,mapToUpperCase);
+ }
+
+ public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+ Map<String,String> substitutionParameters, boolean mapToUpperCase)
+ throws ManifoldCFException
+ {
+ outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+ substitutionParameters,mapToUpperCase);
+ }
+
+ public static void outputResourceWithVelocity(IHTTPOutput output, Locale locale, String resourceKey,
+ Map<String,Object> contextObjects)
+ throws ManifoldCFException
+ {
+ outputResourceWithVelocity(output,Messages.class,DEFAULT_BUNDLE_NAME,DEFAULT_PATH_NAME,locale,resourceKey,
+ contextObjects);
+ }
+
+}
+
diff --git a/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/SDFModel.java b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/SDFModel.java
new file mode 100644
index 0000000..c1ce94c
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/SDFModel.java
@@ -0,0 +1,75 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.output.amazoncloudsearch;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize.Inclusion;
+
+public class SDFModel {
+  // Collects Document entries and renders the whole list as one JSON string via toJSON().
+  private List<Document> documentList = new ArrayList<Document>();
+  /** Appends doc to the end of the list that toJSON() serializes. */
+  public void addDocument(Document doc){
+    documentList.add(doc);
+  }
+  /** Serializes the document list with a new ObjectMapper, leaving out null-valued properties. */
+  public String toJSON() throws JsonProcessingException{
+    ObjectMapper mapper = new ObjectMapper();
+    mapper.setSerializationInclusion(Include.NON_NULL);
+    return mapper.writeValueAsString(documentList);
+  }
+  /** One list entry: a type, an id and a field map, exposed through bean-style accessors. */
+  public class Document {
+    private String type;
+    private String id;
+    private Map<String,Object> fields;
+
+    public String getType() {
+      return type;
+    }
+
+    public void setType(String type) {
+      this.type = type;
+    }
+
+    public String getId() {
+      return id;
+    }
+
+    public void setId(String id) {
+      this.id = id;
+    }
+    /** Returns the field map with the same parameterization that setFields accepts. */
+    public Map<String,Object> getFields() {
+      return fields;
+    }
+
+    public void setFields(Map<String,Object> fields) {
+      this.fields = fields;
+    }
+  }
+}
diff --git a/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_en_US.properties b/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_en_US.properties
new file mode 100644
index 0000000..af5cdab
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_en_US.properties
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+AmazonCloudSearchOutputConnector.ServerTabName=Server
+AmazonCloudSearchOutputConnector.ServerHostColon=Server host:
+AmazonCloudSearchOutputConnector.ServerPathColon=Server path:
+AmazonCloudSearchOutputConnector.ProxyProtocolColon=Proxy protocol:
+AmazonCloudSearchOutputConnector.ProxyHostColon=Proxy host:
+AmazonCloudSearchOutputConnector.ProxyPortColon=Proxy port:
+AmazonCloudSearchOutputConnector.ServerHostCannotBeNull=Server host cannot be null
+AmazonCloudSearchOutputConnector.ServerPathMustStartWithSlash=Server path must start with a '/'
+AmazonCloudSearchOutputConnector.ProxyPortMustBeAnInteger=Proxy port must be an integer
+AmazonCloudSearchOutputConnector.FieldMappingTabName=CloudSearch Field Mapping
+AmazonCloudSearchOutputConnector.ContentsTabName=CloudSearch Contents
+AmazonCloudSearchOutputConnector.FieldMappings=Field Mappings
+AmazonCloudSearchOutputConnector.MetadataFieldName=Metadata Field Name
+AmazonCloudSearchOutputConnector.CloudSearchFieldName=CloudSearch Field Name
+AmazonCloudSearchOutputConnector.DeleteFieldMapping=Delete field mapping
+AmazonCloudSearchOutputConnector.AddFieldMapping=Add field mapping
+AmazonCloudSearchOutputConnector.KeepAllMetadata=Keep all metadata:
+AmazonCloudSearchOutputConnector.Add=Add
+AmazonCloudSearchOutputConnector.NoFieldMappingSpecified=No field mapping specified
+AmazonCloudSearchOutputConnector.MaxFileSizeBytesColon=Max file size (bytes):
+AmazonCloudSearchOutputConnector.AllowedMIMETypesColon=Allowed MIME types:
+AmazonCloudSearchOutputConnector.AllowedFileExtensionsColon=Allowed file extensions:
diff --git a/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_ja_JP.properties b/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_ja_JP.properties
new file mode 100644
index 0000000..e7c2898
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/native2ascii/org/apache/manifoldcf/agents/output/amazoncloudsearch/common_ja_JP.properties
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+AmazonCloudSearchOutputConnector.ServerTabName=サーバー
+AmazonCloudSearchOutputConnector.ServerHostColon=サーバー名:
+AmazonCloudSearchOutputConnector.ServerPathColon=パス:
+AmazonCloudSearchOutputConnector.ProxyProtocolColon=プロキシ プロトコル:
+AmazonCloudSearchOutputConnector.ProxyHostColon=プロキシ ホスト:
+AmazonCloudSearchOutputConnector.ProxyPortColon=プロキシ ポート:
+AmazonCloudSearchOutputConnector.ServerHostCannotBeNull=サーバー名は必須です。
+AmazonCloudSearchOutputConnector.ServerPathMustStartWithSlash=パスは / から入力してください。
+AmazonCloudSearchOutputConnector.ProxyPortMustBeAnInteger=プロキシ ポートは数値を入力してください。
+AmazonCloudSearchOutputConnector.FieldMappingTabName=CloudSearch フィールドマッピング
+AmazonCloudSearchOutputConnector.ContentsTabName=CloudSearch コンテンツ
+AmazonCloudSearchOutputConnector.FieldMappings=フィールドマッピング
+AmazonCloudSearchOutputConnector.MetadataFieldName=メタデータフィールド名
+AmazonCloudSearchOutputConnector.CloudSearchFieldName=CloudSearch フィールド名
+AmazonCloudSearchOutputConnector.DeleteFieldMapping=フィールドマッピングを削除
+AmazonCloudSearchOutputConnector.AddFieldMapping=フィールドマッピングを追加
+AmazonCloudSearchOutputConnector.KeepAllMetadata=全てのメタデータを保持する:
+AmazonCloudSearchOutputConnector.Add=追加
+AmazonCloudSearchOutputConnector.NoFieldMappingSpecified=フィールドマッピングを入力してください
+AmazonCloudSearchOutputConnector.MaxFileSizeBytesColon=最大ファイルサイズ (バイト):
+AmazonCloudSearchOutputConnector.AllowedMIMETypesColon=利用可能なMIMEタイプ:
+AmazonCloudSearchOutputConnector.AllowedFileExtensionsColon=利用可能なファイル拡張子:
diff --git a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editConfiguration.html b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editConfiguration.html
new file mode 100644
index 0000000..b253e9d
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editConfiguration.html
@@ -0,0 +1,70 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+#if($TABNAME == $ResourceBundle.getString('AmazonCloudSearchOutputConnector.ServerTabName'))
+## Server tab selected: render editable server host/path and proxy protocol/host/port fields.
+<table class="displaytable">
+ <tr><td class="separator" colspan="2"><hr/></td></tr>
+
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ServerHostColon'))</nobr></td>
+ <td class="value"><input name="serverhost" type="text" value="$Encoder.attributeEscape($SERVERHOST)" size="32" /></td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ServerPathColon'))</nobr></td>
+ <td class="value"><input name="serverpath" type="text" value="$Encoder.attributeEscape($SERVERPATH)" size="32" /></td>
+ </tr>
+
+ <tr><td class="separator" colspan="2"><hr/></td></tr>
+
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ProxyProtocolColon'))</nobr></td>
+ <td class="value">
+ <select name="proxyprotocol" size="2">
+ #if($PROXYPROTOCOL == 'http')
+ <option value="http" selected="true">http</option>
+ #else
+ <option value="http">http</option>
+ #end
+ #if($PROXYPROTOCOL == 'https')
+ <option value="https" selected="true">https</option>
+ #else
+ <option value="https">https</option>
+ #end
+ </select>
+ </td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ProxyHostColon'))</nobr></td>
+ <td class="value"><input name="proxyhost" type="text" value="$Encoder.attributeEscape($PROXYHOST)" size="32" /></td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ProxyPortColon'))</nobr></td>
+ <td class="value"><input name="proxyport" type="text" value="$Encoder.attributeEscape($PROXYPORT)" size="5" /></td>
+ </tr>
+
+</table>
+
+#else
+## Any other tab: carry the same five values forward as hidden inputs.
+<input type="hidden" name="serverhost" value="$Encoder.attributeEscape($SERVERHOST)" />
+<input type="hidden" name="serverpath" value="$Encoder.attributeEscape($SERVERPATH)" />
+<input type="hidden" name="proxyprotocol" value="$Encoder.attributeEscape($PROXYPROTOCOL)" />
+<input type="hidden" name="proxyhost" value="$Encoder.attributeEscape($PROXYHOST)" />
+<input type="hidden" name="proxyport" value="$Encoder.attributeEscape($PROXYPORT)" />
+
+#end
diff --git a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editConfiguration.js b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editConfiguration.js
new file mode 100644
index 0000000..4cb9a85
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editConfiguration.js
@@ -0,0 +1,46 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<script type="text/javascript">
+<!--
+function checkConfigForSave()
+{
+ if (editconnection.serverhost.value == "")
+ {
+ alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ServerHostCannotBeNull'))");
+ SelectTab("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ServerTabName'))");
+ editconnection.serverhost.focus();
+ return false;
+ }
+ if (!editconnection.serverpath.value.indexOf("/") == 0)
+ {
+ alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ServerPathMustStartWithSlash'))");
+ SelectTab("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ServerTabName'))");
+ editconnection.serverpath.focus();
+ return false;
+ }
+ if (editconnection.proxyport.value != "" && !isInteger(editconnection.proxyport.value))
+ {
+ alert("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ProxyPortMustBeAnInteger'))");
+ SelectTab("$Encoder.bodyJavascriptEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ServerTabName'))");
+ editconnection.proxyport.focus();
+ return false;
+ }
+ return true;
+}
+//-->
+</script>
\ No newline at end of file
diff --git a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification.js b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification.js
new file mode 100644
index 0000000..b8695e5
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification.js
@@ -0,0 +1,51 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<script type="text/javascript">
+<!--
+function checkOutputSpecification()
+{
+ return true; // no checks are performed here; the function always returns true
+}
+
+function addFieldMapping()
+{
+ if (editjob.cloudsearch_fieldmapping_source.value == "")
+ {
+ alert("$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.NoFieldMappingSpecified'))");
+ editjob.solr_fieldmapping_source.focus();
+ return;
+ }
+ editjob.cloudsearch_fieldmapping_op.value="Add";
+ postFormSetAnchor("cloudsearch_fieldmapping");
+}
+
+function deleteFieldMapping(i)
+{
+ // Set the operation
+ eval("editjob.cloudsearch_fieldmapping_op_"+i+".value=\"Delete\"");
+ // Submit
+ if (editjob.cloudsearch_fieldmapping_count.value==i)
+ postFormSetAnchor("cloudsearch_fieldmapping");
+ else
+ postFormSetAnchor("cloudsearch_fieldmapping_"+i)
+ // Undo, so we won't get two deletes next time
+ eval("editjob.cloudsearch_fieldmapping_op_"+i+".value=\"Continue\"");
+}
+
+//-->
+</script>
diff --git a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_Contents.html b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_Contents.html
new file mode 100644
index 0000000..ce1962e
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_Contents.html
@@ -0,0 +1,50 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+#if($TABNAME == $ResourceBundle.getString('AmazonCloudSearchOutputConnector.ContentsTabName'))
+## Contents tab selected: render editable max file size, MIME type and file extension fields.
+<table class="displaytable">
+ <tr>
+ <td class="description">
+ <nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.MaxFileSizeBytesColon'))</nobr>
+ </td>
+ <td class="value"><input name="maxfilesize" type="text"
+ value="$Encoder.attributeEscape($MAXFILESIZE)" size="24" /></td>
+ </tr>
+ <tr>
+ <td class="description">
+ <nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.AllowedMIMETypesColon'))</nobr>
+ </td>
+ <td class="value">
+ <textarea rows="10" cols="64" name="mimetypes">$Encoder.bodyEscape($MIMETYPES)</textarea>
+ </td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.AllowedFileExtensionsColon'))</nobr></td>
+ <td class="value">
+ <textarea rows="10" cols="12" name="extensions">$Encoder.bodyEscape($EXTENSIONS)</textarea>
+ </td>
+ </tr>
+</table>
+
+#else
+## Any other tab: carry the same three values forward as hidden inputs.
+<input type="hidden" name="maxfilesize" value="$Encoder.attributeEscape($MAXFILESIZE)" />
+<input type="hidden" name="mimetypes" value="$Encoder.attributeEscape($MIMETYPES)" />
+<input type="hidden" name="extensions" value="$Encoder.attributeEscape($EXTENSIONS)" />
+
+#end
diff --git a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_FieldMapping.html b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_FieldMapping.html
new file mode 100644
index 0000000..1e242ba
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/editSpecification_FieldMapping.html
@@ -0,0 +1,107 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+#if($TABNAME == $ResourceBundle.getString('AmazonCloudSearchOutputConnector.FieldMappingTabName'))
+## Field Mapping tab selected: render the editable mapping table and the keep-all-metadata checkbox.
+<table class="displaytable">
+ <tr><td class="separator" colspan="2"><hr/></td></tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.FieldMappings'))</nobr></td>
+ <td class="boxcell">
+ <table class="formtable">
+ <tr class="formheaderrow">
+ <td class="formcolumnheader"></td>
+ <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.MetadataFieldName'))</nobr></td>
+ <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.CloudSearchFieldName'))</nobr></td>
+ </tr>
+
+ #set($fieldcounter = 0)
+ #foreach($fieldmapping in $FIELDMAPPINGS)
+ #set($fieldcounterdisplay = $fieldcounter + 1)
+ #if(($fieldcounter % 2) == 0)
+ <tr class="evenformrow">
+ #else
+ <tr class="oddformrow">
+ #end
+ <td class="formcolumncell">
+ <a name="cloudsearch_fieldmapping_$fieldcounter">
+ <input type="button" value="Delete" alt="$Encoder.attributeEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.DeleteFieldMapping'))$fieldcounterdisplay" onclick='javascript:deleteFieldMapping("$fieldcounter");'/>
+ <input type="hidden" name="cloudsearch_fieldmapping_op_$fieldcounter" value="Continue"/>
+ <input type="hidden" name="cloudsearch_fieldmapping_source_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('SOURCE'))"/>
+ <input type="hidden" name="cloudsearch_fieldmapping_target_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('TARGET'))"/>
+ </a>
+ </td>
+ <td class="formcolumncell">
+ <nobr>$Encoder.bodyEscape($fieldmapping.get('SOURCE'))</nobr>
+ </td>
+ <td class="formcolumncell">
+ <nobr>$Encoder.bodyEscape($fieldmapping.get('TARGETDISPLAY'))</nobr>
+ </td>
+ </tr>
+ #set($fieldcounter = $fieldcounter + 1)
+ #end
+
+ #if($fieldcounter == 0)
+ <tr class="formrow"><td class="formmessage" colspan="3">$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.NoFieldMappingSpecified'))</td></tr>
+ #end
+
+ <tr class="formrow"><td class="formseparator" colspan="3"><hr/></td></tr>
+ <tr class="formrow">
+ <td class="formcolumncell">
+ <a name="cloudsearch_fieldmapping">
+ <input type="button" value="$Encoder.attributeEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.Add'))" alt="$Encoder.attributeEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.AddFieldMapping'))" onclick="javascript:addFieldMapping();"/>
+ </a>
+ <input type="hidden" name="cloudsearch_fieldmapping_count" value="$fieldcounter"/>
+ <input type="hidden" name="cloudsearch_fieldmapping_op" value="Continue"/>
+ </td>
+ <td class="formcolumncell">
+ <nobr><input type="text" size="15" name="cloudsearch_fieldmapping_source" value=""/></nobr>
+ </td>
+ <td class="formcolumncell">
+ <nobr><input type="text" size="15" name="cloudsearch_fieldmapping_target" value=""/></nobr>
+ </td>
+ </tr>
+ </table>
+ </td>
+ </tr>
+
+ <tr><td class="separator" colspan="2"><hr/></td></tr>
+
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.KeepAllMetadata'))</nobr></td>
+ <td class="value">
+ #if($KEEPALLMETADATA == 'true')
+ <input type="checkbox" checked="true" name="cloudsearch_keepallmetadata" value="true"/>
+ #else
+ <input type="checkbox" name="cloudsearch_keepallmetadata" value="true"/>
+ #end
+ </td>
+ </tr>
+</table>
+
+#else
+## Any other tab: carry the mappings, their count and the keep-all-metadata value as hidden inputs.
+ #set($fieldcounter = 0)
+ #foreach($fieldmapping in $FIELDMAPPINGS)
+<input type="hidden" name="cloudsearch_fieldmapping_source_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('SOURCE'))"/>
+<input type="hidden" name="cloudsearch_fieldmapping_target_$fieldcounter" value="$Encoder.attributeEscape($fieldmapping.get('TARGET'))"/>
+ #set($fieldcounter = $fieldcounter + 1)
+ #end
+<input type="hidden" name="cloudsearch_fieldmapping_count" value="$fieldcounter"/>
+<input type="hidden" name="cloudsearch_keepallmetadata" value="$Encoder.attributeEscape($KEEPALLMETADATA)"/>
+
+#end
\ No newline at end of file
diff --git a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewConfiguration.html b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewConfiguration.html
new file mode 100644
index 0000000..e93d716
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewConfiguration.html
@@ -0,0 +1,42 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<table class="displaytable">
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ServerHostColon'))</nobr></td>
+ <td class="value">$Encoder.bodyEscape($SERVERHOST)</td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ServerPathColon'))</nobr></td>
+ <td class="value">$Encoder.bodyEscape($SERVERPATH)</td>
+ </tr>
+
+ <tr><td class="separator" colspan="2"><hr/></td></tr>
+
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ProxyProtocolColon'))</nobr></td>
+ <td class="value">$Encoder.bodyEscape($PROXYPROTOCOL)</td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ProxyHostColon'))</nobr></td>
+ <td class="value">$Encoder.bodyEscape($PROXYHOST)</td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.ProxyPortColon'))</nobr></td>
+ <td class="value">$Encoder.bodyEscape($PROXYPORT)</td>
+ </tr>
+</table>
\ No newline at end of file
diff --git a/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewSpecification.html b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewSpecification.html
new file mode 100644
index 0000000..a38e893
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/main/resources/org/apache/manifoldcf/agents/output/amazoncloudsearch/viewSpecification.html
@@ -0,0 +1,70 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<table class="displaytable">
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.MaxFileSizeBytesColon'))</nobr></td>
+ <td class="value">$Encoder.bodyEscape($MAXFILESIZE)</td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.AllowedMIMETypesColon'))</nobr></td>
+ <td class="value">$Encoder.bodyEscape($MIMETYPES)</td>
+ </tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.AllowedFileExtensionsColon'))</nobr></td>
+ <td class="value">$Encoder.bodyEscape($EXTENSIONS)</td>
+ </tr>
+ <!-- Rows above: maximum file size, allowed MIME types and allowed file extensions. -->
+ <tr><td class="separator" colspan="2"><hr/></td></tr>
+ <!-- Field mappings: one row per FIELDMAPPINGS entry with alternating even/odd row classes; a message row is shown when there are none. -->
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.FieldMappings'))</nobr></td>
+ <td class="boxcell">
+ <table class="formtable">
+ <tr class="formheaderrow">
+ <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.MetadataFieldName'))</nobr></td>
+ <td class="formcolumnheader"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.CloudSearchFieldName'))</nobr></td>
+ </tr>
+#set($fieldcounter = 0)
+#foreach($fieldmapping in $FIELDMAPPINGS)
+ #if(($fieldcounter % 2) == 0)
+ <tr class="evenformrow">
+ #else
+ <tr class="oddformrow">
+ #end
+ <td class="formcolumncell">
+ <nobr>$Encoder.bodyEscape($fieldmapping.get('SOURCE'))</nobr>
+ </td>
+ <td class="formcolumncell">
+ <nobr>$Encoder.bodyEscape($fieldmapping.get('TARGETDISPLAY'))</nobr>
+ </td>
+ </tr>
+ #set($fieldcounter = $fieldcounter + 1)
+#end
+#if($fieldcounter == 0)
+ <tr class="formrow"><td class="formmessage" colspan="2">$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.NoFieldMappingSpecified'))</td></tr>
+#end
+ </table>
+ </td>
+ </tr>
+ <tr><td class="separator" colspan="2"><hr/></td></tr>
+ <tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('AmazonCloudSearchOutputConnector.KeepAllMetadata'))</nobr></td>
+ <td class="value"><nobr>$Encoder.bodyEscape($KEEPALLMETADATA)</nobr></td>
+ </tr>
+
+</table>
diff --git a/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/AmazonCloudSearchConnectorTest.java b/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/AmazonCloudSearchConnectorTest.java
new file mode 100644
index 0000000..2f3d386
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/AmazonCloudSearchConnectorTest.java
@@ -0,0 +1,111 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.output.amazoncloudsearch.tests;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel;
+import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel.Document;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.html.HtmlParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+public class AmazonCloudSearchConnectorTest {
+
+  /**
+   * Manual driver (not a JUnit test): parses the local file "000407.htm" with
+   * Tika's HtmlParser, copies the body text and selected metadata into one SDF
+   * "add" document, and prints the JSON from SDFModel.toJSON() to stdout.
+   */
+  public static void main(String[] args){
+    InputStream is = null;
+    try {
+      is = new FileInputStream(new File("000407.htm"));
+      Parser parser = new HtmlParser();
+      ContentHandler handler = new BodyContentHandler();
+      Metadata metadata = new Metadata();
+      parser.parse(is, handler, metadata, new ParseContext());
+
+      //build json..
+      SDFModel model = new SDFModel();
+      Document doc = model.new Document();
+      doc.setType("add");
+      doc.setId("aabbcc");
+
+      //set body text (newlines and tabs stripped).
+      Map<String,Object> fields = new HashMap<String,Object>();
+      String bodyStr = handler.toString();
+      if(bodyStr != null){
+        bodyStr = bodyStr.replaceAll("\\n", "").replaceAll("\\t", "");
+        fields.put("body", bodyStr);
+      }
+
+      //mapping metadata to SDF fields; absent or empty values are skipped.
+      String contenttype = metadata.get("Content-Style-Type");
+      String title = metadata.get("dc.title");
+      String size = metadata.get("Content-Length");
+      String description = metadata.get("description");
+      String keywords = metadata.get("keywords");
+      if(contenttype != null && !"".equals(contenttype)) fields.put("content_type", contenttype);
+      if(title != null && !"".equals(title)) fields.put("title", title);
+      if(size != null && !"".equals(size)) fields.put("size", size);
+      if(description != null && !"".equals(description)) fields.put("description", description);
+      if(keywords != null && !"".equals(keywords))
+      {
+        List<String> keywordList = new ArrayList<String>();
+        for(String tmp : keywords.split(",")){
+          keywordList.add(tmp.trim()); // tolerate whitespace around the commas
+        }
+        fields.put("keywords", keywordList);
+      }
+      doc.setFields(fields);
+      model.addDocument(doc);
+
+      //generate json data.
+      String jsondata = model.toJSON();
+      System.out.println(jsondata);
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (SAXException e) {
+      e.printStackTrace();
+    } catch (TikaException e) {
+      e.printStackTrace();
+    } finally {
+      //always release the file handle, even when parsing fails.
+      if (is != null) {
+        try { is.close(); } catch (IOException e) { e.printStackTrace(); }
+      }
+    }
+  }
+}
diff --git a/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/SDFModelTest.java b/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/SDFModelTest.java
new file mode 100644
index 0000000..f745469
--- /dev/null
+++ b/connectors/amazoncloudsearch/connector/src/test/java/org/apache/manifoldcf/agents/output/amazoncloudsearch/tests/SDFModelTest.java
@@ -0,0 +1,66 @@
+/* $Id$ */
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.manifoldcf.agents.output.amazoncloudsearch.tests;
+
+import static org.junit.Assert.*;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.manifoldcf.agents.output.amazoncloudsearch.SDFModel;
+import org.junit.Test;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+
+public class SDFModelTest {
+
+  /**
+   * Serializes one "add" document and one "delete" document and compares
+   * the JSON produced by SDFModel.toJSON() against a fixed expected string.
+   */
+  @Test
+  public void testToJSON() throws JsonProcessingException {
+    SDFModel model = new SDFModel();
+
+    SDFModel.Document doc = model.new Document();
+    doc.setType("add");
+    doc.setId("aaaabbbbcccc");
+    Map<String,Object> fields = new HashMap<String,Object>();
+    fields.put("title", "The Seeker: The Dark Is Rising");
+    fields.put("director", "Cunningham, David L.");
+    String[] genre = {"Adventure","Drama","Fantasy","Thriller"};
+    fields.put("genre", genre);
+    doc.setFields(fields);
+
+    model.addDocument(doc);
+
+    SDFModel.Document doc2 = model.new Document();
+    doc2.setType("delete");
+    doc2.setId("xxxxxffffddddee");
+    model.addDocument(doc2);
+
+    // A JsonProcessingException propagates so JUnit reports it with its stack trace.
+    String jsonStr = model.toJSON();
+    // NOTE(review): the expected field order (genre, title, director) relies on
+    // HashMap iteration order; confirm it is stable on every supported JVM.
+    String expect = "[{\"type\":\"add\",\"id\":\"aaaabbbbcccc\",\"fields\":{\"genre\":[\"Adventure\",\"Drama\",\"Fantasy\",\"Thriller\"],\"title\":\"The Seeker: The Dark Is Rising\",\"director\":\"Cunningham, David L.\"}},{\"type\":\"delete\",\"id\":\"xxxxxffffddddee\"}]";
+    assertEquals(expect, jsonStr);
+  }
+
+}
diff --git a/connectors/amazoncloudsearch/pom.xml b/connectors/amazoncloudsearch/pom.xml
new file mode 100644
index 0000000..2dad398
--- /dev/null
+++ b/connectors/amazoncloudsearch/pom.xml
@@ -0,0 +1,391 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <groupId>org.apache.manifoldcf</groupId>
+ <artifactId>mcf-connectors</artifactId>
+ <version>1.6-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <developers>
+ <developer>
+ <name>Takumi Yoshida</name>
+ <url>http://yoshi0309.hatenablog.com/</url>
+ </developer>
+ </developers>
+
+ <properties>
+ <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+ <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
+ </properties>
+
+ <artifactId>mcf-amazoncloudsearch-connector</artifactId>
+ <name>ManifoldCF - Connectors - Amazon CloudSearch Output</name>
+
+ <build>
+ <defaultGoal>integration-test</defaultGoal>
+ <sourceDirectory>${basedir}/connector/src/main/java</sourceDirectory>
+ <testSourceDirectory>${basedir}/connector/src/test/java</testSourceDirectory>
+ <resources>
+ <resource>
+ <directory>${basedir}/connector/src/main/native2ascii</directory>
+ <includes>
+ <include>**/*.properties</include>
+ </includes>
+ </resource>
+ <resource>
+ <directory>${basedir}/connector/src/main/resources</directory>
+ <includes>
+ <include>**/*.html</include>
+ <include>**/*.js</include>
+ </includes>
+ </resource>
+ </resources>
+ <testResources>
+ <testResource>
+ <directory>${basedir}/connector/src/test/resources</directory>
+ </testResource>
+ </testResources>
+
+ <plugins>
+ <!-- Converts **/*.properties with native2ascii (encoding UTF8); workDir is target/classes. -->
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>native2ascii-maven-plugin</artifactId>
+ <version>1.0-beta-1</version>
+ <configuration>
+ <workDir>target/classes</workDir>
+ </configuration>
+ <executions>
+ <execution>
+ <id>native2ascii-utf8</id>
+ <goals>
+ <goal>native2ascii</goal>
+ </goals>
+ <configuration>
+ <encoding>UTF8</encoding>
+ <includes>
+ <include>**/*.properties</include>
+ </includes>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <!-- Test plugin configuration -->
+ <plugin>
+ <artifactId>maven-dependency-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>copy-war</id>
+ <phase>generate-resources</phase>
+ <goals>
+ <goal>copy</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/dependency</outputDirectory>
+ <artifactItems>
+ <artifactItem>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-api-service</artifactId>
+ <version>${project.version}</version>
+ <type>war</type>
+ <overWrite>false</overWrite>
+ <destFileName>mcf-api-service.war</destFileName>
+ </artifactItem>
+ <artifactItem>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-authority-service</artifactId>
+ <version>${project.version}</version>
+ <type>war</type>
+ <overWrite>false</overWrite>
+ <destFileName>mcf-authority-service.war</destFileName>
+ </artifactItem>
+ <artifactItem>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-crawler-ui</artifactId>
+ <version>${project.version}</version>
+ <type>war</type>
+ <overWrite>false</overWrite>
+ <destFileName>mcf-crawler-ui.war</destFileName>
+ </artifactItem>
+ </artifactItems>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <!-- Unit tests: Postgresql and MySQL test classes are excluded; forkMode is always and the working directory is target/test-output. -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <configuration>
+ <excludes>
+ <exclude>**/*Postgresql*.java</exclude>
+ <exclude>**/*MySQL*.java</exclude>
+ </excludes>
+ <forkMode>always</forkMode>
+ <workingDirectory>target/test-output</workingDirectory>
+ </configuration>
+ </plugin>
+ <!-- Integration tests: war paths are presumably resolved against the working directory target/test-output, so ../dependency points at target/dependency where copy-war puts the wars. -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <version>2.12.3</version>
+ <configuration>
+ <skipTests>${skipITs}</skipTests>
+ <systemPropertyVariables>
+ <crawlerWarPath>../dependency/mcf-crawler-ui.war</crawlerWarPath>
+ <authorityserviceWarPath>../dependency/mcf-authority-service.war</authorityserviceWarPath>
+ <apiWarPath>../dependency/mcf-api-service.war</apiWarPath>
+ </systemPropertyVariables>
+ <excludes>
+ <exclude>**/*Postgresql*.java</exclude>
+ <exclude>**/*MySQL*.java</exclude>
+ </excludes>
+ <forkMode>always</forkMode>
+ <workingDirectory>target/test-output</workingDirectory>
+ </configuration>
+ <executions>
+ <execution>
+ <id>integration-test</id>
+ <goals>
+ <goal>integration-test</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>verify</id>
+ <goals>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-agents</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-ui-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.httpcomponents</groupId>
+ <artifactId>httpclient</artifactId>
+ <version>${httpcomponent.httpclient.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ <version>2.1.3</version> <!-- the three Jackson modules stay on the 2.1.x versions listed in lib-license/LICENSE.txt -->
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ <version>2.1.3</version> <!-- was 2.3.2, a different release line from jackson-core 2.1.3 -->
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ <version>2.1.2</version> <!-- was 2.3.0; LICENSE.txt lists jackson-annotations-2.1.2.jar -->
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>1.5</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parsers</artifactId>
+ <version>1.5</version>
+ </dependency>
+
+ <!-- Testing dependencies -->
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>${junit.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-core</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-agents</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-pull-agent</artifactId>
+ <version>${project.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <!-- Databases and database drivers (test scope) -->
+ <dependency>
+ <groupId>postgresql</groupId>
+ <artifactId>postgresql</artifactId>
+ <version>${postgresql.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.hsqldb</groupId>
+ <artifactId>hsqldb</artifactId>
+ <version>${hsqldb.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.derby</groupId>
+ <artifactId>derby</artifactId>
+ <version>${derby.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>mysql</groupId>
+ <artifactId>mysql-connector-java</artifactId>
+ <version>${mysql.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <!-- ManifoldCF web applications as war artifacts (test scope); the same three wars are copied by the copy-war execution -->
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-api-service</artifactId>
+ <version>${project.version}</version>
+ <type>war</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-authority-service</artifactId>
+ <version>${project.version}</version>
+ <type>war</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>${project.groupId}</groupId>
+ <artifactId>mcf-crawler-ui</artifactId>
+ <version>${project.version}</version>
+ <type>war</type>
+ <scope>test</scope>
+ </dependency>
+ <!-- Jetty modules (test scope), all on jetty.version -->
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-server</artifactId>
+ <version>${jetty.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-util</artifactId>
+ <version>${jetty.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-webapp</artifactId>
+ <version>${jetty.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-servlet</artifactId>
+ <version>${jetty.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-http</artifactId>
+ <version>${jetty.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-io</artifactId>
+ <version>${jetty.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-security</artifactId>
+ <version>${jetty.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-continuation</artifactId>
+ <version>${jetty.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-xml</artifactId>
+ <version>${jetty.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <!-- JSP 2.1 artifacts from org.mortbay.jetty (test scope) -->
+ <dependency>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jsp-api-2.1-glassfish</artifactId>
+ <version>${glassfish.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jsp-2.1-glassfish</artifactId>
+ <version>${glassfish.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <!-- SLF4J API and slf4j-simple (test scope) -->
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>${slf4j.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-simple</artifactId>
+ <version>${slf4j.version}</version>
+ <scope>test</scope>
+ </dependency>
+
+ </dependencies>
+</project>
diff --git a/connectors/pom.xml b/connectors/pom.xml
index d5f8859..688e5f9 100644
--- a/connectors/pom.xml
+++ b/connectors/pom.xml
@@ -57,6 +57,7 @@
<module>generic</module>
<module>regexpmapper</module>
<module>email</module>
+ <module>amazoncloudsearch</module>
</modules>
</project>
diff --git a/lib-license/LICENSE.txt b/lib-license/LICENSE.txt
index dfb9c5e..6b54daf 100644
--- a/lib-license/LICENSE.txt
+++ b/lib-license/LICENSE.txt
@@ -178,6 +178,7 @@
Includes software from other Apache Software Foundation projects,
including, but not limited to:
+ - Apache Tika
- Apache Tomcat
- Apache Commons
- Apache Geronimo
@@ -299,7 +300,7 @@
This product includes a json-simple-1.1.jar.
License: Apache 2 (http://www.apache.org/licenses/LICENSE-2.0.txt)
-This product includes a jackson-core-2.1.3.jar.
+This product includes a jackson-core-2.1.3.jar, jackson-databind-2.1.3.jar, and jackson-annotations-2.1.2.jar.
License: Dual license; we choose to distribute under Apache 2 (http://www.apache.org/licenses/LICENSE-2.0.txt)
This product includes a google-api-client-1.14.1-beta.jar.
@@ -323,6 +324,9 @@
This product includes a guava.jar.
License: Apache 2 (http://www.apache.org/licenses/LICENSE-2.0.txt)
+This product includes a tagsoup.jar.
+License: Apache 2 (http://home.ccil.org/~cowan/XML/tagsoup/)
+
This product may include pdf files that embed IPA-licensed fonts.
License: IPA Font License Agreement v1.0 (http://ossipedia.ipa.go.jp/ipafont/index.html#LicenseEng)