TIKA-3251 -- WIP -- do not merge -- first steps towards adding fetchers
diff --git a/pom.xml b/pom.xml
index 87328c4..920e937 100644
--- a/pom.xml
+++ b/pom.xml
@@ -50,6 +50,7 @@
<module>tika-example</module>
<module>tika-java7</module>
<module>tika-eval</module>
+ <module>tika-fetchers</module>
</modules>
<profiles>
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 3eeb3e7..0a50914 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -48,6 +48,8 @@
import org.apache.tika.detect.EncodingDetector;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.fetcher.DefaultFetcher;
+import org.apache.tika.fetcher.Fetcher;
import org.apache.tika.language.translate.DefaultTranslator;
import org.apache.tika.language.translate.Translator;
import org.apache.tika.metadata.filter.CompositeMetadataFilter;
@@ -112,6 +114,10 @@
return new DefaultMetadataFilter(loader);
}
+    /** Creates the fallback {@link DefaultFetcher}, built from the given service loader. */
+    private static Fetcher getDefaultFetcher(ServiceLoader loader) {
+        return new DefaultFetcher(loader);
+    }
//use this to look for unneeded instantiations of TikaConfig
protected static AtomicInteger TIMES_INSTANTIATED = new AtomicInteger();
@@ -124,6 +130,7 @@
private final ExecutorService executorService;
private final EncodingDetector encodingDetector;
private final MetadataFilter metadataFilter;
+ private final Fetcher fetcher;
public TikaConfig(String file)
throws TikaException, IOException, SAXException {
@@ -200,6 +207,7 @@
this.translator = translatorLoader.loadOverall(element, mimeTypes, loader);
this.executorService = executorLoader.loadOverall(element, mimeTypes, loader);
this.metadataFilter = metadataFilterXmlLoader.loadOverall(element, mimeTypes, loader);
+ this.fetcher = new FetcherXmlLoader().loadOverall(element, mimeTypes, loader);
this.serviceLoader = loader;
TIMES_INSTANTIATED.incrementAndGet();
}
@@ -226,6 +234,7 @@
this.translator = getDefaultTranslator(serviceLoader);
this.executorService = getDefaultExecutorService();
this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
+ this.fetcher = getDefaultFetcher(serviceLoader);
TIMES_INSTANTIATED.incrementAndGet();
}
@@ -262,6 +271,7 @@
this.translator = getDefaultTranslator(serviceLoader);
this.executorService = getDefaultExecutorService();
this.metadataFilter = getDefaultMetadataFilter(serviceLoader);
+ this.fetcher = getDefaultFetcher(serviceLoader);
} else {
ServiceLoader tmpServiceLoader = new ServiceLoader();
try (InputStream stream = getConfigInputStream(config, tmpServiceLoader)) {
@@ -284,6 +294,7 @@
this.translator = translatorLoader.loadOverall(element, mimeTypes, serviceLoader);
this.executorService = executorLoader.loadOverall(element, mimeTypes, serviceLoader);
this.metadataFilter = metadataFilterXmlLoader.loadOverall(element, mimeTypes, serviceLoader);
+ this.fetcher = new FetcherXmlLoader().loadOverall(element, mimeTypes, serviceLoader);
} catch (SAXException e) {
throw new TikaException(
"Specified Tika configuration has syntax errors: "
@@ -560,6 +571,10 @@
initializableProblemHandler));
}
+    /** Returns the fetcher chosen at construction: loaded from the config XML, or the default one. */
+    public Fetcher getFetcher() {
+        return fetcher;
+    }
private static abstract class XmlLoader<CT,T> {
protected static final String PARAMS_TAG_NAME = "params";
@@ -1262,4 +1277,90 @@
}
}
+    /** Loads the {@code <fetchers>} section of the config XML into a single {@link Fetcher}. */
+    private static class FetcherXmlLoader extends
+            XmlLoader<Fetcher, Fetcher> {
+
+        boolean supportsComposite() {
+            return true;
+        }
+
+        String getParentTagName() {
+            return "fetchers";
+        }
+
+        String getLoaderTagName() {
+            return "fetcher";
+        }
+
+        @Override
+        Class<? extends Fetcher> getLoaderClass() {
+            return Fetcher.class;
+        }
+
+        /** An instance counts as composite when it is a {@link DefaultFetcher}. */
+        @Override
+        boolean isComposite(Fetcher loaded) {
+            return loaded instanceof DefaultFetcher;
+        }
+
+        @Override
+        boolean isComposite(Class<? extends Fetcher> loadedClass) {
+            return DefaultFetcher.class.isAssignableFrom(loadedClass);
+        }
+
+        @Override
+        Fetcher preLoadOne(Class<? extends Fetcher> loadedClass,
+                           String classname, MimeTypes mimeTypes) throws TikaException {
+            // Check for classes which can't be set in config
+            // Continue with normal loading
+            return null;
+        }
+
+        @Override
+        Fetcher createDefault(MimeTypes mimeTypes, ServiceLoader loader) {
+            return getDefaultFetcher(loader);
+        }
+
+        //this ignores the service loader
+        @Override
+        Fetcher createComposite(List<Fetcher> loaded, MimeTypes mimeTypes, ServiceLoader loader) {
+            return new DefaultFetcher(loaded);
+        }
+
+        /**
+         * Reflectively instantiates a composite fetcher of the requested class. The
+         * {@code (ServiceLoader, Collection)} constructor is tried first, then the
+         * {@code (List)} constructor; a class lacking one of them is an expected case,
+         * not an error, so nothing is printed when a lookup fails.
+         * @return the new composite, or {@code null} if the class has neither constructor
+         */
+        @Override
+        Fetcher createComposite(Class<? extends Fetcher> fetcherClass,
+                                List<Fetcher> childFetchers,
+                                Set<Class<? extends Fetcher>> excludeFilters,
+                                Map<String, Param> params, MimeTypes mimeTypes, ServiceLoader loader)
+                throws InvocationTargetException, IllegalAccessException,
+                InstantiationException {
+            // Preferred form: (ServiceLoader, Collection of excluded fetcher classes)
+            try {
+                return fetcherClass.getConstructor(ServiceLoader.class, Collection.class)
+                        .newInstance(loader, excludeFilters);
+            } catch (NoSuchMethodException ignored) {
+                // Expected for classes without this constructor (e.g. DefaultFetcher); try the next
+            }
+            // Fallback form: (List of the explicitly configured child fetchers)
+            try {
+                return fetcherClass.getConstructor(List.class).newInstance(childFetchers);
+            } catch (NoSuchMethodException ignored) {
+                // Not an error here: the caller receives null when no constructor matches
+            }
+            return null;
+        }
+
+        @Override
+        Fetcher decorate(Fetcher created, Element element) {
+            return created; // No decoration of Fetchers
+        }
+    }
}
diff --git a/tika-core/src/main/java/org/apache/tika/exception/NoFetcherAvailableException.java b/tika-core/src/main/java/org/apache/tika/exception/NoFetcherAvailableException.java
new file mode 100644
index 0000000..7f02ff9
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/exception/NoFetcherAvailableException.java
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.exception;
+
+/** Thrown by DefaultFetcher when none of its fetchers reports that it can fetch a URL. */
+public class NoFetcherAvailableException extends TikaException {
+    public NoFetcherAvailableException(String msg) {
+        super(msg);
+    }
+    public NoFetcherAvailableException(String msg, Throwable cause) {
+        super(msg, cause);
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetcher.java b/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetcher.java
new file mode 100644
index 0000000..868edab
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/DefaultFetcher.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fetcher;
+
+import org.apache.tika.config.ServiceLoader;
+import org.apache.tika.exception.NoFetcherAvailableException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.utils.ServiceLoaderUtils;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+import java.util.Optional;
+
+/** Composite {@link Fetcher} that delegates to the first member able to fetch a URL. */
+public class DefaultFetcher implements Fetcher {
+
+    private final List<Fetcher> fetchers;
+
+    public DefaultFetcher() {
+        this(new ServiceLoader());
+    }
+
+    /** Loads Fetcher service providers and passes them through sortLoadedClasses. */
+    public DefaultFetcher(ServiceLoader serviceLoader) {
+        List<Fetcher> discovered = serviceLoader.loadServiceProviders(Fetcher.class);
+        ServiceLoaderUtils.sortLoadedClasses(discovered);
+        this.fetchers = discovered;
+    }
+
+    /** Uses exactly the supplied fetchers, in the supplied order (the list is not copied). */
+    public DefaultFetcher(List<Fetcher> fetchers) {
+        this.fetchers = fetchers;
+    }
+
+    @Override
+    public boolean canFetch(String url) {
+        return fetchers.stream().anyMatch(candidate -> candidate.canFetch(url));
+    }
+
+    /** Delegates to the first accepting member; throws NoFetcherAvailableException if none accepts. */
+    @Override
+    public Optional<InputStream> fetch(String url, Metadata metadata) throws TikaException, IOException {
+        for (Fetcher candidate : fetchers) {
+            if (candidate.canFetch(url)) {
+                return candidate.fetch(url, metadata);
+            }
+        }
+        StringBuilder tried = new StringBuilder();
+        for (Fetcher candidate : fetchers) {
+            if (tried.length() > 0) {
+                tried.append(", ");
+            }
+            tried.append(candidate.getClass());
+        }
+        throw new NoFetcherAvailableException("No suitable fetcher found for: "
+                + url + " in " + tried);
+    }
+
+    /** Exposes the member fetchers (the same list instance this composite iterates). */
+    public List<Fetcher> getFetchers() {
+        return fetchers;
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/fetcher/Fetcher.java b/tika-core/src/main/java/org/apache/tika/fetcher/Fetcher.java
new file mode 100644
index 0000000..688e687
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/Fetcher.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fetcher;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Optional;
+
+/**
+ * Based on a key, this will fetch a resource and update the
+ * metadata.  There are some use cases where the goal is simply
+ * to update the metadata, in which case the InputStream is
+ * not present.
+ */
+public interface Fetcher {
+    /** Reports whether this fetcher is able to handle the given url. */
+    boolean canFetch(String url);
+    /** Fetches the resource for {@code url}; the returned stream may be absent, as described above. */
+    Optional<InputStream> fetch(String url, Metadata metadata) throws TikaException, IOException;
+}
diff --git a/tika-core/src/main/java/org/apache/tika/fetcher/FileFetcher.java b/tika-core/src/main/java/org/apache/tika/fetcher/FileFetcher.java
new file mode 100644
index 0000000..6b8ad90
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/FileFetcher.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fetcher;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Paths;
+import java.util.Optional;
+
+/** Fetches local files addressed by {@code file:} URIs such as {@code file:///tmp/doc.pdf}. */
+public class FileFetcher implements Fetcher {
+
+    private static final String PREFIX = "file:";
+
+    @Override
+    public boolean canFetch(String url) {
+        return url.startsWith(PREFIX);
+    }
+
+    /** Opens the file a valid, absolute {@code file:} URI points to; metadata is not modified. */
+    @Override
+    public Optional<InputStream> fetch(String url, Metadata metadata) throws TikaException, IOException {
+        // Parse as a URI: Paths.get(String) would keep the "file:" scheme as part of the file name.
+        return Optional.of(TikaInputStream.get(Paths.get(java.net.URI.create(url))));
+    }
+}
diff --git a/tika-core/src/main/java/org/apache/tika/fetcher/URLFetcher.java b/tika-core/src/main/java/org/apache/tika/fetcher/URLFetcher.java
new file mode 100644
index 0000000..4a09e26
--- /dev/null
+++ b/tika-core/src/main/java/org/apache/tika/fetcher/URLFetcher.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fetcher;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.Optional;
+
+/** Fetches resources whose URL starts with {@code http:}, {@code https:} or {@code ftp:}. */
+public class URLFetcher implements Fetcher {
+
+    private static final String HTTP_PREFIX = "http:";
+    private static final String HTTPS_PREFIX = "https:";
+    private static final String FTP_PREFIX = "ftp:";
+
+    /** Accepts any URL beginning with one of the supported prefixes (case-sensitive match). */
+    @Override
+    public boolean canFetch(String url) {
+        return url.startsWith(HTTP_PREFIX)
+                || url.startsWith(HTTPS_PREFIX)
+                || url.startsWith(FTP_PREFIX);
+    }
+
+    /** Wraps the resource behind {@code url} in a TikaInputStream; metadata is not referenced. */
+    @Override
+    public Optional<InputStream> fetch(String url, Metadata metadata) throws TikaException, IOException {
+        URL target = new URL(url);
+        return Optional.of(TikaInputStream.get(target));
+    }
+}
diff --git a/tika-core/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher b/tika-core/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher
new file mode 100644
index 0000000..decdd2f
--- /dev/null
+++ b/tika-core/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+org.apache.tika.fetcher.FileFetcher
+org.apache.tika.fetcher.URLFetcher
\ No newline at end of file
diff --git a/tika-fetchers/pom.xml b/tika-fetchers/pom.xml
new file mode 100644
index 0000000..e7954d8
--- /dev/null
+++ b/tika-fetchers/pom.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>tika-parent</artifactId>
+ <groupId>org.apache.tika</groupId>
+ <version>2.0.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>tika-fetchers</artifactId>
+ <packaging>pom</packaging>
+
+ <modules>
+ <module>s3-fetcher</module>
+ </modules>
+
+
+</project>
\ No newline at end of file
diff --git a/tika-fetchers/s3-fetcher/pom.xml b/tika-fetchers/s3-fetcher/pom.xml
new file mode 100644
index 0000000..a8e4b6d
--- /dev/null
+++ b/tika-fetchers/s3-fetcher/pom.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <parent>
+ <artifactId>tika-fetchers</artifactId>
+ <groupId>org.apache.tika</groupId>
+ <version>2.0.0-SNAPSHOT</version>
+ </parent>
+ <modelVersion>4.0.0</modelVersion>
+
+ <artifactId>s3-fetcher</artifactId>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.amazonaws</groupId>
+ <artifactId>aws-java-sdk-s3</artifactId>
+ <version>1.11.920</version>
+ <scope>provided</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/tika-fetchers/s3-fetcher/src/main/java/org/apache/tika/fetcher/s3/S3Fetcher.java b/tika-fetchers/s3-fetcher/src/main/java/org/apache/tika/fetcher/s3/S3Fetcher.java
new file mode 100644
index 0000000..0ea5452
--- /dev/null
+++ b/tika-fetchers/s3-fetcher/src/main/java/org/apache/tika/fetcher/s3/S3Fetcher.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.fetcher.s3;
+
+import com.amazonaws.auth.profile.ProfileCredentialsProvider;
+import com.amazonaws.regions.Regions;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3ClientBuilder;
+import com.amazonaws.services.s3.model.GetObjectRequest;
+import com.amazonaws.services.s3.model.ObjectMetadata;
+import com.amazonaws.services.s3.model.S3Object;
+import org.apache.tika.config.Field;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.fetcher.Fetcher;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Map;
+import java.util.Optional;
+
+public class S3Fetcher implements Fetcher {
+
+    private static final String PREFIX = "s3:";
+
+    @Field
+    private String bucket;
+
+    @Field
+    private String key;
+
+    @Field
+    private String region;
+
+    @Override
+    public boolean canFetch(String url) {
+        return url.startsWith(PREFIX);
+    }
+
+    /** Fetches the configured bucket/key object; the caller must close the returned stream. */
+    @Override
+    public Optional<InputStream> fetch(String url, Metadata metadata) throws TikaException, IOException {
+        //TODO cache this client so we're not starting a new one with every request
+        AmazonS3 s3Client = AmazonS3ClientBuilder.standard()
+                .withRegion(getRegion())
+                .withCredentials(new ProfileCredentialsProvider())
+                .build();
+        S3Object fullObject = s3Client.getObject(new GetObjectRequest(bucket, key));
+        try {
+            updateMetadata(fullObject.getObjectMetadata(), metadata);
+            // Do not close fullObject here: that would close the content stream being returned
+            return Optional.of(TikaInputStream.get(fullObject.getObjectContent()));
+        } catch (RuntimeException e) {
+            try {
+                fullObject.close();
+            } catch (IOException closeFailure) {
+                e.addSuppressed(closeFailure);
+            }
+            throw e;
+        }
+    }
+
+    private void updateMetadata(ObjectMetadata objectMetadata, Metadata metadata) {
+        //TODO: what else do we want to grab?
+        for (Map.Entry<String, String> e : objectMetadata.getUserMetadata().entrySet()) {
+            metadata.add(PREFIX+e.getKey(), e.getValue());
+        }
+    }
+
+    public Regions getRegion() {
+        return region == null ? Regions.DEFAULT_REGION : Regions.fromName(region);
+    }
+
+    public String getBucket() {
+        return bucket;
+    }
+
+    public String getKey() {
+        return key;
+    }
+}
diff --git a/tika-fetchers/s3-fetcher/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher b/tika-fetchers/s3-fetcher/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher
new file mode 100644
index 0000000..a41f741
--- /dev/null
+++ b/tika-fetchers/s3-fetcher/src/main/resources/META-INF/services/org.apache.tika.fetcher.Fetcher
@@ -0,0 +1,15 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+org.apache.tika.fetcher.s3.S3Fetcher
\ No newline at end of file
diff --git a/tika-fetchers/s3-fetcher/src/test/java/org/apache/tika/fetcher/s3/S3FetcherTest.java b/tika-fetchers/s3-fetcher/src/test/java/org/apache/tika/fetcher/s3/S3FetcherTest.java
new file mode 100644
index 0000000..e964739
--- /dev/null
+++ b/tika-fetchers/s3-fetcher/src/test/java/org/apache/tika/fetcher/s3/S3FetcherTest.java
@@ -0,0 +1,27 @@
+package org.apache.tika.fetcher.s3;
+
+import com.amazonaws.regions.Regions;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.fetcher.DefaultFetcher;
+import org.apache.tika.fetcher.Fetcher;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+public class S3FetcherTest {
+    @Test
+    public void testBasic() throws Exception {
+        TikaConfig config = new TikaConfig(
+                S3FetcherTest.class.getResourceAsStream("/org/apache/tika/fetcher/s3/S3TikaConfig.xml"));
+        S3Fetcher s3Fetcher = null;
+        for (Fetcher fetcher : ((DefaultFetcher) config.getFetcher()).getFetchers()) {
+            if (fetcher instanceof S3Fetcher) {
+                s3Fetcher = (S3Fetcher) fetcher;
+            }
+        }
+        org.junit.Assert.assertNotNull("no S3Fetcher was loaded from S3TikaConfig.xml", s3Fetcher);
+        assertEquals(Regions.US_WEST_2, s3Fetcher.getRegion());
+        assertEquals("myKey", s3Fetcher.getKey());
+        assertEquals("myBucket", s3Fetcher.getBucket());
+    }
+}
diff --git a/tika-fetchers/s3-fetcher/src/test/resources/org/apache/tika/fetcher/s3/S3TikaConfig.xml b/tika-fetchers/s3-fetcher/src/test/resources/org/apache/tika/fetcher/s3/S3TikaConfig.xml
new file mode 100644
index 0000000..ffbb022
--- /dev/null
+++ b/tika-fetchers/s3-fetcher/src/test/resources/org/apache/tika/fetcher/s3/S3TikaConfig.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <fetchers class="org.apache.tika.fetcher.DefaultFetcher">
+ <fetcher class="org.apache.tika.fetcher.s3.S3Fetcher">
+ <params>
+ <param name="region" type="string">us-west-2</param>
+ <param name="bucket" type="string">myBucket</param>
+ <param name="key" type="string">myKey</param>
+ </params>
+ </fetcher>
+ </fetchers>
+</properties>
\ No newline at end of file