/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.osgi;
import com.google.common.collect.Maps;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.WriteOutContentHandler;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.ops4j.pax.exam.Configuration;
import org.ops4j.pax.exam.CoreOptions;
import org.ops4j.pax.exam.Option;
import org.ops4j.pax.exam.junit.PaxExam;
import org.ops4j.pax.exam.options.DefaultCompositeOption;
import org.ops4j.pax.exam.spi.reactors.ExamReactorStrategy;
import org.ops4j.pax.exam.spi.reactors.PerClass;
import org.osgi.framework.Bundle;
import org.osgi.framework.BundleContext;
import org.osgi.framework.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import javax.inject.Inject;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.stream.Collectors;
import static java.util.Arrays.stream;
import static org.junit.Assert.*;
import static org.ops4j.pax.exam.CoreOptions.*;
/**
 * Pax Exam integration test that provisions Tika, POI and their supporting
 * bundles into an OSGi framework and checks that the injected {@link Parser}
 * service extracts the expected text from a handful of sample documents.
 *
 * <p>Bundle versions are not hard-coded here; they are read from the
 * {@code versions.properties} classpath resource, which is expected to have
 * been filtered by Maven so that placeholders are replaced by real versions.
 */
@RunWith(PaxExam.class)
@ExamReactorStrategy(PerClass.class)
public class TikaExtractionOsgiIT {

    private static final Logger LOG = LoggerFactory.getLogger(TikaExtractionOsgiIT.class);

    /** Classpath resource holding the (Maven-filtered) bundle versions. */
    private static final String VERSION_PROP_RESOURCE_NAME = "versions.properties";

    // Keys expected in versions.properties.
    private static final String TIKA_VERSION = "tika";
    private static final String POI_VERSION = "poi";
    private static final String COLLECTIONS4_VERSION = "commons-collections4";
    private static final String COMPRESS_VERSION = "commons-compress";
    private static final String LANG3_VERSION = "commons-lang3";
    private static final String MATH3_VERSION = "commons-math3";

    /** Every key that must be present in {@link #VERSION_PROP_RESOURCE_NAME}, and nothing else. */
    private static final String[] VERSION_KEYS = new String[]{
        TIKA_VERSION, POI_VERSION,
        COLLECTIONS4_VERSION, COMPRESS_VERSION,
        LANG3_VERSION, MATH3_VERSION
    };

    /** Text every sample document is expected to yield after extraction and trimming. */
    private static final String EXPECTED_TEXT_FRAGMENT = "A sample document";

    /**
     * {@code module/package} pairs passed as {@code --add-opens=<pair>=ALL-UNNAMED}
     * when the test runs on a Java version with a major version greater than 1.
     * The order matches the order in which the options are added.
     */
    private static final String[] ADD_OPENS_TARGETS = new String[]{
        "java.base/jdk.internal.loader",
        "java.base/java.lang",
        "java.base/java.lang.invoke",
        "java.base/java.io",
        "java.base/java.net",
        "java.base/java.nio",
        "java.base/java.util",
        "java.base/java.util.jar",
        "java.base/java.util.regex",
        "java.base/java.util.zip",
        "java.base/sun.nio.ch"
    };

    @Inject
    private BundleContext context;

    @Inject
    private Parser registeredParser;

    /**
     * Builds the Pax Exam container configuration: JUnit bundles, the logging
     * API, the {@code repository.home} framework property, the Tika/POI bundle
     * set and any JVM options needed on modular Java runtimes.
     *
     * @return the options used to start the OSGi test container
     * @throws IOException if the version properties or bundle URLs cannot be resolved
     */
    @Configuration
    public Option[] configuration() throws IOException {
        return CoreOptions.options(
                junitBundles(),
                mavenBundle("org.ops4j.pax.logging", "pax-logging-api", "1.7.2"),
                frameworkProperty("repository.home").value("target"),
                setupTikaAndPoi(),
                jpmsOptions()
                // to debug a test, un-comment this and "run" the test which would block due to suspend="y"
                // then run debugger on a remote app with specified port
                // , vmOption( "-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005" )
        );
    }

    /**
     * Returns the JVM options required when the specification version's major
     * component is greater than 1 (i.e. not a {@code 1.x} runtime). On older
     * runtimes the returned composite is empty.
     *
     * @return a composite of {@code --add-modules} / {@code --add-opens} VM options, possibly empty
     */
    private Option jpmsOptions() {
        DefaultCompositeOption jvmFlags = new DefaultCompositeOption();
        int javaMajor = Version.parseVersion(System.getProperty("java.specification.version")).getMajor();
        if (javaMajor <= 1) {
            return jvmFlags;
        }

        // Only request java.se.ee when the runtime image actually contains that module.
        java.nio.file.Path javaSeEe = java.nio.file.FileSystems
                .getFileSystem(URI.create("jrt:/"))
                .getPath("modules", "java.se.ee");
        if (java.nio.file.Files.exists(javaSeEe)) {
            jvmFlags.add(vmOption("--add-modules=java.se.ee"));
        }

        for (String target : ADD_OPENS_TARGETS) {
            jvmFlags.add(vmOption("--add-opens=" + target + "=ALL-UNNAMED"));
        }
        return jvmFlags;
    }

    /**
     * Assembles the Tika bundles, the wrapped POI jars and their dependencies,
     * plus the locally staged jar bundles from {@link #jarBundles()}.
     *
     * @return a composite option containing all extraction-related bundles
     * @throws IOException if the version properties cannot be read or a jar URL is malformed
     */
    private Option setupTikaAndPoi() throws IOException {
        Map<String, String> versions = setupVersions();
        String tika = versions.get(TIKA_VERSION);
        String poi = versions.get(POI_VERSION);

        return composite(
                composite(
                        mavenBundle("org.apache.tika", "tika-core", tika)
                        , mavenBundle("org.apache.tika", "tika-parsers", tika)
                        // poi dependency start
                        , wrappedBundle(mavenBundle("org.apache.poi", "poi", poi))
                        , wrappedBundle(mavenBundle("org.apache.poi", "poi-scratchpad", poi))
                        , wrappedBundle(mavenBundle("org.apache.poi", "poi-ooxml", poi))
                        , wrappedBundle(mavenBundle("org.apache.poi", "poi-ooxml-schemas", poi))
                                .instructions("DynamicImport-Package=*")
                        , wrappedBundle(mavenBundle("org.apache.poi", "ooxml-security", "1.0"))
                        , wrappedBundle(mavenBundle("org.apache.xmlbeans", "xmlbeans", "3.1.0"))
                        , wrappedBundle(mavenBundle("com.drewnoakes", "metadata-extractor", "2.6.2"))
                        , mavenBundle("org.apache.commons", "commons-collections4", versions.get(COLLECTIONS4_VERSION))
                        , mavenBundle("org.apache.commons", "commons-compress", versions.get(COMPRESS_VERSION))
                        , mavenBundle("org.apache.commons", "commons-lang3", versions.get(LANG3_VERSION))
                        , mavenBundle("org.apache.commons", "commons-math3", versions.get(MATH3_VERSION))
                        // poi dependency end
                )
                , jarBundles()
        );
    }

    /**
     * Provisions the jars found under {@code target/test-bundles} as bundles.
     *
     * @return a composite option with one bundle per expected jar file
     * @throws MalformedURLException if a jar path cannot be converted to a URL
     */
    private Option jarBundles() throws MalformedURLException {
        String[] jarNames = new String[]{"commons-io.jar", "commons-codec.jar"};
        File jarDir = new File("target", "test-bundles");
        DefaultCompositeOption bundles = new DefaultCompositeOption();
        for (String jarName : jarNames) {
            File jar = new File(jarDir, jarName);
            bundles.add(bundle(jar.toURI().toURL().toString()));
        }
        return bundles;
    }

    /**
     * Loads {@link #VERSION_PROP_RESOURCE_NAME} from the classpath and returns
     * the version for each key in {@link #VERSION_KEYS}.
     *
     * <p>Fails the test if the resource is missing, if the number of properties
     * differs from the number of expected keys, if a key has no value, or if a
     * value still contains <code>{</code> (taken as a sign that Maven did not
     * filter the placeholder).
     *
     * @return map from version key to resolved version string
     * @throws IOException if the properties resource cannot be read
     */
    private static Map<String, String> setupVersions() throws IOException {
        Properties props = new Properties();
        // Properties.load does not close the stream, so close it here; a missing
        // resource is reported explicitly instead of surfacing as a bare NPE.
        try (InputStream in = TikaExtractionOsgiIT.class.getClassLoader()
                .getResourceAsStream(VERSION_PROP_RESOURCE_NAME)) {
            assertNotNull("Resource not found on classpath: " + VERSION_PROP_RESOURCE_NAME, in);
            props.load(in);
        }
        assertEquals("Unexpected number of properties found in " + VERSION_PROP_RESOURCE_NAME,
                VERSION_KEYS.length, props.size());

        Map<String, String> versions = Maps.newHashMap();
        for (String versionKey : VERSION_KEYS) {
            String version = props.getProperty(versionKey);
            assertNotNull("Version value not found for " + versionKey, version);
            assertFalse("Version (key: " + versionKey + ", value: " + version + ") didn't get filtered by maven",
                    version.contains("{"));
            versions.put(versionKey, version);
        }
        return versions;
    }

    /** Logs every bundle installed in the test container together with its version. */
    @Test
    public void listBundles() {
        for (Bundle bundle : context.getBundles()) {
            LOG.info("Bundle listing :: {} - {}", bundle, bundle.getVersion());
        }
    }

    @Test
    public void doc() throws Exception {
        assertFileContains("test.doc");
    }

    @Test
    public void docx() throws Exception {
        assertFileContains("test.docx");
    }

    @Test
    public void rtf() throws Exception {
        assertFileContains("test.rtf");
    }

    @Test
    public void text() throws Exception {
        assertFileContains("test.txt");
    }

    /**
     * Parses the named class-relative resource with an {@link AutoDetectParser}
     * wrapping the injected parser and asserts on the extracted text.
     *
     * <p>Despite the method name, the check is an exact match: the trimmed
     * extracted text must equal {@link #EXPECTED_TEXT_FRAGMENT}.
     *
     * @param resName resource name resolved relative to this class
     * @throws Exception if the resource cannot be read or parsing fails
     */
    private void assertFileContains(String resName) throws Exception {
        AutoDetectParser parser = new AutoDetectParser(registeredParser);
        ContentHandler handler = new WriteOutContentHandler();
        Metadata metadata = new Metadata();
        try (InputStream input = getClass().getResourceAsStream(resName)) {
            assertNotNull("Input stream must not be null", input);
            parser.parse(input, handler, metadata);
            String actual = handler.toString().trim();
            assertEquals(EXPECTED_TEXT_FRAGMENT, actual);
        }
    }
}