blob: 5ff0f113f7da7db0dd4632e581b32c92fd093adb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.server.core;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Path;
import javax.ws.rs.ProcessingException;
import javax.ws.rs.core.Response;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.io.FileUtils;
import org.apache.cxf.jaxrs.client.WebClient;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.serialization.JsonFetchEmitTuple;
import org.apache.tika.pipes.FetchEmitTuple;
import org.apache.tika.pipes.HandlerConfig;
import org.apache.tika.pipes.emitter.EmitKey;
import org.apache.tika.pipes.fetcher.FetchKey;
import org.apache.tika.utils.ProcessUtils;
/**
 * Integration tests for the tika-server <code>/pipes</code> endpoint.
 * <p>
 * Each test starts a server process via {@code startProcess} (inherited from
 * {@link IntegrationTestBase}), posts a single fetch-emit tuple for one of the
 * mock files listed in {@link #FILES}, and checks both the JSON returned by the
 * server and whether an emitted <code>.json</code> file shows up in the output
 * directory.
 */
public class TikaServerPipesIntegrationTest extends IntegrationTestBase {

    private static final Logger LOG =
            LoggerFactory.getLogger(TikaServerPipesIntegrationTest.class);

    /** Name of the emitter as declared in the generated tika-config. */
    private static final String EMITTER_NAME = "fse";

    /** Name of the fetcher as declared in the generated tika-config. */
    private static final String FETCHER_NAME = "fsf";

    /** Shared mapper; avoids building a new one for every response. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /** Mock documents copied from the test resources into the fetcher's input directory. */
    private static final String[] FILES =
            new String[]{"hello_world.xml", "heavy_hang_30000.xml", "fake_oom.xml",
                    "system_exit.xml", "null_pointer.xml"};

    // These are assigned once in setUpBeforeClass(), so they cannot be final.
    private static Path TMP_DIR;
    private static Path TMP_OUTPUT_DIR;
    private static Path TIKA_CONFIG;
    private static Path TIKA_CONFIG_TIMEOUT;

    /**
     * Creates the temporary input/output directories, copies the mock documents
     * into the input directory and writes the two tika-config files used by the tests.
     *
     * @throws Exception if a directory, mock file or config file cannot be written
     */
    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        TMP_DIR = Files.createTempDirectory("tika-emitter-test-");
        Path inputDir = TMP_DIR.resolve("input");
        TMP_OUTPUT_DIR = TMP_DIR.resolve("output");
        Files.createDirectories(inputDir);
        Files.createDirectories(TMP_OUTPUT_DIR);
        for (String mockFile : FILES) {
            String resource = "/test-documents/mock/" + mockFile;
            // close the resource stream and fail with a useful message if it is missing
            try (InputStream is = TikaPipesTest.class.getResourceAsStream(resource)) {
                assertTrue("couldn't find test resource: " + resource, is != null);
                Files.copy(is, inputDir.resolve(mockFile));
            }
        }
        TIKA_CONFIG = TMP_DIR.resolve("tika-config.xml");
        TIKA_CONFIG_TIMEOUT = TMP_DIR.resolve("tika-config-timeout.xml");
        //TODO -- clean this up so that port is sufficient and we don't need portString
        // xml1 is left open inside <server><params> so that extra server
        // params can be spliced in between xml1 and xml2.
        String xml1 = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<properties>" + "<fetchers>" +
                "<fetcher class=\"org.apache.tika.pipes.fetcher.FileSystemFetcher\">" + "<params>" +
                "<name>" + FETCHER_NAME + "</name>" +
                "<basePath>" + inputDir.toAbsolutePath() +
                "</basePath>" + "</params>" + "</fetcher>" + "</fetchers>" + "<emitters>" +
                "<emitter class=\"org.apache.tika.pipes.emitter.fs.FileSystemEmitter\">" +
                "<params>" + "<name>" + EMITTER_NAME + "</name>" +
                "<basePath>" + TMP_OUTPUT_DIR.toAbsolutePath() +
                "</basePath>" + "</params>" + "</emitter>" + "</emitters>" + "<server><params>" +
                "<enableUnsecureFeatures>true</enableUnsecureFeatures>" + "<port>9999</port>" +
                "<endpoints>" + "<endpoint>pipes</endpoint>" + "<endpoint>status</endpoint>" +
                "</endpoints>";
        String xml2 = "</params></server>" +
                "<pipes><params><tikaConfig>" +
                ProcessUtils.escapeCommandLine(TIKA_CONFIG.toAbsolutePath().toString()) +
                "</tikaConfig><numClients>10</numClients><forkedJvmArgs><arg>-Xmx256m" +
                "</arg></forkedJvmArgs><timeoutMillis>5000</timeoutMillis>" +
                "</params></pipes>" + "</properties>";

        String tikaConfigXML = xml1 + xml2;
        FileUtils.write(TIKA_CONFIG.toFile(), tikaConfigXML, UTF_8);

        // same config, plus task pulse/timeout settings in the server params
        String tikaConfigTimeoutXML = xml1 + "<taskPulseMillis>100</taskPulseMillis>" +
                "<taskTimeoutMillis>10000</taskTimeoutMillis>" + xml2;
        FileUtils.write(TIKA_CONFIG_TIMEOUT.toFile(), tikaConfigTimeoutXML, UTF_8);
    }

    /**
     * Removes the temporary directory tree created in {@link #setUpBeforeClass()}.
     *
     * @throws Exception if the directory cannot be deleted
     */
    @AfterClass
    public static void tearDownAfterClass() throws Exception {
        // TMP_DIR is still null if setUpBeforeClass() failed early; don't
        // mask that failure with a NullPointerException here.
        if (TMP_DIR != null) {
            FileUtils.deleteDirectory(TMP_DIR.toFile());
        }
    }

    /**
     * Pauses briefly after each test.
     *
     * @throws Exception if the sleep is interrupted
     */
    @After
    public void tear() throws Exception {
        Thread.sleep(500);
    }

    /**
     * Deletes any emitted <code>.json</code> file left over from a previous test
     * so that each test starts with a clean output directory.
     *
     * @throws Exception if a leftover file cannot be deleted
     */
    @Before
    public void setUpEachTest() throws Exception {
        for (String problemFile : FILES) {
            Path targ = TMP_OUTPUT_DIR.resolve(problemFile + ".json");
            Files.deleteIfExists(targ);
            assertFalse(Files.isRegularFile(targ));
        }
    }

    /** A well-behaved file is processed, emitted and reported with status "ok". */
    @Test
    public void testBasic() throws Exception {
        Process p = null;
        try {
            p = launchServer(TIKA_CONFIG);
            JsonNode node = testOne("hello_world.xml", true);
            assertEquals("ok", node.get("status").asText());
        } catch (Exception e) {
            // keep the cause so the report shows where the exception came from
            throw new AssertionError("shouldn't have an exception", e);
        } finally {
            destroyQuietly(p);
        }
    }

    /**
     * With the default (EMIT) parse-exception handling, a file that triggers a
     * NullPointerException is still emitted and the exception is reported.
     */
    @Test
    public void testNPEDefault() throws Exception {
        Process p = null;
        try {
            p = launchServer(TIKA_CONFIG);
            JsonNode node = testOne("null_pointer.xml", true);
            assertEquals("ok", node.get("status").asText());
            assertContains("java.lang.NullPointerException",
                    node.get("parse_exception").asText());
        } catch (Exception e) {
            throw new AssertionError("shouldn't have an exception", e);
        } finally {
            destroyQuietly(p);
        }
    }

    /**
     * With SKIP parse-exception handling, a file that triggers a
     * NullPointerException is not emitted, but the exception is still reported.
     */
    @Test
    public void testNPESkip() throws Exception {
        Process p = null;
        try {
            p = launchServer(TIKA_CONFIG);
            JsonNode node =
                    testOne("null_pointer.xml", false, FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP);
            assertEquals("ok", node.get("status").asText());
            assertContains("java.lang.NullPointerException",
                    node.get("parse_exception").asText());
        } catch (Exception e) {
            throw new AssertionError("shouldn't have an exception", e);
        } finally {
            destroyQuietly(p);
        }
    }

    /** The system_exit mock file yields a "parse_error" reported as "unknown_crash"; nothing is emitted. */
    @Test
    public void testSystemExit() throws Exception {
        Process p = null;
        try {
            p = launchServer(TIKA_CONFIG);
            JsonNode node = testOne("system_exit.xml", false);
            assertEquals("parse_error", node.get("status").asText());
            assertContains("unknown_crash", node.get("parse_error").asText());
        } finally {
            destroyQuietly(p);
        }
    }

    /** The fake_oom mock file yields a "parse_error" reported as "oom"; nothing is emitted. */
    @Test
    public void testOOM() throws Exception {
        Process p = null;
        try {
            p = launchServer(TIKA_CONFIG);
            JsonNode node = testOne("fake_oom.xml", false);
            assertEquals("parse_error", node.get("status").asText());
            assertContains("oom", node.get("parse_error").asText());
        } catch (ProcessingException e) {
            //depending on timing, there may be a connection exception --
            // TODO add more of a delay to server shutdown to ensure message is sent
            // before shutdown.
            LOG.warn("connection problem while waiting for the oom response; tolerated", e);
        } finally {
            destroyQuietly(p);
        }
    }

    /**
     * Using the timeout config, the heavy_hang mock file yields a "parse_error"
     * reported as "timeout"; nothing is emitted.
     */
    @Test
    public void testTimeout() throws Exception {
        Process p = null;
        try {
            p = launchServer(TIKA_CONFIG_TIMEOUT);
            JsonNode node = testOne("heavy_hang_30000.xml", false);
            assertEquals("parse_error", node.get("status").asText());
            assertContains("timeout", node.get("parse_error").asText());
        } finally {
            destroyQuietly(p);
        }
    }

    /** Starts a server process with <code>-config</code> pointing at the given tika-config. */
    private Process launchServer(Path tikaConfig) throws Exception {
        return startProcess(new String[]{"-config",
                ProcessUtils.escapeCommandLine(tikaConfig.toAbsolutePath().toString())});
    }

    /** Forcibly destroys the server process if one was started. */
    private static void destroyQuietly(Process p) {
        if (p != null) {
            p.destroyForcibly();
        }
    }

    /**
     * Same as {@link #testOne(String, boolean, FetchEmitTuple.ON_PARSE_EXCEPTION)}
     * with {@code ON_PARSE_EXCEPTION.EMIT}.
     */
    private JsonNode testOne(String fileName, boolean shouldFileExist) throws Exception {
        return testOne(fileName, shouldFileExist, FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT);
    }

    /**
     * Waits for the server, posts a fetch-emit tuple for {@code fileName} to
     * <code>/pipes</code>, verifies the presence/absence of the emitted file and
     * returns the parsed JSON response body.
     *
     * @param fileName         mock file to fetch; also used as the emit key base
     * @param shouldFileExist  whether <code>fileName + ".json"</code> is expected in the
     *                         output directory after the call
     * @param onParseException parse-exception handling to request in the tuple
     * @return the JSON body returned by the server; never {@code null}
     * @throws Exception on I/O problems or if the server cannot be reached
     */
    private JsonNode testOne(String fileName, boolean shouldFileExist,
                             FetchEmitTuple.ON_PARSE_EXCEPTION onParseException)
            throws Exception {
        awaitServerStartup();
        Response response = WebClient
                .create(endPoint + "/pipes")
                .accept("application/json")
                .post(getJsonString(fileName, onParseException));
        try {
            // Fail here with a clear message rather than handing callers a
            // null node that they would immediately dereference.
            assertEquals("unexpected http status for " + fileName, 200, response.getStatus());
            Path targFile = TMP_OUTPUT_DIR.resolve(fileName + ".json");
            if (shouldFileExist) {
                assertTrue("emitted file should not be empty: " + targFile,
                        Files.size(targFile) > 1);
            } else {
                assertFalse("file should not have been emitted: " + targFile,
                        Files.isRegularFile(targFile));
            }
            try (Reader reader =
                         new InputStreamReader((InputStream) response.getEntity(), UTF_8)) {
                return OBJECT_MAPPER.readTree(reader);
            }
        } finally {
            // release the connection even when an assertion above fails
            response.close();
        }
    }

    /**
     * Builds the JSON request body: a fetch-emit tuple that fetches {@code fileName}
     * with {@link #FETCHER_NAME} and emits with {@link #EMITTER_NAME}.
     *
     * @param fileName         id and fetch key of the tuple
     * @param onParseException parse-exception handling to request
     * @return the tuple serialized as JSON
     * @throws IOException if serialization fails
     */
    private String getJsonString(String fileName,
                                 FetchEmitTuple.ON_PARSE_EXCEPTION onParseException)
            throws IOException {
        FetchEmitTuple t = new FetchEmitTuple(fileName, new FetchKey(FETCHER_NAME, fileName),
                new EmitKey(EMITTER_NAME, ""), new Metadata(),
                HandlerConfig.DEFAULT_HANDLER_CONFIG, onParseException);
        return JsonFetchEmitTuple.toJson(t);
    }
}