blob: d1d61691ab5de4943414a6fe73714f18e2c15713 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.camel.component.tika;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import org.apache.camel.EndpointInject;
import org.apache.camel.Exchange;
import org.apache.camel.Predicate;
import org.apache.camel.builder.RouteBuilder;
import org.apache.camel.component.mock.MockEndpoint;
import org.apache.camel.spi.Registry;
import org.apache.camel.support.SimpleRegistry;
import org.apache.camel.test.junit4.CamelTestSupport;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.txt.UniversalEncodingDetector;
import org.junit.Test;
import static org.hamcrest.Matchers.*;
/**
 * Route-level tests for the {@code tika:parse} endpoint.
 *
 * <p>Every route defined in {@link #createRouteBuilder()} ends in {@code mock:result};
 * each test registers its expectations on that mock endpoint, sends a file from
 * {@code src/test/resources} into one of the {@code direct:} entry points and then
 * verifies the parsed body and the {@code Content-Type} header.
 */
public class TikaParseTest extends CamelTestSupport {

    @EndpointInject("mock:result")
    protected MockEndpoint resultEndpoint;

    /**
     * Parses a Word document with the default endpoint options and checks that the
     * textual output contains "test", that its charset is detected as the platform
     * default, and that the content type is {@code application/msword}.
     */
    @Test
    public void testDocumentParse() throws Exception {
        // Expectations are registered before the message is sent so the test does not
        // depend on the route being synchronous.
        resultEndpoint.setExpectedMessageCount(1);
        resultEndpoint.expectedMessagesMatches(new Predicate() {
            @Override
            public boolean matches(Exchange exchange) {
                String body = requireStringBody(exchange);
                // The original test encoded with the platform default charset; keep that explicit.
                Charset platformCharset = Charset.defaultCharset();
                Charset detected = detectCharset(body, platformCharset);
                assertThat(detected.name(), startsWith(platformCharset.name()));
                assertThat(body, containsString("test"));
                assertContentType(exchange, "application/msword");
                return true;
            }
        });

        template.sendBody("direct:start", new File("src/test/resources/test.doc"));

        resultEndpoint.assertIsSatisfied();
    }

    /**
     * Parses an OpenDocument text file through the route configured with
     * {@code tikaParseOutputEncoding=UTF-16}. The body is re-encoded as UTF-16 and the
     * detector must report a charset whose name starts with "UTF-16"; the content type
     * must be {@code application/vnd.oasis.opendocument.text}.
     */
    @Test
    public void testDocumentParseWithEncoding() throws Exception {
        resultEndpoint.setExpectedMessageCount(1);
        resultEndpoint.expectedMessagesMatches(new Predicate() {
            @Override
            public boolean matches(Exchange exchange) {
                String body = requireStringBody(exchange);
                Charset detected = detectCharset(body, StandardCharsets.UTF_16);
                assertThat(detected.name(), startsWith(StandardCharsets.UTF_16.name()));
                assertContentType(exchange, "application/vnd.oasis.opendocument.text");
                return true;
            }
        });

        template.sendBody("direct:start4", new File("src/test/resources/testOpenOffice2.odt"));

        resultEndpoint.assertIsSatisfied();
    }

    /**
     * Parses a GIF image with the default endpoint options; the output is expected to
     * contain an empty {@code <body/>} element and the content type {@code image/gif}.
     */
    @Test
    public void testImageParse() throws Exception {
        resultEndpoint.setExpectedMessageCount(1);
        resultEndpoint.expectedMessagesMatches(bodyContainsWithContentType("<body/>", "image/gif"));

        template.sendBody("direct:start", new File("src/test/resources/testGIF.gif"));

        resultEndpoint.assertIsSatisfied();
    }

    /**
     * Parses a Word document through the route whose Tika configuration is loaded via
     * {@code tikaConfigUri=src/test/resources/tika-empty.xml}.
     *
     * <p>This test previously sent to {@code direct:start3}, which made it an exact
     * duplicate of {@link #testRegistryConfigDocumentParse()} and left the
     * {@code direct:start2} route untested.
     */
    @Test
    public void testEmptyConfigDocumentParse() throws Exception {
        resultEndpoint.setExpectedMessageCount(1);
        // NOTE(review): assumes tika-empty.xml produces the same empty-body output as the
        // registry-bound TikaEmptyConfig used by direct:start3 - confirm when running the suite.
        resultEndpoint.expectedMessagesMatches(bodyContainsWithContentType("<body/>", "application/msword"));

        template.sendBody("direct:start2", new File("src/test/resources/test.doc"));

        resultEndpoint.assertIsSatisfied();
    }

    /**
     * Parses a Word document through the route whose Tika configuration is looked up
     * from the registry ({@code tikaConfig=#testConfig}); the output is expected to
     * contain an empty {@code <body/>} element and the content type
     * {@code application/msword}.
     */
    @Test
    public void testRegistryConfigDocumentParse() throws Exception {
        resultEndpoint.setExpectedMessageCount(1);
        resultEndpoint.expectedMessagesMatches(bodyContainsWithContentType("<body/>", "application/msword"));

        template.sendBody("direct:start3", new File("src/test/resources/test.doc"));

        resultEndpoint.assertIsSatisfied();
    }

    /**
     * Defines the four routes under test. Each one forwards its input to a
     * {@code tika:parse} endpoint with different options and then to {@code mock:result}.
     */
    @Override
    protected RouteBuilder createRouteBuilder() throws Exception {
        return new RouteBuilder() {
            @Override
            public void configure() throws Exception {
                // Default options.
                from("direct:start").to("tika:parse").to("mock:result");
                // Tika configuration loaded from a file path.
                from("direct:start2").to("tika:parse?tikaConfigUri=src/test/resources/tika-empty.xml")
                        .to("mock:result");
                // Tika configuration resolved from the registry entry bound in createCamelRegistry().
                from("direct:start3").to("tika:parse?tikaConfig=#testConfig").to("mock:result");
                // Explicit output encoding.
                from("direct:start4").to("tika:parse?tikaParseOutputEncoding=" + StandardCharsets.UTF_16.name())
                        .to("mock:result");
            }
        };
    }

    /**
     * Creates the registry used by the Camel context and binds a {@code TikaEmptyConfig}
     * under the name {@code testConfig}, which the {@code direct:start3} route references.
     */
    @Override
    protected Registry createCamelRegistry() throws Exception {
        Registry reg = new SimpleRegistry();
        reg.bind("testConfig", new TikaEmptyConfig());
        return reg;
    }

    /**
     * Builds a predicate asserting that the message body contains {@code bodyFragment}
     * and that the {@code Content-Type} header equals {@code contentType}.
     */
    private static Predicate bodyContainsWithContentType(final String bodyFragment, final String contentType) {
        return new Predicate() {
            @Override
            public boolean matches(Exchange exchange) {
                assertThat(requireStringBody(exchange), containsString(bodyFragment));
                assertContentType(exchange, contentType);
                return true;
            }
        };
    }

    /** Returns the IN message body converted to a String, failing the test if it is absent. */
    private static String requireStringBody(Exchange exchange) {
        Object body = exchange.getIn().getBody(String.class);
        assertThat(body, instanceOf(String.class));
        return (String) body;
    }

    /** Asserts that the IN message carries the expected {@code Content-Type} header. */
    private static void assertContentType(Exchange exchange, String expected) {
        Map<String, Object> headers = exchange.getIn().getHeaders();
        assertThat(headers.get(Exchange.CONTENT_TYPE), equalTo((Object) expected));
    }

    /**
     * Encodes {@code text} with {@code encoding} and runs Tika's
     * {@link UniversalEncodingDetector} over the resulting bytes.
     *
     * @return the detected charset, never {@code null}
     * @throws AssertionError if detection throws or yields no result
     */
    private static Charset detectCharset(String text, Charset encoding) {
        Charset detected;
        try (InputStream in = new ByteArrayInputStream(text.getBytes(encoding))) {
            detected = new UniversalEncodingDetector().detect(in, new Metadata());
        } catch (IOException e) {
            // Keep the cause so a failure is diagnosable instead of a bare fail().
            throw new AssertionError("Charset detection failed for text encoded as " + encoding, e);
        }
        // Report a missing result as a test failure rather than a NullPointerException in the caller.
        assertNotNull("No charset detected for text encoded as " + encoding, detected);
        return detected;
    }
}