blob: 17c38df7ba0ba04ded66223651278e4f14808ff0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.server.standard;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.cxf.helpers.IOUtils;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.junit.Assert;
import org.junit.Ignore;
import org.junit.Test;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.serialization.JsonMetadata;
import org.apache.tika.server.core.CXFTestBase;
import org.apache.tika.server.core.resource.MetadataResource;
import org.apache.tika.server.core.writer.CSVMessageBodyWriter;
import org.apache.tika.server.core.writer.JSONMessageBodyWriter;
import org.apache.tika.server.core.writer.TextMessageBodyWriter;
import org.apache.tika.server.standard.resource.XMPMetadataResource;
import org.apache.tika.server.standard.writer.XMPMessageBodyWriter;
/**
 * Integration tests for the {@code /meta} endpoint served by {@link MetadataResource}
 * and {@link XMPMetadataResource}.
 * <p>
 * Each test PUTs a document from the test classpath to the endpoint and inspects the
 * response rendered as CSV, JSON, XMP (RDF/XML) or plain text, depending on the
 * {@code Accept} header of the request.
 */
public class MetadataResourceTest extends CXFTestBase {

    private static final String META_PATH = "/meta";

    /**
     * Opens a test document from the system class path.
     *
     * @param resourceName class path location of the document
     * @return an open stream over the document; the caller is responsible for closing it
     */
    private static InputStream openTestResource(String resourceName) {
        InputStream stream = ClassLoader.getSystemResourceAsStream(resourceName);
        // Fail loudly here rather than PUT-ing a null body and getting a confusing
        // server-side error later.
        assertNotNull("test resource not found on classpath: " + resourceName, stream);
        return stream;
    }

    /**
     * PUTs the complete test document to the given client and closes the document
     * stream once the request has returned.
     *
     * @param client       fully configured client (URL, content type, accept, headers)
     * @param resourceName class path location of the document to send
     * @return the server response
     */
    private static Response putResource(WebClient client, String resourceName)
            throws Exception {
        try (InputStream document = openTestResource(resourceName)) {
            return client.put(document);
        }
    }

    /**
     * PUTs the result of {@code copy(document, length)} for the given test document
     * and closes the original document stream afterwards.
     *
     * @param client       fully configured client (URL, content type, accept, headers)
     * @param resourceName class path location of the document to send
     * @param length       value forwarded to {@code copy}; presumably the number of
     *                     leading bytes to keep -- confirm against CXFTestBase
     * @return the server response
     */
    private Response putTruncatedResource(WebClient client, String resourceName, int length)
            throws Exception {
        try (InputStream document = openTestResource(resourceName)) {
            return client.put(copy(document, length));
        }
    }

    /**
     * Reads a two-column CSV response entity into a map keyed on the first column.
     * The reader and parser are closed even when parsing fails.
     *
     * @param response response whose entity is a CSV stream
     * @return first column mapped to second column; later rows overwrite earlier
     *         rows that share a key
     */
    private static Map<String, String> readCsvMetadata(Response response) throws Exception {
        Map<String, String> metadata = new HashMap<>();
        try (Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8);
                CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL)) {
            for (CSVRecord row : parser) {
                metadata.put(row.get(0), row.get(1));
            }
        }
        return metadata;
    }

    /**
     * Deserializes a JSON response entity into a {@link Metadata} object and closes
     * the underlying reader.
     *
     * @param response response whose entity is a JSON stream
     * @return the deserialized metadata
     */
    private static Metadata readJsonMetadata(Response response) throws Exception {
        try (Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8)) {
            return JsonMetadata.fromJson(reader);
        }
    }

    /**
     * Reads the whole response entity as a string and closes the entity stream.
     *
     * @param response response whose entity is an {@link InputStream}
     * @return the entity content
     */
    private static String readEntityAsString(Response response) throws Exception {
        try (InputStream entity = (InputStream) response.getEntity()) {
            return IOUtils.readStringFromStream(entity);
        }
    }

    /** Registers the two metadata resources under test as singletons. */
    @Override
    protected void setUpResources(JAXRSServerFactoryBean sf) {
        sf.setResourceClasses(MetadataResource.class, XMPMetadataResource.class);
        sf.setResourceProvider(MetadataResource.class,
                new SingletonResourceProvider(new MetadataResource()));
        sf.setResourceProvider(XMPMetadataResource.class,
                new SingletonResourceProvider(new XMPMetadataResource()));
    }

    /** Registers the JSON, CSV, XMP and text message body writers. */
    @Override
    protected void setUpProviders(JAXRSServerFactoryBean sf) {
        List<Object> providers = new ArrayList<>();
        providers.add(new JSONMessageBodyWriter());
        providers.add(new CSVMessageBodyWriter());
        providers.add(new XMPMessageBodyWriter());
        providers.add(new TextMessageBodyWriter());
        sf.setProviders(providers);
    }

    /** A Word document requested as CSV yields the creator and the MD5 digest. */
    @Test
    public void testSimpleWord() throws Exception {
        WebClient client =
                WebClient.create(endPoint + META_PATH).type("application/msword")
                        .accept("text/csv");
        Response response = putResource(client, TikaResourceTest.TEST_DOC);
        // Check the status first so that a server error is not reported as a
        // misleading "missing metadata" failure below.
        assertEquals(200, response.getStatus());

        Map<String, String> metadata = readCsvMetadata(response);
        assertNotNull(metadata.get(TikaCoreProperties.CREATOR.getName()));
        assertEquals("Maxim Valyanskiy", metadata.get(TikaCoreProperties.CREATOR.getName()));
        assertEquals("X-TIKA:digest:MD5", "f8be45c34e8919eedba48cc8d207fbf0",
                metadata.get("X-TIKA:digest:MD5"));
    }

    /**
     * A password-protected spreadsheet is rejected with a 500 when no password or a
     * wrong password is supplied, and is parsed when the {@code Password} header
     * carries the right one.
     */
    @Test
    public void testPasswordProtected() throws Exception {
        Response response = putResource(
                WebClient.create(endPoint + META_PATH).type("application/vnd.ms-excel")
                        .accept("text/csv"),
                TikaResourceTest.TEST_PASSWORD_PROTECTED);
        // Won't work, no password given
        assertEquals(500, response.getStatus());
        // Release the failed response before the variable is reused.
        response.close();

        // Try again, this time with the wrong password
        response = putResource(
                WebClient.create(endPoint + META_PATH).type("application/vnd.ms-excel")
                        .accept("text/csv").header("Password", "wrong password"),
                TikaResourceTest.TEST_PASSWORD_PROTECTED);
        assertEquals(500, response.getStatus());
        response.close();

        // Try again, this time with the password
        response = putResource(
                WebClient.create(endPoint + META_PATH).type("application/vnd.ms-excel")
                        .accept("text/csv").header("Password", "password"),
                TikaResourceTest.TEST_PASSWORD_PROTECTED);
        // Will work
        assertEquals(200, response.getStatus());

        // Check results
        Map<String, String> metadata = readCsvMetadata(response);
        assertNotNull(metadata.get(TikaCoreProperties.CREATOR.getName()));
        assertEquals("pavel", metadata.get(TikaCoreProperties.CREATOR.getName()));
    }

    /** A Word document requested as JSON yields the creator. */
    @Test
    public void testJSON() throws Exception {
        WebClient client = WebClient.create(endPoint + META_PATH).type("application/msword")
                .accept("application/json");
        Response response = putResource(client, TikaResourceTest.TEST_DOC);
        assertEquals(200, response.getStatus());

        Metadata metadata = readJsonMetadata(response);
        assertNotNull(metadata.get(TikaCoreProperties.CREATOR));
        assertEquals("Maxim Valyanskiy", metadata.get(TikaCoreProperties.CREATOR));
    }

    /** A Word document requested as RDF/XML contains the creator as an rdf:li entry. */
    @Test
    public void testXMP() throws Exception {
        WebClient client = WebClient.create(endPoint + META_PATH).type("application/msword")
                .accept("application/rdf+xml");
        Response response = putResource(client, TikaResourceTest.TEST_DOC);
        assertEquals(200, response.getStatus());

        String result = readEntityAsString(response);
        assertContains("<rdf:li>Maxim Valyanskiy</rdf:li>", result);
    }

    //Now test requesting one field

    /** Requesting a metadata field that does not exist yields 404. */
    @Test
    public void testGetField_XXX_NotFound() throws Exception {
        WebClient client =
                WebClient.create(endPoint + META_PATH + "/xxx").type("application/msword")
                        .accept(MediaType.APPLICATION_JSON);
        Response response = putResource(client, TikaResourceTest.TEST_DOC);
        Assert.assertEquals(Response.Status.NOT_FOUND.getStatusCode(), response.getStatus());
    }

    /**
     * Requesting a single field as text with {@code copy(..., 8000)} of the document
     * yields 400.
     */
    @Test
    public void testGetField_Author_TEXT_Partial_BAD_REQUEST() throws Exception {
        // NOTE(review): presumably 8000 bytes is too little of the document for the
        // request to succeed -- confirm against the resource.
        WebClient client =
                WebClient.create(endPoint + META_PATH + "/Author").type("application/msword")
                        .accept(MediaType.TEXT_PLAIN);
        Response response = putTruncatedResource(client, TikaResourceTest.TEST_DOC, 8000);
        Assert.assertEquals(Response.Status.BAD_REQUEST.getStatusCode(), response.getStatus());
    }

    /**
     * Requesting the creator field as text with {@code copy(..., 12000)} of the
     * document yields the creator as the whole response body.
     */
    @Test
    @Ignore("TODO: add back in xmp handler")
    public void testGetField_Author_TEXT_Partial_Found() throws Exception {
        WebClient client =
                WebClient.create(endPoint + META_PATH + "/" + TikaCoreProperties.CREATOR.getName())
                        .type("application/msword").accept(MediaType.TEXT_PLAIN);
        Response response = putTruncatedResource(client, TikaResourceTest.TEST_DOC, 12000);
        Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());

        String s = readEntityAsString(response);
        assertEquals("Maxim Valyanskiy", s);
    }

    /**
     * Requesting the creator field as JSON with {@code copy(..., 12000)} of the
     * document yields a metadata object holding exactly that one field.
     */
    @Test
    public void testGetField_Author_JSON_Partial_Found() throws Exception {
        WebClient client =
                WebClient.create(endPoint + META_PATH + "/" + TikaCoreProperties.CREATOR.getName())
                        .type("application/msword").accept(MediaType.APPLICATION_JSON);
        Response response = putTruncatedResource(client, TikaResourceTest.TEST_DOC, 12000);
        Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());

        Metadata metadata = readJsonMetadata(response);
        assertEquals("Maxim Valyanskiy", metadata.get(TikaCoreProperties.CREATOR));
        // Only the requested field may be present.
        assertEquals(1, metadata.names().length);
    }

    /**
     * Requesting {@code dc:creator} as RDF/XML with {@code copy(..., 12000)} of the
     * document yields the creator as an rdf:li entry.
     */
    @Test
    @Ignore("TODO: until we can reintegrate xmpwriter")
    public void testGetField_Author_XMP_Partial_Found() throws Exception {
        WebClient client =
                WebClient.create(endPoint + META_PATH + "/dc:creator").type("application/msword")
                        .accept("application/rdf+xml");
        Response response = putTruncatedResource(client, TikaResourceTest.TEST_DOC, 12000);
        Assert.assertEquals(Response.Status.OK.getStatusCode(), response.getStatus());

        String s = readEntityAsString(response);
        assertContains("<rdf:li>Maxim Valyanskiy</rdf:li>", s);
    }
}