// blob: b22e5a69b8434e8b1683b7e0bf06047415f9b355 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.metadata;
// JUnit static assertion imports, followed by JDK, JUnit and Tika imports
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.Properties;
import java.util.Random;
import java.util.TimeZone;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.junit.Test;
import org.apache.tika.utils.DateUtils;
//Junit imports
/**
* JUnit based tests of class {@link org.apache.tika.metadata.Metadata}.
*/
public class TestMetadata {
private static final String CONTENTTYPE = "contenttype";
/**
 * Exercises the <code>add(String, String)</code> and
 * <code>add(Property, String)</code> methods: values accumulate in the
 * order they were added, duplicates are retained, and a property that is
 * not multi-valued rejects a second value.
 */
@Test
public void testAdd() {
    Metadata metadata = new Metadata();

    // Nothing has been stored under the key yet
    String[] current = metadata.getValues(CONTENTTYPE);
    assertEquals(0, current.length);

    metadata.add(CONTENTTYPE, "value1");
    current = metadata.getValues(CONTENTTYPE);
    assertEquals(1, current.length);
    assertEquals("value1", current[0]);

    metadata.add(CONTENTTYPE, "value2");
    current = metadata.getValues(CONTENTTYPE);
    assertEquals(2, current.length);
    assertEquals("value1", current[0]);
    assertEquals("value2", current[1]);

    // NOTE: for now the same value may be added several times.
    // Whether that should change is an open question.
    metadata.add(CONTENTTYPE, "value1");
    current = metadata.getValues(CONTENTTYPE);
    assertEquals(3, current.length);
    assertEquals("value1", current[0]);
    assertEquals("value2", current[1]);
    assertEquals("value1", current[2]);

    Property singleValued = Property.internalText("nonMultiValued");
    metadata.add(singleValued, "value1");
    try {
        metadata.add(singleValued, "value2");
        fail("add should fail on the second call of a non-multi valued item");
    } catch (PropertyTypeException expected) {
        // expected: the second add on this property must be rejected
    }
}
/**
 * Exercises the <code>set(String, String)</code> method: a set replaces
 * whatever was stored before, and a later add appends to the value that
 * was set.
 */
@Test
public void testSet() {
    Metadata metadata = new Metadata();

    String[] current = metadata.getValues(CONTENTTYPE);
    assertEquals(0, current.length);

    metadata.set(CONTENTTYPE, "value1");
    current = metadata.getValues(CONTENTTYPE);
    assertEquals(1, current.length);
    assertEquals("value1", current[0]);

    // Setting again overwrites rather than appends
    metadata.set(CONTENTTYPE, "value2");
    current = metadata.getValues(CONTENTTYPE);
    assertEquals(1, current.length);
    assertEquals("value2", current[0]);

    // set followed by add yields both values, in that order
    metadata.set(CONTENTTYPE, "new value 1");
    metadata.add("contenttype", "new value 2");
    current = metadata.getValues(CONTENTTYPE);
    assertEquals(2, current.length);
    assertEquals("new value 1", current[0]);
    assertEquals("new value 2", current[1]);
}
/**
 * Exercises the <code>setAll(Properties)</code> method with an empty,
 * a one-entry and a two-entry {@link Properties} object.
 */
@Test
public void testSetProperties() {
    Metadata metadata = new Metadata();
    Properties source = new Properties();

    // Empty properties leave the metadata empty
    metadata.setAll(source);
    assertEquals(0, metadata.size());

    source.setProperty("name-one", "value1.1");
    metadata.setAll(source);
    assertEquals(1, metadata.size());
    String[] current = metadata.getValues("name-one");
    assertEquals(1, current.length);
    assertEquals("value1.1", current[0]);

    // Re-applying the properties with an extra entry keeps the first one single-valued
    source.setProperty("name-two", "value2.1");
    metadata.setAll(source);
    assertEquals(2, metadata.size());
    current = metadata.getValues("name-one");
    assertEquals(1, current.length);
    assertEquals("value1.1", current[0]);
    current = metadata.getValues("name-two");
    assertEquals(1, current.length);
    assertEquals("value2.1", current[0]);
}
/**
 * Exercises the <code>get(String)</code> method: it yields null for an
 * unknown name and the first value once several have been added.
 */
@Test
public void testGet() {
    final String name = "a-name";
    Metadata metadata = new Metadata();

    assertNull(metadata.get(name));

    metadata.add(name, "value-1");
    assertEquals("value-1", metadata.get(name));

    // A second value does not change what get() reports
    metadata.add(name, "value-2");
    assertEquals("value-1", metadata.get(name));
}
/**
 * Exercises the <code>isMultiValued()</code> method: a name only counts
 * as multi-valued once it holds a second value.
 */
@Test
public void testIsMultiValued() {
    final String name = "key";
    Metadata metadata = new Metadata();

    // No values at all
    assertFalse(metadata.isMultiValued(name));

    // Exactly one value
    metadata.add(name, "value1");
    assertFalse(metadata.isMultiValued(name));

    // Two values
    metadata.add(name, "value2");
    assertTrue(metadata.isMultiValued(name));
}
/**
 * Exercises the <code>names</code> method as names are added one by one.
 */
@Test
public void testNames() {
    Metadata metadata = new Metadata();

    String[] known = metadata.names();
    assertEquals(0, known.length);

    metadata.add("name-one", "value");
    known = metadata.names();
    assertEquals(1, known.length);
    assertEquals("name-one", known[0]);

    // With two names only the count is checked, not the order
    metadata.add("name-two", "value");
    known = metadata.names();
    assertEquals(2, known.length);
}
/**
 * Exercises the <code>remove(String)</code> method, including removal of
 * a name that was never added and of a name holding several values.
 */
@Test
public void testRemove() {
    final String first = "name-one";
    final String second = "name-two";
    Metadata metadata = new Metadata();

    // Removing an unknown name is harmless
    metadata.remove(first);
    assertEquals(0, metadata.size());

    metadata.add(first, "value-1.1");
    metadata.add(first, "value-1.2");
    metadata.add(second, "value-2.2");
    assertEquals(2, metadata.size());
    assertNotNull(metadata.get(first));
    assertNotNull(metadata.get(second));

    // Removing a name drops all of its values at once
    metadata.remove(first);
    assertEquals(1, metadata.size());
    assertNull(metadata.get(first));
    assertNotNull(metadata.get(second));

    metadata.remove(second);
    assertEquals(0, metadata.size());
    assertNull(metadata.get(first));
    assertNull(metadata.get(second));
}
/**
 * Exercises the <code>equals(Object)</code> method by growing two
 * instances in lock-step and checking equality after every change.
 */
@Test
public void testObject() {
    Metadata left = new Metadata();
    Metadata right = new Metadata();

    // Never equal to null or to an unrelated type; two empty instances are equal
    assertFalse(left.equals(null));
    assertFalse(left.equals("String"));
    assertTrue(left.equals(right));

    left.add("name-one", "value-1.1");
    assertFalse(left.equals(right));
    right.add("name-one", "value-1.1");
    assertTrue(left.equals(right));

    left.add("name-one", "value-1.2");
    assertFalse(left.equals(right));
    right.add("name-one", "value-1.2");
    assertTrue(left.equals(right));

    left.add("name-two", "value-2.1");
    assertFalse(left.equals(right));
    right.add("name-two", "value-2.1");
    assertTrue(left.equals(right));

    // Same names, same value counts, but one differing value
    left.add("name-two", "value-2.2");
    assertFalse(left.equals(right));
    right.add("name-two", "value-2.x");
    assertFalse(left.equals(right));
}
/**
 * Tests for getting and setting integer based properties.
 */
@Test
public void testGetSetInt() {
    Metadata metadata = new Metadata();

    // Nothing stored yet, so both accessors report null
    assertNull(metadata.get(Metadata.IMAGE_WIDTH));
    assertNull(metadata.getInt(Metadata.IMAGE_WIDTH));

    // An int may only be stored on a single valued int property
    try {
        metadata.set(Metadata.BITS_PER_SAMPLE, 1);
        fail("Shouldn't be able to set a multi valued property as an int");
    } catch (PropertyTypeException expected) {
        // expected: multi valued property
    }
    try {
        metadata.set(TikaCoreProperties.CREATED, 1);
        fail("Shouldn't be able to set a date property as an int");
    } catch (PropertyTypeException expected) {
        // expected: date property
    }

    // Round trip of a valid int
    metadata.set(Metadata.IMAGE_WIDTH, 22);
    assertEquals("22", metadata.get(Metadata.IMAGE_WIDTH));
    assertEquals(22, metadata.getInt(Metadata.IMAGE_WIDTH).intValue());

    // A stored non-int string is returned verbatim but reads back as a null int
    metadata.set(Metadata.IMAGE_WIDTH, "INVALID");
    assertEquals("INVALID", metadata.get(Metadata.IMAGE_WIDTH));
    assertNull(metadata.getInt(Metadata.IMAGE_WIDTH));

    // Asking for an int from a property that is not a simple int gives null
    metadata.set(Metadata.IMAGE_WIDTH, 22);
    assertEquals(22, metadata.getInt(Metadata.IMAGE_WIDTH).intValue());
    assertNull(metadata.getInt(Metadata.BITS_PER_SAMPLE));
    assertNull(metadata.getInt(TikaCoreProperties.CREATED));
}
/**
 * Tests for getting and setting date based properties: type checks on
 * set, round-tripping through the stored string form, handling of
 * unparseable values, and the ISO-8601 variants accepted on read.
 */
@Test
public void testGetSetDate() {
    Metadata meta = new Metadata();
    long hour = 60 * 60 * 1000;

    // Isn't initially set, will get null back
    assertNull(meta.get(TikaCoreProperties.CREATED));
    // The date accessor is the one under test here; the int accessor check is kept as well
    assertNull(meta.getDate(TikaCoreProperties.CREATED));
    assertNull(meta.getInt(TikaCoreProperties.CREATED));

    // Can only set as a single valued date
    try {
        meta.set(Metadata.BITS_PER_SAMPLE, new Date(1000));
        fail("Shouldn't be able to set a multi valued property as a date");
    } catch (PropertyTypeException expected) {
        // expected: multi valued property
    }
    try {
        meta.set(Metadata.IMAGE_WIDTH, new Date(1000));
        fail("Shouldn't be able to set an int property as an date");
    } catch (PropertyTypeException expected) {
        // expected: int property
    }

    // Can set it and retrieve it
    meta.set(TikaCoreProperties.CREATED, new Date(1000));
    assertEquals("1970-01-01T00:00:01Z", meta.get(TikaCoreProperties.CREATED));
    assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime());

    // If you save a non date value, you get null
    meta.set(TikaCoreProperties.CREATED, "INVALID");
    assertEquals("INVALID", meta.get(TikaCoreProperties.CREATED));
    assertNull(meta.getDate(TikaCoreProperties.CREATED));

    // If you try to retrieve a non simple date value, you get null
    meta.set(TikaCoreProperties.CREATED, new Date(1000));
    assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime());
    assertNull(meta.getDate(Metadata.BITS_PER_SAMPLE));
    assertNull(meta.getDate(Metadata.IMAGE_WIDTH));
    assertNull(meta.getInt(Metadata.BITS_PER_SAMPLE));
    assertNull(meta.getInt(TikaCoreProperties.CREATED));

    // Our format doesn't include milliseconds
    // This means things get rounded
    meta.set(TikaCoreProperties.CREATED, new Date(1050));
    assertEquals("1970-01-01T00:00:01Z", meta.get(TikaCoreProperties.CREATED));
    assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime());

    // We can accept a number of different ISO-8601 variants
    meta.set(TikaCoreProperties.CREATED, "1970-01-01T00:00:01Z");
    assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime());
    meta.set(TikaCoreProperties.CREATED, "1970-01-01 00:00:01Z");
    assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime());
    meta.set(TikaCoreProperties.CREATED, "1970-01-01T01:00:01+01:00");
    assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime());
    meta.set(TikaCoreProperties.CREATED, "1970-01-01 01:00:01+01:00");
    assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime());
    meta.set(TikaCoreProperties.CREATED, "1970-01-01T12:00:01+12:00");
    assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime());
    meta.set(TikaCoreProperties.CREATED, "1969-12-31T12:00:01-12:00");
    assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime());

    // Dates without times, come in at midday UTC
    meta.set(TikaCoreProperties.CREATED, "1970-01-01");
    assertEquals(12 * hour, meta.getDate(TikaCoreProperties.CREATED).getTime());
    meta.set(TikaCoreProperties.CREATED, "1970:01:01");
    assertEquals(12 * hour, meta.getDate(TikaCoreProperties.CREATED).getTime());
}
/**
 * Some documents, like jpegs, might carry a date in an unspecified time
 * zone. Such a value should be handled like a string, but verified to have
 * a parseable ISO 8601 format.
 */
@Test
public void testGetSetDateUnspecifiedTimezone() {
    Metadata metadata = new Metadata();

    // Set explicitly, as a string without a timezone
    metadata.set(TikaCoreProperties.CREATED, "1970-01-01T00:00:01");
    String stored = metadata.get(TikaCoreProperties.CREATED);
    assertEquals("should return string without time zone specifier because zone is not known",
            "1970-01-01T00:00:01", stored);

    // Now let DateUtils produce the zone-less string for us
    String formatted = DateUtils.formatDateUnknownTimezone(new Date(1000));
    metadata.set(TikaCoreProperties.CREATED, formatted);
    stored = metadata.get(TikaCoreProperties.CREATED);
    assertEquals("should return string without time zone specifier because zone is not known",
            "1970-01-01T00:00:01", stored);
}
/**
 * Builds a composite property and checks that a value set through the
 * composite can be read back through the composite itself, through its
 * primary property, and through the text alias.
 */
@SuppressWarnings("deprecation")
@Test
public void testCompositeProperty() {
    Metadata metadata = new Metadata();
    Property[] secondaries = new Property[]{
            TikaCoreProperties.DESCRIPTION,
            Property.internalText("testDescriptionAlt")};
    Property composite = Property.composite(DublinCore.DESCRIPTION, secondaries);

    String expected = "composite description";
    metadata.set(composite, expected);

    // Read via the composite itself
    assertEquals(expected, metadata.get(composite));
    // Read via the composite's primary property
    assertEquals(expected, metadata.get(DublinCore.DESCRIPTION));
    // Read via the text alias, addressed by name
    assertEquals(expected, metadata.get("testDescriptionAlt"));
}
/**
 * Runs several {@link MetadataDateAdder} workers concurrently and waits
 * for all of them; the workers carry out the actual date assertions.
 *
 * @throws Exception if the waiting thread is interrupted, or if a worker
 *                   failed (reported by {@code Future.get()} as a
 *                   {@link java.util.concurrent.ExecutionException})
 */
@Test
public void testMultithreadedDates() throws Exception {
    int numThreads = 10;
    ExecutorService executorService = Executors.newFixedThreadPool(numThreads);
    try {
        ExecutorCompletionService<Integer> executorCompletionService =
                new ExecutorCompletionService<>(executorService);
        for (int i = 0; i < numThreads; i++) {
            executorCompletionService.submit(new MetadataDateAdder());
        }
        for (int finished = 0; finished < numThreads; finished++) {
            // take() blocks until some task has completed, so the returned future
            // is already done; get() rethrows any failure raised inside the worker
            Integer retVal = executorCompletionService.take().get();
            assertEquals(Integer.valueOf(1), retVal);
        }
    } finally {
        // Always release the pool threads, including when a worker failed
        executorService.shutdownNow();
    }
}
/**
 * Two instances populated with the same names and values, in the same
 * order, must be equal.
 */
@Test
public void testEquals() {
    Metadata first = new Metadata();
    first.add("key", "value1");
    first.add("key", "value2");
    first.add("key2", "value12");

    Metadata second = new Metadata();
    second.add("key", "value1");
    second.add("key", "value2");
    second.add("key2", "value12");

    assertEquals(first, second);
}
/**
 * Two instances that differ in a single value must not be equal.
 */
@Test
public void testNotEquals() {
    Metadata first = new Metadata();
    first.add("key", "value1");
    first.add("key", "value2");
    first.add("key2", "value12");

    Metadata second = new Metadata();
    second.add("key", "value1");
    second.add("key", "value2");
    // Only this value differs from the first instance
    second.add("key2", "value22");

    assertFalse(first.equals(second));
}
/**
 * Equal instances must also report the same hash code.
 */
@Test
public void testEqualAndHashCode() {
    Metadata first = new Metadata();
    first.add("key", "value1");
    first.add("key", "value2");
    first.add("key2", "value12");

    Metadata second = new Metadata();
    second.add("key", "value1");
    second.add("key", "value2");
    second.add("key2", "value12");

    assertEquals(first, second);
    int firstHash = first.hashCode();
    int secondHash = second.hashCode();
    assertEquals(firstHash, secondHash);
}
/**
 * Checks the <code>toString()</code> output for an instance holding two
 * names, one of them with two values.
 */
@Test
public void testToStringWithManyEntries() {
    Metadata metadata = new Metadata();
    metadata.add("key", "value1");
    metadata.add("key", "value2");
    metadata.add("key2", "value12");

    String rendered = metadata.toString();
    assertEquals("key2=value12 key=value1 key=value2", rendered);
}
/**
 * Checks the <code>toString()</code> output for a single name/value pair.
 */
@Test
public void testToStringWithSingleEntry() {
    Metadata metadata = new Metadata();
    metadata.add("key", "value1");

    String rendered = metadata.toString();
    assertEquals("key=value1", rendered);
}
/**
 * Worker that repeatedly stores a formatted timestamp in a fresh
 * {@link Metadata} and checks that reading it back as a date lands within
 * two seconds of the original instant.
 */
private static class MetadataDateAdder implements Callable<Integer> {
    private final Random random = new Random();

    /**
     * Performs 1000 set/get round trips on
     * {@link TikaCoreProperties#CREATED}.
     *
     * @return 1 once every round trip has passed its assertion
     * @throws Exception declared by {@link Callable}
     */
    @Override
    public Integer call() throws Exception {
        // The formatter is confined to this call, so it is never shared between threads
        DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US);
        // The pattern appends a literal 'Z', so the formatter has to be switched to UTC
        // BEFORE formatting; otherwise the text is in the JVM default zone but labelled UTC
        df.setTimeZone(TimeZone.getTimeZone("UTC"));
        for (int i = 0; i < 1000; i++) {
            Metadata m = new Metadata();
            long start = System.currentTimeMillis();
            start += random.nextInt(1000000);
            Date now = new Date(start);
            m.set(TikaCoreProperties.CREATED, df.format(now));
            // Allow for the sub-second part that the format drops
            assertTrue(
                    Math.abs(now.getTime() - m.getDate(TikaCoreProperties.CREATED).getTime()) <
                            2000);
        }
        return 1;
    }
}
}