| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.tika.metadata; |
| |
| //JDK imports |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertFalse; |
| import static org.junit.Assert.assertNotNull; |
| import static org.junit.Assert.assertNull; |
| import static org.junit.Assert.assertTrue; |
| import static org.junit.Assert.fail; |
| |
| import java.text.DateFormat; |
| import java.text.SimpleDateFormat; |
| import java.util.Date; |
| import java.util.Locale; |
| import java.util.Properties; |
| import java.util.Random; |
| import java.util.TimeZone; |
| import java.util.concurrent.Callable; |
| import java.util.concurrent.ExecutorCompletionService; |
| import java.util.concurrent.ExecutorService; |
| import java.util.concurrent.Executors; |
| import java.util.concurrent.Future; |
| |
| import org.junit.Test; |
| |
| import org.apache.tika.utils.DateUtils; |
| |
| //Junit imports |
| |
| /** |
| * JUnit based tests of class {@link org.apache.tika.metadata.Metadata}. |
| */ |
| public class TestMetadata { |
| |
| private static final String CONTENTTYPE = "contenttype"; |
| |
| /** |
| * Test for the <code>add(String, String)</code> method. |
| */ |
| @Test |
| public void testAdd() { |
| String[] values = null; |
| Metadata meta = new Metadata(); |
| |
| values = meta.getValues(CONTENTTYPE); |
| assertEquals(0, values.length); |
| |
| meta.add(CONTENTTYPE, "value1"); |
| values = meta.getValues(CONTENTTYPE); |
| assertEquals(1, values.length); |
| assertEquals("value1", values[0]); |
| |
| meta.add(CONTENTTYPE, "value2"); |
| values = meta.getValues(CONTENTTYPE); |
| assertEquals(2, values.length); |
| assertEquals("value1", values[0]); |
| assertEquals("value2", values[1]); |
| |
| // NOTE : For now, the same value can be added many times. |
| // Should it be changed? |
| meta.add(CONTENTTYPE, "value1"); |
| values = meta.getValues(CONTENTTYPE); |
| assertEquals(3, values.length); |
| assertEquals("value1", values[0]); |
| assertEquals("value2", values[1]); |
| assertEquals("value1", values[2]); |
| |
| Property nonMultiValued = Property.internalText("nonMultiValued"); |
| meta.add(nonMultiValued, "value1"); |
| try { |
| meta.add(nonMultiValued, "value2"); |
| fail("add should fail on the second call of a non-multi valued item"); |
| } catch (PropertyTypeException e) { |
| //swallow |
| } |
| } |
| |
| /** |
| * Test for the <code>set(String, String)</code> method. |
| */ |
| @Test |
| public void testSet() { |
| String[] values = null; |
| Metadata meta = new Metadata(); |
| |
| values = meta.getValues(CONTENTTYPE); |
| assertEquals(0, values.length); |
| |
| meta.set(CONTENTTYPE, "value1"); |
| values = meta.getValues(CONTENTTYPE); |
| assertEquals(1, values.length); |
| assertEquals("value1", values[0]); |
| |
| meta.set(CONTENTTYPE, "value2"); |
| values = meta.getValues(CONTENTTYPE); |
| assertEquals(1, values.length); |
| assertEquals("value2", values[0]); |
| |
| meta.set(CONTENTTYPE, "new value 1"); |
| meta.add("contenttype", "new value 2"); |
| values = meta.getValues(CONTENTTYPE); |
| assertEquals(2, values.length); |
| assertEquals("new value 1", values[0]); |
| assertEquals("new value 2", values[1]); |
| } |
| |
| /** |
| * Test for <code>setAll(Properties)</code> method. |
| */ |
| @Test |
| public void testSetProperties() { |
| String[] values = null; |
| Metadata meta = new Metadata(); |
| Properties props = new Properties(); |
| |
| meta.setAll(props); |
| assertEquals(0, meta.size()); |
| |
| props.setProperty("name-one", "value1.1"); |
| meta.setAll(props); |
| assertEquals(1, meta.size()); |
| values = meta.getValues("name-one"); |
| assertEquals(1, values.length); |
| assertEquals("value1.1", values[0]); |
| |
| props.setProperty("name-two", "value2.1"); |
| meta.setAll(props); |
| assertEquals(2, meta.size()); |
| values = meta.getValues("name-one"); |
| assertEquals(1, values.length); |
| assertEquals("value1.1", values[0]); |
| values = meta.getValues("name-two"); |
| assertEquals(1, values.length); |
| assertEquals("value2.1", values[0]); |
| } |
| |
| /** |
| * Test for <code>get(String)</code> method. |
| */ |
| @Test |
| public void testGet() { |
| Metadata meta = new Metadata(); |
| assertNull(meta.get("a-name")); |
| meta.add("a-name", "value-1"); |
| assertEquals("value-1", meta.get("a-name")); |
| meta.add("a-name", "value-2"); |
| assertEquals("value-1", meta.get("a-name")); |
| } |
| |
| /** |
| * Test for <code>isMultiValued()</code> method. |
| */ |
| @Test |
| public void testIsMultiValued() { |
| Metadata meta = new Metadata(); |
| assertFalse(meta.isMultiValued("key")); |
| meta.add("key", "value1"); |
| assertFalse(meta.isMultiValued("key")); |
| meta.add("key", "value2"); |
| assertTrue(meta.isMultiValued("key")); |
| } |
| |
| /** |
| * Test for <code>names</code> method. |
| */ |
| @Test |
| public void testNames() { |
| String[] names = null; |
| Metadata meta = new Metadata(); |
| names = meta.names(); |
| assertEquals(0, names.length); |
| |
| meta.add("name-one", "value"); |
| names = meta.names(); |
| assertEquals(1, names.length); |
| assertEquals("name-one", names[0]); |
| meta.add("name-two", "value"); |
| names = meta.names(); |
| assertEquals(2, names.length); |
| } |
| |
| /** |
| * Test for <code>remove(String)</code> method. |
| */ |
| @Test |
| public void testRemove() { |
| Metadata meta = new Metadata(); |
| meta.remove("name-one"); |
| assertEquals(0, meta.size()); |
| meta.add("name-one", "value-1.1"); |
| meta.add("name-one", "value-1.2"); |
| meta.add("name-two", "value-2.2"); |
| assertEquals(2, meta.size()); |
| assertNotNull(meta.get("name-one")); |
| assertNotNull(meta.get("name-two")); |
| meta.remove("name-one"); |
| assertEquals(1, meta.size()); |
| assertNull(meta.get("name-one")); |
| assertNotNull(meta.get("name-two")); |
| meta.remove("name-two"); |
| assertEquals(0, meta.size()); |
| assertNull(meta.get("name-one")); |
| assertNull(meta.get("name-two")); |
| } |
| |
| /** |
| * Test for <code>equals(Object)</code> method. |
| */ |
| @Test |
| public void testObject() { |
| Metadata meta1 = new Metadata(); |
| Metadata meta2 = new Metadata(); |
| assertFalse(meta1.equals(null)); |
| assertFalse(meta1.equals("String")); |
| assertTrue(meta1.equals(meta2)); |
| meta1.add("name-one", "value-1.1"); |
| assertFalse(meta1.equals(meta2)); |
| meta2.add("name-one", "value-1.1"); |
| assertTrue(meta1.equals(meta2)); |
| meta1.add("name-one", "value-1.2"); |
| assertFalse(meta1.equals(meta2)); |
| meta2.add("name-one", "value-1.2"); |
| assertTrue(meta1.equals(meta2)); |
| meta1.add("name-two", "value-2.1"); |
| assertFalse(meta1.equals(meta2)); |
| meta2.add("name-two", "value-2.1"); |
| assertTrue(meta1.equals(meta2)); |
| meta1.add("name-two", "value-2.2"); |
| assertFalse(meta1.equals(meta2)); |
| meta2.add("name-two", "value-2.x"); |
| assertFalse(meta1.equals(meta2)); |
| } |
| |
| /** |
| * Tests for getting and setting integer |
| * based properties |
| */ |
| @Test |
| public void testGetSetInt() { |
| Metadata meta = new Metadata(); |
| |
| // Isn't initially set, will get null back |
| assertEquals(null, meta.get(Metadata.IMAGE_WIDTH)); |
| assertEquals(null, meta.getInt(Metadata.IMAGE_WIDTH)); |
| |
| // Can only set as a single valued int |
| try { |
| meta.set(Metadata.BITS_PER_SAMPLE, 1); |
| fail("Shouldn't be able to set a multi valued property as an int"); |
| } catch (PropertyTypeException e) { |
| //swallow |
| } |
| try { |
| meta.set(TikaCoreProperties.CREATED, 1); |
| fail("Shouldn't be able to set a date property as an int"); |
| } catch (PropertyTypeException e) { |
| //swallow |
| } |
| |
| // Can set it and retrieve it |
| meta.set(Metadata.IMAGE_WIDTH, 22); |
| assertEquals("22", meta.get(Metadata.IMAGE_WIDTH)); |
| assertEquals(22, meta.getInt(Metadata.IMAGE_WIDTH).intValue()); |
| |
| // If you save a non int value, you get null |
| meta.set(Metadata.IMAGE_WIDTH, "INVALID"); |
| assertEquals("INVALID", meta.get(Metadata.IMAGE_WIDTH)); |
| assertEquals(null, meta.getInt(Metadata.IMAGE_WIDTH)); |
| |
| // If you try to retrieve a non simple int value, you get null |
| meta.set(Metadata.IMAGE_WIDTH, 22); |
| assertEquals(22, meta.getInt(Metadata.IMAGE_WIDTH).intValue()); |
| assertEquals(null, meta.getInt(Metadata.BITS_PER_SAMPLE)); |
| assertEquals(null, meta.getInt(TikaCoreProperties.CREATED)); |
| } |
| |
| /** |
| * Tests for getting and setting date |
| * based properties |
| */ |
| @Test |
| public void testGetSetDate() { |
| Metadata meta = new Metadata(); |
| long hour = 60 * 60 * 1000; |
| |
| // Isn't initially set, will get null back |
| assertEquals(null, meta.get(TikaCoreProperties.CREATED)); |
| assertEquals(null, meta.getInt(TikaCoreProperties.CREATED)); |
| |
| // Can only set as a single valued date |
| try { |
| meta.set(Metadata.BITS_PER_SAMPLE, new Date(1000)); |
| fail("Shouldn't be able to set a multi valued property as a date"); |
| } catch (PropertyTypeException e) { |
| //swallow |
| } |
| try { |
| meta.set(Metadata.IMAGE_WIDTH, new Date(1000)); |
| fail("Shouldn't be able to set an int property as an date"); |
| } catch (PropertyTypeException e) { |
| //swallow |
| } |
| |
| // Can set it and retrieve it |
| meta.set(TikaCoreProperties.CREATED, new Date(1000)); |
| assertEquals("1970-01-01T00:00:01Z", meta.get(TikaCoreProperties.CREATED)); |
| assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime()); |
| |
| // If you save a non date value, you get null |
| meta.set(TikaCoreProperties.CREATED, "INVALID"); |
| assertEquals("INVALID", meta.get(TikaCoreProperties.CREATED)); |
| assertEquals(null, meta.getDate(TikaCoreProperties.CREATED)); |
| |
| // If you try to retrieve a non simple date value, you get null |
| meta.set(TikaCoreProperties.CREATED, new Date(1000)); |
| assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime()); |
| assertEquals(null, meta.getInt(Metadata.BITS_PER_SAMPLE)); |
| assertEquals(null, meta.getInt(TikaCoreProperties.CREATED)); |
| |
| // Our format doesn't include milliseconds |
| // This means things get rounded |
| meta.set(TikaCoreProperties.CREATED, new Date(1050)); |
| assertEquals("1970-01-01T00:00:01Z", meta.get(TikaCoreProperties.CREATED)); |
| assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime()); |
| |
| // We can accept a number of different ISO-8601 variants |
| meta.set(TikaCoreProperties.CREATED, "1970-01-01T00:00:01Z"); |
| assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime()); |
| |
| meta.set(TikaCoreProperties.CREATED, "1970-01-01 00:00:01Z"); |
| assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime()); |
| |
| meta.set(TikaCoreProperties.CREATED, "1970-01-01T01:00:01+01:00"); |
| assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime()); |
| |
| meta.set(TikaCoreProperties.CREATED, "1970-01-01 01:00:01+01:00"); |
| assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime()); |
| |
| meta.set(TikaCoreProperties.CREATED, "1970-01-01T12:00:01+12:00"); |
| assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime()); |
| |
| meta.set(TikaCoreProperties.CREATED, "1969-12-31T12:00:01-12:00"); |
| assertEquals(1000, meta.getDate(TikaCoreProperties.CREATED).getTime()); |
| |
| // Dates without times, come in at midday UTC |
| meta.set(TikaCoreProperties.CREATED, "1970-01-01"); |
| assertEquals(12 * hour, meta.getDate(TikaCoreProperties.CREATED).getTime()); |
| |
| meta.set(TikaCoreProperties.CREATED, "1970:01:01"); |
| assertEquals(12 * hour, meta.getDate(TikaCoreProperties.CREATED).getTime()); |
| } |
| |
| /** |
| * Some documents, like jpegs, might have date in unspecified time zone |
| * which should be handled like strings but verified to have parseable ISO 8601 format |
| */ |
| @Test |
| public void testGetSetDateUnspecifiedTimezone() { |
| Metadata meta = new Metadata(); |
| |
| // Set explictly without a timezone |
| meta.set(TikaCoreProperties.CREATED, "1970-01-01T00:00:01"); |
| assertEquals("should return string without time zone specifier because zone is not known", |
| "1970-01-01T00:00:01", meta.get(TikaCoreProperties.CREATED)); |
| |
| // Now ask DateUtils to format for us without one |
| meta.set(TikaCoreProperties.CREATED, DateUtils.formatDateUnknownTimezone(new Date(1000))); |
| assertEquals("should return string without time zone specifier because zone is not known", |
| "1970-01-01T00:00:01", meta.get(TikaCoreProperties.CREATED)); |
| } |
| |
| /** |
| * Defines a composite property, then checks that when set as the |
| * composite the value can be retrieved with the property or the aliases |
| */ |
| @SuppressWarnings("deprecation") |
| @Test |
| public void testCompositeProperty() { |
| Metadata meta = new Metadata(); |
| Property compositeProperty = Property.composite(DublinCore.DESCRIPTION, |
| new Property[]{TikaCoreProperties.DESCRIPTION, |
| Property.internalText("testDescriptionAlt")}); |
| String message = "composite description"; |
| meta.set(compositeProperty, message); |
| |
| // Fetch as the composite |
| assertEquals(message, meta.get(compositeProperty)); |
| // Fetch as the primary property on the composite |
| assertEquals(message, meta.get(DublinCore.DESCRIPTION)); |
| // Fetch as the aliases |
| assertEquals(message, meta.get("testDescriptionAlt")); |
| } |
| |
| @Test |
| public void testMultithreadedDates() throws Exception { |
| int numThreads = 10; |
| ExecutorService executorService = Executors.newFixedThreadPool(numThreads); |
| ExecutorCompletionService<Integer> executorCompletionService = |
| new ExecutorCompletionService<>(executorService); |
| for (int i = 0; i < numThreads; i++) { |
| executorCompletionService.submit(new MetadataDateAdder()); |
| } |
| int finished = 0; |
| while (finished < numThreads) { |
| Future<Integer> future = executorCompletionService.take(); |
| if (future != null && future.isDone()) { |
| Integer retVal = future.get(); |
| finished++; |
| } |
| } |
| |
| } |
| |
| @Test |
| public void testEquals() { |
| Metadata meta1 = new Metadata(); |
| meta1.add("key", "value1"); |
| meta1.add("key", "value2"); |
| meta1.add("key2", "value12"); |
| |
| Metadata meta2 = new Metadata(); |
| meta2.add("key", "value1"); |
| meta2.add("key", "value2"); |
| meta2.add("key2", "value12"); |
| |
| assertEquals(meta1, meta2); |
| } |
| |
| @Test |
| public void testNotEquals() { |
| Metadata meta1 = new Metadata(); |
| meta1.add("key", "value1"); |
| meta1.add("key", "value2"); |
| meta1.add("key2", "value12"); |
| |
| Metadata meta2 = new Metadata(); |
| meta2.add("key", "value1"); |
| meta2.add("key", "value2"); |
| meta2.add("key2", "value22"); |
| |
| assertFalse(meta1.equals(meta2)); |
| } |
| |
| @Test |
| public void testEqualAndHashCode() { |
| Metadata meta1 = new Metadata(); |
| meta1.add("key", "value1"); |
| meta1.add("key", "value2"); |
| meta1.add("key2", "value12"); |
| |
| Metadata meta2 = new Metadata(); |
| meta2.add("key", "value1"); |
| meta2.add("key", "value2"); |
| meta2.add("key2", "value12"); |
| |
| assertEquals(meta1, meta2); |
| assertEquals(meta1.hashCode(), meta2.hashCode()); |
| } |
| |
| @Test |
| public void testToStringWithManyEntries() { |
| Metadata m = new Metadata(); |
| m.add("key", "value1"); |
| m.add("key", "value2"); |
| m.add("key2", "value12"); |
| assertEquals("key2=value12 key=value1 key=value2", m.toString()); |
| } |
| |
| @Test |
| public void testToStringWithSingleEntry() { |
| Metadata m = new Metadata(); |
| m.add("key", "value1"); |
| assertEquals("key=value1", m.toString()); |
| } |
| |
| private static class MetadataDateAdder implements Callable<Integer> { |
| private final Random random = new Random(); |
| |
| @Override |
| public Integer call() throws Exception { |
| for (int i = 0; i < 1000; i++) { |
| Metadata m = new Metadata(); |
| long start = System.currentTimeMillis(); |
| start += random.nextInt(1000000); |
| Date now = new Date(start); |
| DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US); |
| m.set(TikaCoreProperties.CREATED, df.format(now)); |
| df.setTimeZone(TimeZone.getTimeZone("UTC")); |
| assertTrue( |
| Math.abs(now.getTime() - m.getDate(TikaCoreProperties.CREATED).getTime()) < |
| 2000); |
| |
| } |
| return 1; |
| } |
| } |
| } |