| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.tika.parser.jpeg; |
| |
| import java.io.InputStream; |
| import java.util.Arrays; |
| import java.util.List; |
| |
| import junit.framework.TestCase; |
| |
| import org.apache.tika.metadata.Metadata; |
| import org.apache.tika.metadata.TIFF; |
| import org.apache.tika.parser.ParseContext; |
| import org.apache.tika.parser.Parser; |
| import org.xml.sax.helpers.DefaultHandler; |
| |
| public class JpegParserTest extends TestCase { |
| private final Parser parser = new JpegParser(); |
| |
| public void testJPEG() throws Exception { |
| Metadata metadata = new Metadata(); |
| metadata.set(Metadata.CONTENT_TYPE, "image/jpeg"); |
| InputStream stream = |
| getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg"); |
| parser.parse(stream, new DefaultHandler(), metadata, new ParseContext()); |
| |
| // Core EXIF/TIFF tags |
| assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH)); |
| assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH)); |
| assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE)); |
| assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL)); |
| |
| assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600 |
| assertEquals("5.6", metadata.get(Metadata.F_NUMBER)); |
| assertEquals("false", metadata.get(Metadata.FLASH_FIRED)); |
| assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH)); |
| assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS)); |
| assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE)); |
| assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL)); |
| assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE)); |
| assertEquals(null, metadata.get(Metadata.ORIENTATION)); // Not present |
| assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL)); |
| assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL)); |
| assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT)); |
| |
| // Check that EXIF/TIFF tags come through with their raw values too |
| // (This may be removed for Tika 1.0, as we support more of them |
| // with explicit Metadata entries) |
| assertEquals("Canon EOS 40D", metadata.get("Model")); |
| |
| // Common tags |
| //assertEquals("2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED)); |
| assertEquals("Date/Time Original for when the photo was taken, unspecified time zone", |
| "2009-08-11T09:09:45", metadata.get(Metadata.DATE)); |
| List<String> keywords = Arrays.asList(metadata.getValues(Metadata.SUBJECT)); |
| assertTrue("'canon-55-250' expected in " + keywords, keywords.contains("canon-55-250")); |
| assertTrue("'moscow-birds' expected in " + keywords, keywords.contains("moscow-birds")); |
| assertTrue("'serbor' expected in " + keywords, keywords.contains("serbor")); |
| assertFalse(keywords.contains("canon-55-250 moscow-birds serbor")); |
| } |
| |
| public void testJPEGGeo() throws Exception { |
| Metadata metadata = new Metadata(); |
| metadata.set(Metadata.CONTENT_TYPE, "image/jpeg"); |
| InputStream stream = |
| getClass().getResourceAsStream("/test-documents/testJPEG_GEO.jpg"); |
| parser.parse(stream, new DefaultHandler(), metadata, new ParseContext()); |
| |
| // Geo tags |
| assertEquals("12.54321", metadata.get(Metadata.LATITUDE)); |
| assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE)); |
| |
| // Core EXIF/TIFF tags |
| assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH)); |
| assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH)); |
| assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE)); |
| assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL)); |
| |
| assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600 |
| assertEquals("5.6", metadata.get(Metadata.F_NUMBER)); |
| assertEquals("false", metadata.get(Metadata.FLASH_FIRED)); |
| assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH)); |
| assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS)); |
| assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE)); |
| assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL)); |
| assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE)); |
| assertEquals(null, metadata.get(Metadata.ORIENTATION)); // Not present |
| assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL)); |
| assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL)); |
| assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT)); |
| |
| // Common tags |
| assertEquals("Date/Time Original for when the photo was taken, unspecified time zone", |
| "2009-08-11T09:09:45", metadata.get(Metadata.DATE)); |
| assertEquals("This image has different Date/Time than Date/Time Original, so it is probably modification date", |
| "2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED)); |
| assertEquals("Date/Time Original should be stored in EXIF field too", |
| "2009-08-11T09:09:45", metadata.get(TIFF.ORIGINAL_DATE)); |
| assertEquals("canon-55-250", metadata.getValues(Metadata.KEYWORDS)[0]); |
| } |
| |
| public void testJPEGTitleAndDescription() throws Exception { |
| Metadata metadata = new Metadata(); |
| metadata.set(Metadata.CONTENT_TYPE, "image/jpeg"); |
| InputStream stream = |
| getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg"); |
| parser.parse(stream, new DefaultHandler(), metadata, new ParseContext()); |
| |
| // embedded comments with non-ascii characters |
| assertEquals("Tosteberga \u00C4ngar", metadata.get(Metadata.TITLE)); |
| assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION)); |
| assertEquals("Some Tourist", metadata.get(Metadata.AUTHOR)); |
| assertEquals("Some Tourist", metadata.get(Metadata.CREATOR)); // Dublin Core |
| // xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands" |
| // but we have to replace them with underscore |
| |
| List<String> keywords = Arrays.asList(metadata.getValues(Metadata.KEYWORDS)); |
| assertTrue(keywords.contains("coast")); |
| assertTrue(keywords.contains("bird watching")); |
| assertEquals(keywords, Arrays.asList(metadata.getValues(Metadata.SUBJECT))); |
| |
| // Core EXIF/TIFF tags |
| assertEquals("103", metadata.get(Metadata.IMAGE_WIDTH)); |
| assertEquals("77", metadata.get(Metadata.IMAGE_LENGTH)); |
| assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE)); |
| assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL)); |
| |
| assertEquals("1.0E-6", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1000000 |
| assertEquals("2.8", metadata.get(Metadata.F_NUMBER)); |
| assertEquals("4.6", metadata.get(Metadata.FOCAL_LENGTH)); |
| assertEquals("114", metadata.get(Metadata.ISO_SPEED_RATINGS)); |
| assertEquals(null, metadata.get(Metadata.EQUIPMENT_MAKE)); |
| assertEquals(null, metadata.get(Metadata.EQUIPMENT_MODEL)); |
| assertEquals(null, metadata.get(Metadata.SOFTWARE)); |
| assertEquals("1", metadata.get(Metadata.ORIENTATION)); // Not present |
| assertEquals("300.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL)); |
| assertEquals("300.0", metadata.get(Metadata.RESOLUTION_VERTICAL)); |
| assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT)); |
| } |
| |
| public void testJPEGTitleAndDescriptionPhotoshop() throws Exception { |
| Metadata metadata = new Metadata(); |
| metadata.set(Metadata.CONTENT_TYPE, "image/jpeg"); |
| InputStream stream = |
| getClass().getResourceAsStream("/test-documents/testJPEG_commented_pspcs2mac.jpg"); |
| parser.parse(stream, new DefaultHandler(), metadata, new ParseContext()); |
| |
| // embedded comments with non-ascii characters |
| assertEquals("Tosteberga \u00C4ngar", metadata.get(Metadata.TITLE)); |
| assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION)); |
| assertEquals("Some Tourist", metadata.get(Metadata.CREATOR)); |
| List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT)); |
| assertTrue("got " + subject, subject.contains("bird watching")); |
| } |
| |
| public void testJPEGTitleAndDescriptionXnviewmp() throws Exception { |
| Metadata metadata = new Metadata(); |
| metadata.set(Metadata.CONTENT_TYPE, "image/jpeg"); |
| InputStream stream = |
| getClass().getResourceAsStream("/test-documents/testJPEG_commented_xnviewmp026.jpg"); |
| parser.parse(stream, new DefaultHandler(), metadata, new ParseContext()); |
| |
| // XnViewMp's default comment dialog has only comment, not headline. |
| // Comment is embedded only if "Write comments in XMP" is enabled in settings |
| assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(Metadata.DESCRIPTION)); |
| // xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands" |
| // but we have to replace them with underscore |
| String[] subject = metadata.getValues(Metadata.SUBJECT); |
| List<String> keywords = Arrays.asList(subject); |
| assertTrue("'coast'" + " not in " + keywords, keywords.contains("coast")); |
| assertTrue("'nature reserve'" + " not in " + keywords, keywords.contains("nature reserve")); |
| } |
| } |