blob: aee73c781945803d1d19433ab458007bf614d13b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.iwork;
import junit.framework.TestCase;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import java.io.InputStream;
/**
* Tests if the IWork parser parses the content and metadata properly of the supported formats.
*/
public class IWorkParserTest extends TestCase {
private IWorkPackageParser iWorkParser;
private ParseContext parseContext;
@Override
protected void setUp() throws Exception {
iWorkParser = new IWorkPackageParser();
parseContext = new ParseContext();
parseContext.set(Parser.class, new AutoDetectParser());
}
public void testParseKeynote() throws Exception {
InputStream input = IWorkParserTest.class.getResourceAsStream("/test-documents/testKeynote.key");
Metadata metadata = new Metadata();
ContentHandler handler = new BodyContentHandler();
iWorkParser.parse(input, handler, metadata, parseContext);
assertEquals(6, metadata.size());
assertEquals("application/vnd.apple.keynote", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("3", metadata.get(Metadata.SLIDE_COUNT));
assertEquals("1024", metadata.get(KeynoteContentHandler.PRESENTATION_WIDTH));
assertEquals("768", metadata.get(KeynoteContentHandler.PRESENTATION_HEIGHT));
assertEquals("Tika user", metadata.get(Metadata.AUTHOR));
assertEquals("Apache tika", metadata.get(Metadata.TITLE));
String content = handler.toString();
assertTrue(content.contains("A sample presentation"));
assertTrue(content.contains("For the Apache Tika project"));
assertTrue(content.contains("Slide 1"));
assertTrue(content.contains("Some random text for the sake of testability."));
assertTrue(content.contains("A nice comment"));
assertTrue(content.contains("A nice note"));
// test table data
assertTrue(content.contains("Cell one"));
assertTrue(content.contains("Cell two"));
assertTrue(content.contains("Cell three"));
assertTrue(content.contains("Cell four"));
assertTrue(content.contains("Cell 5"));
assertTrue(content.contains("Cell six"));
assertTrue(content.contains("7"));
assertTrue(content.contains("Cell eight"));
assertTrue(content.contains("5/5/1985"));
}
public void testParsePages() throws Exception {
InputStream input = IWorkParserTest.class.getResourceAsStream("/test-documents/testPages.pages");
Metadata metadata = new Metadata();
ContentHandler handler = new BodyContentHandler();
iWorkParser.parse(input, handler, metadata, parseContext);
assertEquals(51, metadata.size());
assertEquals("application/vnd.apple.pages", metadata.get(Metadata.CONTENT_TYPE));
assertEquals("Tika user", metadata.get(Metadata.AUTHOR));
assertEquals("Apache tika", metadata.get(Metadata.TITLE));
assertEquals("2010-05-09T21:34:38+0200", metadata.get(Metadata.CREATION_DATE));
assertEquals("2010-05-09T23:50:36+0200", metadata.get(Metadata.LAST_MODIFIED));
assertEquals("en", metadata.get(Metadata.LANGUAGE));
assertEquals("2", metadata.get(Metadata.PAGE_COUNT));
String content = handler.toString();
// text on page 1
assertTrue(content.contains("Sample pages document"));
assertTrue(content.contains("Some plain text to parse."));
assertTrue(content.contains("Cell one"));
assertTrue(content.contains("Cell two"));
assertTrue(content.contains("Cell three"));
assertTrue(content.contains("Cell four"));
assertTrue(content.contains("Cell five"));
assertTrue(content.contains("Cell six"));
assertTrue(content.contains("Cell seven"));
assertTrue(content.contains("Cell eight"));
assertTrue(content.contains("Cell nine"));
assertTrue(content.contains("Both Pages 1.x and Keynote 2.x")); // ...
// text on page 2
assertTrue(content.contains("A second page...."));
assertTrue(content.contains("Extensible Markup Language")); // ...
}
public void testParseNumbers() throws Exception {
InputStream input = IWorkParserTest.class.getResourceAsStream("/test-documents/testNumbers.numbers");
Metadata metadata = new Metadata();
ContentHandler handler = new BodyContentHandler();
iWorkParser.parse(input, handler, metadata, parseContext);
String content = handler.toString();
assertEquals(9, metadata.size());
assertEquals("2", metadata.get(Metadata.PAGE_COUNT));
assertEquals("Tika User", metadata.get(Metadata.AUTHOR));
assertEquals("Account checking", metadata.get(Metadata.TITLE));
assertEquals("a comment", metadata.get(Metadata.COMMENT));
assertTrue(content.contains("Category"));
assertTrue(content.contains("Home"));
assertTrue(content.contains("-226"));
assertTrue(content.contains("-137.5"));
assertTrue(content.contains("Checking Account: 300545668"));
assertTrue(content.contains("4650"));
assertTrue(content.contains("Credit Card"));
assertTrue(content.contains("Groceries"));
assertTrue(content.contains("-210"));
assertTrue(content.contains("Food"));
assertTrue(content.contains("Try adding your own account transactions to this table."));
}
}