| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.tika.parser.prt; |
| |
| import static org.junit.Assert.assertEquals; |
| |
| import java.io.InputStream; |
| |
| import org.junit.Test; |
| import org.xml.sax.ContentHandler; |
| |
| import org.apache.tika.TikaTest; |
| import org.apache.tika.metadata.Metadata; |
| import org.apache.tika.metadata.TikaCoreProperties; |
| import org.apache.tika.sax.BodyContentHandler; |
| |
| public class PRTParserTest extends TikaTest { |
| /** |
| * Try with a simple file |
| */ |
| @Test |
| public void testPRTParserBasics() throws Exception { |
| try (InputStream input = getResourceAsStream("/test-documents/testCADKEY.prt")) { |
| Metadata metadata = new Metadata(); |
| ContentHandler handler = new BodyContentHandler(); |
| new PRTParser().parse(input, handler, metadata); |
| |
| assertEquals("application/x-prt", metadata.get(Metadata.CONTENT_TYPE)); |
| |
| // This file has a date |
| assertEquals("2011-06-20T16:54:00", metadata.get(TikaCoreProperties.CREATED)); |
| |
| // But no description |
| assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION)); |
| |
| String contents = handler.toString(); |
| |
| assertContains("Front View", contents); |
| assertContains("Back View", contents); |
| assertContains("Bottom View", contents); |
| assertContains("Right View", contents); |
| assertContains("Left View", contents); |
| //assertContains("Isometric View", contents); // Can't detect yet |
| assertContains("Axonometric View", contents); |
| |
| assertContains("You've managed to extract all the text!", contents); |
| assertContains("This is more text", contents); |
| assertContains("Text Inside a PRT file", contents); |
| } |
| } |
| |
| /** |
| * Now a more complex one |
| */ |
| @Test |
| public void testPRTParserComplex() throws Exception { |
| try (InputStream input = getResourceAsStream("/test-documents/testCADKEY2.prt")) { |
| Metadata metadata = new Metadata(); |
| ContentHandler handler = new BodyContentHandler(); |
| new PRTParser().parse(input, handler, metadata); |
| |
| assertEquals("application/x-prt", metadata.get(Metadata.CONTENT_TYPE)); |
| |
| // File has both a date and a description |
| assertEquals("1997-04-01T08:59:00", metadata.get(TikaCoreProperties.CREATED)); |
| assertEquals("TIKA TEST PART DESCRIPTION INFORMATION\r\n", |
| metadata.get(TikaCoreProperties.DESCRIPTION)); |
| |
| String contents = handler.toString(); |
| |
| assertContains("ITEM", contents); |
| assertContains("REQ.", contents); |
| assertContains("DESCRIPTION", contents); |
| assertContains("MAT'L", contents); |
| assertContains("TOLERANCES UNLESS", contents); |
| assertContains("FRACTIONS", contents); |
| assertContains("ANGLES", contents); |
| assertContains("Acme Corporation", contents); |
| |
| assertContains("DATE", contents); |
| assertContains("CHANGE", contents); |
| assertContains("DRAWN BY", contents); |
| assertContains("SCALE", contents); |
| assertContains("TIKA TEST DRAWING", contents); |
| assertContains("TIKA LETTERS", contents); |
| assertContains("5.82", contents); |
| assertContains("112" + '\u00b0', contents); // Degrees |
| assertContains("TIKA TEST LETTER", contents); |
| assertContains("17.11", contents); |
| assertContains('\u00d8' + "\ufffd2.000", contents); // Diameter |
| assertContains("Diameter", contents); |
| assertContains("The Apache Tika toolkit", contents); |
| } |
| } |
| } |