| /* |
| * Copyright 2008-2010 Digital Enterprise Research Institute (DERI) |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.deri.any23.mime; |
| |
| import junit.framework.Assert; |
| import org.junit.After; |
| import org.junit.Before; |
| import org.junit.Test; |
| |
| import java.io.ByteArrayInputStream; |
| import java.io.ByteArrayOutputStream; |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| |
| /** |
| * Test case for {@link org.deri.any23.mime.TikaMIMETypeDetector} class. |
| * |
| * @author juergen |
| * @author Michele Mostarda (michele.mostarda@gmail.com) |
| */ |
| public class TikaMIMETypeDetectorTest { |
| |
| private static final String PLAIN = "text/plain"; |
| private static final String HTML = "text/html"; |
| private static final String XML = "application/xml"; |
| private final static String XHTML = "application/xhtml+xml"; |
| private final static String RDFXML = "application/rdf+xml"; |
| private final static String TURTLE = "application/x-turtle"; |
| private final static String N3 = "text/rdf+n3"; |
| private final static String NQuads = "text/rdf+nq"; |
| |
| private TikaMIMETypeDetector detector; |
| |
| @Before |
| public void setUp() throws Exception { |
| detector = new TikaMIMETypeDetector(); |
| } |
| |
| @After |
| public void tearDown() throws Exception { |
| detector = null; |
| } |
| |
| @Test |
| public void testN3Detection() throws IOException { |
| assertN3Detection("<http://example.org/path> <http://foo.com> <http://example.org/Document/foo#> ."); |
| assertN3Detection("_:bnode1 <http://foo.com> _:bnode2 ."); |
| assertN3Detection("<http://www.example.com> <http://purl.org/dc/elements/1.1/title> \"x\" ."); |
| assertN3Detection("<http://www.example.com> <http://purl.org/dc/elements/1.1/title> \"x\"@it ."); |
| assertN3Detection("<http://www.example.com> <http://purl.org/dc/elements/1.1/title> \"x\"^^<http://xxx.net> ."); |
| assertN3Detection("<http://www.example.com> <http://purl.org/dc/elements/1.1/title> \"x\"^^xsd:integer ."); |
| |
| // Wrong N3 line '.' |
| assertN3DetectionFail("" + |
| "<http://wrong.example.org/path> <http://wrong.foo.com> . <http://wrong.org/Document/foo#>" |
| ); |
| // NQuads is not mislead with N3. |
| assertN3DetectionFail( |
| "<http://example.org/path> <http://foo.com> <http://dom.org/Document/foo#> <http://path/to/graph> ." |
| ); |
| } |
| |
| @Test |
| public void testNQuadsDetection() throws IOException { |
| assertNQuadsDetection( |
| "<http://www.ex.eu> <http://foo.com> <http://example.org/Document/foo#> <http://path.to.graph> ." |
| ); |
| assertNQuadsDetection( |
| "_:bnode1 <http://foo.com> _:bnode2 <http://path.to.graph> ." |
| ); |
| assertNQuadsDetection( |
| "<http://www.ex.eu> <http://purl.org/dc/elements/1.1/title> \"x\" <http://path.to.graph> ." |
| ); |
| assertNQuadsDetection( |
| "<http://www.ex.eu> <http://purl.org/dc/elements/1.1/title> \"x\"@it <http://path.to.graph> ." |
| ); |
| assertNQuadsDetection( |
| "<http://www.ex.eu> <http://dd.cc.org/1.1/p> \"xxx\"^^<http://www.sp.net/a#tt> <http://path.to.graph> ." |
| ); |
| assertNQuadsDetection( |
| "<http://www.ex.eu> <http://purlo.org/1.1/title> \"yyy\"^^xsd:datetime <http://path.to.graph> ." |
| ); |
| |
| // Wrong NQuads line. |
| assertNQuadsDetectionFail( |
| "<http://www.wrong.com> <http://wrong.com/1.1/tt> \"x\"^^<http://xxx.net/int> . <http://path.to.graph>" |
| ); |
| // N3 is not mislead with NQuads. |
| assertNQuadsDetectionFail( |
| "<http://example.org/path> <http://foo.com> <http://example.org/Document/foo#> ." |
| ); |
| } |
| |
| /* BEGIN: by content. */ |
| |
| @Test |
| public void testDetectRSS1ByContent() throws Exception { |
| detectMIMEtypeByContent("application/rdf+xml", "src/test/resources/application/rss1"); |
| } |
| |
| @Test |
| public void testDetectRSS2ByContent() throws Exception { |
| detectMIMEtypeByContent("application/rss+xml", "src/test/resources/application/rss2"); |
| } |
| |
| @Test |
| public void testDetectRDFN3ByContent() throws Exception { |
| detectMIMEtypeByContent("text/n3", "src/test/resources/application/rdfn3"); |
| } |
| |
| @Test |
| public void testDetectRDFNQuadsByContent() throws Exception { |
| detectMIMEtypeByContent("text/nq", "src/test/resources/application/nquads"); |
| } |
| |
| @Test |
| public void testDetectRDFXMLByContent() throws Exception { |
| detectMIMEtypeByContent("application/rdf+xml", "src/test/resources/application/rdfxml"); |
| } |
| |
| @Test |
| public void testDetectAtomByContent() throws Exception { |
| detectMIMEtypeByContent("application/atom+xml", "src/test/resources/application/atom"); |
| } |
| |
| @Test |
| public void testDetectHTMLByContent() throws Exception { |
| detectMIMEtypeByContent("text/html", "src/test/resources/text/html"); |
| } |
| |
| @Test |
| public void testDetectRDFaByContent() throws Exception { |
| detectMIMEtypeByContent("application/xhtml+xml", "src/test/resources/application/rdfa"); |
| } |
| |
| @Test |
| public void testDetectXHTMLByContent() throws Exception { |
| detectMIMEtypeByContent("application/xhtml+xml", "src/test/resources/application/xhtml"); |
| } |
| |
| @Test |
| public void testDetectWSDLByContent() throws Exception { |
| detectMIMEtypeByContent("application/x-wsdl", "src/test/resources/application/wsdl"); |
| } |
| |
| @Test |
| public void testDetectZIPByContent() throws Exception { |
| detectMIMEtypeByContent("application/zip", "src/test/resources/application/zip"); |
| } |
| |
| /* END: by content. */ |
| |
| /* BEGIN: by content metadata. */ |
| |
| @Test |
| public void testDetectContentPlainByMeta() throws IOException { |
| detectMIMETypeByMetadata("text/plain", "text/plain", "foo.rdf"); |
| } |
| |
| @Test |
| public void testDetectTextRDFByMeta() throws IOException { |
| detectMIMETypeByMetadata("application/rdf+xml", "text/rdf", "foo"); |
| } |
| |
| @Test |
| public void testDetectTextN3ByMeta() throws IOException { |
| detectMIMETypeByMetadata(N3, "text/rdf+n3", "foo"); |
| } |
| |
| @Test |
| public void testDetectTextNQuadsByMeta() throws IOException { |
| detectMIMETypeByMetadata(NQuads, "text/rdf+nq", "foo"); |
| } |
| |
| @Test |
| public void testDetectTextTurtleByMeta() throws IOException { |
| detectMIMETypeByMetadata(TURTLE, "text/turtle", "foo"); |
| } |
| |
| @Test |
| public void testDetectRDFXMLByMeta() throws IOException { |
| detectMIMETypeByMetadata(RDFXML, "application/rdf+xml", "foo"); |
| } |
| |
| @Test |
| public void testDetectXMLByMeta() throws IOException { |
| detectMIMETypeByMetadata(XML, "application/xml", "foo.rdf"); |
| } |
| |
| @Test |
| public void testDetectXMLByMeta2() throws IOException { |
| detectMIMETypeByMetadata(XML, "application/xml", "foo"); |
| } |
| |
| @Test |
| public void testDetectExtensionN3ByMeta() throws IOException { |
| detectMIMETypeByMetadata(PLAIN, "text/plain", "foo.n3"); |
| } |
| |
| @Test |
| public void testDetectXHTMLByMeta() throws IOException { |
| detectMIMETypeByMetadata(XHTML, "application/xhtml+xml", "foo"); |
| } |
| |
| @Test |
| public void testDetectTextHTMLByMeta() throws IOException { |
| detectMIMETypeByMetadata(HTML, "text/html", "foo"); |
| } |
| |
| @Test |
| public void testDetectTextPlainByMeta() throws IOException { |
| detectMIMETypeByMetadata(PLAIN, "text/plain", "foo.html"); |
| detectMIMETypeByMetadata(PLAIN, "text/plain", "foo.htm"); |
| detectMIMETypeByMetadata(PLAIN, "text/plain", "foo.xhtml"); |
| } |
| |
| @Test |
| public void testDetectApplicationXMLByMeta() throws IOException { |
| detectMIMETypeByMetadata(XML, "application/xml", "foo.html"); |
| detectMIMETypeByMetadata(XML, "application/xml", "foo.htm"); |
| detectMIMETypeByMetadata(XML, "application/xml", "foo.xhtml"); |
| } |
| |
| /* END: by content metadata. */ |
| |
| /* BEGIN: by content and name. */ |
| |
| @Test |
| public void testRDFXMLByContentAndName() throws Exception { |
| detectMIMETypeByContentAndName("application/rdf+xml", "src/test/resources/application/rdfxml"); |
| } |
| |
| @Test |
| public void testRSS1ByContentAndName() throws Exception { |
| detectMIMETypeByContentAndName("application/rdf+xml", "src/test/resources/application/rss1"); |
| } |
| |
| @Test |
| public void testRSS2ByContentAndName() throws Exception { |
| detectMIMETypeByContentAndName("application/rss+xml", "src/test/resources/application/rss2"); |
| } |
| |
| @Test |
| public void testDetectRDFN3ByContentAndName() throws Exception { |
| detectMIMETypeByContentAndName("text/n3", "src/test/resources/application/rdfn3"); |
| } |
| |
| @Test |
| public void testDetectRDFNQuadsByContentAndName() throws Exception { |
| detectMIMETypeByContentAndName("text/rdf+nq", "src/test/resources/application/nquads"); |
| } |
| |
| @Test |
| public void testAtomByContentAndName() throws Exception { |
| detectMIMETypeByContentAndName("application/atom+xml", "src/test/resources/application/atom"); |
| } |
| |
| @Test |
| public void testHTMLByContentAndName() throws Exception { |
| detectMIMETypeByContentAndName("text/html", "src/test/resources/text/html"); |
| } |
| |
| @Test |
| public void testXHTMLByContentAndName() throws Exception { |
| detectMIMETypeByContentAndName("application/xhtml+xml", "src/test/resources/application/xhtml"); |
| } |
| |
| @Test |
| public void testWSDLByContentAndName() throws Exception { |
| detectMIMETypeByContentAndName("application/x-wsdl", "src/test/resources/application/wsdl"); |
| } |
| |
| @Test |
| public void testZipByContentAndName() throws Exception { |
| detectMIMETypeByContentAndName("application/zip", "src/test/resources/application/zip"); |
| } |
| |
| @Test |
| public void testRDFaByContentAndName() throws Exception { |
| detectMIMETypeByContentAndName("application/xhtml+xml", "src/test/resources/application/rdfa"); |
| } |
| |
| /* END: by content and name. */ |
| |
| private void assertN3Detection(String n3Exp) throws IOException { |
| ByteArrayInputStream bais = new ByteArrayInputStream( n3Exp.getBytes() ); |
| Assert.assertTrue( TikaMIMETypeDetector.checkN3Format(bais) ); |
| } |
| |
| private void assertN3DetectionFail(String n3Exp) throws IOException { |
| ByteArrayInputStream bais = new ByteArrayInputStream( n3Exp.getBytes() ); |
| Assert.assertFalse( TikaMIMETypeDetector.checkN3Format(bais) ); |
| } |
| |
| private void assertNQuadsDetection(String n4Exp) throws IOException { |
| ByteArrayInputStream bais = new ByteArrayInputStream( n4Exp.getBytes() ); |
| Assert.assertTrue( TikaMIMETypeDetector.checkNQuadsFormat(bais) ); |
| } |
| |
| private void assertNQuadsDetectionFail(String n4Exp) throws IOException { |
| ByteArrayInputStream bais = new ByteArrayInputStream( n4Exp.getBytes() ); |
| Assert.assertFalse( TikaMIMETypeDetector.checkNQuadsFormat(bais) ); |
| } |
| |
| /** |
| * Checks the detection of a specific MIME based on content analysis. |
| * |
| * @param expectedMimeType the expected mime type. |
| * @param testDir the target file. |
| * @throws IOException |
| */ |
| private void detectMIMEtypeByContent(String expectedMimeType, String testDir) |
| throws IOException { |
| File f = new File(testDir); |
| String detectedMimeType; |
| for (File test : f.listFiles()) { |
| if (test.getName().startsWith(".")) continue; |
| InputStream is = getInputStream(test); |
| detectedMimeType = detector.guessMIMEType( |
| null, |
| is, |
| null |
| ).toString(); |
| if (test.getName().startsWith("error")) |
| Assert.assertNotSame(expectedMimeType, detectedMimeType); |
| else { |
| Assert.assertEquals( |
| String.format("Error in mimetype detection for file %s", test.getAbsolutePath()), |
| expectedMimeType, |
| detectedMimeType |
| ); |
| } |
| is.close(); |
| } |
| } |
| |
| /** |
| * Verifies the detection of a specific MIME based on content, filename and metadata MIME type. |
| * |
| * @param expectedMimeType |
| * @param contentTypeHeader |
| * @param fileName |
| * @throws IOException |
| */ |
| private void detectMIMETypeByMetadata(String expectedMimeType, String contentTypeHeader, String fileName) |
| throws IOException { |
| File f = new File(fileName); |
| if (f.getName().startsWith(".")) return; |
| |
| InputStream is = null; |
| if (f.exists()) is = getInputStream(f); |
| |
| String detectedMimeType = detector.guessMIMEType( |
| null, |
| null, |
| MIMEType.parse(contentTypeHeader) |
| ).toString(); |
| |
| if (f.getName().startsWith("error")) |
| Assert.assertNotSame(expectedMimeType, detectedMimeType); |
| else { |
| Assert.assertEquals(expectedMimeType, detectedMimeType); |
| } |
| if (is != null) |
| is.close(); |
| } |
| |
| /** |
| * Verifies the detection of a specific MIME based on content and filename. |
| * |
| * @param expectedMimeType |
| * @param testDir |
| * @throws IOException |
| */ |
| private void detectMIMETypeByContentAndName(String expectedMimeType, String testDir) throws IOException { |
| File f = new File(testDir); |
| String detectedMimeType; |
| for (File test : f.listFiles()) { |
| if (test.getName().startsWith(".")) continue; |
| InputStream is = getInputStream(test); |
| detectedMimeType = detector.guessMIMEType(test.getName(), is, null).toString(); |
| if (test.getName().startsWith("error")) |
| Assert.assertNotSame(expectedMimeType, detectedMimeType); |
| else { |
| Assert.assertEquals( |
| String.format("Error while detecting mimetype in file %s", test), |
| expectedMimeType, |
| detectedMimeType |
| ); |
| } |
| is.close(); |
| } |
| } |
| |
| /** |
| * @param file the file to be load. |
| * @return the input stream containing the file. |
| * @throws IOException |
| */ |
| private InputStream getInputStream(File file) throws IOException { |
| FileInputStream fis = new FileInputStream(file); |
| ByteArrayOutputStream bos = new ByteArrayOutputStream(); |
| byte[] buffer = new byte[4096]; |
| while (fis.read(buffer) != -1) { |
| bos.write(buffer); |
| } |
| fis.close(); |
| InputStream bais; |
| bais = new ByteArrayInputStream(bos.toByteArray()); |
| return bais; |
| } |
| |
| } |