blob: c996ef76af76b513982ab49f096f9e82ad494d0c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.parse;
// Nutch imports
import org.apache.nutch.plugin.Extension;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.NutchConfiguration;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
/**
 * Unit test for new parse plugin selection.
 * <p>
 * Every test runs against a {@link ParserFactory} built from a configuration
 * in which <code>plugin.includes</code> is <code>.*</code> and
 * <code>parse.plugin.file</code> points at
 * <code>org/apache/nutch/parse/parse-plugin-test.xml</code>.
 *
 * @author Sebastien Le Callonnec
 * @version 1.0
 */
public class TestParserFactory {

  /** URL handed to every <code>getParsers</code> call in this test. */
  private static final String TEST_URL = "http://foo.com";

  /** Plugin id expected from <code>getExtensions</code>. */
  private static final String TIKA_PLUGIN_ID = "parse-tika";

  /** Fully qualified class name expected for most content types. */
  private static final String TIKA_PARSER_CLASS = "org.apache.nutch.parse.tika.TikaParser";

  /** Fully qualified class name expected for application/x-javascript. */
  private static final String JS_PARSER_CLASS = "org.apache.nutch.parse.js.JSParseFilter";

  private Configuration conf;
  private ParserFactory parserFactory;

  /** Inits the Test Case with the test parse-plugin file */
  @Before
  public void setUp() throws Exception {
    conf = NutchConfiguration.create();
    conf.set("plugin.includes", ".*");
    conf.set("parse.plugin.file",
        "org/apache/nutch/parse/parse-plugin-test.xml");
    parserFactory = new ParserFactory(conf);
  }

  /**
   * Unit test for <code>getExtensions(String)</code> method.
   * <p>
   * Checks that the first extension returned for each of the probed content
   * types belongs to the <code>parse-tika</code> plugin.
   */
  @Test
  public void testGetExtensions() throws Exception {
    assertFirstExtensionIsTika("text/html");
    assertFirstExtensionIsTika("text/html; charset=ISO-8859-1");
    assertFirstExtensionIsTika("foo/bar");
  }

  /**
   * Unit test to check <code>getParsers</code> method.
   * <p>
   * For each probed content type exactly one parser of the expected class
   * must be returned, and the parsers obtained for <code>text/plain</code> and
   * <code>*</code> must be one and the same object.
   */
  @Test
  public void testGetParsers() throws Exception {
    assertSingleParser("text/html", TIKA_PARSER_CLASS);
    assertSingleParser("text/html; charset=ISO-8859-1", TIKA_PARSER_CLASS);
    assertSingleParser("application/x-javascript", JS_PARSER_CLASS);
    assertSingleParser("text/plain", TIKA_PARSER_CLASS);

    Parser plainTextParser = parserFactory.getParsers("text/plain", TEST_URL)[0];
    Parser wildcardParser = parserFactory.getParsers("*", TEST_URL)[0];
    // Identity is what is being tested here; comparing hashCode() values would
    // only be an indirect (and not strictly sufficient) proxy for it.
    Assert.assertSame("Different instances!", plainTextParser, wildcardParser);

    // test and make sure that the rss parser is loaded even though its
    // plugin.xml doesn't claim to support text/rss, only application/rss+xml
    // NOTE(review): the class asserted for text/rss below is the Tika parser.
    assertSingleParser("text/rss", TIKA_PARSER_CLASS);
  }

  /**
   * Asserts that the first extension returned for the given content type is
   * provided by the <code>parse-tika</code> plugin.
   *
   * @param contentType
   *          content type passed to <code>getExtensions(String)</code>
   * @throws Exception
   *           if the lookup fails
   */
  private void assertFirstExtensionIsTika(String contentType) throws Exception {
    Extension ext = parserFactory.getExtensions(contentType).get(0);
    Assert.assertEquals(TIKA_PLUGIN_ID, ext.getDescriptor().getPluginId());
  }

  /**
   * Asserts that <code>getParsers</code> returns a non-null array holding
   * exactly one parser whose class has the expected name.
   *
   * @param contentType
   *          content type passed to <code>getParsers</code>
   * @param expectedClassName
   *          fully qualified class name the single parser must have
   * @throws Exception
   *           if the lookup fails
   */
  private void assertSingleParser(String contentType, String expectedClassName)
      throws Exception {
    Parser[] parsers = parserFactory.getParsers(contentType, TEST_URL);
    Assert.assertNotNull(parsers);
    Assert.assertEquals(1, parsers.length);
    Assert.assertEquals(expectedClassName, parsers[0].getClass().getName());
  }
}