Merge pull request #542 from sebastian-nagel/NUTCH-2810

NUTCH-2810 FreeGenerator to actually apply configured number of fetch lists
diff --git a/.asf.yaml b/.asf.yaml
new file mode 100644
index 0000000..aa9a939
--- /dev/null
+++ b/.asf.yaml
@@ -0,0 +1,16 @@
+github:
+  description: "Apache Nutch is an extensible and scalable web crawler"
+  homepage: https://nutch.apache.org/
+  labels:
+    - web-crawler
+    - crawling
+    - java
+    - nutch
+    - hadoop
+    - apache
+
+notifications:
+  commits:      commits@nutch.apache.org
+  issues:       dev@nutch.apache.org
+  pullrequests: dev@nutch.apache.org
+  jira_options: link label comment
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index 26f3d0e..6e634af 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -4,7 +4,7 @@
 * there is an open issue on the [Nutch issue tracker](https://issues.apache.org/jira/projects/NUTCH) which describes the problem or the improvement. We cannot accept pull requests without an issue because the change wouldn't be listed in the release notes.
 * the issue ID (`NUTCH-XXXX`)
   - is referenced in the title of the pull request
-  - and placed in front of your commit messages
+  - and placed in front of your commit messages surrounded by square brackets (`[NUTCH-XXXX] Issue or pull request title`)
 * commits are squashed into a single one (or few commits for larger changes)
 * Java source code follows [Nutch Eclipse Code Formatting rules](https://github.com/apache/nutch/blob/master/eclipse-codeformat.xml)
 * Nutch is successfully built and unit tests pass by running `ant clean runtime test`
diff --git a/.github/workflows/master-build.yml b/.github/workflows/master-build.yml
new file mode 100644
index 0000000..7e74840
--- /dev/null
+++ b/.github/workflows/master-build.yml
@@ -0,0 +1,41 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: master pr build
+
+on:
+  push:
+    branches: [ master ]
+  pull_request:
+    branches: [ master ]
+        
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        java: [ '1.8' ]
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up JDK ${{ matrix.java }}
+        uses: actions/setup-java@v1
+        with:
+          java-version: ${{ matrix.java }}
+      - name: Build with Ant
+        run: ant clean nightly javadoc -buildfile build.xml
diff --git a/CHANGES.txt b/CHANGES.txt
index 76c9fc6..e5c5984 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,7 +2,9 @@
 
 Nutch 1.18 Development
 
+Breaking Changes
 
+    -  As part of NUTCH-2805, the plugin urlfilter-domainblacklist has been renamed to urlfilter-domaindenylist. The plugin's configuration properties urlfilter.domainblacklist.rules and urlfilter.domainblacklist.file have been replaced with urlfilter.domaindenylist.rules and urlfilter.domaindenylist.file, respectively. See NUTCH-2802 for more details.
 
 Nutch 1.17 Release 18/06/2020 (dd/mm/yyyy)
 Release Report: https://s.apache.org/ovhry
diff --git a/build.xml b/build.xml
index bc8d8fb..0f6807a 100644
--- a/build.xml
+++ b/build.xml
@@ -230,7 +230,7 @@
       <packageset dir="${plugins.dir}/tld/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/>
-      <packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-domaindenylist/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-fast/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-ignoreexempt/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
@@ -739,7 +739,7 @@
       <packageset dir="${plugins.dir}/tld/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/>
-      <packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/>
+      <packageset dir="${plugins.dir}/urlfilter-domaindenylist/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-fast/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-ignoreexempt/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
@@ -1164,8 +1164,8 @@
         <source path="${plugins.dir}/urlfilter-automaton/src/test/" />
         <source path="${plugins.dir}/urlfilter-domain/src/java/" />
         <source path="${plugins.dir}/urlfilter-domain/src/test/" />
-        <source path="${plugins.dir}/urlfilter-domainblacklist/src/java/" />
-        <source path="${plugins.dir}/urlfilter-domainblacklist/src/test/" />
+        <source path="${plugins.dir}/urlfilter-domaindenylist/src/java/" />
+        <source path="${plugins.dir}/urlfilter-domaindenylist/src/test/" />
         <source path="${plugins.dir}/urlfilter-fast/src/java/"/>
         <source path="${plugins.dir}/urlfilter-fast/src/test/"/>
         <source path="${plugins.dir}/urlfilter-ignoreexempt/src/java/" />
diff --git a/conf/domainblacklist-urlfilter.txt.template b/conf/domaindenylist-urlfilter.txt.template
similarity index 93%
rename from conf/domainblacklist-urlfilter.txt.template
rename to conf/domaindenylist-urlfilter.txt.template
index ca79a20..ff52043 100644
--- a/conf/domainblacklist-urlfilter.txt.template
+++ b/conf/domaindenylist-urlfilter.txt.template
@@ -13,4 +13,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# config file for urlfilter-domainblacklist plugin
+# config file for urlfilter-domaindenylist plugin
diff --git a/default.properties b/default.properties
index 7884cd5..0e406f7 100644
--- a/default.properties
+++ b/default.properties
@@ -103,7 +103,7 @@
 plugins.urlfilter=\
    org.apache.nutch.urlfilter.automaton*:\
    org.apache.nutch.urlfilter.domain*:\
-   org.apache.nutch.urlfilter.domainblacklist*:\
+   org.apache.nutch.urlfilter.domaindenylist*:\
    org.apache.nutch.urlfilter.fast*:\
    org.apache.nutch.urlfilter.ignoreexempt*:\
    org.apache.nutch.urlfilter.prefix*:\
diff --git a/src/java/org/apache/nutch/protocol/RobotRulesParser.java b/src/java/org/apache/nutch/protocol/RobotRulesParser.java
index 159f34f..2cb52a6 100644
--- a/src/java/org/apache/nutch/protocol/RobotRulesParser.java
+++ b/src/java/org/apache/nutch/protocol/RobotRulesParser.java
@@ -133,7 +133,7 @@
 
     String[] confWhiteList = conf.getStrings("http.robot.rules.whitelist");
     if (confWhiteList == null) {
-      LOG.info("robots.txt whitelist not configured.");
+      LOG.debug("robots.txt whitelist not configured.");
     }
     else {
       for (int i = 0; i < confWhiteList.length; i++) {
@@ -262,14 +262,16 @@
           "",
           "<agent-names>\tcomma-separated list of agent names",
           "\tused to select rules from the robots.txt file.",
-          "\tIf no agent name is given the property http.agent.name is used.",
-          "\tIf http.agent.name is empty, robots.txt is checked for rules",
-          "\tassigned to the user agent `*' (meaning any other).",
+          "\tIf no agent name is given the properties http.agent.name",
+          "\tand http.robots.agents are used.",
+          "\tIf also http.agent.name and http.robots.agents are empty,",
+          "\trobots.txt is checked for rules assigned to the user",
+          "\tagent `*' (meaning any other).",
           "",
           "Important properties:",
           " -D fetcher.store.robotstxt=true",
           "\toutput content and HTTP meta data of fetched robots.txt (if not a local file)",
-          " -D http.agent.name=...\tsame as argument <agent-names>",
+          " -D http.agent.name=...\t(primary) agent name",
           " -D http.robots.agents=...\tadditional agent names",
           " -D http.robot.rules.whitelist=..."};
       for (String s : help) {
@@ -315,7 +317,8 @@
     if (args.length > 2) {
       // set agent name from command-line in configuration and update parser
       String agents = args[2];
-      conf.set("http.agent.name", agents);
+      conf.set("http.robots.agents", agents);
+      conf.set("http.agent.name", agents.split(",")[0]);
       setConf(conf);
     }
 
@@ -376,13 +379,24 @@
    */
   private static class TestRobotRulesParser extends RobotRulesParser {
 
-    public TestRobotRulesParser(Configuration conf) {
-      // make sure that agent name is set so that setConf() does not complain,
-      // the agent name is later overwritten by command-line argument
-      if (conf.get("http.agent.name") == null) {
-        conf.set("http.agent.name", "*");
+    public void setConf(Configuration conf) {
+      /*
+       * Make sure that agent name is not empty so that
+       * RobotRulesParser.setConf() does not complain.
+       * 
+       * If provided, the agent names passed as command-line argument are
+       * checked, see RobotRulesParser.run(...). In that case http.agent.name
+       * is set to the first agent name from the command line.
+       */
+      if (conf.get("http.agent.name", "").isEmpty()) {
+        String firstRobotsAgent = conf.get("http.robots.agents", "").split(",")[0].trim();
+        if (firstRobotsAgent.isEmpty()) {
+          conf.set("http.agent.name", "*");
+        } else {
+          conf.set("http.agent.name", firstRobotsAgent);
+        }
       }
-      setConf(conf);
+      super.setConf(conf);
     }
 
     /**
@@ -407,7 +421,7 @@
           openStream.read(robotsBytes);
           openStream.close();
           rules = robotParser.parseContent(url.toString(), robotsBytes,
-              "text/plain", this.conf.get("http.agent.name"));
+              "text/plain", agentNames);
         } catch (IOException e) {
           LOG.error("Failed to open robots.txt file " + url
               + StringUtils.stringifyException(e));
@@ -421,7 +435,7 @@
 
   public static void main(String[] args) throws Exception {
     Configuration conf = NutchConfiguration.create();
-    int res = ToolRunner.run(conf, new TestRobotRulesParser(conf), args);
+    int res = ToolRunner.run(conf, new TestRobotRulesParser(), args);
     System.exit(res);
   }
 
diff --git a/src/plugin/build.xml b/src/plugin/build.xml
index a2a0dd7..dd2a507 100755
--- a/src/plugin/build.xml
+++ b/src/plugin/build.xml
@@ -88,7 +88,7 @@
     <ant dir="tld" target="deploy"/>
     <ant dir="urlfilter-automaton" target="deploy"/>
     <ant dir="urlfilter-domain" target="deploy" />
-    <ant dir="urlfilter-domainblacklist" target="deploy" />
+    <ant dir="urlfilter-domaindenylist" target="deploy" />
     <ant dir="urlfilter-fast" target="deploy"/>
     <ant dir="urlfilter-prefix" target="deploy"/>
     <ant dir="urlfilter-regex" target="deploy"/>
@@ -145,7 +145,7 @@
      <ant dir="subcollection" target="test"/>
      <ant dir="urlfilter-automaton" target="test"/>
      <ant dir="urlfilter-domain" target="test"/>
-     <ant dir="urlfilter-domainblacklist" target="test"/>
+     <ant dir="urlfilter-domaindenylist" target="test"/>
      <ant dir="urlfilter-fast" target="test"/>
      <!--ant dir="urlfilter-ignoreexempt" target="test"/-->
      <ant dir="urlfilter-prefix" target="test"/>
@@ -234,7 +234,7 @@
     <ant dir="tld" target="clean"/>
     <ant dir="urlfilter-automaton" target="clean"/>
     <ant dir="urlfilter-domain" target="clean" />
-    <ant dir="urlfilter-domainblacklist" target="clean" />
+    <ant dir="urlfilter-domaindenylist" target="clean" />
     <ant dir="urlfilter-fast" target="clean"/>
     <ant dir="urlfilter-ignoreexempt" target="clean"/>
     <ant dir="urlfilter-prefix" target="clean"/>
diff --git a/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java b/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
index d2eba1f..5388cec 100644
--- a/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
+++ b/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
@@ -18,7 +18,7 @@
 /**
  * URL filter plugin to include only URLs which match an element in a given list of
  * domain suffixes, domain names, and/or host names.
- * See {@link org.apache.nutch.urlfilter.domainblacklist} for the counterpart
+ * See {@link org.apache.nutch.urlfilter.domaindenylist} for the counterpart
  * (exclude URLs by host or domain).
  */
 package org.apache.nutch.urlfilter.domain;
diff --git a/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java b/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
deleted file mode 100644
index 9ab207a..0000000
--- a/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.urlfilter.domainblacklist;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.util.NutchConfiguration;
-
-public class TestDomainBlacklistURLFilter {
-
-  private final static String SEPARATOR = System.getProperty("file.separator");
-  private final static String SAMPLES = System.getProperty("test.data", ".");
-
-  @Test
-  public void testFilter() throws Exception {
-
-    String domainBlacklistFile = SAMPLES + SEPARATOR + "hosts.txt";
-    Configuration conf = NutchConfiguration.create();
-    conf.set("urlfilter.domainblacklist.file", domainBlacklistFile);
-    DomainBlacklistURLFilter domainBlacklistFilter = new DomainBlacklistURLFilter();
-    domainBlacklistFilter.setConf(conf);
-    Assert.assertNull(domainBlacklistFilter.filter("http://lucene.apache.org"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://hadoop.apache.org"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://www.apache.org"));
-    Assert.assertNotNull(domainBlacklistFilter.filter("http://www.google.com"));
-    Assert.assertNotNull(domainBlacklistFilter.filter("http://mail.yahoo.com"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://www.foobar.net"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://www.foobas.net"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://www.yahoo.com"));
-    Assert.assertNull(domainBlacklistFilter.filter("http://www.foobar.be"));
-    Assert.assertNotNull(domainBlacklistFilter.filter("http://www.adobe.com"));
-  }
-
-}
diff --git a/src/plugin/urlfilter-domainblacklist/build.xml b/src/plugin/urlfilter-domaindenylist/build.xml
similarity index 93%
rename from src/plugin/urlfilter-domainblacklist/build.xml
rename to src/plugin/urlfilter-domaindenylist/build.xml
index 19ea483..f06dfc5 100644
--- a/src/plugin/urlfilter-domainblacklist/build.xml
+++ b/src/plugin/urlfilter-domaindenylist/build.xml
@@ -15,7 +15,7 @@
  See the License for the specific language governing permissions and
  limitations under the License.
 -->
-<project name="urlfilter-domainblacklist" default="jar-core">
+<project name="urlfilter-domaindenylist" default="jar-core">
 
   <import file="../build-plugin.xml"/>
 
diff --git a/src/plugin/urlfilter-domainblacklist/data/hosts.txt b/src/plugin/urlfilter-domaindenylist/data/hosts.txt
similarity index 100%
rename from src/plugin/urlfilter-domainblacklist/data/hosts.txt
rename to src/plugin/urlfilter-domaindenylist/data/hosts.txt
diff --git a/src/plugin/urlfilter-domainblacklist/ivy.xml b/src/plugin/urlfilter-domaindenylist/ivy.xml
similarity index 100%
rename from src/plugin/urlfilter-domainblacklist/ivy.xml
rename to src/plugin/urlfilter-domaindenylist/ivy.xml
diff --git a/src/plugin/urlfilter-domainblacklist/plugin.xml b/src/plugin/urlfilter-domaindenylist/plugin.xml
similarity index 71%
rename from src/plugin/urlfilter-domainblacklist/plugin.xml
rename to src/plugin/urlfilter-domaindenylist/plugin.xml
index 04eee6e..d1a35f5 100644
--- a/src/plugin/urlfilter-domainblacklist/plugin.xml
+++ b/src/plugin/urlfilter-domaindenylist/plugin.xml
@@ -16,13 +16,13 @@
  limitations under the License.
 -->
 <plugin
-   id="urlfilter-domainblacklist"
-   name="Domain Blacklist URL Filter"
+   id="urlfilter-domaindenylist"
+   name="Domain Denylist URL Filter"
    version="1.0.0"
    provider-name="nutch.org">
 
    <runtime>
-      <library name="urlfilter-domainblacklist.jar">
+      <library name="urlfilter-domaindenylist.jar">
          <export name="*"/>
       </library>
    </runtime>
@@ -31,12 +31,12 @@
       <import plugin="nutch-extensionpoints"/>
    </requires>
 
-   <extension id="org.apache.nutch.net.urlfilter.domainblacklist"
-              name="Nutch Domain Blacklist URL Filter"
+   <extension id="org.apache.nutch.net.urlfilter.domaindenylist"
+              name="Nutch Domain Denylist URL Filter"
               point="org.apache.nutch.net.URLFilter">
-      <implementation id="DomainBlacklistURLFilter"
-        class="org.apache.nutch.urlfilter.domainblacklist.DomainBlacklistURLFilter">
-        <parameter name="file" value="domainblacklist-urlfilter.txt"/>
+      <implementation id="DomainDenylistURLFilter"
+        class="org.apache.nutch.urlfilter.domaindenylist.DomainDenylistURLFilter">
+        <parameter name="file" value="domaindenylist-urlfilter.txt"/>
       </implementation>
    </extension>
 
diff --git a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/DomainDenylistURLFilter.java
similarity index 89%
rename from src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java
rename to src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/DomainDenylistURLFilter.java
index 77c238b..58e3754 100644
--- a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java
+++ b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/DomainDenylistURLFilter.java
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.nutch.urlfilter.domainblacklist;
+package org.apache.nutch.urlfilter.domaindenylist;
 
 import java.lang.invoke.MethodHandles;
 import java.io.BufferedReader;
@@ -62,12 +62,12 @@
  * overriding the more specific.
  * </p>
  * 
- * The domain file defaults to domainblacklist-urlfilter.txt in the classpath
+ * The domain file defaults to domaindenylist-urlfilter.txt in the classpath
  * but can be overridden using the:
  * 
  * <ul>
  * <li>
- * property "urlfilter.domainblacklist.file" in ./conf/nutch-*.xml, and
+ * property "urlfilter.domaindenylist.file" in ./conf/nutch-*.xml, and
  * </li>
  * <li>
  * attribute "file" in plugin.xml of this plugin
@@ -75,7 +75,7 @@
  * </ul>
  * 
  */
-public class DomainBlacklistURLFilter implements URLFilter {
+public class DomainDenylistURLFilter implements URLFilter {
 
   private static final Logger LOG = LoggerFactory
       .getLogger(MethodHandles.lookup().lookupClass());
@@ -105,7 +105,7 @@
     this.conf = conf;
 
     // get the extensions for domain urlfilter
-    String pluginName = "urlfilter-domainblacklist";
+    String pluginName = "urlfilter-domaindenylist";
     Extension[] extensions = PluginRepository.get(conf)
         .getExtensionPoint(URLFilter.class.getName()).getExtensions();
     for (int i = 0; i < extensions.length; i++) {
@@ -127,11 +127,11 @@
 
     // precedence hierarchy for definition of filter rules
     // (first non-empty definition takes precedence):
-    // 1. string rules defined by `urlfilter.domainblacklist.rules`
-    // 2. rule file name defined by `urlfilter.domainblacklist.file`
+    // 1. string rules defined by `urlfilter.domaindenylist.rules`
+    // 2. rule file name defined by `urlfilter.domaindenylist.file`
     // 3. rule file name defined in plugin.xml (`attributeFile`)
-    String stringRules = conf.get("urlfilter.domainblacklist.rules");
-    String file = conf.get("urlfilter.domainblacklist.file", attributeFile);
+    String stringRules = conf.get("urlfilter.domaindenylist.rules");
+    String file = conf.get("urlfilter.domaindenylist.file", attributeFile);
     Reader reader = null;
     if (stringRules != null) { // takes precedence over files
       reader = new StringReader(stringRules);
diff --git a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/package-info.java
similarity index 94%
rename from src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java
rename to src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/package-info.java
index 1f0022c..401d12f 100644
--- a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java
+++ b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/package-info.java
@@ -20,5 +20,5 @@
  * See {@link org.apache.nutch.urlfilter.domain} for the counterpart (include only URLs
  * matching host or domain).
  */
-package org.apache.nutch.urlfilter.domainblacklist;
+package org.apache.nutch.urlfilter.domaindenylist;
 
diff --git a/src/plugin/urlfilter-domaindenylist/src/test/org/apache/nutch/urlfilter/domaindenylist/TestDomainDenylistURLFilter.java b/src/plugin/urlfilter-domaindenylist/src/test/org/apache/nutch/urlfilter/domaindenylist/TestDomainDenylistURLFilter.java
new file mode 100644
index 0000000..0dde234
--- /dev/null
+++ b/src/plugin/urlfilter-domaindenylist/src/test/org/apache/nutch/urlfilter/domaindenylist/TestDomainDenylistURLFilter.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.domaindenylist;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
+
+public class TestDomainDenylistURLFilter {
+
+  private final static String SEPARATOR = System.getProperty("file.separator");
+  private final static String SAMPLES = System.getProperty("test.data", ".");
+
+  @Test
+  public void testFilter() throws Exception {
+
+    String domainDenylistFile = SAMPLES + SEPARATOR + "hosts.txt";
+    Configuration conf = NutchConfiguration.create();
+    conf.set("urlfilter.domaindenylist.file", domainDenylistFile);
+    DomainDenylistURLFilter domainDenylistFilter = new DomainDenylistURLFilter();
+    domainDenylistFilter.setConf(conf);
+    Assert.assertNull(domainDenylistFilter.filter("http://lucene.apache.org"));
+    Assert.assertNull(domainDenylistFilter.filter("http://hadoop.apache.org"));
+    Assert.assertNull(domainDenylistFilter.filter("http://www.apache.org"));
+    Assert.assertNotNull(domainDenylistFilter.filter("http://www.google.com"));
+    Assert.assertNotNull(domainDenylistFilter.filter("http://mail.yahoo.com"));
+    Assert.assertNull(domainDenylistFilter.filter("http://www.foobar.net"));
+    Assert.assertNull(domainDenylistFilter.filter("http://www.foobas.net"));
+    Assert.assertNull(domainDenylistFilter.filter("http://www.yahoo.com"));
+    Assert.assertNull(domainDenylistFilter.filter("http://www.foobar.be"));
+    Assert.assertNotNull(domainDenylistFilter.filter("http://www.adobe.com"));
+  }
+
+}
diff --git a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
index 61c6f17..c54740a 100644
--- a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
+++ b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
@@ -139,8 +139,8 @@
 
     // precedence hierarchy for definition of filter rules
     // (first non-empty definition takes precedence):
-    // 1. string rules defined by `urlfilter.domainblacklist.rules`
-    // 2. rule file name defined by `urlfilter.domainblacklist.file`
+    // 1. string rules defined by `urlfilter.prefix.rules`
+    // 2. rule file name defined by `urlfilter.prefix.file`
     // 3. rule file name defined in plugin.xml (`attributeFile`)
     String file = conf.get("urlfilter.prefix.file", attributeFile);
     String stringRules = conf.get("urlfilter.prefix.rules");
diff --git a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
index 3833f3c..a9c2023 100644
--- a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
+++ b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
@@ -273,8 +273,8 @@
 
     // precedence hierarchy for definition of filter rules
     // (first non-empty definition takes precedence):
-    // 1. string rules defined by `urlfilter.domainblacklist.rules`
-    // 2. rule file name defined by `urlfilter.domainblacklist.file`
+    // 1. string rules defined by `urlfilter.suffix.rules`
+    // 2. rule file name defined by `urlfilter.suffix.file`
     // 3. rule file name defined in plugin.xml (`attributeFile`)
     String file = conf.get("urlfilter.suffix.file", attributeFile);
     String stringRules = conf.get("urlfilter.suffix.rules");