NUTCH-2805: Rename plugin urlfilter-domainblacklist (#540)
NUTCH-2805: Rename plugin urlfilter-domainblacklist
diff --git a/CHANGES.txt b/CHANGES.txt
index 76c9fc6..e5c5984 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,7 +2,9 @@
Nutch 1.18 Development
+Breaking Changes
+ - As part of NUTCH-2805, the plugin urlfilter-domainblacklist has been renamed to urlfilter-domaindenylist, and the plugin's configuration properties urlfilter.domainblacklist.rules and urlfilter.domainblacklist.file have been replaced with urlfilter.domaindenylist.rules and urlfilter.domaindenylist.file, respectively. See NUTCH-2802 for more details.
Nutch 1.17 Release 18/06/2020 (dd/mm/yyyy)
Release Report: https://s.apache.org/ovhry
diff --git a/build.xml b/build.xml
index bc8d8fb..0f6807a 100644
--- a/build.xml
+++ b/build.xml
@@ -230,7 +230,7 @@
<packageset dir="${plugins.dir}/tld/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-domain/src/java"/>
- <packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/>
+ <packageset dir="${plugins.dir}/urlfilter-domaindenylist/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-fast/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-ignoreexempt/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
@@ -739,7 +739,7 @@
<packageset dir="${plugins.dir}/tld/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-domain/src/java"/>
- <packageset dir="${plugins.dir}/urlfilter-domainblacklist/src/java"/>
+ <packageset dir="${plugins.dir}/urlfilter-domaindenylist/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-fast/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-ignoreexempt/src/java"/>
<packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
@@ -1164,8 +1164,8 @@
<source path="${plugins.dir}/urlfilter-automaton/src/test/" />
<source path="${plugins.dir}/urlfilter-domain/src/java/" />
<source path="${plugins.dir}/urlfilter-domain/src/test/" />
- <source path="${plugins.dir}/urlfilter-domainblacklist/src/java/" />
- <source path="${plugins.dir}/urlfilter-domainblacklist/src/test/" />
+ <source path="${plugins.dir}/urlfilter-domaindenylist/src/java/" />
+ <source path="${plugins.dir}/urlfilter-domaindenylist/src/test/" />
<source path="${plugins.dir}/urlfilter-fast/src/java/"/>
<source path="${plugins.dir}/urlfilter-fast/src/test/"/>
<source path="${plugins.dir}/urlfilter-ignoreexempt/src/java/" />
diff --git a/conf/domainblacklist-urlfilter.txt.template b/conf/domaindenylist-urlfilter.txt.template
similarity index 93%
rename from conf/domainblacklist-urlfilter.txt.template
rename to conf/domaindenylist-urlfilter.txt.template
index ca79a20..ff52043 100644
--- a/conf/domainblacklist-urlfilter.txt.template
+++ b/conf/domaindenylist-urlfilter.txt.template
@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# config file for urlfilter-domainblacklist plugin
+# config file for urlfilter-domaindenylist plugin
diff --git a/default.properties b/default.properties
index 7884cd5..0e406f7 100644
--- a/default.properties
+++ b/default.properties
@@ -103,7 +103,7 @@
plugins.urlfilter=\
org.apache.nutch.urlfilter.automaton*:\
org.apache.nutch.urlfilter.domain*:\
- org.apache.nutch.urlfilter.domainblacklist*:\
+ org.apache.nutch.urlfilter.domaindenylist*:\
org.apache.nutch.urlfilter.fast*:\
org.apache.nutch.urlfilter.ignoreexempt*:\
org.apache.nutch.urlfilter.prefix*:\
diff --git a/src/plugin/build.xml b/src/plugin/build.xml
index a2a0dd7..dd2a507 100755
--- a/src/plugin/build.xml
+++ b/src/plugin/build.xml
@@ -88,7 +88,7 @@
<ant dir="tld" target="deploy"/>
<ant dir="urlfilter-automaton" target="deploy"/>
<ant dir="urlfilter-domain" target="deploy" />
- <ant dir="urlfilter-domainblacklist" target="deploy" />
+ <ant dir="urlfilter-domaindenylist" target="deploy" />
<ant dir="urlfilter-fast" target="deploy"/>
<ant dir="urlfilter-prefix" target="deploy"/>
<ant dir="urlfilter-regex" target="deploy"/>
@@ -145,7 +145,7 @@
<ant dir="subcollection" target="test"/>
<ant dir="urlfilter-automaton" target="test"/>
<ant dir="urlfilter-domain" target="test"/>
- <ant dir="urlfilter-domainblacklist" target="test"/>
+ <ant dir="urlfilter-domaindenylist" target="test"/>
<ant dir="urlfilter-fast" target="test"/>
<!--ant dir="urlfilter-ignoreexempt" target="test"/-->
<ant dir="urlfilter-prefix" target="test"/>
@@ -234,7 +234,7 @@
<ant dir="tld" target="clean"/>
<ant dir="urlfilter-automaton" target="clean"/>
<ant dir="urlfilter-domain" target="clean" />
- <ant dir="urlfilter-domainblacklist" target="clean" />
+ <ant dir="urlfilter-domaindenylist" target="clean" />
<ant dir="urlfilter-fast" target="clean"/>
<ant dir="urlfilter-ignoreexempt" target="clean"/>
<ant dir="urlfilter-prefix" target="clean"/>
diff --git a/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java b/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
index d2eba1f..5388cec 100644
--- a/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
+++ b/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/package-info.java
@@ -18,7 +18,7 @@
/**
* URL filter plugin to include only URLs which match an element in a given list of
* domain suffixes, domain names, and/or host names.
- * See {@link org.apache.nutch.urlfilter.domainblacklist} for the counterpart
+ * See {@link org.apache.nutch.urlfilter.domaindenylist} for the counterpart
* (exclude URLs by host or domain).
*/
package org.apache.nutch.urlfilter.domain;
diff --git a/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java b/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
deleted file mode 100644
index 9ab207a..0000000
--- a/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.nutch.urlfilter.domainblacklist;
-
-import org.junit.Assert;
-import org.junit.Test;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.util.NutchConfiguration;
-
-public class TestDomainBlacklistURLFilter {
-
- private final static String SEPARATOR = System.getProperty("file.separator");
- private final static String SAMPLES = System.getProperty("test.data", ".");
-
- @Test
- public void testFilter() throws Exception {
-
- String domainBlacklistFile = SAMPLES + SEPARATOR + "hosts.txt";
- Configuration conf = NutchConfiguration.create();
- conf.set("urlfilter.domainblacklist.file", domainBlacklistFile);
- DomainBlacklistURLFilter domainBlacklistFilter = new DomainBlacklistURLFilter();
- domainBlacklistFilter.setConf(conf);
- Assert.assertNull(domainBlacklistFilter.filter("http://lucene.apache.org"));
- Assert.assertNull(domainBlacklistFilter.filter("http://hadoop.apache.org"));
- Assert.assertNull(domainBlacklistFilter.filter("http://www.apache.org"));
- Assert.assertNotNull(domainBlacklistFilter.filter("http://www.google.com"));
- Assert.assertNotNull(domainBlacklistFilter.filter("http://mail.yahoo.com"));
- Assert.assertNull(domainBlacklistFilter.filter("http://www.foobar.net"));
- Assert.assertNull(domainBlacklistFilter.filter("http://www.foobas.net"));
- Assert.assertNull(domainBlacklistFilter.filter("http://www.yahoo.com"));
- Assert.assertNull(domainBlacklistFilter.filter("http://www.foobar.be"));
- Assert.assertNotNull(domainBlacklistFilter.filter("http://www.adobe.com"));
- }
-
-}
diff --git a/src/plugin/urlfilter-domainblacklist/build.xml b/src/plugin/urlfilter-domaindenylist/build.xml
similarity index 93%
rename from src/plugin/urlfilter-domainblacklist/build.xml
rename to src/plugin/urlfilter-domaindenylist/build.xml
index 19ea483..f06dfc5 100644
--- a/src/plugin/urlfilter-domainblacklist/build.xml
+++ b/src/plugin/urlfilter-domaindenylist/build.xml
@@ -15,7 +15,7 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<project name="urlfilter-domainblacklist" default="jar-core">
+<project name="urlfilter-domaindenylist" default="jar-core">
<import file="../build-plugin.xml"/>
diff --git a/src/plugin/urlfilter-domainblacklist/data/hosts.txt b/src/plugin/urlfilter-domaindenylist/data/hosts.txt
similarity index 100%
rename from src/plugin/urlfilter-domainblacklist/data/hosts.txt
rename to src/plugin/urlfilter-domaindenylist/data/hosts.txt
diff --git a/src/plugin/urlfilter-domainblacklist/ivy.xml b/src/plugin/urlfilter-domaindenylist/ivy.xml
similarity index 100%
rename from src/plugin/urlfilter-domainblacklist/ivy.xml
rename to src/plugin/urlfilter-domaindenylist/ivy.xml
diff --git a/src/plugin/urlfilter-domainblacklist/plugin.xml b/src/plugin/urlfilter-domaindenylist/plugin.xml
similarity index 71%
rename from src/plugin/urlfilter-domainblacklist/plugin.xml
rename to src/plugin/urlfilter-domaindenylist/plugin.xml
index 04eee6e..d1a35f5 100644
--- a/src/plugin/urlfilter-domainblacklist/plugin.xml
+++ b/src/plugin/urlfilter-domaindenylist/plugin.xml
@@ -16,13 +16,13 @@
limitations under the License.
-->
<plugin
- id="urlfilter-domainblacklist"
- name="Domain Blacklist URL Filter"
+ id="urlfilter-domaindenylist"
+ name="Domain Denylist URL Filter"
version="1.0.0"
provider-name="nutch.org">
<runtime>
- <library name="urlfilter-domainblacklist.jar">
+ <library name="urlfilter-domaindenylist.jar">
<export name="*"/>
</library>
</runtime>
@@ -31,12 +31,12 @@
<import plugin="nutch-extensionpoints"/>
</requires>
- <extension id="org.apache.nutch.net.urlfilter.domainblacklist"
- name="Nutch Domain Blacklist URL Filter"
+ <extension id="org.apache.nutch.net.urlfilter.domaindenylist"
+ name="Nutch Domain Denylist URL Filter"
point="org.apache.nutch.net.URLFilter">
- <implementation id="DomainBlacklistURLFilter"
- class="org.apache.nutch.urlfilter.domainblacklist.DomainBlacklistURLFilter">
- <parameter name="file" value="domainblacklist-urlfilter.txt"/>
+ <implementation id="DomainDenylistURLFilter"
+ class="org.apache.nutch.urlfilter.domaindenylist.DomainDenylistURLFilter">
+ <parameter name="file" value="domaindenylist-urlfilter.txt"/>
</implementation>
</extension>
diff --git a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/DomainDenylistURLFilter.java
similarity index 89%
rename from src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java
rename to src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/DomainDenylistURLFilter.java
index 77c238b..58e3754 100644
--- a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java
+++ b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/DomainDenylistURLFilter.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.nutch.urlfilter.domainblacklist;
+package org.apache.nutch.urlfilter.domaindenylist;
import java.lang.invoke.MethodHandles;
import java.io.BufferedReader;
@@ -62,12 +62,12 @@
* overriding the more specific.
* </p>
*
- * The domain file defaults to domainblacklist-urlfilter.txt in the classpath
+ * The domain file defaults to domaindenylist-urlfilter.txt in the classpath
* but can be overridden using the:
*
* <ul>
* <li>
- * property "urlfilter.domainblacklist.file" in ./conf/nutch-*.xml, and
+ * property "urlfilter.domaindenylist.file" in ./conf/nutch-*.xml, and
* </li>
* <li>
* attribute "file" in plugin.xml of this plugin
@@ -75,7 +75,7 @@
* </ul>
*
*/
-public class DomainBlacklistURLFilter implements URLFilter {
+public class DomainDenylistURLFilter implements URLFilter {
private static final Logger LOG = LoggerFactory
.getLogger(MethodHandles.lookup().lookupClass());
@@ -105,7 +105,7 @@
this.conf = conf;
// get the extensions for domain urlfilter
- String pluginName = "urlfilter-domainblacklist";
+ String pluginName = "urlfilter-domaindenylist";
Extension[] extensions = PluginRepository.get(conf)
.getExtensionPoint(URLFilter.class.getName()).getExtensions();
for (int i = 0; i < extensions.length; i++) {
@@ -127,11 +127,11 @@
// precedence hierarchy for definition of filter rules
// (first non-empty definition takes precedence):
- // 1. string rules defined by `urlfilter.domainblacklist.rules`
- // 2. rule file name defined by `urlfilter.domainblacklist.file`
+ // 1. string rules defined by `urlfilter.domaindenylist.rules`
+ // 2. rule file name defined by `urlfilter.domaindenylist.file`
// 3. rule file name defined in plugin.xml (`attributeFile`)
- String stringRules = conf.get("urlfilter.domainblacklist.rules");
- String file = conf.get("urlfilter.domainblacklist.file", attributeFile);
+ String stringRules = conf.get("urlfilter.domaindenylist.rules");
+ String file = conf.get("urlfilter.domaindenylist.file", attributeFile);
Reader reader = null;
if (stringRules != null) { // takes precedence over files
reader = new StringReader(stringRules);
diff --git a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/package-info.java
similarity index 94%
rename from src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java
rename to src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/package-info.java
index 1f0022c..401d12f 100644
--- a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/package-info.java
+++ b/src/plugin/urlfilter-domaindenylist/src/java/org/apache/nutch/urlfilter/domaindenylist/package-info.java
@@ -20,5 +20,5 @@
* See {@link org.apache.nutch.urlfilter.domain} for the counterpart (include only URLs
* matching host or domain).
*/
-package org.apache.nutch.urlfilter.domainblacklist;
+package org.apache.nutch.urlfilter.domaindenylist;
diff --git a/src/plugin/urlfilter-domaindenylist/src/test/org/apache/nutch/urlfilter/domaindenylist/TestDomainDenylistURLFilter.java b/src/plugin/urlfilter-domaindenylist/src/test/org/apache/nutch/urlfilter/domaindenylist/TestDomainDenylistURLFilter.java
new file mode 100644
index 0000000..0dde234
--- /dev/null
+++ b/src/plugin/urlfilter-domaindenylist/src/test/org/apache/nutch/urlfilter/domaindenylist/TestDomainDenylistURLFilter.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.urlfilter.domaindenylist;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.util.NutchConfiguration;
+
+public class TestDomainDenylistURLFilter {
+
+ private final static String SEPARATOR = System.getProperty("file.separator");
+ private final static String SAMPLES = System.getProperty("test.data", ".");
+
+ @Test
+ public void testFilter() throws Exception {
+
+ String domainDenylistFile = SAMPLES + SEPARATOR + "hosts.txt";
+ Configuration conf = NutchConfiguration.create();
+ conf.set("urlfilter.domaindenylist.file", domainDenylistFile);
+ DomainDenylistURLFilter domainDenylistFilter = new DomainDenylistURLFilter();
+ domainDenylistFilter.setConf(conf);
+ Assert.assertNull(domainDenylistFilter.filter("http://lucene.apache.org"));
+ Assert.assertNull(domainDenylistFilter.filter("http://hadoop.apache.org"));
+ Assert.assertNull(domainDenylistFilter.filter("http://www.apache.org"));
+ Assert.assertNotNull(domainDenylistFilter.filter("http://www.google.com"));
+ Assert.assertNotNull(domainDenylistFilter.filter("http://mail.yahoo.com"));
+ Assert.assertNull(domainDenylistFilter.filter("http://www.foobar.net"));
+ Assert.assertNull(domainDenylistFilter.filter("http://www.foobas.net"));
+ Assert.assertNull(domainDenylistFilter.filter("http://www.yahoo.com"));
+ Assert.assertNull(domainDenylistFilter.filter("http://www.foobar.be"));
+ Assert.assertNotNull(domainDenylistFilter.filter("http://www.adobe.com"));
+ }
+
+}
diff --git a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
index 61c6f17..c54740a 100644
--- a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
+++ b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
@@ -139,8 +139,8 @@
// precedence hierarchy for definition of filter rules
// (first non-empty definition takes precedence):
- // 1. string rules defined by `urlfilter.domainblacklist.rules`
- // 2. rule file name defined by `urlfilter.domainblacklist.file`
+ // 1. string rules defined by `urlfilter.prefix.rules`
+ // 2. rule file name defined by `urlfilter.prefix.file`
// 3. rule file name defined in plugin.xml (`attributeFile`)
String file = conf.get("urlfilter.prefix.file", attributeFile);
String stringRules = conf.get("urlfilter.prefix.rules");
diff --git a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
index 3833f3c..a9c2023 100644
--- a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
+++ b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
@@ -273,8 +273,8 @@
// precedence hierarchy for definition of filter rules
// (first non-empty definition takes precedence):
- // 1. string rules defined by `urlfilter.domainblacklist.rules`
- // 2. rule file name defined by `urlfilter.domainblacklist.file`
+ // 1. string rules defined by `urlfilter.suffix.rules`
+ // 2. rule file name defined by `urlfilter.suffix.file`
// 3. rule file name defined in plugin.xml (`attributeFile`)
String file = conf.get("urlfilter.suffix.file", attributeFile);
String stringRules = conf.get("urlfilter.suffix.rules");