NUTCH-2419 Some URL filters and normalizers do not respect command-line override for rule file

- simplify selection of rule file (from property or attribute in plugin.xml)
diff --git a/src/plugin/parsefilter-regex/src/java/org/apache/nutch/parsefilter/regex/RegexParseFilter.java b/src/plugin/parsefilter-regex/src/java/org/apache/nutch/parsefilter/regex/RegexParseFilter.java
index c84f27c..6e86fc6 100644
--- a/src/plugin/parsefilter-regex/src/java/org/apache/nutch/parsefilter/regex/RegexParseFilter.java
+++ b/src/plugin/parsefilter-regex/src/java/org/apache/nutch/parsefilter/regex/RegexParseFilter.java
@@ -120,14 +120,8 @@
       }
     }
 
-    // domain file and attribute "file" take precedence if defined
-    String file = conf.get("parsefilter.regex.file");
+    String file = conf.get("parsefilter.regex.file", attributeFile);
     String stringRules = conf.get("parsefilter.regex.rules");
-    if (file != null) {
-      // take file
-    } else if (attributeFile != null) {
-      file = attributeFile;
-    }
     Reader reader = null;
     if (stringRules != null) { // takes precedence over files
       reader = new StringReader(stringRules);
diff --git a/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java b/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java
index fac02af..f629262 100644
--- a/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java
+++ b/src/plugin/urlfilter-domain/src/java/org/apache/nutch/urlfilter/domain/DomainURLFilter.java
@@ -59,7 +59,7 @@
  * such as lucene.apache.org and hadoop.apache.org. The third line would allow
  * only URLs from www.apache.org. There is no specific ordering to entries. The
  * entries are from more general to more specific with the more general
- * overridding the more specific.
+ * overriding the more specific.
  * </p>
  * 
  * The domain file defaults to domain-urlfilter.txt in the classpath but can be
@@ -130,16 +130,11 @@
     // 2. rule file name defined by `urlfilter.domain.file`
     // 3. rule file name defined in plugin.xml (`attributeFile`)
     String stringRules = conf.get("urlfilter.domain.rules");
-    String file = conf.get("urlfilter.domain.file");
+    String file = conf.get("urlfilter.domain.file", attributeFile);
     Reader reader = null;
     if (stringRules != null) { // takes precedence over files
       reader = new StringReader(stringRules);
     } else {
-      if (file != null) {
-        // take file
-      } else if (attributeFile != null) {
-        file = attributeFile;
-      }
       LOG.info("Reading {} rules file {}", pluginName, file);
       reader = conf.getConfResourceAsReader(file);
     }
diff --git a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java b/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java
index 56b11e9..77c238b 100644
--- a/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java
+++ b/src/plugin/urlfilter-domainblacklist/src/java/org/apache/nutch/urlfilter/domainblacklist/DomainBlacklistURLFilter.java
@@ -59,7 +59,7 @@
  * such as lucene.apache.org and hadoop.apache.org. The third line would exclude
  * only URLs from www.apache.org. There is no specific ordering to entries. The
  * entries are from more general to more specific with the more general
- * overridding the more specific.
+ * overriding the more specific.
  * </p>
  * 
  * The domain file defaults to domainblacklist-urlfilter.txt in the classpath
@@ -131,16 +131,11 @@
     // 2. rule file name defined by `urlfilter.domainblacklist.file`
     // 3. rule file name defined in plugin.xml (`attributeFile`)
     String stringRules = conf.get("urlfilter.domainblacklist.rules");
-    String file = conf.get("urlfilter.domainblacklist.file");
+    String file = conf.get("urlfilter.domainblacklist.file", attributeFile);
     Reader reader = null;
     if (stringRules != null) { // takes precedence over files
       reader = new StringReader(stringRules);
     } else {
-      if (file != null) {
-        // take file
-      } else if (attributeFile != null) {
-        file = attributeFile;
-      }
       LOG.info("Reading {} rules file {}", pluginName, file);
       reader = conf.getConfResourceAsReader(file);
     }
diff --git a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
index eeef9cf..61c6f17 100644
--- a/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
+++ b/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
@@ -142,17 +142,12 @@
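-    // 1. string rules defined by `urlfilter.domainblacklist.rules`
-    // 2. rule file name defined by `urlfilter.domainblacklist.file`
+    // 1. string rules defined by `urlfilter.prefix.rules`
+    // 2. rule file name defined by `urlfilter.prefix.file`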
     // 3. rule file name defined in plugin.xml (`attributeFile`)
-    String file = conf.get("urlfilter.prefix.file");
+    String file = conf.get("urlfilter.prefix.file", attributeFile);
     String stringRules = conf.get("urlfilter.prefix.rules");
     Reader reader = null;
     if (stringRules != null) { // takes precedence over files
       reader = new StringReader(stringRules);
     } else {
-      if (file != null) {
-        // take file
-      } else if (attributeFile != null) {
-        file = attributeFile;
-      }
       LOG.info("Reading {} rules file {}", pluginName, file);
       reader = conf.getConfResourceAsReader(file);
     }
diff --git a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
index 55382cc..3833f3c 100644
--- a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
+++ b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
@@ -276,17 +276,12 @@
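-    // 1. string rules defined by `urlfilter.domainblacklist.rules`
-    // 2. rule file name defined by `urlfilter.domainblacklist.file`
+    // 1. string rules defined by `urlfilter.suffix.rules`
+    // 2. rule file name defined by `urlfilter.suffix.file`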
     // 3. rule file name defined in plugin.xml (`attributeFile`)
-    String file = conf.get("urlfilter.suffix.file");
+    String file = conf.get("urlfilter.suffix.file", attributeFile);
     String stringRules = conf.get("urlfilter.suffix.rules");
     Reader reader = null;
     if (stringRules != null) { // takes precedence over files
       reader = new StringReader(stringRules);
     } else {
-      if (file != null) {
-        // take file
-      } else if (attributeFile != null) {
-        file = attributeFile;
-      }
       LOG.info("Reading {} rules file {}", pluginName, file);
       reader = conf.getConfResourceAsReader(file);
     }
diff --git a/src/plugin/urlnormalizer-host/src/java/org/apache/nutch/net/urlnormalizer/host/HostURLNormalizer.java b/src/plugin/urlnormalizer-host/src/java/org/apache/nutch/net/urlnormalizer/host/HostURLNormalizer.java
index 4506c85..3a3c8a4 100644
--- a/src/plugin/urlnormalizer-host/src/java/org/apache/nutch/net/urlnormalizer/host/HostURLNormalizer.java
+++ b/src/plugin/urlnormalizer-host/src/java/org/apache/nutch/net/urlnormalizer/host/HostURLNormalizer.java
@@ -118,13 +118,8 @@
     // 1. string rules defined by `urlnormalizer.hosts.rules`
-    // 2. rule file name defined by `urlnormalizer.hosts.file"`
+    // 2. rule file name defined by `urlnormalizer.hosts.file`
     // 3. rule file name defined in plugin.xml (`attributeFile`)
-    String file = conf.get("urlnormalizer.hosts.file");
+    String file = conf.get("urlnormalizer.hosts.file", attributeFile);
     String stringRules = conf.get("urlnormalizer.hosts.rules");
-    if (file != null) {
-      // take file
-    } else if (attributeFile != null) {
-      file = attributeFile;
-    }
     Reader reader = null;
     if (stringRules != null) { // takes precedence over files
       reader = new StringReader(stringRules);
diff --git a/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java b/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java
index f18ac65..f60c291 100644
--- a/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java
+++ b/src/plugin/urlnormalizer-protocol/src/java/org/apache/nutch/net/urlnormalizer/protocol/ProtocolURLNormalizer.java
@@ -124,13 +124,8 @@
     // 1. string rules defined by `urlnormalizer.protocols.rules`
-    // 2. rule file name defined by `urlnormalizer.protocols.file"`
+    // 2. rule file name defined by `urlnormalizer.protocols.file`
     // 3. rule file name defined in plugin.xml (`attributeFile`)
-    String file = conf.get("urlnormalizer.protocols.file");
+    String file = conf.get("urlnormalizer.protocols.file", attributeFile);
     String stringRules = conf.get("urlnormalizer.protocols.rules");
-    if (file != null) {
-      // take file
-    } else if (attributeFile != null) {
-      file = attributeFile;
-    }
     Reader reader = null;
     if (stringRules != null) { // takes precedence over files
       reader = new StringReader(stringRules);
diff --git a/src/plugin/urlnormalizer-slash/src/java/org/apache/nutch/net/urlnormalizer/slash/SlashURLNormalizer.java b/src/plugin/urlnormalizer-slash/src/java/org/apache/nutch/net/urlnormalizer/slash/SlashURLNormalizer.java
index 6e8b7b9..2570427 100644
--- a/src/plugin/urlnormalizer-slash/src/java/org/apache/nutch/net/urlnormalizer/slash/SlashURLNormalizer.java
+++ b/src/plugin/urlnormalizer-slash/src/java/org/apache/nutch/net/urlnormalizer/slash/SlashURLNormalizer.java
@@ -130,13 +130,8 @@
     // 1. string rules defined by `urlnormalizer.slashes.rules`
-    // 2. rule file name defined by `urlnormalizer.slashes.file"`
+    // 2. rule file name defined by `urlnormalizer.slashes.file`
     // 3. rule file name defined in plugin.xml (`attributeFile`)
-    String file = conf.get("urlnormalizer.slashes.file");
+    String file = conf.get("urlnormalizer.slashes.file", attributeFile);
     String stringRules = conf.get("urlnormalizer.slashes.rules");
-    if (file != null) {
-      // take file
-    } else if (attributeFile != null) {
-      file = attributeFile;
-    }
     Reader reader = null;
     if (stringRules != null) { // takes precedence over files
       reader = new StringReader(stringRules);