NUTCH-2824 urlnormalizer-basic to unescape percent-encoded host names

- add unit tests to verify that the declared MalformedURLException is
  thrown on host names containing illegal percent-encoded sequences and
  that any (undeclared) runtime exceptions are caught and rethrown as
  MalformedURLException
diff --git a/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java b/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
index 05feb6d..fd0aa18 100644
--- a/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
+++ b/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
@@ -244,6 +244,23 @@
         "https://www.example.org/");
   }
 
+  /**
+   * Verify that invalid URLs are rejected with a MalformedURLException.
+   */
+  @Test
+  public void testInvalidURLs() throws Exception {
+    // "%2X" in the host name is not a valid percent-encoded sequence
+    normalizeTestAssertThrowsMalformedURLException("https://example%2Xcom/");
+    // "%FE" in the host name is not a valid UTF-8 sequence; this is only
+    // validated if the host is parsed as Internationalized Domain Name
+    BasicURLNormalizer idnNorm = new BasicURLNormalizer();
+    conf = NutchConfiguration.create();
+    conf.set(BasicURLNormalizer.NORM_HOST_IDN, "toAscii");
+    idnNorm.setConf(conf);
+    String badUtf8Url = "https://abc%FEdef.org/";
+    normalizeTestAssertThrowsMalformedURLException(idnNorm, badUtf8Url);
+  }
+
   private void normalizeTest(String weird, String normal) throws Exception {
     normalizeTest(this.normalizer, weird, normal);
   }
@@ -260,6 +277,23 @@
     }
   }
 
+  private void normalizeTestAssertThrowsMalformedURLException(String weird) throws Exception {
+    normalizeTestAssertThrowsMalformedURLException(this.normalizer, weird); // check against this.normalizer
+  }
+
+  private void normalizeTestAssertThrowsMalformedURLException(
+      BasicURLNormalizer normalizer, String weird) throws Exception {
+    final String result; // only assigned if normalization did not throw
+    try {
+      result = normalizer.normalize(weird, URLNormalizers.SCOPE_DEFAULT);
+    } catch (MalformedURLException expected) {
+      return; // the declared exception was thrown: check passed
+    }
+    // no exception was thrown: fail and report the unexpected result
+    Assert.fail("Expected MalformedURLException was not thrown on " + weird
+        + " (normalized: " + result + ")");
+  }
+
   public static void main(String[] args) throws Exception {
     new TestBasicURLNormalizer().testNormalizer();
   }