Merge pull request #451 from sebastian-nagel/NUTCH-2709-remove-unused-http-properties

NUTCH-2709 Remove unused properties and code related to HTTP protocol
diff --git a/src/java/org/apache/nutch/util/PrefixStringMatcher.java b/src/java/org/apache/nutch/util/PrefixStringMatcher.java
index 36e2d9e..3be0fd7 100644
--- a/src/java/org/apache/nutch/util/PrefixStringMatcher.java
+++ b/src/java/org/apache/nutch/util/PrefixStringMatcher.java
@@ -16,8 +16,11 @@
  */
 package org.apache.nutch.util;
 
+import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Iterator;
+import java.util.List;
 
 /**
  * A class for efficiently matching <code>String</code>s against a set of
@@ -102,8 +105,9 @@
   }
 
   public static final void main(String[] argv) {
-    PrefixStringMatcher matcher = new PrefixStringMatcher(new String[] {
-        "abcd", "abc", "aac", "baz", "foo", "foobar" });
+    String[] prefixes = new String[] { "abcd", "abc", "aac", "baz", "foo",
+        "foobar" };
+    PrefixStringMatcher matcher = new PrefixStringMatcher(prefixes);
 
     String[] tests = { "a", "ab", "abc", "abcdefg", "apple", "aa", "aac",
         "aaccca", "abaz", "baz", "bazooka", "fo", "foobar", "kite", };
@@ -114,5 +118,23 @@
       System.out.println("  shortest: " + matcher.shortestMatch(tests[i]));
       System.out.println("   longest: " + matcher.longestMatch(tests[i]));
     }
+
+    int iterations = 1000;
+    System.out.println("Testing thread-safety (NUTCH-2585) with " + iterations
+        + " iterations:");
+    List<String> testsList = Arrays.asList(tests);
+    for (int i = 0; i < iterations; i++) {
+      matcher = new PrefixStringMatcher(prefixes);
+      Collections.shuffle(testsList);
+      try {
+        long count = testsList.parallelStream().filter(matcher::matches).count();
+        System.out.print(String.format("Cycle %4d : %d matches\r", i, count));
+      } catch (Exception e) {
+        // flush output
+        System.out.println("");
+        throw e;
+      }
+    }
+    System.out.println("");
   }
 }
diff --git a/src/java/org/apache/nutch/util/TrieStringMatcher.java b/src/java/org/apache/nutch/util/TrieStringMatcher.java
index fddecf3..d974ecb 100644
--- a/src/java/org/apache/nutch/util/TrieStringMatcher.java
+++ b/src/java/org/apache/nutch/util/TrieStringMatcher.java
@@ -23,6 +23,8 @@
 /**
  * TrieStringMatcher is a base class for simple tree-based string matching.
  * 
+ * <p>This class is thread-safe during string matching but not when adding
+ * strings to the trie.
  */
 public abstract class TrieStringMatcher {
   protected TrieNode root;
@@ -103,9 +105,7 @@
      */
     TrieNode getChild(char nextChar) {
       if (children == null) {
-        children = childrenList.toArray(new TrieNode[childrenList.size()]);
-        childrenList = null;
-        Arrays.sort(children);
+        compile();
       }
 
       int min = 0;
@@ -137,6 +137,18 @@
       // if (this.nodeChar > other.nodeChar)
       return 1;
     }
+
+    /**
+     * Prepare node for matching; synchronized because matchers may race here.
+     */
+    synchronized void compile() {
+      if (childrenList != null) {
+        TrieNode[] sorted = childrenList.toArray(new TrieNode[0]);
+        Arrays.sort(sorted);
+        children = sorted; // publish after sort: getChild() reads unlocked
+        childrenList = null;
+      }
+    }
   }
 
   /**
diff --git a/src/plugin/urlfilter-automaton/ivy.xml b/src/plugin/urlfilter-automaton/ivy.xml
index 7c1968f..5ddf1db 100644
--- a/src/plugin/urlfilter-automaton/ivy.xml
+++ b/src/plugin/urlfilter-automaton/ivy.xml
@@ -36,7 +36,7 @@
   </publications>
 
   <dependencies>
-    <dependency org="dk.brics.automaton" name="automaton" rev="1.11-8" conf="*->default" />
+    <dependency org="dk.brics" name="automaton" rev="1.12-1" conf="*->default" />
   </dependencies>
   
 </ivy-module>