NUTCH-2381 In some situations the class TextProfileSignature
gives different signatures for the same text "profile" page.
- implement secondary sorting (similar to the patch provided by
  Rodrigo Joni Sestari)
- allow restoring the previous behavior by setting the property
  `db.signature.text_profile.sec_sort_lex = false`
diff --git a/CHANGES.txt b/CHANGES.txt
index ff564d3..5721439 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -14,7 +14,15 @@
        further information.
 
     -  HostDB entries have been moved from Integer to Long in order to accomodate very large
-       hosts. Remove your existing HostDB and recreate it with bin/nutch updatehostdb.
+       hosts. Remove your existing HostDB and recreate it with bin/nutch updatehostdb, see
+       NUTCH-2694 for additional information.
+
+    -  The signature class TextProfileSignature has been improved to be stable over
+       consecutive runs by sorting tokens by frequency first and secondarily in lexicographic
+       order.  If an existing CrawlDb contains signatures generated by TextProfileSignature,
+       these are likely to change when upgrading to Nutch 1.16.  The previous behavior, relying
+       on a semi-stable pseudo-random hash sorting, can be restored by setting the property
+       `db.signature.text_profile.sec_sort_lex` to `false`. See also NUTCH-2381.
 
 
 Nutch 1.15 Release (25/07/2018)
diff --git a/src/java/org/apache/nutch/crawl/TextProfileSignature.java b/src/java/org/apache/nutch/crawl/TextProfileSignature.java
index c831be5..049206a 100644
--- a/src/java/org/apache/nutch/crawl/TextProfileSignature.java
+++ b/src/java/org/apache/nutch/crawl/TextProfileSignature.java
@@ -26,6 +26,7 @@
 import java.util.HashMap;
 import java.util.Iterator;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseImpl;
@@ -67,11 +68,22 @@
 
   Signature fallback = new MD5Signature();
 
-  public byte[] calculate(Content content, Parse parse) {
-    int MIN_TOKEN_LEN = getConf().getInt(
+  int MIN_TOKEN_LEN = 2; // default of db.signature.text_profile.min_token_len
+  float QUANT_RATE = 0.01f; // default of db.signature.text_profile.quant_rate
+  boolean secondaryLexicographicSorting = true; // break ties by token text
+
+  @Override
+  public void setConf(Configuration conf) {
+    super.setConf(conf);
+    MIN_TOKEN_LEN = conf.getInt( // cache the configured values in the fields
         "db.signature.text_profile.min_token_len", 2);
-    float QUANT_RATE = getConf().getFloat(
+    QUANT_RATE = conf.getFloat(
         "db.signature.text_profile.quant_rate", 0.01f);
+    secondaryLexicographicSorting = conf.getBoolean(
+        "db.signature.text_profile.sec_sort_lex", true);
+  }
+
+  public byte[] calculate(Content content, Parse parse) {
     HashMap<String, Token> tokens = new HashMap<>();
     String text = null;
     if (parse != null)
@@ -161,9 +173,17 @@
     }
   }
 
-  private static class TokenComparator implements Comparator<Token> {
+  private class TokenComparator implements Comparator<Token> {
+    /**
+     * Sort tokens by decreasing frequency; ties are broken in lexicographic
+     * (Unicode) order unless db.signature.text_profile.sec_sort_lex is false.
+     */
     public int compare(Token t1, Token t2) {
-      return t2.cnt - t1.cnt;
+      int diffCnt = t2.cnt - t1.cnt;
+      if (diffCnt == 0 && secondaryLexicographicSorting) {
+        return t1.val.compareTo(t2.val);
+      }
+      return diffCnt; // may be 0 for distinct tokens if sec. sorting is off
     }
   }
 
diff --git a/src/test/org/apache/nutch/crawl/TestTextProfileSignature.java b/src/test/org/apache/nutch/crawl/TestTextProfileSignature.java
new file mode 100644
index 0000000..adf4b5e
--- /dev/null
+++ b/src/test/org/apache/nutch/crawl/TestTextProfileSignature.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.crawl;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.parse.Outlink;
+import org.apache.nutch.parse.ParseData;
+import org.apache.nutch.parse.ParseImpl;
+import org.apache.nutch.parse.ParseStatus;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.util.NutchConfiguration;
+import org.apache.nutch.util.StringUtil;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestTextProfileSignature {
+  /** The signature must not change when the words of a text are reordered. */
+  @Test
+  public void testGetSignature() {
+    Configuration conf = NutchConfiguration.create();
+    Signature textProf = new TextProfileSignature();
+    textProf.setConf(conf);
+    String text = "Hello World The Quick Brown Fox Jumped Over the Lazy Fox";
+    ParseData pd = new ParseData(ParseStatus.STATUS_SUCCESS, "Hello World",
+        new Outlink[0], new Metadata());
+    byte[] signature1 = textProf.calculate(new Content(),
+        new ParseImpl(text, pd));
+    Assert.assertNotNull(signature1);
+    List<String> words = Arrays.asList(text.split("\\s"));
+    Collections.shuffle(words, new java.util.Random(2381L)); // fixed seed
+    String text2 = String.join(" ", words);
+    byte[] signature2 = textProf.calculate(new Content(),
+        new ParseImpl(text2, pd));
+    Assert.assertNotNull(signature2);
+    Assert.assertEquals(StringUtil.toHexString(signature1),
+        StringUtil.toHexString(signature2));
+  }
+}