NUTCH-3096 HostDB ResolverThread can create too many job counters
(patch contributed by Markus Jelsma)
diff --git a/src/java/org/apache/nutch/hostdb/ResolverThread.java b/src/java/org/apache/nutch/hostdb/ResolverThread.java
index 434e7bb..c0a4f12 100644
--- a/src/java/org/apache/nutch/hostdb/ResolverThread.java
+++ b/src/java/org/apache/nutch/hostdb/ResolverThread.java
@@ -114,15 +114,32 @@
           }
         }
 
-        context.getCounter("UpdateHostDb",
-          Long.toString(datum.numFailures()) + "_times_failed").increment(1);
+        context.getCounter("UpdateHostDb", createFailureCounterLabel(datum)).increment(1);
       } catch (Exception ioe) {
         LOG.warn(StringUtils.stringifyException(ioe));
       }
     } catch (Exception e) {
       LOG.warn(StringUtils.stringifyException(e));
     }
-    
+
     context.getCounter("UpdateHostDb", "checked_hosts").increment(1);
   }
+
+  private String createFailureCounterLabel(HostDatum datum) {
+    // By default Hadoop allows no more than 120 distinct counters per job.
+    // With a large number of distinct failure counts we would exceed that
+    // limit, Hadoop would complain, and the job would fail. Limit the number
+    // of possibilities by grouping numFailures into buckets. NUTCH-3096
+    long n = datum.numFailures();
+    String label;
+    if (n < 4) {
+      label = Long.toString(n);
+    } else if (n < 11) {
+      label = "4-10";
+    } else {
+      label = ">10";
+    }
+
+    return label + "_times_failed";
+  }
 }
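
For reference, the bucketing above yields at most six distinct failure labels per job ("0" through "3", "4-10", and ">10"), comfortably under Hadoop's default counter limit of 120 (mapreduce.job.counters.max). Below is a minimal standalone sketch of the boundary behaviour; the FailureBucketCheck class and its main harness are hypothetical illustrations, not part of the patch:

public class FailureBucketCheck {

  // Mirrors the bucketing in ResolverThread.createFailureCounterLabel().
  static String label(long n) {
    if (n < 4) {
      return n + "_times_failed";
    } else if (n < 11) {
      return "4-10_times_failed";
    } else {
      return ">10_times_failed";
    }
  }

  public static void main(String[] args) {
    // Boundary values: 3 is the last exact count, 4 and 10 share the
    // middle bucket, and 11 onwards falls into the open-ended top bucket.
    for (long n : new long[] {0, 3, 4, 10, 11, 100}) {
      System.out.println(n + " -> " + label(n));
    }
  }
}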