CONNECTORS-1667: Add a dedicated retry interval parameter used when Tika is down

git-svn-id: https://svn.apache.org/repos/asf/manifoldcf/branches/CONNECTORS-1667@1891042 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java b/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
index 9244638..5893b05 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
+++ b/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
@@ -30,12 +30,14 @@
   public static final String PARAM_CONNECTIONTIMEOUT = "connectionTimeout";
   public static final String PARAM_SOCKETTIMEOUT = "socketTimeout";
   public static final String PARAM_RETRYINTERVAL = "retryInterval";
+  public static final String PARAM_RETRYINTERVALTIKADOWN = "retryIntervalTikaDown";
   public static final String PARAM_RETRYNUMBER = "retryNumber";
   public static final String TIKAHOSTNAME_DEFAULT = "localhost";
   public static final String TIKAPORT_DEFAULT = "9998";
   public static final String CONNECTIONTIMEOUT_DEFAULT = "60000";
   public static final String SOCKETTIMEOUT_DEFAULT = "60000";
   public static final String RETRYINTERVAL_DEFAULT = "20000";
+  public static final String RETRYINTERVALTIKADOWN_DEFAULT = "120000";
   public static final String RETRYNUMBER_DEFAULT = "1";
 
   // Specification nodes and values
diff --git a/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java b/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
index 16607db..21bcb88 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
+++ b/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
@@ -126,6 +126,9 @@
   /** Retry interval */
   private String retryIntervalString = null;
 
+  /** Retry interval when Tika seems down */
+  private String retryIntervalTikaDownString = null;
+
   /** Retry number */
   private String retryNumberString = null;
 
@@ -146,6 +149,9 @@
   /** Retry interval */
   private long retryInterval = -1L;
 
+  /** Retry interval when Tika seems down */
+  private long retryIntervalTikaDown = -1L;
+
   /** Retry number */
   private int retryNumber = -1;
 
@@ -221,6 +227,7 @@
     connectionTimeoutString = configParameters.getParameter(TikaConfig.PARAM_CONNECTIONTIMEOUT);
     socketTimeoutString = configParameters.getParameter(TikaConfig.PARAM_SOCKETTIMEOUT);
     retryIntervalString = configParameters.getParameter(TikaConfig.PARAM_RETRYINTERVAL);
+    retryIntervalTikaDownString = configParameters.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
     retryNumberString = configParameters.getParameter(TikaConfig.PARAM_RETRYNUMBER);
   }
 
@@ -235,6 +242,7 @@
     connectionTimeoutString = null;
     socketTimeoutString = null;
     retryIntervalString = null;
+    retryIntervalTikaDownString = null;
     retryNumberString = null;
 
     super.disconnect();
@@ -293,6 +301,11 @@
         throw new ManifoldCFException("Bad retry interval number: " + retryIntervalString);
       }
       try {
+        this.retryIntervalTikaDown = Long.parseLong(retryIntervalTikaDownString);
+      } catch (final NumberFormatException e) {
+        throw new ManifoldCFException("Bad retry interval when tika is down number: " + retryIntervalTikaDownString);
+      }
+      try {
         this.retryNumber = Integer.parseInt(retryNumberString);
       } catch (final NumberFormatException e) {
         throw new ManifoldCFException("Bad retry number: " + retryNumberString);
@@ -448,6 +461,11 @@
       parameters.setParameter(TikaConfig.PARAM_RETRYINTERVAL, retryInterval);
     }
 
+    final String retryIntervalTikaDown = variableContext.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
+    if (retryIntervalTikaDown != null) {
+      parameters.setParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN, retryIntervalTikaDown);
+    }
+
     final String retryNumber = variableContext.getParameter(TikaConfig.PARAM_RETRYNUMBER);
     if (retryNumber != null) {
       parameters.setParameter(TikaConfig.PARAM_RETRYNUMBER, retryNumber);
@@ -497,6 +515,11 @@
       retryInterval = TikaConfig.RETRYINTERVAL_DEFAULT;
     }
 
+    String retryIntervalTikaDown = parameters.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
+    if (retryIntervalTikaDown == null) {
+      retryIntervalTikaDown = TikaConfig.RETRYINTERVALTIKADOWN_DEFAULT;
+    }
+
     String retryNumber = parameters.getParameter(TikaConfig.PARAM_RETRYNUMBER);
     if (retryNumber == null) {
       retryNumber = TikaConfig.RETRYNUMBER_DEFAULT;
@@ -508,6 +531,7 @@
     velocityContext.put("CONNECTIONTIMEOUT", connectionTimeout);
     velocityContext.put("SOCKETTIMEOUT", socketTimeout);
     velocityContext.put("RETRYINTERVAL", retryInterval);
+    velocityContext.put("RETRYINTERVALTIKADOWN", retryIntervalTikaDown);
     velocityContext.put("RETRYNUMBER", retryNumber);
   }
 
@@ -584,7 +608,7 @@
     // work
     Logging.ingest.warn("Tika Server unreachable while trying to process " + documentURI + ", retrying...", e);
     final long currentTime = System.currentTimeMillis();
-    throw new ServiceInterruption("Tika Server connection down: " + e.getMessage(), e, currentTime + retryInterval, -1L, -1, false);
+    throw new ServiceInterruption("Tika Server connection down: " + e.getMessage(), e, currentTime + retryIntervalTikaDown, -1L, retryNumber, false);
   }
 
   private void retryWithoutAbort(final Exception e) throws ServiceInterruption {
@@ -723,6 +747,9 @@
               } else { // The tika server seams to be down : retry {retryNumber} times and abort the
                 // job if it fails on
                 // each retry
+                resultCode = "TIKASERVEREXCEPTION";
+                description = "Tika seemed to be down when requested to process document " + documentURI + " : " + e.getMessage();
+                tikaServerResultCode = handleTikaServerError(description);
                 triggerServiceInterruption(documentURI, e);
               }
             } catch (final NoHttpResponseException e) {
@@ -733,6 +760,9 @@
             } catch (final IOException e) { // Unknown problem with the Tika Server. Retry {retryNumber} times and abort
               // the job if it fails on
               // each retry
+              resultCode = "TIKASERVEREXCEPTION";
+              description = "Unknown Tika problem when processing document " + documentURI + " : " + e.getMessage();
+              tikaServerResultCode = handleTikaServerError(description);
               triggerServiceInterruption(documentURI, e);
             }
             if (response != null) {
@@ -1174,7 +1204,7 @@
     final List<Map<String, String>> fieldMappings = new ArrayList<>();
     String keepAllMetadataValue = "true";
     String lowernamesValue = "true";
-    String writeLimitValue = "1000000";
+    String writeLimitValue = "1000000"; // 1 MB by default (unit taken from the original "1Mo" note - TODO confirm)
     String extractArchives = "false";
     String maxEmbeddedResources = "";
     for (int i = 0; i < os.getChildCount(); i++) {
diff --git a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
index df9a862..8447ebf 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
+++ b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
 TikaExtractor.RetryNumber=Number of retries:
 TikaExtractor.ExtractArchives=Extract archives content:
 TikaExtractor.ConnectionTimeout=Connection timeout:
diff --git a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
index df9a862..8447ebf 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
+++ b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
 TikaExtractor.RetryNumber=Number of retries:
 TikaExtractor.ExtractArchives=Extract archives content:
 TikaExtractor.ConnectionTimeout=Connection timeout:
diff --git a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
index ff476c9..7b25be2 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
+++ b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 TikaExtractor.RetryInterval=Intervalle entre les tentatives (en ms):
+TikaExtractor.RetryIntervalTikaDown=Intervalle entre les tentatives quand Tika est injoignable (en ms):
 TikaExtractor.RetryNumber=Nombre de tentatives:
 TikaExtractor.ExtractArchives=Extraire le contenu des archives:
 TikaExtractor.ConnectionTimeout=Connexion timeout:
diff --git a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
index 4f8343b..3a3bafc 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
+++ b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
 TikaExtractor.RetryNumber=Number of retries:
 TikaExtractor.ExtractArchives=Extract archives content:
 TikaExtractor.ConnectionTimeout=Connection timeout:
diff --git a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
index a1c0f66..6ea20b4 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
+++ b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
 TikaExtractor.RetryNumber=Number of retries:
 TikaExtractor.ExtractArchives=Extract archives content:
 TikaExtractor.ConnectionTimeout=Connection timeout:
diff --git a/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html b/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
index cd88e24..f9607cb 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
+++ b/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
@@ -50,6 +50,12 @@
     </td>
   </tr>
   <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryIntervalTikaDown'))</nobr></td>
+    <td class="value"><input name="retryIntervalTikaDown" type="text"
+      value="$Encoder.attributeEscape($RETRYINTERVALTIKADOWN)" size="20" />
+    </td>
+  </tr>
+  <tr>
     <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryNumber'))</nobr></td>
     <td class="value"><input name="retryNumber" type="text"
       value="$Encoder.attributeEscape($RETRYNUMBER)" size="5" />
@@ -63,6 +69,7 @@
 <input type="hidden" name="connectionTimeout" value="$Encoder.attributeEscape($CONNECTIONTIMEOUT)"/>
 <input type="hidden" name="socketTimeout" value="$Encoder.attributeEscape($SOCKETTIMEOUT)"/>
 <input type="hidden" name="retryInterval" value="$Encoder.attributeEscape($RETRYINTERVAL)"/>
+<input type="hidden" name="retryIntervalTikaDown" value="$Encoder.attributeEscape($RETRYINTERVALTIKADOWN)"/>
 <input type="hidden" name="retryNumber" value="$Encoder.attributeEscape($RETRYNUMBER)"/>
 
 #end
\ No newline at end of file
diff --git a/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html b/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
index 67f298a..36afcb4 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
+++ b/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
@@ -37,6 +37,10 @@
     <td class="value"><nobr>$Encoder.bodyEscape($RETRYINTERVAL)</nobr></td>
   </tr>
   <tr>
+    <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryIntervalTikaDown'))</nobr></td>
+    <td class="value"><nobr>$Encoder.bodyEscape($RETRYINTERVALTIKADOWN)</nobr></td>
+  </tr>
+  <tr>
     <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryNumber'))</nobr></td>
     <td class="value"><nobr>$Encoder.bodyEscape($RETRYNUMBER)</nobr></td>
   </tr>