CONNECTORS-1667: Add a specific retry interval parameter used when the Tika server is down
git-svn-id: https://svn.apache.org/repos/asf/manifoldcf/branches/CONNECTORS-1667@1891042 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java b/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
index 9244638..5893b05 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
+++ b/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaConfig.java
@@ -30,12 +30,14 @@
public static final String PARAM_CONNECTIONTIMEOUT = "connectionTimeout";
public static final String PARAM_SOCKETTIMEOUT = "socketTimeout";
public static final String PARAM_RETRYINTERVAL = "retryInterval";
+ public static final String PARAM_RETRYINTERVALTIKADOWN = "retryIntervalTikaDown";
public static final String PARAM_RETRYNUMBER = "retryNumber";
public static final String TIKAHOSTNAME_DEFAULT = "localhost";
public static final String TIKAPORT_DEFAULT = "9998";
public static final String CONNECTIONTIMEOUT_DEFAULT = "60000";
public static final String SOCKETTIMEOUT_DEFAULT = "60000";
public static final String RETRYINTERVAL_DEFAULT = "20000";
+ public static final String RETRYINTERVALTIKADOWN_DEFAULT = "120000";
public static final String RETRYNUMBER_DEFAULT = "1";
// Specification nodes and values
diff --git a/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java b/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
index 16607db..21bcb88 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
+++ b/connectors/tikaservice-rmeta/connector/src/main/java/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/TikaExtractor.java
@@ -126,6 +126,9 @@
/** Retry interval */
private String retryIntervalString = null;
+ /** Retry interval when Tika seems down */
+ private String retryIntervalTikaDownString = null;
+
/** Retry number */
private String retryNumberString = null;
@@ -146,6 +149,9 @@
/** Retry interval */
private long retryInterval = -1L;
+ /** Retry interval when Tika seems down */
+ private long retryIntervalTikaDown = -1L;
+
/** Retry number */
private int retryNumber = -1;
@@ -221,6 +227,7 @@
connectionTimeoutString = configParameters.getParameter(TikaConfig.PARAM_CONNECTIONTIMEOUT);
socketTimeoutString = configParameters.getParameter(TikaConfig.PARAM_SOCKETTIMEOUT);
retryIntervalString = configParameters.getParameter(TikaConfig.PARAM_RETRYINTERVAL);
+ retryIntervalTikaDownString = configParameters.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
retryNumberString = configParameters.getParameter(TikaConfig.PARAM_RETRYNUMBER);
}
@@ -235,6 +242,7 @@
connectionTimeoutString = null;
socketTimeoutString = null;
retryIntervalString = null;
+ retryIntervalTikaDownString = null;
retryNumberString = null;
super.disconnect();
@@ -293,6 +301,11 @@
throw new ManifoldCFException("Bad retry interval number: " + retryIntervalString);
}
try {
+ this.retryIntervalTikaDown = Long.parseLong(retryIntervalTikaDownString);
+ } catch (final NumberFormatException e) {
+ throw new ManifoldCFException("Bad retry interval when tika is down number: " + retryIntervalTikaDownString);
+ }
+ try {
this.retryNumber = Integer.parseInt(retryNumberString);
} catch (final NumberFormatException e) {
throw new ManifoldCFException("Bad retry number: " + retryNumberString);
@@ -448,6 +461,11 @@
parameters.setParameter(TikaConfig.PARAM_RETRYINTERVAL, retryInterval);
}
+ final String retryIntervalTikaDown = variableContext.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
+ if (retryIntervalTikaDown != null) {
+ parameters.setParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN, retryIntervalTikaDown);
+ }
+
final String retryNumber = variableContext.getParameter(TikaConfig.PARAM_RETRYNUMBER);
if (retryNumber != null) {
parameters.setParameter(TikaConfig.PARAM_RETRYNUMBER, retryNumber);
@@ -497,6 +515,11 @@
retryInterval = TikaConfig.RETRYINTERVAL_DEFAULT;
}
+ String retryIntervalTikaDown = parameters.getParameter(TikaConfig.PARAM_RETRYINTERVALTIKADOWN);
+ if (retryIntervalTikaDown == null) {
+ retryIntervalTikaDown = TikaConfig.RETRYINTERVALTIKADOWN_DEFAULT;
+ }
+
String retryNumber = parameters.getParameter(TikaConfig.PARAM_RETRYNUMBER);
if (retryNumber == null) {
retryNumber = TikaConfig.RETRYNUMBER_DEFAULT;
@@ -508,6 +531,7 @@
velocityContext.put("CONNECTIONTIMEOUT", connectionTimeout);
velocityContext.put("SOCKETTIMEOUT", socketTimeout);
velocityContext.put("RETRYINTERVAL", retryInterval);
+ velocityContext.put("RETRYINTERVALTIKADOWN", retryIntervalTikaDown);
velocityContext.put("RETRYNUMBER", retryNumber);
}
@@ -584,7 +608,7 @@
// work
Logging.ingest.warn("Tika Server unreachable while trying to process " + documentURI + ", retrying...", e);
final long currentTime = System.currentTimeMillis();
- throw new ServiceInterruption("Tika Server connection down: " + e.getMessage(), e, currentTime + retryInterval, -1L, -1, false);
+ throw new ServiceInterruption("Tika Server connection down: " + e.getMessage(), e, currentTime + retryIntervalTikaDown, -1L, retryNumber, false);
}
private void retryWithoutAbort(final Exception e) throws ServiceInterruption {
@@ -723,6 +747,9 @@
} else { // The tika server seams to be down : retry {retryNumber} times and abort the
// job if it fails on
// each retry
+ resultCode = "TIKASERVEREXCEPTION";
+ description = "Tika seemed to be down when requested to process document " + documentURI + " : " + e.getMessage();
+ tikaServerResultCode = handleTikaServerError(description);
triggerServiceInterruption(documentURI, e);
}
} catch (final NoHttpResponseException e) {
@@ -733,6 +760,9 @@
} catch (final IOException e) { // Unknown problem with the Tika Server. Retry {retryNumber} times and abort
// the job if it fails on
// each retry
+ resultCode = "TIKASERVEREXCEPTION";
+ description = "Unknown Tika problem when processing document " + documentURI + " : " + e.getMessage();
+ tikaServerResultCode = handleTikaServerError(description);
triggerServiceInterruption(documentURI, e);
}
if (response != null) {
@@ -1174,7 +1204,7 @@
final List<Map<String, String>> fieldMappings = new ArrayList<>();
String keepAllMetadataValue = "true";
String lowernamesValue = "true";
- String writeLimitValue = "1000000";
+ String writeLimitValue = "1000000"; // 1Mo by default
String extractArchives = "false";
String maxEmbeddedResources = "";
for (int i = 0; i < os.getChildCount(); i++) {
diff --git a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
index df9a862..8447ebf 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
+++ b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_en_US.properties
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
diff --git a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
index df9a862..8447ebf 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
+++ b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_es_ES.properties
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
diff --git a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
index ff476c9..7b25be2 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
+++ b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_fr_FR.properties
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Intervalle entre les tentatives (en ms):
+TikaExtractor.RetryIntervalTikaDown=Intervalle entre les tentatives quand Tika est injoignable (en ms):
TikaExtractor.RetryNumber=Nombre de tentatives:
TikaExtractor.ExtractArchives=Extraire le contenu des archives:
TikaExtractor.ConnectionTimeout=Connexion timeout:
diff --git a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
index 4f8343b..3a3bafc 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
+++ b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_ja_JP.properties
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
diff --git a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
index a1c0f66..6ea20b4 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
+++ b/connectors/tikaservice-rmeta/connector/src/main/native2ascii/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/common_zh_CN.properties
@@ -14,6 +14,7 @@
# limitations under the License.
TikaExtractor.RetryInterval=Retry interval (in ms):
+TikaExtractor.RetryIntervalTikaDown=Retry interval when Tika is down (in ms):
TikaExtractor.RetryNumber=Number of retries:
TikaExtractor.ExtractArchives=Extract archives content:
TikaExtractor.ConnectionTimeout=Connection timeout:
diff --git a/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html b/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
index cd88e24..f9607cb 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
+++ b/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/editConfiguration_Server.html
@@ -50,6 +50,12 @@
</td>
</tr>
<tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryIntervalTikaDown'))</nobr></td>
+ <td class="value"><input name="retryIntervalTikaDown" type="text"
+ value="$Encoder.attributeEscape($RETRYINTERVALTIKADOWN)" size="20" />
+ </td>
+ </tr>
+ <tr>
<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryNumber'))</nobr></td>
<td class="value"><input name="retryNumber" type="text"
value="$Encoder.attributeEscape($RETRYNUMBER)" size="5" />
@@ -63,6 +69,7 @@
<input type="hidden" name="connectionTimeout" value="$Encoder.attributeEscape($CONNECTIONTIMEOUT)"/>
<input type="hidden" name="socketTimeout" value="$Encoder.attributeEscape($SOCKETTIMEOUT)"/>
<input type="hidden" name="retryInterval" value="$Encoder.attributeEscape($RETRYINTERVAL)"/>
+<input type="hidden" name="retryIntervalTikaDown" value="$Encoder.attributeEscape($RETRYINTERVALTIKADOWN)"/>
<input type="hidden" name="retryNumber" value="$Encoder.attributeEscape($RETRYNUMBER)"/>
#end
\ No newline at end of file
diff --git a/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html b/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
index 67f298a..36afcb4 100644
--- a/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
+++ b/connectors/tikaservice-rmeta/connector/src/main/resources/org/apache/manifoldcf/agents/transformation/tikaservice/rmeta/viewConfiguration.html
@@ -37,6 +37,10 @@
<td class="value"><nobr>$Encoder.bodyEscape($RETRYINTERVAL)</nobr></td>
</tr>
<tr>
+ <td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryIntervalTikaDown'))</nobr></td>
+ <td class="value"><nobr>$Encoder.bodyEscape($RETRYINTERVALTIKADOWN)</nobr></td>
+ </tr>
+ <tr>
<td class="description"><nobr>$Encoder.bodyEscape($ResourceBundle.getString('TikaExtractor.RetryNumber'))</nobr></td>
<td class="value"><nobr>$Encoder.bodyEscape($RETRYNUMBER)</nobr></td>
</tr>