[maven-release-plugin] copy for tag uima-ducc-2.2.2
git-svn-id: https://svn.apache.org/repos/asf/uima/uima-ducc/tags/uima-ducc-2.2.2@1825678 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/RELEASE_NOTES.html b/RELEASE_NOTES.html
index d97bc29..6e5117b 100755
--- a/RELEASE_NOTES.html
+++ b/RELEASE_NOTES.html
@@ -79,7 +79,7 @@
swapaccount=1. Details of how to do this can be found <a href="http://unix.stackexchange.com/questions/147158/how-to-enable-swap-accounting-for-memory-cgroup-in-archlinux">here</a>.
Due to a bug in uima sdk, the uima AnalysisEngineProcessException cannot be serialized as a Java object. If your
-analysis engine throws an exception in process(), the ducc framework will stringify it and wrapt it in
+analysis engine throws an exception in process(), the ducc framework will stringify it and wrap it in
java RuntimeException. If you have a custom error handler plugged in into a job driver you will not be
able to test for AnalysisEngineProcessException in a stack trace with a code like this:
diff --git a/src/main/admin/ducc_util.py b/src/main/admin/ducc_util.py
index 02eb574..f1c5a43 100644
--- a/src/main/admin/ducc_util.py
+++ b/src/main/admin/ducc_util.py
@@ -388,7 +388,7 @@
is_operational = False
req = node.split('.')[0]
cmd = '/bin/hostname'
- ssh_cmd = 'ssh -o BatchMode=yes -o ConnectTimeout=10'+' '+node+" "+cmd
+ ssh_cmd = 'ssh -q -o BatchMode=yes -o ConnectTimeout=10'+' '+node+" "+cmd
resp = self.popen(ssh_cmd)
lines = resp.readlines()
if(len(lines)== 1):
diff --git a/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/HistoryManagerDb.java b/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/HistoryManagerDb.java
index f018ad9..a27d474 100644
--- a/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/HistoryManagerDb.java
+++ b/uima-ducc-database/src/main/java/org/apache/uima/ducc/database/HistoryManagerDb.java
@@ -27,7 +27,6 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
import org.apache.uima.ducc.common.Pair;
import org.apache.uima.ducc.common.node.metrics.ProcessGarbageCollectionStats;
@@ -98,6 +97,8 @@
// Jira 4804 For now don't save details in tables: jobs, reservations, & processes
static final boolean saveDetails = System.getenv("SAVE_DB_DETAILS") == null ? false : true;
+ private long restoreCount = 0;
+
public HistoryManagerDb()
{
}
@@ -792,7 +793,12 @@
String cql = "SELECT * FROM ducc.orckpt WHERE id=0";
ResultSet rs = h.execute(cql);
for ( Row r : rs ) {
- logger.info(methodName, null, "Found checkpoint.");
+ if(restoreCount > 0) {
+ logger.debug(methodName, null, "Found checkpoint.");
+ }
+ else {
+ logger.info(methodName, null, "Found checkpoint.");
+ }
if(r == null) {
continue;
}
@@ -824,16 +830,26 @@
Map<DuccId, IDuccWork> map = work.getMap();
for ( DuccId id : map.keySet() ) {
IDuccWork w = map.get(id);
- logger.info(methodName, id, "Restored", w.getClass());
+ if(restoreCount > 0) {
+ logger.debug(methodName, id, "Restored", w.getClass());
+ }
+ else {
+ logger.info(methodName, id, "Restored", w.getClass());
+ }
}
ret = new Pair<DuccWorkMap, Map<DuccId, DuccId>>(work, processToJob);
}
} catch ( Exception e ) {
- logger.error(methodName, null, "Error restoring checkpoint:", e);
+ if(restoreCount > 0) {
+ logger.debug(methodName, null, "Error restoring checkpoint:", e);
+ }
+ else {
+ logger.error(methodName, null, "Error restoring checkpoint:", e);
+ }
}
-
+ restoreCount = restoreCount+1;
return ret;
}
diff --git a/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java b/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
index b2c458f..c6c31ca 100644
--- a/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
+++ b/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/PingDriver.java
@@ -783,13 +783,13 @@
PingStopper()
{
String methodName = "PingStopper.init";
- logger.debug(methodName, sset.getId(), "Wait for pinger to exit:", 60000);
+ logger.info(methodName, sset.getId(), "Wait for pinger to exit:", 60000);
}
public void run()
{
String methodName = "PingStopper.run";
- logger.debug(methodName, sset.getId(), "PingStopper kills reluctant pinger");
+ logger.info(methodName, sset.getId(), "PingStopper kills reluctant pinger");
if ( ping_main != null ) ping_main.destroy();
}
}
@@ -900,6 +900,8 @@
} catch (ClassNotFoundException e) {
logger.error(methodName, sset.getId(), "ExtrnPingDriver: Input garbled:", e);
errors++;
+ } finally {
+ logger.error(methodName, sset.getId(), "ExtrnPingDriver: Terminating ping thread after " + errors + " errors");
}
}
}
diff --git a/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java b/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java
index 209cb38..ac4f000 100644
--- a/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java
+++ b/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServicePingMain.java
@@ -187,6 +187,7 @@
System.out.println("Exceeded error count. Exiting.");
System.exit(1);
}
+ System.out.println("ServicePingMain: Error count " + error_count + " < threshold of " + error_max);
}
// /**
diff --git a/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java b/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
index a71d9be..b60bb7e 100644
--- a/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
+++ b/uima-ducc-sm/src/main/java/org/apache/uima/ducc/sm/ServiceSet.java
@@ -631,9 +631,7 @@
synchronized void setAutostart(boolean auto)
{
- meta_props.setProperty(IStateServices.SvcMetaProps.autostart.pname(), auto ? "true" : "false");
- this.autostart = auto;
- if ( auto ) {
+ if (!this.autostart && auto) { // UIMA-5390 Restrict these resets to only if autostart was off but is now on
// turning this on gives benefit of the doubt on failure management
// by definition, an autostarted services is NOT reference started
cancelLinger();
@@ -641,6 +639,8 @@
init_failures = 0;
resetRuntimeErrors();
}
+ meta_props.setProperty(IStateServices.SvcMetaProps.autostart.pname(), auto ? "true" : "false");
+ this.autostart = auto;
}
synchronized void restartPinger()
@@ -900,14 +900,14 @@
case Waiting:
if(serviceMeta == null) {
notPinging = true;
- notPingingReason = "pinger has not reported";
+ notPingingReason = "pinger is not running";
}
else {
configPing();
long pingExpiry = pingStability * pingRate;
long now = System.currentTimeMillis();
long pingElapsed = now - last_ping;
- if(pingElapsed > pingExpiry) {
+ if (pingElapsed > pingExpiry && last_ping != 0) { // Don't treat first ping as stale
notPinging = true;
notPingingReason = "pinger data is stale";
}
@@ -974,7 +974,9 @@
meta_props.put(IStateServices.SvcMetaProps.service_healthy.pname(), "false");
if ( excessiveFailures() ) {
- meta_props.put(IStateServices.SvcMetaProps.submit_error.pname(), "Service stopped by exessive failures. Initialization failures[" + init_failures + "], Runtime failures[" + run_failures + "]");
+ String msg = init_failures >= init_failure_max ? "initialization failures [" + init_failures + "]"
+ : "runtime failures [" + run_failures + "]";
+ meta_props.put(IStateServices.SvcMetaProps.submit_error.pname(), "Service disabled by excessive " + msg);
} else {
meta_props.put(IStateServices.SvcMetaProps.service_statistics.pname(), "N/A");
}
@@ -1490,7 +1492,7 @@
disable(disable_reason);
save_meta = true;
} else {
- logger.warn(methodName, id, "Instance", inst_id + ": Uunsolicited termination, not yet excessive. Restarting instance.");
+ logger.warn(methodName, id, "Instance", inst_id + ": Unsolicited termination, not yet excessive. Restarting instance.");
start();
return; // don't use termination to set state - start will signal the state machine
}
@@ -1928,7 +1930,10 @@
if ( inShutdown ) return; // in shutdown, don't restart
if ( ping_failures > ping_failure_max ) {
- logger.warn(methodName, id, "Not restarting pinger due to excessiver errors:", ping_failures);
+ String msg = "Service stopped as pinger failed to start " + ping_failures + " times.";
+ logger.warn(methodName, id, msg);
+ meta_props.put(IStateServices.SvcMetaProps.submit_error.pname(), msg);
+ disableAndStop(msg);
return;
}