Catch exception inside ITRetryUtil to fix one of the causes for flaky integration tests (#11265)

* Do not stop retrying when an exception is encountered. Save & propagate last exception if retry count is exceeded.

* Add one more log message to help with debugging

* Cap the schema registry JVM heap (-Xmx32m) to try to prevent OOMs
diff --git a/integration-tests/docker/docker-compose.base.yml b/integration-tests/docker/docker-compose.base.yml
index f8119de..9267b1a 100644
--- a/integration-tests/docker/docker-compose.base.yml
+++ b/integration-tests/docker/docker-compose.base.yml
@@ -398,4 +398,4 @@
       SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
       SCHEMA_REGISTRY_AUTHENTICATION_REALM: druid
       SCHEMA_REGISTRY_AUTHENTICATION_ROLES: users
-      SCHEMA_REGISTRY_OPTS: -Djava.security.auth.login.config=/usr/lib/druid/conf/jaas_config.file
+      SCHEMA_REGISTRY_OPTS: -Djava.security.auth.login.config=/usr/lib/druid/conf/jaas_config.file -Xmx32m
diff --git a/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java b/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java
index 3ef2f71..e43c26d 100644
--- a/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java
+++ b/integration-tests/src/main/java/org/apache/druid/testing/utils/ITRetryUtil.java
@@ -52,23 +52,55 @@
       String taskMessage
   )
   {
-    try {
-      int currentTry = 0;
-      while (callable.call() != expectedValue) {
-        if (currentTry > retryCount) {
-          throw new ISE("Max number of retries[%d] exceeded for Task[%s]. Failing.", retryCount, taskMessage);
-        }
-        LOG.info(
-            "Attempt[%d]: Task %s still not complete. Next retry in %d ms",
-            currentTry, taskMessage, delayInMillis
-        );
-        Thread.sleep(delayInMillis);
-
-        currentTry++;
-      }
-    }
-    catch (Exception e) {
-      throw new RuntimeException(e);
-    }
+    int currentTry = 0;
+    Exception lastException = null;
+
+    // Attempts are numbered 0..retryCount; the loop bound stops an exhausted budget from calling again.
+    while (currentTry <= retryCount) {
+      try {
+        if (currentTry > 0) {
+          // Wait between attempts, whether the previous one returned the wrong value or threw.
+          Thread.sleep(delayInMillis);
+        }
+        LOG.info("Trying attempt[%d/%d]...", currentTry, retryCount);
+        if (callable.call() == expectedValue) {
+          return;
+        }
+        LOG.info(
+            "Attempt[%d/%d] did not pass: Task %s still not complete. Next retry in %d ms",
+            currentTry, retryCount, taskMessage, delayInMillis
+        );
+      }
+      catch (InterruptedException e) {
+        // An interrupt is a request to stop, not a transient failure: restore the flag and abort.
+        Thread.currentThread().interrupt();
+        throw new RuntimeException(e);
+      }
+      catch (Exception e) {
+        // just continue retrying if there is an exception (it may be transient!) but save the last:
+        lastException = e;
+        LOG.info(
+            "Attempt[%d/%d] threw [%s]: Task %s still not complete. Next retry in %d ms",
+            currentTry, retryCount, e, taskMessage, delayInMillis
+        );
+      }
+      // Every failed attempt consumes a retry; otherwise a persistently throwing callable loops forever.
+      currentTry++;
+    }
+
+    if (lastException != null) {
+      // NOTE(review): relies on ISE(Throwable cause, String format, Object... args) - confirm against ISE.
+      throw new ISE(
+          lastException,
+          "Max number of retries[%d] exceeded for Task[%s]. Failing.",
+          retryCount,
+          taskMessage
+      );
+    }
+    throw new ISE(
+        "Max number of retries[%d] exceeded for Task[%s]. Failing.",
+        retryCount,
+        taskMessage
+    );
   }