ARTEMIS-2800 Work around (trapping) current race identified on a BZ https://bugzilla.redhat.com/show_bug.cgi?id=1845326

commit: 05b4ae7bc2eef9f18d338e38eb7515cd0cfba33d [log] [tgz]
author: Clebert Suconic <clebertsuconic@apache.org> Thu Jun 11 15:35:35 2020 -0400
committer: Clebert Suconic <clebertsuconic@apache.org> Thu Jun 11 16:06:51 2020 -0400
tree: 6cee614d43b7b4904844beb91375b5e239c156cd
parent: eb516d1eb86aeaa1171caa1b01ba7b3dff86b31a [diff]
diff --git a/src/main/c/org_apache_activemq_artemis_nativo_jlibaio_LibaioContext.c b/src/main/c/org_apache_activemq_artemis_nativo_jlibaio_LibaioContext.c
index e057311..d495eb4 100644
--- a/src/main/c/org_apache_activemq_artemis_nativo_jlibaio_LibaioContext.c
+++ b/src/main/c/org_apache_activemq_artemis_nativo_jlibaio_LibaioContext.c

@@ -149,6 +149,24 @@
             if (!available) {
                 return 0;
             }
+
+            if (available >= max) {
+               // This is to trap a possible bug from the kernel:
+               //       https://bugzilla.redhat.com/show_bug.cgi?id=1845326
+               //       https://issues.apache.org/jira/browse/ARTEMIS-2800
+               //
+               // When the race occurs, available would eventually be >= max while ring->tail is invalid.
+               // We could work around it by waiting for ring->tail to change:
+               // while (ring->tail == tail) mem_barrier();
+               //
+               // However, we could eventually have available == max in a legal situation, which could lead to an infinite loop here.
+               return io_getevents(aio_ctx, min_nr, max, events, timeout);
+
+               // Also: I could have jumped to the io_getevents call at the end of this method,
+               //       but I really hate goto, so I would rather have duplicated code here,
+               //       and I did not want to create another memory flag to skip the rest of the code.
+            }
+
             //the kernel has written ring->tail from an interrupt:
             //we need to load acquire the completed events here
             read_barrier();
@@ -177,6 +195,8 @@
             fprintf(stdout, "The kernel is not supoprting the ring buffer any longer\n");
         #endif
     }
+    // If this next line ever needs to be changed, beware of the duplicated call earlier in this method.
+    // I explain there why I duplicated the call instead of reusing this one ^^^^
     int sys_call_events = io_getevents(aio_ctx, min_nr, max, events, timeout);
     #ifdef DEBUG
         fprintf(stdout, "consumed sys-call = %d\n", sys_call_events);
commit	05b4ae7bc2eef9f18d338e38eb7515cd0cfba33d	[log] [tgz]
author	Clebert Suconic <clebertsuconic@apache.org>	Thu Jun 11 15:35:35 2020 -0400
committer	Clebert Suconic <clebertsuconic@apache.org>	Thu Jun 11 16:06:51 2020 -0400
tree	6cee614d43b7b4904844beb91375b5e239c156cd
parent	eb516d1eb86aeaa1171caa1b01ba7b3dff86b31a [diff]