trng: kinetis: update TRNG to be interrupt driven

When TRNG HW (not RNGA) is present, it can be used in an
interrupt-driven manner, where interrupts are generated when data is
available or on errors. With this commit, the poller task is updated so
that it only runs when woken by an interrupt, instead of polling
constantly to fill the cache. This fixes issues with task starvation,
since the poller task has a very high priority and the TRNG is very slow
in the standard configuration.

A new syscfg, `KINETIS_TRNG_ENTROPY_DELAY`, was added to allow faster
generation of entropy by reducing its value.

This commit also refactors the TRNG queue to be simpler and better
abstracted.

Note: this commit does not change the functionality of RNGA-based HW
(e.g. K64F).

Signed-off-by: Fabio Utzig <utzig@apache.org>
diff --git a/hw/drivers/trng/trng_kinetis/src/trng_kinetis.c b/hw/drivers/trng/trng_kinetis/src/trng_kinetis.c
index 815552a..dfab99d 100644
--- a/hw/drivers/trng/trng_kinetis/src/trng_kinetis.c
+++ b/hw/drivers/trng/trng_kinetis/src/trng_kinetis.c
@@ -35,6 +35,22 @@
     do {                                                              \
         (base)->MCTL |= (TRNG_MCTL_PRGM_MASK | TRNG_MCTL_ERR_MASK);   \
     } while (0)
+#define TRNG_CLEAR_AND_ENABLE_INTS(base)                              \
+    do {                                                              \
+        (base)->INT_CTRL &= ~(TRNG_INT_CTRL_HW_ERR_MASK |             \
+                              TRNG_INT_CTRL_ENT_VAL_MASK);            \
+        (base)->INT_MASK |= TRNG_INT_MASK_HW_ERR_MASK |               \
+                            TRNG_INT_MASK_ENT_VAL_MASK;               \
+    } while (0)
+#define TRNG_CLEAR_AND_ENABLE_ENTROPY_INT(base)                       \
+    do {                                                              \
+        (base)->INT_CTRL &= ~TRNG_INT_CTRL_ENT_VAL_MASK;              \
+        (base)->INT_MASK |= TRNG_INT_MASK_ENT_VAL_MASK;               \
+    } while (0)
+#define TRNG_DISABLE_ENTROPY_INT(base)                                \
+    do {                                                              \
+        (base)->INT_MASK &= ~TRNG_INT_MASK_ENT_VAL_MASK;              \
+    } while (0)
 #else
 #error "Unsupported TRNG interface"
 #endif
@@ -42,33 +58,80 @@
 #include "trng/trng.h"
 #include "trng_kinetis/trng_kinetis.h"
 
-static uint8_t rng_cache[ MYNEWT_VAL(KINETIS_TRNG_CACHE_LEN) ];
-static uint16_t rng_cache_out;
-static uint16_t rng_cache_in;
-static struct os_mutex rng_cache_mu;
+#define TRNG_CACHE_LEN MYNEWT_VAL(KINETIS_TRNG_CACHE_LEN)
+static struct {
+    uint16_t out;
+    uint16_t in;
+    uint16_t used;
+    struct os_mutex mu;
+    uint8_t cache[TRNG_CACHE_LEN];
+} rng_state;
+static_assert(sizeof(rng_state.cache) == TRNG_CACHE_LEN,
+              "Must fix TRNG_CACHE_LEN usage");
+#define CACHE_OUT(x)                                               \
+    do {                                                           \
+        (x) = rng_state.cache[rng_state.out];                      \
+        rng_state.out = (rng_state.out + 1) % TRNG_CACHE_LEN;      \
+        rng_state.used--;                                          \
+    } while (0)
+#define CACHE_IN(x)                                                \
+    do {                                                           \
+        rng_state.cache[rng_state.in] = (x);                       \
+        rng_state.in = (rng_state.in + 1) % TRNG_CACHE_LEN;        \
+        rng_state.used++;                                          \
+    } while (0)
+#define IS_CACHE_FULL() (rng_state.used == TRNG_CACHE_LEN)
+#define IS_CACHE_EMPTY() (rng_state.used == 0)
+#define CACHE_INIT()                                               \
+    do {                                                           \
+        rng_state.out = 0;                                         \
+        rng_state.in = 0;                                          \
+        rng_state.used = 0;                                        \
+    } while (0)
+#define CACHE_LOCK() os_mutex_pend(&rng_state.mu, OS_TIMEOUT_NEVER)
+#define CACHE_UNLOCK() os_mutex_release(&rng_state.mu)
+#define CACHE_INIT_LOCK() os_mutex_init(&rng_state.mu)
+
+#if USE_RNGA
 static bool running;
+#endif
 static struct os_eventq rng_evtq;
+static struct os_event evt = {0};
 
 #define TRNG_POLLER_PRIO (8)
 #define TRNG_POLLER_STACK_SIZE OS_STACK_ALIGN(64)
 static struct os_task trng_poller_task;
 OS_TASK_STACK_DEFINE(trng_poller_stack, TRNG_POLLER_STACK_SIZE);
 
-static inline void
+#if USE_TRNG
+static void
+trng_irq_handler(void)
+{
+    if (TRNG0->MCTL & TRNG_MCTL_ERR_MASK) {
+        TRNG0->MCTL |= TRNG_MCTL_ERR_MASK;
+    }
+
+    if (TRNG0->INT_CTRL & TRNG_INT_CTRL_HW_ERR_MASK) {
+        TRNG0->INT_CTRL &= ~TRNG_INT_CTRL_HW_ERR_MASK;
+    }
+
+    if (TRNG0->INT_CTRL & TRNG_INT_CTRL_ENT_VAL_MASK) {
+        TRNG_DISABLE_ENTROPY_INT(TRNG0);
+        (void)os_eventq_put(&rng_evtq, &evt);
+    }
+}
+#endif
+
+static void
 kinetis_trng_start(void)
 {
-    struct os_event evt;
-
 #if USE_RNGA
     RNGA_SetMode(RNG, kRNGA_ModeNormal);
+    running = true;
+    (void)os_eventq_put(&rng_evtq, &evt);
 #elif USE_TRNG
     TRNG_START(TRNG0);
 #endif
-    running = true;
-
-    evt.ev_queued = 0;
-    evt.ev_arg = NULL;
-    (void)os_eventq_put(&rng_evtq, &evt);
 }
 
 static inline void
@@ -76,47 +139,35 @@
 {
 #if USE_RNGA
     RNGA_SetMode(RNG, kRNGA_ModeSleep);
+    running = false;
 #elif USE_TRNG
     TRNG_STOP(TRNG0);
 #endif
-
-   running = false;
 }
 
 static size_t
 kinetis_trng_read(struct trng_dev *trng, void *ptr, size_t size)
 {
     size_t num_read;
+    uint8_t *u8p;
 
-    os_mutex_pend(&rng_cache_mu, OS_TIMEOUT_NEVER);
+    num_read = 0;
+    u8p = (uint8_t *)ptr;
 
-    if (rng_cache_out <= rng_cache_in) {
-        size = min(size, rng_cache_in - rng_cache_out);
-        memcpy(ptr, &rng_cache[rng_cache_out], size);
-        num_read = size;
-    } else if (rng_cache_out + size <= sizeof(rng_cache)) {
-        memcpy(ptr, &rng_cache[rng_cache_out], size);
-        num_read = size;
-    } else {
-        num_read = sizeof(rng_cache) - rng_cache_out;
-        memcpy(ptr, &rng_cache[rng_cache_out], num_read);
+    CACHE_LOCK();
 
-        size -= num_read;
-        ptr += num_read;
-
-        size = min(size, rng_cache_in);
-        memcpy(ptr, rng_cache, size);
-        num_read += size;
+    while (!IS_CACHE_EMPTY() && size) {
+        CACHE_OUT(*u8p++);
+        num_read++;
+        size--;
     }
 
-    rng_cache_out = (rng_cache_out + num_read) % sizeof(rng_cache);
+    CACHE_UNLOCK();
 
     if (num_read > 0) {
         kinetis_trng_start();
     }
 
-    os_mutex_release(&rng_cache_mu);
-
     return num_read;
 }
 
@@ -146,31 +197,39 @@
     int rc;
 
     while (1) {
-        if (running) {
+#if USE_TRNG
+        (void)os_eventq_get(&rng_evtq);
+#endif
+
 #if USE_RNGA
+        if (running) {
             rc = RNGA_GetRandomData(RNG, data, sizeof(uint32_t));
 #else
             rc = TRNG_GetRandomData(TRNG0, data, sizeof(uint32_t));
 #endif
             if (rc == 0) {
-                os_mutex_pend(&rng_cache_mu, OS_TIMEOUT_NEVER);
+                CACHE_LOCK();
+
                 for (i = 0; i < 4; i++) {
-                    rng_cache[rng_cache_in++] = data[i];
-
-                    if (rng_cache_in >= sizeof(rng_cache)) {
-                        rng_cache_in = 0;
-                    }
-
-                    if ((rng_cache_in + 1) % sizeof(rng_cache) == rng_cache_out) {
+                    if (IS_CACHE_FULL()) {
                         kinetis_trng_stop();
                         break;
                     }
+
+                    CACHE_IN(data[i]);
                 }
-                os_mutex_release(&rng_cache_mu);
+
+                CACHE_UNLOCK();
+
+#if USE_TRNG
+                TRNG_CLEAR_AND_ENABLE_ENTROPY_INT(TRNG0);
+#endif
             }
+#if USE_RNGA
         } else {
             (void)os_eventq_get(&rng_evtq);
         }
+#endif
     }
 }
 
@@ -179,22 +238,28 @@
 {
     struct trng_dev *trng;
 #if USE_TRNG
-    trng_config_t default_config;
+    trng_config_t trng_config;
 #endif
 
     trng = (struct trng_dev *)dev;
     assert(trng);
 
     if (!(dev->od_flags & OS_DEV_F_STATUS_OPEN)) {
-        rng_cache_out = 0;
-        rng_cache_in = 0;
+        CACHE_INIT();
 
 #if USE_RNGA
         RNGA_Init(RNG);
         RNGA_Seed(RNG, SIM->UIDL);
+
 #elif USE_TRNG
-        (void)TRNG_GetDefaultConfig(&default_config);
-        TRNG_Init(TRNG0, &default_config);
+        NVIC_SetVector(TRNG0_IRQn, (uint32_t)trng_irq_handler);
+        NVIC_EnableIRQ(TRNG0_IRQn);
+
+        (void)TRNG_GetDefaultConfig(&trng_config);
+        trng_config.entropyDelay = MYNEWT_VAL(KINETIS_TRNG_ENTROPY_DELAY);
+        TRNG_Init(TRNG0, &trng_config);
+
+        TRNG_CLEAR_AND_ENABLE_INTS(TRNG0);
 #endif
 
         kinetis_trng_start();
@@ -218,7 +283,7 @@
     trng->interface.read = kinetis_trng_read;
 
     os_eventq_init(&rng_evtq);
-    os_mutex_init(&rng_cache_mu);
+    CACHE_INIT_LOCK();
 
     rc = os_task_init(&trng_poller_task, "trng_poller", trng_poller_handler, NULL,
             TRNG_POLLER_PRIO, OS_WAIT_FOREVER, trng_poller_stack,
diff --git a/hw/drivers/trng/trng_kinetis/syscfg.yml b/hw/drivers/trng/trng_kinetis/syscfg.yml
index 7c72a7b..dc01492 100644
--- a/hw/drivers/trng/trng_kinetis/syscfg.yml
+++ b/hw/drivers/trng/trng_kinetis/syscfg.yml
@@ -30,3 +30,10 @@
         value: 0
         restrictions:
             - '!KINETIS_TRNG_USE_RNGA'
+    KINETIS_TRNG_ENTROPY_DELAY:
+        description: >
+            The configured number is the length, in system clocks, of each
+            entropy sample taken. The default value is 3200, but it can be
+            reduced for faster sample generation, at the cost of generating
+            lower-quality entropy.
+        value: 3200