MINIFICPP-1379 Reimplement Identifier::parse

This closes #923

Signed-off-by: Marton Szasz <szaszm01@gmail.com>
diff --git a/libminifi/include/utils/Id.h b/libminifi/include/utils/Id.h
index 0e8076e..68dbf7a 100644
--- a/libminifi/include/utils/Id.h
+++ b/libminifi/include/utils/Id.h
@@ -73,6 +73,8 @@
   static utils::optional<Identifier> parse(const std::string& str);
 
  private:
+  static bool parseByte(Data& data, const uint8_t* input, int& charIdx, int& byteIdx);
+
   Data data_{};
 };
 
diff --git a/libminifi/include/utils/StringUtils.h b/libminifi/include/utils/StringUtils.h
index 08847a4..940ca3f 100644
--- a/libminifi/include/utils/StringUtils.h
+++ b/libminifi/include/utils/StringUtils.h
@@ -323,6 +323,14 @@
   }
 
   /**
+   * Hexdecodes a single character in the set [0-9a-fA-F]
+   * @param ch the character to be decoded
+   * @param output the reference where the result should be written
+   * @return true on success
+   */
+  static bool from_hex(uint8_t ch, uint8_t& output);
+
+  /**
    * Hexdecodes the hexencoded string in data, ignoring every character that is not [0-9a-fA-F]
    * @param data the output buffer where the hexdecoded bytes will be written. Must be at least length / 2 bytes long.
    * @param data_length pointer to the length of data the data buffer. It will be filled with the length of the decoded bytes.
diff --git a/libminifi/src/utils/Id.cpp b/libminifi/src/utils/Id.cpp
index 87cb5d4..cf98da0 100644
--- a/libminifi/src/utils/Id.cpp
+++ b/libminifi/src/utils/Id.cpp
@@ -33,7 +33,6 @@
 #include <string>
 #include <limits>
 #include "core/logging/LoggerConfiguration.h"
-#include "utils/StringUtils.h"
 
 #ifdef WIN32
 #include "Rpc.h"
@@ -42,6 +41,8 @@
 #pragma comment(lib, "Ws2_32.lib")
 #endif
 
+#include "utils/StringUtils.h"
+
 namespace org {
 namespace apache {
 namespace nifi {
@@ -115,18 +116,43 @@
 
 utils::optional<Identifier> Identifier::parse(const std::string &str) {
   Identifier id;
-  int assigned = sscanf(str.c_str(), UUID_FORMAT_STRING,
-      &id.data_[0], &id.data_[1], &id.data_[2], &id.data_[3],
-      &id.data_[4], &id.data_[5],
-      &id.data_[6], &id.data_[7],
-      &id.data_[8], &id.data_[9],
-      &id.data_[10], &id.data_[11], &id.data_[12], &id.data_[13], &id.data_[14], &id.data_[15]);
-  if (assigned != 16) {
-    return utils::nullopt;
+  // xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx is 36 long: 16 bytes * 2 hex digits / byte + 4 hyphens
+  if (str.length() != 36) return {};
+  int charIdx = 0;
+  int byteIdx = 0;
+  auto input = reinterpret_cast<const uint8_t*>(str.c_str());
+
+  // [xxxxxxxx]-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+  while (byteIdx < 4) {
+    if (!parseByte(id.data_, input, charIdx, byteIdx)) return {};
+  }
+  // xxxxxxxx[-]xxxx-xxxx-xxxx-xxxxxxxxxxxx
+  if (input[charIdx++] != '-') return {};
+
+  // xxxxxxxx-[xxxx-xxxx-xxxx-]xxxxxxxxxxxx - 3x 2 bytes and a hyphen
+  for (size_t idx = 0; idx < 3; ++idx) {
+    if (!parseByte(id.data_, input, charIdx, byteIdx)) return {};
+    if (!parseByte(id.data_, input, charIdx, byteIdx)) return {};
+    if (input[charIdx++] != '-') return {};
+  }
+
+  // xxxxxxxx-xxxx-xxxx-xxxx-[xxxxxxxxxxxx] - the rest, i.e. until byte 16
+  while (byteIdx < 16) {
+    if (!parseByte(id.data_, input, charIdx, byteIdx)) return {};
   }
   return id;
 }
 
+bool Identifier::parseByte(Data &data, const uint8_t *input, int &charIdx, int &byteIdx) {
+  uint8_t upper, lower;
+  if (!StringUtils::from_hex(input[charIdx++], upper)
+      || !StringUtils::from_hex(input[charIdx++], lower)) {
+    return false;
+  }
+  data[byteIdx++] = (upper << 4) | lower;
+  return true;
+}
+
 IdGenerator::IdGenerator()
     : implementation_(UUID_TIME_IMPL),
       logger_(logging::LoggerFactory<IdGenerator>::getLogger()),
diff --git a/libminifi/src/utils/StringUtils.cpp b/libminifi/src/utils/StringUtils.cpp
index 881f721..7adae95 100644
--- a/libminifi/src/utils/StringUtils.cpp
+++ b/libminifi/src/utils/StringUtils.cpp
@@ -167,6 +167,14 @@
   return result_string;
 }
 
+bool StringUtils::from_hex(uint8_t ch, uint8_t& output) {
+  if (ch > 127) {
+    return false;
+  }
+  output = hex_lut[ch];
+  return output != SKIP;
+}
+
 bool StringUtils::from_hex(uint8_t* data, size_t* data_length, const char* hex, size_t hex_length) {
   if (*data_length < hex_length / 2) {
     return false;
diff --git a/libminifi/test/unit/IdTests.cpp b/libminifi/test/unit/IdTests.cpp
index ca8b45c..efccf2d 100644
--- a/libminifi/test/unit/IdTests.cpp
+++ b/libminifi/test/unit/IdTests.cpp
@@ -163,6 +163,33 @@
   LogTestController::getInstance().reset();
 }
 
+TEST_CASE("Test parse invalid", "[id]") {
+  TestController test_controller;
+
+  LogTestController::getInstance().setDebug<utils::IdGenerator>();
+  std::shared_ptr<minifi::Properties> id_props = std::make_shared<minifi::Properties>();
+  id_props->set("uid.implementation", "time");
+
+  std::shared_ptr<utils::IdGenerator> generator = utils::IdGenerator::getIdGenerator();
+  generator->initialize(id_props);
+
+  const std::map<std::string, bool> test_cases{
+    {"12", false},  // drastically shorter
+    {"12345678-1234-1234-1234-123456789abc", true},  // ok
+    {"12345678-1234-1234-1234-123456789ab", false},  // shorter by one
+    {"12345678-1234-1234-1234-123456789abc0", false},  // longer by one ('0')
+    {"123456789-123-1234-1234-123456789abc", false},  // first block is longer, but second shorter
+    {"12345678-1234-1234-1234-123456789abZ", false},  // contains invalid character at the end
+    {"123456780123401234012340123456789abc", false},  // missing delimiters
+    {"12345678-12-34-1234-1234-123456789ab", false},  // exra hyphen
+    {"utter garbage but exactly 36 chars  ", false}  // utter garbage
+  };
+
+  for (const auto& it : test_cases) {
+    REQUIRE(static_cast<bool>(utils::Identifier::parse(it.first)) == it.second);
+  }
+}
+
 TEST_CASE("Test to_string", "[id]") {
   TestController test_controller;