MINIFICPP-1379 Reimplement Identifier::parse
This closes #923
Signed-off-by: Marton Szasz <szaszm01@gmail.com>
diff --git a/libminifi/include/utils/Id.h b/libminifi/include/utils/Id.h
index 0e8076e..68dbf7a 100644
--- a/libminifi/include/utils/Id.h
+++ b/libminifi/include/utils/Id.h
@@ -73,6 +73,8 @@
static utils::optional<Identifier> parse(const std::string& str);
private:
+ static bool parseByte(Data& data, const uint8_t* input, int& charIdx, int& byteIdx);
+
Data data_{};
};
diff --git a/libminifi/include/utils/StringUtils.h b/libminifi/include/utils/StringUtils.h
index 08847a4..940ca3f 100644
--- a/libminifi/include/utils/StringUtils.h
+++ b/libminifi/include/utils/StringUtils.h
@@ -323,6 +323,14 @@
}
/**
+ * Hexdecodes a single character in the set [0-9a-fA-F]
+ * @param ch the character to be decoded
+ * @param output the reference where the result should be written
+ * @return true on success
+ */
+ static bool from_hex(uint8_t ch, uint8_t& output);
+
+ /**
* Hexdecodes the hexencoded string in data, ignoring every character that is not [0-9a-fA-F]
* @param data the output buffer where the hexdecoded bytes will be written. Must be at least length / 2 bytes long.
* @param data_length pointer to the length of data the data buffer. It will be filled with the length of the decoded bytes.
diff --git a/libminifi/src/utils/Id.cpp b/libminifi/src/utils/Id.cpp
index 87cb5d4..cf98da0 100644
--- a/libminifi/src/utils/Id.cpp
+++ b/libminifi/src/utils/Id.cpp
@@ -33,7 +33,6 @@
#include <string>
#include <limits>
#include "core/logging/LoggerConfiguration.h"
-#include "utils/StringUtils.h"
#ifdef WIN32
#include "Rpc.h"
@@ -42,6 +41,8 @@
#pragma comment(lib, "Ws2_32.lib")
#endif
+#include "utils/StringUtils.h"
+
namespace org {
namespace apache {
namespace nifi {
@@ -115,18 +116,43 @@
utils::optional<Identifier> Identifier::parse(const std::string &str) {
Identifier id;
- int assigned = sscanf(str.c_str(), UUID_FORMAT_STRING,
- &id.data_[0], &id.data_[1], &id.data_[2], &id.data_[3],
- &id.data_[4], &id.data_[5],
- &id.data_[6], &id.data_[7],
- &id.data_[8], &id.data_[9],
- &id.data_[10], &id.data_[11], &id.data_[12], &id.data_[13], &id.data_[14], &id.data_[15]);
- if (assigned != 16) {
- return utils::nullopt;
+ // xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx is 36 long: 16 bytes * 2 hex digits / byte + 4 hyphens
+ if (str.length() != 36) return {};
+ int charIdx = 0;
+ int byteIdx = 0;
+ auto input = reinterpret_cast<const uint8_t*>(str.c_str());
+
+ // [xxxxxxxx]-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ while (byteIdx < 4) {
+ if (!parseByte(id.data_, input, charIdx, byteIdx)) return {};
+ }
+ // xxxxxxxx[-]xxxx-xxxx-xxxx-xxxxxxxxxxxx
+ if (input[charIdx++] != '-') return {};
+
+ // xxxxxxxx-[xxxx-xxxx-xxxx-]xxxxxxxxxxxx - 3x 2 bytes and a hyphen
+ for (size_t idx = 0; idx < 3; ++idx) {
+ if (!parseByte(id.data_, input, charIdx, byteIdx)) return {};
+ if (!parseByte(id.data_, input, charIdx, byteIdx)) return {};
+ if (input[charIdx++] != '-') return {};
+ }
+
+ // xxxxxxxx-xxxx-xxxx-xxxx-[xxxxxxxxxxxx] - the rest, i.e. until byte 16
+ while (byteIdx < 16) {
+ if (!parseByte(id.data_, input, charIdx, byteIdx)) return {};
}
return id;
}
+bool Identifier::parseByte(Data &data, const uint8_t *input, int &charIdx, int &byteIdx) {
+ uint8_t upper, lower;
+ if (!StringUtils::from_hex(input[charIdx++], upper)
+ || !StringUtils::from_hex(input[charIdx++], lower)) {
+ return false;
+ }
+ data[byteIdx++] = (upper << 4) | lower;
+ return true;
+}
+
IdGenerator::IdGenerator()
: implementation_(UUID_TIME_IMPL),
logger_(logging::LoggerFactory<IdGenerator>::getLogger()),
diff --git a/libminifi/src/utils/StringUtils.cpp b/libminifi/src/utils/StringUtils.cpp
index 881f721..7adae95 100644
--- a/libminifi/src/utils/StringUtils.cpp
+++ b/libminifi/src/utils/StringUtils.cpp
@@ -167,6 +167,14 @@
return result_string;
}
+bool StringUtils::from_hex(uint8_t ch, uint8_t& output) {
+ if (ch > 127) {
+ return false;
+ }
+ output = hex_lut[ch];
+ return output != SKIP;
+}
+
bool StringUtils::from_hex(uint8_t* data, size_t* data_length, const char* hex, size_t hex_length) {
if (*data_length < hex_length / 2) {
return false;
diff --git a/libminifi/test/unit/IdTests.cpp b/libminifi/test/unit/IdTests.cpp
index ca8b45c..efccf2d 100644
--- a/libminifi/test/unit/IdTests.cpp
+++ b/libminifi/test/unit/IdTests.cpp
@@ -163,6 +163,33 @@
LogTestController::getInstance().reset();
}
+TEST_CASE("Test parse invalid", "[id]") {
+ TestController test_controller;
+
+ LogTestController::getInstance().setDebug<utils::IdGenerator>();
+ std::shared_ptr<minifi::Properties> id_props = std::make_shared<minifi::Properties>();
+ id_props->set("uid.implementation", "time");
+
+ std::shared_ptr<utils::IdGenerator> generator = utils::IdGenerator::getIdGenerator();
+ generator->initialize(id_props);
+
+ const std::map<std::string, bool> test_cases{
+ {"12", false}, // drastically shorter
+ {"12345678-1234-1234-1234-123456789abc", true}, // ok
+ {"12345678-1234-1234-1234-123456789ab", false}, // shorter by one
+ {"12345678-1234-1234-1234-123456789abc0", false}, // longer by one ('0')
+ {"123456789-123-1234-1234-123456789abc", false}, // first block is longer, but second shorter
+ {"12345678-1234-1234-1234-123456789abZ", false}, // contains invalid character at the end
+ {"123456780123401234012340123456789abc", false}, // missing delimiters
+ {"12345678-12-34-1234-1234-123456789ab", false}, // exra hyphen
+ {"utter garbage but exactly 36 chars ", false} // utter garbage
+ };
+
+ for (const auto& it : test_cases) {
+ REQUIRE(static_cast<bool>(utils::Identifier::parse(it.first)) == it.second);
+ }
+}
+
TEST_CASE("Test to_string", "[id]") {
TestController test_controller;