blob: b75fb51b333fd5eb32b0a3ae06dd86e129fdf41e [file]
/**
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <list>
#include <fstream>
#include <memory>
#include <utility>
#include <string>
#include <set>
#include <iostream>
#include "unit/TestBase.h"
#include "unit/Catch.h"
#include "core/Core.h"
#include "unit/ProvenanceTestHelper.h"
#include "repository/VolatileContentRepository.h"
#include "unit/TestUtils.h"
#include "minifi-cpp/core/FlowFile.h"
#include "core/Processor.h"
#include "minifi-cpp/core/ProcessContext.h"
#include "core/ProcessSession.h"
#include "GetFile.h"
#include "ExtractText.h"
#include "LogAttribute.h"
// Shared fixtures for the ExtractText test cases below.
// Declared constexpr so the pointers themselves are immutable and have internal
// linkage: they cannot be reassigned by accident and cannot clash with
// identically named symbols in other test translation units.

// Content written to the input file in the plain (non-regex) test.
constexpr const char* TEST_TEXT = "Test text";
// Content written to the input file in the regex test; contains two "Speed limit <n>" occurrences.
constexpr const char* REGEX_TEST_TEXT = "Speed limit 130 | Speed limit 80";
// Name of the input file created inside the temporary directory.
constexpr const char* TEST_FILE = "test_file.txt";
// Value assigned to ExtractText's Attribute property in the plain test.
constexpr const char* TEST_ATTR = "ExtractedText";
// Smoke test: an ExtractText processor can be constructed through the test
// helper, reports the name it was created with, and carries an identifier
// that evaluates to true (presumably meaning "not nil" - confirm against
// utils::Identifier).
TEST_CASE("Test creation of ExtractText", "[extracttextCreate]") {
  using ExtractText = org::apache::nifi::minifi::processors::ExtractText;

  TestController testController;

  auto extract_text = minifi::test::utils::make_processor<ExtractText>("processorname");
  REQUIRE(extract_text->getName() == "processorname");

  utils::Identifier extract_text_id = extract_text->getUUID();
  REQUIRE(extract_text_id);
}
// Runs a GetFile -> ExtractText -> LogAttribute plan twice:
//   1. with only the Attribute property set, expecting LogAttribute to print the
//      whole file content (TEST_TEXT) under TEST_ATTR;
//   2. after setting SizeLimit to "4", expecting the logged value to be "Test"
//      and the full-text line to be absent.
TEST_CASE("Test usage of ExtractText", "[extracttextTest]") {
  namespace processors = org::apache::nifi::minifi::processors;

  TestController testController;

  LogTestController::getInstance().setTrace<processors::ExtractText>();
  LogTestController::getInstance().setTrace<processors::GetFile>();
  LogTestController::getInstance().setTrace<processors::LogAttribute>();
  LogTestController::getInstance().setTrace<core::ProcessSession>();
  LogTestController::getInstance().setTrace<core::repository::VolatileContentRepository>();
  LogTestController::getInstance().setTrace<org::apache::nifi::minifi::Connection>();
  LogTestController::getInstance().setTrace<org::apache::nifi::minifi::core::Connectable>();
  LogTestController::getInstance().setTrace<org::apache::nifi::minifi::core::FlowFile>();

  std::shared_ptr<TestPlan> plan = testController.createPlan();

  auto temp_dir = testController.createTempDirectory();
  REQUIRE(!temp_dir.empty());

  auto getfile = plan->addProcessor("GetFile", "getfileCreate2");
  plan->setProperty(getfile, processors::GetFile::Directory, temp_dir.string());
  plan->setProperty(getfile, processors::GetFile::KeepSourceFile, "true");

  auto maprocessor = plan->addProcessor("ExtractText", "testExtractText", core::Relationship("success", "description"), true);
  plan->setProperty(maprocessor, processors::ExtractText::Attribute, TEST_ATTR);

  plan->addProcessor("LogAttribute", "outputLogAttribute", core::Relationship("success", "description"), true);

  const auto test_file_path = temp_dir / TEST_FILE;
  {
    // Fail right here if the fixture cannot be created, instead of letting the
    // log assertions below fail for a reason unrelated to ExtractText.
    std::ofstream test_file(test_file_path);
    REQUIRE(test_file.is_open());
    test_file << TEST_TEXT;
  }  // stream is flushed and closed on scope exit, before the plan runs

  plan->runNextProcessor();  // GetFile
  plan->runNextProcessor();  // ExtractText
  plan->runNextProcessor();  // LogAttribute

  const std::string full_text_log = std::string("key:") + TEST_ATTR + " value:" + TEST_TEXT;
  REQUIRE(LogTestController::getInstance().contains(full_text_log));

  // Second pass: same plan, but with a size limit on the extracted text.
  plan->reset();
  plan->setProperty(maprocessor, processors::ExtractText::SizeLimit, "4");

  // Drop the log lines of the first pass so they cannot satisfy (or break) the checks below.
  LogTestController::getInstance().reset();
  LogTestController::getInstance().setTrace<processors::LogAttribute>();

  {
    std::ofstream test_file_2(test_file_path.string() + "2");
    REQUIRE(test_file_2.is_open());
    test_file_2 << TEST_TEXT << '\n';
  }

  plan->runNextProcessor();  // GetFile
  plan->runNextProcessor();  // ExtractText
  plan->runNextProcessor();  // LogAttribute

  // Zero timeout: only look at what has been logged so far, do not wait for the line to appear.
  REQUIRE_FALSE(LogTestController::getInstance().contains(full_text_log, std::chrono::seconds(0)));

  const std::string truncated_log = std::string("key:") + TEST_ATTR + " value:" + "Test";
  REQUIRE(LogTestController::getInstance().contains(truncated_log));

  LogTestController::getInstance().reset();
}
// Runs a GetFile -> ExtractText -> LogAttribute plan with RegexMode and
// EnableRepeatingCaptureGroup switched on, and two dynamic properties:
//   - "RegexAttr"    : "Speed limit ([0-9]+)", applied to REGEX_TEST_TEXT;
//   - "InvalidRegex" : a malformed pattern that is expected to produce an error log line.
// Each SECTION runs the plan once and records the attribute log lines it expects.
TEST_CASE("Test usage of ExtractText in regex mode", "[extracttextRegexTest]") {
  namespace processors = org::apache::nifi::minifi::processors;

  TestController testController;

  LogTestController::getInstance().setTrace<processors::ExtractText>();
  LogTestController::getInstance().setTrace<processors::GetFile>();
  LogTestController::getInstance().setTrace<processors::LogAttribute>();

  std::shared_ptr<TestPlan> plan = testController.createPlan();

  auto dir = testController.createTempDirectory();
  REQUIRE(!dir.empty());

  auto getfile = plan->addProcessor("GetFile", "getfileCreate2");
  plan->setProperty(getfile, processors::GetFile::Directory, dir.string());
  plan->setProperty(getfile, processors::GetFile::KeepSourceFile, "true");

  auto maprocessor = plan->addProcessor("ExtractText", "testExtractText", core::Relationship("success", "description"), true);
  plan->setProperty(maprocessor, processors::ExtractText::RegexMode, "true");
  plan->setProperty(maprocessor, processors::ExtractText::EnableRepeatingCaptureGroup, "true");
  plan->setDynamicProperty(maprocessor, "RegexAttr", "Speed limit ([0-9]+)");
  plan->setDynamicProperty(maprocessor, "InvalidRegex", "[Invalid)A(F)");

  plan->addProcessor("LogAttribute", "outputLogAttribute", core::Relationship("success", "description"), true);

  {
    // Fail right here if the fixture cannot be created, instead of letting the
    // log assertions below fail for a reason unrelated to ExtractText.
    std::ofstream test_file(dir / TEST_FILE);
    REQUIRE(test_file.is_open());
    test_file << REGEX_TEST_TEXT;
  }  // stream is flushed and closed on scope exit, before the plan runs

  std::list<std::string> expected_logs;

  SECTION("Do not include capture group 0") {
    plan->setProperty(maprocessor, processors::ExtractText::IncludeCaptureGroupZero, "false");

    testController.runSession(plan);

    expected_logs = {
      "key:RegexAttr value:130",
      "key:RegexAttr.0 value:130",
      "key:RegexAttr.1 value:80"
    };
  }

  SECTION("Include capture group 0") {
    // IncludeCaptureGroupZero is left untouched here, so this section exercises
    // whatever the processor does when the property is not set.
    testController.runSession(plan);

    expected_logs = {
      "key:RegexAttr value:Speed limit 130",
      "key:RegexAttr.0 value:Speed limit 130",
      "key:RegexAttr.1 value:130",
      "key:RegexAttr.2 value:Speed limit 80",
      "key:RegexAttr.3 value:80"
    };
  }

  for (const auto& log : expected_logs) {
    REQUIRE(LogTestController::getInstance().contains(log));
  }

  // The malformed "InvalidRegex" pattern must be reported in both sections.
  std::string error_str = "error encountered when trying to construct regular expression from property (key: InvalidRegex)";
  REQUIRE(LogTestController::getInstance().contains(error_str));

  LogTestController::getInstance().reset();
}
// Feeds ExtractText (regex mode, capture group 0 excluded) a file whose
// "Speed limit (.*)" capture spans "80" followed by 100'000 '.' characters,
// and checks that the logged RegexAttr.0 attribute value begins with "80".
// The log check matches on the "key:RegexAttr.0 value:80" text only, so it
// says nothing about how much of the trailing filler ends up in the attribute.
TEST_CASE("Test usage of ExtractText in regex mode with large regex matches", "[extracttextRegexTest]") {
  namespace processors = org::apache::nifi::minifi::processors;

  TestController test_controller;

  LogTestController::getInstance().setTrace<processors::ExtractText>();
  LogTestController::getInstance().setTrace<processors::GetFile>();
  LogTestController::getInstance().setTrace<processors::LogAttribute>();

  std::shared_ptr<TestPlan> plan = test_controller.createPlan();

  auto dir = test_controller.createTempDirectory();
  REQUIRE(!dir.empty());

  auto getfile = plan->addProcessor("GetFile", "GetFile");
  plan->setProperty(getfile, processors::GetFile::Directory, dir.string());
  plan->setProperty(getfile, processors::GetFile::KeepSourceFile, "true");

  auto extract_text_processor = plan->addProcessor("ExtractText", "ExtractText", core::Relationship("success", "description"), true);
  plan->setProperty(extract_text_processor, processors::ExtractText::RegexMode, "true");
  plan->setProperty(extract_text_processor, processors::ExtractText::IncludeCaptureGroupZero, "false");
  plan->setDynamicProperty(extract_text_processor, "RegexAttr", "Speed limit (.*)");

  plan->addProcessor("LogAttribute", "outputLogAttribute", core::Relationship("success", "description"), true);

  // Long filler appended after the interesting part of the input to make the match large.
  std::string additional_long_string(100'000, '.');
  minifi::test::utils::putFileToDir(dir, TEST_FILE, "Speed limit 80" + additional_long_string);

  test_controller.runSession(plan);

  REQUIRE(LogTestController::getInstance().contains("key:RegexAttr.0 value:80"));

  LogTestController::getInstance().reset();
}