| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| #pragma once |
| |
| #include "controllers/RecordSetReader.h" |
| #include "core/PropertyDefinitionBuilder.h" |
| #include "minifi-cpp/core/logging/Logger.h" |
| #include "core/logging/LoggerFactory.h" |
| #include "pugixml.hpp" |
| |
| namespace org::apache::nifi::minifi::standard { |
| |
// Record set reader controller service for XML input. Per its own Description
// string, it reads XML content and creates Record objects, with records
// expected on the second level of the document inside an enclosing root tag.
// Only the inline members below are defined in this header; read(), onEnable()
// and the private helpers are declared here and defined elsewhere.
| class XMLReader final : public core::RecordSetReaderImpl { |
| public: |
// Forwards the service name and (optional, default-constructed) identifier to
// the RecordSetReaderImpl base; no other state is set up here.
| explicit XMLReader(const std::string_view name, const utils::Identifier& uuid = {}) : RecordSetReaderImpl(name, uuid) {} |
| |
// Instances are neither copyable nor movable.
| XMLReader(XMLReader&&) = delete; |
| XMLReader(const XMLReader&) = delete; |
| XMLReader& operator=(XMLReader&&) = delete; |
| XMLReader& operator=(const XMLReader&) = delete; |
| |
| ~XMLReader() override = default; |
| |
// Human-readable description of the service (runtime string, do not edit casually).
| EXTENSIONAPI static constexpr const char* Description = "Reads XML content and creates Record objects. Records are expected in the second level of XML data, embedded in an enclosing root tag. " |
| "Types for records are inferred automatically based on the content of the XML tags. For timestamps, the format is expected to be ISO 8601 compliant."; |
| |
// Optional property (no default, no validator): name used for a tag's text
// content when it has to be stored as a field of a nested record.
| EXTENSIONAPI static constexpr auto FieldNameForContent = core::PropertyDefinitionBuilder<>::createProperty("Field Name for Content") |
| .withDescription("If tags with content (e. g. <field>content</field>) are defined as nested records in the schema, the name of the tag will be used as name for the record and the value of " |
| "this property will be used as name for the field. If the tag contains subnodes besides the content (e.g. <field>content<subfield>subcontent</subfield></field>), " |
| "or a node attribute is present, we need to define a name for the text content, so that it can be distinguished from the subnodes. If this property is not set, the default " |
| "name 'value' will be used for the text content of the tag in this case.") |
| .build(); |
// Required boolean property, defaults to "false": whether XML attributes
// become record fields or are ignored.
| EXTENSIONAPI static constexpr auto ParseXMLAttributes = core::PropertyDefinitionBuilder<>::createProperty("Parse XML Attributes") |
| .withDescription("When this property is 'true' then XML attributes are parsed and added to the record as new fields, otherwise XML attributes and their values are ignored.") |
| .isRequired(true) |
| .withValidator(core::StandardPropertyValidators::BOOLEAN_VALIDATOR) |
| .withDefaultValue("false") |
| .build(); |
// Optional property (no default, no validator): prefix prepended to attribute
// names when they are added to a record.
| EXTENSIONAPI static constexpr auto AttributePrefix = core::PropertyDefinitionBuilder<>::createProperty("Attribute Prefix") |
| .withDescription("If this property is set, the name of attributes will be prepended with a prefix when they are added to a record.") |
| .build(); |
// Required boolean property, defaults to "false": whether the input is a
// single record or a series of records inside a wrapper element.
| EXTENSIONAPI static constexpr auto ExpectRecordsAsArray = core::PropertyDefinitionBuilder<>::createProperty("Expect Records as Array") |
| .withDescription("This property defines whether the reader expects a FlowFile to consist of a single Record or a series of Records with a \"wrapper element\". Because XML does not provide " |
| "for a way to read a series of XML documents from a stream directly, it is common to combine many XML documents by concatenating them and then wrapping the entire XML blob " |
| "with a \"wrapper element\". This property dictates whether the reader expects a FlowFile to consist of a single Record or a series of Records with a \"wrapper element\" " |
| "that will be ignored.") |
| .isRequired(true) |
| .withValidator(core::StandardPropertyValidators::BOOLEAN_VALIDATOR) |
| .withDefaultValue("false") |
| .build(); |
| |
// All four property definitions above; this is the list registered in
// initialize(). Keep the array size in sync when adding a property.
| EXTENSIONAPI static constexpr auto Properties = std::array<core::PropertyReference, 4>{FieldNameForContent, ParseXMLAttributes, AttributePrefix, ExpectRecordsAsArray}; |
| |
| EXTENSIONAPI static constexpr bool SupportsDynamicProperties = false; |
// Declares the RecordSetReader API as the one this service implements.
| EXTENSIONAPI static constexpr auto ImplementsApis = std::array{ RecordSetReader::ProvidesApi }; |
// NOTE(review): macro is defined outside this file; presumably it injects the
// boilerplate virtual overrides shared by controller services - confirm.
| ADD_COMMON_VIRTUAL_FUNCTIONS_FOR_CONTROLLER_SERVICES |
| |
// Produces a RecordSet from the given input stream, or an error code on
// failure. Defined out of line; NOTE(review): presumably drains the stream
// and delegates to parseRecordsFromXml() - confirm against the .cpp.
| nonstd::expected<core::RecordSet, std::error_code> read(io::InputStream& input_stream) override; |
| |
// Registers the static Properties list as this service's supported properties.
| void initialize() override { |
| setSupportedProperties(Properties); |
| } |
// Defined out of line. NOTE(review): presumably reads the configured property
// values into the private members below - confirm against the .cpp.
| void onEnable() override; |
// Intentionally a no-op.
| void yield() override {} |
// True exactly when the service state is ENABLED.
| bool isRunning() const override { return getState() == core::controller::ControllerServiceState::ENABLED; } |
// This service never reports pending work.
| bool isWorkAvailable() override { return false; } |
| |
| private: |
// Private parsing helpers, all const and defined out of line. The notes below
// are inferred from the signatures only - verify against the implementation.
// Stores a name/value pair into record_object; NOTE(review): write_pcdata_node
// presumably marks the value as the tag's text content - confirm.
| void writeRecordField(core::RecordObject& record_object, const std::string& name, const std::string& value, bool write_pcdata_node = false) const; |
// Handles a single pugixml node, writing into record_object.
| void parseNodeElement(core::RecordObject& record_object, const pugi::xml_node& node) const; |
// Fills record_object from node; NOTE(review): presumably walks its children.
| void parseXmlNode(core::RecordObject& record_object, const pugi::xml_node& node) const; |
// Adds to record_set based on node; NOTE(review): presumably one record per call.
| void addRecordFromXmlNode(const pugi::xml_node& node, core::RecordSet& record_set) const; |
// Parses xml_content into record_set; NOTE(review): the bool presumably
// signals parse success - confirm which value means failure.
| bool parseRecordsFromXml(core::RecordSet& record_set, const std::string& xml_content) const; |
| |
// NOTE(review): the four members below mirror the property definitions by
// name and are presumably populated in onEnable(); the booleans start out
// false and the strings empty.
| std::string field_name_for_content_; |
| bool parse_xml_attributes_ = false; |
| std::string attribute_prefix_; |
| bool expect_records_as_array_ = false; |
// Logger obtained from the LoggerFactory specialised for this class.
| std::shared_ptr<core::logging::Logger> logger_ = core::logging::LoggerFactory<XMLReader>::getLogger(); |
| }; |
| |
| } // namespace org::apache::nifi::minifi::standard |