/**
* @file ExtractText.cpp
* ExtractText class implementation
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iterator>
#include <string>
#include <memory>
#include <set>
#include <iostream>
#include <sstream>
#include "processors/ExtractText.h"
#include "core/ProcessContext.h"
#include "core/ProcessSession.h"
#include "core/FlowFile.h"
namespace org {
namespace apache {
namespace nifi {
namespace minifi {
namespace processors {
//! "Attribute" property: its value is used by ReadCallback::process() as the key of the
//! flow file attribute that receives the content. The third argument is an empty string
//! (presumably the property's default value -- confirm against core::Property).
core::Property ExtractText::Attribute("Attribute", "Attribute to set from content", "");
//! "Size Limit" property: parsed by ReadCallback::process(). An empty value selects
//! DEFAULT_SIZE_LIMIT, "0" selects the flow file's size, and any other value is parsed
//! as an integer.
core::Property ExtractText::SizeLimit("Size Limit", "Maximum number of bytes to read into the attribute. 0 for no limit. Default is 2MB.");
//! "success" relationship: onTrigger() transfers every flow file it processes here.
core::Relationship ExtractText::Success("success", "success operational on the flow record");
void ExtractText::initialize() {
//! Set the supported properties
std::set<core::Property> properties;
properties.insert(Attribute);
setSupportedProperties(properties);
//! Set the supported relationships
std::set<core::Relationship> relationships;
relationships.insert(Success);
setSupportedRelationships(relationships);
}
/**
 * Handles one trigger of the processor.
 *
 * Takes a flow file from the session; when none is available nothing happens.
 * Otherwise a ReadCallback is run over the flow file's content and the flow
 * file is then transferred to the Success relationship. The result of the
 * read is not inspected here.
 *
 * @param context processor context, handed to the ReadCallback
 * @param session session used to get, read and transfer the flow file
 */
void ExtractText::onTrigger(core::ProcessContext *context, core::ProcessSession *session) {
  std::shared_ptr<core::FlowFile> incoming = session->get();
  if (incoming) {
    // The callback does the actual content-to-attribute copy.
    ReadCallback extractor(incoming, context);
    session->read(incoming, &extractor);
    session->transfer(incoming, Success);
  }
}
/**
 * Copies the flow file's content into the flow file attribute named by the
 * "Attribute" property, reading at most the number of bytes selected by the
 * "Size Limit" property:
 *   - empty value : DEFAULT_SIZE_LIMIT
 *   - "0"         : the flow file's size
 *   - otherwise   : the value parsed as an unsigned integer
 *
 * @param stream stream to read the content from
 * @return number of bytes copied into the attribute, or -1 if the stream
 *         reports an error or the content cannot be buffered
 * @throws std::invalid_argument or std::out_of_range (from std::stoull) when
 *         "Size Limit" is not a parsable number
 */
int64_t ExtractText::ReadCallback::process(std::shared_ptr<io::BaseStream> stream) {
  std::string attrKey;
  std::string sizeLimitStr;
  ctx_->getProperty(Attribute.getName(), attrKey);
  ctx_->getProperty(SizeLimit.getName(), sizeLimitStr);

  uint64_t size_limit = flowFile_->getSize();
  if (sizeLimitStr.empty()) {
    size_limit = DEFAULT_SIZE_LIMIT;
  } else if (sizeLimitStr != "0") {
    // stoull rather than stoi: the limit is a 64-bit byte count, so values
    // above INT_MAX must be accepted.
    size_limit = std::stoull(sizeLimitStr);
  }

  const uint64_t chunk_capacity = static_cast<uint64_t>(max_read_);
  std::ostringstream contentStream;
  uint64_t read_size = 0;

  while (read_size < size_limit) {
    // Never request more than one chunk, nor more than what is left of the limit.
    const uint64_t remaining = size_limit - read_size;
    const uint64_t loop_read = remaining < chunk_capacity ? remaining : chunk_capacity;

    // Keep the scratch buffer large enough for the request; it is never shrunk.
    if (buffer_.size() < loop_read) {
      buffer_.resize(loop_read);
    }

    const int64_t ret = stream->readData(buffer_, loop_read);
    if (ret < 0) {
      return -1;  // stream error
    }
    if (ret == 0) {
      break;  // nothing more to read
    }

    contentStream.write(reinterpret_cast<const char*>(buffer_.data()), ret);
    if (contentStream.fail()) {
      return -1;
    }
    // Track progress so the size limit is actually enforced and reported.
    read_size += static_cast<uint64_t>(ret);
  }

  std::string contentStr = contentStream.str();
  flowFile_->setAttribute(attrKey, contentStr);
  return static_cast<int64_t>(read_size);
}
/**
 * Creates a read callback bound to a flow file and a process context.
 *
 * The per-read chunk size is taken from getpagesize(), and the scratch buffer
 * is sized to that many elements up front.
 *
 * @param flowFile flow file that process() queries for its size and sets the attribute on
 * @param ctx      process context that process() reads the properties from
 */
ExtractText::ReadCallback::ReadCallback(std::shared_ptr<core::FlowFile> flowFile, core::ProcessContext *ctx)
    : max_read_(getpagesize()),
      flowFile_(flowFile),
      ctx_(ctx) {
  // Size the scratch buffer to one chunk.
  buffer_.resize(max_read_);
}
} /* namespace processors */
} /* namespace minifi */
} /* namespace nifi */
} /* namespace apache */
} /* namespace org */