blob: 404105b1070ca0a5d08d36463e7050f2c97ac643 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "gandiva/like_holder.h"
#include <regex>
#include "gandiva/node.h"
#include "gandiva/regex_util.h"
namespace gandiva {
// Matches a translated pattern consisting of zero or more word/whitespace
// characters followed by the literal two-character sequence ".*".
// TryOptimize uses it to detect patterns it can rewrite to "starts_with".
RE2 LikeHolder::starts_with_regex_(R"((\w|\s)*\.\*)");
// Matches a translated pattern consisting of the literal two-character
// sequence ".*" followed by zero or more word/whitespace characters.
// TryOptimize uses it to detect patterns it can rewrite to "ends_with".
RE2 LikeHolder::ends_with_regex_(R"(\.\*(\w|\s)*)");
// Rewrites a 'like' call into a cheaper function for the two common cases:
//  - "<text>.*"  becomes starts_with(<input>, "<text>")
//  - ".*<text>"  becomes ends_with(<input>, "<text>")
// where the patterns above are the holder's translated pattern string.
//
// Returns a copy of the original node when a holder cannot be built for it
// or when the pattern matches neither shape.
const FunctionNode LikeHolder::TryOptimize(const FunctionNode& node) {
  std::shared_ptr<LikeHolder> like_holder;
  if (!Make(node, &like_holder).ok()) {
    // Could not build a holder, so there is nothing to optimize.
    return node;
  }

  const std::string& pcre = like_holder->pattern_;
  auto literal_type = node.children().at(1)->return_type();

  // Builds `fn_name(<first child of node>, <literal fragment>)`, reusing the
  // type of the original pattern literal and the original return type.
  auto rewrite = [&](const char* fn_name, const std::string& fragment) {
    auto fragment_node =
        std::make_shared<LiteralNode>(literal_type, LiteralHolder(fragment), false);
    return FunctionNode(fn_name, {node.children().at(0), fragment_node},
                        node.return_type());
  };

  if (RE2::FullMatch(pcre, starts_with_regex_)) {
    // Drop the trailing ".*" to recover the prefix.
    return rewrite("starts_with", pcre.substr(0, pcre.length() - 2));
  }
  if (RE2::FullMatch(pcre, ends_with_regex_)) {
    // Skip the leading ".*" to recover the suffix.
    return rewrite("ends_with", pcre.substr(2));
  }

  // Neither shape matched: hand back the node untouched.
  return node;
}
// Returns true when `type` is one of the two type ids accepted for the
// pattern literal of 'like': STRING or BINARY.
static bool IsArrowStringLiteral(arrow::Type::type type) {
  switch (type) {
    case arrow::Type::STRING:
    case arrow::Type::BINARY:
      return true;
    default:
      return false;
  }
}
// Builds a LikeHolder from a 'like' function node.
//
// The node must have exactly two children, and the second one must be a
// LiteralNode whose type id is STRING or BINARY; otherwise an Invalid status
// is returned and `*holder` is not written by this function. On success the
// literal's string value is forwarded to the pattern-based Make overload.
Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* holder) {
  ARROW_RETURN_IF(node.children().size() != 2,
                  Status::Invalid("'like' function requires two parameters"));

  // The pattern has to be known up front, so only a literal is accepted.
  auto* pattern_node = dynamic_cast<LiteralNode*>(node.children().at(1).get());
  ARROW_RETURN_IF(
      pattern_node == nullptr,
      Status::Invalid("'like' function requires a literal as the second parameter"));

  auto pattern_type_id = pattern_node->return_type()->id();
  ARROW_RETURN_IF(
      !IsArrowStringLiteral(pattern_type_id),
      Status::Invalid(
          "'like' function requires a string literal as the second parameter"));

  return Make(arrow::util::get<std::string>(pattern_node->holder()), holder);
}
// Builds a LikeHolder from a SQL LIKE pattern.
//
// The pattern is first translated by RegexUtil::SqlLikePatternToPcre; any
// error from that step is returned as-is. If the regex of the resulting
// holder reports !ok(), an Invalid status naming the translated pattern is
// returned. `*holder` is assigned only on success.
Status LikeHolder::Make(const std::string& sql_pattern,
                        std::shared_ptr<LikeHolder>* holder) {
  std::string translated;
  ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, translated));

  std::shared_ptr<LikeHolder> candidate(new LikeHolder(translated));
  ARROW_RETURN_IF(!candidate->regex_.ok(),
                  Status::Invalid("Building RE2 pattern '", translated, "' failed"));

  *holder = candidate;
  return Status::OK();
}
} // namespace gandiva