blob: e6440c1c4aa591af5a19c43ca172e4bb8eb75cea [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef GANDIVA_PROJECTOR_CACHE_KEY_H
#define GANDIVA_PROJECTOR_CACHE_KEY_H
#include <algorithm>
#include <memory>
#include <string>
#include <thread>
#include <vector>
#include "gandiva/arrow.h"
#include "gandiva/projector.h"
namespace gandiva {
class ProjectorCacheKey {
public:
ProjectorCacheKey(SchemaPtr schema, std::shared_ptr<Configuration> configuration,
ExpressionVector expression_vector, SelectionVector::Mode mode)
: schema_(schema), configuration_(configuration), mode_(mode), uniqifier_(0) {
static const int kSeedValue = 4;
size_t result = kSeedValue;
for (auto& expr : expression_vector) {
std::string expr_as_string = expr->ToString();
expressions_as_strings_.push_back(expr_as_string);
boost::hash_combine(result, expr_as_string);
UpdateUniqifier(expr_as_string);
}
boost::hash_combine(result, mode);
boost::hash_combine(result, configuration->Hash());
boost::hash_combine(result, schema_->ToString());
boost::hash_combine(result, uniqifier_);
hash_code_ = result;
}
std::size_t Hash() const { return hash_code_; }
bool operator==(const ProjectorCacheKey& other) const {
// arrow schema does not overload equality operators.
if (!(schema_->Equals(*other.schema().get(), true))) {
return false;
}
if (*configuration_ != *other.configuration_) {
return false;
}
if (expressions_as_strings_ != other.expressions_as_strings_) {
return false;
}
if (mode_ != other.mode_) {
return false;
}
if (uniqifier_ != other.uniqifier_) {
return false;
}
return true;
}
bool operator!=(const ProjectorCacheKey& other) const { return !(*this == other); }
SchemaPtr schema() const { return schema_; }
std::string ToString() const {
std::stringstream ss;
// indent, window, indent_size, null_rep and skip new lines.
arrow::PrettyPrintOptions options{0, 10, 2, "null", true};
DCHECK_OK(PrettyPrint(*schema_.get(), options, &ss));
ss << "Expressions: [";
bool first = true;
for (auto& expr : expressions_as_strings_) {
if (first) {
first = false;
} else {
ss << ", ";
}
ss << expr;
}
ss << "]";
return ss.str();
}
private:
void UpdateUniqifier(const std::string& expr) {
if (uniqifier_ == 0) {
// caching of expressions with re2 patterns causes lock contention. So, use
// multiple instances to reduce contention.
if (expr.find(" like(") != std::string::npos) {
uniqifier_ = std::hash<std::thread::id>()(std::this_thread::get_id()) % 16;
}
}
}
const SchemaPtr schema_;
const std::shared_ptr<Configuration> configuration_;
SelectionVector::Mode mode_;
std::vector<std::string> expressions_as_strings_;
size_t hash_code_;
uint32_t uniqifier_;
};
} // namespace gandiva
#endif // GANDIVA_PROJECTOR_CACHE_KEY_H