blob: 8a270cfdc06f2d4a8fe205c9674e19cd0dcbbcb2 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "gandiva/filter.h"
#include <memory>
#include <utility>
#include "gandiva/bitmap_accumulator.h"
#include "gandiva/cache.h"
#include "gandiva/condition.h"
#include "gandiva/expr_validator.h"
#include "gandiva/llvm_generator.h"
#include "gandiva/selection_vector_impl.h"
namespace gandiva {
/// \brief Construct a filter around an already-built LLVM generator.
///
/// All three arguments are sink parameters: ownership of the generator is
/// transferred, and the schema/configuration handles are moved into the
/// members so no extra reference-count traffic is incurred.
///
/// \param[in] llvm_generator generator that executes the filter condition
/// \param[in] schema schema that evaluated record batches are checked against
/// \param[in] configuration configuration the filter was built with
Filter::Filter(std::unique_ptr<LLVMGenerator> llvm_generator, SchemaPtr schema,
               std::shared_ptr<Configuration> configuration)
    : llvm_generator_(std::move(llvm_generator)),
      schema_(std::move(schema)),
      configuration_(std::move(configuration)) {}
// Defined out of line in this translation unit; no custom teardown is needed,
// the members release their own resources.
Filter::~Filter() = default;
/// \brief Build a Filter that evaluates `condition` against `schema`.
///
/// The LLVMGenerator object-code cache is probed with a key derived from the
/// schema, configuration and condition. When object code is already cached,
/// expression validation is skipped.
///
/// \param[in] schema schema of the input; must be non-null
/// \param[in] condition filter condition; must be non-null
/// \param[in] configuration build configuration; must be non-null
/// \param[out] filter receives the newly built filter; must be non-null
/// \return Status::OK on success, Status::Invalid when any argument is null,
///         otherwise the error reported while creating the generator,
///         validating the condition or building the code.
Status Filter::Make(SchemaPtr schema, ConditionPtr condition,
                    std::shared_ptr<Configuration> configuration,
                    std::shared_ptr<Filter>* filter) {
  ARROW_RETURN_IF(schema == nullptr, Status::Invalid("Schema cannot be null"));
  ARROW_RETURN_IF(condition == nullptr, Status::Invalid("Condition cannot be null"));
  ARROW_RETURN_IF(configuration == nullptr,
                  Status::Invalid("Configuration cannot be null"));
  // The result is written through `filter`, so reject a null output pointer up
  // front instead of dereferencing it after all the build work is done.
  ARROW_RETURN_IF(filter == nullptr, Status::Invalid("Output filter cannot be null"));

  std::shared_ptr<Cache<ExpressionCacheKey, std::shared_ptr<llvm::MemoryBuffer>>> cache =
      LLVMGenerator::GetCache();

  // The cache key is constructed from a local copy of the condition.
  Condition conditionToKey = *(condition.get());
  ExpressionCacheKey cache_key(schema, configuration, conditionToKey);

  // Verify if previous filter obj code was cached
  std::shared_ptr<llvm::MemoryBuffer> prev_cached_obj = cache->GetObjectCode(cache_key);
  bool is_cached = (prev_cached_obj != nullptr);

  GandivaObjectCache obj_cache(cache, cache_key);

  // Build LLVM generator, and generate code for the specified expression
  ARROW_ASSIGN_OR_RAISE(auto llvm_gen,
                        LLVMGenerator::Make(configuration, is_cached, obj_cache));

  if (!is_cached) {
    // Run the validation on the expression.
    // Return if the expression is invalid since we will not be able to process further.
    ExprValidator expr_validator(llvm_gen->types(), schema,
                                 configuration->function_registry());
    ARROW_RETURN_NOT_OK(expr_validator.Validate(condition));
  }

  // Set the object cache for LLVM
  ARROW_RETURN_NOT_OK(llvm_gen->SetLLVMObjectCache(obj_cache));

  ARROW_RETURN_NOT_OK(llvm_gen->Build({condition}, SelectionVector::Mode::MODE_NONE));

  // Instantiate the filter with the completely built llvm generator
  *filter = std::make_shared<Filter>(std::move(llvm_gen), schema, configuration);
  (*filter)->SetBuiltFromCache(is_cached);

  return Status::OK();
}
/// \brief Evaluate the filter condition on `batch` and fill `out_selection`.
///
/// The generated code writes a boolean result (validity + value bitmaps); the
/// two bitmaps are intersected and the selection vector is populated from the
/// intersection.
///
/// \param[in] batch record batch to filter; its schema must equal the schema
///            the filter was created with and it must contain at least one row
/// \param[out] out_selection selection vector to populate; must be non-null and
///             have at least `batch.num_rows()` slots
/// \return Status::Invalid when a precondition above is violated, otherwise the
///         status of executing the generated code / populating the selection
///         vector.
Status Filter::Evaluate(const arrow::RecordBatch& batch,
                        std::shared_ptr<SelectionVector> out_selection) {
  const auto num_rows = batch.num_rows();
  ARROW_RETURN_IF(!batch.schema()->Equals(*schema_),
                  Status::Invalid("RecordBatch schema must match expected filter schema"));
  ARROW_RETURN_IF(num_rows == 0, Status::Invalid("RecordBatch must be non-empty."));
  ARROW_RETURN_IF(out_selection == nullptr,
                  Status::Invalid("out_selection must be non-null."));
  ARROW_RETURN_IF(out_selection->GetMaxSlots() < num_rows,
                  Status::Invalid("Output selection vector capacity too small"));

  // Allocate three local bitmaps: index 0 backs the validity buffer, index 1
  // backs the value buffer, and index 2 receives their intersection.
  LocalBitMapsHolder bitmaps(num_rows, 3 /*local_bitmaps*/);
  int64_t bitmap_size = bitmaps.GetLocalBitMapSize();

  auto validity = std::make_shared<arrow::Buffer>(bitmaps.GetLocalBitMap(0), bitmap_size);
  auto value = std::make_shared<arrow::Buffer>(bitmaps.GetLocalBitMap(1), bitmap_size);
  auto array_data = arrow::ArrayData::Make(arrow::boolean(), num_rows, {validity, value});

  // Execute the expression(s).
  ARROW_RETURN_NOT_OK(llvm_generator_->Execute(batch, {array_data}));

  // Compute the intersection of the value and validity.
  auto result = bitmaps.GetLocalBitMap(2);
  BitMapAccumulator::IntersectBitMaps(
      result, {bitmaps.GetLocalBitMap(0), bitmaps.GetLocalBitMap(1)}, {0, 0}, num_rows);

  // The last argument is the index of the final row of the batch.
  return out_selection->PopulateFromBitMap(result, bitmap_size, num_rows - 1);
}
/// \brief Return the IR string exposed by the underlying LLVM generator.
const std::string& Filter::DumpIR() {
  return (*llvm_generator_).ir();
}
void Filter::SetBuiltFromCache(bool flag) { built_from_cache_ = flag; }
bool Filter::GetBuiltFromCache() { return built_from_cache_; }
} // namespace gandiva