blob: fb23106b6bc97a190d4484d7f080ba14886c4dff [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <array>
#include "arrow/compute/kernels/common.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap.h"
#include "arrow/util/bitmap_ops.h"
namespace arrow {
using internal::Bitmap;
namespace compute {
namespace {
template <typename ComputeWord>
void ComputeKleene(ComputeWord&& compute_word, KernelContext* ctx, const ArraySpan& left,
const ArraySpan& right, ArraySpan* out) {
DCHECK(left.GetNullCount() != 0 || right.GetNullCount() != 0)
<< "ComputeKleene is unnecessarily expensive for the non-null case";
Bitmap left_valid_bm{left.buffers[0].data, left.offset, left.length};
Bitmap left_data_bm{left.buffers[1].data, left.offset, left.length};
Bitmap right_valid_bm{right.buffers[0].data, right.offset, right.length};
Bitmap right_data_bm{right.buffers[1].data, right.offset, right.length};
std::array<Bitmap, 2> out_bms{Bitmap(out->buffers[0].data, out->offset, out->length),
Bitmap(out->buffers[1].data, out->offset, out->length)};
auto apply = [&](uint64_t left_valid, uint64_t left_data, uint64_t right_valid,
uint64_t right_data, uint64_t* out_validity, uint64_t* out_data) {
auto left_true = left_valid & left_data;
auto left_false = left_valid & ~left_data;
auto right_true = right_valid & right_data;
auto right_false = right_valid & ~right_data;
compute_word(left_true, left_false, right_true, right_false, out_validity, out_data);
};
if (right.GetNullCount() == 0) {
std::array<Bitmap, 3> in_bms{left_valid_bm, left_data_bm, right_data_bm};
Bitmap::VisitWordsAndWrite(
in_bms, &out_bms,
[&](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
apply(in[0], in[1], ~uint64_t(0), in[2], &(out->at(0)), &(out->at(1)));
});
return;
}
if (left.GetNullCount() == 0) {
std::array<Bitmap, 3> in_bms{left_data_bm, right_valid_bm, right_data_bm};
Bitmap::VisitWordsAndWrite(
in_bms, &out_bms,
[&](const std::array<uint64_t, 3>& in, std::array<uint64_t, 2>* out) {
apply(~uint64_t(0), in[0], in[1], in[2], &(out->at(0)), &(out->at(1)));
});
return;
}
DCHECK(left.GetNullCount() != 0 && right.GetNullCount() != 0);
std::array<Bitmap, 4> in_bms{left_valid_bm, left_data_bm, right_valid_bm,
right_data_bm};
Bitmap::VisitWordsAndWrite(
in_bms, &out_bms,
[&](const std::array<uint64_t, 4>& in, std::array<uint64_t, 2>* out) {
apply(in[0], in[1], in[2], in[3], &(out->at(0)), &(out->at(1)));
});
}
inline BooleanScalar InvertScalar(const Scalar& in) {
return in.is_valid ? BooleanScalar(!checked_cast<const BooleanScalar&>(in).value)
: BooleanScalar();
}
inline Bitmap GetBitmap(const ArraySpan& arr, int index) {
return Bitmap{arr.buffers[index].data, arr.offset, arr.length};
}
Status InvertOpExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
ArraySpan* out_span = out->array_span();
GetBitmap(*out_span, 1).CopyFromInverted(GetBitmap(batch[0].array, 1));
return Status::OK();
}
template <typename Op>
struct Commutative {
static Status Call(KernelContext* ctx, const Scalar& left, const ArraySpan& right,
ExecResult* out) {
return Op::Call(ctx, right, left, out);
}
};
struct AndOp : Commutative<AndOp> {
using Commutative<AndOp>::Call;
static Status Call(KernelContext* ctx, const ArraySpan& left, const Scalar& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
if (right.is_valid) {
checked_cast<const BooleanScalar&>(right).value
? GetBitmap(*out_span, 1).CopyFrom(GetBitmap(left, 1))
: GetBitmap(*out_span, 1).SetBitsTo(false);
}
return Status::OK();
}
static Status Call(KernelContext* ctx, const ArraySpan& left, const ArraySpan& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
::arrow::internal::BitmapAnd(left.buffers[1].data, left.offset, right.buffers[1].data,
right.offset, right.length, out_span->offset,
out_span->buffers[1].data);
return Status::OK();
}
};
struct KleeneAndOp : Commutative<KleeneAndOp> {
using Commutative<KleeneAndOp>::Call;
static Status Call(KernelContext* ctx, const ArraySpan& left, const Scalar& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
if (right_false) {
GetBitmap(*out_span, 0).SetBitsTo(true);
out_span->null_count = 0;
GetBitmap(*out_span, 1).SetBitsTo(false); // all false case
return Status::OK();
}
if (right_true) {
if (left.GetNullCount() == 0) {
GetBitmap(*out_span, 0).SetBitsTo(true);
out_span->null_count = 0;
} else {
GetBitmap(*out_span, 0).CopyFrom(GetBitmap(left, 0));
}
GetBitmap(*out_span, 1).CopyFrom(GetBitmap(left, 1));
return Status::OK();
}
// scalar was null: out[i] is valid iff left[i] was false
if (left.GetNullCount() == 0) {
::arrow::internal::InvertBitmap(left.buffers[1].data, left.offset, left.length,
out_span->buffers[0].data, out_span->offset);
} else {
::arrow::internal::BitmapAndNot(left.buffers[0].data, left.offset,
left.buffers[1].data, left.offset, left.length,
out_span->offset, out_span->buffers[0].data);
}
::arrow::internal::CopyBitmap(left.buffers[1].data, left.offset, left.length,
out_span->buffers[1].data, out_span->offset);
return Status::OK();
}
static Status Call(KernelContext* ctx, const ArraySpan& left, const ArraySpan& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
GetBitmap(*out_span, 0).SetBitsTo(true);
out_span->null_count = 0;
return AndOp::Call(ctx, left, right, out);
}
auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true,
uint64_t right_false, uint64_t* out_valid,
uint64_t* out_data) {
*out_data = left_true & right_true;
*out_valid = left_false | right_false | (left_true & right_true);
};
ComputeKleene(compute_word, ctx, left, right, out_span);
return Status::OK();
}
};
struct OrOp : Commutative<OrOp> {
using Commutative<OrOp>::Call;
static Status Call(KernelContext* ctx, const ArraySpan& left, const Scalar& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
if (right.is_valid) {
checked_cast<const BooleanScalar&>(right).value
? GetBitmap(*out_span, 1).SetBitsTo(true)
: GetBitmap(*out_span, 1).CopyFrom(GetBitmap(left, 1));
}
return Status::OK();
}
static Status Call(KernelContext* ctx, const ArraySpan& left, const ArraySpan& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
::arrow::internal::BitmapOr(left.buffers[1].data, left.offset, right.buffers[1].data,
right.offset, right.length, out_span->offset,
out_span->buffers[1].data);
return Status::OK();
}
};
struct KleeneOrOp : Commutative<KleeneOrOp> {
using Commutative<KleeneOrOp>::Call;
static Status Call(KernelContext* ctx, const ArraySpan& left, const Scalar& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
bool right_true = right.is_valid && checked_cast<const BooleanScalar&>(right).value;
bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;
if (right_true) {
GetBitmap(*out_span, 0).SetBitsTo(true);
out_span->null_count = 0;
GetBitmap(*out_span, 1).SetBitsTo(true); // all true case
return Status::OK();
}
if (right_false) {
if (left.GetNullCount() == 0) {
GetBitmap(*out_span, 0).SetBitsTo(true);
out_span->null_count = 0;
} else {
GetBitmap(*out_span, 0).CopyFrom(GetBitmap(left, 0));
}
GetBitmap(*out_span, 1).CopyFrom(GetBitmap(left, 1));
return Status::OK();
}
// scalar was null: out[i] is valid iff left[i] was true
if (left.GetNullCount() == 0) {
::arrow::internal::CopyBitmap(left.buffers[1].data, left.offset, left.length,
out_span->buffers[0].data, out_span->offset);
} else {
::arrow::internal::BitmapAnd(left.buffers[0].data, left.offset,
left.buffers[1].data, left.offset, left.length,
out_span->offset, out_span->buffers[0].data);
}
::arrow::internal::CopyBitmap(left.buffers[1].data, left.offset, left.length,
out_span->buffers[1].data, out_span->offset);
return Status::OK();
}
static Status Call(KernelContext* ctx, const ArraySpan& left, const ArraySpan& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
out_span->null_count = 0;
GetBitmap(*out_span, 0).SetBitsTo(true);
return OrOp::Call(ctx, left, right, out);
}
static auto compute_word = [](uint64_t left_true, uint64_t left_false,
uint64_t right_true, uint64_t right_false,
uint64_t* out_valid, uint64_t* out_data) {
*out_data = left_true | right_true;
*out_valid = left_true | right_true | (left_false & right_false);
};
ComputeKleene(compute_word, ctx, left, right, out_span);
return Status::OK();
}
};
struct XorOp : Commutative<XorOp> {
using Commutative<XorOp>::Call;
static Status Call(KernelContext* ctx, const ArraySpan& left, const Scalar& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
if (right.is_valid) {
checked_cast<const BooleanScalar&>(right).value
? GetBitmap(*out_span, 1).CopyFromInverted(GetBitmap(left, 1))
: GetBitmap(*out_span, 1).CopyFrom(GetBitmap(left, 1));
}
return Status::OK();
}
static Status Call(KernelContext* ctx, const ArraySpan& left, const ArraySpan& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
::arrow::internal::BitmapXor(left.buffers[1].data, left.offset, right.buffers[1].data,
right.offset, right.length, out_span->offset,
out_span->buffers[1].data);
return Status::OK();
}
};
struct AndNotOp {
static Status Call(KernelContext* ctx, const Scalar& left, const ArraySpan& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
if (left.is_valid) {
checked_cast<const BooleanScalar&>(left).value
? GetBitmap(*out_span, 1).CopyFromInverted(GetBitmap(right, 1))
: GetBitmap(*out_span, 1).SetBitsTo(false);
}
return Status::OK();
}
static Status Call(KernelContext* ctx, const ArraySpan& left, const Scalar& right,
ExecResult* out) {
return AndOp::Call(ctx, left, InvertScalar(right), out);
}
static Status Call(KernelContext* ctx, const ArraySpan& left, const ArraySpan& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
::arrow::internal::BitmapAndNot(left.buffers[1].data, left.offset,
right.buffers[1].data, right.offset, right.length,
out_span->offset, out_span->buffers[1].data);
return Status::OK();
}
};
struct KleeneAndNotOp {
static Status Call(KernelContext* ctx, const Scalar& left, const ArraySpan& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
bool left_true = left.is_valid && checked_cast<const BooleanScalar&>(left).value;
bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;
if (left_false) {
GetBitmap(*out_span, 0).SetBitsTo(true);
out_span->null_count = 0;
GetBitmap(*out_span, 1).SetBitsTo(false); // all false case
return Status::OK();
}
if (left_true) {
if (right.GetNullCount() == 0) {
GetBitmap(*out_span, 0).SetBitsTo(true);
out_span->null_count = 0;
} else {
GetBitmap(*out_span, 0).CopyFrom(GetBitmap(right, 0));
}
GetBitmap(*out_span, 1).CopyFromInverted(GetBitmap(right, 1));
return Status::OK();
}
// scalar was null: out[i] is valid iff right[i] was true
if (right.GetNullCount() == 0) {
::arrow::internal::CopyBitmap(right.buffers[1].data, right.offset, right.length,
out_span->buffers[0].data, out_span->offset);
} else {
::arrow::internal::BitmapAnd(right.buffers[0].data, right.offset,
right.buffers[1].data, right.offset, right.length,
out_span->offset, out_span->buffers[0].data);
}
::arrow::internal::InvertBitmap(right.buffers[1].data, right.offset, right.length,
out_span->buffers[1].data, out_span->offset);
return Status::OK();
}
static Status Call(KernelContext* ctx, const ArraySpan& left, const Scalar& right,
ExecResult* out) {
return KleeneAndOp::Call(ctx, left, InvertScalar(right), out);
}
static Status Call(KernelContext* ctx, const ArraySpan& left, const ArraySpan& right,
ExecResult* out) {
ArraySpan* out_span = out->array_span();
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
GetBitmap(*out_span, 0).SetBitsTo(true);
out_span->null_count = 0;
return AndNotOp::Call(ctx, left, right, out);
}
static auto compute_word = [](uint64_t left_true, uint64_t left_false,
uint64_t right_true, uint64_t right_false,
uint64_t* out_valid, uint64_t* out_data) {
*out_data = left_true & right_false;
*out_valid = left_false | right_true | (left_true & right_false);
};
ComputeKleene(compute_word, ctx, left, right, out_span);
return Status::OK();
}
};
void MakeFunction(const std::string& name, int arity, ArrayKernelExec exec,
FunctionDoc doc, FunctionRegistry* registry,
NullHandling::type null_handling = NullHandling::INTERSECTION) {
auto func = std::make_shared<ScalarFunction>(name, Arity(arity), std::move(doc));
std::vector<InputType> in_types(arity, InputType(boolean()));
ScalarKernel kernel(std::move(in_types), boolean(), exec);
kernel.null_handling = null_handling;
DCHECK_OK(func->AddKernel(kernel));
DCHECK_OK(registry->AddFunction(std::move(func)));
}
const FunctionDoc invert_doc{"Invert boolean values", "", {"values"}};
const FunctionDoc and_doc{
"Logical 'and' boolean values",
("When a null is encountered in either input, a null is output.\n"
"For a different null behavior, see function \"and_kleene\"."),
{"x", "y"}};
const FunctionDoc and_not_doc{
"Logical 'and not' boolean values",
("When a null is encountered in either input, a null is output.\n"
"For a different null behavior, see function \"and_not_kleene\"."),
{"x", "y"}};
const FunctionDoc or_doc{
"Logical 'or' boolean values",
("When a null is encountered in either input, a null is output.\n"
"For a different null behavior, see function \"or_kleene\"."),
{"x", "y"}};
const FunctionDoc xor_doc{
"Logical 'xor' boolean values",
("When a null is encountered in either input, a null is output."),
{"x", "y"}};
const FunctionDoc and_kleene_doc{
"Logical 'and' boolean values (Kleene logic)",
("This function behaves as follows with nulls:\n\n"
"- true and null = null\n"
"- null and true = null\n"
"- false and null = false\n"
"- null and false = false\n"
"- null and null = null\n"
"\n"
"In other words, in this context a null value really means \"unknown\",\n"
"and an unknown value 'and' false is always false.\n"
"For a different null behavior, see function \"and\"."),
{"x", "y"}};
const FunctionDoc and_not_kleene_doc{
"Logical 'and not' boolean values (Kleene logic)",
("This function behaves as follows with nulls:\n\n"
"- true and not null = null\n"
"- null and not false = null\n"
"- false and not null = false\n"
"- null and not true = false\n"
"- null and not null = null\n"
"\n"
"In other words, in this context a null value really means \"unknown\",\n"
"and an unknown value 'and not' true is always false, as is false\n"
"'and not' an unknown value.\n"
"For a different null behavior, see function \"and_not\"."),
{"x", "y"}};
const FunctionDoc or_kleene_doc{
"Logical 'or' boolean values (Kleene logic)",
("This function behaves as follows with nulls:\n\n"
"- true or null = true\n"
"- null or true = true\n"
"- false or null = null\n"
"- null or false = null\n"
"- null or null = null\n"
"\n"
"In other words, in this context a null value really means \"unknown\",\n"
"and an unknown value 'or' true is always true.\n"
"For a different null behavior, see function \"or\"."),
{"x", "y"}};
} // namespace
namespace internal {
void RegisterScalarBoolean(FunctionRegistry* registry) {
// These functions can write into sliced output bitmaps
MakeFunction("invert", 1, InvertOpExec, invert_doc, registry);
MakeFunction("and", 2, applicator::SimpleBinary<AndOp>, and_doc, registry);
MakeFunction("and_not", 2, applicator::SimpleBinary<AndNotOp>, and_not_doc, registry);
MakeFunction("or", 2, applicator::SimpleBinary<OrOp>, or_doc, registry);
MakeFunction("xor", 2, applicator::SimpleBinary<XorOp>, xor_doc, registry);
MakeFunction("and_kleene", 2, applicator::SimpleBinary<KleeneAndOp>, and_kleene_doc,
registry, NullHandling::COMPUTED_PREALLOCATE);
MakeFunction("and_not_kleene", 2, applicator::SimpleBinary<KleeneAndNotOp>,
and_not_kleene_doc, registry, NullHandling::COMPUTED_PREALLOCATE);
MakeFunction("or_kleene", 2, applicator::SimpleBinary<KleeneOrOp>, or_kleene_doc,
registry, NullHandling::COMPUTED_PREALLOCATE);
}
} // namespace internal
} // namespace compute
} // namespace arrow