blob: ebb3dca0d1e0de548219d71affbc03b5aac625b1 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <cmath>
#include "arrow/compute/kernels/common.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
namespace arrow {
using internal::CopyBitmap;
using internal::InvertBitmap;
namespace compute {
namespace internal {
namespace {
// Operator behind "is_valid": emits true for every valid (non-null) input value.
struct IsValidOperator {
  // Scalar overload: the boolean result mirrors the input's validity flag.
  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
    auto* result = checked_cast<BooleanScalar*>(out);
    result->value = in.is_valid;
    return Status::OK();
  }

  // Array overload: stores the result bitmap in out->buffers[1].
  // Expects `out` to start at offset 0 and to be no longer than `arr`.
  static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
    DCHECK_EQ(out->offset, 0);
    DCHECK_LE(out->length, arr.length);

    if (!arr.MayHaveNulls()) {
      // Nothing can be null, so build a fresh bitmap with every bit set.
      ARROW_ASSIGN_OR_RAISE(out->buffers[1],
                            ctx->AllocateBitmap(out->length + out->offset));
      BitUtil::SetBitsTo(out->buffers[1]->mutable_data(), out->offset, out->length,
                         true);
      return Status::OK();
    }

    // The input's validity bitmap already is the answer. Rather than copying
    // it, reuse it: start at the byte that contains the first bit and keep
    // the leftover bit position as the output offset.
    out->offset = arr.offset % 8;
    if (arr.offset == 0) {
      out->buffers[1] = arr.buffers[0];
    } else {
      out->buffers[1] = SliceBuffer(arr.buffers[0], arr.offset / 8,
                                    BitUtil::BytesForBits(out->length + out->offset));
    }
    return Status::OK();
  }
};
// Operator behind "is_null": emits true for every null input value.
struct IsNullOperator {
  // Scalar overload: the boolean result is the negated validity flag.
  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
    auto* result = checked_cast<BooleanScalar*>(out);
    result->value = !in.is_valid;
    return Status::OK();
  }

  // Array overload: writes the result bits into the existing out->buffers[1],
  // starting at out->offset.
  static Status Call(KernelContext* ctx, const ArrayData& arr, ArrayData* out) {
    auto* out_bits = out->buffers[1]->mutable_data();

    if (!arr.MayHaveNulls()) {
      // No nulls in the input: every output bit is false.
      BitUtil::SetBitsTo(out_bits, out->offset, out->length, false);
      return Status::OK();
    }

    // Nulls may be present: the result is the bitwise inverse of the input's
    // validity bitmap.
    InvertBitmap(arr.buffers[0]->data(), arr.offset, arr.length, out_bits, out->offset);
    return Status::OK();
  }
};
// Operator behind "is_nan": reports whether a single value is NaN.
struct IsNanOperator {
  // Returns std::isnan(value) converted to OutType. The context and status
  // arguments are not used.
  template <typename OutType, typename InType>
  static constexpr OutType Call(KernelContext* /*ctx*/, const InType& value,
                                Status* /*st*/) {
    return static_cast<OutType>(std::isnan(value));
  }
};
// Creates a ScalarFunction holding exactly one kernel and adds it to `registry`.
//
// name, doc:             forwarded to the ScalarFunction constructor.
// in_types:              kernel input types; the function's arity is in_types.size().
// out_type, exec:        output type and exec callback of the kernel.
// registry:              registry the finished function is added to.
// mem_allocation:        stored in the kernel's `mem_allocation` field.
// can_write_into_slices: stored in the kernel's `can_write_into_slices` field.
//
// The kernel is always marked NullHandling::OUTPUT_NOT_NULL. Failures from
// AddKernel / AddFunction are only checked through DCHECK_OK.
void MakeFunction(std::string name, const FunctionDoc* doc,
                  std::vector<InputType> in_types, OutputType out_type,
                  ArrayKernelExec exec, FunctionRegistry* registry,
                  MemAllocation::type mem_allocation, bool can_write_into_slices) {
  // Must be computed before `in_types` is moved into the kernel below.
  Arity arity{static_cast<int>(in_types.size())};

  // `name`, `out_type` and `exec` are by-value parameters that are not used
  // again afterwards, so hand them over instead of copying them.
  auto func = std::make_shared<ScalarFunction>(std::move(name), arity, doc);

  ScalarKernel kernel(std::move(in_types), std::move(out_type), std::move(exec));
  kernel.null_handling = NullHandling::OUTPUT_NOT_NULL;
  kernel.can_write_into_slices = can_write_into_slices;
  kernel.mem_allocation = mem_allocation;

  DCHECK_OK(func->AddKernel(std::move(kernel)));
  DCHECK_OK(registry->AddFunction(std::move(func)));
}
template <typename InType>
void AddIsNanKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
DCHECK_OK(
func->AddKernel({ty}, boolean(),
applicator::ScalarUnary<BooleanType, InType, IsNanOperator>::Exec));
}
// Builds the unary "is NaN" scalar function with kernels for float32 and
// float64 input.
//
// name, doc: forwarded to the ScalarFunction constructor.
// Returns the new function; the caller is responsible for registering it.
std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name,
                                                  const FunctionDoc* doc) {
  // `name` is a by-value parameter that is not used again: move, don't copy.
  auto func = std::make_shared<ScalarFunction>(std::move(name), Arity::Unary(), doc);
  AddIsNanKernel<FloatType>(float32(), func.get());
  AddIsNanKernel<DoubleType>(float64(), func.get());
  return func;
}
// Exec entry point for "is_valid".
//
// Input of the null type (Type::NA) is answered directly with false / all
// false; every other type is delegated to IsValidOperator through SimpleUnary.
Status IsValidExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
  const Datum& input = batch[0];

  if (input.type()->id() != Type::NA) {
    return applicator::SimpleUnary<IsValidOperator>(ctx, batch, out);
  }

  auto false_scalar = std::make_shared<BooleanScalar>(false);
  if (input.kind() == Datum::SCALAR) {
    out->value = false_scalar;
    return Status::OK();
  }

  // Array input: materialize an array of out->length() false values.
  ARROW_ASSIGN_OR_RAISE(
      std::shared_ptr<Array> all_false,
      MakeArrayFromScalar(*false_scalar, out->length(), ctx->memory_pool()));
  out->value = all_false->data();
  return Status::OK();
}
// Exec entry point for "is_null".
//
// Input of the null type (Type::NA) is answered directly with true / all
// true; every other type is delegated to IsNullOperator through SimpleUnary.
Status IsNullExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
  const Datum& input = batch[0];

  if (input.type()->id() != Type::NA) {
    return applicator::SimpleUnary<IsNullOperator>(ctx, batch, out);
  }

  if (input.kind() == Datum::SCALAR) {
    out->value = std::make_shared<BooleanScalar>(true);
    return Status::OK();
  }

  // Array input: the output buffer is preallocated, so only the bits in
  // [offset, offset + length) need to be set.
  ArrayData* result = out->mutable_array();
  BitUtil::SetBitsTo(result->buffers[1]->mutable_data(), result->offset, result->length,
                     true);
  return Status::OK();
}
// User-facing documentation (summary, description, argument names) for the
// three functions registered by this file.

// Documentation for "is_valid".
const FunctionDoc is_valid_doc(
    "Return true if non-null",
    "For each input value, emit true iff the value is valid (non-null).",
    {"values"});

// Documentation for "is_null".
const FunctionDoc is_null_doc(
    "Return true if null",
    "For each input value, emit true iff the value is null.",
    {"values"});

// Documentation for "is_nan".
const FunctionDoc is_nan_doc(
    "Return true if NaN",
    "For each input value, emit true iff the value is NaN.",
    {"values"});
} // namespace
// Registers the validity-related scalar functions "is_valid", "is_null" and
// "is_nan" in `registry`.
void RegisterScalarValidity(FunctionRegistry* registry) {
  // is_valid: no preallocation, because IsValidOperator installs its own
  // out->buffers[1] (either the input's bitmap or a freshly allocated one)
  // and may change out->offset.
  MakeFunction("is_valid", &is_valid_doc, {ValueDescr::ANY}, boolean(), IsValidExec,
               registry, MemAllocation::NO_PREALLOCATE,
               /*can_write_into_slices=*/false);

  // is_null: preallocated output, because IsNullOperator and IsNullExec write
  // their bits into the existing out->buffers[1] at out->offset.
  MakeFunction("is_null", &is_null_doc, {ValueDescr::ANY}, boolean(), IsNullExec,
               registry, MemAllocation::PREALLOCATE,
               /*can_write_into_slices=*/true);

  // is_nan: built separately since it has one kernel per floating-point type.
  auto is_nan_func = MakeIsNanFunction("is_nan", &is_nan_doc);
  DCHECK_OK(registry->AddFunction(std::move(is_nan_func)));
}
} // namespace internal
} // namespace compute
} // namespace arrow