blob: 4fd8059f6fe70612e9f910ad95fb7df6ce464c52 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using Apache.Arrow.Memory;
namespace Apache.Arrow
{
public class BinaryArray : Array
{
/// <summary>
/// Concrete builder that produces <see cref="BinaryArray"/> instances.
/// </summary>
public class Builder : BuilderBase<BinaryArray, Builder>
{
    /// <summary>
    /// Create a builder that uses <see cref="BinaryType.Default"/> as its data type.
    /// </summary>
    public Builder()
        : base(BinaryType.Default)
    {
    }

    /// <summary>
    /// Create a builder for the given data type.
    /// </summary>
    /// <param name="dataType">Data type handed on to the base builder.</param>
    public Builder(IArrowType dataType)
        : base(dataType)
    {
    }

    /// <summary>
    /// Wrap the assembled array data in a new <see cref="BinaryArray"/>.
    /// </summary>
    /// <param name="data">Array data to wrap.</param>
    /// <returns>Returns the new <see cref="BinaryArray"/>.</returns>
    protected override BinaryArray Build(ArrayData data) => new BinaryArray(data);
}
/// <summary>
/// Create a binary array over existing array data.
/// </summary>
/// <param name="data">Array data to wrap. It is checked for the <see cref="ArrowTypeId.Binary"/> type id and
/// for a buffer count of three.</param>
public BinaryArray(ArrayData data)
: base(data)
{
// The type id is validated first, then the buffer count.
data.EnsureDataType(ArrowTypeId.Binary);
// Three buffers are expected; this class reads index 1 as the value offsets and index 2 as the value bytes.
data.EnsureBufferCount(3);
}
/// <summary>
/// Create a binary-layout array over existing array data, validating against a caller-supplied type id.
/// </summary>
/// <param name="typeId">Type id that <paramref name="data"/> is checked against.</param>
/// <param name="data">Array data to wrap. It is also checked for a buffer count of three.</param>
// NOTE(review): presumably intended for array types that share this three-buffer layout under a different
// type id - confirm against callers.
public BinaryArray(ArrowTypeId typeId, ArrayData data)
: base(data)
{
// The type id is validated first, then the buffer count.
data.EnsureDataType(typeId);
data.EnsureBufferCount(3);
}
public abstract class BuilderBase<TArray, TBuilder> : IArrowArrayBuilder<byte, TArray, TBuilder>
where TArray : IArrowArray
where TBuilder : class, IArrowArrayBuilder<byte, TArray, TBuilder>
{
/// <summary>Data type passed to the <see cref="ArrayData"/> created when building.</summary>
protected IArrowType DataType { get; }
/// <summary>
/// This builder viewed as <typeparamref name="TBuilder"/>; returned by the fluent methods. Because an
/// <c>as</c> cast is used, this evaluates to <c>null</c> if this object is not a <typeparamref name="TBuilder"/>.
/// </summary>
protected TBuilder Instance => this as TBuilder;
/// <summary>Builder for the offsets buffer; receives one offset up front plus one per appended item.</summary>
protected ArrowBuffer.Builder<int> ValueOffsets { get; }
/// <summary>Builder for the buffer holding the bytes of all appended values.</summary>
protected ArrowBuffer.Builder<byte> ValueBuffer { get; }
/// <summary>Builder for the validity bitmap; <c>true</c> is appended for a value and <c>false</c> for a null.</summary>
protected ArrowBuffer.BitmapBuilder ValidityBuffer { get; }
/// <summary>Running end offset into the value buffer, advanced by the byte length of each appended value.</summary>
protected int Offset { get; set; }
/// <summary>Number of nulls appended so far, taken from the unset-bit count of the validity bitmap.</summary>
protected int NullCount => this.ValidityBuffer.UnsetBitCount;
/// <summary>
/// Set up empty buffers for a builder of the given data type.
/// </summary>
/// <param name="dataType">Data type recorded for the arrays this builder produces.</param>
protected BuilderBase(IArrowType dataType)
{
    DataType = dataType;
    ValidityBuffer = new ArrowBuffer.BitmapBuilder();
    ValueBuffer = new ArrowBuffer.Builder<byte>();
    ValueOffsets = new ArrowBuffer.Builder<int>();

    // Per the Arrow format documentation, the offsets buffer holds length + 1 signed integers: entry i is
    // the start position of slot i in the data buffer, and the length of slot i is the difference between
    // entry i + 1 and entry i.
    //
    // This builder therefore writes the leading offset (zero) right away, and every append operation adds
    // exactly one trailing offset afterwards.
    ValueOffsets.Append(Offset);
}
/// <summary>
/// Create the concrete array of type <typeparamref name="TArray"/> from assembled array data.
/// </summary>
/// <param name="data">Array data assembled by <see cref="Build(MemoryAllocator)"/>.</param>
/// <returns>Returns an array of type <typeparamref name="TArray"/>.</returns>
protected abstract TArray Build(ArrayData data);
/// <summary>
/// Gets the length of the array built so far.
/// </summary>
/// <remarks>
/// Computed as the number of stored offsets minus one, because the offsets buffer always carries the leading
/// zero offset in addition to one offset per appended item.
/// </remarks>
public int Length => ValueOffsets.Length - 1;
/// <summary>
/// Assemble everything appended so far into an Arrow array.
/// </summary>
/// <param name="allocator">Optional memory allocator, passed to each underlying buffer builder.</param>
/// <returns>Returns an array of type <typeparamref name="TArray"/>.</returns>
public TArray Build(MemoryAllocator allocator = default)
{
    // The validity bitmap is only materialised when at least one null was appended.
    ArrowBuffer validity;
    if (NullCount > 0)
    {
        validity = ValidityBuffer.Build(allocator);
    }
    else
    {
        validity = ArrowBuffer.Empty;
    }

    ArrowBuffer offsets = ValueOffsets.Build(allocator);
    ArrowBuffer values = ValueBuffer.Build(allocator);

    // Buffer order: validity bitmap, value offsets, value bytes.
    ArrowBuffer[] buffers = { validity, offsets, values };

    return Build(new ArrayData(DataType, Length, NullCount, 0, buffers));
}
/// <summary>
/// Append one null entry to the array.
/// </summary>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder AppendNull()
{
    // A null contributes no bytes to the value buffer, so the running offset stays where it is and the
    // same offset is simply recorded again for this slot.
    ValueOffsets.Append(Offset);
    ValidityBuffer.Append(false);
    return Instance;
}
/// <summary>
/// Append one value that consists of exactly one byte.
/// </summary>
/// <param name="value">Byte value to append.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Append(byte value)
{
    ValueBuffer.Append(value);
    ValidityBuffer.Append(true);

    // A single byte was written, so the end offset advances by one before it is recorded.
    ValueOffsets.Append(++Offset);
    return Instance;
}
/// <summary>
/// Append one value, given as a span of bytes, to the array.
/// </summary>
/// <remarks>
/// The whole span becomes a single value, however many bytes it contains. To add several values at once,
/// use the <see cref="AppendRange(IEnumerable{byte[]})"/> method.
/// </remarks>
/// <param name="span">Span of bytes to add.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Append(ReadOnlySpan<byte> span)
{
    ValueBuffer.Append(span);
    ValidityBuffer.Append(true);

    // Advance the running offset past the bytes just written and record it as this value's end.
    int endOffset = Offset + span.Length;
    Offset = endOffset;
    ValueOffsets.Append(endOffset);
    return Instance;
}
/// <summary>
/// Append one value, given as an enumerable collection of bytes, to the array.
/// </summary>
/// <remarks>
/// The whole collection becomes a single value, however many bytes it yields; a <c>null</c> collection is
/// appended as a null entry. To add several single-byte values instead, use the
/// <see cref="AppendRange(IEnumerable{byte})"/> method.
/// </remarks>
/// <param name="value">Enumerable collection of bytes to add.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Append(IEnumerable<byte> value)
{
    if (value != null)
    {
        // Measuring the value buffer before and after the append gives the byte count without
        // enumerating the collection a second time.
        int lengthBefore = ValueBuffer.Length;
        ValueBuffer.AppendRange(value);
        Offset += ValueBuffer.Length - lengthBefore;

        ValidityBuffer.Append(true);
        ValueOffsets.Append(Offset);
        return Instance;
    }

    return AppendNull();
}
/// <summary>
/// Append many values, each consisting of a single byte, to the array.
/// </summary>
/// <remarks>
/// Every byte yielded becomes its own value. To add the whole collection as one value, use the
/// <see cref="Append(IEnumerable{byte})"/> method instead.
/// </remarks>
/// <param name="values">Single-byte values to add.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
/// <exception cref="ArgumentNullException">If <paramref name="values"/> is <c>null</c>.</exception>
public TBuilder AppendRange(IEnumerable<byte> values)
{
    foreach (byte single in values ?? throw new ArgumentNullException(nameof(values)))
    {
        Append(single);
    }

    return Instance;
}
/// <summary>
/// Append many values, each given as a byte array, to the array.
/// </summary>
/// <remarks>
/// A <c>null</c> element in the collection is appended as a null entry.
/// </remarks>
/// <param name="values">Values to add.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
/// <exception cref="ArgumentNullException">If <paramref name="values"/> is <c>null</c>.</exception>
public TBuilder AppendRange(IEnumerable<byte[]> values)
{
    if (values == null)
    {
        throw new ArgumentNullException(nameof(values));
    }

    foreach (byte[] item in values)
    {
        if (item != null)
        {
            // Convert to a span explicitly so the span overload of Append is the one selected.
            ReadOnlySpan<byte> bytes = item;
            Append(bytes);
        }
        else
        {
            AppendNull();
        }
    }

    return Instance;
}
/// <summary>
/// Reserve capacity in the underlying buffer builders ahead of appending values.
/// </summary>
/// <param name="capacity">Number of values to reserve room for; forwarded to each buffer builder's
/// <c>Reserve</c> method.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Reserve(int capacity)
{
    // TODO: [ARROW-9366] Reserve capacity in the value buffer in a more sensible way.

    // The offsets buffer carries one more entry than there are values.
    ValueOffsets.Reserve(capacity + 1);
    ValueBuffer.Reserve(capacity);

    // The validity bitmap holds exactly one bit per value, so no extra slot is needed here.
    ValidityBuffer.Reserve(capacity);
    return Instance;
}
/// <summary>
/// Resize the underlying buffers so that the builder holds <paramref name="length"/> values.
/// </summary>
/// <remarks>
/// This method does not adjust the running <see cref="Offset"/>, and the value buffer is sized from
/// <paramref name="length"/> rather than from the stored offsets (see the TODO in the method body).
/// </remarks>
/// <param name="length">New number of values.</param>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Resize(int length)
{
    // TODO: [ARROW-9366] Resize the value buffer to a safe length based on offsets, not `length`.

    // The offsets buffer carries one more entry than there are values.
    ValueOffsets.Resize(length + 1);
    ValueBuffer.Resize(length);

    // The validity bitmap must hold exactly one bit per value. Sizing it to length + 1 would leave a
    // stray bit beyond the end of the array, which the unset-bit count behind NullCount would include.
    ValidityBuffer.Resize(length);
    return Instance;
}
/// <summary>
/// Not implemented yet; calling this method always throws.
/// </summary>
/// <param name="i">Unused.</param>
/// <param name="j">Unused.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
// TODO: Implement
public TBuilder Swap(int i, int j) => throw new NotImplementedException();
/// <summary>
/// Not implemented yet; calling this method always throws.
/// </summary>
/// <param name="index">Unused.</param>
/// <param name="value">Unused.</param>
/// <returns>Never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown.</exception>
// TODO: Implement
public TBuilder Set(int index, byte value) => throw new NotImplementedException();
/// <summary>
/// Discard everything appended so far and return the builder to its initial state.
/// </summary>
/// <returns>Returns the builder (for fluent-style composition).</returns>
public TBuilder Clear()
{
    Offset = 0;

    ValueBuffer.Clear();
    ValidityBuffer.Clear();
    ValueOffsets.Clear();

    // The offsets buffer must always start with the leading (zero) offset, so write it again now.
    ValueOffsets.Append(Offset);
    return Instance;
}
}
/// <summary>
/// Create a binary array from its individual buffers.
/// </summary>
/// <remarks>
/// The buffers are placed into the array data in the order null bitmap, value offsets, data - which differs
/// from the order of the parameters. The result is then handed to the <see cref="BinaryArray(ArrayData)"/>
/// constructor.
/// </remarks>
/// <param name="dataType">Data type of the array.</param>
/// <param name="length">Number of values in the array.</param>
/// <param name="valueOffsetsBuffer">Buffer of value offsets (stored at buffer index 1).</param>
/// <param name="dataBuffer">Buffer of value bytes (stored at buffer index 2).</param>
/// <param name="nullBitmapBuffer">Null bitmap buffer (stored at buffer index 0).</param>
/// <param name="nullCount">Null count passed to the array data; defaults to zero.</param>
/// <param name="offset">Offset passed to the array data; defaults to zero.</param>
public BinaryArray(IArrowType dataType, int length,
ArrowBuffer valueOffsetsBuffer,
ArrowBuffer dataBuffer,
ArrowBuffer nullBitmapBuffer,
int nullCount = 0, int offset = 0)
: this(new ArrayData(dataType, length, nullCount, offset,
new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer }))
{ }
/// <summary>
/// Accept a visitor by forwarding this array and the visitor to the two-argument <c>Accept</c> overload.
/// </summary>
/// <param name="visitor">Visitor to forward.</param>
// NOTE(review): the two-argument Accept is not defined in this class; presumably it is inherited from Array.
public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor);
/// <summary>Gets the value offsets buffer, stored at index 1 of the array data's buffers.</summary>
public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1];
/// <summary>Gets the buffer of value bytes, stored at index 2 of the array data's buffers.</summary>
public ArrowBuffer ValueBuffer => Data.Buffers[2];
/// <summary>
/// Gets the value offsets as integers: the offsets buffer reinterpreted as <see cref="int"/>, starting at
/// the array's <c>Offset</c> and spanning <c>Length + 1</c> entries.
/// </summary>
public ReadOnlySpan<int> ValueOffsets => ValueOffsetsBuffer.Span.CastTo<int>().Slice(Offset, Length + 1);
/// <summary>
/// Gets the whole value buffer as bytes. Unlike <see cref="ValueOffsets"/>, this span is not sliced by the
/// array's <c>Offset</c> or <c>Length</c>.
/// </summary>
public ReadOnlySpan<byte> Values => ValueBuffer.Span.CastTo<byte>();
/// <summary>
/// Get the entry of <see cref="ValueOffsets"/> at the given index.
/// </summary>
/// <param name="index">Index into the offsets; <c>Length</c> itself is accepted because there is one more
/// offset than there are values.</param>
/// <returns>Returns the offset stored at <paramref name="index"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">If the index is negative or greater than the length of the
/// array.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
[Obsolete("This method has been deprecated. Please use ValueOffsets[index] instead.")]
public int GetValueOffset(int index)
{
    bool withinOffsets = index >= 0 && index <= Length;
    if (!withinOffsets)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    ReadOnlySpan<int> offsets = ValueOffsets;
    return offsets[index];
}
/// <summary>
/// Get the length in bytes of the value at the given index.
/// </summary>
/// <param name="index">Index of the value.</param>
/// <returns>Returns the difference between the offset following the value and the value's own offset, or
/// zero when the slot is not valid.</returns>
/// <exception cref="ArgumentOutOfRangeException">If the index is negative or not less than the length of the
/// array.</exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetValueLength(int index)
{
    bool inRange = index >= 0 && index < Length;
    if (!inRange)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    if (IsValid(index))
    {
        ReadOnlySpan<int> offsets = ValueOffsets;
        int start = offsets[index];
        int end = offsets[index + 1];
        return end - start;
    }

    return 0;
}
/// <summary>
/// Get the bytes of the value at a given index in the array, as a read-only span.
/// </summary>
/// <remarks>
/// A null entry is returned as an empty span, which makes it indistinguishable from a value that is an empty
/// byte collection. Use the <see cref="Array.IsNull"/> method to determine reliably whether an entry is null.
/// </remarks>
/// <param name="index">Index at which to get bytes.</param>
/// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/> object.</returns>
/// <exception cref="ArgumentOutOfRangeException">If the index is negative or beyond the length of the array.
/// </exception>
public ReadOnlySpan<byte> GetBytes(int index)
{
    bool inRange = index >= 0 && index < Length;
    if (!inRange)
    {
        throw new ArgumentOutOfRangeException(nameof(index));
    }

    if (IsNull(index))
    {
        // `return null;` would also compile, but it would be misleading: a null span is simply the
        // empty span, so the empty span is returned explicitly.
        return ReadOnlySpan<byte>.Empty;
    }

    int start = ValueOffsets[index];
    int byteCount = GetValueLength(index);
    return ValueBuffer.Span.Slice(start, byteCount);
}
}
}