blob: 6df1011128c8acafeaa2be1368293823b61b3f23 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using Apache.Arrow.Types;
namespace Apache.Arrow
{
public class StringArray : BinaryArray, IReadOnlyList<string>, ICollection<string>
{
/// <summary>
/// Encoding used by this class whenever a caller passes a null encoding (UTF-8).
/// </summary>
public static readonly Encoding DefaultEncoding = Encoding.UTF8;
// Per-encoding cache of fully decoded strings. It stays null until Materialize
// creates it; GetString consults it before decoding bytes.
private Dictionary<Encoding, string[]> materializedStringStore;
/// <summary>
/// Builder that collects string values and produces a <see cref="StringArray"/>.
/// </summary>
public new class Builder : BuilderBase<StringArray, Builder>
{
    /// <summary>
    /// Creates a builder whose element type is <see cref="StringType.Default"/>.
    /// </summary>
    public Builder()
        : base(StringType.Default)
    {
    }

    /// <summary>
    /// Wraps the supplied array data in a new <see cref="StringArray"/>.
    /// </summary>
    /// <param name="data">Array data to wrap</param>
    /// <returns>A new <see cref="StringArray"/> over <paramref name="data"/></returns>
    protected override StringArray Build(ArrayData data) => new StringArray(data);

    /// <summary>
    /// Append a single string, or a null entry when <paramref name="value"/> is null.
    /// </summary>
    /// <param name="value">String to append; null is forwarded to <c>AppendNull</c></param>
    /// <param name="encoding">Optional: encoding used to turn the string into bytes, default is <see cref="DefaultEncoding"/></param>
    /// <returns>The builder returned by the underlying append call</returns>
    public Builder Append(string value, Encoding encoding = null)
    {
        if (value is null)
        {
            return AppendNull();
        }

        // Encode first, then hand the bytes to the span-based Append of the base builder.
        byte[] encoded = (encoding ?? DefaultEncoding).GetBytes(value);
        return Append(encoded.AsSpan());
    }

    /// <summary>
    /// Append every string of a sequence, in enumeration order.
    /// </summary>
    /// <param name="values">Strings to append; null elements are appended as null entries</param>
    /// <param name="encoding">Optional: encoding applied to each element, default is <see cref="DefaultEncoding"/></param>
    /// <returns>This builder</returns>
    public Builder AppendRange(IEnumerable<string> values, Encoding encoding = null)
    {
        foreach (string item in values)
        {
            Append(item, encoding);
        }

        return this;
    }
}
/// <summary>
/// Create a string array over existing array data.
/// </summary>
/// <param name="data">Array data forwarded to the base constructor together with <see cref="ArrowTypeId.String"/></param>
public StringArray(ArrayData data)
    : base(ArrowTypeId.String, data)
{
}
/// <summary>
/// Create a string array from its individual buffers.
/// </summary>
/// <param name="length">Length forwarded to the <see cref="ArrayData"/> constructor</param>
/// <param name="valueOffsetsBuffer">Value offsets buffer; stored as the second buffer</param>
/// <param name="dataBuffer">Data buffer; stored as the third buffer</param>
/// <param name="nullBitmapBuffer">Null bitmap buffer; stored as the first buffer</param>
/// <param name="nullCount">Optional: null count forwarded to <see cref="ArrayData"/>, default is 0</param>
/// <param name="offset">Optional: offset forwarded to <see cref="ArrayData"/>, default is 0</param>
public StringArray(
    int length,
    ArrowBuffer valueOffsetsBuffer,
    ArrowBuffer dataBuffer,
    ArrowBuffer nullBitmapBuffer,
    int nullCount = 0,
    int offset = 0)
    : this(new ArrayData(
        StringType.Default,
        length,
        nullCount,
        offset,
        // Buffer order: null bitmap, value offsets, data.
        new ArrowBuffer[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer }))
{
}
/// <summary>
/// Pass this array and the visitor to the two-argument <c>Accept</c> helper.
/// </summary>
/// <param name="visitor">Visitor to forward</param>
public override void Accept(IArrowArrayVisitor visitor)
{
    Accept(this, visitor);
}
/// <summary>
/// Decode and return the string stored at a given position.
/// </summary>
/// <param name="index">Position of the value to read</param>
/// <param name="encoding">Optional: encoding used to decode the bytes, default is UTF8</param>
/// <returns>
/// The cached string when the array has been materialized for <paramref name="encoding"/>;
/// otherwise null for a null entry, <see cref="string.Empty"/> for a zero-length value,
/// or the decoded string.
/// </returns>
public string GetString(int index, Encoding encoding = default)
{
    encoding ??= DefaultEncoding;

    // Fast path: strings already decoded by Materialize for this encoding.
    if (materializedStringStore != null
        && materializedStringStore.TryGetValue(encoding, out string[] cached))
    {
        return cached[index];
    }

    ReadOnlySpan<byte> raw = GetBytes(index, out bool valueIsNull);

    if (valueIsNull)
    {
        return null;
    }

    // Handled separately so the pointer-based decode below never sees an empty span.
    if (raw.Length == 0)
    {
        return string.Empty;
    }

    unsafe
    {
        fixed (byte* start = &MemoryMarshal.GetReference(raw))
        {
            return encoding.GetString(start, raw.Length);
        }
    }
}
/// <summary>
/// Decode every value once with the given encoding and cache the results so that
/// later <see cref="GetString(int, Encoding)"/> calls for that encoding read from the cache.
/// </summary>
/// <param name="encoding">Optional: the string encoding, default is UTF8</param>
/// <remarks>This method is not thread safe when it is called in parallel with <see cref="GetString(int, Encoding)"/> or <see cref="Materialize(Encoding)"/>.</remarks>
public void Materialize(Encoding encoding = default)
{
    encoding ??= DefaultEncoding;

    // Nothing to do when this encoding is already cached.
    if (IsMaterialized(encoding))
    {
        return;
    }

    materializedStringStore ??= new Dictionary<Encoding, string[]>();

    var decoded = new string[Length];
    for (int position = 0; position < Length; position++)
    {
        decoded[position] = GetString(position, encoding);
    }

    // Publish only after every element has been decoded.
    materializedStringStore[encoding] = decoded;
}
/// <summary>
/// Tell whether <see cref="Materialize(Encoding)"/> has already cached strings for the given encoding
/// </summary>
/// <param name="encoding">Optional: the string encoding, default is UTF8</param>
/// <returns>True or false whether the array has been materialized</returns>
public bool IsMaterialized(Encoding encoding = default)
{
    // No store at all means nothing has been materialized yet.
    return materializedStringStore != null
        && materializedStringStore.ContainsKey(encoding ?? DefaultEncoding);
}
/// <summary>Number of elements, taken from <c>Length</c>.</summary>
int IReadOnlyCollection<string>.Count
{
    get { return Length; }
}

/// <summary>String at <paramref name="index"/>, read through <see cref="GetString(int, Encoding)"/> with the default encoding.</summary>
string IReadOnlyList<string>.this[int index]
{
    get { return GetString(index); }
}
/// <summary>
/// Lazily yields each value in index order, read with the default encoding.
/// </summary>
IEnumerator<string> IEnumerable<string>.GetEnumerator()
{
    int position = 0;
    while (position < Length)
    {
        yield return GetString(position);
        position++;
    }
}
/// <summary>
/// Non-generic enumerator; reuses the generic string enumerator.
/// </summary>
IEnumerator IEnumerable.GetEnumerator()
{
    IEnumerable<string> typed = this;
    return typed.GetEnumerator();
}
/// <summary>Number of elements, taken from <c>Length</c>.</summary>
int ICollection<string>.Count
{
    get { return Length; }
}

/// <summary>Always true: the mutating members below throw.</summary>
bool ICollection<string>.IsReadOnly
{
    get { return true; }
}

/// <summary>Not supported; always throws <see cref="NotSupportedException"/>.</summary>
void ICollection<string>.Add(string item)
{
    throw new NotSupportedException("Collection is read-only.");
}

/// <summary>Not supported; always throws <see cref="NotSupportedException"/>.</summary>
bool ICollection<string>.Remove(string item)
{
    throw new NotSupportedException("Collection is read-only.");
}

/// <summary>Not supported; always throws <see cref="NotSupportedException"/>.</summary>
void ICollection<string>.Clear()
{
    throw new NotSupportedException("Collection is read-only.");
}
/// <summary>
/// Linear search for a value, decoding each element with the default encoding.
/// </summary>
/// <param name="item">String to look for; may be null</param>
/// <returns>True as soon as an element equal to <paramref name="item"/> is found, otherwise false</returns>
bool ICollection<string>.Contains(string item)
{
    int position = 0;
    while (position < Length)
    {
        // Static string.Equals is the same ordinal comparison as the == operator.
        if (string.Equals(GetString(position), item))
        {
            return true;
        }

        position++;
    }

    return false;
}
/// <summary>
/// Copy every value, decoded with the default encoding, into a destination array.
/// </summary>
/// <param name="array">Destination array</param>
/// <param name="arrayIndex">Index in <paramref name="array"/> at which the first value is written</param>
/// <exception cref="ArgumentNullException"><paramref name="array"/> is null</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="arrayIndex"/> is negative</exception>
/// <exception cref="ArgumentException">
/// The space from <paramref name="arrayIndex"/> to the end of <paramref name="array"/> cannot hold all values
/// </exception>
void ICollection<string>.CopyTo(string[] array, int arrayIndex)
{
    // Validate everything up front so a failing call never leaves the
    // destination partially overwritten.
    if (array == null)
    {
        throw new ArgumentNullException(nameof(array));
    }

    if (arrayIndex < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(arrayIndex), "Index must be non-negative.");
    }

    // Written as a subtraction so that a large arrayIndex cannot overflow.
    if (array.Length - arrayIndex < Length)
    {
        throw new ArgumentException(
            "Destination array is not long enough to copy all the items in the collection. Check array index and length.",
            nameof(array));
    }

    for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++)
    {
        array[destIndex] = GetString(srcIndex);
    }
}
}
}