blob: 1cf6dc78a789c416558d301352e01e5ee287c5f4 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using Apache.Arrow.Types;
namespace Apache.Arrow.C
{
public static class CArrowSchemaExporter
{
#if NET5_0_OR_GREATER
// On .NET 5+ the release callback is exposed as an unmanaged function pointer taken
// directly from the static ReleaseCArrowSchema method; no delegate instance is involved.
private static unsafe delegate* unmanaged<CArrowSchema*, void> ReleaseSchemaPtr => &ReleaseCArrowSchema;
#else
// Managed delegate type with the same shape as ReleaseCArrowSchema(CArrowSchema*).
internal unsafe delegate void ReleaseArrowSchema(CArrowSchema* cArray);
// Wraps ReleaseCArrowSchema once for the lifetime of the class.
// NOTE(review): NativeDelegate<T> is declared elsewhere; presumably it keeps the delegate
// alive and exposes a native-callable address through Pointer - confirm.
private static unsafe readonly NativeDelegate<ReleaseArrowSchema> s_releaseSchema = new NativeDelegate<ReleaseArrowSchema>(ReleaseCArrowSchema);
// Value stored in CArrowSchema.release for schemas produced by this exporter.
private static IntPtr ReleaseSchemaPtr => s_releaseSchema.Pointer;
#endif
/// <summary>
/// Export a type to a <see cref="CArrowSchema"/>.
/// </summary>
/// <remarks>
/// The exported schema has no name and no metadata and carries the nullable flag.
/// If the export fails, every native allocation made along the way is freed again and
/// <paramref name="schema"/> is left unmodified.
/// </remarks>
/// <param name="datatype">The datatype to export</param>
/// <param name="schema">An allocated but uninitialized CArrowSchema pointer.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="datatype"/> or <paramref name="schema"/> is null.
/// </exception>
/// <exception cref="NotImplementedException">The type (or a nested type) has no export format.</exception>
/// <exception cref="NotSupportedException">A nested type has zero children.</exception>
/// <example>
/// <code>
/// CArrowSchema* exportPtr = CArrowSchema.Create();
/// CArrowSchemaExporter.ExportType(dataType, exportPtr);
/// foreign_import_function(exportPtr);
/// CArrowSchema.Free(exportPtr);
/// </code>
/// </example>
public static unsafe void ExportType(IArrowType datatype, CArrowSchema* schema)
{
    if (datatype == null)
    {
        throw new ArgumentNullException(nameof(datatype));
    }
    if (schema == null)
    {
        throw new ArgumentNullException(nameof(schema));
    }

    // Resolve the managed values first: an unsupported type throws here, before any
    // native memory has been allocated.
    string format = GetFormat(datatype);
    long flags = GetFlags(datatype);

    CArrowSchema** children = ConstructChildren(datatype, out long numChildren);
    CArrowSchema* dictionary = null;
    try
    {
        dictionary = ConstructDictionary(datatype);
        // Allocated last so that nothing after it can fail and orphan the string.
        schema->format = StringUtil.ToCStringUtf8(format);
    }
    catch
    {
        // Roll back the allocations that succeeded so a failed export does not leak.
        if (dictionary != null)
        {
            CArrowSchema.Free(dictionary);
        }
        if (children != null)
        {
            for (long i = 0; i < numChildren; i++)
            {
                CArrowSchema.Free(children[i]);
            }
            Marshal.FreeHGlobal((IntPtr)children);
        }
        throw;
    }

    schema->name = null;
    schema->metadata = null;
    schema->flags = flags;
    schema->children = children;
    schema->n_children = numChildren;
    schema->dictionary = dictionary;
    schema->release = ReleaseSchemaPtr;
    schema->private_data = null;
}
/// <summary>
/// Export a field to a <see cref="CArrowSchema"/>.
/// </summary>
/// <remarks>
/// In addition to what <see cref="ExportType"/> writes, the exported schema carries the
/// field's name and metadata, and its nullable flag reflects the field's nullability.
/// The name and metadata buffers are freed again if exporting the data type fails.
/// </remarks>
/// <param name="field">The field to export</param>
/// <param name="schema">An allocated but uninitialized CArrowSchema pointer.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="field"/> or <paramref name="schema"/> is null.
/// </exception>
/// <example>
/// <code>
/// CArrowSchema* exportPtr = CArrowSchema.Create();
/// CArrowSchemaExporter.ExportField(field, exportPtr);
/// foreign_import_function(exportPtr);
/// CArrowSchema.Free(exportPtr);
/// </code>
/// </example>
public static unsafe void ExportField(Field field, CArrowSchema* schema)
{
    if (field == null)
    {
        throw new ArgumentNullException(nameof(field));
    }
    if (schema == null)
    {
        throw new ArgumentNullException(nameof(schema));
    }

    // Build the field-level buffers before touching the schema, so that a failure at any
    // point can be rolled back without leaving a half-exported schema behind.
    var name = StringUtil.ToCStringUtf8(field.Name);
    byte* metadata = null;
    try
    {
        metadata = ConstructMetadata(field.Metadata);
        ExportType(field.DataType, schema);
    }
    catch
    {
        Marshal.FreeHGlobal((IntPtr)metadata);
        Marshal.FreeHGlobal((IntPtr)name);
        throw;
    }

    // ExportType reset these members; overwrite them with the field-specific values.
    schema->name = name;
    schema->metadata = metadata;
    schema->flags = GetFlags(field.DataType, field.IsNullable);
}
/// <summary>
/// Export a schema to a <see cref="CArrowSchema"/>.
/// </summary>
/// <remarks>
/// The schema is exported as a struct type built from its fields, with the schema-level
/// metadata attached. The metadata buffer is freed again if exporting the struct fails.
/// </remarks>
/// <param name="schema">The schema to export</param>
/// <param name="out_schema">An allocated but uninitialized CArrowSchema pointer.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="schema"/> or <paramref name="out_schema"/> is null.
/// </exception>
/// <example>
/// <code>
/// CArrowSchema* exportPtr = CArrowSchema.Create();
/// CArrowSchemaExporter.ExportSchema(schema, exportPtr);
/// foreign_import_function(exportPtr);
/// CArrowSchema.Free(exportPtr);
/// </code>
/// </example>
public static unsafe void ExportSchema(Schema schema, CArrowSchema* out_schema)
{
    if (schema == null)
    {
        throw new ArgumentNullException(nameof(schema));
    }
    if (out_schema == null)
    {
        throw new ArgumentNullException(nameof(out_schema));
    }

    var structType = new StructType(schema.FieldsList);

    // Build the metadata first; once ExportType has succeeded nothing else can fail.
    byte* metadata = ConstructMetadata(schema.Metadata);
    try
    {
        ExportType(structType, out_schema);
    }
    catch
    {
        Marshal.FreeHGlobal((IntPtr)metadata);
        throw;
    }
    out_schema->metadata = metadata;
}
/// <summary>
/// Maps a time unit to the single character used for it inside a format string.
/// </summary>
/// <exception cref="InvalidDataException">The unit is not one of the four handled values.</exception>
private static char FormatTimeUnit(TimeUnit unit)
{
    switch (unit)
    {
        case TimeUnit.Second:
            return 's';
        case TimeUnit.Millisecond:
            return 'm';
        case TimeUnit.Microsecond:
            return 'u';
        case TimeUnit.Nanosecond:
            return 'n';
        default:
            throw new InvalidDataException($"Unsupported time unit for export: {unit}");
    }
}
/// <summary>
/// Builds the format string of a union type: a mode prefix followed by the
/// comma-separated type ids.
/// </summary>
/// <exception cref="InvalidDataException">The union mode is neither sparse nor dense.</exception>
private static string FormatUnion(UnionType unionType)
{
    string prefix;
    switch (unionType.Mode)
    {
        case UnionMode.Sparse:
            prefix = "+us:";
            break;
        case UnionMode.Dense:
            prefix = "+ud:";
            break;
        default:
            throw new InvalidDataException($"Unsupported union mode for export: {unionType.Mode}");
    }

    var text = new StringBuilder(prefix);
    bool needsSeparator = false;
    foreach (var typeId in unionType.TypeIds)
    {
        if (needsSeparator)
        {
            text.Append(',');
        }
        text.Append(typeId);
        needsSeparator = true;
    }
    return text.ToString();
}
/// <summary>
/// Returns the format string that is written to <c>CArrowSchema.format</c> for a type.
/// </summary>
/// <remarks>
/// Numeric parameters are always formatted with the invariant culture so the result does
/// not depend on the current thread culture (for example, a negative decimal scale must
/// use an ASCII minus sign). A dictionary type is described by the format of its index
/// type.
/// </remarks>
/// <exception cref="NotImplementedException">The type has no export format.</exception>
/// <exception cref="InvalidDataException">A time or interval unit is not supported.</exception>
private static string GetFormat(IArrowType datatype)
{
    switch (datatype)
    {
        case NullType _: return "n";
        case BooleanType _: return "b";
        // Integers
        case Int8Type _: return "c";
        case UInt8Type _: return "C";
        case Int16Type _: return "s";
        case UInt16Type _: return "S";
        case Int32Type _: return "i";
        case UInt32Type _: return "I";
        case Int64Type _: return "l";
        case UInt64Type _: return "L";
        // Floats
        case HalfFloatType _: return "e";
        case FloatType _: return "f";
        case DoubleType _: return "g";
        // Decimal
        case Decimal32Type decimalType:
            return FormattableString.Invariant($"d:{decimalType.Precision},{decimalType.Scale},32");
        case Decimal64Type decimalType:
            return FormattableString.Invariant($"d:{decimalType.Precision},{decimalType.Scale},64");
        case Decimal128Type decimalType:
            // The 128-bit form carries no explicit bit-width suffix.
            return FormattableString.Invariant($"d:{decimalType.Precision},{decimalType.Scale}");
        case Decimal256Type decimalType:
            return FormattableString.Invariant($"d:{decimalType.Precision},{decimalType.Scale},256");
        // Binary
        case BinaryType _: return "z";
        case BinaryViewType _: return "vz";
        case LargeBinaryType _: return "Z";
        case StringType _: return "u";
        case StringViewType _: return "vu";
        case LargeStringType _: return "U";
        case FixedSizeBinaryType binaryType:
            return FormattableString.Invariant($"w:{binaryType.ByteWidth}");
        // Date
        case Date32Type _: return "tdD";
        case Date64Type _: return "tdm";
        // Time
        case Time32Type timeType:
            return $"tt{FormatTimeUnit(timeType.Unit)}";
        case Time64Type timeType:
            // Same prefix as Time32, but allowed time units are different.
            return $"tt{FormatTimeUnit(timeType.Unit)}";
        // Duration
        case DurationType durationType:
            return $"tD{FormatTimeUnit(durationType.Unit)}";
        // Timestamp
        case TimestampType timestampType:
            // A null timezone formats as an empty string, leaving a trailing ':'.
            return $"ts{FormatTimeUnit(timestampType.Unit)}:{timestampType.Timezone}";
        // Interval
        case IntervalType intervalType:
            return intervalType.Unit switch
            {
                IntervalUnit.YearMonth => "tiM",
                IntervalUnit.DayTime => "tiD",
                IntervalUnit.MonthDayNanosecond => "tin",
                _ => throw new InvalidDataException($"Unsupported interval unit for export: {intervalType.Unit}"),
            };
        // Nested
        case ListType _: return "+l";
        case ListViewType _: return "+vl";
        case LargeListType _: return "+L";
        case FixedSizeListType fixedListType:
            return FormattableString.Invariant($"+w:{fixedListType.ListSize}");
        case StructType _: return "+s";
        case UnionType u: return FormatUnion(u);
        case MapType _: return "+m";
        // Dictionary
        case DictionaryType dictionaryType:
            return GetFormat(dictionaryType.IndexType);
        default: throw new NotImplementedException($"Exporting {datatype.Name} not implemented");
    }
}
/// <summary>
/// Computes the flag bits for an exported type: the nullable bit when
/// <paramref name="nullable"/> is set, the dictionary-ordered bit for ordered dictionary
/// types, and the map-keys-sorted bit for map types whose keys are sorted.
/// </summary>
private static long GetFlags(IArrowType datatype, bool nullable = true)
{
    long result = 0;

    if (nullable)
    {
        result |= CArrowSchema.ArrowFlagNullable;
    }

    if (datatype is DictionaryType dictionary && dictionary.Ordered)
    {
        result |= CArrowSchema.ArrowFlagDictionaryOrdered;
    }

    if (datatype is MapType map)
    {
        if (map.KeySorted)
        {
            result |= CArrowSchema.ArrowFlagMapKeysSorted;
        }
    }

    return result;
}
/// <summary>
/// Exports the child fields of a nested type into a newly allocated array of
/// <see cref="CArrowSchema"/> pointers.
/// </summary>
/// <param name="datatype">The type whose children are exported.</param>
/// <param name="numChildren">Receives the number of children; 0 for non-nested types.</param>
/// <returns>The pointer array, or null when <paramref name="datatype"/> is not nested.</returns>
/// <exception cref="NotSupportedException">The nested type has zero children.</exception>
private static unsafe CArrowSchema** ConstructChildren(IArrowType datatype, out long numChildren)
{
    if (!(datatype is NestedType nestedType))
    {
        numChildren = 0;
        return null;
    }

    IReadOnlyList<Field> fields = nestedType.Fields;
    int numFields = fields.Count;
    numChildren = numFields;
    if (numFields == 0)
    {
        throw new NotSupportedException("Exporting nested data types with zero children.");
    }

    var pointerList = (CArrowSchema**)Marshal.AllocHGlobal(numFields * IntPtr.Size);
    int created = 0;
    try
    {
        for (int i = 0; i < numFields; i++)
        {
            CArrowSchema* cSchema = CArrowSchema.Create();
            // Mark the struct as not-yet-exported and track it before exporting, so the
            // cleanup below also covers the child whose export fails.
            // NOTE(review): relies on CArrowSchema.Free skipping the release callback
            // when release is unset - confirm against its implementation.
            cSchema->release = default;
            pointerList[i] = cSchema;
            created++;
            ExportField(fields[i], cSchema);
        }
    }
    catch
    {
        // A child failed to export: free every child created so far plus the array
        // itself, otherwise they would leak because the parent is never populated.
        for (int i = 0; i < created; i++)
        {
            CArrowSchema.Free(pointerList[i]);
        }
        Marshal.FreeHGlobal((IntPtr)pointerList);
        throw;
    }
    return pointerList;
}
/// <summary>
/// Exports the value type of a dictionary type into a newly created
/// <see cref="CArrowSchema"/>.
/// </summary>
/// <returns>
/// The exported value-type schema, or null when <paramref name="datatype"/> is not a
/// dictionary type.
/// </returns>
private static unsafe CArrowSchema* ConstructDictionary(IArrowType datatype)
{
    if (!(datatype is DictionaryType dictType))
    {
        return null;
    }

    CArrowSchema* cSchema = CArrowSchema.Create();
    // Mark the struct as not-yet-exported so it can be freed safely if the export fails.
    // NOTE(review): relies on CArrowSchema.Free skipping the release callback when
    // release is unset - confirm against its implementation.
    cSchema->release = default;
    try
    {
        ExportType(dictType.ValueType, cSchema);
    }
    catch
    {
        // Do not leak the struct when the value type cannot be exported.
        CArrowSchema.Free(cSchema);
        throw;
    }
    return cSchema;
}
/// <summary>
/// Serializes key/value metadata into a single unmanaged buffer: a 32-bit pair count
/// followed, for every pair, by the length-prefixed UTF-8 key and the length-prefixed
/// UTF-8 value.
/// </summary>
/// <returns>
/// The buffer allocated with <see cref="Marshal.AllocHGlobal(int)"/>, or null when
/// <paramref name="metadata"/> is null or empty.
/// </returns>
private unsafe static byte* ConstructMetadata(IReadOnlyDictionary<string, string> metadata)
{
    if (metadata == null || metadata.Count == 0)
    {
        return null;
    }

    // First pass: measure every string once so the buffer can be sized exactly.
    int[] byteCounts = new int[metadata.Count * 2];
    int totalBytes = sizeof(int);
    int slot = 0;
    foreach (KeyValuePair<string, string> entry in metadata)
    {
        byteCounts[slot] = Encoding.UTF8.GetByteCount(entry.Key);
        int keyBytes = byteCounts[slot++];
        byteCounts[slot] = Encoding.UTF8.GetByteCount(entry.Value);
        int valueBytes = byteCounts[slot++];
        // Two 32-bit length prefixes plus both payloads.
        totalBytes += 8 + keyBytes + valueBytes;
    }

    // Second pass: write the pair count, then each length-prefixed string.
    IntPtr buffer = Marshal.AllocHGlobal(totalBytes);
    Marshal.WriteInt32(buffer, metadata.Count);
    byte* cursor = (byte*)buffer + sizeof(int);
    slot = 0;
    foreach (KeyValuePair<string, string> entry in metadata)
    {
        WriteMetadataString(ref cursor, byteCounts[slot++], entry.Key);
        WriteMetadataString(ref cursor, byteCounts[slot++], entry.Value);
    }

    Debug.Assert(cursor - (byte*)buffer == totalBytes);
    return (byte*)buffer;
}
/// <summary>
/// Writes <paramref name="length"/> as a 32-bit integer at <paramref name="ptr"/>,
/// followed by the UTF-8 encoding of <paramref name="str"/>, and advances
/// <paramref name="ptr"/> past everything written.
/// </summary>
/// <param name="ptr">Write position; moved forward by 4 + <paramref name="length"/> bytes.</param>
/// <param name="length">Number of bytes available for (and written as the size of) the encoded string.</param>
/// <param name="str">The string to encode.</param>
private unsafe static void WriteMetadataString(ref byte* ptr, int length, string str)
{
    // Length prefix first; the cursor moves past it before the payload is encoded.
    Marshal.WriteInt32((IntPtr)ptr, length);
    byte* payload = ptr + sizeof(int);
    ptr = payload;

    fixed (char* chars = str)
    {
        Encoding.UTF8.GetBytes(chars, str.Length, payload, length);
    }

    ptr = payload + length;
}
#if NET5_0_OR_GREATER
[UnmanagedCallersOnly]
#endif
/// <summary>
/// Release callback installed on exported schemas. Frees the format, name and metadata
/// buffers, frees every child and the child pointer array, frees the dictionary schema,
/// and finally clears the members, including <c>release</c>.
/// Does nothing for a null pointer or a schema whose <c>release</c> is already unset.
/// </summary>
private static unsafe void ReleaseCArrowSchema(CArrowSchema* schema)
{
    if (schema == null || schema->release == default)
    {
        return;
    }

    // Buffers owned directly by this schema.
    Marshal.FreeHGlobal((IntPtr)schema->format);
    Marshal.FreeHGlobal((IntPtr)schema->name);
    Marshal.FreeHGlobal((IntPtr)schema->metadata);
    schema->format = null;
    schema->name = null;
    schema->metadata = null;

    // Children first, then the array that held their pointers.
    if (schema->n_children > 0)
    {
        int index = 0;
        while (index < schema->n_children)
        {
            CArrowSchema.Free(schema->GetChild(index));
            index++;
        }
        Marshal.FreeHGlobal((IntPtr)schema->children);
    }

    if (schema->dictionary != null)
    {
        CArrowSchema.Free(schema->dictionary);
    }

    // Clearing release last marks the schema as released.
    schema->flags = 0;
    schema->n_children = 0;
    schema->dictionary = null;
    schema->children = null;
    schema->release = default;
}
}
}