feat: Add support for List data types (#39)
### Rationale for the changes
This PR refactors and extends support for nested types in the Arrow
integration. The current implementation of `ArrowNestedType` is tailored
primarily for data structs, as seen in `StructBufferBuilder`. However,
it lacks broader support and certain expected functionalities, such as
`loadStructArrayBuilder`.
To address this, the following improvements have been made:
- Renamed `ArrowNestedType` to `ArrowTypeStruct` to align with naming
conventions used elsewhere in the codebase.
- Introduced initial support for `ArrowTypeList`, including nested
lists.
For simplicity, instead of introducing a dedicated subtype for lists,
this PR uses an interface of `[Any?]?`. If this approach proves
insufficient, there are more explicit alternatives that can be explored.
**NOTE:** Work on `ArrowCExporter` and `ArrowCImporter` has been
intentionally deferred. These components require a deeper understanding
of memory ownership and child parsing, so I believe they are better
addressed in a future PR unless strictly necessary.
### What's Changed
1. Renamed `ArrowNestedType` -> `ArrowTypeStruct`.
2. Added support for `ArrowTypeList`, including nested lists.
3. Replaced `StructArray` with `NestedArray`, which handles both struct and list arrays and adds basic `.asString` formatting for lists.
4. Added `ListArrayBuilder`.
5. Extended `ArrowArrayBuilder` to support the `.list` type.
6. Implemented `loadStructArrayBuilder` and `loadListArrayBuilder`.
7. Introduced `ListBufferBuilder`.
8. Added `ArrowReader.loadListData`.
9. Renamed `makeStructHolder` to `makeNestedHolder`, which now also builds list holders.
### Are these changes tested?
Tests are included in `ArrayTests.swift`. The changes are also in use in
internal applications, including an integration with `ArrowFlight`.
Closes #16.
---------
Co-authored-by: Marco <mgraziano@dadostech.com>
Co-authored-by: Sutou Kouhei <kou@cozmixng.org>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/Arrow/Sources/Arrow/ArrowArray.swift b/Arrow/Sources/Arrow/ArrowArray.swift
index d4ee873..759e1de 100644
--- a/Arrow/Sources/Arrow/ArrowArray.swift
+++ b/Arrow/Sources/Arrow/ArrowArray.swift
@@ -114,7 +114,9 @@
case .binary:
return try ArrowArrayHolderImpl(BinaryArray(with))
case .strct:
- return try ArrowArrayHolderImpl(StructArray(with))
+ return try ArrowArrayHolderImpl(NestedArray(with))
+ case .list:
+ return try ArrowArrayHolderImpl(NestedArray(with))
default:
throw ArrowError.invalid("Array not found for type: \(arrowType)")
}
@@ -355,16 +357,37 @@
}
}
-public class StructArray: ArrowArray<[Any?]> {
- public private(set) var arrowFields: [ArrowArrayHolder]?
+public class NestedArray: ArrowArray<[Any?]> {
+ private var children: [ArrowArrayHolder]?
+
public required init(_ arrowData: ArrowData) throws {
try super.init(arrowData)
- var fields = [ArrowArrayHolder]()
- for child in arrowData.children {
- fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child))
- }
- self.arrowFields = fields
+ switch arrowData.type.id {
+ case .list:
+ guard arrowData.children.count == 1 else {
+ throw ArrowError.invalid("List array must have exactly one child")
+ }
+
+ guard let listType = arrowData.type as? ArrowTypeList else {
+ throw ArrowError.invalid("Expected ArrowTypeList for list type ID")
+ }
+
+ self.children = [try ArrowArrayHolderImpl.loadArray(
+ listType.elementField.type,
+ with: arrowData.children[0]
+ )]
+
+ case .strct:
+ var fields = [ArrowArrayHolder]()
+ for child in arrowData.children {
+ fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child))
+ }
+ self.children = fields
+
+ default:
+ throw ArrowError.invalid("NestedArray only supports list and struct types, got: \(arrowData.type.id)")
+ }
}
public override subscript(_ index: UInt) -> [Any?]? {
@@ -372,36 +395,104 @@
return nil
}
- if let fields = arrowFields {
- var result = [Any?]()
- for field in fields {
- result.append(field.array.asAny(index))
- }
-
- return result
+ guard let children = self.children else {
+ return nil
}
- return nil
+ switch arrowData.type.id {
+ case .list:
+ guard let values = children.first else { return nil }
+
+ let offsets = self.arrowData.buffers[1]
+ let offsetIndex = Int(index) * MemoryLayout<Int32>.stride
+
+ let startOffset = offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self)
+ let endOffset = offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout<Int32>.stride).load(as: Int32.self)
+
+ var items = [Any?]()
+ for i in startOffset..<endOffset {
+ items.append(values.array.asAny(UInt(i)))
+ }
+
+ return items
+
+ case .strct:
+ var result = [Any?]()
+ for field in children {
+ result.append(field.array.asAny(index))
+ }
+ return result
+
+ default:
+ return nil
+ }
}
public override func asString(_ index: UInt) -> String {
- if self.arrowData.isNull(index) {
- return ""
- }
+ switch arrowData.type.id {
+ case .list:
+ if self.arrowData.isNull(index) {
+ return "null"
+ }
- var output = "{"
- if let fields = arrowFields {
- for fieldIndex in 0..<fields.count {
- let asStr = fields[fieldIndex].array as? AsString
- if fieldIndex == 0 {
- output.append("\(asStr!.asString(index))")
+ guard let list = self[index] else {
+ return "null"
+ }
+
+ var output = "["
+ for (i, item) in list.enumerated() {
+ if i > 0 {
+ output.append(",")
+ }
+
+ if item == nil {
+ output.append("null")
+ } else if let asStringItem = item as? AsString {
+ output.append(asStringItem.asString(0))
} else {
- output.append(",\(asStr!.asString(index))")
+ output.append("\(item!)")
}
}
- }
+ output.append("]")
+ return output
- output += "}"
- return output
+ case .strct:
+ if self.arrowData.isNull(index) {
+ return ""
+ }
+
+ var output = "{"
+ if let children = self.children {
+ for fieldIndex in 0..<children.count {
+ let asStr = children[fieldIndex].array as? AsString
+ if fieldIndex == 0 {
+ output.append("\(asStr!.asString(index))")
+ } else {
+ output.append(",\(asStr!.asString(index))")
+ }
+ }
+ }
+ output += "}"
+ return output
+
+ default:
+ return ""
+ }
+ }
+
+ public var isListArray: Bool {
+ return arrowData.type.id == .list
+ }
+
+ public var isStructArray: Bool {
+ return arrowData.type.id == .strct
+ }
+
+ public var fields: [ArrowArrayHolder]? {
+ return arrowData.type.id == .strct ? children : nil
+ }
+
+ public var values: ArrowArrayHolder? {
+ return arrowData.type.id == .list ? children?.first : nil
}
}
diff --git a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
index 493e43a..7db249d 100644
--- a/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
+++ b/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
@@ -125,13 +125,13 @@
}
}
-public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructArray> {
+public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, NestedArray> {
let builders: [any ArrowArrayHolderBuilder]
let fields: [ArrowField]
public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) throws {
self.fields = fields
self.builders = builders
- try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
+ try super.init(ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields))
self.bufferBuilder.initializeTypeInfo(fields)
}
@@ -143,7 +143,7 @@
}
self.builders = builders
- try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
+ try super.init(ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields))
}
public override func append(_ values: [Any?]?) {
@@ -159,7 +159,7 @@
}
}
- public override func finish() throws -> StructArray {
+ public override func finish() throws -> NestedArray {
let buffers = self.bufferBuilder.finish()
var childData = [ArrowData]()
for builder in self.builders {
@@ -169,11 +169,40 @@
let arrowData = try ArrowData(self.type, buffers: buffers,
children: childData, nullCount: self.nullCount,
length: self.length)
- let structArray = try StructArray(arrowData)
+ let structArray = try NestedArray(arrowData)
return structArray
}
}
+public class ListArrayBuilder: ArrowArrayBuilder<ListBufferBuilder, NestedArray> {
+ let valueBuilder: any ArrowArrayHolderBuilder
+
+ public override init(_ arrowType: ArrowType) throws {
+ guard let listType = arrowType as? ArrowTypeList else {
+ throw ArrowError.invalid("Expected ArrowTypeList")
+ }
+ let arrowField = listType.elementField
+ self.valueBuilder = try ArrowArrayBuilders.loadBuilder(arrowType: arrowField.type)
+ try super.init(arrowType)
+ }
+
+ public override func append(_ values: [Any?]?) {
+ self.bufferBuilder.append(values)
+ if let vals = values {
+ for val in vals {
+ self.valueBuilder.appendAny(val)
+ }
+ }
+ }
+
+ public override func finish() throws -> NestedArray {
+ let buffers = self.bufferBuilder.finish()
+ let childData = try valueBuilder.toHolder().array.arrowData
+ let arrowData = try ArrowData(self.type, buffers: buffers, children: [childData], nullCount: self.nullCount, length: self.length)
+ return try NestedArray(arrowData)
+ }
+}
+
public class ArrowArrayBuilders {
public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity
_ builderType: Any.Type) throws -> ArrowArrayHolderBuilder {
@@ -290,6 +319,16 @@
throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found")
}
return try TimestampArrayBuilder(timestampType.unit)
+ case .strct:
+ guard let structType = arrowType as? ArrowTypeStruct else {
+ throw ArrowError.invalid("Expected ArrowStructType for \(arrowType.id)")
+ }
+ return try StructArrayBuilder(structType.fields)
+ case .list:
+ guard let listType = arrowType as? ArrowTypeList else {
+ throw ArrowError.invalid("Expected ArrowTypeList for \(arrowType.id)")
+ }
+ return try ListArrayBuilder(listType)
default:
throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)")
}
@@ -353,4 +392,12 @@
public static func loadTimestampArrayBuilder(_ unit: ArrowTimestampUnit, timezone: String? = nil) throws -> TimestampArrayBuilder {
return try TimestampArrayBuilder(unit, timezone: timezone)
}
+
+ public static func loadStructArrayBuilder(_ fields: [ArrowField]) throws -> StructArrayBuilder {
+ return try StructArrayBuilder(fields)
+ }
+
+ public static func loadListArrayBuilder(_ listType: ArrowTypeList) throws -> ListArrayBuilder {
+ return try ListArrayBuilder(listType)
+ }
}
diff --git a/Arrow/Sources/Arrow/ArrowBufferBuilder.swift b/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
index cc0bae0..4e518c6 100644
--- a/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
+++ b/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
@@ -338,20 +338,20 @@
public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
public typealias ItemType = [Any?]
- var info: ArrowNestedType?
+ var info: ArrowTypeStruct?
public init() throws {
let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
super.init(nulls)
}
public func initializeTypeInfo(_ fields: [ArrowField]) {
- info = ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
+ info = ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields)
}
public func append(_ newValue: [Any?]?) {
let index = UInt(self.length)
self.length += 1
- if length > self.nulls.length {
+ if self.length > self.nulls.length {
self.resize(length)
}
@@ -379,3 +379,78 @@
return [nulls]
}
}
+
+public class ListBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
+ public typealias ItemType = [Any?]
+ var offsets: ArrowBuffer
+
+ public required init() throws {
+ self.offsets = ArrowBuffer.createBuffer(1, size: UInt(MemoryLayout<Int32>.stride))
+ let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
+ super.init(nulls)
+ self.offsets.rawPointer.storeBytes(of: Int32(0), as: Int32.self)
+ }
+
+ public func append(_ count: Int) {
+ let index = UInt(self.length)
+ self.length += 1
+
+ if length >= self.offsets.length {
+ self.resize(length + 1)
+ }
+
+ let offsetIndex = Int(index) * MemoryLayout<Int32>.stride
+ let currentOffset = self.offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self)
+
+ BitUtility.setBit(index + self.offset, buffer: self.nulls)
+ let newOffset = currentOffset + Int32(count)
+ self.offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout<Int32>.stride).storeBytes(of: newOffset, as: Int32.self)
+ }
+
+ public func append(_ newValue: [Any?]?) {
+ let index = UInt(self.length)
+ self.length += 1
+
+ if self.length >= self.offsets.length {
+ self.resize(self.length + 1)
+ }
+
+ let offsetIndex = Int(index) * MemoryLayout<Int32>.stride
+ let currentOffset = self.offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self)
+
+ if let vals = newValue {
+ BitUtility.setBit(index + self.offset, buffer: self.nulls)
+ let newOffset = currentOffset + Int32(vals.count)
+ self.offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout<Int32>.stride).storeBytes(of: newOffset, as: Int32.self)
+ } else {
+ self.nullCount += 1
+ BitUtility.clearBit(index + self.offset, buffer: self.nulls)
+ self.offsets.rawPointer.advanced(by: offsetIndex + MemoryLayout<Int32>.stride).storeBytes(of: currentOffset, as: Int32.self)
+ }
+ }
+
+ public override func isNull(_ index: UInt) -> Bool {
+ return !BitUtility.isSet(index + self.offset, buffer: self.nulls)
+ }
+
+ public func resize(_ length: UInt) {
+ if length > self.offsets.length {
+ let resizeLength = resizeLength(self.offsets)
+ var offsets = ArrowBuffer.createBuffer(resizeLength, size: UInt(MemoryLayout<Int32>.size))
+ var nulls = ArrowBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
+ ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len: self.offsets.capacity)
+ ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity)
+ self.offsets = offsets
+ self.nulls = nulls
+ }
+ }
+
+ public func finish() -> [ArrowBuffer] {
+ let length = self.length
+ var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
+ var offsets = ArrowBuffer.createBuffer(length + 1, size: UInt(MemoryLayout<Int32>.size))
+ ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity)
+ ArrowBuffer.copyCurrent(self.offsets, to: &offsets, len: offsets.capacity)
+ return [nulls, offsets]
+ }
+}
diff --git a/Arrow/Sources/Arrow/ArrowReader.swift b/Arrow/Sources/Arrow/ArrowReader.swift
index 91d7465..bcaa234 100644
--- a/Arrow/Sources/Arrow/ArrowReader.swift
+++ b/Arrow/Sources/Arrow/ArrowReader.swift
@@ -116,6 +116,35 @@
rbLength: UInt(loadInfo.batchData.recordBatch.length))
}
+ private func loadListData(_ loadInfo: DataLoadInfo, field: org_apache_arrow_flatbuf_Field) -> Result<ArrowArrayHolder, ArrowError> {
+ guard let node = loadInfo.batchData.nextNode() else {
+ return .failure(.invalid("Node not found"))
+ }
+
+ guard let nullBuffer = loadInfo.batchData.nextBuffer() else {
+ return .failure(.invalid("Null buffer not found"))
+ }
+
+ guard let offsetBuffer = loadInfo.batchData.nextBuffer() else {
+ return .failure(.invalid("Offset buffer not found"))
+ }
+
+ let nullLength = UInt(ceil(Double(node.length) / 8))
+ let arrowNullBuffer = makeBuffer(nullBuffer, fileData: loadInfo.fileData, length: nullLength, messageOffset: loadInfo.messageOffset)
+ let arrowOffsetBuffer = makeBuffer(offsetBuffer, fileData: loadInfo.fileData, length: UInt(node.length + 1), messageOffset: loadInfo.messageOffset)
+
+ guard field.childrenCount == 1, let childField = field.children(at: 0) else {
+ return .failure(.invalid("List must have exactly one child"))
+ }
+
+ switch loadField(loadInfo, field: childField) {
+ case .success(let childHolder):
+ return makeArrayHolder(field, buffers: [arrowNullBuffer, arrowOffsetBuffer], nullCount: UInt(node.nullCount), children: [childHolder.array.arrowData], rbLength: UInt(loadInfo.batchData.recordBatch.length))
+ case .failure(let error):
+ return .failure(error)
+ }
+ }
+
private func loadPrimitiveData(
_ loadInfo: DataLoadInfo,
field: org_apache_arrow_flatbuf_Field)
@@ -178,12 +207,17 @@
_ loadInfo: DataLoadInfo,
field: org_apache_arrow_flatbuf_Field)
-> Result<ArrowArrayHolder, ArrowError> {
- if isNestedType(field.typeType) {
+ switch field.typeType {
+ case .struct_:
return loadStructData(loadInfo, field: field)
- } else if isFixedPrimitive(field.typeType) {
- return loadPrimitiveData(loadInfo, field: field)
- } else {
- return loadVariableData(loadInfo, field: field)
+ case .list:
+ return loadListData(loadInfo, field: field)
+ default:
+ if isFixedPrimitive(field.typeType) {
+ return loadPrimitiveData(loadInfo, field: field)
+ } else {
+ return loadVariableData(loadInfo, field: field)
+ }
}
}
diff --git a/Arrow/Sources/Arrow/ArrowReaderHelper.swift b/Arrow/Sources/Arrow/ArrowReaderHelper.swift
index 37f4680..d170a6c 100644
--- a/Arrow/Sources/Arrow/ArrowReaderHelper.swift
+++ b/Arrow/Sources/Arrow/ArrowReaderHelper.swift
@@ -135,7 +135,7 @@
}
}
-func makeStructHolder(
+func makeNestedHolder(
_ field: ArrowField,
buffers: [ArrowBuffer],
nullCount: UInt,
@@ -143,10 +143,14 @@
rbLength: UInt
) -> Result<ArrowArrayHolder, ArrowError> {
do {
- let arrowData = try ArrowData(field.type,
- buffers: buffers, children: children,
- nullCount: nullCount, length: rbLength)
- return .success(ArrowArrayHolderImpl(try StructArray(arrowData)))
+ let arrowData = try ArrowData(
+ field.type,
+ buffers: buffers,
+ children: children,
+ nullCount: nullCount,
+ length: rbLength
+ )
+ return .success(ArrowArrayHolderImpl(try NestedArray(arrowData)))
} catch let error as ArrowError {
return .failure(error)
} catch {
@@ -207,7 +211,9 @@
case .timestamp:
return makeTimestampHolder(field, buffers: buffers, nullCount: nullCount)
case .strct:
- return makeStructHolder(field, buffers: buffers, nullCount: nullCount, children: children!, rbLength: rbLength)
+ return makeNestedHolder(field, buffers: buffers, nullCount: nullCount, children: children!, rbLength: rbLength)
+ case .list:
+ return makeNestedHolder(field, buffers: buffers, nullCount: nullCount, children: children!, rbLength: rbLength)
default:
return .failure(.unknownType("Type \(typeId) currently not supported"))
}
@@ -230,15 +236,6 @@
}
}
-func isNestedType(_ type: org_apache_arrow_flatbuf_Type_) -> Bool {
- switch type {
- case .struct_:
- return true
- default:
- return false
- }
-}
-
func findArrowType( // swiftlint:disable:this cyclomatic_complexity function_body_length
_ field: org_apache_arrow_flatbuf_Field) -> ArrowType {
let type = field.typeType
@@ -307,7 +304,14 @@
ArrowField(childField.name ?? "", type: childType, isNullable: childField.nullable))
}
- return ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
+ return ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields)
+ case .list:
+ guard field.childrenCount == 1, let childField = field.children(at: 0) else {
+ return ArrowType(ArrowType.ArrowUnknown)
+ }
+ let childType = findArrowType(childField)
+ let childFieldName = childField.name ?? "item"
+ return ArrowTypeList(ArrowField(childFieldName, type: childType, isNullable: childField.nullable))
default:
return ArrowType(ArrowType.ArrowUnknown)
}
diff --git a/Arrow/Sources/Arrow/ArrowType.swift b/Arrow/Sources/Arrow/ArrowType.swift
index 381078f..a238e99 100644
--- a/Arrow/Sources/Arrow/ArrowType.swift
+++ b/Arrow/Sources/Arrow/ArrowType.swift
@@ -165,7 +165,7 @@
}
}
-public class ArrowNestedType: ArrowType {
+public class ArrowTypeStruct: ArrowType {
let fields: [ArrowField]
public init(_ info: ArrowType.Info, fields: [ArrowField]) {
self.fields = fields
@@ -173,6 +173,19 @@
}
}
+public class ArrowTypeList: ArrowType {
+ public let elementField: ArrowField
+
+ public init(_ elementField: ArrowField) {
+ self.elementField = elementField
+ super.init(ArrowType.ArrowList)
+ }
+
+ public convenience init(_ elementType: ArrowType, nullable: Bool = true) {
+ self.init(ArrowField("item", type: elementType, isNullable: nullable))
+ }
+}
+
public class ArrowType {
public private(set) var info: ArrowType.Info
public static let ArrowInt8 = Info.primitiveInfo(ArrowTypeId.int8)
@@ -195,6 +208,7 @@
public static let ArrowTime64 = Info.timeInfo(ArrowTypeId.time64)
public static let ArrowTimestamp = Info.timeInfo(ArrowTypeId.timestamp)
public static let ArrowStruct = Info.complexInfo(ArrowTypeId.strct)
+ public static let ArrowList = Info.complexInfo(ArrowTypeId.list)
public init(_ info: ArrowType.Info) {
self.info = info
@@ -320,7 +334,7 @@
return MemoryLayout<Int8>.stride
case .string:
return MemoryLayout<Int8>.stride
- case .strct:
+ case .strct, .list:
return 0
default:
fatalError("Stride requested for unknown type: \(self)")
@@ -375,6 +389,20 @@
return "z"
case ArrowTypeId.string:
return "u"
+ case ArrowTypeId.strct:
+ if let structType = self as? ArrowTypeStruct {
+ var format = "+s"
+ for field in structType.fields {
+ format += try field.type.cDataFormatId
+ }
+ return format
+ }
+ throw ArrowError.invalid("Invalid struct type")
+ case ArrowTypeId.list:
+ if let listType = self as? ArrowTypeList {
+ return "+l" + (try listType.elementField.type.cDataFormatId)
+ }
+ throw ArrowError.invalid("Invalid list type")
default:
throw ArrowError.notImplemented
}
diff --git a/Arrow/Sources/Arrow/ArrowWriter.swift b/Arrow/Sources/Arrow/ArrowWriter.swift
index 24176d3..c2ff290 100644
--- a/Arrow/Sources/Arrow/ArrowWriter.swift
+++ b/Arrow/Sources/Arrow/ArrowWriter.swift
@@ -72,7 +72,7 @@
private func writeField(_ fbb: inout FlatBufferBuilder, field: ArrowField) -> Result<Offset, ArrowError> {
var fieldsOffset: Offset?
- if let nestedField = field.type as? ArrowNestedType {
+ if let nestedField = field.type as? ArrowTypeStruct {
var offsets = [Offset]()
for field in nestedField.fields {
switch writeField(&fbb, field: field) {
@@ -182,9 +182,11 @@
org_apache_arrow_flatbuf_FieldNode(length: Int64(column.length),
nullCount: Int64(column.nullCount))
offsets.append(fbb.create(struct: fieldNode))
- if let nestedType = column.type as? ArrowNestedType {
- let structArray = column.array as? StructArray
- writeFieldNodes(nestedType.fields, columns: structArray!.arrowFields!, offsets: &offsets, fbb: &fbb)
+ if let nestedType = column.type as? ArrowTypeStruct {
+ let nestedArray = column.array as? NestedArray
+ if let nestedFields = nestedArray?.fields {
+ writeFieldNodes(nestedType.fields, columns: nestedFields, offsets: &offsets, fbb: &fbb)
+ }
}
}
}
@@ -202,10 +204,12 @@
let buffer = org_apache_arrow_flatbuf_Buffer(offset: Int64(bufferOffset), length: Int64(bufferDataSize))
buffers.append(buffer)
bufferOffset += bufferDataSize
- if let nestedType = column.type as? ArrowNestedType {
- let structArray = column.array as? StructArray
- writeBufferInfo(nestedType.fields, columns: structArray!.arrowFields!,
- bufferOffset: &bufferOffset, buffers: &buffers, fbb: &fbb)
+ if let nestedType = column.type as? ArrowTypeStruct {
+ let nestedArray = column.array as? NestedArray
+ if let nestedFields = nestedArray?.fields {
+ writeBufferInfo(nestedType.fields, columns: nestedFields,
+ bufferOffset: &bufferOffset, buffers: &buffers, fbb: &fbb)
+ }
}
}
}
@@ -251,20 +255,21 @@
private func writeRecordBatchData(
_ writer: inout DataWriter, fields: [ArrowField],
- columns: [ArrowArrayHolder])
- -> Result<Bool, ArrowError> {
+ columns: [ArrowArrayHolder]
+ ) -> Result<Bool, ArrowError> {
for index in 0 ..< fields.count {
let column = columns[index]
let colBufferData = column.getBufferData()
for var bufferData in colBufferData {
addPadForAlignment(&bufferData)
writer.append(bufferData)
- if let nestedType = column.type as? ArrowNestedType {
- guard let structArray = column.array as? StructArray else {
+ if let nestedType = column.type as? ArrowTypeStruct {
+ guard let nestedArray = column.array as? NestedArray,
+ let nestedFields = nestedArray.fields else {
return .failure(.invalid("Struct type array expected for nested type"))
}
- switch writeRecordBatchData(&writer, fields: nestedType.fields, columns: structArray.arrowFields!) {
+ switch writeRecordBatchData(&writer, fields: nestedType.fields, columns: nestedFields) {
case .success:
continue
case .failure(let error):
diff --git a/Arrow/Sources/Arrow/ProtoUtil.swift b/Arrow/Sources/Arrow/ProtoUtil.swift
index e91580e..9440d35 100644
--- a/Arrow/Sources/Arrow/ProtoUtil.swift
+++ b/Arrow/Sources/Arrow/ProtoUtil.swift
@@ -87,7 +87,14 @@
children.append(fromProto(field: childField))
}
- arrowType = ArrowNestedType(ArrowType.ArrowStruct, fields: children)
+ arrowType = ArrowTypeStruct(ArrowType.ArrowStruct, fields: children)
+ case .list:
+ guard field.childrenCount == 1, let childField = field.children(at: 0) else {
+ arrowType = ArrowType(ArrowType.ArrowUnknown)
+ break
+ }
+ let childArrowField = fromProto(field: childField)
+ arrowType = ArrowTypeList(childArrowField)
default:
arrowType = ArrowType(ArrowType.ArrowUnknown)
}
diff --git a/Arrow/Tests/ArrowTests/ArrayTests.swift b/Arrow/Tests/ArrowTests/ArrayTests.swift
index c7142c5..e28d8bf 100644
--- a/Arrow/Tests/ArrowTests/ArrayTests.swift
+++ b/Arrow/Tests/ArrowTests/ArrayTests.swift
@@ -320,8 +320,8 @@
let structArray = try structBuilder.finish()
XCTAssertEqual(structArray.length, 3)
XCTAssertNil(structArray[1])
- XCTAssertEqual(structArray.arrowFields![0].length, 3)
- XCTAssertNil(structArray.arrowFields![0].array.asAny(1))
+ XCTAssertEqual(structArray.fields![0].length, 3)
+ XCTAssertNil(structArray.fields![0].array.asAny(1))
XCTAssertEqual(structArray[0]![STIndex.bool.rawValue] as? Bool, true)
XCTAssertEqual(structArray[0]![STIndex.int8.rawValue] as? Int8, 1)
XCTAssertEqual(structArray[0]![STIndex.int16.rawValue] as? Int16, 2)
@@ -438,4 +438,90 @@
boolBuilder.append([true, false, true, false])
XCTAssertEqual(try boolBuilder.finish()[2], true)
}
+
+ func testListArrayPrimitive() throws {
+ let listBuilder = try ListArrayBuilder(ArrowTypeList(ArrowType(ArrowType.ArrowInt32)))
+
+ listBuilder.append([Int32(1), Int32(2), Int32(3)])
+ listBuilder.append([Int32(4), Int32(5)])
+ listBuilder.append(nil)
+ listBuilder.append([Int32(6), Int32(7), Int32(8), Int32(9)])
+
+ XCTAssertEqual(listBuilder.length, 4)
+ XCTAssertEqual(listBuilder.nullCount, 1)
+
+ let listArray = try listBuilder.finish()
+ XCTAssertEqual(listArray.length, 4)
+
+ let firstList = listArray[0]
+ XCTAssertNotNil(firstList, "First list should not be nil")
+ XCTAssertEqual(firstList!.count, 3, "First list should have 3 elements")
+ XCTAssertEqual(firstList![0] as? Int32, 1)
+ XCTAssertEqual(firstList![1] as? Int32, 2)
+ XCTAssertEqual(firstList![2] as? Int32, 3)
+
+ let secondList = listArray[1]
+ XCTAssertEqual(secondList!.count, 2)
+ XCTAssertEqual(secondList![0] as? Int32, 4)
+ XCTAssertEqual(secondList![1] as? Int32, 5)
+
+ XCTAssertNil(listArray[2])
+
+ let fourthList = listArray[3]
+ XCTAssertEqual(fourthList!.count, 4)
+ XCTAssertEqual(fourthList![0] as? Int32, 6)
+ XCTAssertEqual(fourthList![3] as? Int32, 9)
+ }
+
+ func testListArrayNested() throws {
+ let innerListType = ArrowTypeList(ArrowField("item", type: ArrowType(ArrowType.ArrowInt32), isNullable: true))
+ let outerListType = ArrowTypeList(ArrowField("item", type: innerListType, isNullable: true))
+ let outerListBuilder = try ListArrayBuilder(outerListType)
+
+ guard let innerListBuilder = outerListBuilder.valueBuilder as? ListArrayBuilder else {
+ XCTFail("Failed to cast valueBuilder to ListArrayBuilder")
+ return
+ }
+
+ outerListBuilder.bufferBuilder.append(2)
+ innerListBuilder.append([Int32(1), Int32(2)])
+ innerListBuilder.append([Int32(3), Int32(4), Int32(5)])
+
+ outerListBuilder.bufferBuilder.append(1)
+ innerListBuilder.append([Int32(6)])
+
+ outerListBuilder.bufferBuilder.append(nil)
+
+ outerListBuilder.bufferBuilder.append([])
+
+ let nestedArray = try outerListBuilder.finish()
+ XCTAssertEqual(nestedArray.length, 4)
+ XCTAssertEqual(nestedArray.nullCount, 1)
+
+ let firstOuterList = nestedArray[0]!
+ XCTAssertEqual(firstOuterList.count, 2)
+
+ let firstInnerList = firstOuterList[0] as! [Any?]
+ XCTAssertEqual(firstInnerList.count, 2)
+ XCTAssertEqual(firstInnerList[0] as? Int32, 1)
+ XCTAssertEqual(firstInnerList[1] as? Int32, 2)
+
+ let secondInnerList = firstOuterList[1] as! [Any?]
+ XCTAssertEqual(secondInnerList.count, 3)
+ XCTAssertEqual(secondInnerList[0] as? Int32, 3)
+ XCTAssertEqual(secondInnerList[1] as? Int32, 4)
+ XCTAssertEqual(secondInnerList[2] as? Int32, 5)
+
+ let secondOuterList = nestedArray[1]!
+ XCTAssertEqual(secondOuterList.count, 1)
+
+ let thirdInnerList = secondOuterList[0] as! [Any?]
+ XCTAssertEqual(thirdInnerList.count, 1)
+ XCTAssertEqual(thirdInnerList[0] as? Int32, 6)
+
+ XCTAssertNil(nestedArray[2])
+
+ let emptyList = nestedArray[3]!
+ XCTAssertEqual(emptyList.count, 0)
+ }
}
diff --git a/Arrow/Tests/ArrowTests/IPCTests.swift b/Arrow/Tests/ArrowTests/IPCTests.swift
index 26f38ce..ea44b77 100644
--- a/Arrow/Tests/ArrowTests/IPCTests.swift
+++ b/Arrow/Tests/ArrowTests/IPCTests.swift
@@ -90,10 +90,12 @@
XCTAssertEqual(recordBatch.schema.fields.count, 1)
XCTAssertEqual(recordBatch.schema.fields[0].name, "my struct")
XCTAssertEqual(recordBatch.schema.fields[0].type.id, .strct)
- let structArray = recordBatch.columns[0].array as? StructArray
- XCTAssertEqual(structArray!.arrowFields!.count, 2)
- XCTAssertEqual(structArray!.arrowFields![0].type.id, .string)
- XCTAssertEqual(structArray!.arrowFields![1].type.id, .boolean)
+ let nestedArray = recordBatch.columns[0].array as? NestedArray
+ XCTAssertNotNil(nestedArray)
+ XCTAssertNotNil(nestedArray!.fields)
+ XCTAssertEqual(nestedArray!.fields!.count, 2)
+ XCTAssertEqual(nestedArray!.fields![0].type.id, .string)
+ XCTAssertEqual(nestedArray!.fields![1].type.id, .boolean)
let column = recordBatch.columns[0]
let str = column.array as? AsString
XCTAssertEqual("\(str!.asString(0))", "{0,false}")
@@ -121,14 +123,14 @@
func makeStructSchema() -> ArrowSchema {
let testObj = StructTest()
var fields = [ArrowField]()
- let buildStructType = {() -> ArrowNestedType in
+ let buildStructType = {() -> ArrowTypeStruct in
let mirror = Mirror(reflecting: testObj)
for (property, value) in mirror.children {
let arrowType = ArrowType(ArrowType.infoForType(type(of: value)))
fields.append(ArrowField(property!, type: arrowType, isNullable: true))
}
- return ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
+ return ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields)
}
return ArrowSchema.Builder()
@@ -515,8 +517,8 @@
XCTAssertEqual(recordBatch.schema.fields.count, 1)
XCTAssertEqual(recordBatch.schema.fields[0].name, "struct1")
XCTAssertEqual(recordBatch.schema.fields[0].type.id, .strct)
- XCTAssertTrue(recordBatch.schema.fields[0].type is ArrowNestedType)
- let nestedType = (recordBatch.schema.fields[0].type as? ArrowNestedType)!
+ XCTAssertTrue(recordBatch.schema.fields[0].type is ArrowTypeStruct)
+ let nestedType = (recordBatch.schema.fields[0].type as? ArrowTypeStruct)!
XCTAssertEqual(nestedType.fields.count, 14)
let columns = recordBatch.columns
XCTAssertEqual(columns[0].nullCount, 1)
@@ -524,23 +526,24 @@
let structVal =
"\((columns[0].array as? AsString)!.asString(0))"
XCTAssertEqual(structVal, "{true,1,2,3,4,5,6,7,8,9.9,10.1,11,12,\(currentDate)}")
- let structArray = (recordBatch.columns[0].array as? StructArray)!
- XCTAssertEqual(structArray.length, 3)
- XCTAssertEqual(structArray.arrowFields!.count, 14)
- XCTAssertEqual(structArray.arrowFields![0].type.id, .boolean)
- XCTAssertEqual(structArray.arrowFields![1].type.id, .int8)
- XCTAssertEqual(structArray.arrowFields![2].type.id, .int16)
- XCTAssertEqual(structArray.arrowFields![3].type.id, .int32)
- XCTAssertEqual(structArray.arrowFields![4].type.id, .int64)
- XCTAssertEqual(structArray.arrowFields![5].type.id, .uint8)
- XCTAssertEqual(structArray.arrowFields![6].type.id, .uint16)
- XCTAssertEqual(structArray.arrowFields![7].type.id, .uint32)
- XCTAssertEqual(structArray.arrowFields![8].type.id, .uint64)
- XCTAssertEqual(structArray.arrowFields![9].type.id, .double)
- XCTAssertEqual(structArray.arrowFields![10].type.id, .float)
- XCTAssertEqual(structArray.arrowFields![11].type.id, .string)
- XCTAssertEqual(structArray.arrowFields![12].type.id, .binary)
- XCTAssertEqual(structArray.arrowFields![13].type.id, .date64)
+ let nestedArray = (recordBatch.columns[0].array as? NestedArray)!
+ XCTAssertEqual(nestedArray.length, 3)
+ XCTAssertNotNil(nestedArray.fields)
+ XCTAssertEqual(nestedArray.fields!.count, 14)
+ XCTAssertEqual(nestedArray.fields![0].type.id, .boolean)
+ XCTAssertEqual(nestedArray.fields![1].type.id, .int8)
+ XCTAssertEqual(nestedArray.fields![2].type.id, .int16)
+ XCTAssertEqual(nestedArray.fields![3].type.id, .int32)
+ XCTAssertEqual(nestedArray.fields![4].type.id, .int64)
+ XCTAssertEqual(nestedArray.fields![5].type.id, .uint8)
+ XCTAssertEqual(nestedArray.fields![6].type.id, .uint16)
+ XCTAssertEqual(nestedArray.fields![7].type.id, .uint32)
+ XCTAssertEqual(nestedArray.fields![8].type.id, .uint64)
+ XCTAssertEqual(nestedArray.fields![9].type.id, .double)
+ XCTAssertEqual(nestedArray.fields![10].type.id, .float)
+ XCTAssertEqual(nestedArray.fields![11].type.id, .string)
+ XCTAssertEqual(nestedArray.fields![12].type.id, .binary)
+ XCTAssertEqual(nestedArray.fields![13].type.id, .date64)
}
case.failure(let error):
throw error
diff --git a/Arrow/Tests/ArrowTests/TableTests.swift b/Arrow/Tests/ArrowTests/TableTests.swift
index dc5cabc..6f5482e 100644
--- a/Arrow/Tests/ArrowTests/TableTests.swift
+++ b/Arrow/Tests/ArrowTests/TableTests.swift
@@ -53,14 +53,14 @@
let testObj = StructTest()
var fields = [ArrowField]()
- let buildStructType = {() -> ArrowNestedType in
+ let buildStructType = {() -> ArrowTypeStruct in
let mirror = Mirror(reflecting: testObj)
for (property, value) in mirror.children {
let arrowType = ArrowType(ArrowType.infoForType(type(of: value)))
fields.append(ArrowField(property!, type: arrowType, isNullable: true))
}
- return ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
+ return ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields)
}
let structType = buildStructType()