GH-49071: [Ruby] Add support for writing list and large list arrays (#49072)
### Rationale for this change
List and large list arrays use different offset sizes (32-bit and 64-bit, respectively).
### What changes are included in this PR?
* Add `ArrowFormat::ListType#to_flatbuffers`
* Add `ArrowFormat::LargeListType#to_flatbuffers`
* Add `ArrowFormat::VariableSizeListArray#child`
* Add `ArrowFormat::VariableSizeListArray#each_buffer`
* Make `garrow_array_get_null_bitmap()` return `NULL` when the null bitmap doesn't exist
* Add `garrow_list_array_get_value_offsets_buffer()`
* Add `garrow_large_list_array_get_value_offsets_buffer()`
### Are these changes tested?
Yes.
### Are there any user-facing changes?
Yes.
* GitHub Issue: #49071
Authored-by: Sutou Kouhei <kou@clear-code.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index cf6e947..bf5bf60 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -1114,7 +1114,11 @@
auto arrow_array = garrow_array_get_raw(array);
auto arrow_null_bitmap = arrow_array->null_bitmap();
- return garrow_buffer_new_raw(&arrow_null_bitmap);
+ if (arrow_null_bitmap) {
+ return garrow_buffer_new_raw(&arrow_null_bitmap);
+ } else {
+ return nullptr;
+ }
}
/**
diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp
index 9bc5326..ef7502d 100644
--- a/c_glib/arrow-glib/composite-array.cpp
+++ b/c_glib/arrow-glib/composite-array.cpp
@@ -188,6 +188,22 @@
return arrow_list_array->raw_value_offsets();
};
+// Returns the value offsets buffer of a list-like array as a GArrowBuffer
+// reference owned by the caller, or nullptr when the array has no such
+// buffer.
+//
+// LIST_ARRAY_CLASS is the concrete Arrow C++ array class that @array wraps
+// (this file instantiates it with arrow::ListArray and
+// arrow::LargeListArray).
+template <typename LIST_ARRAY_CLASS>
+GArrowBuffer *
+garrow_base_list_array_get_value_offsets_buffer(GArrowArray *array)
+{
+  // Prefer the GArrowBuffer stored in the array's "buffer1" property;
+  // g_object_get() hands back a reference that the caller owns.
+  GArrowBuffer *buffer = nullptr;
+  g_object_get(array, "buffer1", &buffer, nullptr);
+  if (buffer) {
+    return buffer;
+  }
+
+  // Otherwise wrap the offsets buffer held by the Arrow C++ array.
+  auto arrow_array = garrow_array_get_raw(array);
+  auto arrow_list_array = std::static_pointer_cast<LIST_ARRAY_CLASS>(arrow_array);
+  auto arrow_buffer = arrow_list_array->value_offsets();
+  if (!arrow_buffer) {
+    // Honor the (nullable) contract of the public getters instead of
+    // wrapping an empty shared_ptr, like garrow_array_get_null_bitmap().
+    return nullptr;
+  }
+  return garrow_buffer_new_raw(&arrow_buffer);
+};
+
G_BEGIN_DECLS
static void
@@ -385,6 +401,21 @@
n_offsets);
}
+/**
+ * garrow_list_array_get_value_offsets_buffer:
+ * @array: A #GArrowListArray.
+ *
+ * Gets the value offsets buffer of @array. List arrays use 32-bit
+ * offsets (see garrow_list_array_get_value_offsets()).
+ *
+ * Returns: (transfer full) (nullable): The value offsets buffer.
+ *
+ * Since: 24.0.0
+ */
+GArrowBuffer *
+garrow_list_array_get_value_offsets_buffer(GArrowListArray *array)
+{
+ return garrow_base_list_array_get_value_offsets_buffer<arrow::ListArray>(
+ GARROW_ARRAY(array));
+}
+
typedef struct GArrowLargeListArrayPrivate_
{
GArrowArray *raw_values;
@@ -602,6 +633,21 @@
return reinterpret_cast<const gint64 *>(value_offsets);
}
+/**
+ * garrow_large_list_array_get_value_offsets_buffer:
+ * @array: A #GArrowLargeListArray.
+ *
+ * Gets the value offsets buffer of @array. Large list arrays use 64-bit
+ * offsets (see garrow_large_list_array_get_value_offsets()).
+ *
+ * Returns: (transfer full) (nullable): The value offsets buffer.
+ *
+ * Since: 24.0.0
+ */
+GArrowBuffer *
+garrow_large_list_array_get_value_offsets_buffer(GArrowLargeListArray *array)
+{
+ return garrow_base_list_array_get_value_offsets_buffer<arrow::LargeListArray>(
+ GARROW_ARRAY(array));
+}
+
typedef struct GArrowFixedSizeListArrayPrivate_
{
GArrowArray *raw_values;
diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h
index 117ffdf..73d8d7f 100644
--- a/c_glib/arrow-glib/composite-array.h
+++ b/c_glib/arrow-glib/composite-array.h
@@ -68,6 +68,10 @@
const gint32 *
garrow_list_array_get_value_offsets(GArrowListArray *array, gint64 *n_offsets);
+GARROW_AVAILABLE_IN_24_0
+GArrowBuffer *
+garrow_list_array_get_value_offsets_buffer(GArrowListArray *array);
+
#define GARROW_TYPE_LARGE_LIST_ARRAY (garrow_large_list_array_get_type())
GARROW_AVAILABLE_IN_0_16
G_DECLARE_DERIVABLE_TYPE(
@@ -110,6 +114,10 @@
const gint64 *
garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array, gint64 *n_offsets);
+GARROW_AVAILABLE_IN_24_0
+GArrowBuffer *
+garrow_large_list_array_get_value_offsets_buffer(GArrowLargeListArray *array);
+
#define GARROW_TYPE_FIXED_SIZE_LIST_ARRAY (garrow_fixed_size_list_array_get_type())
GARROW_AVAILABLE_IN_23_0
G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeListArray,
diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb
index 825311f..df1356c 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -370,12 +370,20 @@
end
class VariableSizeListArray < Array
+ attr_reader :child
def initialize(type, size, validity_buffer, offsets_buffer, child)
super(type, size, validity_buffer)
@offsets_buffer = offsets_buffer
@child = child
end
+ # Yields this array's own buffers in order: the validity buffer, then
+ # the offsets buffer. The child array's buffers are not yielded here.
+ # Returns an Enumerator over the same buffers when called without a
+ # block.
+ def each_buffer(&block)
+   if block_given?
+     yield(@validity_buffer)
+     yield(@offsets_buffer)
+   else
+     to_enum(__method__)
+   end
+ end
+
def to_a
child_values = @child.to_a
values = @offsets_buffer.
diff --git a/ruby/red-arrow-format/lib/arrow-format/field.rb b/ruby/red-arrow-format/lib/arrow-format/field.rb
index fc5639b..3642c86 100644
--- a/ruby/red-arrow-format/lib/arrow-format/field.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/field.rb
@@ -49,7 +49,9 @@
else
fb_field.type = @type.to_flatbuffers
end
- if @type.respond_to?(:children)
+ if @type.respond_to?(:child)
+ fb_field.children = [@type.child.to_flatbuffers]
+ elsif @type.respond_to?(:children)
fb_field.children = @type.children.collect(&:to_flatbuffers)
end
# fb_field.custom_metadata = @custom_metadata
diff --git a/ruby/red-arrow-format/lib/arrow-format/record-batch.rb b/ruby/red-arrow-format/lib/arrow-format/record-batch.rb
index cf925ee..a641c87 100644
--- a/ruby/red-arrow-format/lib/arrow-format/record-batch.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/record-batch.rb
@@ -70,7 +70,9 @@
Enumerator.new do |yielder|
traverse = lambda do |array|
yielder << array
- if array.respond_to?(:children)
+ if array.respond_to?(:child)
+ traverse.call(array.child)
+ elsif array.respond_to?(:children)
array.children.each do |child_array|
traverse.call(child_array)
end
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 9ba8cae..50c392f 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -707,7 +707,6 @@
super()
@child = child
end
-
end
class ListType < VariableSizeListType
@@ -718,6 +717,10 @@
def build_array(size, validity_buffer, offsets_buffer, child)
ListArray.new(self, size, validity_buffer, offsets_buffer, child)
end
+
+ # Returns the FlatBuffers representation of this list type. The table
+ # is created with no fields set; the child field is not serialized
+ # here.
+ def to_flatbuffers
+ FB::List::Data.new
+ end
end
class LargeListType < VariableSizeListType
@@ -728,6 +731,10 @@
def build_array(size, validity_buffer, offsets_buffer, child)
LargeListArray.new(self, size, validity_buffer, offsets_buffer, child)
end
+
+ # Returns the FlatBuffers representation of this large list type. The
+ # table is created with no fields set; the child field is not
+ # serialized here.
+ def to_flatbuffers
+ FB::LargeList::Data.new
+ end
end
class StructType < Type
diff --git a/ruby/red-arrow-format/test/test-writer.rb b/ruby/red-arrow-format/test/test-writer.rb
index 841194f..bf05f20 100644
--- a/ruby/red-arrow-format/test/test-writer.rb
+++ b/ruby/red-arrow-format/test/test-writer.rb
@@ -83,11 +83,22 @@
red_arrow_type.scale)
when Arrow::FixedSizeBinaryDataType
ArrowFormat::FixedSizeBinaryType.new(red_arrow_type.byte_width)
+ when Arrow::ListDataType
+ ArrowFormat::ListType.new(convert_field(red_arrow_type.field))
+ when Arrow::LargeListDataType
+ ArrowFormat::LargeListType.new(convert_field(red_arrow_type.field))
else
raise "Unsupported type: #{red_arrow_type.inspect}"
end
end
+ # Converts a Red Arrow field into an ArrowFormat::Field with the same
+ # name and nullability; the data type is converted with convert_type.
+ def convert_field(red_arrow_field)
+   name = red_arrow_field.name
+   type = convert_type(red_arrow_field.data_type)
+   nullable = red_arrow_field.nullable?
+   # NOTE(review): the meaning of the fourth argument (nil) is not
+   # visible from here; confirm against ArrowFormat::Field#initialize.
+   ArrowFormat::Field.new(name, type, nullable, nil)
+ end
+
def convert_buffer(buffer)
return nil if buffer.nil?
IO::Buffer.for(buffer.data.to_s)
@@ -111,6 +122,11 @@
type.build_array(red_arrow_array.size,
convert_buffer(red_arrow_array.null_bitmap),
convert_buffer(red_arrow_array.data_buffer))
+ when ArrowFormat::VariableSizeListType
+ type.build_array(red_arrow_array.size,
+ convert_buffer(red_arrow_array.null_bitmap),
+ convert_buffer(red_arrow_array.value_offsets_buffer),
+ convert_array(red_arrow_array.values_raw))
else
raise "Unsupported array #{red_arrow_array.inspect}"
end
@@ -706,6 +722,32 @@
@values)
end
end
+
+ sub_test_case("List") do
+   # Builds a list array of int8 values that includes the int8 limits
+   # and a null list as its second element.
+   def build_array
+     data_type = Arrow::ListDataType.new(name: "count", type: :int8)
+     lists = [[-128, 127], nil, [-1, 0, 1]]
+     Arrow::ListArray.new(data_type, lists)
+   end
+
+   # @values is assigned outside this block (not visible here).
+   def test_write
+     expected = [[-128, 127], nil, [-1, 0, 1]]
+     assert_equal(expected, @values)
+   end
+ end
+
+ sub_test_case("LargeList") do
+   # Builds a large list array of int8 values that includes the int8
+   # limits and a null list as its second element.
+   def build_array
+     data_type = Arrow::LargeListDataType.new(name: "count", type: :int8)
+     lists = [[-128, 127], nil, [-1, 0, 1]]
+     Arrow::LargeListArray.new(data_type, lists)
+   end
+
+   # @values is assigned outside this block (not visible here).
+   def test_write
+     expected = [[-128, 127], nil, [-1, 0, 1]]
+     assert_equal(expected, @values)
+   end
+ end
end
end
end