title: Row Format sidebar_position: 20 id: row_format license: | Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the “License”); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
This page covers the row-based serialization format for high-performance, cache-friendly data access.
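Apache Fory™ Row Format is a binary format optimized for analytics workloads and random field access. The table below compares it with object graph serialization: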
| Use Case | Row Format | Object Graph |
|---|---|---|
| Analytics/OLAP | ✅ | ❌ |
| Random field access | ✅ | ❌ |
| Full object serialization | ❌ | ✅ |
| Complex object graphs | ❌ | ✅ |
| Reference tracking | ❌ | ✅ |
| Cross-language (simple types) | ✅ | ✅ |
#include <cassert>
#include <cstdint>
#include <string>

#include "fory/encoder/row_encoder.h"
#include "fory/row/writer.h"

using namespace fory::row;
using namespace fory::row::encoder;

// Plain struct registered with FORY_STRUCT so that RowEncoder can encode it.
struct Person {
  int32_t id;
  std::string name;
  float score;
  FORY_STRUCT(Person, id, name, score);
};

int main() {
  // Create encoder
  RowEncoder<Person> encoder;

  // Encode a person
  Person person{1, "Alice", 95.5f};
  encoder.encode(person);

  // Get the encoded row
  auto row = encoder.get_writer().to_row();

  // Random access to fields; the index follows the order given to FORY_STRUCT
  int32_t id = row->get_int32(0);
  std::string name = row->get_string(1);
  float score = row->get_float(2);

  assert(id == 1);
  assert(name == "Alice");
  // 95.5f is exactly representable as a float, so == is safe here
  assert(score == 95.5f);
  return 0;
}
The RowEncoder<T> template class provides type-safe encoding:
#include <iostream>

#include "fory/encoder/row_encoder.h"

// Struct registered with FORY_STRUCT so that RowEncoder<Point> can encode it.
struct Point {
  double x;
  double y;
  FORY_STRUCT(Point, x, y);
};

// Create encoder
RowEncoder<Point> encoder;

// Access schema (for inspection)
const Schema& schema = encoder.get_schema();
// '\n' instead of std::endl: same output without forcing a stream flush
std::cout << "Fields: " << schema.field_names().size() << '\n';

// Encode value
Point p{1.0, 2.0};
encoder.encode(p);

// Get result as Row
auto row = encoder.get_writer().to_row();
struct Address { std::string city; std::string country; FORY_STRUCT(Address, city, country); }; struct Person { std::string name; Address address; FORY_STRUCT(Person, name, address); }; // encode nested struct RowEncoder<Person> encoder; Person person{"Alice", {"New York", "USA"}}; encoder.encode(person); auto row = encoder.get_writer().to_row(); std::string name = row->get_string(0); // Access nested struct auto address_row = row->get_struct(1); std::string city = address_row->get_string(0); std::string country = address_row->get_string(1);
struct Record { std::vector<int32_t> values; std::string label; FORY_STRUCT(Record, values, label); }; RowEncoder<Record> encoder; Record record{{1, 2, 3, 4, 5}, "test"}; encoder.encode(record); auto row = encoder.get_writer().to_row(); auto array = row->get_array(0); int count = array->num_elements(); for (int i = 0; i < count; i++) { int32_t value = array->get_int32(i); }
// encode a vector directly (not inside a struct) std::vector<Person> people{ {"Alice", {"NYC", "USA"}}, {"Bob", {"London", "UK"}} }; RowEncoder<decltype(people)> encoder; encoder.encode(people); // get array data auto array = encoder.get_writer().copy_to_array_data(); auto first_person = array->get_struct(0); std::string first_name = first_person->get_string(0);
The Row class provides random access to struct fields:
// Accessor interface for an encoded row. Every getter takes the index `i`
// of the field to read; all listed members are const.
class Row {
public:
  // Null check: true when field `i` is null. Check this before calling a getter.
  bool is_null_at(int i) const;

  // Primitive getters, one per fixed-size row type
  bool get_boolean(int i) const;
  int8_t get_int8(int i) const;
  int16_t get_int16(int i) const;
  int32_t get_int32(int i) const;
  int64_t get_int64(int i) const;
  float get_float(int i) const;
  double get_double(int i) const;

  // String/binary getters; both return by value
  std::string get_string(int i) const;
  std::vector<uint8_t> get_binary(int i) const;

  // Nested types: struct, list and map fields are returned as shared handles
  std::shared_ptr<Row> get_struct(int i) const;
  std::shared_ptr<ArrayData> get_array(int i) const;
  std::shared_ptr<MapData> get_map(int i) const;

  // Metadata: field count and the schema describing this row
  int num_fields() const;
  SchemaPtr schema() const;

  // Debug: textual representation of the row
  std::string to_string() const;
};
The ArrayData class provides access to list/array elements:
// Accessor interface for list/array data. Getters take the index `i` of the
// element to read; all listed members are const.
class ArrayData {
public:
  // Null check: true when element `i` is null
  bool is_null_at(int i) const;

  // Element count
  int num_elements() const;

  // Primitive getters (same as Row)
  int32_t get_int32(int i) const;
  // ... other primitives

  // String getter
  std::string get_string(int i) const;

  // Nested types: struct, list and map elements are returned as shared handles
  std::shared_ptr<Row> get_struct(int i) const;
  std::shared_ptr<ArrayData> get_array(int i) const;
  std::shared_ptr<MapData> get_map(int i) const;

  // Type info: the list type describing this array
  ListTypePtr type() const;
};
The MapData class provides access to map key-value pairs:
// Accessor interface for map data. Keys and values are exposed as two
// separate arrays rather than through per-entry getters.
// NOTE(review): these accessors are listed without `const`, unlike the ones
// on Row and ArrayData — confirm against the actual header.
class MapData {
public:
  // Element count
  int num_elements();

  // Access keys and values as arrays
  std::shared_ptr<ArrayData> keys_array();
  std::shared_ptr<ArrayData> values_array();

  // Type info: the map type describing this data
  MapTypePtr type();
};
Schemas define the structure of row data:
#include <iostream>

#include "fory/row/schema.h"

using namespace fory::row;

// Create schema programmatically
auto person_schema = schema({
    field("id", int32()),
    field("name", utf8()),
    field("score", float32()),
    field("active", boolean())
});

// Access schema info: print each field as "<name>: <type name>"
for (const auto& f : person_schema->fields()) {
  // '\n' instead of std::endl avoids flushing the stream on every iteration
  std::cout << f->name() << ": " << f->type()->name() << '\n';
}
Available types for row format:
// Primitive types DataTypePtr boolean(); // bool DataTypePtr int8(); // int8_t DataTypePtr int16(); // int16_t DataTypePtr int32(); // int32_t DataTypePtr int64(); // int64_t DataTypePtr float32(); // float DataTypePtr float64(); // double // String and binary DataTypePtr utf8(); // std::string DataTypePtr binary(); // std::vector<uint8_t> // Complex types DataTypePtr list(DataTypePtr element_type); DataTypePtr map(DataTypePtr key_type, DataTypePtr value_type); DataTypePtr struct_(std::vector<FieldPtr> fields);
The RowEncodeTrait template automatically infers types:
// Type inference for primitives RowEncodeTrait<int32_t>::Type(); // Returns int32() RowEncodeTrait<float>::Type(); // Returns float32() RowEncodeTrait<std::string>::Type(); // Returns utf8() // Type inference for collections RowEncodeTrait<std::vector<int32_t>>::Type(); // Returns list(int32()) // Type inference for maps RowEncodeTrait<std::map<std::string, int32_t>>::Type(); // Returns map(utf8(), int32()) // Type inference for structs (requires FORY_STRUCT) RowEncodeTrait<Person>::Type(); // Returns struct_({...}) RowEncodeTrait<Person>::Schema(); // Returns schema({...})
For manual row construction:
#include "fory/row/writer.h"

// Describe the row: three fields, addressed below by index 0..2
auto my_schema = schema({
    field("x", int32()),
    field("y", float64()),
    field("name", utf8())
});

// Build a writer for that schema and reset it before writing
RowWriter writer(my_schema);
writer.reset();

// Write each field by index
writer.write(0, 42);             // x = 42
writer.write(1, 3.14);           // y = 3.14
writer.write_string(2, "test");  // name = "test"

// Materialize the written data as a row
auto row = writer.to_row();
For manual array construction:
// Create array type auto array_type = list(int32()); // Create writer ArrayWriter writer(array_type); writer.reset(5); // 5 elements // write elements for (int i = 0; i < 5; i++) { writer.write(i, i * 10); } // get result auto array = writer.copy_to_array_data();
// Set null at specific index writer.set_null_at(2); // Field 2 is null // Check null when reading if (!row->is_null_at(2)) { std::string value = row->get_string(2); }
+------------------+--------------------+--------------------+
| Null Bitmap      | Fixed-Size Data    | Variable-Size Data |
+------------------+--------------------+--------------------+
| ceil(n/8) B      | 8 * n bytes        | variable           |
+------------------+--------------------+--------------------+
+------------+------------------+--------------------+--------------------+
| Num Elems  | Null Bitmap      | Fixed-Size Data    | Variable-Size Data |
+------------+------------------+--------------------+--------------------+
| 8 bytes    | ceil(n/8) bytes  | elem_size * n      | variable           |
+------------+------------------+--------------------+--------------------+
+------------------+------------------+
| Keys Array       | Values Array     |
+------------------+------------------+
RowEncoder<Person> encoder; // encode multiple records for (const auto& person : people) { encoder.encode(person); auto row = encoder.get_writer().to_row(); // Process row... }
// get buffer reference for pre-allocation auto& buffer = encoder.get_writer().buffer(); buffer->reserve(expected_size);
// Process in batches for better cache utilization std::vector<Person> batch; batch.reserve(BATCH_SIZE); while (has_more()) { batch.clear(); fill_batch(batch); for (const auto& person : batch) { encoder.encode(person); process(encoder.get_writer().to_row()); } }
// Point to existing buffer (zero-copy) Row row(schema); row.point_to(buffer, offset, size); // Access fields directly from buffer int32_t id = row.get_int32(0);
| C++ Type | Row Type | Fixed Size |
|---|---|---|
| `bool` | `boolean()` | 1 byte |
| `int8_t` | `int8()` | 1 byte |
| `int16_t` | `int16()` | 2 bytes |
| `int32_t` | `int32()` | 4 bytes |
| `int64_t` | `int64()` | 8 bytes |
| `float` | `float32()` | 4 bytes |
| `double` | `float64()` | 8 bytes |
| `std::string` | `utf8()` | Variable |
| `std::vector<T>` | `list(T)` | Variable |
| `std::map<K,V>` | `map(K,V)` | Variable |
| `std::optional<T>` | Inner type | Nullable |
| Struct (`FORY_STRUCT`) | `struct_({...})` | Variable |