---
title: Row Format
sidebar_position: 8
id: cpp_row_format
license: |
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the “License”); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0
---
This page covers the row-based serialization format for high-performance, cache-friendly data access.
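Apache Fory™ Row Format is a binary format optimized for random field access and cache-friendly, analytics-style workloads. The table below compares it with object graph serialization: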
| Use Case | Row Format | Object Graph |
|---|---|---|
| Analytics/OLAP | ✅ | ❌ |
| Random field access | ✅ | ❌ |
| Full object serialization | ❌ | ✅ |
| Complex object graphs | ❌ | ✅ |
| Reference tracking | ❌ | ✅ |
| Cross-language (simple types) | ✅ | ✅ |
#include "fory/encoder/row_encoder.h" #include "fory/row/writer.h" using namespace fory::row; using namespace fory::row::encoder; // Define a struct struct Person { int32_t id; std::string name; float score; }; // Register field metadata (required for row encoding) FORY_FIELD_INFO(Person, id, name, score); int main() { // Create encoder RowEncoder<Person> encoder; // Encode a person Person person{1, "Alice", 95.5f}; encoder.Encode(person); // Get the encoded row auto row = encoder.GetWriter().ToRow(); // Random access to fields int32_t id = row->GetInt32(0); std::string name = row->GetString(1); float score = row->GetFloat(2); assert(id == 1); assert(name == "Alice"); assert(score == 95.5f); return 0; }
The RowEncoder<T> template class provides type-safe encoding:
#include "fory/encoder/row_encoder.h" // Define struct with FORY_FIELD_INFO struct Point { double x; double y; }; FORY_FIELD_INFO(Point, x, y); // Create encoder RowEncoder<Point> encoder; // Access schema (for inspection) const Schema& schema = encoder.GetSchema(); std::cout << "Fields: " << schema.field_names().size() << std::endl; // Encode value Point p{1.0, 2.0}; encoder.Encode(p); // Get result as Row auto row = encoder.GetWriter().ToRow();
struct Address { std::string city; std::string country; }; FORY_FIELD_INFO(Address, city, country); struct Person { std::string name; Address address; }; FORY_FIELD_INFO(Person, name, address); // Encode nested struct RowEncoder<Person> encoder; Person person{"Alice", {"New York", "USA"}}; encoder.Encode(person); auto row = encoder.GetWriter().ToRow(); std::string name = row->GetString(0); // Access nested struct auto address_row = row->GetStruct(1); std::string city = address_row->GetString(0); std::string country = address_row->GetString(1);
struct Record { std::vector<int32_t> values; std::string label; }; FORY_FIELD_INFO(Record, values, label); RowEncoder<Record> encoder; Record record{{1, 2, 3, 4, 5}, "test"}; encoder.Encode(record); auto row = encoder.GetWriter().ToRow(); auto array = row->GetArray(0); int count = array->num_elements(); for (int i = 0; i < count; i++) { int32_t value = array->GetInt32(i); }
// Encode a vector directly (not inside a struct) std::vector<Person> people{ {"Alice", {"NYC", "USA"}}, {"Bob", {"London", "UK"}} }; RowEncoder<decltype(people)> encoder; encoder.Encode(people); // Get array data auto array = encoder.GetWriter().CopyToArrayData(); auto first_person = array->GetStruct(0); std::string first_name = first_person->GetString(0);
The Row class provides random access to struct fields:
class Row { public: // Null check bool IsNullAt(int i) const; // Primitive getters bool GetBoolean(int i) const; int8_t GetInt8(int i) const; int16_t GetInt16(int i) const; int32_t GetInt32(int i) const; int64_t GetInt64(int i) const; float GetFloat(int i) const; double GetDouble(int i) const; // String/binary getter std::string GetString(int i) const; std::vector<uint8_t> GetBinary(int i) const; // Nested types std::shared_ptr<Row> GetStruct(int i) const; std::shared_ptr<ArrayData> GetArray(int i) const; std::shared_ptr<MapData> GetMap(int i) const; // Metadata int num_fields() const; SchemaPtr schema() const; // Debug std::string ToString() const; };
The ArrayData class provides access to list/array elements:
class ArrayData { public: // Null check bool IsNullAt(int i) const; // Element count int num_elements() const; // Primitive getters (same as Row) int32_t GetInt32(int i) const; // ... other primitives // String getter std::string GetString(int i) const; // Nested types std::shared_ptr<Row> GetStruct(int i) const; std::shared_ptr<ArrayData> GetArray(int i) const; std::shared_ptr<MapData> GetMap(int i) const; // Type info ListTypePtr type() const; };
The MapData class provides access to map key-value pairs:
class MapData { public: // Element count int num_elements(); // Access keys and values as arrays std::shared_ptr<ArrayData> keys_array(); std::shared_ptr<ArrayData> values_array(); // Type info MapTypePtr type(); };
Schemas define the structure of row data:
#include "fory/row/schema.h" using namespace fory::row; // Create schema programmatically auto person_schema = schema({ field("id", int32()), field("name", utf8()), field("score", float32()), field("active", boolean()) }); // Access schema info for (const auto& f : person_schema->fields()) { std::cout << f->name() << ": " << f->type()->name() << std::endl; }
Available types for row format:
// Primitive types DataTypePtr boolean(); // bool DataTypePtr int8(); // int8_t DataTypePtr int16(); // int16_t DataTypePtr int32(); // int32_t DataTypePtr int64(); // int64_t DataTypePtr float32(); // float DataTypePtr float64(); // double // String and binary DataTypePtr utf8(); // std::string DataTypePtr binary(); // std::vector<uint8_t> // Complex types DataTypePtr list(DataTypePtr element_type); DataTypePtr map(DataTypePtr key_type, DataTypePtr value_type); DataTypePtr struct_(std::vector<FieldPtr> fields);
The RowEncodeTrait template automatically infers types:
// Type inference for primitives RowEncodeTrait<int32_t>::Type(); // Returns int32() RowEncodeTrait<float>::Type(); // Returns float32() RowEncodeTrait<std::string>::Type(); // Returns utf8() // Type inference for collections RowEncodeTrait<std::vector<int32_t>>::Type(); // Returns list(int32()) // Type inference for maps RowEncodeTrait<std::map<std::string, int32_t>>::Type(); // Returns map(utf8(), int32()) // Type inference for structs (requires FORY_FIELD_INFO) RowEncodeTrait<Person>::Type(); // Returns struct_({...}) RowEncodeTrait<Person>::Schema(); // Returns schema({...})
For manual row construction:
#include "fory/row/writer.h" // Create schema auto my_schema = schema({ field("x", int32()), field("y", float64()), field("name", utf8()) }); // Create writer RowWriter writer(my_schema); writer.Reset(); // Write fields writer.Write(0, 42); // x = 42 writer.Write(1, 3.14); // y = 3.14 writer.WriteString(2, "test"); // name = "test" // Get result auto row = writer.ToRow();
For manual array construction:
// Create array type auto array_type = list(int32()); // Create writer ArrayWriter writer(array_type); writer.Reset(5); // 5 elements // Write elements for (int i = 0; i < 5; i++) { writer.Write(i, i * 10); } // Get result auto array = writer.CopyToArrayData();
// Set null at specific index writer.SetNullAt(2); // Field 2 is null // Check null when reading if (!row->IsNullAt(2)) { std::string value = row->GetString(2); }
Row layout (`n` = number of fields):

```text
+------------------+--------------------+--------------------+
| Null Bitmap      | Fixed-Size Data    | Variable-Size Data |
+------------------+--------------------+--------------------+
| ceil(n/8) B      | 8 * n bytes        | variable           |
+------------------+--------------------+--------------------+
```
Array layout (`n` = number of elements):

```text
+------------+------------------+--------------------+--------------------+
| Num Elems  | Null Bitmap      | Fixed-Size Data    | Variable-Size Data |
+------------+------------------+--------------------+--------------------+
| 8 bytes    | ceil(n/8) bytes  | elem_size * n      | variable           |
+------------+------------------+--------------------+--------------------+
```
Map layout:

```text
+------------------+------------------+
| Keys Array       | Values Array     |
+------------------+------------------+
```
RowEncoder<Person> encoder; // Encode multiple records for (const auto& person : people) { encoder.Encode(person); auto row = encoder.GetWriter().ToRow(); // Process row... }
// Get buffer reference for pre-allocation auto& buffer = encoder.GetWriter().buffer(); buffer->Reserve(expected_size);
// Process in batches for better cache utilization std::vector<Person> batch; batch.reserve(BATCH_SIZE); while (hasMore()) { batch.clear(); fillBatch(batch); for (const auto& person : batch) { encoder.Encode(person); process(encoder.GetWriter().ToRow()); } }
// Point to existing buffer (zero-copy) Row row(schema); row.PointTo(buffer, offset, size); // Access fields directly from buffer int32_t id = row.GetInt32(0);
| C++ Type | Row Type | Fixed Size |
|---|---|---|
| bool | boolean() | 1 byte |
| int8_t | int8() | 1 byte |
| int16_t | int16() | 2 bytes |
| int32_t | int32() | 4 bytes |
| int64_t | int64() | 8 bytes |
| float | float32() | 4 bytes |
| double | float64() | 8 bytes |
| std::string | utf8() | Variable |
| std::vector<T> | list(T) | Variable |
| std::map<K,V> | map(K,V) | Variable |
| std::optional<T> | Inner type | Nullable |
| Struct (FORY_FIELD_INFO) | struct_({...}) | Variable |