blob: 13634c7fcaaa7bd071c86e0b5ba05b2f872cb1b2 [file]
<?xml version='1.0' encoding='UTF-8' standalone='no'?>
<doxygen xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="compound.xsd" version="1.9.8" xml:lang="en-US">
<compounddef id="classiceberg_1_1ManifestMergeManager" kind="class" language="C++" prot="public">
<compoundname>iceberg::ManifestMergeManager</compoundname>
<includes refid="manifest__merge__manager_8h" local="no">manifest_merge_manager.h</includes>
<sectiondef kind="private-attrib">
<memberdef kind="variable" id="classiceberg_1_1ManifestMergeManager_1ac988c1a43f655b015ee1a0bd40ae5576" prot="private" static="no" mutable="no">
<type>const int64_t</type>
<definition>const int64_t iceberg::ManifestMergeManager::target_size_bytes_</definition>
<argsstring></argsstring>
<name>target_size_bytes_</name>
<qualifiedname>iceberg::ManifestMergeManager::target_size_bytes_</qualifiedname>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="iceberg/manifest/manifest_merge_manager.h" line="109" column="17" bodyfile="iceberg/manifest/manifest_merge_manager.h" bodystart="109" bodyend="-1"/>
</memberdef>
<memberdef kind="variable" id="classiceberg_1_1ManifestMergeManager_1a9fcd0106d5a31821283c4feded03d165" prot="private" static="no" mutable="no">
<type>const int32_t</type>
<definition>const int32_t iceberg::ManifestMergeManager::min_count_to_merge_</definition>
<argsstring></argsstring>
<name>min_count_to_merge_</name>
<qualifiedname>iceberg::ManifestMergeManager::min_count_to_merge_</qualifiedname>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="iceberg/manifest/manifest_merge_manager.h" line="110" column="17" bodyfile="iceberg/manifest/manifest_merge_manager.h" bodystart="110" bodyend="-1"/>
</memberdef>
<memberdef kind="variable" id="classiceberg_1_1ManifestMergeManager_1a42863d1b1f0605700923eabb00ae2cf4" prot="private" static="no" mutable="no">
<type>const bool</type>
<definition>const bool iceberg::ManifestMergeManager::merge_enabled_</definition>
<argsstring></argsstring>
<name>merge_enabled_</name>
<qualifiedname>iceberg::ManifestMergeManager::merge_enabled_</qualifiedname>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="iceberg/manifest/manifest_merge_manager.h" line="111" column="14" bodyfile="iceberg/manifest/manifest_merge_manager.h" bodystart="111" bodyend="-1"/>
</memberdef>
</sectiondef>
<sectiondef kind="public-func">
<memberdef kind="function" id="classiceberg_1_1ManifestMergeManager_1a57f9f370ec6637dd378f4c18d7ff0f76" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type></type>
<definition>iceberg::ManifestMergeManager::ManifestMergeManager</definition>
<argsstring>(int64_t target_size_bytes, int32_t min_count_to_merge, bool merge_enabled)</argsstring>
<name>ManifestMergeManager</name>
<qualifiedname>iceberg::ManifestMergeManager::ManifestMergeManager</qualifiedname>
<param>
<type>int64_t</type>
<declname>target_size_bytes</declname>
</param>
<param>
<type>int32_t</type>
<declname>min_count_to_merge</declname>
</param>
<param>
<type>bool</type>
<declname>merge_enabled</declname>
</param>
<briefdescription>
<para>Construct a merge manager with the given configuration. </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>target_size_bytes</parametername>
</parameternamelist>
<parameterdescription>
<para>Target output manifest size in bytes </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>min_count_to_merge</parametername>
</parameternamelist>
<parameterdescription>
<para>Minimum number of manifests before any merging occurs </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>merge_enabled</parametername>
</parameternamelist>
<parameterdescription>
<para>Whether merging is enabled at all </para>
</parameterdescription>
</parameteritem>
</parameterlist>
</para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="iceberg/manifest/manifest_merge_manager.h" line="53" column="3" bodyfile="iceberg/manifest/manifest_merge_manager.cc" bodystart="37" bodyend="41"/>
</memberdef>
<memberdef kind="function" id="classiceberg_1_1ManifestMergeManager_1a48fd9f3a16df724b1f0ef2f7ffe43ea3" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type></type>
<definition>iceberg::ManifestMergeManager::ManifestMergeManager</definition>
<argsstring>(const ManifestMergeManager &amp;)=delete</argsstring>
<name>ManifestMergeManager</name>
<qualifiedname>iceberg::ManifestMergeManager::ManifestMergeManager</qualifiedname>
<param>
<type>const <ref refid="classiceberg_1_1ManifestMergeManager" kindref="compound">ManifestMergeManager</ref> &amp;</type>
</param>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="iceberg/manifest/manifest_merge_manager.h" line="56" column="3"/>
</memberdef>
<memberdef kind="function" id="classiceberg_1_1ManifestMergeManager_1aff044321809cec4a7c7a2426a102c735" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type><ref refid="classiceberg_1_1ManifestMergeManager" kindref="compound">ManifestMergeManager</ref> &amp;</type>
<definition>ManifestMergeManager &amp; iceberg::ManifestMergeManager::operator=</definition>
<argsstring>(const ManifestMergeManager &amp;)=delete</argsstring>
<name>operator=</name>
<qualifiedname>iceberg::ManifestMergeManager::operator=</qualifiedname>
<param>
<type>const <ref refid="classiceberg_1_1ManifestMergeManager" kindref="compound">ManifestMergeManager</ref> &amp;</type>
</param>
<briefdescription>
</briefdescription>
<detaileddescription>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="iceberg/manifest/manifest_merge_manager.h" line="57" column="24"/>
</memberdef>
<memberdef kind="function" id="classiceberg_1_1ManifestMergeManager_1a2ec764940fd557109ac9dc77e44b7742" prot="public" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type>Result&lt; std::vector&lt; <ref refid="structiceberg_1_1ManifestFile" kindref="compound">ManifestFile</ref> &gt; &gt;</type>
<definition>Result&lt; std::vector&lt; ManifestFile &gt; &gt; iceberg::ManifestMergeManager::MergeManifests</definition>
<argsstring>(const std::vector&lt; ManifestFile &gt; &amp;existing_manifests, const std::vector&lt; ManifestFile &gt; &amp;new_manifests, int64_t snapshot_id, const TableMetadata &amp;metadata, std::shared_ptr&lt; FileIO &gt; file_io, const ManifestWriterFactory &amp;writer_factory)</argsstring>
<name>MergeManifests</name>
<qualifiedname>iceberg::ManifestMergeManager::MergeManifests</qualifiedname>
<param>
<type>const std::vector&lt; <ref refid="structiceberg_1_1ManifestFile" kindref="compound">ManifestFile</ref> &gt; &amp;</type>
<declname>existing_manifests</declname>
</param>
<param>
<type>const std::vector&lt; <ref refid="structiceberg_1_1ManifestFile" kindref="compound">ManifestFile</ref> &gt; &amp;</type>
<declname>new_manifests</declname>
</param>
<param>
<type>int64_t</type>
<declname>snapshot_id</declname>
</param>
<param>
<type>const <ref refid="structiceberg_1_1TableMetadata" kindref="compound">TableMetadata</ref> &amp;</type>
<declname>metadata</declname>
</param>
<param>
<type>std::shared_ptr&lt; <ref refid="classiceberg_1_1FileIO" kindref="compound">FileIO</ref> &gt;</type>
<declname>file_io</declname>
</param>
<param>
<type>const <ref refid="manifest__writer_8h_1a0fcb4cda5197f4d8c18772a445455cc2" kindref="member">ManifestWriterFactory</ref> &amp;</type>
<declname>writer_factory</declname>
</param>
<briefdescription>
<para>Merge existing and new manifests according to configured thresholds. </para>
</briefdescription>
<detaileddescription>
<para>Manifests are grouped by (partition_spec_id, content) — data and delete manifests are never merged together. Within each group, a greedy bin-packing algorithm combines manifests up to target_size_bytes. The bin that contains the newest manifest for that content type is protected by min_count_to_merge: if it has fewer than that many items it is passed through unchanged.</para>
<para><simplesect kind="note"><para>Retry and rollback cleanup are handled by the caller that owns created manifest paths. TODO(Guotao): Add explicit replaced-manifest tracking here if callers need direct access.</para>
</simplesect>
<parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>existing_manifests</parametername>
</parameternamelist>
<parameterdescription>
<para>Manifests already in the base snapshot </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>new_manifests</parametername>
</parameternamelist>
<parameterdescription>
<para>Newly written manifests to incorporate </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>snapshot_id</parametername>
</parameternamelist>
<parameterdescription>
<para>The ID of the snapshot being committed. Used to preserve ADDED/DELETED status for entries written by this snapshot and to suppress stale DELETED tombstones from prior snapshots. </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>metadata</parametername>
</parameternamelist>
<parameterdescription>
<para><ref refid="classiceberg_1_1Table" kindref="compound">Table</ref> metadata (provides specs and schema for readers) </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>file_io</parametername>
</parameternamelist>
<parameterdescription>
<para>File IO used to open existing manifests for reading </para>
</parameterdescription>
</parameteritem>
<parameteritem>
<parameternamelist>
<parametername>writer_factory</parametername>
</parameternamelist>
<parameterdescription>
<para>Factory to create new <ref refid="classiceberg_1_1ManifestWriter" kindref="compound">ManifestWriter</ref> instances </para>
</parameterdescription>
</parameteritem>
</parameterlist>
<simplesect kind="return"><para>The merged manifest list, or an error </para>
</simplesect>
</para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="iceberg/manifest/manifest_merge_manager.h" line="81" column="10" bodyfile="iceberg/manifest/manifest_merge_manager.cc" bodystart="43" bodyend="92"/>
</memberdef>
</sectiondef>
<sectiondef kind="private-func">
<memberdef kind="function" id="classiceberg_1_1ManifestMergeManager_1ac7be96444b47e0ac615ea415a5aa3d28" prot="private" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type>Result&lt; std::vector&lt; <ref refid="structiceberg_1_1ManifestFile" kindref="compound">ManifestFile</ref> &gt; &gt;</type>
<definition>Result&lt; std::vector&lt; ManifestFile &gt; &gt; iceberg::ManifestMergeManager::MergeGroup</definition>
<argsstring>(const std::vector&lt; const ManifestFile * &gt; &amp;group, const ManifestFile *first, int64_t snapshot_id, const TableMetadata &amp;metadata, std::shared_ptr&lt; FileIO &gt; file_io, const ManifestWriterFactory &amp;writer_factory)</argsstring>
<name>MergeGroup</name>
<qualifiedname>iceberg::ManifestMergeManager::MergeGroup</qualifiedname>
<param>
<type>const std::vector&lt; const <ref refid="structiceberg_1_1ManifestFile" kindref="compound">ManifestFile</ref> * &gt; &amp;</type>
<declname>group</declname>
</param>
<param>
<type>const <ref refid="structiceberg_1_1ManifestFile" kindref="compound">ManifestFile</ref> *</type>
<declname>first</declname>
</param>
<param>
<type>int64_t</type>
<declname>snapshot_id</declname>
</param>
<param>
<type>const <ref refid="structiceberg_1_1TableMetadata" kindref="compound">TableMetadata</ref> &amp;</type>
<declname>metadata</declname>
</param>
<param>
<type>std::shared_ptr&lt; <ref refid="classiceberg_1_1FileIO" kindref="compound">FileIO</ref> &gt;</type>
<declname>file_io</declname>
</param>
<param>
<type>const <ref refid="manifest__writer_8h_1a0fcb4cda5197f4d8c18772a445455cc2" kindref="member">ManifestWriterFactory</ref> &amp;</type>
<declname>writer_factory</declname>
</param>
<briefdescription>
<para>Merge a group of manifests sharing the same partition_spec_id and content type. </para>
</briefdescription>
<detaileddescription>
<para><parameterlist kind="param"><parameteritem>
<parameternamelist>
<parametername>first</parametername>
</parameternamelist>
<parameterdescription>
<para>The overall first (newest) manifest across all groups, used to apply the min_count_to_merge threshold on the bin that contains it. </para>
</parameterdescription>
</parameteritem>
</parameterlist>
</para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="iceberg/manifest/manifest_merge_manager.h" line="92" column="10" bodyfile="iceberg/manifest/manifest_merge_manager.cc" bodystart="94" bodyend="148"/>
</memberdef>
<memberdef kind="function" id="classiceberg_1_1ManifestMergeManager_1a8c2a1206f8ddecdbb296c22012900c4f" prot="private" static="no" const="no" explicit="no" inline="no" virt="non-virtual">
<type>Result&lt; <ref refid="structiceberg_1_1ManifestFile" kindref="compound">ManifestFile</ref> &gt;</type>
<definition>Result&lt; ManifestFile &gt; iceberg::ManifestMergeManager::FlushBin</definition>
<argsstring>(const std::vector&lt; const ManifestFile * &gt; &amp;bin, int64_t snapshot_id, const TableMetadata &amp;metadata, std::shared_ptr&lt; FileIO &gt; file_io, const ManifestWriterFactory &amp;writer_factory)</argsstring>
<name>FlushBin</name>
<qualifiedname>iceberg::ManifestMergeManager::FlushBin</qualifiedname>
<param>
<type>const std::vector&lt; const <ref refid="structiceberg_1_1ManifestFile" kindref="compound">ManifestFile</ref> * &gt; &amp;</type>
<declname>bin</declname>
</param>
<param>
<type>int64_t</type>
<declname>snapshot_id</declname>
</param>
<param>
<type>const <ref refid="structiceberg_1_1TableMetadata" kindref="compound">TableMetadata</ref> &amp;</type>
<declname>metadata</declname>
</param>
<param>
<type>std::shared_ptr&lt; <ref refid="classiceberg_1_1FileIO" kindref="compound">FileIO</ref> &gt;</type>
<declname>file_io</declname>
</param>
<param>
<type>const <ref refid="manifest__writer_8h_1a0fcb4cda5197f4d8c18772a445455cc2" kindref="member">ManifestWriterFactory</ref> &amp;</type>
<declname>writer_factory</declname>
</param>
<briefdescription>
<para>Write a merged manifest from all manifests in a bin. </para>
</briefdescription>
<detaileddescription>
<para>Entries are written snapshot-aware:<itemizedlist>
<listitem><para>ADDED from snapshot_id → WriteAddedEntry (preserve status)</para>
</listitem><listitem><para>DELETED from snapshot_id → WriteDeletedEntry (preserve tombstone)</para>
</listitem><listitem><para>DELETED from older snapshots → dropped (stale tombstones are not carried forward)</para>
</listitem><listitem><para>All other entries → WriteExistingEntry </para>
</listitem></itemizedlist>
</para>
</detaileddescription>
<inbodydescription>
</inbodydescription>
<location file="iceberg/manifest/manifest_merge_manager.h" line="104" column="10" bodyfile="iceberg/manifest/manifest_merge_manager.cc" bodystart="150" bodyend="189"/>
</memberdef>
</sectiondef>
<briefdescription>
<para>Merges small manifests into larger ones using greedy bin-packing. </para>
</briefdescription>
<detaileddescription>
<para>Manifests are grouped by (partition_spec_id, content) before merging; manifests with different spec IDs or different content types (data vs. delete) are never merged together. Within a group, manifests are accumulated into bins until a bin would exceed target_size_bytes, at which point the bin is flushed (written) and a new one started. Manifests already larger than target_size_bytes pass through unchanged.</para>
<para><simplesect kind="note"><para>This class is non-copyable and non-movable. </para>
</simplesect>
</para>
</detaileddescription>
<location file="iceberg/manifest/manifest_merge_manager.h" line="46" column="1" bodyfile="iceberg/manifest/manifest_merge_manager.h" bodystart="46" bodyend="112"/>
<listofallmembers>
<member refid="classiceberg_1_1ManifestMergeManager_1a8c2a1206f8ddecdbb296c22012900c4f" prot="private" virt="non-virtual"><scope>iceberg::ManifestMergeManager</scope><name>FlushBin</name></member>
<member refid="classiceberg_1_1ManifestMergeManager_1a57f9f370ec6637dd378f4c18d7ff0f76" prot="public" virt="non-virtual"><scope>iceberg::ManifestMergeManager</scope><name>ManifestMergeManager</name></member>
<member refid="classiceberg_1_1ManifestMergeManager_1a48fd9f3a16df724b1f0ef2f7ffe43ea3" prot="public" virt="non-virtual"><scope>iceberg::ManifestMergeManager</scope><name>ManifestMergeManager</name></member>
<member refid="classiceberg_1_1ManifestMergeManager_1a42863d1b1f0605700923eabb00ae2cf4" prot="private" virt="non-virtual"><scope>iceberg::ManifestMergeManager</scope><name>merge_enabled_</name></member>
<member refid="classiceberg_1_1ManifestMergeManager_1ac7be96444b47e0ac615ea415a5aa3d28" prot="private" virt="non-virtual"><scope>iceberg::ManifestMergeManager</scope><name>MergeGroup</name></member>
<member refid="classiceberg_1_1ManifestMergeManager_1a2ec764940fd557109ac9dc77e44b7742" prot="public" virt="non-virtual"><scope>iceberg::ManifestMergeManager</scope><name>MergeManifests</name></member>
<member refid="classiceberg_1_1ManifestMergeManager_1a9fcd0106d5a31821283c4feded03d165" prot="private" virt="non-virtual"><scope>iceberg::ManifestMergeManager</scope><name>min_count_to_merge_</name></member>
<member refid="classiceberg_1_1ManifestMergeManager_1aff044321809cec4a7c7a2426a102c735" prot="public" virt="non-virtual"><scope>iceberg::ManifestMergeManager</scope><name>operator=</name></member>
<member refid="classiceberg_1_1ManifestMergeManager_1ac988c1a43f655b015ee1a0bd40ae5576" prot="private" virt="non-virtual"><scope>iceberg::ManifestMergeManager</scope><name>target_size_bytes_</name></member>
</listofallmembers>
</compounddef>
</doxygen>