blob: 6d17b92de5f4c56c058b3ebe68e2740a5788cd17 [file] [log] [blame]
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="description" content="">
<meta name="author" content="">
<title>Java Quickstart</title>
<link href="../css/bootstrap.css" rel="stylesheet">
<link href="../css/markdown.css" rel="stylesheet">
<link href="../css/katex.min.css" rel="stylesheet">
<link href="../css/iceberg-theme.css" rel="stylesheet">
<link href="../font-awesome-4.7.0/css/font-awesome.min.css" rel="stylesheet">
<link href="https://fonts.googleapis.com/css?family=Lato:300,400,700,300italic,400italic,700italic" rel="stylesheet">
<link href="../css/termynal.css" rel="stylesheet">
</head>
<body>
<script>
function addAnchor(e){e.insertAdjacentHTML("beforeend",`<a href="#${e.id}" class="anchortag" aria-label="Anchor"> 🔗 </a>`)}
document.addEventListener("DOMContentLoaded",function(){var e=document.querySelectorAll("h1[id], h2[id], h3[id], h4[id]");e&&e.forEach(addAnchor)})
</script>
<nav class="navbar navbar-default" role="navigation"><topsection><div class="navbar-fixed-top"><div><button type="button" class="navbar-toggle" data-toggle="collapse" data-target="div.sidebar">
<span class=sr-only>Toggle navigation</span>
<span class=icon-bar></span>
<span class=icon-bar></span>
<span class=icon-bar></span>
</button>
<a class="page-scroll navbar-brand" href="https://iceberg.apache.org/"><img class="top-navbar-logo" src="https://iceberg.apache.org/docs/fd-update-slack-url//img/iceberg-logo-icon.png" alt="Apache Iceberg logo"> Apache Iceberg</a></div>
<div><input type="search" class="form-control" id="search-input" placeholder="Search..." maxlength="64" data-hotkeys="s/"></div>
<div class="versions-dropdown"><span>1.4.2</span> <i class="fa fa-chevron-down"></i>
<div class="versions-dropdown-content"><ul>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../latest">latest</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../1.4.2">1.4.2</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../1.4.1">1.4.1</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../1.4.0">1.4.0</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../1.3.1">1.3.1</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../1.3.0">1.3.0</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../1.2.1">1.2.1</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../1.2.0">1.2.0</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../1.1.0">1.1.0</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../1.0.0">1.0.0</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../0.14.1">0.14.1</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../0.14.0">0.14.0</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../0.13.2">0.13.2</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../0.13.1">0.13.1</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../0.13.0">0.13.0</a></li>
<li class="versions-dropdown-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../0.12.1">0.12.1</a></li>
</ul></div></div></div>
<div class="navbar-menu-fixed-top navbar-pages-group">
<div class="versions-dropdown"><div class="topnav-page-selection"><a href="">Quickstart</a> <i class="fa fa-chevron-down"></i></div>
<div class="versions-dropdown-content"><ul>
<li class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../hive-quickstart">Hive</a></li>
<li class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../spark-quickstart">Spark</a></li>
</ul></div></div>
<div class="topnav-page-selection"><a id="active" href="https://iceberg.apache.org/docs/fd-update-slack-url/../../docs/latest">Docs</a></div>
<div class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../releases">Releases</a></div>
<div class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../roadmap">Roadmap</a></div>
<div class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../blogs">Blogs</a></div>
<div class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../talks">Talks</a></div>
<div class="versions-dropdown"><div class="topnav-page-selection"><a href="">Project</a> <i class="fa fa-chevron-down"></i></div>
<div class="versions-dropdown-content"><ul>
<li class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../community">Community</a></li>
<li class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../spec">Spec</a></li>
<li class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../view-spec">View Spec</a></li>
<li class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../puffin-spec">Puffin Spec</a></li>
<li class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../multi-engine-support">Multi-Engine Support</a></li>
<li class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../how-to-release">How To Release</a></li>
<li class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../terms">Terms</a></li>
</ul></div></div>
<div class="versions-dropdown"><div class="topnav-page-selection"><a href="">Concepts</a> <i class="fa fa-chevron-down"></i></div>
<div class="versions-dropdown-content"><ul>
<li class="topnav-page-selection"><a href="https://iceberg.apache.org/docs/fd-update-slack-url/../../catalog">Catalogs</a></li>
</ul></div></div>
<div class="versions-dropdown"><div class="topnav-page-selection"><a href="">ASF</a> <i class="fa fa-chevron-down"></i></div>
<div class="versions-dropdown-content"><ul>
<li class="topnav-page-selection"><a target="_blank" rel="noopener" href="https://www.apache.org/foundation/sponsorship.html">Donate</a></li>
<li class="topnav-page-selection"><a target="_blank" rel="noopener" href="https://www.apache.org/events/current-event.html">Events</a></li>
<li class="topnav-page-selection"><a target="_blank" rel="noopener" href="https://www.apache.org/licenses/">License</a></li>
<li class="topnav-page-selection"><a target="_blank" rel="noopener" href="https://www.apache.org/security/">Security</a></li>
<li class="topnav-page-selection"><a target="_blank" rel="noopener" href="https://www.apache.org/foundation/thanks.html">Sponsors</a></li>
</ul></div></div>
<div class="topnav-page-selection"><a href="https://github.com/apache/iceberg" target="_blank" rel="noopener"><img src="https://iceberg.apache.org/docs/fd-update-slack-url//img/GitHub-Mark.png" class="top-navbar-logo" alt="GitHub"></a></div>
<div class="topnav-page-selection"><a href="https://join.slack.com/t/apache-iceberg/shared_invite/zt-27f22riz7-o8nCsl5Vbc_2h6~3DF6qlw" target="_blank" rel="noopener"><img src="https://iceberg.apache.org/docs/fd-update-slack-url//img/Slack_Mark_Web.png" class="top-navbar-logo" alt="Slack"></a></div>
</div></topsection></nav>
<section><div id="search-results-container"><ul id="search-results"></ul></div></section>
<section><div class="grid-container leftnav-and-toc"><div class="sidebar markdown-body"><div id="full"><ul><li><a href="../"><span>Introduction</span></a></li><li><a class="chevron-toggle collapsed" data-toggle="collapse" data-parent="full" href="#Tables"><span>Tables</span>
<i class="fa fa-chevron-right"></i>
<i class="fa fa-chevron-down"></i></a></li><div id=Tables class=collapse><ul class=sub-menu><li><a href=../branching/>Branching and Tagging</a></li><li><a href=../configuration/>Configuration</a></li><li><a href=../evolution/>Evolution</a></li><li><a href=../maintenance/>Maintenance</a></li><li><a href=../metrics-reporting/>Metrics Reporting</a></li><li><a href=../partitioning/>Partitioning</a></li><li><a href=../performance/>Performance</a></li><li><a href=../reliability/>Reliability</a></li><li><a href=../schemas/>Schemas</a></li></ul></div><li><a class="chevron-toggle collapsed" data-toggle=collapse data-parent=full href=#Spark><span>Spark</span>
<i class="fa fa-chevron-right"></i>
<i class="fa fa-chevron-down"></i></a></li><div id=Spark class=collapse><ul class=sub-menu><li><a href=../getting-started/>Getting Started</a></li><li><a href=../spark-configuration/>Configuration</a></li><li><a href=../spark-ddl/>DDL</a></li><li><a href=../spark-procedures/>Procedures</a></li><li><a href=../spark-queries/>Queries</a></li><li><a href=../spark-structured-streaming/>Structured Streaming</a></li><li><a href=../spark-writes/>Writes</a></li></ul></div><li><a class="chevron-toggle collapsed" data-toggle=collapse data-parent=full href=#Flink><span>Flink</span>
<i class="fa fa-chevron-right"></i>
<i class="fa fa-chevron-down"></i></a></li><div id=Flink class=collapse><ul class=sub-menu><li><a href=../flink/>Flink Getting Started</a></li><li><a href=../flink-connector/>Flink Connector</a></li><li><a href=../flink-ddl/>Flink DDL</a></li><li><a href=../flink-queries/>Flink Queries</a></li><li><a href=../flink-writes/>Flink Writes</a></li><li><a href=../flink-actions/>Flink Actions</a></li><li><a href=../flink-configuration/>Flink Configuration</a></li></ul></div><li><a href=../hive/><span>Hive</span></a></li><li><a target=_blank href=https://trino.io/docs/current/connector/iceberg.html><span>Trino</span></a></li><li><a target=_blank href=https://clickhouse.com/docs/en/engines/table-engines/integrations/iceberg><span>ClickHouse</span></a></li><li><a target=_blank href=https://prestodb.io/docs/current/connector/iceberg.html><span>Presto</span></a></li><li><a target=_blank href=https://docs.dremio.com/data-formats/apache-iceberg/><span>Dremio</span></a></li><li><a target=_blank href=https://docs.starrocks.io/en-us/latest/data_source/catalog/iceberg_catalog><span>StarRocks</span></a></li><li><a target=_blank href=https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg.html><span>Amazon Athena</span></a></li><li><a target=_blank href=https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-iceberg-use-cluster.html><span>Amazon EMR</span></a></li><li><a target=_blank href=https://impala.apache.org/docs/build/html/topics/impala_iceberg.html><span>Impala</span></a></li><li><a target=_blank href=https://doris.apache.org/docs/dev/lakehouse/multi-catalog/iceberg><span>Doris</span></a></li><li><a class="chevron-toggle collapsed" data-toggle=collapse data-parent=full href=#Integrations><span>Integrations</span>
<i class="fa fa-chevron-right"></i>
<i class="fa fa-chevron-down"></i></a></li><div id=Integrations class=collapse><ul class=sub-menu><li><a href=../aws/>AWS</a></li><li><a href=../dell/>Dell</a></li><li><a href=../jdbc/>JDBC</a></li><li><a href=../nessie/>Nessie</a></li></ul></div><li><a class=chevron-toggle data-toggle=collapse data-parent=full href=#API><span>API</span>
<i class="fa fa-chevron-right"></i>
<i class="fa fa-chevron-down"></i></a></li><div id=API class="collapse in"><ul class=sub-menu><li><a id=active href=../java-api-quickstart/>Java Quickstart</a></li><li><a href=../api/>Java API</a></li><li><a href=../custom-catalog/>Java Custom Catalog</a></li></ul></div><li><a class="chevron-toggle collapsed" data-toggle=collapse data-parent=full href=#Migration><span>Migration</span>
<i class="fa fa-chevron-right"></i>
<i class="fa fa-chevron-down"></i></a></li><div id=Migration class=collapse><ul class=sub-menu><li><a href=../table-migration/>Overview</a></li><li><a href=../hive-migration/>Hive Migration</a></li><li><a href=../delta-lake-migration/>Delta Lake Migration</a></li></ul></div><li><a href=https://iceberg.apache.org/docs/fd-update-slack-url/../../javadoc/latest><span>Javadoc</span></a></li><li><a target=_blank href=https://py.iceberg.apache.org/><span>PyIceberg</span></a></li></div></div><div id=content class=markdown-body><div class=margin-for-toc><h1 id=java-api-quickstart>Java API Quickstart</h1><h2 id=create-a-table>Create a table</h2><p>Tables are created using either a <a href=../../../javadoc/1.4.2/index.html?org/apache/iceberg/catalog/Catalog.html><code>Catalog</code></a> or an implementation of the <a href=../../../javadoc/1.4.2/index.html?org/apache/iceberg/Tables.html><code>Tables</code></a> interface.</p><h3 id=using-a-hive-catalog>Using a Hive catalog</h3><p>The Hive catalog connects to a Hive metastore to keep track of Iceberg tables.
You can initialize a Hive catalog with a name and some properties.
(see: <a href=../configuration/#catalog-properties>Catalog properties</a>)</p><p><strong>Note:</strong> Currently, <code>setConf</code> is always required for hive catalogs, but this will change in the future.</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.hive.HiveCatalog;
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>HiveCatalog catalog <span style=color:#f92672>=</span> <span style=color:#66d9ef>new</span> HiveCatalog();
</span></span><span style=display:flex><span>catalog.<span style=color:#a6e22e>setConf</span>(spark.<span style=color:#a6e22e>sparkContext</span>().<span style=color:#a6e22e>hadoopConfiguration</span>()); <span style=color:#75715e>// Configure using Spark&#39;s Hadoop configuration</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>Map <span style=color:#f92672>&lt;</span>String, String<span style=color:#f92672>&gt;</span> properties <span style=color:#f92672>=</span> <span style=color:#66d9ef>new</span> HashMap<span style=color:#f92672>&lt;</span>String, String<span style=color:#f92672>&gt;</span>();
</span></span><span style=display:flex><span>properties.<span style=color:#a6e22e>put</span>(<span style=color:#e6db74>&#34;warehouse&#34;</span>, <span style=color:#e6db74>&#34;...&#34;</span>);
</span></span><span style=display:flex><span>properties.<span style=color:#a6e22e>put</span>(<span style=color:#e6db74>&#34;uri&#34;</span>, <span style=color:#e6db74>&#34;...&#34;</span>);
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>catalog.<span style=color:#a6e22e>initialize</span>(<span style=color:#e6db74>&#34;hive&#34;</span>, properties);
</span></span></code></pre></div><p>The <code>Catalog</code> interface defines methods for working with tables, like <code>createTable</code>, <code>loadTable</code>, <code>renameTable</code>, and <code>dropTable</code>. <code>HiveCatalog</code> implements the <code>Catalog</code> interface.</p><p>To create a table, pass an <code>Identifier</code> and a <code>Schema</code> along with other initial metadata:</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.Table;
</span></span><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.catalog.TableIdentifier;
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>TableIdentifier name <span style=color:#f92672>=</span> TableIdentifier.<span style=color:#a6e22e>of</span>(<span style=color:#e6db74>&#34;logging&#34;</span>, <span style=color:#e6db74>&#34;logs&#34;</span>);
</span></span><span style=display:flex><span>Table table <span style=color:#f92672>=</span> catalog.<span style=color:#a6e22e>createTable</span>(name, schema, spec);
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#75715e>// or to load an existing table, use the following line</span>
</span></span><span style=display:flex><span><span style=color:#75715e>// Table table = catalog.loadTable(name);</span>
</span></span></code></pre></div><p>The logs <a href=#create-a-schema>schema</a> and <a href=#create-a-partition-spec>partition spec</a> are created below.</p><h3 id=using-a-hadoop-catalog>Using a Hadoop catalog</h3><p>A Hadoop catalog doesn&rsquo;t need to connect to a Hive MetaStore, but can only be used with HDFS or similar file systems that support atomic rename. Concurrent writes with a Hadoop catalog are not safe with a local FS or S3. To create a Hadoop catalog:</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.hadoop.conf.Configuration;
</span></span><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.hadoop.HadoopCatalog;
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>Configuration conf <span style=color:#f92672>=</span> <span style=color:#66d9ef>new</span> Configuration();
</span></span><span style=display:flex><span>String warehousePath <span style=color:#f92672>=</span> <span style=color:#e6db74>&#34;hdfs://host:8020/warehouse_path&#34;</span>;
</span></span><span style=display:flex><span>HadoopCatalog catalog <span style=color:#f92672>=</span> <span style=color:#66d9ef>new</span> HadoopCatalog(conf, warehousePath);
</span></span></code></pre></div><p>Like the Hive catalog, <code>HadoopCatalog</code> implements <code>Catalog</code>, so it also has methods for working with tables, like <code>createTable</code>, <code>loadTable</code>, and <code>dropTable</code>.</p><p>This example creates a table with Hadoop catalog:</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.Table;
</span></span><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.catalog.TableIdentifier;
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>TableIdentifier name <span style=color:#f92672>=</span> TableIdentifier.<span style=color:#a6e22e>of</span>(<span style=color:#e6db74>&#34;logging&#34;</span>, <span style=color:#e6db74>&#34;logs&#34;</span>);
</span></span><span style=display:flex><span>Table table <span style=color:#f92672>=</span> catalog.<span style=color:#a6e22e>createTable</span>(name, schema, spec);
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#75715e>// or to load an existing table, use the following line</span>
</span></span><span style=display:flex><span><span style=color:#75715e>// Table table = catalog.loadTable(name);</span>
</span></span></code></pre></div><p>The logs <a href=#create-a-schema>schema</a> and <a href=#create-a-partition-spec>partition spec</a> are created below.</p><h3 id=using-hadoop-tables>Using Hadoop tables</h3><p>Iceberg also supports tables that are stored in a directory in HDFS. Concurrent writes with a Hadoop tables are not safe when stored in the local FS or S3. Directory tables don&rsquo;t support all catalog operations, like rename, so they use the <code>Tables</code> interface instead of <code>Catalog</code>.</p><p>To create a table in HDFS, use <code>HadoopTables</code>:</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.hadoop.conf.Configuration;
</span></span><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.hadoop.HadoopTables;
</span></span><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.Table;
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>Configuration conf <span style=color:#f92672>=</span> <span style=color:#66d9ef>new</span> Configuration();
</span></span><span style=display:flex><span>HadoopTables tables <span style=color:#f92672>=</span> <span style=color:#66d9ef>new</span> HadoopTables(conf);
</span></span><span style=display:flex><span>Table table <span style=color:#f92672>=</span> tables.<span style=color:#a6e22e>create</span>(schema, spec, table_location);
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#75715e>// or to load an existing table, use the following line</span>
</span></span><span style=display:flex><span><span style=color:#75715e>// Table table = tables.load(table_location);</span>
</span></span></code></pre></div><div class=danger>Hadoop tables shouldn&rsquo;t be used with file systems that do not support atomic rename. Iceberg relies on rename to synchronize concurrent commits for directory tables.</div><h3 id=tables-in-spark>Tables in Spark</h3><p>Spark uses both <code>HiveCatalog</code> and <code>HadoopTables</code> to load tables. Hive is used when the identifier passed to <code>load</code> or <code>save</code> is not a path, otherwise Spark assumes it is a path-based table.</p><p>To read and write to tables from Spark see:</p><ul><li><a href=../spark-queries#querying-with-sql>SQL queries in Spark</a></li><li><a href=../spark-writes#insert-into><code>INSERT INTO</code> in Spark</a></li><li><a href=../spark-writes#merge-into><code>MERGE INTO</code> in Spark</a></li></ul><h2 id=schemas>Schemas</h2><h3 id=create-a-schema>Create a schema</h3><p>This example creates a schema for a <code>logs</code> table:</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.Schema;
</span></span><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.types.Types;
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>Schema schema <span style=color:#f92672>=</span> <span style=color:#66d9ef>new</span> Schema(
</span></span><span style=display:flex><span> Types.<span style=color:#a6e22e>NestedField</span>.<span style=color:#a6e22e>required</span>(1, <span style=color:#e6db74>&#34;level&#34;</span>, Types.<span style=color:#a6e22e>StringType</span>.<span style=color:#a6e22e>get</span>()),
</span></span><span style=display:flex><span> Types.<span style=color:#a6e22e>NestedField</span>.<span style=color:#a6e22e>required</span>(2, <span style=color:#e6db74>&#34;event_time&#34;</span>, Types.<span style=color:#a6e22e>TimestampType</span>.<span style=color:#a6e22e>withZone</span>()),
</span></span><span style=display:flex><span> Types.<span style=color:#a6e22e>NestedField</span>.<span style=color:#a6e22e>required</span>(3, <span style=color:#e6db74>&#34;message&#34;</span>, Types.<span style=color:#a6e22e>StringType</span>.<span style=color:#a6e22e>get</span>()),
</span></span><span style=display:flex><span> Types.<span style=color:#a6e22e>NestedField</span>.<span style=color:#a6e22e>optional</span>(4, <span style=color:#e6db74>&#34;call_stack&#34;</span>, Types.<span style=color:#a6e22e>ListType</span>.<span style=color:#a6e22e>ofRequired</span>(5, Types.<span style=color:#a6e22e>StringType</span>.<span style=color:#a6e22e>get</span>()))
</span></span><span style=display:flex><span> );
</span></span></code></pre></div><p>When using the Iceberg API directly, type IDs are required. Conversions from other schema formats, like Spark, Avro, and Parquet will automatically assign new IDs.</p><p>When a table is created, all IDs in the schema are re-assigned to ensure uniqueness.</p><h3 id=convert-a-schema-from-avro>Convert a schema from Avro</h3><p>To create an Iceberg schema from an existing Avro schema, use converters in <code>AvroSchemaUtil</code>:</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.avro.Schema;
</span></span><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.avro.Schema.Parser;
</span></span><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.avro.AvroSchemaUtil;
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>Schema avroSchema <span style=color:#f92672>=</span> <span style=color:#66d9ef>new</span> Parser().<span style=color:#a6e22e>parse</span>(<span style=color:#e6db74>&#34;{\&#34;type\&#34;: \&#34;record\&#34; , ... }&#34;</span>);
</span></span><span style=display:flex><span>Schema icebergSchema <span style=color:#f92672>=</span> AvroSchemaUtil.<span style=color:#a6e22e>toIceberg</span>(avroSchema);
</span></span></code></pre></div><h3 id=convert-a-schema-from-spark>Convert a schema from Spark</h3><p>To create an Iceberg schema from an existing table, use converters in <code>SparkSchemaUtil</code>:</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.spark.SparkSchemaUtil;
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>Schema schema <span style=color:#f92672>=</span> SparkSchemaUtil.<span style=color:#a6e22e>schemaForTable</span>(sparkSession, table_name);
</span></span></code></pre></div><h2 id=partitioning>Partitioning</h2><h3 id=create-a-partition-spec>Create a partition spec</h3><p>Partition specs describe how Iceberg should group records into data files. Partition specs are created for a table&rsquo;s schema using a builder.</p><p>This example creates a partition spec for the <code>logs</code> table that partitions records by the hour of the log event&rsquo;s timestamp and by log level:</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#f92672>import</span> org.apache.iceberg.PartitionSpec;
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>PartitionSpec spec <span style=color:#f92672>=</span> PartitionSpec.<span style=color:#a6e22e>builderFor</span>(schema)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>hour</span>(<span style=color:#e6db74>&#34;event_time&#34;</span>)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>identity</span>(<span style=color:#e6db74>&#34;level&#34;</span>)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>build</span>();
</span></span></code></pre></div><p>For more information on the different partition transforms that Iceberg offers, visit <a href=../../../spec#partitioning>this page</a>.</p><h2 id=branching-and-tagging>Branching and Tagging</h2><h3 id=creating-branches-and-tags>Creating branches and tags</h3><p>New branches and tags can be created via the Java library&rsquo;s ManageSnapshots API.</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#75715e>/* Create a branch test-branch which is retained for 1 week, and the latest 2 snapshots on test-branch will always be retained.
</span></span></span><span style=display:flex><span><span style=color:#75715e>Snapshots on test-branch which are created within the last hour will also be retained. */</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>String branch <span style=color:#f92672>=</span> <span style=color:#e6db74>&#34;test-branch&#34;</span>;
</span></span><span style=display:flex><span>table.<span style=color:#a6e22e>manageSnapshots</span>()
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>createBranch</span>(branch, 3)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>setMinSnapshotsToKeep</span>(branch, 2)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>setMaxSnapshotAgeMs</span>(branch, 3600000)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>setMaxRefAgeMs</span>(branch, 604800000)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>commit</span>();
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#75715e>// Create a tag historical-tag at snapshot 10 which is retained for a day</span>
</span></span><span style=display:flex><span>String tag <span style=color:#f92672>=</span> <span style=color:#e6db74>&#34;historical-tag&#34;</span>;
</span></span><span style=display:flex><span>table.<span style=color:#a6e22e>manageSnapshots</span>()
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>createTag</span>(tag, 10)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>setMaxRefAgeMs</span>(tag, 86400000)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>commit</span>();
</span></span></code></pre></div><h3 id=committing-to-branches>Committing to branches</h3><p>Writing to a branch can be performed by specifying <code>toBranch</code> in the operation. For the full list refer to <a href=../../java/api/#update-operations>UpdateOperations</a>.</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#75715e>// Append FILE_A to branch test-branch </span>
</span></span><span style=display:flex><span>String branch <span style=color:#f92672>=</span> <span style=color:#e6db74>&#34;test-branch&#34;</span>;
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>table.<span style=color:#a6e22e>newAppend</span>()
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>appendFile</span>(FILE_A)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>toBranch</span>(branch)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>commit</span>();
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#75715e>// Perform row level updates on &#34;test-branch&#34;</span>
</span></span><span style=display:flex><span>table.<span style=color:#a6e22e>newRowDelta</span>()
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>addRows</span>(DATA_FILE)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>addDeletes</span>(DELETES)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>toBranch</span>(branch)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>commit</span>();
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#75715e>// Perform a rewrite operation replacing small_file_1 and small_file_2 on &#34;test-branch&#34; with compacted_file.</span>
</span></span><span style=display:flex><span>table.<span style=color:#a6e22e>newRewrite</span>()
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>rewriteFiles</span>(ImmutableSet.<span style=color:#a6e22e>of</span>(small_file_1,small_file_2), ImmutableSet.<span style=color:#a6e22e>of</span>(compacted_file))
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>toBranch</span>(branch)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>commit</span>();
</span></span></code></pre></div><h3 id=reading-from-branches-and-tags>Reading from branches and tags</h3><p>Reading from a branch or tag can be done as usual via the Table Scan API, by passing in a branch or tag in the <code>useRef</code> API. When a branch is passed in, the snapshot that&rsquo;s used is the head of the branch. Note that currently reading from a branch and specifying an <code>asOfSnapshotId</code> in the scan is not supported.</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#75715e>// Read from the head snapshot of test-branch</span>
</span></span><span style=display:flex><span>TableScan branchRead <span style=color:#f92672>=</span> table.<span style=color:#a6e22e>newScan</span>().<span style=color:#a6e22e>useRef</span>(<span style=color:#e6db74>&#34;test-branch&#34;</span>);
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#75715e>// Read from the snapshot referenced by audit-tag</span>
</span></span><span style=display:flex><span>TableScan tagRead <span style=color:#f92672>=</span> table.<span style=color:#a6e22e>newScan</span>().<span style=color:#a6e22e>useRef</span>(<span style=color:#e6db74>&#34;audit-tag&#34;</span>);
</span></span></code></pre></div><h3 id=replacing-and-fast-forwarding-branches-and-tags>Replacing and fast forwarding branches and tags</h3><p>The snapshots which existing branches and tags point to can be updated via the <code>replace</code> APIs. The fast forward operation is similar to git fast-forwarding. Fast forward can be used to advance a target branch to the head of a source branch or a tag when the target branch is an ancestor of the source. For both fast forward and replace, retention properties of the target branch are maintained by default.</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#75715e>// Update &#34;test-branch&#34; to point to snapshot 4</span>
</span></span><span style=display:flex><span>table.<span style=color:#a6e22e>manageSnapshots</span>()
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>replaceBranch</span>(branch, 4)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>commit</span>();
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>String tag <span style=color:#f92672>=</span> <span style=color:#e6db74>&#34;audit-tag&#34;</span>;
</span></span><span style=display:flex><span><span style=color:#75715e>// Replace &#34;audit-tag&#34; to point to snapshot 3 and update its retention</span>
</span></span><span style=display:flex><span>table.<span style=color:#a6e22e>manageSnapshots</span>()
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>replaceTag</span>(tag, 3)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>setMaxRefAgeMs</span>(tag, 1000)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>commit</span>();
</span></span></code></pre></div><h3 id=updating-retention-properties>Updating retention properties</h3><p>Retention properties for branches and tags can be updated as well.
Use <code>setMaxRefAgeMs</code> to update the retention property of the branch or tag itself. Branch snapshot retention properties can be updated via the <code>setMinSnapshotsToKeep</code> and <code>setMaxSnapshotAgeMs</code> APIs.</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span>String branch <span style=color:#f92672>=</span> <span style=color:#e6db74>&#34;test-branch&#34;</span>;
</span></span><span style=display:flex><span><span style=color:#75715e>// Update retention properties for test-branch</span>
</span></span><span style=display:flex><span>table.<span style=color:#a6e22e>manageSnapshots</span>()
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>setMinSnapshotsToKeep</span>(branch, 10)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>setMaxSnapshotAgeMs</span>(branch, 7200000)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>setMaxRefAgeMs</span>(branch, 604800000)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>commit</span>();
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#75715e>// Update retention properties for test-tag</span>
</span></span><span style=display:flex><span>table.<span style=color:#a6e22e>manageSnapshots</span>()
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>setMaxRefAgeMs</span>(<span style=color:#e6db74>&#34;test-tag&#34;</span>, 604800000)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>commit</span>();
</span></span></code></pre></div><h3 id=removing-branches-and-tags>Removing branches and tags</h3><p>Branches and tags can be removed via the <code>removeBranch</code> and <code>removeTag</code> APIs respectively</p><div class=highlight><pre tabindex=0 style=color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-java data-lang=java><span style=display:flex><span><span style=color:#75715e>// Remove test-branch</span>
</span></span><span style=display:flex><span>table.<span style=color:#a6e22e>manageSnapshots</span>()
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>removeBranch</span>(<span style=color:#e6db74>&#34;test-branch&#34;</span>)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>commit</span>();
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#75715e>// Remove test-tag</span>
</span></span><span style=display:flex><span>table.<span style=color:#a6e22e>manageSnapshots</span>()
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>removeTag</span>(<span style=color:#e6db74>&#34;test-tag&#34;</span>)
</span></span><span style=display:flex><span> .<span style=color:#a6e22e>commit</span>();
</span></span></code></pre></div></div><div id=toc class=markdown-body><div id=full><nav id=TableOfContents><ul><li><a href=#create-a-table>Create a table</a><ul><li><a href=#using-a-hive-catalog>Using a Hive catalog</a></li><li><a href=#using-a-hadoop-catalog>Using a Hadoop catalog</a></li><li><a href=#using-hadoop-tables>Using Hadoop tables</a></li><li><a href=#tables-in-spark>Tables in Spark</a></li></ul></li><li><a href=#schemas>Schemas</a><ul><li><a href=#create-a-schema>Create a schema</a></li><li><a href=#convert-a-schema-from-avro>Convert a schema from Avro</a></li><li><a href=#convert-a-schema-from-spark>Convert a schema from Spark</a></li></ul></li><li><a href=#partitioning>Partitioning</a><ul><li><a href=#create-a-partition-spec>Create a partition spec</a></li></ul></li><li><a href=#branching-and-tagging>Branching and Tagging</a><ul><li><a href=#creating-branches-and-tags>Creating branches and tags</a></li><li><a href=#committing-to-branches>Committing to branches</a></li><li><a href=#reading-from-branches-and-tags>Reading from branches and tags</a></li><li><a href=#replacing-and-fast-forwarding-branches-and-tags>Replacing and fast forwarding branches and tags</a></li><li><a href=#updating-retention-properties>Updating retention properties</a></li><li><a href=#removing-branches-and-tags>Removing branches and tags</a></li></ul></li></ul></nav></div></div></div></div></section></body><script src=https://iceberg.apache.org/docs/fd-update-slack-url//js/jquery-1.11.0.js></script><script src=https://iceberg.apache.org/docs/fd-update-slack-url//js/jquery.easing.min.js></script><script type=text/javascript src=https://iceberg.apache.org/docs/fd-update-slack-url//js/search.js></script><script src=https://iceberg.apache.org/docs/fd-update-slack-url//js/bootstrap.min.js></script><script src=https://iceberg.apache.org/docs/fd-update-slack-url//js/iceberg-theme.js></script></html>