<!DOCTYPE HTML>
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc -->
<title>ArrowReader</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style">
<link rel="stylesheet" type="text/css" href="../../../../../jquery/jquery-ui.min.css" title="Style">
<link rel="stylesheet" type="text/css" href="../../../../../jquery-ui.overrides.css" title="Style">
<script type="text/javascript" src="../../../../../script.js"></script>
<script type="text/javascript" src="../../../../../jquery/jszip/dist/jszip.min.js"></script>
<script type="text/javascript" src="../../../../../jquery/jszip-utils/dist/jszip-utils.min.js"></script>
<!--[if IE]>
<script type="text/javascript" src="../../../../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
<![endif]-->
<script type="text/javascript" src="../../../../../jquery/jquery-3.6.0.min.js"></script>
<script type="text/javascript" src="../../../../../jquery/jquery-ui.min.js"></script>
</head>
<body>
<script type="text/javascript"><!--
// Put this class's name into the parent document's title, unless the URL
// carries the 'is-external=true' marker. Any error raised while doing so
// is deliberately swallowed by the empty catch.
try {
    if (location.href.indexOf('is-external=true') < 0) {
        parent.document.title = "ArrowReader";
    }
} catch (err) {
    // intentionally ignored
}
//-->
// Globals describing the method summary table on this page:
//   data  - keyed by the summary row ids ("i0", "i1")
//   tabs  - keyed by the numbers passed to show(...) in the table caption,
//           each entry holding a caption tab id and its label
//   the remaining strings match the CSS class names used on the rows and tabs
// NOTE(review): presumably consumed by script.js, which is not visible here.
var data = {"i0": 10, "i1": 10},
    tabs = {
        65535: ["t0", "All Methods"],
        2: ["t2", "Instance Methods"],
        8: ["t4", "Concrete Methods"]
    },
    altColor = "altColor",
    rowColor = "rowColor",
    tableTab = "tableTab",
    activeTableTab = "activeTableTab",
    pathtoroot = "../../../../../",
    useModuleDirectories = true;
// loadScripts is defined outside this file (presumably in script.js).
loadScripts(document, 'script');</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<header role="banner">
<nav role="navigation">
<div class="fixedNav">
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a id="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a id="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../index.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../allclasses.html">All&nbsp;Classes</a></li>
</ul>
<ul class="navListSearch">
<li><label for="search">SEARCH:</label>
<input type="text" id="search" value="search" disabled="disabled">
<input type="reset" id="reset" value="reset" disabled="disabled">
</li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the top "All Classes" link only when this page is the top-level
// window; hide it otherwise.
allClassesLink = document.getElementById("allclasses_navbar_top");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a id="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
</div>
<div class="navPadding">&nbsp;</div>
<script type="text/javascript"><!--
// Give the .navPadding spacer a top padding equal to the computed height of
// the .fixedNav bar. The local is scoped to the function so no new global
// is introduced.
(function () {
    var fixedNavHeight = $('.fixedNav').css("height");
    $('.navPadding').css('padding-top', fixedNavHeight);
})();
//-->
</script>
</nav>
</header>
<!-- ======== START OF CLASS DATA ======== -->
<main role="main">
<div class="header">
<div class="subTitle"><span class="packageLabelInType">Package</span>&nbsp;<a href="package-summary.html">org.apache.iceberg.arrow.vectorized</a></div>
<h2 title="Class ArrowReader" class="title">Class ArrowReader</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>java.lang.Object</li>
<li>
<ul class="inheritance">
<li><a href="../../io/CloseableGroup.html" title="class in org.apache.iceberg.io">org.apache.iceberg.io.CloseableGroup</a></li>
<li>
<ul class="inheritance">
<li>org.apache.iceberg.arrow.vectorized.ArrowReader</li>
</ul>
</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<dl>
<dt>All Implemented Interfaces:</dt>
<dd><code>java.io.Closeable</code>, <code>java.lang.AutoCloseable</code></dd>
</dl>
<hr>
<pre>public class <span class="typeNameLabel">ArrowReader</span>
extends <a href="../../io/CloseableGroup.html" title="class in org.apache.iceberg.io">CloseableGroup</a></pre>
<div class="block">Vectorized reader that returns an iterator of <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a>. See <a href="#open(org.apache.iceberg.io.CloseableIterable)"><code>open(CloseableIterable)</code></a> to learn about the behavior of the iterator.
<p>The following Iceberg data types are supported and have been tested:
<ul>
<li>Iceberg: <a href="../../types/Types.BooleanType.html" title="class in org.apache.iceberg.types"><code>Types.BooleanType</code></a>, Arrow: <code>Types.MinorType.BIT</code>
<li>Iceberg: <a href="../../types/Types.IntegerType.html" title="class in org.apache.iceberg.types"><code>Types.IntegerType</code></a>, Arrow: <code>Types.MinorType.INT</code>
<li>Iceberg: <a href="../../types/Types.LongType.html" title="class in org.apache.iceberg.types"><code>Types.LongType</code></a>, Arrow: <code>Types.MinorType.BIGINT</code>
<li>Iceberg: <a href="../../types/Types.FloatType.html" title="class in org.apache.iceberg.types"><code>Types.FloatType</code></a>, Arrow: <code>Types.MinorType.FLOAT4</code>
<li>Iceberg: <a href="../../types/Types.DoubleType.html" title="class in org.apache.iceberg.types"><code>Types.DoubleType</code></a>, Arrow: <code>Types.MinorType.FLOAT8</code>
<li>Iceberg: <a href="../../types/Types.StringType.html" title="class in org.apache.iceberg.types"><code>Types.StringType</code></a>, Arrow: <code>Types.MinorType.VARCHAR</code>
<li>Iceberg: <a href="../../types/Types.TimestampType.html" title="class in org.apache.iceberg.types"><code>Types.TimestampType</code></a> (both with and without timezone), Arrow: <code>Types.MinorType.TIMEMICRO</code>
<li>Iceberg: <a href="../../types/Types.BinaryType.html" title="class in org.apache.iceberg.types"><code>Types.BinaryType</code></a>, Arrow: <code>Types.MinorType.VARBINARY</code>
<li>Iceberg: <a href="../../types/Types.DateType.html" title="class in org.apache.iceberg.types"><code>Types.DateType</code></a>, Arrow: <code>Types.MinorType.DATEDAY</code>
<li>Iceberg: <a href="../../types/Types.TimeType.html" title="class in org.apache.iceberg.types"><code>Types.TimeType</code></a>, Arrow: <code>Types.MinorType.TIMEMICRO</code>
<li>Iceberg: <a href="../../types/Types.UUIDType.html" title="class in org.apache.iceberg.types"><code>Types.UUIDType</code></a>, Arrow: <code>Types.MinorType.FIXEDSIZEBINARY</code>(16)
</ul>
<p>Features that don't work in this implementation:
<ul>
<li>Type promotion: In case of type promotion, the Arrow vector corresponding to the data type
in the parquet file is returned instead of the data type in the latest schema. See
https://github.com/apache/iceberg/issues/2483.
<li>Columns with constant values are physically encoded as a dictionary. The Arrow vector type
is int32 instead of the type as per the schema. See
https://github.com/apache/iceberg/issues/2484.
<li>Data types: <a href="../../types/Types.ListType.html" title="class in org.apache.iceberg.types"><code>Types.ListType</code></a>, <a href="../../types/Types.MapType.html" title="class in org.apache.iceberg.types"><code>Types.MapType</code></a>, <a href="../../types/Types.StructType.html" title="class in org.apache.iceberg.types"><code>Types.StructType</code></a>, <a href="../../types/Types.FixedType.html" title="class in org.apache.iceberg.types"><code>Types.FixedType</code></a> and <a href="../../types/Types.DecimalType.html" title="class in org.apache.iceberg.types"><code>Types.DecimalType</code></a>. See
https://github.com/apache/iceberg/issues/2485 and
https://github.com/apache/iceberg/issues/2486.
<li>Delete files are not supported. See https://github.com/apache/iceberg/issues/2487.
</ul></div>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<section>
<ul class="blockList">
<li class="blockList"><a id="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Constructor</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tr class="altColor">
<th class="colConstructorName" scope="row"><code><span class="memberNameLink"><a href="#%3Cinit%3E(org.apache.iceberg.TableScan,int,boolean)">ArrowReader</a></span>&#8203;(<a href="../../TableScan.html" title="interface in org.apache.iceberg">TableScan</a>&nbsp;scan,
int&nbsp;batchSize,
boolean&nbsp;reuseContainers)</code></th>
<td class="colLast">
<div class="block">Create a new instance of the reader.</div>
</td>
</tr>
</table>
</li>
</ul>
</section>
<!-- ========== METHOD SUMMARY =========== -->
<section>
<ul class="blockList">
<li class="blockList"><a id="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colSecond" scope="col">Method</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>void</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#close()">close</a></span>()</code></th>
<td class="colLast">
<div class="block">Close all the registered resources.</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code><a href="../../io/CloseableIterator.html" title="interface in org.apache.iceberg.io">CloseableIterator</a>&lt;<a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized">ColumnarBatch</a>&gt;</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#open(org.apache.iceberg.io.CloseableIterable)">open</a></span>&#8203;(<a href="../../io/CloseableIterable.html" title="interface in org.apache.iceberg.io">CloseableIterable</a>&lt;<a href="../../CombinedScanTask.html" title="interface in org.apache.iceberg">CombinedScanTask</a>&gt;&nbsp;tasks)</code></th>
<td class="colLast">
<div class="block">Returns a new iterator of <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a> objects.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a id="methods.inherited.from.class.org.apache.iceberg.io.CloseableGroup">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;org.apache.iceberg.io.<a href="../../io/CloseableGroup.html" title="class in org.apache.iceberg.io">CloseableGroup</a></h3>
<code><a href="../../io/CloseableGroup.html#addCloseable(java.io.Closeable)">addCloseable</a>, <a href="../../io/CloseableGroup.html#addCloseable(java.lang.AutoCloseable)">addCloseable</a>, <a href="../../io/CloseableGroup.html#setSuppressCloseFailure(boolean)">setSuppressCloseFailure</a></code></li>
</ul>
<ul class="blockList">
<li class="blockList"><a id="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
</li>
</ul>
</section>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<section>
<ul class="blockList">
<li class="blockList"><a id="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a id="&lt;init&gt;(org.apache.iceberg.TableScan,int,boolean)">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>ArrowReader</h4>
<pre>public&nbsp;ArrowReader&#8203;(<a href="../../TableScan.html" title="interface in org.apache.iceberg">TableScan</a>&nbsp;scan,
int&nbsp;batchSize,
boolean&nbsp;reuseContainers)</pre>
<div class="block">Create a new instance of the reader.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>scan</code> - the table scan object.</dd>
<dd><code>batchSize</code> - the maximum number of rows per Arrow batch.</dd>
<dd><code>reuseContainers</code> - whether to reuse Arrow vectors when iterating through the data. If set
to <code>false</code>, every <code>Iterator.next()</code> call creates new instances of Arrow
vectors. If set to <code>true</code>, the Arrow vectors in the previous <code>Iterator.next()</code>
may be reused for the data returned in the current <code>Iterator.next()</code>. This option
avoids allocating memory again and again. Irrespective of the value of <code>
reuseContainers</code>, the Arrow vectors in the previous <code>Iterator.next()</code> call are closed
before creating new instances in the current <code>Iterator.next()</code>.</dd>
</dl>
</li>
</ul>
</li>
</ul>
</section>
<!-- ============ METHOD DETAIL ========== -->
<section>
<ul class="blockList">
<li class="blockList"><a id="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a id="open(org.apache.iceberg.io.CloseableIterable)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>open</h4>
<pre class="methodSignature">public&nbsp;<a href="../../io/CloseableIterator.html" title="interface in org.apache.iceberg.io">CloseableIterator</a>&lt;<a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized">ColumnarBatch</a>&gt;&nbsp;open&#8203;(<a href="../../io/CloseableIterable.html" title="interface in org.apache.iceberg.io">CloseableIterable</a>&lt;<a href="../../CombinedScanTask.html" title="interface in org.apache.iceberg">CombinedScanTask</a>&gt;&nbsp;tasks)</pre>
<div class="block">Returns a new iterator of <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a> objects.
<p>Note that the reader owns the <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a> objects and takes care of closing them.
The caller should not hold onto a <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a> or try to close them.
<p>If <code>reuseContainers</code> is <code>false</code>, the Arrow vectors in the previous <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a> are closed before returning the next <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a> object. This implies
that the caller should either use the <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a> or transfer the ownership of <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a> before getting the next <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a>.
<p>If <code>reuseContainers</code> is <code>true</code>, the Arrow vectors in the previous <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a> may be reused for the next <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a>. This implies that the caller
should either use the <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a> or deep copy the <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a> before
getting the next <a href="ColumnarBatch.html" title="class in org.apache.iceberg.arrow.vectorized"><code>ColumnarBatch</code></a>.
<p>This method works only when the following conditions are true:
<ol>
<li>At least one column is queried,
<li>There are no delete files, and
<li>Supported data types are queried (see <code>SUPPORTED_TYPES</code> and the list of supported types in the class description above).
</ol>
When any of these conditions fail, an <code>UnsupportedOperationException</code> is thrown.</div>
</li>
</ul>
<a id="close()">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>close</h4>
<pre class="methodSignature">public&nbsp;void&nbsp;close()
throws java.io.IOException</pre>
<div class="block"><span class="descfrmTypeLabel">Description copied from class:&nbsp;<code><a href="../../io/CloseableGroup.html#close()">CloseableGroup</a></code></span></div>
<div class="block">Close all the registered resources. The close method of each resource will only be called once.
Checked exceptions from AutoCloseable resources will be wrapped in a runtime exception.</div>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code>close</code>&nbsp;in interface&nbsp;<code>java.lang.AutoCloseable</code></dd>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code>close</code>&nbsp;in interface&nbsp;<code>java.io.Closeable</code></dd>
<dt><span class="overrideSpecifyLabel">Overrides:</span></dt>
<dd><code><a href="../../io/CloseableGroup.html#close()">close</a></code>&nbsp;in class&nbsp;<code><a href="../../io/CloseableGroup.html" title="class in org.apache.iceberg.io">CloseableGroup</a></code></dd>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code>java.io.IOException</code></dd>
</dl>
</li>
</ul>
</li>
</ul>
</section>
</li>
</ul>
</div>
</div>
</main>
<!-- ========= END OF CLASS DATA ========= -->
<footer role="contentinfo">
<nav role="navigation">
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a id="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a id="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../index.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../allclasses.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the bottom "All Classes" link only when this page is the top-level
// window; hide it otherwise.
allClassesLink = document.getElementById("allclasses_navbar_bottom");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a id="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
</nav>
</footer>
</body>
</html>