blob: ea1425a50d35d8ae221f0fa13643b99694f2ec44 [file] [log] [blame]
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>org.apache.drill.exec.physical.impl.scan (Drill : 1.20.3 API)</title>
<link rel="stylesheet" type="text/css" href="../../../../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
try {
if (location.href.indexOf('is-external=true') == -1) {
parent.document.title="org.apache.drill.exec.physical.impl.scan (Drill : 1.20.3 API)";
}
}
catch(err) {
}
//-->
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../../overview-summary.html">Overview</a></li>
<li class="navBarCell1Rev">Package</li>
<li>Class</li>
<li><a href="package-use.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../../../org/apache/drill/exec/physical/impl/rangepartitioner/package-summary.html">Prev&nbsp;Package</a></li>
<li><a href="../../../../../../../org/apache/drill/exec/physical/impl/scan/columns/package-summary.html">Next&nbsp;Package</a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../../index.html?org/apache/drill/exec/physical/impl/scan/package-summary.html" target="_top">Frames</a></li>
<li><a href="package-summary.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_top");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<div class="header">
<h1 title="Package" class="title">Package&nbsp;org.apache.drill.exec.physical.impl.scan</h1>
<div class="docSummary">
<div class="block">Defines the scan operation implementation.</div>
</div>
<p>See:&nbsp;<a href="#package.description">Description</a></p>
</div>
<div class="contentContainer">
<ul class="blockList">
<li class="blockList">
<table class="typeSummary" border="0" cellpadding="3" cellspacing="0" summary="Interface Summary table, listing interfaces, and an explanation">
<caption><span>Interface Summary</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Interface</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tbody>
<tr class="altColor">
<td class="colFirst"><a href="../../../../../../../org/apache/drill/exec/physical/impl/scan/RowBatchReader.html" title="interface in org.apache.drill.exec.physical.impl.scan">RowBatchReader</a></td>
<td class="colLast">
<div class="block">Extended version of a record reader used by the revised
scan batch operator.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><a href="../../../../../../../org/apache/drill/exec/physical/impl/scan/ScanOperatorEvents.html" title="interface in org.apache.drill.exec.physical.impl.scan">ScanOperatorEvents</a></td>
<td class="colLast">
<div class="block">Interface to the set of readers, and reader schema, that the scan operator
manages.</div>
</td>
</tr>
</tbody>
</table>
</li>
<li class="blockList">
<table class="typeSummary" border="0" cellpadding="3" cellspacing="0" summary="Class Summary table, listing classes, and an explanation">
<caption><span>Class Summary</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Class</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tbody>
<tr class="altColor">
<td class="colFirst"><a href="../../../../../../../org/apache/drill/exec/physical/impl/scan/ScanOperatorExec.html" title="class in org.apache.drill.exec.physical.impl.scan">ScanOperatorExec</a></td>
<td class="colLast">
<div class="block">Implementation of the revised scan operator that uses a mutator aware of
batch sizes.</div>
</td>
</tr>
</tbody>
</table>
</li>
</ul>
<a name="package.description">
<!-- -->
</a>
<h2 title="Package org.apache.drill.exec.physical.impl.scan Description">Package org.apache.drill.exec.physical.impl.scan Description</h2>
<div class="block">Defines the scan operation implementation. The scan operator is a generic mechanism
that fits into the Drill Volcano-based iterator protocol to return record batches
from one or more readers.
<p>
Two versions of the scan operator exist:<ul>
<li><code>ScanBatch</code>: the original version that uses readers based on the
<code>RecordReader</code> interface. <tt>ScanBatch</tt> cannot, however, handle
limited-length vectors.</li>
<li><code>ScanOperatorExec</code>: the revised version that uses a more modular
design and that offers a mutator that is a bit easier to use, and can limit
vector sizes.</li></ul>
New code should use the new version; existing code will continue to use the
<tt>ScanBatch</tt> version until all readers are converted to the new format.
<p>
Further, the new version is designed to allow intensive unit testing without
the need for the Drill server. New readers should exploit this feature to
include intensive tests to keep Drill quality high.
<p>
See <code>ScanOperatorExec</code> for details of the scan operator protocol
and components.
<h4>Traditional Class Structure</h4>
The original design was simple, but required each reader to handle many
detailed tasks.
<pre><code>
+------------+ +-----------+
| Scan Batch | +---> | ScanBatch |
| Creator | | +-----------+
+------------+ | |
| | |
v | |
+------------+ | v
| Format | ---+ +---------------+
| Plugin | -----> | Record Reader |
+------------+ +---------------+
</code></pre>
The scan batch creator is unique to each storage plugin and is created
based on the physical operator configuration ("pop config"). The
scan batch creator delegates to the format plugin to create both the
scan batch (the scan operator) and the set of readers which the scan
batch will manage.
<p>
The scan batch
provides a <code>Mutator</code> that creates the vectors used by the
record readers. Schema continuity comes from reusing the Mutator from one
file/block to the next.
<p>
One characteristic of this system is that all the record readers are
created up front. If we must read 1000 blocks, we'll create 1000 record
readers. Developers must be very careful to only allocate resources when
the reader is opened, and release resources when the reader is closed.
Otherwise, resource bloat becomes a large problem.
<h4>Revised Class Structure</h4>
The new design is more complex because it divides tasks up into separate
classes. The class structure is larger, but each class is smaller, more
focused and does just one task.
<pre><code>
+------------+ +---------------+
| Scan Batch | -------> | Format Plugin |
| Creator | +---------------+
+------------+ / | \
/ | \
+---------------------+ | \ +---------------+
| OperatorRecordBatch | | +---->| ScanFramework |
+---------------------+ | | +---------------+
v | |
+------------------+ |
| ScanOperatorExec | |
+------------------+ v
| +--------------+
+----------> | Batch Reader |
+--------------+
</code></pre>
Here, the scan batch creator again delegates to the format plugin. The
format plugin creates three objects:
<ul>
<li>The <code>OperatorRecordBatch</code>, which encapsulates the Volcano
iterator protocol. It also holds onto the output batch. This allows the
operator implementation to just focus on its specific job.</li>
<li>The <code>ScanOperatorExec</code> is the operator implementation for
the new result-set-loader based scan.</li>
<li>The scan framework is specific to each kind of reader. It handles
everything which is unique to that reader. Rather than inheriting from
the scan itself, the framework follows the strategy pattern: it says how
to do a scan for the target format.</li>
</ul>
The overall structure uses the "composition" pattern: what is combined
into a small set of classes in the traditional model is broken out into
focused classes in the revised model.
<p>
A key part of the scan strategy is the batch reader. ("Batch" because
it reads an entire batch at a time, using the result set loader.) The
framework creates batch readers one by one as needed. Resource bloat
is less of an issue because only one batch reader instance exists at
any time for each scan operator instance.
<p>
Each of the above is further broken down into additional classes to
handle projection and so on.</div>
</div>
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../../../../overview-summary.html">Overview</a></li>
<li class="navBarCell1Rev">Package</li>
<li>Class</li>
<li><a href="package-use.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../../../../org/apache/drill/exec/physical/impl/rangepartitioner/package-summary.html">Prev&nbsp;Package</a></li>
<li><a href="../../../../../../../org/apache/drill/exec/physical/impl/scan/columns/package-summary.html">Next&nbsp;Package</a></li>
</ul>
<ul class="navList">
<li><a href="../../../../../../../index.html?org/apache/drill/exec/physical/impl/scan/package-summary.html" target="_top">Frames</a></li>
<li><a href="package-summary.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_bottom");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<p class="legalCopy"><small>Copyright &#169; 1970 <a href="https://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</small></p>
</body>
</html>