<!-- blob: 65155d424d86c93c766fb9969bb9bebe7494fd61 [file] [log] [blame] -->
<!DOCTYPE HTML>
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc -->
<title>SizeBasedFileRewriter</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
<link rel="stylesheet" type="text/css" href="../../../../jquery/jquery-ui.css" title="Style">
<script type="text/javascript" src="../../../../script.js"></script>
<script type="text/javascript" src="../../../../jquery/jszip/dist/jszip.min.js"></script>
<script type="text/javascript" src="../../../../jquery/jszip-utils/dist/jszip-utils.min.js"></script>
<!--[if IE]>
<script type="text/javascript" src="../../../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
<![endif]-->
<script type="text/javascript" src="../../../../jquery/jquery-3.5.1.js"></script>
<script type="text/javascript" src="../../../../jquery/jquery-ui.js"></script>
</head>
<body>
<script type="text/javascript"><!--
// Copy the class name into the title of the parent window's document
// (which is this page itself when it is not embedded), unless the URL
// carries the 'is-external=true' marker.
try {
    if (location.href.indexOf('is-external=true') < 0) {
        parent.document.title = "SizeBasedFileRewriter";
    }
} catch (ignored) {
    // Best effort: a failure while touching the parent document is
    // deliberately swallowed so the rest of the page still loads.
}
//-->
// Page-level settings declared as globals before loadScripts() runs.
// NOTE(review): presumably these are read by the shared script.js; confirm there.
//
// `data` is keyed by the ids of the Method Summary rows ("i0".."i14"). Each
// value looks like a bitmask of the `tabs` keys (6 = 2|4 for the abstract
// rows, 10 = 2|8 for the concrete rows); TODO confirm against script.js.
var data = {
    "i0": 6,
    "i1": 10,
    "i2": 10,
    "i3": 6,
    "i4": 6,
    "i5": 10,
    "i6": 10,
    "i7": 10,
    "i8": 10,
    "i9": 10,
    "i10": 10,
    "i11": 10,
    "i12": 10,
    "i13": 10,
    "i14": 10
};
// Tab key mapped to [caption span id, caption label] for the Method Summary table.
var tabs = {
    65535: ["t0", "All Methods"],
    2: ["t2", "Instance Methods"],
    4: ["t3", "Abstract Methods"],
    8: ["t4", "Concrete Methods"]
};
// CSS class names used for the striped rows and for the tab captions.
var altColor = "altColor",
    rowColor = "rowColor",
    tableTab = "tableTab",
    activeTableTab = "activeTableTab";
// Relative path from this page back to the documentation root.
var pathtoroot = "../../../../";
var useModuleDirectories = true;
loadScripts(document, 'script');
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<header role="banner">
<nav role="navigation">
<div class="fixedNav">
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a id="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a id="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../index.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../allclasses.html">All&nbsp;Classes</a></li>
</ul>
<ul class="navListSearch">
<li><label for="search">SEARCH:</label>
<input type="text" id="search" value="search" disabled="disabled">
<input type="reset" id="reset" value="reset" disabled="disabled">
</li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the "All Classes" list only when this page is the top-level window;
// hide it when the page is displayed inside another window.
// The assignment has no `var`, so allClassesLink is an implicit global.
allClassesLink = document.getElementById("allclasses_navbar_top");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li><a href="#field.summary">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li><a href="#field.detail">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a id="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
</div>
<div class="navPadding">&nbsp;</div>
<script type="text/javascript"><!--
// Give the .navPadding spacer a top padding equal to the computed height of
// the .fixedNav container (presumably so content starts below the navbar).
(function () {
    var spacer = $('.navPadding');
    spacer.css('padding-top', $('.fixedNav').css("height"));
})();
//-->
</script>
</nav>
</header>
<!-- ======== START OF CLASS DATA ======== -->
<main role="main">
<div class="header">
<div class="subTitle"><span class="packageLabelInType">Package</span>&nbsp;<a href="package-summary.html">org.apache.iceberg.actions</a></div>
<h2 title="Class SizeBasedFileRewriter" class="title">Class SizeBasedFileRewriter&lt;T extends <a href="../ContentScanTask.html" title="interface in org.apache.iceberg">ContentScanTask</a>&lt;F&gt;,&#8203;F extends <a href="../ContentFile.html" title="interface in org.apache.iceberg">ContentFile</a>&lt;F&gt;&gt;</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>java.lang.Object</li>
<li>
<ul class="inheritance">
<li>org.apache.iceberg.actions.SizeBasedFileRewriter&lt;T,&#8203;F&gt;</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<dl>
<dt>All Implemented Interfaces:</dt>
<dd><code><a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a>&lt;T,&#8203;F&gt;</code></dd>
</dl>
<dl>
<dt>Direct Known Subclasses:</dt>
<dd><code><a href="SizeBasedDataRewriter.html" title="class in org.apache.iceberg.actions">SizeBasedDataRewriter</a></code>, <code><a href="SizeBasedPositionDeletesRewriter.html" title="class in org.apache.iceberg.actions">SizeBasedPositionDeletesRewriter</a></code></dd>
</dl>
<hr>
<pre>public abstract class <span class="typeNameLabel">SizeBasedFileRewriter&lt;T extends <a href="../ContentScanTask.html" title="interface in org.apache.iceberg">ContentScanTask</a>&lt;F&gt;,&#8203;F extends <a href="../ContentFile.html" title="interface in org.apache.iceberg">ContentFile</a>&lt;F&gt;&gt;</span>
extends java.lang.Object
implements <a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a>&lt;T,&#8203;F&gt;</pre>
<div class="block">A file rewriter that determines which files to rewrite based on their size.
<p>If files are smaller than the <a href="#MIN_FILE_SIZE_BYTES"><code>MIN_FILE_SIZE_BYTES</code></a> threshold or larger than the <a href="#MAX_FILE_SIZE_BYTES"><code>MAX_FILE_SIZE_BYTES</code></a> threshold, they are considered targets for being rewritten.
<p>Once selected, files are grouped based on the <a href="../util/BinPacking.html" title="class in org.apache.iceberg.util"><code>bin-packing algorithm</code></a> into
groups of no more than <a href="#MAX_FILE_GROUP_SIZE_BYTES"><code>MAX_FILE_GROUP_SIZE_BYTES</code></a>. Groups will actually be rewritten if
they contain more than <a href="#MIN_INPUT_FILES"><code>MIN_INPUT_FILES</code></a> or if they would produce at least one file of
<a href="#TARGET_FILE_SIZE_BYTES"><code>TARGET_FILE_SIZE_BYTES</code></a>.
<p>Note that implementations may add extra conditions for selecting files or filtering groups.</div>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- =========== FIELD SUMMARY =========== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="field.summary">
<!-- -->
</a>
<h3>Field Summary</h3>
<table class="memberSummary">
<caption><span>Fields</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colSecond" scope="col">Field</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MAX_FILE_GROUP_SIZE_BYTES">MAX_FILE_GROUP_SIZE_BYTES</a></span></code></th>
<td class="colLast">
<div class="block">This option controls the largest amount of data that should be rewritten in a single file
group.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MAX_FILE_GROUP_SIZE_BYTES_DEFAULT">MAX_FILE_GROUP_SIZE_BYTES_DEFAULT</a></span></code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MAX_FILE_SIZE_BYTES">MAX_FILE_SIZE_BYTES</a></span></code></th>
<td class="colLast">
<div class="block">Controls which files will be considered for rewriting.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static double</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MAX_FILE_SIZE_DEFAULT_RATIO">MAX_FILE_SIZE_DEFAULT_RATIO</a></span></code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_FILE_SIZE_BYTES">MIN_FILE_SIZE_BYTES</a></span></code></th>
<td class="colLast">
<div class="block">Controls which files will be considered for rewriting.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static double</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_FILE_SIZE_DEFAULT_RATIO">MIN_FILE_SIZE_DEFAULT_RATIO</a></span></code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_INPUT_FILES">MIN_INPUT_FILES</a></span></code></th>
<td class="colLast">
<div class="block">Any file group exceeding this number of files will be rewritten regardless of other criteria.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static int</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_INPUT_FILES_DEFAULT">MIN_INPUT_FILES_DEFAULT</a></span></code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#REWRITE_ALL">REWRITE_ALL</a></span></code></th>
<td class="colLast">
<div class="block">Overrides other options and forces rewriting of all provided files.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static boolean</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#REWRITE_ALL_DEFAULT">REWRITE_ALL_DEFAULT</a></span></code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#TARGET_FILE_SIZE_BYTES">TARGET_FILE_SIZE_BYTES</a></span></code></th>
<td class="colLast">
<div class="block">The target output file size that this file rewriter will attempt to generate.</div>
</td>
</tr>
</table>
</li>
</ul>
</section>
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier</th>
<th class="colSecond" scope="col">Constructor</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>protected </code></td>
<th class="colConstructorName" scope="row"><code><span class="memberNameLink"><a href="#%3Cinit%3E(org.apache.iceberg.Table)">SizeBasedFileRewriter</a></span>&#8203;(<a href="../Table.html" title="interface in org.apache.iceberg">Table</a>&nbsp;table)</code></th>
<td class="colLast">&nbsp;</td>
</tr>
</table>
</li>
</ul>
</section>
<!-- ========== METHOD SUMMARY =========== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t3" class="tableTab"><span><a href="javascript:show(4);">Abstract Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colSecond" scope="col">Method</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>protected abstract long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#defaultTargetFileSize()">defaultTargetFileSize</a></span>()</code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>protected boolean</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#enoughContent(java.util.List)">enoughContent</a></span>&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;group)</code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr id="i2" class="altColor">
<td class="colFirst"><code>protected boolean</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#enoughInputFiles(java.util.List)">enoughInputFiles</a></span>&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;group)</code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr id="i3" class="rowColor">
<td class="colFirst"><code>protected abstract java.lang.Iterable&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt;</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#filterFileGroups(java.util.List)">filterFileGroups</a></span>&#8203;(java.util.List&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt;&nbsp;groups)</code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr id="i4" class="altColor">
<td class="colFirst"><code>protected abstract java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#filterFiles(java.lang.Iterable)">filterFiles</a></span>&#8203;(java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;tasks)</code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr id="i5" class="rowColor">
<td class="colFirst"><code>void</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#init(java.util.Map)">init</a></span>&#8203;(java.util.Map&lt;java.lang.String,&#8203;java.lang.String&gt;&nbsp;options)</code></th>
<td class="colLast">
<div class="block">Initializes this rewriter using provided options.</div>
</td>
</tr>
<tr id="i6" class="altColor">
<td class="colFirst"><code>protected long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#inputSize(java.util.List)">inputSize</a></span>&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;group)</code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr id="i7" class="rowColor">
<td class="colFirst"><code>protected long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#numOutputFiles(long)">numOutputFiles</a></span>&#8203;(long&nbsp;inputSize)</code></th>
<td class="colLast">
<div class="block">Determines the preferable number of output files when rewriting a particular file group.</div>
</td>
</tr>
<tr id="i8" class="altColor">
<td class="colFirst"><code>java.lang.Iterable&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt;</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#planFileGroups(java.lang.Iterable)">planFileGroups</a></span>&#8203;(java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;tasks)</code></th>
<td class="colLast">
<div class="block">Selects files which this rewriter believes are valid targets to be rewritten based on their
scan tasks and groups those scan tasks into file groups.</div>
</td>
</tr>
<tr id="i9" class="rowColor">
<td class="colFirst"><code>protected long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#splitSize(long)">splitSize</a></span>&#8203;(long&nbsp;inputSize)</code></th>
<td class="colLast">
<div class="block">Returns the smallest of our max write file threshold and our estimated split size based on the
number of output files we want to generate.</div>
</td>
</tr>
<tr id="i10" class="altColor">
<td class="colFirst"><code>protected <a href="../Table.html" title="interface in org.apache.iceberg">Table</a></code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#table()">table</a></span>()</code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr id="i11" class="rowColor">
<td class="colFirst"><code>protected boolean</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#tooMuchContent(java.util.List)">tooMuchContent</a></span>&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;group)</code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr id="i12" class="altColor">
<td class="colFirst"><code>java.util.Set&lt;java.lang.String&gt;</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#validOptions()">validOptions</a></span>()</code></th>
<td class="colLast">
<div class="block">Returns a set of supported options for this rewriter.</div>
</td>
</tr>
<tr id="i13" class="rowColor">
<td class="colFirst"><code>protected long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#writeMaxFileSize()">writeMaxFileSize</a></span>()</code></th>
<td class="colLast">
<div class="block">Estimates a larger max target file size than the target size used in task creation to avoid
creating tiny remainder files.</div>
</td>
</tr>
<tr id="i14" class="altColor">
<td class="colFirst"><code>protected boolean</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#wronglySized(T)">wronglySized</a></span>&#8203;(<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&nbsp;task)</code></th>
<td class="colLast">&nbsp;</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a id="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
<ul class="blockList">
<li class="blockList"><a id="methods.inherited.from.class.org.apache.iceberg.actions.FileRewriter">
<!-- -->
</a>
<h3>Methods inherited from interface&nbsp;org.apache.iceberg.actions.<a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a></h3>
<code><a href="FileRewriter.html#description()">description</a>, <a href="FileRewriter.html#rewrite(java.util.List)">rewrite</a></code></li>
</ul>
</li>
</ul>
</section>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ============ FIELD DETAIL =========== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="field.detail">
<!-- -->
</a>
<h3>Field Detail</h3>
<a id="TARGET_FILE_SIZE_BYTES">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>TARGET_FILE_SIZE_BYTES</h4>
<pre>public static final&nbsp;java.lang.String TARGET_FILE_SIZE_BYTES</pre>
<div class="block">The target output file size that this file rewriter will attempt to generate.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.TARGET_FILE_SIZE_BYTES">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MIN_FILE_SIZE_BYTES">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MIN_FILE_SIZE_BYTES</h4>
<pre>public static final&nbsp;java.lang.String MIN_FILE_SIZE_BYTES</pre>
<div class="block">Controls which files will be considered for rewriting. Files with sizes under this threshold
will be considered for rewriting regardless of any other criteria.
<p>Defaults to 75% of the target file size.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MIN_FILE_SIZE_BYTES">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MIN_FILE_SIZE_DEFAULT_RATIO">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MIN_FILE_SIZE_DEFAULT_RATIO</h4>
<pre>public static final&nbsp;double MIN_FILE_SIZE_DEFAULT_RATIO</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MIN_FILE_SIZE_DEFAULT_RATIO">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MAX_FILE_SIZE_BYTES">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MAX_FILE_SIZE_BYTES</h4>
<pre>public static final&nbsp;java.lang.String MAX_FILE_SIZE_BYTES</pre>
<div class="block">Controls which files will be considered for rewriting. Files with sizes above this threshold
will be considered for rewriting regardless of any other criteria.
<p>Defaults to 180% of the target file size.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MAX_FILE_SIZE_BYTES">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MAX_FILE_SIZE_DEFAULT_RATIO">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MAX_FILE_SIZE_DEFAULT_RATIO</h4>
<pre>public static final&nbsp;double MAX_FILE_SIZE_DEFAULT_RATIO</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MAX_FILE_SIZE_DEFAULT_RATIO">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MIN_INPUT_FILES">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MIN_INPUT_FILES</h4>
<pre>public static final&nbsp;java.lang.String MIN_INPUT_FILES</pre>
<div class="block">Any file group exceeding this number of files will be rewritten regardless of other criteria.
This config ensures file groups that contain many files are compacted even if the total size of
that group is less than the target file size. This can also be thought of as the maximum number
of wrongly sized files that could remain in a partition after rewriting.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MIN_INPUT_FILES">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MIN_INPUT_FILES_DEFAULT">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MIN_INPUT_FILES_DEFAULT</h4>
<pre>public static final&nbsp;int MIN_INPUT_FILES_DEFAULT</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MIN_INPUT_FILES_DEFAULT">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="REWRITE_ALL">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>REWRITE_ALL</h4>
<pre>public static final&nbsp;java.lang.String REWRITE_ALL</pre>
<div class="block">Overrides other options and forces rewriting of all provided files.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.REWRITE_ALL">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="REWRITE_ALL_DEFAULT">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>REWRITE_ALL_DEFAULT</h4>
<pre>public static final&nbsp;boolean REWRITE_ALL_DEFAULT</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.REWRITE_ALL_DEFAULT">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MAX_FILE_GROUP_SIZE_BYTES">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MAX_FILE_GROUP_SIZE_BYTES</h4>
<pre>public static final&nbsp;java.lang.String MAX_FILE_GROUP_SIZE_BYTES</pre>
<div class="block">This option controls the largest amount of data that should be rewritten in a single file
group. It helps with breaking down the rewriting of very large partitions which may not be
rewritable otherwise due to the resource constraints of the cluster. For example, a sort-based
rewrite may not scale to TB-sized partitions, and those partitions need to be worked on in
small subsections to avoid exhaustion of resources.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MAX_FILE_GROUP_SIZE_BYTES">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MAX_FILE_GROUP_SIZE_BYTES_DEFAULT">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>MAX_FILE_GROUP_SIZE_BYTES_DEFAULT</h4>
<pre>public static final&nbsp;long MAX_FILE_GROUP_SIZE_BYTES_DEFAULT</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MAX_FILE_GROUP_SIZE_BYTES_DEFAULT">Constant Field Values</a></dd>
</dl>
</li>
</ul>
</li>
</ul>
</section>
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a id="&lt;init&gt;(org.apache.iceberg.Table)">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>SizeBasedFileRewriter</h4>
<pre>protected&nbsp;SizeBasedFileRewriter&#8203;(<a href="../Table.html" title="interface in org.apache.iceberg">Table</a>&nbsp;table)</pre>
</li>
</ul>
</li>
</ul>
</section>
<!-- ============ METHOD DETAIL ========== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a id="defaultTargetFileSize()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>defaultTargetFileSize</h4>
<pre class="methodSignature">protected abstract&nbsp;long&nbsp;defaultTargetFileSize()</pre>
</li>
</ul>
<a id="filterFiles(java.lang.Iterable)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>filterFiles</h4>
<pre class="methodSignature">protected abstract&nbsp;java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;filterFiles&#8203;(java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;tasks)</pre>
</li>
</ul>
<a id="filterFileGroups(java.util.List)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>filterFileGroups</h4>
<pre class="methodSignature">protected abstract&nbsp;java.lang.Iterable&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt;&nbsp;filterFileGroups&#8203;(java.util.List&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt;&nbsp;groups)</pre>
</li>
</ul>
<a id="table()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>table</h4>
<pre class="methodSignature">protected&nbsp;<a href="../Table.html" title="interface in org.apache.iceberg">Table</a>&nbsp;table()</pre>
</li>
</ul>
<a id="validOptions()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>validOptions</h4>
<pre class="methodSignature">public&nbsp;java.util.Set&lt;java.lang.String&gt;&nbsp;validOptions()</pre>
<div class="block"><span class="descfrmTypeLabel">Description copied from interface:&nbsp;<code><a href="FileRewriter.html#validOptions()">FileRewriter</a></code></span></div>
<div class="block">Returns a set of supported options for this rewriter. Only options specified in this list will
be accepted at runtime. Any other options will be rejected.</div>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="FileRewriter.html#validOptions()">validOptions</a></code>&nbsp;in interface&nbsp;<code><a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a> extends <a href="../ContentScanTask.html" title="interface in org.apache.iceberg">ContentScanTask</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;,&#8203;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a> extends <a href="../ContentFile.html" title="interface in org.apache.iceberg">ContentFile</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;&gt;</code></dd>
</dl>
</li>
</ul>
<a id="init(java.util.Map)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>init</h4>
<pre class="methodSignature">public&nbsp;void&nbsp;init&#8203;(java.util.Map&lt;java.lang.String,&#8203;java.lang.String&gt;&nbsp;options)</pre>
<div class="block"><span class="descfrmTypeLabel">Description copied from interface:&nbsp;<code><a href="FileRewriter.html#init(java.util.Map)">FileRewriter</a></code></span></div>
<div class="block">Initializes this rewriter using provided options.</div>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="FileRewriter.html#init(java.util.Map)">init</a></code>&nbsp;in interface&nbsp;<code><a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a> extends <a href="../ContentScanTask.html" title="interface in org.apache.iceberg">ContentScanTask</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;,&#8203;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a> extends <a href="../ContentFile.html" title="interface in org.apache.iceberg">ContentFile</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;&gt;</code></dd>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>options</code> - options to initialize this rewriter</dd>
</dl>
</li>
</ul>
<a id="wronglySized(org.apache.iceberg.ContentScanTask)">
<!-- -->
</a><a id="wronglySized(T)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>wronglySized</h4>
<pre class="methodSignature">protected&nbsp;boolean&nbsp;wronglySized&#8203;(<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&nbsp;task)</pre>
</li>
</ul>
<a id="planFileGroups(java.lang.Iterable)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>planFileGroups</h4>
<pre class="methodSignature">public&nbsp;java.lang.Iterable&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt;&nbsp;planFileGroups&#8203;(java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;tasks)</pre>
<div class="block"><span class="descfrmTypeLabel">Description copied from interface:&nbsp;<code><a href="FileRewriter.html#planFileGroups(java.lang.Iterable)">FileRewriter</a></code></span></div>
<div class="block">Selects files which this rewriter believes are valid targets to be rewritten based on their
scan tasks and groups those scan tasks into file groups. The file groups are then rewritten in
a single executable unit, such as a Spark job.</div>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="FileRewriter.html#planFileGroups(java.lang.Iterable)">planFileGroups</a></code>&nbsp;in interface&nbsp;<code><a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a> extends <a href="../ContentScanTask.html" title="interface in org.apache.iceberg">ContentScanTask</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;,&#8203;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a> extends <a href="../ContentFile.html" title="interface in org.apache.iceberg">ContentFile</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;&gt;</code></dd>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>tasks</code> - an iterable of scan tasks for files in a partition</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>groups of scan tasks for files to be rewritten in a single executable unit</dd>
</dl>
</li>
</ul>
<a id="enoughInputFiles(java.util.List)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>enoughInputFiles</h4>
<pre class="methodSignature">protected&nbsp;boolean&nbsp;enoughInputFiles&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;group)</pre>
</li>
</ul>
<a id="enoughContent(java.util.List)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>enoughContent</h4>
<pre class="methodSignature">protected&nbsp;boolean&nbsp;enoughContent&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;group)</pre>
</li>
</ul>
<a id="tooMuchContent(java.util.List)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>tooMuchContent</h4>
<pre class="methodSignature">protected&nbsp;boolean&nbsp;tooMuchContent&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;group)</pre>
</li>
</ul>
<a id="inputSize(java.util.List)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>inputSize</h4>
<pre class="methodSignature">protected&nbsp;long&nbsp;inputSize&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&nbsp;group)</pre>
</li>
</ul>
<a id="splitSize(long)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>splitSize</h4>
<pre class="methodSignature">protected&nbsp;long&nbsp;splitSize&#8203;(long&nbsp;inputSize)</pre>
<div class="block">Returns the smaller of our max write file threshold and our estimated split size based on the
number of output files we want to generate. Adds an overhead onto the estimated split size to
try to avoid small errors in size creating brand-new files.</div>
</li>
</ul>
<a id="numOutputFiles(long)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>numOutputFiles</h4>
<pre class="methodSignature">protected&nbsp;long&nbsp;numOutputFiles&#8203;(long&nbsp;inputSize)</pre>
<div class="block">Determines the preferable number of output files when rewriting a particular file group.
<p>If the rewriter is handling 10.1 GB of data with a target file size of 1 GB, it could
produce 11 files, one of which would only have 0.1 GB. This would most likely be less
preferable than 10 files with 1.01 GB each. So this method decides whether to round up or round
down based on what the estimated average file size will be if the remainder (0.1 GB) is
distributed amongst other files. If the new average file size is no more than 10% greater than
the target file size, then this method will round down when determining the number of output
files. Otherwise, the remainder will be written into a separate file.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>inputSize</code> - a total input size for a file group</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>the number of files this rewriter should create</dd>
</dl>
</li>
</ul>
<a id="writeMaxFileSize()">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>writeMaxFileSize</h4>
<pre class="methodSignature">protected&nbsp;long&nbsp;writeMaxFileSize()</pre>
<div class="block">Estimates a larger max target file size than the target size used in task creation to avoid
creating tiny remainder files.
<p>While we create tasks that should all be smaller than our target size, there is a chance
that the actual data will end up being larger than our target size due to factors such as
compression and serialization, which are outside our control. If this occurs, instead of making a
single file that is close in size to our target, we would end up producing one file of the
target size, and then a small extra file with the remaining data.
<p>For example, if our target is 512 MB, we may generate a rewrite task that should be 500 MB.
When we write the data we may find we actually have to write out 530 MB. If we use the target
size while writing, we would produce a 512 MB file and an 18 MB file. If instead we use a
larger size estimated by this method, then we end up writing a single file.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>the target size plus one half of the distance between max and target</dd>
</dl>
</li>
</ul>
</li>
</ul>
</section>
</li>
</ul>
</div>
</div>
</main>
<!-- ========= END OF CLASS DATA ========= -->
<footer role="contentinfo">
<nav role="navigation">
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a id="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a id="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../index.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../allclasses.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the "All Classes" link in the bottom navbar only when this page is
// the top-level browsing context; hide it when the page is nested in a frame.
allClassesLink = document.getElementById("allclasses_navbar_bottom");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li><a href="#field.summary">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li><a href="#field.detail">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a id="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
</nav>
</footer>
</body>
</html>