blob: a24219e09d457e26b13cbb6d63c391cdafa13300 [file] [log] [blame]
<!DOCTYPE HTML>
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc -->
<title>BinPackStrategy</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
<link rel="stylesheet" type="text/css" href="../../../../jquery/jquery-ui.css" title="Style">
<script type="text/javascript" src="../../../../script.js"></script>
<script type="text/javascript" src="../../../../jquery/jszip/dist/jszip.min.js"></script>
<script type="text/javascript" src="../../../../jquery/jszip-utils/dist/jszip-utils.min.js"></script>
<!--[if IE]>
<script type="text/javascript" src="../../../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script>
<![endif]-->
<script type="text/javascript" src="../../../../jquery/jquery-3.5.1.js"></script>
<script type="text/javascript" src="../../../../jquery/jquery-ui.js"></script>
</head>
<body>
<script type="text/javascript"><!--
// Copy this page's title onto the parent window's document, unless the URL
// marks the page as an external link (is-external=true).
try {
    if (location.href.indexOf('is-external=true') < 0) {
        parent.document.title = "BinPackStrategy";
    }
} catch (err) {
    // Best effort only: if the parent document cannot be touched,
    // the title is simply left unchanged.
}
//-->
// Page-level configuration globals. They are not read anywhere in this page's
// inline code; presumably they are consumed by the external script.js (e.g. the
// show(...) function invoked by the method-summary tab links) -- TODO confirm.

// One entry per method-summary row; the keys match the row ids "i0".."i10".
// NOTE(review): the value 10 looks like a bitmask combining the tab keys
// 2 (Instance Methods) and 8 (Concrete Methods) -- confirm against script.js.
var data = {"i0":10,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":10,"i8":10,"i9":10,"i10":10};
// Tab key -> [id of the tab <span> in the table caption, visible tab label].
var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]};
// CSS class names used on the alternating summary-table rows.
var altColor = "altColor";
var rowColor = "rowColor";
// CSS class names used on the inactive / active tab spans in the caption.
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
// Relative path from this page to the documentation root (same prefix as the
// stylesheet and script links in <head>).
var pathtoroot = "../../../../";
// NOTE(review): presumably tells the shared scripts whether generated paths
// include module directories -- not verifiable from this page.
var useModuleDirectories = true;
loadScripts(document, 'script');</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<header role="banner">
<nav role="navigation">
<div class="fixedNav">
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a id="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a id="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../index.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../allclasses.html">All&nbsp;Classes</a></li>
</ul>
<ul class="navListSearch">
<li><label for="search">SEARCH:</label>
<input type="text" id="search" value="search" disabled="disabled">
<input type="reset" id="reset" value="reset" disabled="disabled">
</li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the top "All Classes" list only when this page is the top-level
// window; hide it when the page is displayed inside another window/frame.
allClassesLink = document.getElementById("allclasses_navbar_top");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li><a href="#field.summary">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li><a href="#field.detail">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a id="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
</div>
<div class="navPadding">&nbsp;</div>
<script type="text/javascript"><!--
(function () {
    // Give the spacer element a top padding equal to the computed height of
    // the fixed navigation bar.
    var fixedNavHeight = $('.fixedNav').css("height");
    $('.navPadding').css('padding-top', fixedNavHeight);
})();
//-->
</script>
</nav>
</header>
<!-- ======== START OF CLASS DATA ======== -->
<main role="main">
<div class="header">
<div class="subTitle"><span class="packageLabelInType">Package</span>&nbsp;<a href="package-summary.html">org.apache.iceberg.actions</a></div>
<h2 title="Class BinPackStrategy" class="title">Class BinPackStrategy</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li>java.lang.Object</li>
<li>
<ul class="inheritance">
<li>org.apache.iceberg.actions.BinPackStrategy</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<dl>
<dt>All Implemented Interfaces:</dt>
<dd><code>java.io.Serializable</code>, <code><a href="RewriteStrategy.html" title="interface in org.apache.iceberg.actions">RewriteStrategy</a></code></dd>
</dl>
<dl>
<dt>Direct Known Subclasses:</dt>
<dd><code><a href="SortStrategy.html" title="class in org.apache.iceberg.actions">SortStrategy</a></code>, <code><a href="../spark/actions/Spark3BinPackStrategy.html" title="class in org.apache.iceberg.spark.actions">Spark3BinPackStrategy</a></code></dd>
</dl>
<hr>
<pre>public abstract class <span class="typeNameLabel">BinPackStrategy</span>
extends java.lang.Object
implements <a href="RewriteStrategy.html" title="interface in org.apache.iceberg.actions">RewriteStrategy</a></pre>
<div class="block">A rewrite strategy for data files which determines which files to rewrite
based on their size. If files are either smaller than the <a href="#MIN_FILE_SIZE_BYTES"><code>MIN_FILE_SIZE_BYTES</code></a> threshold or
larger than the <a href="#MAX_FILE_SIZE_BYTES"><code>MAX_FILE_SIZE_BYTES</code></a> threshold, they are considered targets for being rewritten.
<p>
Once selected, files are grouped using <a href="../util/BinPacking.html" title="class in org.apache.iceberg.util"><code>BinPacking</code></a> into groups defined
by <a href="RewriteDataFiles.html#MAX_FILE_GROUP_SIZE_BYTES"><code>RewriteDataFiles.MAX_FILE_GROUP_SIZE_BYTES</code></a>. Groups will be considered for rewriting if they contain
more files than <a href="#MIN_INPUT_FILES"><code>MIN_INPUT_FILES</code></a> or would produce at least one file of
<a href="RewriteDataFiles.html#TARGET_FILE_SIZE_BYTES"><code>RewriteDataFiles.TARGET_FILE_SIZE_BYTES</code></a>.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../serialized-form.html#org.apache.iceberg.actions.BinPackStrategy">Serialized Form</a></dd>
</dl>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- =========== FIELD SUMMARY =========== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="field.summary">
<!-- -->
</a>
<h3>Field Summary</h3>
<table class="memberSummary">
<caption><span>Fields</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colSecond" scope="col">Field</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#DELETE_FILE_THRESHOLD">DELETE_FILE_THRESHOLD</a></span></code></th>
<td class="colLast">
<div class="block">The minimum number of deletes that needs to be associated with a data file for it to be considered for rewriting.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static int</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#DELETE_FILE_THRESHOLD_DEFAULT">DELETE_FILE_THRESHOLD_DEFAULT</a></span></code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MAX_FILE_SIZE_BYTES">MAX_FILE_SIZE_BYTES</a></span></code></th>
<td class="colLast">
<div class="block">Adjusts files which will be considered for rewriting.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static double</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MAX_FILE_SIZE_DEFAULT_RATIO">MAX_FILE_SIZE_DEFAULT_RATIO</a></span></code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_FILE_SIZE_BYTES">MIN_FILE_SIZE_BYTES</a></span></code></th>
<td class="colLast">
<div class="block">Adjusts files which will be considered for rewriting.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static double</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_FILE_SIZE_DEFAULT_RATIO">MIN_FILE_SIZE_DEFAULT_RATIO</a></span></code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static java.lang.String</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_INPUT_FILES">MIN_INPUT_FILES</a></span></code></th>
<td class="colLast">
<div class="block">The minimum number of files that need to be in a file group for it to be considered for
compaction if the total size of that group is less than the <a href="RewriteDataFiles.html#TARGET_FILE_SIZE_BYTES"><code>RewriteDataFiles.TARGET_FILE_SIZE_BYTES</code></a>.</div>
</td>
</tr>
<tr class="rowColor">
<td class="colFirst"><code>static int</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_INPUT_FILES_DEFAULT">MIN_INPUT_FILES_DEFAULT</a></span></code></th>
<td class="colLast">&nbsp;</td>
</tr>
</table>
</li>
</ul>
</section>
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Constructor</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tr class="altColor">
<th class="colConstructorName" scope="row"><code><span class="memberNameLink"><a href="#%3Cinit%3E()">BinPackStrategy</a></span>()</code></th>
<td class="colLast">&nbsp;</td>
</tr>
</table>
</li>
</ul>
</section>
<!-- ========== METHOD SUMMARY =========== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colSecond" scope="col">Method</th>
<th class="colLast" scope="col">Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>protected long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#inputFileSize(java.util.List)">inputFileSize</a></span>&#8203;(java.util.List&lt;<a href="../FileScanTask.html" title="interface in org.apache.iceberg">FileScanTask</a>&gt;&nbsp;fileToRewrite)</code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>protected long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#maxGroupSize()">maxGroupSize</a></span>()</code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr id="i2" class="altColor">
<td class="colFirst"><code>java.lang.String</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#name()">name</a></span>()</code></th>
<td class="colLast">
<div class="block">Returns the name of this rewrite strategy</div>
</td>
</tr>
<tr id="i3" class="rowColor">
<td class="colFirst"><code>protected long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#numOutputFiles(long)">numOutputFiles</a></span>&#8203;(long&nbsp;totalSizeInBytes)</code></th>
<td class="colLast">
<div class="block">Determine how many output files to create when rewriting.</div>
</td>
</tr>
<tr id="i4" class="altColor">
<td class="colFirst"><code><a href="RewriteStrategy.html" title="interface in org.apache.iceberg.actions">RewriteStrategy</a></code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#options(java.util.Map)">options</a></span>&#8203;(java.util.Map&lt;java.lang.String,&#8203;java.lang.String&gt;&nbsp;options)</code></th>
<td class="colLast">
<div class="block">Sets options to be used with this strategy</div>
</td>
</tr>
<tr id="i5" class="rowColor">
<td class="colFirst"><code>java.lang.Iterable&lt;java.util.List&lt;<a href="../FileScanTask.html" title="interface in org.apache.iceberg">FileScanTask</a>&gt;&gt;</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#planFileGroups(java.lang.Iterable)">planFileGroups</a></span>&#8203;(java.lang.Iterable&lt;<a href="../FileScanTask.html" title="interface in org.apache.iceberg">FileScanTask</a>&gt;&nbsp;dataFiles)</code></th>
<td class="colLast">
<div class="block">Groups file scans into lists which will be processed in a single executable unit.</div>
</td>
</tr>
<tr id="i6" class="altColor">
<td class="colFirst"><code>java.lang.Iterable&lt;<a href="../FileScanTask.html" title="interface in org.apache.iceberg">FileScanTask</a>&gt;</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#selectFilesToRewrite(java.lang.Iterable)">selectFilesToRewrite</a></span>&#8203;(java.lang.Iterable&lt;<a href="../FileScanTask.html" title="interface in org.apache.iceberg">FileScanTask</a>&gt;&nbsp;dataFiles)</code></th>
<td class="colLast">
<div class="block">Selects files which this strategy believes are valid targets to be rewritten.</div>
</td>
</tr>
<tr id="i7" class="rowColor">
<td class="colFirst"><code>protected long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#splitSize(long)">splitSize</a></span>&#8203;(long&nbsp;totalSizeInBytes)</code></th>
<td class="colLast">
<div class="block">Returns the smallest of our max write file threshold, and our estimated split size based on
the number of output files we want to generate.</div>
</td>
</tr>
<tr id="i8" class="altColor">
<td class="colFirst"><code>protected long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#targetFileSize()">targetFileSize</a></span>()</code></th>
<td class="colLast">&nbsp;</td>
</tr>
<tr id="i9" class="rowColor">
<td class="colFirst"><code>java.util.Set&lt;java.lang.String&gt;</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#validOptions()">validOptions</a></span>()</code></th>
<td class="colLast">
<div class="block">Returns a set of options which this rewrite strategy can use.</div>
</td>
</tr>
<tr id="i10" class="altColor">
<td class="colFirst"><code>protected long</code></td>
<th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#writeMaxFileSize()">writeMaxFileSize</a></span>()</code></th>
<td class="colLast">
<div class="block">Estimates a larger max target file size than our target size used in task creation to avoid
tasks which are predicted to have a certain size, but exceed that target size when serde is complete, creating
tiny remainder files.</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a id="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.Object</h3>
<code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li>
</ul>
<ul class="blockList">
<li class="blockList"><a id="methods.inherited.from.class.org.apache.iceberg.actions.RewriteStrategy">
<!-- -->
</a>
<h3>Methods inherited from interface&nbsp;org.apache.iceberg.actions.<a href="RewriteStrategy.html" title="interface in org.apache.iceberg.actions">RewriteStrategy</a></h3>
<code><a href="RewriteStrategy.html#rewriteFiles(java.util.List)">rewriteFiles</a>, <a href="RewriteStrategy.html#table()">table</a></code></li>
</ul>
</li>
</ul>
</section>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ============ FIELD DETAIL =========== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="field.detail">
<!-- -->
</a>
<h3>Field Detail</h3>
<a id="MIN_INPUT_FILES">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MIN_INPUT_FILES</h4>
<pre>public static final&nbsp;java.lang.String MIN_INPUT_FILES</pre>
<div class="block">The minimum number of files that need to be in a file group for it to be considered for
compaction if the total size of that group is less than the <a href="RewriteDataFiles.html#TARGET_FILE_SIZE_BYTES"><code>RewriteDataFiles.TARGET_FILE_SIZE_BYTES</code></a>.
This can also be thought of as the maximum number of non-target-size files that could remain in a file
group (partition) after rewriting.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.BinPackStrategy.MIN_INPUT_FILES">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MIN_INPUT_FILES_DEFAULT">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MIN_INPUT_FILES_DEFAULT</h4>
<pre>public static final&nbsp;int MIN_INPUT_FILES_DEFAULT</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.BinPackStrategy.MIN_INPUT_FILES_DEFAULT">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MIN_FILE_SIZE_BYTES">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MIN_FILE_SIZE_BYTES</h4>
<pre>public static final&nbsp;java.lang.String MIN_FILE_SIZE_BYTES</pre>
<div class="block">Adjusts files which will be considered for rewriting. Files smaller than
<a href="#MIN_FILE_SIZE_BYTES"><code>MIN_FILE_SIZE_BYTES</code></a> will be considered for rewriting. This functions independently
of <a href="#MAX_FILE_SIZE_BYTES"><code>MAX_FILE_SIZE_BYTES</code></a>.
<p>
Defaults to 75% of the target file size</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.BinPackStrategy.MIN_FILE_SIZE_BYTES">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MIN_FILE_SIZE_DEFAULT_RATIO">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MIN_FILE_SIZE_DEFAULT_RATIO</h4>
<pre>public static final&nbsp;double MIN_FILE_SIZE_DEFAULT_RATIO</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.BinPackStrategy.MIN_FILE_SIZE_DEFAULT_RATIO">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MAX_FILE_SIZE_BYTES">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MAX_FILE_SIZE_BYTES</h4>
<pre>public static final&nbsp;java.lang.String MAX_FILE_SIZE_BYTES</pre>
<div class="block">Adjusts files which will be considered for rewriting. Files larger than
<a href="#MAX_FILE_SIZE_BYTES"><code>MAX_FILE_SIZE_BYTES</code></a> will be considered for rewriting. This functions independently
of <a href="#MIN_FILE_SIZE_BYTES"><code>MIN_FILE_SIZE_BYTES</code></a>.
<p>
Defaults to 180% of the target file size</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.BinPackStrategy.MAX_FILE_SIZE_BYTES">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="MAX_FILE_SIZE_DEFAULT_RATIO">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>MAX_FILE_SIZE_DEFAULT_RATIO</h4>
<pre>public static final&nbsp;double MAX_FILE_SIZE_DEFAULT_RATIO</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.BinPackStrategy.MAX_FILE_SIZE_DEFAULT_RATIO">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="DELETE_FILE_THRESHOLD">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>DELETE_FILE_THRESHOLD</h4>
<pre>public static final&nbsp;java.lang.String DELETE_FILE_THRESHOLD</pre>
<div class="block">The minimum number of deletes that needs to be associated with a data file for it to be considered for rewriting.
If a data file has this number of deletes or more, it will be rewritten regardless of its file size determined
by <a href="#MIN_FILE_SIZE_BYTES"><code>MIN_FILE_SIZE_BYTES</code></a> and <a href="#MAX_FILE_SIZE_BYTES"><code>MAX_FILE_SIZE_BYTES</code></a>.
If a file group contains a file that satisfies this condition, the file group will be rewritten regardless of
the number of files in the file group determined by <a href="#MIN_INPUT_FILES"><code>MIN_INPUT_FILES</code></a>.
<p>
Defaults to Integer.MAX_VALUE, which means this feature is not enabled by default.</div>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.BinPackStrategy.DELETE_FILE_THRESHOLD">Constant Field Values</a></dd>
</dl>
</li>
</ul>
<a id="DELETE_FILE_THRESHOLD_DEFAULT">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>DELETE_FILE_THRESHOLD_DEFAULT</h4>
<pre>public static final&nbsp;int DELETE_FILE_THRESHOLD_DEFAULT</pre>
<dl>
<dt><span class="seeLabel">See Also:</span></dt>
<dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.BinPackStrategy.DELETE_FILE_THRESHOLD_DEFAULT">Constant Field Values</a></dd>
</dl>
</li>
</ul>
</li>
</ul>
</section>
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a id="&lt;init&gt;()">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>BinPackStrategy</h4>
<pre>public&nbsp;BinPackStrategy()</pre>
</li>
</ul>
</li>
</ul>
</section>
<!-- ============ METHOD DETAIL ========== -->
<section role="region">
<ul class="blockList">
<li class="blockList"><a id="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a id="name()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>name</h4>
<pre class="methodSignature">public&nbsp;java.lang.String&nbsp;name()</pre>
<div class="block"><span class="descfrmTypeLabel">Description copied from interface:&nbsp;<code><a href="RewriteStrategy.html#name()">RewriteStrategy</a></code></span></div>
<div class="block">Returns the name of this rewrite strategy</div>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="RewriteStrategy.html#name()">name</a></code>&nbsp;in interface&nbsp;<code><a href="RewriteStrategy.html" title="interface in org.apache.iceberg.actions">RewriteStrategy</a></code></dd>
</dl>
</li>
</ul>
<a id="validOptions()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>validOptions</h4>
<pre class="methodSignature">public&nbsp;java.util.Set&lt;java.lang.String&gt;&nbsp;validOptions()</pre>
<div class="block"><span class="descfrmTypeLabel">Description copied from interface:&nbsp;<code><a href="RewriteStrategy.html#validOptions()">RewriteStrategy</a></code></span></div>
<div class="block">Returns a set of options which this rewrite strategy can use. This is an allowed-list and any options not
specified here will be rejected at runtime.</div>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="RewriteStrategy.html#validOptions()">validOptions</a></code>&nbsp;in interface&nbsp;<code><a href="RewriteStrategy.html" title="interface in org.apache.iceberg.actions">RewriteStrategy</a></code></dd>
</dl>
</li>
</ul>
<a id="options(java.util.Map)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>options</h4>
<pre class="methodSignature">public&nbsp;<a href="RewriteStrategy.html" title="interface in org.apache.iceberg.actions">RewriteStrategy</a>&nbsp;options&#8203;(java.util.Map&lt;java.lang.String,&#8203;java.lang.String&gt;&nbsp;options)</pre>
<div class="block"><span class="descfrmTypeLabel">Description copied from interface:&nbsp;<code><a href="RewriteStrategy.html#options(java.util.Map)">RewriteStrategy</a></code></span></div>
<div class="block">Sets options to be used with this strategy</div>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="RewriteStrategy.html#options(java.util.Map)">options</a></code>&nbsp;in interface&nbsp;<code><a href="RewriteStrategy.html" title="interface in org.apache.iceberg.actions">RewriteStrategy</a></code></dd>
</dl>
</li>
</ul>
<a id="selectFilesToRewrite(java.lang.Iterable)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>selectFilesToRewrite</h4>
<pre class="methodSignature">public&nbsp;java.lang.Iterable&lt;<a href="../FileScanTask.html" title="interface in org.apache.iceberg">FileScanTask</a>&gt;&nbsp;selectFilesToRewrite&#8203;(java.lang.Iterable&lt;<a href="../FileScanTask.html" title="interface in org.apache.iceberg">FileScanTask</a>&gt;&nbsp;dataFiles)</pre>
<div class="block"><span class="descfrmTypeLabel">Description copied from interface:&nbsp;<code><a href="RewriteStrategy.html#selectFilesToRewrite(java.lang.Iterable)">RewriteStrategy</a></code></span></div>
<div class="block">Selects files which this strategy believes are valid targets to be rewritten.</div>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="RewriteStrategy.html#selectFilesToRewrite(java.lang.Iterable)">selectFilesToRewrite</a></code>&nbsp;in interface&nbsp;<code><a href="RewriteStrategy.html" title="interface in org.apache.iceberg.actions">RewriteStrategy</a></code></dd>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>dataFiles</code> - iterable of FileScanTasks for files in a given partition</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>iterable containing only FileScanTasks to be rewritten</dd>
</dl>
</li>
</ul>
<a id="planFileGroups(java.lang.Iterable)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>planFileGroups</h4>
<pre class="methodSignature">public&nbsp;java.lang.Iterable&lt;java.util.List&lt;<a href="../FileScanTask.html" title="interface in org.apache.iceberg">FileScanTask</a>&gt;&gt;&nbsp;planFileGroups&#8203;(java.lang.Iterable&lt;<a href="../FileScanTask.html" title="interface in org.apache.iceberg">FileScanTask</a>&gt;&nbsp;dataFiles)</pre>
<div class="block"><span class="descfrmTypeLabel">Description copied from interface:&nbsp;<code><a href="RewriteStrategy.html#planFileGroups(java.lang.Iterable)">RewriteStrategy</a></code></span></div>
<div class="block">Groups file scans into lists which will be processed in a single executable unit. Each group will end up being
committed as an independent set of changes. This creates the jobs which will eventually be run by the underlying
Action.</div>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="RewriteStrategy.html#planFileGroups(java.lang.Iterable)">planFileGroups</a></code>&nbsp;in interface&nbsp;<code><a href="RewriteStrategy.html" title="interface in org.apache.iceberg.actions">RewriteStrategy</a></code></dd>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>dataFiles</code> - iterable of FileScanTasks to be rewritten</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>iterable of lists of FileScanTasks which will be processed together</dd>
</dl>
</li>
</ul>
<a id="targetFileSize()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>targetFileSize</h4>
<pre class="methodSignature">protected&nbsp;long&nbsp;targetFileSize()</pre>
</li>
</ul>
<a id="numOutputFiles(long)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>numOutputFiles</h4>
<pre class="methodSignature">protected&nbsp;long&nbsp;numOutputFiles&#8203;(long&nbsp;totalSizeInBytes)</pre>
<div class="block">Determine how many output files to create when rewriting. We use this to determine the split-size
we want to use when actually writing files to avoid the following situation.
<p>
If we are writing 10.1 GB of data with a target file size of 1 GB we would end up with
11 files, one of which would only have 0.1 GB. This would most likely be less preferable to
10 files each of which was 1.01 GB. So here we decide whether to round up or round down
based on what the estimated average file size will be if we ignore the remainder (0.1 GB). If
the new file size is less than 10% greater than the target file size then we will round down
when determining the number of output files.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>totalSizeInBytes</code> - total data size for a file group</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>the number of files this strategy should create</dd>
</dl>
</li>
</ul>
<a id="splitSize(long)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>splitSize</h4>
<pre class="methodSignature">protected&nbsp;long&nbsp;splitSize&#8203;(long&nbsp;totalSizeInBytes)</pre>
<div class="block">Returns the smallest of our max write file threshold, and our estimated split size based on
the number of output files we want to generate. Adds an overhead to the estimated splitSize to try to avoid
small errors in size creating brand-new files.</div>
</li>
</ul>
<a id="inputFileSize(java.util.List)">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>inputFileSize</h4>
<pre class="methodSignature">protected&nbsp;long&nbsp;inputFileSize&#8203;(java.util.List&lt;<a href="../FileScanTask.html" title="interface in org.apache.iceberg">FileScanTask</a>&gt;&nbsp;fileToRewrite)</pre>
</li>
</ul>
<a id="maxGroupSize()">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>maxGroupSize</h4>
<pre class="methodSignature">protected&nbsp;long&nbsp;maxGroupSize()</pre>
</li>
</ul>
<a id="writeMaxFileSize()">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>writeMaxFileSize</h4>
<pre class="methodSignature">protected&nbsp;long&nbsp;writeMaxFileSize()</pre>
<div class="block">Estimates a larger max target file size than our target size used in task creation to avoid
tasks which are predicted to have a certain size, but exceed that target size when serde is complete, creating
tiny remainder files.
<p>
While we create tasks that should all be smaller than our target size there is a chance that the actual
data will end up being larger than our target size due to various factors of compression, serialization and
other factors outside our control. If this occurs, instead of making a single file that is close in size to
our target we would end up producing one file of the target size, and then a small extra file with the remaining
data. For example, if our target is 512 MB we may generate a rewrite task that should be 500 MB. When we write
the data we may find we actually have to write out 530 MB. If we use the target size while writing we would
produce a 512 MB file and an 18 MB file. If instead we use a larger size estimated by this method,
then we end up writing a single file.</div>
<dl>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>the target size plus one half of the distance between max and target</dd>
</dl>
</li>
</ul>
</li>
</ul>
</section>
</li>
</ul>
</div>
</div>
</main>
<!-- ========= END OF CLASS DATA ========= -->
<footer role="contentinfo">
<nav role="navigation">
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a id="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a id="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../index.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../allclasses.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the bottom "All Classes" list only when this page is the top-level
// window; hide it when the page is displayed inside another window/frame.
allClassesLink = document.getElementById("allclasses_navbar_bottom");
allClassesLink.style.display = (window == top) ? "block" : "none";
//-->
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li><a href="#field.summary">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li><a href="#field.detail">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a id="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
</nav>
</footer>
</body>
</html>