| <!DOCTYPE HTML> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc --> |
| <title>SizeBasedFileRewriter</title> |
| <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> |
| <link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style"> |
| <link rel="stylesheet" type="text/css" href="../../../../jquery/jquery-ui.css" title="Style"> |
| <script type="text/javascript" src="../../../../script.js"></script> |
| <script type="text/javascript" src="../../../../jquery/jszip/dist/jszip.min.js"></script> |
| <script type="text/javascript" src="../../../../jquery/jszip-utils/dist/jszip-utils.min.js"></script> |
| <!--[if IE]> |
| <script type="text/javascript" src="../../../../jquery/jszip-utils/dist/jszip-utils-ie.min.js"></script> |
| <![endif]--> |
| <script type="text/javascript" src="../../../../jquery/jquery-3.5.1.js"></script> |
| <script type="text/javascript" src="../../../../jquery/jquery-ui.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"> |
| // When this page is not an external link target, copy the class name into the |
| // parent window's title. Any failure while touching the parent document is |
| // deliberately ignored (best effort only). |
| try { |
|   if (location.href.indexOf('is-external=true') < 0) { |
|     parent.document.title = "SizeBasedFileRewriter"; |
|   } |
| } catch (err) { |
|   // Intentionally empty: the title update is optional. |
| } |
|  |
| // Per-row flags for the Method Summary table, keyed by the row ids "i0".."i14". |
| // Each value is a bitwise OR of the keys in `tabs` below |
| // (6 = instance + abstract, 10 = instance + concrete). |
| var data = { |
|   "i0": 6, "i1": 10, "i2": 10, "i3": 6, "i4": 6, |
|   "i5": 10, "i6": 10, "i7": 10, "i8": 10, "i9": 10, |
|   "i10": 10, "i11": 10, "i12": 10, "i13": 10, "i14": 10 |
| }; |
|  |
| // Tab bit value -> [caption span id, tab label] for the Method Summary table. |
| var tabs = { |
|   65535: ["t0", "All Methods"], |
|   2: ["t2", "Instance Methods"], |
|   4: ["t3", "Abstract Methods"], |
|   8: ["t4", "Concrete Methods"] |
| }; |
|  |
| // CSS class names and path settings; presumably read by the shared script.js |
| // (not visible here) - verify before renaming any of these globals. |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| var pathtoroot = "../../../../"; |
| var useModuleDirectories = true; |
|  |
| loadScripts(document, 'script'); |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <header role="banner"> |
| <nav role="navigation"> |
| <div class="fixedNav"> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a id="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a id="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../index.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../allclasses.html">All Classes</a></li> |
| </ul> |
| <ul class="navListSearch"> |
| <li><label for="search">SEARCH:</label> |
| <input type="text" id="search" value="search" disabled="disabled"> |
| <input type="reset" id="reset" value="reset" disabled="disabled"> |
| </li> |
| </ul> |
| <div> |
| <script type="text/javascript"> |
| // Show the "All Classes" list only when this page is the top-level window; |
| // hide it when the page is displayed inside a frame. |
| // Declared with `var` so the name is an explicit global rather than an |
| // implicit one created by assigning to an undeclared identifier. |
| var allClassesLink = document.getElementById("allclasses_navbar_top"); |
| // Guard against the list being absent so a missing element cannot abort the script. |
| if (allClassesLink) { |
|   allClassesLink.style.display = (window === top) ? "block" : "none"; |
| } |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li><a href="#field.summary">Field</a> | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li><a href="#field.detail">Field</a> | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a id="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| </div> |
| <div class="navPadding"> </div> |
| <script type="text/javascript"> |
| // Give the .navPadding spacer a top padding equal to the computed height of |
| // the .fixedNav block. |
| $(".navPadding").css("padding-top", $(".fixedNav").css("height")); |
| </script> |
| </nav> |
| </header> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <main role="main"> |
| <div class="header"> |
| <div class="subTitle"><span class="packageLabelInType">Package</span> <a href="package-summary.html">org.apache.iceberg.actions</a></div> |
| <h2 title="Class SizeBasedFileRewriter" class="title">Class SizeBasedFileRewriter&lt;T extends <a href="../ContentScanTask.html" title="interface in org.apache.iceberg">ContentScanTask</a>&lt;F&gt;,&#8203;F extends <a href="../ContentFile.html" title="interface in org.apache.iceberg">ContentFile</a>&lt;F&gt;&gt;</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li>java.lang.Object</li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.iceberg.actions.SizeBasedFileRewriter&lt;T,&#8203;F&gt;</li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <dl> |
| <dt>All Implemented Interfaces:</dt> |
| <dd><code><a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a>&lt;T,&#8203;F&gt;</code></dd> |
| </dl> |
| <dl> |
| <dt>Direct Known Subclasses:</dt> |
| <dd><code><a href="SizeBasedDataRewriter.html" title="class in org.apache.iceberg.actions">SizeBasedDataRewriter</a></code>, <code><a href="SizeBasedPositionDeletesRewriter.html" title="class in org.apache.iceberg.actions">SizeBasedPositionDeletesRewriter</a></code></dd> |
| </dl> |
| <hr> |
| <pre>public abstract class <span class="typeNameLabel">SizeBasedFileRewriter&lt;T extends <a href="../ContentScanTask.html" title="interface in org.apache.iceberg">ContentScanTask</a>&lt;F&gt;,&#8203;F extends <a href="../ContentFile.html" title="interface in org.apache.iceberg">ContentFile</a>&lt;F&gt;&gt;</span> |
| extends java.lang.Object |
| implements <a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a>&lt;T,&#8203;F&gt;</pre> |
| <div class="block">A file rewriter that determines which files to rewrite based on their size. |
| |
| <p>If files are smaller than the <a href="#MIN_FILE_SIZE_BYTES"><code>MIN_FILE_SIZE_BYTES</code></a> threshold or larger than the <a href="#MAX_FILE_SIZE_BYTES"><code>MAX_FILE_SIZE_BYTES</code></a> threshold, they are considered targets for being rewritten. |
| |
| <p>Once selected, files are grouped based on the <a href="../util/BinPacking.html" title="class in org.apache.iceberg.util"><code>bin-packing algorithm</code></a> into |
| groups of no more than <a href="#MAX_FILE_GROUP_SIZE_BYTES"><code>MAX_FILE_GROUP_SIZE_BYTES</code></a>. Groups will be actually rewritten if |
| they contain more than <a href="#MIN_INPUT_FILES"><code>MIN_INPUT_FILES</code></a> or if they would produce at least one file of |
| <a href="#TARGET_FILE_SIZE_BYTES"><code>TARGET_FILE_SIZE_BYTES</code></a>. |
| |
| <p>Note that implementations may add extra conditions for selecting files or filtering groups.</div> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- =========== FIELD SUMMARY =========== --> |
| <section role="region"> |
| <ul class="blockList"> |
| <li class="blockList"><a id="field.summary"> |
| <!-- --> |
| </a> |
| <h3>Field Summary</h3> |
| <table class="memberSummary"> |
| <caption><span>Fields</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colSecond" scope="col">Field</th> |
| <th class="colLast" scope="col">Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static java.lang.String</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MAX_FILE_GROUP_SIZE_BYTES">MAX_FILE_GROUP_SIZE_BYTES</a></span></code></th> |
| <td class="colLast"> |
| <div class="block">This option controls the largest amount of data that should be rewritten in a single file |
| group.</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static long</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MAX_FILE_GROUP_SIZE_BYTES_DEFAULT">MAX_FILE_GROUP_SIZE_BYTES_DEFAULT</a></span></code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static java.lang.String</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MAX_FILE_SIZE_BYTES">MAX_FILE_SIZE_BYTES</a></span></code></th> |
| <td class="colLast"> |
| <div class="block">Controls which files will be considered for rewriting.</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static double</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MAX_FILE_SIZE_DEFAULT_RATIO">MAX_FILE_SIZE_DEFAULT_RATIO</a></span></code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static java.lang.String</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_FILE_SIZE_BYTES">MIN_FILE_SIZE_BYTES</a></span></code></th> |
| <td class="colLast"> |
| <div class="block">Controls which files will be considered for rewriting.</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static double</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_FILE_SIZE_DEFAULT_RATIO">MIN_FILE_SIZE_DEFAULT_RATIO</a></span></code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static java.lang.String</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_INPUT_FILES">MIN_INPUT_FILES</a></span></code></th> |
| <td class="colLast"> |
| <div class="block">Any file group exceeding this number of files will be rewritten regardless of other criteria.</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static int</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#MIN_INPUT_FILES_DEFAULT">MIN_INPUT_FILES_DEFAULT</a></span></code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static java.lang.String</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#REWRITE_ALL">REWRITE_ALL</a></span></code></th> |
| <td class="colLast"> |
| <div class="block">Overrides other options and forces rewriting of all provided files.</div> |
| </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>static boolean</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#REWRITE_ALL_DEFAULT">REWRITE_ALL_DEFAULT</a></span></code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static java.lang.String</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#TARGET_FILE_SIZE_BYTES">TARGET_FILE_SIZE_BYTES</a></span></code></th> |
| <td class="colLast"> |
| <div class="block">The target output file size that this file rewriter will attempt to generate.</div> |
| </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| </section> |
| <!-- ======== CONSTRUCTOR SUMMARY ======== --> |
| <section role="region"> |
| <ul class="blockList"> |
| <li class="blockList"><a id="constructor.summary"> |
| <!-- --> |
| </a> |
| <h3>Constructor Summary</h3> |
| <table class="memberSummary"> |
| <caption><span>Constructors</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier</th> |
| <th class="colSecond" scope="col">Constructor</th> |
| <th class="colLast" scope="col">Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>protected </code></td> |
| <th class="colConstructorName" scope="row"><code><span class="memberNameLink"><a href="#%3Cinit%3E(org.apache.iceberg.Table)">SizeBasedFileRewriter</a></span>​(<a href="../Table.html" title="interface in org.apache.iceberg">Table</a> table)</code></th> |
| <td class="colLast"> </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| </section> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <section role="region"> |
| <ul class="blockList"> |
| <li class="blockList"><a id="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t3" class="tableTab"><span><a href="javascript:show(4);">Abstract Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colSecond" scope="col">Method</th> |
| <th class="colLast" scope="col">Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>protected abstract long</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#defaultTargetFileSize()">defaultTargetFileSize</a></span>()</code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code>protected boolean</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#enoughContent(java.util.List)">enoughContent</a></span>&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; group)</code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr id="i2" class="altColor"> |
| <td class="colFirst"><code>protected boolean</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#enoughInputFiles(java.util.List)">enoughInputFiles</a></span>&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; group)</code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr id="i3" class="rowColor"> |
| <td class="colFirst"><code>protected abstract java.lang.Iterable&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt;</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#filterFileGroups(java.util.List)">filterFileGroups</a></span>&#8203;(java.util.List&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt; groups)</code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr id="i4" class="altColor"> |
| <td class="colFirst"><code>protected abstract java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#filterFiles(java.lang.Iterable)">filterFiles</a></span>&#8203;(java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; tasks)</code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr id="i5" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#init(java.util.Map)">init</a></span>&#8203;(java.util.Map&lt;java.lang.String,&#8203;java.lang.String&gt; options)</code></th> |
| <td class="colLast"> |
| <div class="block">Initializes this rewriter using provided options.</div> |
| </td> |
| </tr> |
| <tr id="i6" class="altColor"> |
| <td class="colFirst"><code>protected long</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#inputSize(java.util.List)">inputSize</a></span>&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; group)</code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr id="i7" class="rowColor"> |
| <td class="colFirst"><code>protected long</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#numOutputFiles(long)">numOutputFiles</a></span>​(long inputSize)</code></th> |
| <td class="colLast"> |
| <div class="block">Determines the preferable number of output files when rewriting a particular file group.</div> |
| </td> |
| </tr> |
| <tr id="i8" class="altColor"> |
| <td class="colFirst"><code>java.lang.Iterable&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt;</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#planFileGroups(java.lang.Iterable)">planFileGroups</a></span>&#8203;(java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; tasks)</code></th> |
| <td class="colLast"> |
| <div class="block">Selects files which this rewriter believes are valid targets to be rewritten based on their |
| scan tasks and groups those scan tasks into file groups.</div> |
| </td> |
| </tr> |
| <tr id="i9" class="rowColor"> |
| <td class="colFirst"><code>protected long</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#splitSize(long)">splitSize</a></span>​(long inputSize)</code></th> |
| <td class="colLast"> |
| <div class="block">Returns the smallest of our max write file threshold and our estimated split size based on the |
| number of output files we want to generate.</div> |
| </td> |
| </tr> |
| <tr id="i10" class="altColor"> |
| <td class="colFirst"><code>protected <a href="../Table.html" title="interface in org.apache.iceberg">Table</a></code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#table()">table</a></span>()</code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr id="i11" class="rowColor"> |
| <td class="colFirst"><code>protected boolean</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#tooMuchContent(java.util.List)">tooMuchContent</a></span>&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; group)</code></th> |
| <td class="colLast"> </td> |
| </tr> |
| <tr id="i12" class="altColor"> |
| <td class="colFirst"><code>java.util.Set&lt;java.lang.String&gt;</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#validOptions()">validOptions</a></span>()</code></th> |
| <td class="colLast"> |
| <div class="block">Returns a set of supported options for this rewriter.</div> |
| </td> |
| </tr> |
| <tr id="i13" class="rowColor"> |
| <td class="colFirst"><code>protected long</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#writeMaxFileSize()">writeMaxFileSize</a></span>()</code></th> |
| <td class="colLast"> |
| <div class="block">Estimates a larger max target file size than the target size used in task creation to avoid |
| creating tiny remainder files.</div> |
| </td> |
| </tr> |
| <tr id="i14" class="altColor"> |
| <td class="colFirst"><code>protected boolean</code></td> |
| <th class="colSecond" scope="row"><code><span class="memberNameLink"><a href="#wronglySized(T)">wronglySized</a></span>​(<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a> task)</code></th> |
| <td class="colLast"> </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a id="methods.inherited.from.class.java.lang.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class java.lang.Object</h3> |
| <code>clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait</code></li> |
| </ul> |
| <ul class="blockList"> |
| <li class="blockList"><a id="methods.inherited.from.class.org.apache.iceberg.actions.FileRewriter"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from interface org.apache.iceberg.actions.<a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a></h3> |
| <code><a href="FileRewriter.html#description()">description</a>, <a href="FileRewriter.html#rewrite(java.util.List)">rewrite</a></code></li> |
| </ul> |
| </li> |
| </ul> |
| </section> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ============ FIELD DETAIL =========== --> |
| <section role="region"> |
| <ul class="blockList"> |
| <li class="blockList"><a id="field.detail"> |
| <!-- --> |
| </a> |
| <h3>Field Detail</h3> |
| <a id="TARGET_FILE_SIZE_BYTES"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>TARGET_FILE_SIZE_BYTES</h4> |
| <pre>public static final java.lang.String TARGET_FILE_SIZE_BYTES</pre> |
| <div class="block">The target output file size that this file rewriter will attempt to generate.</div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.TARGET_FILE_SIZE_BYTES">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="MIN_FILE_SIZE_BYTES"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>MIN_FILE_SIZE_BYTES</h4> |
| <pre>public static final java.lang.String MIN_FILE_SIZE_BYTES</pre> |
| <div class="block">Controls which files will be considered for rewriting. Files with sizes under this threshold |
| will be considered for rewriting regardless of any other criteria. |
| |
| <p>Defaults to 75% of the target file size.</div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MIN_FILE_SIZE_BYTES">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="MIN_FILE_SIZE_DEFAULT_RATIO"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>MIN_FILE_SIZE_DEFAULT_RATIO</h4> |
| <pre>public static final double MIN_FILE_SIZE_DEFAULT_RATIO</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MIN_FILE_SIZE_DEFAULT_RATIO">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="MAX_FILE_SIZE_BYTES"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>MAX_FILE_SIZE_BYTES</h4> |
| <pre>public static final java.lang.String MAX_FILE_SIZE_BYTES</pre> |
| <div class="block">Controls which files will be considered for rewriting. Files with sizes above this threshold |
| will be considered for rewriting regardless of any other criteria. |
| |
| <p>Defaults to 180% of the target file size.</div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MAX_FILE_SIZE_BYTES">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="MAX_FILE_SIZE_DEFAULT_RATIO"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>MAX_FILE_SIZE_DEFAULT_RATIO</h4> |
| <pre>public static final double MAX_FILE_SIZE_DEFAULT_RATIO</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MAX_FILE_SIZE_DEFAULT_RATIO">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="MIN_INPUT_FILES"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>MIN_INPUT_FILES</h4> |
| <pre>public static final java.lang.String MIN_INPUT_FILES</pre> |
| <div class="block">Any file group exceeding this number of files will be rewritten regardless of other criteria. |
| This config ensures file groups that contain many files are compacted even if the total size of |
| that group is less than the target file size. This can also be thought of as the maximum number |
| of wrongly sized files that could remain in a partition after rewriting.</div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MIN_INPUT_FILES">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="MIN_INPUT_FILES_DEFAULT"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>MIN_INPUT_FILES_DEFAULT</h4> |
| <pre>public static final int MIN_INPUT_FILES_DEFAULT</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MIN_INPUT_FILES_DEFAULT">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="REWRITE_ALL"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>REWRITE_ALL</h4> |
| <pre>public static final java.lang.String REWRITE_ALL</pre> |
| <div class="block">Overrides other options and forces rewriting of all provided files.</div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.REWRITE_ALL">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="REWRITE_ALL_DEFAULT"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>REWRITE_ALL_DEFAULT</h4> |
| <pre>public static final boolean REWRITE_ALL_DEFAULT</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.REWRITE_ALL_DEFAULT">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="MAX_FILE_GROUP_SIZE_BYTES"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>MAX_FILE_GROUP_SIZE_BYTES</h4> |
| <pre>public static final java.lang.String MAX_FILE_GROUP_SIZE_BYTES</pre> |
| <div class="block">This option controls the largest amount of data that should be rewritten in a single file |
| group. It helps with breaking down the rewriting of very large partitions which may not be |
| rewritable otherwise due to the resource constraints of the cluster. For example, a sort-based |
| rewrite may not scale to TB-sized partitions, and those partitions need to be worked on in |
| small subsections to avoid exhaustion of resources.</div> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MAX_FILE_GROUP_SIZE_BYTES">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="MAX_FILE_GROUP_SIZE_BYTES_DEFAULT"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>MAX_FILE_GROUP_SIZE_BYTES_DEFAULT</h4> |
| <pre>public static final long MAX_FILE_GROUP_SIZE_BYTES_DEFAULT</pre> |
| <dl> |
| <dt><span class="seeLabel">See Also:</span></dt> |
| <dd><a href="../../../../constant-values.html#org.apache.iceberg.actions.SizeBasedFileRewriter.MAX_FILE_GROUP_SIZE_BYTES_DEFAULT">Constant Field Values</a></dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </section> |
| <!-- ========= CONSTRUCTOR DETAIL ======== --> |
| <section role="region"> |
| <ul class="blockList"> |
| <li class="blockList"><a id="constructor.detail"> |
| <!-- --> |
| </a> |
| <h3>Constructor Detail</h3> |
| <a id="<init>(org.apache.iceberg.Table)"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>SizeBasedFileRewriter</h4> |
| <pre>protected SizeBasedFileRewriter​(<a href="../Table.html" title="interface in org.apache.iceberg">Table</a> table)</pre> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </section> |
| <!-- ============ METHOD DETAIL ========== --> |
| <section role="region"> |
| <ul class="blockList"> |
| <li class="blockList"><a id="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a id="defaultTargetFileSize()"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>defaultTargetFileSize</h4> |
| <pre class="methodSignature">protected abstract long defaultTargetFileSize()</pre> |
| </li> |
| </ul> |
| <a id="filterFiles(java.lang.Iterable)"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>filterFiles</h4> |
| <pre class="methodSignature">protected abstract java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; filterFiles&#8203;(java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; tasks)</pre> |
| </li> |
| </ul> |
| <a id="filterFileGroups(java.util.List)"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>filterFileGroups</h4> |
| <pre class="methodSignature">protected abstract java.lang.Iterable&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt; filterFileGroups&#8203;(java.util.List&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt; groups)</pre> |
| </li> |
| </ul> |
| <a id="table()"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>table</h4> |
| <pre class="methodSignature">protected <a href="../Table.html" title="interface in org.apache.iceberg">Table</a> table()</pre> |
| </li> |
| </ul> |
| <a id="validOptions()"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>validOptions</h4> |
| <pre class="methodSignature">public java.util.Set&lt;java.lang.String&gt; validOptions()</pre> |
| <div class="block"><span class="descfrmTypeLabel">Description copied from interface: <code><a href="FileRewriter.html#validOptions()">FileRewriter</a></code></span></div> |
| <div class="block">Returns a set of supported options for this rewriter. Only options specified in this list will |
| be accepted at runtime. Any other options will be rejected.</div> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Specified by:</span></dt> |
| <dd><code><a href="FileRewriter.html#validOptions()">validOptions</a></code> in interface <code><a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a> extends <a href="../ContentScanTask.html" title="interface in org.apache.iceberg">ContentScanTask</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;,&#8203;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a> extends <a href="../ContentFile.html" title="interface in org.apache.iceberg">ContentFile</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;&gt;</code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="init(java.util.Map)"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>init</h4> |
| <pre class="methodSignature">public void init&#8203;(java.util.Map&lt;java.lang.String,&#8203;java.lang.String&gt; options)</pre> |
| <div class="block"><span class="descfrmTypeLabel">Description copied from interface: <code><a href="FileRewriter.html#init(java.util.Map)">FileRewriter</a></code></span></div> |
| <div class="block">Initializes this rewriter using provided options.</div> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Specified by:</span></dt> |
| <dd><code><a href="FileRewriter.html#init(java.util.Map)">init</a></code> in interface <code><a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a> extends <a href="../ContentScanTask.html" title="interface in org.apache.iceberg">ContentScanTask</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;,&#8203;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a> extends <a href="../ContentFile.html" title="interface in org.apache.iceberg">ContentFile</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;&gt;</code></dd> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>options</code> - options to initialize this rewriter</dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="wronglySized(org.apache.iceberg.ContentScanTask)"> |
| <!-- --> |
| </a><a id="wronglySized(T)"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>wronglySized</h4> |
| <pre class="methodSignature">protected boolean wronglySized​(<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a> task)</pre> |
| </li> |
| </ul> |
| <a id="planFileGroups(java.lang.Iterable)"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>planFileGroups</h4> |
| <pre class="methodSignature">public java.lang.Iterable&lt;java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt;&gt; planFileGroups&#8203;(java.lang.Iterable&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; tasks)</pre> |
| <div class="block"><span class="descfrmTypeLabel">Description copied from interface: <code><a href="FileRewriter.html#planFileGroups(java.lang.Iterable)">FileRewriter</a></code></span></div> |
| <div class="block">Selects files which this rewriter believes are valid targets to be rewritten based on their |
| scan tasks and groups those scan tasks into file groups. The file groups are then rewritten in |
| a single executable unit, such as a Spark job.</div> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Specified by:</span></dt> |
| <dd><code><a href="FileRewriter.html#planFileGroups(java.lang.Iterable)">planFileGroups</a></code> in interface <code><a href="FileRewriter.html" title="interface in org.apache.iceberg.actions">FileRewriter</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a> extends <a href="../ContentScanTask.html" title="interface in org.apache.iceberg">ContentScanTask</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;,&#8203;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a> extends <a href="../ContentFile.html" title="interface in org.apache.iceberg">ContentFile</a>&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">F</a>&gt;&gt;</code></dd> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>tasks</code> - an iterable of scan tasks for files in a partition</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>groups of scan tasks for files to be rewritten in a single executable unit</dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="enoughInputFiles(java.util.List)"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>enoughInputFiles</h4> |
| <pre class="methodSignature">protected boolean enoughInputFiles&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; group)</pre> |
| </li> |
| </ul> |
| <a id="enoughContent(java.util.List)"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>enoughContent</h4> |
| <pre class="methodSignature">protected boolean enoughContent&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; group)</pre> |
| </li> |
| </ul> |
| <a id="tooMuchContent(java.util.List)"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>tooMuchContent</h4> |
| <pre class="methodSignature">protected boolean tooMuchContent&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; group)</pre> |
| </li> |
| </ul> |
| <a id="inputSize(java.util.List)"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>inputSize</h4> |
| <pre class="methodSignature">protected long inputSize&#8203;(java.util.List&lt;<a href="SizeBasedFileRewriter.html" title="type parameter in SizeBasedFileRewriter">T</a>&gt; group)</pre> |
| </li> |
| </ul> |
| <a id="splitSize(long)"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>splitSize</h4> |
| <pre class="methodSignature">protected long splitSize​(long inputSize)</pre> |
| <div class="block">Returns the smaller of our max write file threshold and our estimated split size based on the |
| number of output files we want to generate. Adds an overhead onto the estimated split size to |
| try to avoid small errors in size creating brand-new files.</div> |
| </li> |
| </ul> |
| <a id="numOutputFiles(long)"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>numOutputFiles</h4> |
| <pre class="methodSignature">protected long numOutputFiles​(long inputSize)</pre> |
| <div class="block">Determines the preferable number of output files when rewriting a particular file group. |
| |
| <p>If the rewriter is handling 10.1 GB of data with a target file size of 1 GB, it could |
| produce 11 files, one of which would only have 0.1 GB. This would most likely be less |
| preferable to 10 files with 1.01 GB each. So this method decides whether to round up or round |
| down based on what the estimated average file size will be if the remainder (0.1 GB) is |
| distributed amongst other files. If the new average file size is no more than 10% greater than |
| the target file size, then this method will round down when determining the number of output |
| files. Otherwise, the remainder will be written into a separate file.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>inputSize</code> - a total input size for a file group</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>the number of files this rewriter should create</dd> |
| </dl> |
| </li> |
| </ul> |
| <a id="writeMaxFileSize()"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>writeMaxFileSize</h4> |
| <pre class="methodSignature">protected long writeMaxFileSize()</pre> |
| <div class="block">Estimates a larger max target file size than the target size used in task creation to avoid |
| creating tiny remainder files. |
| |
| <p>While we create tasks that should all be smaller than our target size, there is a chance |
| that the actual data will end up being larger than our target size due to factors such as |
| compression and serialization, which are outside our control. If this occurs, instead of making a |
| single file that is close in size to our target, we would end up producing one file of the |
| target size, and then a small extra file with the remaining data. |
| |
| <p>For example, if our target is 512 MB, we may generate a rewrite task that should be 500 MB. |
| When we write the data we may find we actually have to write out 530 MB. If we use the target |
| size while writing, we would produce a 512 MB file and an 18 MB file. If instead we use a |
| larger size estimated by this method, then we end up writing a single file.</div> |
| <dl> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>the target size plus one half of the distance between max and target</dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </section> |
| </li> |
| </ul> |
| </div> |
| </div> |
| </main> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <footer role="contentinfo"> |
| <nav role="navigation"> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a id="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a id="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../index.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../allclasses.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| // Show the "All Classes" link only when this page is the top-level window; |
| // hide it when the page is loaded inside another browsing context. |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| allClassesLink.style.display = (window == top) ? "block" : "none"; |
| //--> |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li><a href="#field.summary">Field</a> | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li><a href="#field.detail">Field</a> | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a id="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| </nav> |
| </footer> |
| </body> |
| </html> |