| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_402) on Mon Apr 15 02:02:03 UTC 2024 --> |
| <title>CachedBatchSerializer (Spark 3.4.3 JavaDoc)</title> |
| <meta name="date" content="2024-04-15"> |
| <link rel="stylesheet" type="text/css" href="../../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="CachedBatchSerializer (Spark 3.4.3 JavaDoc)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":6,"i1":6,"i2":6,"i3":6,"i4":6,"i5":6,"i6":6,"i7":6}; |
| var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],4:["t3","Abstract Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../org/apache/spark/sql/columnar/ExtractableLiteral.html" title="class in org.apache.spark.sql.columnar"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../index.html?org/apache/spark/sql/columnar/CachedBatchSerializer.html" target="_top">Frames</a></li> |
| <li><a href="CachedBatchSerializer.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.spark.sql.columnar</div> |
| <h2 title="Interface CachedBatchSerializer" class="title">Interface CachedBatchSerializer</h2> |
| </div> |
| <div class="contentContainer"> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <dl> |
| <dt>All Superinterfaces:</dt> |
| <dd>java.io.Serializable</dd> |
| </dl> |
| <dl> |
| <dt>All Known Implementing Classes:</dt> |
| <dd><a href="../../../../../org/apache/spark/sql/columnar/SimpleMetricsCachedBatchSerializer.html" title="class in org.apache.spark.sql.columnar">SimpleMetricsCachedBatchSerializer</a></dd> |
| </dl> |
| <hr> |
| <br> |
| <pre>public interface <span class="typeNameLabel">CachedBatchSerializer</span> |
| extends scala.Serializable</pre> |
| <div class="block">Provides APIs that handle transformations of SQL data associated with the cache/persist APIs.</div> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t3" class="tableTab"><span><a href="javascript:show(4);">Abstract Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>scala.Function2<Object,scala.collection.Iterator<<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>>,scala.collection.Iterator<<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/columnar/CachedBatchSerializer.html#buildFilter-scala.collection.Seq-scala.collection.Seq-">buildFilter</a></span>(scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Expression> predicates, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> cachedAttributes)</code> |
| <div class="block">Builds a function that can be used to filter batches prior to being decompressed.</div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/vectorized/ColumnarBatch.html" title="class in org.apache.spark.sql.vectorized">ColumnarBatch</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/columnar/CachedBatchSerializer.html#convertCachedBatchToColumnarBatch-org.apache.spark.rdd.RDD-scala.collection.Seq-scala.collection.Seq-org.apache.spark.sql.internal.SQLConf-">convertCachedBatchToColumnarBatch</a></span>(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>> input, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> cacheAttributes, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> selectedAttributes, |
| org.apache.spark.sql.internal.SQLConf conf)</code> |
| <div class="block">Convert the cached data into a ColumnarBatch.</div> |
| </td> |
| </tr> |
| <tr id="i2" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><org.apache.spark.sql.catalyst.InternalRow></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/columnar/CachedBatchSerializer.html#convertCachedBatchToInternalRow-org.apache.spark.rdd.RDD-scala.collection.Seq-scala.collection.Seq-org.apache.spark.sql.internal.SQLConf-">convertCachedBatchToInternalRow</a></span>(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>> input, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> cacheAttributes, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> selectedAttributes, |
| org.apache.spark.sql.internal.SQLConf conf)</code> |
| <div class="block">Convert the cached batch into <code>InternalRow</code>s.</div> |
| </td> |
| </tr> |
| <tr id="i3" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/columnar/CachedBatchSerializer.html#convertColumnarBatchToCachedBatch-org.apache.spark.rdd.RDD-scala.collection.Seq-org.apache.spark.storage.StorageLevel-org.apache.spark.sql.internal.SQLConf-">convertColumnarBatchToCachedBatch</a></span>(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/vectorized/ColumnarBatch.html" title="class in org.apache.spark.sql.vectorized">ColumnarBatch</a>> input, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> schema, |
| <a href="../../../../../org/apache/spark/storage/StorageLevel.html" title="class in org.apache.spark.storage">StorageLevel</a> storageLevel, |
| org.apache.spark.sql.internal.SQLConf conf)</code> |
| <div class="block">Convert an <code>RDD[ColumnarBatch]</code> into an <code>RDD[CachedBatch]</code> in preparation for caching the data.</div> |
| </td> |
| </tr> |
| <tr id="i4" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/columnar/CachedBatchSerializer.html#convertInternalRowToCachedBatch-org.apache.spark.rdd.RDD-scala.collection.Seq-org.apache.spark.storage.StorageLevel-org.apache.spark.sql.internal.SQLConf-">convertInternalRowToCachedBatch</a></span>(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><org.apache.spark.sql.catalyst.InternalRow> input, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> schema, |
| <a href="../../../../../org/apache/spark/storage/StorageLevel.html" title="class in org.apache.spark.storage">StorageLevel</a> storageLevel, |
| org.apache.spark.sql.internal.SQLConf conf)</code> |
| <div class="block">Convert an <code>RDD[InternalRow]</code> into an <code>RDD[CachedBatch]</code> in preparation for caching the data.</div> |
| </td> |
| </tr> |
| <tr id="i5" class="rowColor"> |
| <td class="colFirst"><code>boolean</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/columnar/CachedBatchSerializer.html#supportsColumnarInput-scala.collection.Seq-">supportsColumnarInput</a></span>(scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> schema)</code> |
| <div class="block">Can <code>convertColumnarBatchToCachedBatch()</code> be called instead of |
| <code>convertInternalRowToCachedBatch()</code> for this given schema? True if it can and false if it |
| cannot.</div> |
| </td> |
| </tr> |
| <tr id="i6" class="altColor"> |
| <td class="colFirst"><code>boolean</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/columnar/CachedBatchSerializer.html#supportsColumnarOutput-org.apache.spark.sql.types.StructType-">supportsColumnarOutput</a></span>(<a href="../../../../../org/apache/spark/sql/types/StructType.html" title="class in org.apache.spark.sql.types">StructType</a> schema)</code> |
| <div class="block">Can <code>convertCachedBatchToColumnarBatch()</code> be called instead of |
| <code>convertCachedBatchToInternalRow()</code> for this given schema? True if it can and false if it |
| cannot.</div> |
| </td> |
| </tr> |
| <tr id="i7" class="rowColor"> |
| <td class="colFirst"><code>scala.Option&lt;scala.collection.Seq&lt;String&gt;&gt;</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../../org/apache/spark/sql/columnar/CachedBatchSerializer.html#vectorTypes-scala.collection.Seq-org.apache.spark.sql.internal.SQLConf-">vectorTypes</a></span>(scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> attributes, |
| org.apache.spark.sql.internal.SQLConf conf)</code> |
| <div class="block">The exact Java types of the columns that are output in columnar processing mode.</div> |
| </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="buildFilter-scala.collection.Seq-scala.collection.Seq-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>buildFilter</h4> |
| <pre>scala.Function2<Object,scala.collection.Iterator<<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>>,scala.collection.Iterator<<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>>> buildFilter(scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Expression> predicates, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> cachedAttributes)</pre> |
| <div class="block">Builds a function that can be used to filter batches prior to being decompressed. |
| In most cases extending <a href="../../../../../org/apache/spark/sql/columnar/SimpleMetricsCachedBatchSerializer.html" title="class in org.apache.spark.sql.columnar"><code>SimpleMetricsCachedBatchSerializer</code></a> will provide the filter logic |
| necessary. You will need to provide metrics for this to work. <a href="../../../../../org/apache/spark/sql/columnar/SimpleMetricsCachedBatch.html" title="interface in org.apache.spark.sql.columnar"><code>SimpleMetricsCachedBatch</code></a> |
| provides the APIs to hold those metrics and explains the metrics used, really just min and max. |
| Note that this is intended to skip batches that are not needed, and the actual filtering of |
| individual rows is handled later.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>predicates</code> - the set of expressions to use for filtering.</dd> |
| <dd><code>cachedAttributes</code> - the schema/attributes of the data that is cached. This can be helpful |
| if you don't store it with the data.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>a function that takes the partition id and the iterator of batches in the partition. |
| It returns an iterator of batches that should be decompressed.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="convertCachedBatchToColumnarBatch-org.apache.spark.rdd.RDD-scala.collection.Seq-scala.collection.Seq-org.apache.spark.sql.internal.SQLConf-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>convertCachedBatchToColumnarBatch</h4> |
| <pre><a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/vectorized/ColumnarBatch.html" title="class in org.apache.spark.sql.vectorized">ColumnarBatch</a>> convertCachedBatchToColumnarBatch(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>> input, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> cacheAttributes, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> selectedAttributes, |
| org.apache.spark.sql.internal.SQLConf conf)</pre> |
| <div class="block">Convert the cached data into a ColumnarBatch. This currently is only used if |
| <code>supportsColumnarOutput()</code> returns true for the associated schema, but there are other checks |
| that can force row based output. One of the main advantages of doing columnar output over row |
| based output is that the code generation is more standard and can be combined with code |
| generation for downstream operations.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>input</code> - the cached batches that should be converted.</dd> |
| <dd><code>cacheAttributes</code> - the attributes of the data in the batch.</dd> |
| <dd><code>selectedAttributes</code> - the fields that should be loaded from the data and the order they |
| should appear in the output batch.</dd> |
| <dd><code>conf</code> - the configuration for the job.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>an RDD of the input cached batches transformed into the ColumnarBatch format.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="convertCachedBatchToInternalRow-org.apache.spark.rdd.RDD-scala.collection.Seq-scala.collection.Seq-org.apache.spark.sql.internal.SQLConf-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>convertCachedBatchToInternalRow</h4> |
| <pre><a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><org.apache.spark.sql.catalyst.InternalRow> convertCachedBatchToInternalRow(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>> input, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> cacheAttributes, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> selectedAttributes, |
| org.apache.spark.sql.internal.SQLConf conf)</pre> |
| <div class="block">Convert the cached batch into <code>InternalRow</code>s. If you want this to be performant, code |
| generation is advised.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>input</code> - the cached batches that should be converted.</dd> |
| <dd><code>cacheAttributes</code> - the attributes of the data in the batch.</dd> |
| <dd><code>selectedAttributes</code> - the fields that should be loaded from the data and the order they |
| should appear in the output rows.</dd> |
| <dd><code>conf</code> - the configuration for the job.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>RDD of the rows that were stored in the cached batches.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="convertColumnarBatchToCachedBatch-org.apache.spark.rdd.RDD-scala.collection.Seq-org.apache.spark.storage.StorageLevel-org.apache.spark.sql.internal.SQLConf-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>convertColumnarBatchToCachedBatch</h4> |
| <pre><a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>> convertColumnarBatchToCachedBatch(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/vectorized/ColumnarBatch.html" title="class in org.apache.spark.sql.vectorized">ColumnarBatch</a>> input, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> schema, |
| <a href="../../../../../org/apache/spark/storage/StorageLevel.html" title="class in org.apache.spark.storage">StorageLevel</a> storageLevel, |
| org.apache.spark.sql.internal.SQLConf conf)</pre> |
| <div class="block">Convert an <code>RDD[ColumnarBatch]</code> into an <code>RDD[CachedBatch]</code> in preparation for caching the data. |
| This will only be called if <code>supportsColumnarInput()</code> returned true for the given schema and |
| the plan up to this point could produce columnar output without modifying it.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>input</code> - the input <code>RDD</code> to be converted.</dd> |
| <dd><code>schema</code> - the schema of the data being stored.</dd> |
| <dd><code>storageLevel</code> - where the data will be stored.</dd> |
| <dd><code>conf</code> - the config for the query.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>The data converted into a format more suitable for caching.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="convertInternalRowToCachedBatch-org.apache.spark.rdd.RDD-scala.collection.Seq-org.apache.spark.storage.StorageLevel-org.apache.spark.sql.internal.SQLConf-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>convertInternalRowToCachedBatch</h4> |
| <pre><a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><<a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar">CachedBatch</a>> convertInternalRowToCachedBatch(<a href="../../../../../org/apache/spark/rdd/RDD.html" title="class in org.apache.spark.rdd">RDD</a><org.apache.spark.sql.catalyst.InternalRow> input, |
| scala.collection.Seq<org.apache.spark.sql.catalyst.expressions.Attribute> schema, |
| <a href="../../../../../org/apache/spark/storage/StorageLevel.html" title="class in org.apache.spark.storage">StorageLevel</a> storageLevel, |
| org.apache.spark.sql.internal.SQLConf conf)</pre> |
| <div class="block">Convert an <code>RDD[InternalRow]</code> into an <code>RDD[CachedBatch]</code> in preparation for caching the data.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>input</code> - the input <code>RDD</code> to be converted.</dd> |
| <dd><code>schema</code> - the schema of the data being stored.</dd> |
| <dd><code>storageLevel</code> - where the data will be stored.</dd> |
| <dd><code>conf</code> - the config for the query.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>The data converted into a format more suitable for caching.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="supportsColumnarInput-scala.collection.Seq-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>supportsColumnarInput</h4> |
| <pre>boolean supportsColumnarInput(scala.collection.Seq&lt;org.apache.spark.sql.catalyst.expressions.Attribute&gt; schema)</pre> |
| <div class="block">Can <code>convertColumnarBatchToCachedBatch()</code> be called instead of |
| <code>convertInternalRowToCachedBatch()</code> for this given schema? True if it can and false if it |
| cannot. Columnar input is only supported if the plan could produce columnar output. Currently |
| this is mostly supported by input formats like parquet and orc, but more operations are likely |
| to be supported soon.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>schema</code> - the schema of the data being stored.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>True if columnar input can be supported, else false.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="supportsColumnarOutput-org.apache.spark.sql.types.StructType-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>supportsColumnarOutput</h4> |
| <pre>boolean supportsColumnarOutput(<a href="../../../../../org/apache/spark/sql/types/StructType.html" title="class in org.apache.spark.sql.types">StructType</a> schema)</pre> |
| <div class="block">Can <code>convertCachedBatchToColumnarBatch()</code> be called instead of |
| <code>convertCachedBatchToInternalRow()</code> for this given schema? True if it can and false if it |
| cannot. Columnar output is typically preferred because it is more efficient. Note that |
| <code>convertCachedBatchToInternalRow()</code> must always be supported as there are other checks that |
| can force row based output.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>schema</code> - the schema of the data being checked.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>true if columnar output should be used for this schema, else false.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="vectorTypes-scala.collection.Seq-org.apache.spark.sql.internal.SQLConf-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>vectorTypes</h4> |
| <pre>scala.Option&lt;scala.collection.Seq&lt;String&gt;&gt; vectorTypes(scala.collection.Seq&lt;org.apache.spark.sql.catalyst.expressions.Attribute&gt; attributes, |
| org.apache.spark.sql.internal.SQLConf conf)</pre> |
| <div class="block">The exact Java types of the columns that are output in columnar processing mode. This |
| is a performance optimization for code generation and is optional.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>attributes</code> - the attributes to be output.</dd> |
| <dd><code>conf</code> - the config for the query that will read the data.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>the exact Java types of the output columns, wrapped in an <code>Option</code> because providing them is optional.</dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../../org/apache/spark/sql/columnar/CachedBatch.html" title="interface in org.apache.spark.sql.columnar"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../../org/apache/spark/sql/columnar/ExtractableLiteral.html" title="class in org.apache.spark.sql.columnar"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../../index.html?org/apache/spark/sql/columnar/CachedBatchSerializer.html" target="_top">Frames</a></li> |
| <li><a href="CachedBatchSerializer.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li>Constr | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| <script defer="defer" type="text/javascript" src="../../../../../lib/jquery.js"></script><script defer="defer" type="text/javascript" src="../../../../../lib/api-javadocs.js"></script></body> |
| </html> |