blob: 61a5d0a83d84afbf10d7d29cb5f172e5d578368d [file] [log] [blame]
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta name="description" content='The following options for repartition by range are possible:
1. Return a new SparkDataFrame range partitioned by
the given columns into numPartitions.
2. Return a new SparkDataFrame range partitioned by the given column(s),
using spark.sql.shuffle.partitions as number of partitions.
At least one partition-by expression must be specified.
When no explicit sort order is specified, "ascending nulls first" is assumed.'><!-- Inform modern browsers that this page supports both dark and light color schemes,
and the page author prefers light. --><meta name="color-scheme" content="light dark"><script>
// Fallback for browsers that do not support `prefers-color-scheme`.
// Such browsers report the unsupported media query as "not all", so none of
// the media-filtered theme stylesheets declared later in <head> would apply.
// In that case, inject the `light` CSS before the others, with no media
// filter so that it will be downloaded with highest priority.
if (window.matchMedia("(prefers-color-scheme: dark)").media === "not all") {
  var rootElement = document.documentElement;

  // Un-hide the page. Runs on success AND on failure: if the third-party
  // stylesheet cannot be fetched, showing the page without the fallback
  // theme is better than leaving it hidden indefinitely.
  var revealPage = function () {
    rootElement.style.display = "";
  };

  var fallbackSheet = document.createElement("link");
  fallbackSheet.id = "css";
  fallbackSheet.rel = "stylesheet";
  fallbackSheet.href = "https://bootswatch.com/5/flatly/bootstrap.css";
  fallbackSheet.addEventListener("load", revealPage);
  fallbackSheet.addEventListener("error", revealPage);

  // Hide the document until the fallback stylesheet has loaded (or failed).
  rootElement.style.display = "none";
  // Appending to <head> now places the link ahead of the stylesheets that
  // have not been parsed yet.
  document.head.appendChild(fallbackSheet);
}
</script><title>Repartition by range — repartitionByRange • SparkR</title><script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><link href="../deps/bootstrap-5.3.1/bootstrap.min.css" rel="stylesheet"><script src="../deps/bootstrap-5.3.1/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script 
src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><link href="../extra.css" rel="stylesheet"><meta property="og:title" content="Repartition by range — repartitionByRange"><meta property="og:description" content='The following options for repartition by range are possible:
1. Return a new SparkDataFrame range partitioned by
the given columns into numPartitions.
2. Return a new SparkDataFrame range partitioned by the given column(s),
using spark.sql.shuffle.partitions as number of partitions.
At least one partition-by expression must be specified.
When no explicit sort order is specified, "ascending nulls first" is assumed.'><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]--><!-- Flatly Theme - Light --><link id="css-light" rel="stylesheet" href="https://bootswatch.com/5/flatly/bootstrap.css" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)"><!-- Darkly Theme - Dark --><link id="css-dark" rel="stylesheet" href="https://bootswatch.com/5/darkly/bootstrap.css" media="(prefers-color-scheme: dark)"><!-- preferably CSS --><link rel="stylesheet" href="../preferably.css"><link id="css-code-light" rel="stylesheet" href="../code-color-scheme-light.css" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)"><link id="css-code-dark" rel="stylesheet" href="../code-color-scheme-dark.css" media="(prefers-color-scheme: dark)"><script src="../darkswitch.js"></script></head><body>
<a href="#main" class="visually-hidden-focusable">Skip to contents</a>
<nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-primary"><div class="container">
<a class="external-link navbar-brand" href="https://spark.apache.org/">
<img src="https://spark.apache.org/images/spark-logo-rev.svg" alt="Apache Spark" max-height="100%"></a>
<a class="navbar-brand me-2" href="../index.html">SparkR</a>
<small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">4.0.0</small>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar" class="collapse navbar-collapse ms-2">
<ul class="navbar-nav me-auto"><li class="active nav-item">
<a class="nav-link" href="../reference/index.html">Reference</a>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false" aria-haspopup="true" id="dropdown-articles">Articles</a>
<div class="dropdown-menu" aria-labelledby="dropdown-articles">
<a class="dropdown-item" href="../articles/sparkr-vignettes.html">SparkR - Practical Guide</a>
</div>
</li>
</ul><form class="form-inline my-2 my-lg-0" role="search">
<input type="search" class="form-control me-sm-2" aria-label="Search" name="search-input" data-search-index="../search.json" id="search-input" placeholder="Search for" autocomplete="off"></form>
<ul class="navbar-nav"><li>
<a class="external-link nav-link" id="css-toggle-btn" aria-label="Toggle color scheme">
<span class="fas fa fas fa-adjust fa-lg" aria-hidden="true"></span>
</a>
</li>
</ul></div>
</div>
</nav><div class="container template-reference-topic">
<div class="row">
<main id="main" class="col-md-9"><div class="page-header">
<h1>Repartition by range</h1>
<div class="d-none name"><code>repartitionByRange.Rd</code></div>
</div>
<div class="ref-description section level2">
<p>The following options for repartition by range are possible:</p><ol><li><p>Return a new SparkDataFrame range partitioned by
the given columns into <code>numPartitions</code>.</p></li>
<li><p>Return a new SparkDataFrame range partitioned by the given column(s),
using <code>spark.sql.shuffle.partitions</code> as number of partitions.</p></li>
</ol><p>At least one partition-by expression must be specified.
When no explicit sort order is specified, "ascending nulls first" is assumed.</p>
</div>
<div class="section level2">
<h2 id="ref-usage">Usage<a class="anchor" aria-label="anchor" href="#ref-usage"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span><span class="fu">repartitionByRange</span><span class="op">(</span><span class="va">x</span>, <span class="va">...</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for SparkDataFrame</span></span>
<span><span class="fu">repartitionByRange</span><span class="op">(</span><span class="va">x</span>, numPartitions <span class="op">=</span> <span class="cn">NULL</span>, col <span class="op">=</span> <span class="cn">NULL</span>, <span class="va">...</span><span class="op">)</span></span></code></pre></div>
</div>
<div class="section level2">
<h2 id="arguments">Arguments<a class="anchor" aria-label="anchor" href="#arguments"></a></h2>
<dl><dt>x</dt>
<dd><p>a SparkDataFrame.</p></dd>
<dt>...</dt>
<dd><p>additional column(s) to be used in the range partitioning.</p></dd>
<dt>numPartitions</dt>
<dd><p>the number of partitions to use.</p></dd>
<dt>col</dt>
<dd><p>the column by which the range partitioning will be performed.</p></dd>
</dl></div>
<div class="section level2">
<h2 id="details">Details<a class="anchor" aria-label="anchor" href="#details"></a></h2>
<p>Note that due to performance reasons this method uses sampling to estimate the ranges.
Hence, the output may not be consistent, since sampling can return different values.
The sample size can be controlled by the config
<code>spark.sql.execution.rangeExchange.sampleSizePerPartition</code>.</p>
</div>
<div class="section level2">
<h2 id="note">Note<a class="anchor" aria-label="anchor" href="#note"></a></h2>
<p>repartitionByRange since 2.4.0</p>
</div>
<div class="section level2">
<h2 id="see-also">See also<a class="anchor" aria-label="anchor" href="#see-also"></a></h2>
<div class="dont-index"><p><a href="repartition.html">repartition</a>, <a href="coalesce.html">coalesce</a></p>
<p>Other SparkDataFrame functions:
<code><a href="SparkDataFrame.html">SparkDataFrame-class</a></code>,
<code><a href="summarize.html">agg</a>()</code>,
<code><a href="alias.html">alias</a>()</code>,
<code><a href="arrange.html">arrange</a>()</code>,
<code><a href="as.data.frame.html">as.data.frame</a>()</code>,
<code><a href="attach.html">attach,SparkDataFrame-method</a></code>,
<code><a href="broadcast.html">broadcast</a>()</code>,
<code><a href="cache.html">cache</a>()</code>,
<code><a href="checkpoint.html">checkpoint</a>()</code>,
<code><a href="coalesce.html">coalesce</a>()</code>,
<code><a href="collect.html">collect</a>()</code>,
<code><a href="columns.html">colnames</a>()</code>,
<code><a href="coltypes.html">coltypes</a>()</code>,
<code><a href="createOrReplaceTempView.html">createOrReplaceTempView</a>()</code>,
<code><a href="crossJoin.html">crossJoin</a>()</code>,
<code><a href="cube.html">cube</a>()</code>,
<code><a href="dapplyCollect.html">dapplyCollect</a>()</code>,
<code><a href="dapply.html">dapply</a>()</code>,
<code><a href="describe.html">describe</a>()</code>,
<code><a href="dim.html">dim</a>()</code>,
<code><a href="distinct.html">distinct</a>()</code>,
<code><a href="dropDuplicates.html">dropDuplicates</a>()</code>,
<code><a href="nafunctions.html">dropna</a>()</code>,
<code><a href="drop.html">drop</a>()</code>,
<code><a href="dtypes.html">dtypes</a>()</code>,
<code><a href="exceptAll.html">exceptAll</a>()</code>,
<code><a href="except.html">except</a>()</code>,
<code><a href="explain.html">explain</a>()</code>,
<code><a href="filter.html">filter</a>()</code>,
<code><a href="first.html">first</a>()</code>,
<code><a href="gapplyCollect.html">gapplyCollect</a>()</code>,
<code><a href="gapply.html">gapply</a>()</code>,
<code><a href="getNumPartitions.html">getNumPartitions</a>()</code>,
<code><a href="groupBy.html">group_by</a>()</code>,
<code><a href="head.html">head</a>()</code>,
<code><a href="hint.html">hint</a>()</code>,
<code><a href="histogram.html">histogram</a>()</code>,
<code><a href="insertInto.html">insertInto</a>()</code>,
<code><a href="intersectAll.html">intersectAll</a>()</code>,
<code><a href="intersect.html">intersect</a>()</code>,
<code><a href="isLocal.html">isLocal</a>()</code>,
<code><a href="isStreaming.html">isStreaming</a>()</code>,
<code><a href="join.html">join</a>()</code>,
<code><a href="limit.html">limit</a>()</code>,
<code><a href="localCheckpoint.html">localCheckpoint</a>()</code>,
<code><a href="merge.html">merge</a>()</code>,
<code><a href="mutate.html">mutate</a>()</code>,
<code><a href="ncol.html">ncol</a>()</code>,
<code><a href="nrow.html">nrow</a>()</code>,
<code><a href="persist.html">persist</a>()</code>,
<code><a href="printSchema.html">printSchema</a>()</code>,
<code><a href="randomSplit.html">randomSplit</a>()</code>,
<code><a href="rbind.html">rbind</a>()</code>,
<code><a href="rename.html">rename</a>()</code>,
<code><a href="repartition.html">repartition</a>()</code>,
<code><a href="rollup.html">rollup</a>()</code>,
<code><a href="sample.html">sample</a>()</code>,
<code><a href="saveAsTable.html">saveAsTable</a>()</code>,
<code><a href="schema.html">schema</a>()</code>,
<code><a href="selectExpr.html">selectExpr</a>()</code>,
<code><a href="select.html">select</a>()</code>,
<code><a href="showDF.html">showDF</a>()</code>,
<code><a href="show.html">show</a>()</code>,
<code><a href="storageLevel.html">storageLevel</a>()</code>,
<code><a href="str.html">str</a>()</code>,
<code><a href="subset.html">subset</a>()</code>,
<code><a href="summary.html">summary</a>()</code>,
<code><a href="take.html">take</a>()</code>,
<code><a href="toJSON.html">toJSON</a>()</code>,
<code><a href="unionAll.html">unionAll</a>()</code>,
<code><a href="unionByName.html">unionByName</a>()</code>,
<code><a href="union.html">union</a>()</code>,
<code><a href="unpersist.html">unpersist</a>()</code>,
<code><a href="unpivot.html">unpivot</a>()</code>,
<code><a href="withColumn.html">withColumn</a>()</code>,
<code><a href="withWatermark.html">withWatermark</a>()</code>,
<code><a href="with.html">with</a>()</code>,
<code><a href="write.df.html">write.df</a>()</code>,
<code><a href="write.jdbc.html">write.jdbc</a>()</code>,
<code><a href="write.json.html">write.json</a>()</code>,
<code><a href="write.orc.html">write.orc</a>()</code>,
<code><a href="write.parquet.html">write.parquet</a>()</code>,
<code><a href="write.stream.html">write.stream</a>()</code>,
<code><a href="write.text.html">write.text</a>()</code></p></div>
</div>
<div class="section level2">
<h2 id="ref-examples">Examples<a class="anchor" aria-label="anchor" href="#ref-examples"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span class="r-in"><span><span class="kw">if</span> <span class="op">(</span><span class="cn">FALSE</span><span class="op">)</span> <span class="op">{</span></span></span>
<span class="r-in"><span><span class="fu"><a href="sparkR.session.html">sparkR.session</a></span><span class="op">(</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">path</span> <span class="op">&lt;-</span> <span class="st">"path/to/file.json"</span></span></span>
<span class="r-in"><span><span class="va">df</span> <span class="op">&lt;-</span> <span class="fu"><a href="read.json.html">read.json</a></span><span class="op">(</span><span class="va">path</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">newDF</span> <span class="op">&lt;-</span> <span class="fu">repartitionByRange</span><span class="op">(</span><span class="va">df</span>, col <span class="op">=</span> <span class="va">df</span><span class="op">$</span><span class="va">col1</span>, <span class="va">df</span><span class="op">$</span><span class="va">col2</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">newDF</span> <span class="op">&lt;-</span> <span class="fu">repartitionByRange</span><span class="op">(</span><span class="va">df</span>, <span class="fl">3L</span>, col <span class="op">=</span> <span class="va">df</span><span class="op">$</span><span class="va">col1</span>, <span class="va">df</span><span class="op">$</span><span class="va">col2</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="op">}</span></span></span>
</code></pre></div>
</div>
</main><aside class="col-md-3"><nav id="toc"><h2>On this page</h2>
</nav></aside></div>
<footer><div class="copyright">
<p></p><p>Developed by <a href="https://www.apache.org/" class="external-link">The Apache Software Foundation</a>.</p>
</div>
<div class="pkgdown">
<p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.9.</p>
<p class="preferably">Using <a href="https://preferably.amirmasoudabdol.name/?source=footer" class="external-link">preferably</a> template.</p>
</div>
</footer></div>
</body></html>