blob: 48b2853e8638636fb653cde01c0c728bbd18c7b5 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html lang="en"><head><title>R: Subset</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" type="text/css" href="R.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/languages/r.min.js"></script>
<script>hljs.initHighlightingOnLoad();</script>
</head><body>
<table width="100%" summary="page for [[ {SparkR}"><tr><td>[[ {SparkR}</td><td align="right">R Documentation</td></tr></table>
<h2>Subset</h2>
<h3>Description</h3>
<p>Return subsets of a SparkDataFrame according to the given conditions.
</p>
<h3>Usage</h3>
<pre>
## S4 method for signature 'SparkDataFrame,numericOrcharacter'
x[[i]]
## S4 replacement method for signature 'SparkDataFrame,numericOrcharacter'
x[[i]] &lt;- value
## S4 method for signature 'SparkDataFrame'
x[i, j, ..., drop = F]
## S4 method for signature 'SparkDataFrame'
subset(x, subset, select, drop = F, ...)
subset(x, ...)
</pre>
<h3>Arguments</h3>
<table summary="R argblock">
<tr valign="top"><td><code>x</code></td>
<td>
<p>a SparkDataFrame.</p>
</td></tr>
<tr valign="top"><td><code>i,subset</code></td>
<td>
<p>(Optional) a logical expression to filter on rows.
For the extract operator <code>[[</code> and the replacement operator <code>[[&lt;-</code>,
the indexing parameter for a single Column.</p>
</td></tr>
<tr valign="top"><td><code>value</code></td>
<td>
<p>a Column, an atomic vector of length 1 to be used as a literal value, or <code>NULL</code>.
If <code>NULL</code>, the specified Column is dropped.</p>
</td></tr>
<tr valign="top"><td><code>j,select</code></td>
<td>
<p>an expression for a single Column, or a list of columns, to select from the SparkDataFrame.</p>
</td></tr>
<tr valign="top"><td><code>...</code></td>
<td>
<p>currently not used.</p>
</td></tr>
<tr valign="top"><td><code>drop</code></td>
<td>
<p>if <code>TRUE</code>, a Column is returned when the resulting dataset has only one column;
otherwise, a SparkDataFrame is always returned.</p>
</td></tr>
</table>
<h3>Value</h3>
<p>A new SparkDataFrame containing only the rows that meet the condition, restricted to the selected columns.
</p>
<h3>Note</h3>
<p>[[ since 1.4.0
</p>
<p>[[&lt;- since 2.1.1
</p>
<p>[ since 1.4.0
</p>
<p>subset since 1.5.0
</p>
<h3>See Also</h3>
<p><a href="withColumn.html">withColumn</a>
</p>
<p>Other SparkDataFrame functions: <code><a href="select.html">$</a></code>,
<code><a href="select.html">$,SparkDataFrame-method</a></code>, <code><a href="select.html">$&lt;-</a></code>,
<code><a href="select.html">$&lt;-,SparkDataFrame-method</a></code>,
<code><a href="select.html">select</a></code>, <code><a href="select.html">select</a></code>,
<code><a href="select.html">select,SparkDataFrame,Column-method</a></code>,
<code><a href="select.html">select,SparkDataFrame,character-method</a></code>,
<code><a href="select.html">select,SparkDataFrame,list-method</a></code>;
<code><a href="SparkDataFrame.html">SparkDataFrame-class</a></code>; <code><a href="summarize.html">agg</a></code>,
<code><a href="summarize.html">agg</a></code>, <code><a href="summarize.html">agg</a></code>,
<code><a href="summarize.html">agg,GroupedData-method</a></code>,
<code><a href="summarize.html">agg,SparkDataFrame-method</a></code>,
<code><a href="summarize.html">summarize</a></code>, <code><a href="summarize.html">summarize</a></code>,
<code><a href="summarize.html">summarize</a></code>,
<code><a href="summarize.html">summarize,GroupedData-method</a></code>,
<code><a href="summarize.html">summarize,SparkDataFrame-method</a></code>;
<code><a href="arrange.html">arrange</a></code>, <code><a href="arrange.html">arrange</a></code>,
<code><a href="arrange.html">arrange</a></code>,
<code><a href="arrange.html">arrange,SparkDataFrame,Column-method</a></code>,
<code><a href="arrange.html">arrange,SparkDataFrame,character-method</a></code>,
<code><a href="arrange.html">orderBy,SparkDataFrame,characterOrColumn-method</a></code>;
<code><a href="as.data.frame.html">as.data.frame</a></code>,
<code><a href="as.data.frame.html">as.data.frame,SparkDataFrame-method</a></code>;
<code><a href="attach.html">attach</a></code>,
<code><a href="attach.html">attach,SparkDataFrame-method</a></code>;
<code><a href="cache.html">cache</a></code>, <code><a href="cache.html">cache</a></code>,
<code><a href="cache.html">cache,SparkDataFrame-method</a></code>;
<code><a href="coalesce.html">coalesce</a></code>, <code><a href="coalesce.html">coalesce</a></code>,
<code><a href="coalesce.html">coalesce</a></code>,
<code><a href="coalesce.html">coalesce,Column-method</a></code>,
<code><a href="coalesce.html">coalesce,SparkDataFrame-method</a></code>;
<code><a href="collect.html">collect</a></code>, <code><a href="collect.html">collect</a></code>,
<code><a href="collect.html">collect,SparkDataFrame-method</a></code>;
<code><a href="columns.html">colnames</a></code>, <code><a href="columns.html">colnames</a></code>,
<code><a href="columns.html">colnames,SparkDataFrame-method</a></code>,
<code><a href="columns.html">colnames&lt;-</a></code>, <code><a href="columns.html">colnames&lt;-</a></code>,
<code><a href="columns.html">colnames&lt;-,SparkDataFrame-method</a></code>,
<code><a href="columns.html">columns</a></code>, <code><a href="columns.html">columns</a></code>,
<code><a href="columns.html">columns,SparkDataFrame-method</a></code>,
<code><a href="columns.html">names</a></code>,
<code><a href="columns.html">names,SparkDataFrame-method</a></code>,
<code><a href="columns.html">names&lt;-</a></code>,
<code><a href="columns.html">names&lt;-,SparkDataFrame-method</a></code>;
<code><a href="coltypes.html">coltypes</a></code>, <code><a href="coltypes.html">coltypes</a></code>,
<code><a href="coltypes.html">coltypes,SparkDataFrame-method</a></code>,
<code><a href="coltypes.html">coltypes&lt;-</a></code>, <code><a href="coltypes.html">coltypes&lt;-</a></code>,
<code><a href="coltypes.html">coltypes&lt;-,SparkDataFrame,character-method</a></code>;
<code><a href="nrow.html">count,SparkDataFrame-method</a></code>,
<code><a href="nrow.html">nrow</a></code>, <code><a href="nrow.html">nrow</a></code>,
<code><a href="nrow.html">nrow,SparkDataFrame-method</a></code>;
<code><a href="createOrReplaceTempView.html">createOrReplaceTempView</a></code>,
<code><a href="createOrReplaceTempView.html">createOrReplaceTempView</a></code>,
<code><a href="createOrReplaceTempView.html">createOrReplaceTempView,SparkDataFrame,character-method</a></code>;
<code><a href="crossJoin.html">crossJoin</a></code>,
<code><a href="crossJoin.html">crossJoin,SparkDataFrame,SparkDataFrame-method</a></code>;
<code><a href="dapplyCollect.html">dapplyCollect</a></code>, <code><a href="dapplyCollect.html">dapplyCollect</a></code>,
<code><a href="dapplyCollect.html">dapplyCollect,SparkDataFrame,function-method</a></code>;
<code><a href="dapply.html">dapply</a></code>, <code><a href="dapply.html">dapply</a></code>,
<code><a href="dapply.html">dapply,SparkDataFrame,function,structType-method</a></code>;
<code><a href="summary.html">describe</a></code>, <code><a href="summary.html">describe</a></code>,
<code><a href="summary.html">describe</a></code>,
<code><a href="summary.html">describe,SparkDataFrame,ANY-method</a></code>,
<code><a href="summary.html">describe,SparkDataFrame,character-method</a></code>,
<code><a href="summary.html">describe,SparkDataFrame-method</a></code>,
<code><a href="summary.html">summary</a></code>, <code><a href="summary.html">summary</a></code>,
<code><a href="summary.html">summary,SparkDataFrame-method</a></code>;
<code><a href="dim.html">dim</a></code>,
<code><a href="dim.html">dim,SparkDataFrame-method</a></code>;
<code><a href="distinct.html">distinct</a></code>, <code><a href="distinct.html">distinct</a></code>,
<code><a href="distinct.html">distinct,SparkDataFrame-method</a></code>,
<code><a href="distinct.html">unique</a></code>,
<code><a href="distinct.html">unique,SparkDataFrame-method</a></code>;
<code><a href="dropDuplicates.html">dropDuplicates</a></code>,
<code><a href="dropDuplicates.html">dropDuplicates</a></code>,
<code><a href="dropDuplicates.html">dropDuplicates,SparkDataFrame-method</a></code>;
<code><a href="nafunctions.html">dropna</a></code>, <code><a href="nafunctions.html">dropna</a></code>,
<code><a href="nafunctions.html">dropna,SparkDataFrame-method</a></code>,
<code><a href="nafunctions.html">fillna</a></code>, <code><a href="nafunctions.html">fillna</a></code>,
<code><a href="nafunctions.html">fillna,SparkDataFrame-method</a></code>,
<code><a href="nafunctions.html">na.omit</a></code>, <code><a href="nafunctions.html">na.omit</a></code>,
<code><a href="nafunctions.html">na.omit,SparkDataFrame-method</a></code>;
<code><a href="drop.html">drop</a></code>, <code><a href="drop.html">drop</a></code>,
<code><a href="drop.html">drop</a></code>, <code><a href="drop.html">drop,ANY-method</a></code>,
<code><a href="drop.html">drop,SparkDataFrame-method</a></code>;
<code><a href="dtypes.html">dtypes</a></code>, <code><a href="dtypes.html">dtypes</a></code>,
<code><a href="dtypes.html">dtypes,SparkDataFrame-method</a></code>;
<code><a href="except.html">except</a></code>, <code><a href="except.html">except</a></code>,
<code><a href="except.html">except,SparkDataFrame,SparkDataFrame-method</a></code>;
<code><a href="explain.html">explain</a></code>, <code><a href="explain.html">explain</a></code>,
<code><a href="explain.html">explain,SparkDataFrame-method</a></code>;
<code><a href="filter.html">filter</a></code>, <code><a href="filter.html">filter</a></code>,
<code><a href="filter.html">filter,SparkDataFrame,characterOrColumn-method</a></code>,
<code><a href="filter.html">where</a></code>, <code><a href="filter.html">where</a></code>,
<code><a href="filter.html">where,SparkDataFrame,characterOrColumn-method</a></code>;
<code><a href="first.html">first</a></code>, <code><a href="first.html">first</a></code>,
<code><a href="first.html">first</a></code>,
<code><a href="first.html">first,SparkDataFrame-method</a></code>,
<code><a href="first.html">first,characterOrColumn-method</a></code>;
<code><a href="gapplyCollect.html">gapplyCollect</a></code>, <code><a href="gapplyCollect.html">gapplyCollect</a></code>,
<code><a href="gapplyCollect.html">gapplyCollect</a></code>,
<code><a href="gapplyCollect.html">gapplyCollect,GroupedData-method</a></code>,
<code><a href="gapplyCollect.html">gapplyCollect,SparkDataFrame-method</a></code>;
<code><a href="gapply.html">gapply</a></code>, <code><a href="gapply.html">gapply</a></code>,
<code><a href="gapply.html">gapply</a></code>,
<code><a href="gapply.html">gapply,GroupedData-method</a></code>,
<code><a href="gapply.html">gapply,SparkDataFrame-method</a></code>;
<code><a href="getNumPartitions.html">getNumPartitions</a></code>,
<code><a href="getNumPartitions.html">getNumPartitions,SparkDataFrame-method</a></code>;
<code><a href="groupBy.html">groupBy</a></code>, <code><a href="groupBy.html">groupBy</a></code>,
<code><a href="groupBy.html">groupBy,SparkDataFrame-method</a></code>,
<code><a href="groupBy.html">group_by</a></code>, <code><a href="groupBy.html">group_by</a></code>,
<code><a href="groupBy.html">group_by,SparkDataFrame-method</a></code>;
<code><a href="head.html">head</a></code>,
<code><a href="head.html">head,SparkDataFrame-method</a></code>;
<code><a href="histogram.html">histogram</a></code>,
<code><a href="histogram.html">histogram,SparkDataFrame,characterOrColumn-method</a></code>;
<code><a href="insertInto.html">insertInto</a></code>, <code><a href="insertInto.html">insertInto</a></code>,
<code><a href="insertInto.html">insertInto,SparkDataFrame,character-method</a></code>;
<code><a href="intersect.html">intersect</a></code>, <code><a href="intersect.html">intersect</a></code>,
<code><a href="intersect.html">intersect,SparkDataFrame,SparkDataFrame-method</a></code>;
<code><a href="isLocal.html">isLocal</a></code>, <code><a href="isLocal.html">isLocal</a></code>,
<code><a href="isLocal.html">isLocal,SparkDataFrame-method</a></code>;
<code><a href="join.html">join</a></code>,
<code><a href="join.html">join,SparkDataFrame,SparkDataFrame-method</a></code>;
<code><a href="limit.html">limit</a></code>, <code><a href="limit.html">limit</a></code>,
<code><a href="limit.html">limit,SparkDataFrame,numeric-method</a></code>;
<code><a href="merge.html">merge</a></code>, <code><a href="merge.html">merge</a></code>,
<code><a href="merge.html">merge,SparkDataFrame,SparkDataFrame-method</a></code>;
<code><a href="mutate.html">mutate</a></code>, <code><a href="mutate.html">mutate</a></code>,
<code><a href="mutate.html">mutate,SparkDataFrame-method</a></code>,
<code><a href="mutate.html">transform</a></code>, <code><a href="mutate.html">transform</a></code>,
<code><a href="mutate.html">transform,SparkDataFrame-method</a></code>;
<code><a href="ncol.html">ncol</a></code>,
<code><a href="ncol.html">ncol,SparkDataFrame-method</a></code>;
<code><a href="persist.html">persist</a></code>, <code><a href="persist.html">persist</a></code>,
<code><a href="persist.html">persist,SparkDataFrame,character-method</a></code>;
<code><a href="printSchema.html">printSchema</a></code>, <code><a href="printSchema.html">printSchema</a></code>,
<code><a href="printSchema.html">printSchema,SparkDataFrame-method</a></code>;
<code><a href="randomSplit.html">randomSplit</a></code>, <code><a href="randomSplit.html">randomSplit</a></code>,
<code><a href="randomSplit.html">randomSplit,SparkDataFrame,numeric-method</a></code>;
<code><a href="rbind.html">rbind</a></code>, <code><a href="rbind.html">rbind</a></code>,
<code><a href="rbind.html">rbind,SparkDataFrame-method</a></code>;
<code><a href="registerTempTable-deprecated.html">registerTempTable</a></code>,
<code><a href="registerTempTable-deprecated.html">registerTempTable</a></code>,
<code><a href="registerTempTable-deprecated.html">registerTempTable,SparkDataFrame,character-method</a></code>;
<code><a href="rename.html">rename</a></code>, <code><a href="rename.html">rename</a></code>,
<code><a href="rename.html">rename,SparkDataFrame-method</a></code>,
<code><a href="rename.html">withColumnRenamed</a></code>,
<code><a href="rename.html">withColumnRenamed</a></code>,
<code><a href="rename.html">withColumnRenamed,SparkDataFrame,character,character-method</a></code>;
<code><a href="repartition.html">repartition</a></code>, <code><a href="repartition.html">repartition</a></code>,
<code><a href="repartition.html">repartition,SparkDataFrame-method</a></code>;
<code><a href="sample.html">sample</a></code>, <code><a href="sample.html">sample</a></code>,
<code><a href="sample.html">sample,SparkDataFrame,logical,numeric-method</a></code>,
<code><a href="sample.html">sample_frac</a></code>, <code><a href="sample.html">sample_frac</a></code>,
<code><a href="sample.html">sample_frac,SparkDataFrame,logical,numeric-method</a></code>;
<code><a href="write.parquet.html">saveAsParquetFile</a></code>,
<code><a href="write.parquet.html">saveAsParquetFile</a></code>,
<code><a href="write.parquet.html">saveAsParquetFile,SparkDataFrame,character-method</a></code>,
<code><a href="write.parquet.html">write.parquet</a></code>, <code><a href="write.parquet.html">write.parquet</a></code>,
<code><a href="write.parquet.html">write.parquet,SparkDataFrame,character-method</a></code>;
<code><a href="saveAsTable.html">saveAsTable</a></code>, <code><a href="saveAsTable.html">saveAsTable</a></code>,
<code><a href="saveAsTable.html">saveAsTable,SparkDataFrame,character-method</a></code>;
<code><a href="write.df.html">saveDF</a></code>, <code><a href="write.df.html">saveDF</a></code>,
<code><a href="write.df.html">saveDF,SparkDataFrame,character-method</a></code>,
<code><a href="write.df.html">write.df</a></code>, <code><a href="write.df.html">write.df</a></code>,
<code><a href="write.df.html">write.df</a></code>,
<code><a href="write.df.html">write.df,SparkDataFrame-method</a></code>;
<code><a href="schema.html">schema</a></code>, <code><a href="schema.html">schema</a></code>,
<code><a href="schema.html">schema,SparkDataFrame-method</a></code>;
<code><a href="selectExpr.html">selectExpr</a></code>, <code><a href="selectExpr.html">selectExpr</a></code>,
<code><a href="selectExpr.html">selectExpr,SparkDataFrame,character-method</a></code>;
<code><a href="showDF.html">showDF</a></code>, <code><a href="showDF.html">showDF</a></code>,
<code><a href="showDF.html">showDF,SparkDataFrame-method</a></code>;
<code><a href="show.html">show</a></code>, <code><a href="show.html">show</a></code>,
<code><a href="show.html">show,Column-method</a></code>,
<code><a href="show.html">show,GroupedData-method</a></code>,
<code><a href="show.html">show,SparkDataFrame-method</a></code>,
<code><a href="show.html">show,WindowSpec-method</a></code>;
<code><a href="storageLevel.html">storageLevel</a></code>,
<code><a href="storageLevel.html">storageLevel,SparkDataFrame-method</a></code>;
<code><a href="str.html">str</a></code>,
<code><a href="str.html">str,SparkDataFrame-method</a></code>;
<code><a href="take.html">take</a></code>, <code><a href="take.html">take</a></code>,
<code><a href="take.html">take,SparkDataFrame,numeric-method</a></code>;
<code><a href="union.html">union</a></code>, <code><a href="union.html">union</a></code>,
<code><a href="union.html">union,SparkDataFrame,SparkDataFrame-method</a></code>,
<code><a href="union.html">unionAll</a></code>, <code><a href="union.html">unionAll</a></code>,
<code><a href="union.html">unionAll,SparkDataFrame,SparkDataFrame-method</a></code>;
<code><a href="unpersist.html">unpersist</a></code>, <code><a href="unpersist.html">unpersist</a></code>,
<code><a href="unpersist.html">unpersist,SparkDataFrame-method</a></code>;
<code><a href="withColumn.html">withColumn</a></code>, <code><a href="withColumn.html">withColumn</a></code>,
<code><a href="withColumn.html">withColumn,SparkDataFrame,character-method</a></code>;
<code><a href="with.html">with</a></code>,
<code><a href="with.html">with,SparkDataFrame-method</a></code>;
<code><a href="write.jdbc.html">write.jdbc</a></code>, <code><a href="write.jdbc.html">write.jdbc</a></code>,
<code><a href="write.jdbc.html">write.jdbc,SparkDataFrame,character,character-method</a></code>;
<code><a href="write.json.html">write.json</a></code>, <code><a href="write.json.html">write.json</a></code>,
<code><a href="write.json.html">write.json,SparkDataFrame,character-method</a></code>;
<code><a href="write.orc.html">write.orc</a></code>, <code><a href="write.orc.html">write.orc</a></code>,
<code><a href="write.orc.html">write.orc,SparkDataFrame,character-method</a></code>;
<code><a href="write.text.html">write.text</a></code>, <code><a href="write.text.html">write.text</a></code>,
<code><a href="write.text.html">write.text,SparkDataFrame,character-method</a></code>
</p>
<p>Other subsetting functions: <code><a href="select.html">$</a></code>,
<code><a href="select.html">$,SparkDataFrame-method</a></code>, <code><a href="select.html">$&lt;-</a></code>,
<code><a href="select.html">$&lt;-,SparkDataFrame-method</a></code>,
<code><a href="select.html">select</a></code>, <code><a href="select.html">select</a></code>,
<code><a href="select.html">select,SparkDataFrame,Column-method</a></code>,
<code><a href="select.html">select,SparkDataFrame,character-method</a></code>,
<code><a href="select.html">select,SparkDataFrame,list-method</a></code>;
<code><a href="filter.html">filter</a></code>, <code><a href="filter.html">filter</a></code>,
<code><a href="filter.html">filter,SparkDataFrame,characterOrColumn-method</a></code>,
<code><a href="filter.html">where</a></code>, <code><a href="filter.html">where</a></code>,
<code><a href="filter.html">where,SparkDataFrame,characterOrColumn-method</a></code>
</p>
<h3>Examples</h3>
<pre><code class="r">## Not run:
##D # Columns can be selected using [[ and [
##D df[[2]] == df[[&quot;age&quot;]]
##D df[,2] == df[,&quot;age&quot;]
##D df[,c(&quot;name&quot;, &quot;age&quot;)]
##D # Or to filter rows
##D df[df$age &gt; 20,]
##D # SparkDataFrame can be subset on both rows and Columns
##D df[df$name == &quot;Smith&quot;, c(1,2)]
##D df[df$age %in% c(19, 30), 1:2]
##D subset(df, df$age %in% c(19, 30), 1:2)
##D subset(df, df$age %in% c(19), select = c(1,2))
##D subset(df, select = c(1,2))
##D # Columns can be selected and set
##D df[[&quot;age&quot;]] &lt;- 23
##D df[[1]] &lt;- df$age
##D df[[2]] &lt;- NULL # drop column
## End(Not run)
</code></pre>
<hr><div align="center">[Package <em>SparkR</em> version 2.1.1 <a href="00Index.html">Index</a>]</div>
</body></html>