blob: f1070cdf72ff7980da08bec79d360fb0f5123d7d [file] [log] [blame]
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta name="description" content="spark.kstest Conduct the two-sided Kolmogorov-Smirnov (KS) test for data sampled from a
continuous distribution.
By comparing the largest difference between the empirical cumulative
distribution of the sample data and the theoretical distribution we can provide a test for the
null hypothesis that the sample data comes from that theoretical distribution.
Users can call summary to obtain a summary of the test, and print.summary.KSTest
to print out a summary result."><!-- Inform modern browsers that this page supports both dark and light color schemes,
and the page author prefers light. --><meta name="color-scheme" content="dark light"><script>
// Fallback for browsers that do not understand `prefers-color-scheme`:
// such browsers report the media query as "not all". In that case the
// page is hidden, and the light stylesheet is appended to <head> without
// a media filter so that it is downloaded with highest priority; the
// link's own onload handler makes the page visible again.
(function () {
  var schemeQuery = window.matchMedia("(prefers-color-scheme: dark)");
  if (schemeQuery.media !== "not all") {
    // The media feature is supported; nothing to do here.
    return;
  }

  var root = document.documentElement;
  root.style.display = "none";
  document.head.insertAdjacentHTML(
    "beforeend",
    "<link id=\"css\" rel=\"stylesheet\" href=\"https://bootswatch.com/5/flatly/bootstrap.css\" onload=\"document.documentElement.style.display = ''\">"
  );
})();
</script><title>(One-Sample) Kolmogorov-Smirnov Test — spark.kstest • SparkR</title><script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><link href="../deps/bootstrap-5.3.1/bootstrap.min.css" rel="stylesheet"><script src="../deps/bootstrap-5.3.1/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script 
src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><link href="../extra.css" rel="stylesheet"><meta property="og:title" content="(One-Sample) Kolmogorov-Smirnov Test — spark.kstest"><meta property="og:description" content="spark.kstest Conduct the two-sided Kolmogorov-Smirnov (KS) test for data sampled from a
continuous distribution.
By comparing the largest difference between the empirical cumulative
distribution of the sample data and the theoretical distribution we can provide a test for the
null hypothesis that the sample data comes from that theoretical distribution.
Users can call summary to obtain a summary of the test, and print.summary.KSTest
to print out a summary result."><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]--><!-- Flatly Theme - Light --><link id="css-light" rel="stylesheet" href="https://bootswatch.com/5/flatly/bootstrap.css" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)"><!-- Darkly Theme - Dark --><link id="css-dark" rel="stylesheet" href="https://bootswatch.com/5/darkly/bootstrap.css" media="(prefers-color-scheme: dark)"><!-- preferably CSS --><link rel="stylesheet" href="../preferably.css"><link id="css-code-light" rel="stylesheet" href="../code-color-scheme-light.css" media="(prefers-color-scheme: light), (prefers-color-scheme: no-preference)"><link id="css-code-dark" rel="stylesheet" href="../code-color-scheme-dark.css" media="(prefers-color-scheme: dark)"><script src="../darkswitch.js"></script></head><body>
<a href="#main" class="visually-hidden-focusable">Skip to contents</a>
<nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-primary"><div class="container">
<a class="external-link navbar-brand" href="https://spark.apache.org/">
<img src="https://spark.apache.org/images/spark-logo-rev.svg" alt="Apache Spark" max-height="100%"></a>
<a class="navbar-brand me-2" href="../index.html">SparkR</a>
<small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">4.0.0</small>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar" class="collapse navbar-collapse ms-2">
<ul class="navbar-nav me-auto"><li class="active nav-item">
<a class="nav-link" href="../reference/index.html">Reference</a>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false" aria-haspopup="true" id="dropdown-articles">Articles</a>
<div class="dropdown-menu" aria-labelledby="dropdown-articles">
<a class="dropdown-item" href="../articles/sparkr-vignettes.html">SparkR - Practical Guide</a>
</div>
</li>
</ul><form class="form-inline my-2 my-lg-0" role="search">
<input type="search" class="form-control me-sm-2" aria-label="Search" name="search-input" data-search-index="../search.json" id="search-input" placeholder="Search for" autocomplete="off"></form>
<ul class="navbar-nav"><li>
<a class="external-link nav-link" id="css-toggle-btn" aria-label="Toggle color scheme">
<span class="fas fa fas fa-adjust fa-lg"></span>
</a>
</li>
</ul></div>
</div>
</nav><div class="container template-reference-topic">
<div class="row">
<main id="main" class="col-md-9"><div class="page-header">
<h1>(One-Sample) Kolmogorov-Smirnov Test</h1>
<div class="d-none name"><code>spark.kstest.Rd</code></div>
</div>
<div class="ref-description section level2">
<p><code>spark.kstest</code> Conduct the two-sided Kolmogorov-Smirnov (KS) test for data sampled from a
continuous distribution.</p>
<p>By comparing the largest difference between the empirical cumulative
distribution of the sample data and the theoretical distribution we can provide a test for the
null hypothesis that the sample data comes from that theoretical distribution.</p>
<p>Users can call <code>summary</code> to obtain a summary of the test, and <code>print.summary.KSTest</code>
to print out a summary result.</p>
</div>
<div class="section level2">
<h2 id="ref-usage">Usage<a class="anchor" aria-label="anchor" href="#ref-usage"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span><span class="fu">spark.kstest</span><span class="op">(</span><span class="va">data</span>, <span class="va">...</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for SparkDataFrame</span></span>
<span><span class="fu">spark.kstest</span><span class="op">(</span></span>
<span> <span class="va">data</span>,</span>
<span> testCol <span class="op">=</span> <span class="st">"test"</span>,</span>
<span> nullHypothesis <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"norm"</span><span class="op">)</span>,</span>
<span> distParams <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">0</span>, <span class="fl">1</span><span class="op">)</span></span>
<span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S4 method for KSTest</span></span>
<span><span class="fu"><a href="summary.html">summary</a></span><span class="op">(</span><span class="va">object</span><span class="op">)</span></span>
<span></span>
<span><span class="co"># S3 method for summary.KSTest</span></span>
<span><span class="fu"><a href="https://rdrr.io/r/base/print.html" class="external-link">print</a></span><span class="op">(</span><span class="va">x</span>, <span class="va">...</span><span class="op">)</span></span></code></pre></div>
</div>
<div class="section level2">
<h2 id="arguments">Arguments<a class="anchor" aria-label="anchor" href="#arguments"></a></h2>
<dl><dt>data</dt>
<dd><p>a SparkDataFrame of user data.</p></dd>
<dt>...</dt>
<dd><p>additional argument(s) passed to the method.</p></dd>
<dt>testCol</dt>
<dd><p>column name where the test data is from. It should be a column of double type.</p></dd>
<dt>nullHypothesis</dt>
<dd><p>name of the theoretical distribution tested against. Currently only
<code>"norm"</code> for normal distribution is supported.</p></dd>
<dt>distParams</dt>
<dd><p>parameter(s) of the distribution. For <code>nullHypothesis = "norm"</code>,
we can provide as a vector the mean and standard deviation of
the distribution. If none is provided, then standard normal will be used.
If only one is provided, then the standard deviation will be set to be one.</p></dd>
<dt>object</dt>
<dd><p>test result object of KSTest by <code>spark.kstest</code>.</p></dd>
<dt>x</dt>
<dd><p>summary object of KSTest returned by <code>summary</code>.</p></dd>
</dl></div>
<div class="section level2">
<h2 id="value">Value<a class="anchor" aria-label="anchor" href="#value"></a></h2>
<p><code>spark.kstest</code> returns a test result object.</p>
<p><code>summary</code> returns summary information of KSTest object, which is a list.
The list includes the <code>p.value</code> (p-value), <code>statistic</code> (test statistic
computed for the test), <code>nullHypothesis</code> (the null hypothesis with its
parameters tested against) and <code>degreesOfFreedom</code> (degrees of freedom of the test).</p>
</div>
<div class="section level2">
<h2 id="note">Note<a class="anchor" aria-label="anchor" href="#note"></a></h2>
<p>spark.kstest since 2.1.0</p>
<p>summary(KSTest) since 2.1.0</p>
<p>print.summary.KSTest since 2.1.0</p>
</div>
<div class="section level2">
<h2 id="see-also">See also<a class="anchor" aria-label="anchor" href="#see-also"></a></h2>
<div class="dont-index"><p><a href="https://spark.apache.org/docs/latest/mllib-statistics.html#hypothesis-testing" class="external-link">
MLlib: Hypothesis Testing</a></p></div>
</div>
<div class="section level2">
<h2 id="ref-examples">Examples<a class="anchor" aria-label="anchor" href="#ref-examples"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span class="r-in"><span><span class="kw">if</span> <span class="op">(</span><span class="cn">FALSE</span><span class="op">)</span> <span class="op">{</span></span></span>
<span class="r-in"><span><span class="va">data</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/data.frame.html" class="external-link">data.frame</a></span><span class="op">(</span>test <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">0.1</span>, <span class="fl">0.15</span>, <span class="fl">0.2</span>, <span class="fl">0.3</span>, <span class="fl">0.25</span><span class="op">)</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">df</span> <span class="op">&lt;-</span> <span class="fu"><a href="createDataFrame.html">createDataFrame</a></span><span class="op">(</span><span class="va">data</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">test</span> <span class="op">&lt;-</span> <span class="fu">spark.kstest</span><span class="op">(</span><span class="va">df</span>, <span class="st">"test"</span>, <span class="st">"norm"</span>, <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">0</span>, <span class="fl">1</span><span class="op">)</span><span class="op">)</span></span></span>
<span class="r-in"><span></span></span>
<span class="r-in"><span><span class="co"># get a summary of the test result</span></span></span>
<span class="r-in"><span><span class="va">testSummary</span> <span class="op">&lt;-</span> <span class="fu"><a href="summary.html">summary</a></span><span class="op">(</span><span class="va">test</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">testSummary</span></span></span>
<span class="r-in"><span></span></span>
<span class="r-in"><span><span class="co"># print out the summary in an organized way</span></span></span>
<span class="r-in"><span><span class="fu">print.summary.KSTest</span><span class="op">(</span><span class="va">testSummary</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="op">}</span></span></span>
</code></pre></div>
</div>
</main><aside class="col-md-3"><nav id="toc"><h2>On this page</h2>
</nav></aside></div>
<footer><div class="copyright">
<p></p><p>Developed by <a href="https://www.apache.org/" class="external-link"> The Apache Software Foundation</a>.</p>
</div>
<div class="pkgdown">
<p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.9.</p>
<p class="preferably">Using <a href="https://preferably.amirmasoudabdol.name/?source=footer" class="external-link">preferably</a> template.</p>
</div>
</footer></div>
</body></html>