<body data-spy="scroll" data-target="#bd-toc-nav" data-offset="80">
<nav class="navbar navbar-light navbar-expand-lg bg-light fixed-top bd-navbar" id="navbar-main">
<div class="container-xl">
<a class="navbar-brand" href="../../index.html">
<img src="../../_static/spark-logo-reverse.png" class="logo" alt="logo" />
</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbar-menu" aria-controls="navbar-menu" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar-menu" class="col-lg-9 collapse navbar-collapse">
<ul id="navbar-main-elements" class="navbar-nav mr-auto">
<li class="nav-item ">
<a class="nav-link" href="../../getting_started/index.html">Getting Started</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../user_guide/index.html">User Guide</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="../index.html">API Reference</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../development/index.html">Development</a>
</li>
<li class="nav-item ">
<a class="nav-link" href="../../migration_guide/index.html">Migration Guide</a>
</li>
</ul>
<ul class="navbar-nav">
</ul>
</div>
</div>
</nav>
<div class="container-xl">
<div class="row">
<div class="col-12 col-md-3 bd-sidebar"><form class="bd-search d-flex align-items-center" action="../../search.html" method="get">
<i class="icon fas fa-search"></i>
<input type="search" class="form-control" name="q" id="search-input" placeholder="Search the docs ..." aria-label="Search the docs ..." autocomplete="off" >
</form>
<nav class="bd-links" id="bd-docs-nav" aria-label="Main navigation">
<div class="bd-toc-item active">
<ul class="nav bd-sidenav">
<li class="">
<a href="../pyspark.sql/index.html">Spark SQL</a>
</li>
<li class="">
<a href="../pyspark.pandas/index.html">Pandas API on Spark</a>
</li>
<li class="">
<a href="../pyspark.ss/index.html">Structured Streaming</a>
</li>
<li class="">
<a href="../pyspark.ml.html">MLlib (DataFrame-based)</a>
</li>
<li class="active">
<a href="../pyspark.streaming.html">Spark Streaming</a>
</li>
<li class="">
<a href="../pyspark.mllib.html">MLlib (RDD-based)</a>
</li>
<li class="">
<a href="../pyspark.html">Spark Core</a>
</li>
<li class="">
<a href="../pyspark.resource.html">Resource Management</a>
</li>
</ul>
</nav>
</div>
<div class="d-none d-xl-block col-xl-2 bd-toc">
<nav id="bd-toc-nav">
<ul class="nav section-nav flex-column">
</ul>
</nav>
</div>
<main class="col-12 col-md-9 col-xl-7 py-md-5 pl-md-5 pr-md-4 bd-content" role="main">
<div>
<div class="section" id="pyspark-streaming-kinesis-kinesisutils-createstream">
<h1>pyspark.streaming.kinesis.KinesisUtils.createStream<a class="headerlink" href="#pyspark-streaming-kinesis-kinesisutils-createstream" title="Permalink to this headline"></a></h1>
<dl class="py method">
<dt id="pyspark.streaming.kinesis.KinesisUtils.createStream">
<em class="property">static </em><code class="sig-prename descclassname">KinesisUtils.</code><code class="sig-name descname">createStream</code><span class="sig-paren">(</span><em class="sig-param">ssc: pyspark.streaming.context.StreamingContext, kinesisAppName: str, streamName: str, endpointUrl: str, regionName: str, initialPositionInStream: str, checkpointInterval: int, storageLevel: pyspark.storagelevel.StorageLevel = StorageLevel(True, True, False, False, 2), awsAccessKeyId: Optional[str] = None, awsSecretKey: Optional[str] = None, decoder: Union[Callable[[Optional[bytes]], T], Callable[[Optional[bytes]], Optional[str]]] = &lt;function utf8_decoder&gt;, stsAssumeRoleArn: Optional[str] = None, stsSessionName: Optional[str] = None, stsExternalId: Optional[str] = None</em><span class="sig-paren">)</span> &#x2192; Union[pyspark.streaming.dstream.DStream[Union[T, str, None]], pyspark.streaming.dstream.DStream[T]]<a class="reference internal" href="../../_modules/pyspark/streaming/kinesis.html#KinesisUtils.createStream"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#pyspark.streaming.kinesis.KinesisUtils.createStream" title="Permalink to this definition"></a></dt>
<dd><p>Create an input stream that pulls messages from a Kinesis stream. This uses the
Kinesis Client Library (KCL) to pull messages from Kinesis.</p>
<dl class="field-list">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><dl>
<dt><strong>ssc</strong><span class="classifier"><code class="xref py py-class docutils literal notranslate"><span class="pre">StreamingContext</span></code></span></dt><dd><p>StreamingContext object</p>
</dd>
<dt><strong>kinesisAppName</strong><span class="classifier">str</span></dt><dd><p>Kinesis application name used by the Kinesis Client Library (KCL) to
update DynamoDB</p>
</dd>
<dt><strong>streamName</strong><span class="classifier">str</span></dt><dd><p>Kinesis stream name</p>
</dd>
<dt><strong>endpointUrl</strong><span class="classifier">str</span></dt><dd><p>Url of Kinesis service (e.g., <a class="reference external" href="https://kinesis.us-east-1.amazonaws.com">https://kinesis.us-east-1.amazonaws.com</a>)</p>
</dd>
<dt><strong>regionName</strong><span class="classifier">str</span></dt><dd><p>Name of region used by the Kinesis Client Library (KCL) to update
DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics)</p>
</dd>
<dt><strong>initialPositionInStream</strong><span class="classifier">int</span></dt><dd><p>In the absence of Kinesis checkpoint info, this is the
worker’s initial starting position in the stream. The
values are either the beginning of the stream per Kinesis’
limit of 24 hours (InitialPositionInStream.TRIM_HORIZON) or
the tip of the stream (InitialPositionInStream.LATEST).</p>
</dd>
<dt><strong>checkpointInterval</strong><span class="classifier">int</span></dt><dd><p>Checkpoint interval(in seconds) for Kinesis checkpointing. See the Kinesis
Spark Streaming documentation for more details on the different
types of checkpoints.</p>
</dd>
<dt><strong>storageLevel</strong><span class="classifier"><a class="reference internal" href="pyspark.StorageLevel.html#pyspark.StorageLevel" title="pyspark.StorageLevel"><code class="xref py py-class docutils literal notranslate"><span class="pre">pyspark.StorageLevel</span></code></a>, optional</span></dt><dd><p>Storage level to use for storing the received objects (default is
StorageLevel.MEMORY_AND_DISK_2)</p>
</dd>
<dt><strong>awsAccessKeyId</strong><span class="classifier">str, optional</span></dt><dd><p>AWS AccessKeyId (default is None. If None, will use
DefaultAWSCredentialsProviderChain)</p>
</dd>
<dt><strong>awsSecretKey</strong><span class="classifier">str, optional</span></dt><dd><p>AWS SecretKey (default is None. If None, will use
DefaultAWSCredentialsProviderChain)</p>
</dd>
<dt><strong>decoder</strong><span class="classifier">function, optional</span></dt><dd><p>A function used to decode value (default is utf8_decoder)</p>
</dd>
<dt><strong>stsAssumeRoleArn</strong><span class="classifier">str, optional</span></dt><dd><p>ARN of IAM role to assume when using STS sessions to read from
the Kinesis stream (default is None).</p>
</dd>
<dt><strong>stsSessionName</strong><span class="classifier">str, optional</span></dt><dd><p>Name to uniquely identify STS sessions used to read from Kinesis
stream, if STS is being used (default is None).</p>
</dd>
<dt><strong>stsExternalId</strong><span class="classifier">str, optional</span></dt><dd><p>External ID that can be used to validate against the assumed IAM
role’s trust policy, if STS is being used (default is None).</p>
</dd>
</dl>
Returns
-------
A DStream object
<p class="rubric">Notes</p>
<p>The given AWS credentials will get saved in DStream checkpoints if checkpointing
is enabled. Make sure that your checkpoint directory is secure.</p>
</dd></dl>
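Examples
--------
A minimal usage sketch, not taken from the original docstring: the application
name, stream name, endpoint URL, and region below are placeholders, and the
sketch assumes the spark-streaming-kinesis-asl artifact is available to the
application, as described in the Spark Kinesis integration guide::

    from pyspark import SparkContext, StorageLevel
    from pyspark.streaming import StreamingContext
    from pyspark.streaming.kinesis import KinesisUtils, InitialPositionInStream

    # Placeholder names; replace with your own application, stream, and region.
    sc = SparkContext(appName="KinesisExample")
    ssc = StreamingContext(sc, batchDuration=10)  # 10-second batches

    lines = KinesisUtils.createStream(
        ssc,
        kinesisAppName="my-kcl-app",       # KCL application name (backs a DynamoDB table)
        streamName="my-kinesis-stream",
        endpointUrl="https://kinesis.us-east-1.amazonaws.com",
        regionName="us-east-1",
        initialPositionInStream=InitialPositionInStream.LATEST,
        checkpointInterval=10,             # Kinesis checkpoint interval, in seconds
        storageLevel=StorageLevel.MEMORY_AND_DISK_2,
    )

    lines.pprint()  # each record decoded to str by the default utf8_decoder
    ssc.start()
    ssc.awaitTermination()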
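Because ``decoder`` receives the raw record payload as ``Optional[bytes]``, a
custom decoder is just a callable over that payload. The JSON decoder below is
a hypothetical illustration, not part of PySpark; ``ssc`` is the
StreamingContext from the previous sketch::

    import json
    from typing import Any, Optional

    def json_decoder(payload: Optional[bytes]) -> Optional[Any]:
        # Decode each Kinesis record as UTF-8 JSON; empty records become None.
        if payload is None:
            return None
        return json.loads(payload.decode("utf-8"))

    events = KinesisUtils.createStream(
        ssc, "my-kcl-app", "my-kinesis-stream",
        "https://kinesis.us-east-1.amazonaws.com", "us-east-1",
        InitialPositionInStream.TRIM_HORIZON, 10,
        decoder=json_decoder,
    )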