blob: 6b9eba5a869d612d87716c02dc78b0d712d797da [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<title>PLC4X &#x2013; Scraper</title>
<script src="../../js/jquery.slim.min.js" type="text/javascript"></script>
<!--script src="../../js/popper.min.js" type="javascript"></script-->
<script src="../../js/bootstrap.bundle.min.js" type="text/javascript"></script>
<!-- The tooling for adding images and links to Apache events -->
<script src="https://www.apachecon.com/event-images/snippet.js" type="text/javascript"></script>
<!-- FontAwesome -->
<link rel="stylesheet" href="../../css/all.min.css" type="text/css"/>
<!-- Bootstrap -->
<link rel="stylesheet" href="../../css/bootstrap.min.css" type="text/css"/>
<!-- Some Maven Site defaults -->
<link rel="stylesheet" href="../../css/maven-base.css" type="text/css"/>
<link rel="stylesheet" href="../../css/maven-theme.css" type="text/css"/>
<!-- The PLC4X version of a bootstrap theme -->
<link rel="stylesheet" href="../../css/themes/plc4x.css" type="text/css" id="pagestyle"/>
<!-- A custom style for printing content -->
<link rel="stylesheet" href="../../css/print.css" type="text/css" media="print"/>
<meta http-equiv="Content-Language" content="en"/>
</head>
<body class="composite">
<nav class="navbar navbar-light navbar-expand-md bg-faded justify-content-center border-bottom">
<!--a href="/" class="navbar-brand d-flex w-50 mr-auto">Navbar 3</a-->
<a href="https://plc4x.apache.org/" id="bannerLeft"><img src="../../images/apache_plc4x_logo_small.png" alt="Apache PLC4X"/></a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#collapsingNavbar3">
<span class="navbar-toggler-icon"></span>
</button>
<div class="navbar-collapse collapse w-100" id="collapsingNavbar3">
<ul class="navbar-nav w-100 justify-content-center">
<li class="nav-item">
<a class="nav-link" href="../../index.html">Home</a>
</li>
<li class="nav-item active">
<a class="nav-link" href="../../users/index.html">Users</a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../developers/index.html">Developers</a>
</li>
<li class="nav-item">
<a class="nav-link" href="../../apache/index.html">Apache</a>
</li>
</ul>
<ul class="nav navbar-nav ml-auto justify-content-end">
<li class="nav-item row valign-middle">
<a class="acevent" data-format="wide" data-mode="light" data-event="random" style="width:240px;height:60px;"></a>
</li>
</ul>
</div>
</nav>
<div class="container-fluid">
<div class="row h-100">
<nav class="col-sm-push col-md-2 pt-3 sidebar">
<div class="sidebar-sticky">
<ul class="nav flex-column">
<li class="nav-item">
<a href="../../users/index.html" class="nav-link">Section Home</a>
</li>
<li class="nav-item">
<a href="../../users/download.html" class="nav-link">Download</a>
</li>
<li class="nav-item">
<a href="../../users/adopters.html" class="nav-link">Adopters</a>
</li>
<li class="nav-item">
<a href="../../users/commercial-support.html" class="nav-link">Commercial support</a>
</li>
<li class="nav-item">
<a href="../../users/gettingstarted.html" class="nav-link">Getting Started</a>
<ul class="flex-column pl-4 nav">
<li class="nav-item">
<a href="../../users/getting-started/plc4go.html" class="nav-link">Go</a>
</li>
<li class="nav-item">
<a href="../../users/getting-started/plc4j.html" class="nav-link">Java</a>
</li>
<li class="nav-item">
<a href="../../users/getting-started/using-snapshots.html" class="nav-link">Using SNAPSHOTS</a>
</li>
<li class="nav-item">
<a href="../../users/getting-started/general-concepts.html" class="nav-link">General Concepts</a>
</li>
<li class="nav-item">
<a href="../../users/getting-started/virtual-modbus.html" class="nav-link">Virtual Modbus</a>
</li>
</ul>
</li>
<li class="nav-item">
<a href="../../users/blogs-videos-and-slides.html" class="nav-link">Blogs, Videos and Slides</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/index.html" class="nav-link">Protocols</a>
<ul class="flex-column pl-4 nav">
<li class="nav-item">
<a href="../../users/protocols/ab-eth.html" class="nav-link">AB-ETH</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/ads.html" class="nav-link">ADS/AMS</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/bacnetip.html" class="nav-link">BACnet/IP</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/canopen.html" class="nav-link">CANopen</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/deltav.html" class="nav-link">DeltaV</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/df1.html" class="nav-link">DF1</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/ethernet-ip.html" class="nav-link">EtherNet/IP</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/firmata.html" class="nav-link">Firmata</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/knxnetip.html" class="nav-link">KNXnet/IP</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/modbus.html" class="nav-link">Modbus</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/opc-ua.html" class="nav-link">OPC UA</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/s7.html" class="nav-link">S7 (Step7)</a>
</li>
<li class="nav-item">
<a href="../../users/protocols/simulated.html" class="nav-link">Simulated</a>
</li>
</ul>
</li>
<li class="nav-item">
<a href="../../users/transports/index.html" class="nav-link">Transports</a>
<ul class="flex-column pl-4 nav">
<li class="nav-item">
<a href="../../users/transports/tcp.html" class="nav-link">TCP</a>
</li>
<li class="nav-item">
<a href="../../users/transports/udp.html" class="nav-link">UDP</a>
</li>
<li class="nav-item">
<a href="../../users/transports/serial.html" class="nav-link">Serial</a>
</li>
<li class="nav-item">
<a href="../../users/transports/socketcan.html" class="nav-link">SocketCAN</a>
</li>
<li class="nav-item">
<a href="../../users/transports/raw-socket.html" class="nav-link">Raw Socket</a>
</li>
<li class="nav-item">
<a href="../../users/transports/pcap-replay.html" class="nav-link">PCAP Replay</a>
</li>
</ul>
</li>
<li class="nav-item">
<a href="../../users/integrations/index.html" class="nav-link">Integrations</a>
<ul class="flex-column pl-4 nav">
<li class="nav-item">
<a href="../../users/integrations/apache-calcite.html" class="nav-link">Apache Calcite</a>
</li>
<li class="nav-item">
<a href="../../users/integrations/apache-camel.html" class="nav-link">Apache Camel</a>
</li>
<li class="nav-item">
<a href="../../users/integrations/apache-edgent.html" class="nav-link">Apache Edgent</a>
</li>
<li class="nav-item">
<a href="../../users/integrations/apache-iotdb.html" class="nav-link">Apache IoTDB</a>
</li>
<li class="nav-item">
<a href="../../users/integrations/apache-kafka.html" class="nav-link">Apache Kafka</a>
</li>
<li class="nav-item">
<a href="../../users/integrations/apache-nifi.html" class="nav-link">Apache NiFi</a>
</li>
<li class="nav-item">
<a href="../../users/integrations/apache-streampipes.html" class="nav-link">Apache StreamPipes</a>
</li>
<li class="nav-item">
<a href="../../users/integrations/eclipse-ditto.html" class="nav-link">Eclipse Ditto</a>
</li>
<li class="nav-item">
<a href="../../users/integrations/eclipse-milo.html" class="nav-link">Eclipse Milo OPC UA Server</a>
</li>
</ul>
</li>
<li class="nav-item">
<a href="../../users/tools/index.html" class="nav-link">Tools</a>
<ul class="flex-column pl-4 nav">
<li class="nav-item">
<a href="../../users/tools/capture-replay.html" class="nav-link">Capture Replay</a>
</li>
<li class="nav-item">
<a href="../../users/tools/connection-pool.html" class="nav-link">Connection Pool</a>
</li>
<li class="nav-item">
<a href="../../users/tools/connection-cache.html" class="nav-link">Connection Cache</a>
</li>
<li class="nav-item">
<a href="../../users/tools/opm.html" class="nav-link">Object PLC Mapping (OPM)</a>
</li>
<li class="nav-item">
<strong class="nav-link">Scraper</strong>
</li>
<li class="nav-item">
<a href="../../users/tools/testing.html" class="nav-link">PLC4X without a PLC and Unit Testing</a>
</li>
</ul>
</li>
<li class="nav-item">
<a href="../../users/industry40.html" class="nav-link">Industry 4.0 with Apache</a>
</li>
<li class="nav-item">
<a href="../../users/security.html" class="nav-link">Security</a>
</li>
</ul>
</div>
</nav>
<main role="main" class="ml-sm-auto px-4 col-sm-pull col-md-9 col-lg-10 h-100">
<div class="sect1">
<h2 id="scraper">Scraper</h2>
<div class="sectionbody">
<div class="paragraph">
<p>While the Apache PLC4X API allows simple access to PLC resources, if you want to continuously monitor some values and have them retrieved in a pre-defined interval, the core PLC4X API method is a little bit uncomfortable.</p>
</div>
<div class="paragraph">
<p>Especially when you have multiple batches of data you want to have refreshed in different intervals.</p>
</div>
<div class="paragraph">
<p>In this case you need to take care of the scheduling of queries and manage the connection state (check whether the connection is still available and apply countermeasures if there are problems).</p>
</div>
<div class="paragraph">
<p>As we have encountered exactly the same problem for about every integration module we created, the Apache PLC4X team has created a tool called the <code>Scraper</code>.</p>
</div>
<div class="paragraph">
<p>This tool automatically handles all of the tasks mentioned above.</p>
</div>
<div class="sect2">
<h3 id="getting_started_with_the_scraper">Getting started with the <code>Scraper</code></h3>
<div class="paragraph">
<p>In general, you need 3 parts to work with the <code>Scraper</code>:</p>
</div>
<div class="olist arabic">
<ol class="arabic">
<li><p>A <code>Scraper</code> Configuration</p></li>
<li><p>A <code>Scraper</code> Implementation</p></li>
<li><p>A Handler to handle the results of <code>Scraper</code> jobs</p></li>
</ol>
</div>
<div class="paragraph">
<p>In the <code>Scraper</code> Configuration you define the so-called <code>jobs</code>.</p>
</div>
<div class="sect3">
<h4 id="sources">Sources</h4>
<div class="paragraph">
<p>Sources define connections to PLCs using PLC4X drivers.</p>
</div>
<div class="paragraph">
<p>Generally you can think of a <code>Source</code> as a PLC4X connection string, given an alias name.</p>
</div>
</div>
<div class="sect3">
<h4 id="jobs">Jobs</h4>
<div class="paragraph">
<p>A <code>Job</code> defines which resources (PLC Addresses) should be collected from which <code>Sources</code> with a given <code>Trigger</code>.</p>
</div>
<div class="paragraph">
<p>All resources in a job will be collected as a batch.</p>
</div>
<div class="paragraph">
<p>In principle, multiple types of triggers could be supported, but for now only a time-triggered job (aka <code>SCHEDULED</code>) is actually supported.</p>
</div>
<div class="paragraph">
<p>In the near future we&#8217;re hoping that we will be able to support:</p>
</div>
<div class="ulist">
<ul>
<li><p>External triggers</p></li>
<li><p>Triggering collection based upon PLC-values</p></li>
</ul>
</div>
<div class="paragraph">
<p>But as of now, this has not been implemented yet.</p>
</div>
</div>
</div>
<div class="sect2">
<h3 id="configuration_using_the_java_api">Configuration using the Java API</h3>
<div class="paragraph">
<p>The core of the Scraper configuration is the <code>ScraperConfigurationTriggeredImplBuilder</code> class.
Use this to build the configuration objects used to bootstrap the Scraper.</p>
</div>
<div class="listingblock">
<div class="content">
<pre>ScraperConfigurationTriggeredImplBuilder builder = new ScraperConfigurationTriggeredImplBuilder();</pre>
</div>
</div>
<div class="paragraph">
<p>As soon as you have your <code>builder</code> instance, you should add at least one <code>source</code> to it.</p>
</div>
<div class="listingblock">
<div class="content">
<pre>builder.addSource({connectionName}, {plc4xConnectionString});</pre>
</div>
</div>
<div class="paragraph">
<p>The <code>connectionName</code> will be what we use when configuring the job to reference which source it should use to collect.</p>
</div>
<div class="paragraph">
<p>In order to configure a <code>job</code> we have to get an instance of a <code>JobConfigurationTriggeredImplBuilder</code>.</p>
</div>
<div class="listingblock">
<div class="content">
<pre>JobConfigurationTriggeredImplBuilder jobBuilder = builder.job({jobName}, {triggerCommand});</pre>
</div>
</div>
<div class="paragraph">
<p>This creates a new <code>job</code> with a given name which is executed based on the information in the <code>triggerCommand</code>.</p>
</div>
<div class="paragraph">
<p>As mentioned above, we currently only support a time-scheduled collection.</p>
</div>
<div class="paragraph">
<p>This generally just requires one parameter: the number of <code>milliseconds</code> between each collection.</p>
</div>
<div class="listingblock">
<div class="content">
<pre>(SCHEDULED,1000)</pre>
</div>
</div>
<div class="paragraph">
<p>The above would schedule a collection every 1000ms, i.e. once every second.</p>
</div>
<div class="paragraph">
<p>Up to now this job would not be run anywhere, and it would also not collect anything.
So in order to have the job actually do something, we should assign it a <code>source</code> to collect from.</p>
</div>
<div class="listingblock">
<div class="content">
<pre>jobBuilder.source({connectionName});</pre>
</div>
</div>
<div class="paragraph">
<p>Here we could theoretically collect on multiple sources, by simply calling the <code>source()</code> method multiple times.</p>
</div>
<div class="paragraph">
<p>All sources would be collected at the same time, whenever the trigger tells it to.</p>
</div>
<div class="paragraph">
<p>So the last thing we need to configure our first <code>Scraper</code> job, is to add a few fields for it to collect.</p>
</div>
<div class="listingblock">
<div class="content">
<pre>jobBuilder.field({fieldName}, {fieldAddress});</pre>
</div>
</div>
<div class="paragraph">
<p>The <code>field</code> method has to be called for every field we want to add to the current job configuration.
It gives a PLC4X address string an easy to understand string name, just like when using the core PLC4X API.</p>
</div>
<div class="paragraph">
<p>As soon as we&#8217;re done adding fields, we configure the job by calling the <code>build</code> method.</p>
</div>
<div class="listingblock">
<div class="content">
<pre>jobBuilder.build();</pre>
</div>
</div>
<div class="paragraph">
<p>This configures the finished job and attaches it to the overall <code>Scraper</code> configuration.</p>
</div>
<div class="paragraph">
<p>As soon as we&#8217;re done configuring jobs, we need to create the <code>Scraper</code> configuration by calling the <code>build</code> method on the <code>builder</code>:</p>
</div>
<div class="listingblock">
<div class="content">
<pre>ScraperConfigurationTriggeredImpl scraperConfig = builder.build();</pre>
</div>
</div>
</div>
<div class="sect2">
<h3 id="running_the_scraper">Running the <code>Scraper</code></h3>
<div class="paragraph">
<p>In order to run the <code>Scraper</code>, the following boilerplate code is needed.</p>
</div>
<div class="listingblock">
<div class="content">
<pre> try {
PlcDriverManager plcDriverManager = new PooledPlcDriverManager();
TriggerCollector triggerCollector = new TriggerCollectorImpl(plcDriverManager);
TriggeredScraperImpl scraper = new TriggeredScraperImpl(scraperConfig, (jobName, sourceName, results) -&gt; {
...
}, triggerCollector);
scraper.start();
triggerCollector.start();
} catch (ScraperException e) {
log.error("Error starting the scraper", e);
}</pre>
</div>
</div>
<div class="paragraph">
<p>First, a new <code>PooledPlcDriverManager</code> is created (it doesn&#8217;t actually have to be the pooled version, but we strongly suggest you use it, as for some protocols the connection process is stressful for the connected PLC).</p>
</div>
<div class="paragraph">
<p>With this <code>plcDriverManager</code> we can then create a so-called <code>TriggerCollector</code>, to which we pass the driver manager as an argument.</p>
</div>
<div class="paragraph">
<p>Next comes what is probably the most important part: we configure the scraper by binding a <code>Scraper Configuration</code>, a <code>ResultHandler</code> and a <code>TriggerCollector</code> together.</p>
</div>
<div class="paragraph">
<p>After this, the scraper is ready to start, which is then done by calling <code>start</code> on the <code>scraper</code> as well as the <code>triggerCollector</code>.</p>
</div>
<div class="paragraph">
<p>For the sake of clarity, here comes the definition of the <code>ResultHandler</code> interface:</p>
</div>
<div class="listingblock">
<div class="content">
<pre>@FunctionalInterface
public interface ResultHandler {
/**
* Callback handler.
* @param jobName name of the job (from config)
* @param connectionName alias of the connection (&lt;b&gt;not&lt;/b&gt; connection String)
* @param results Results in the form alias to result value
*/
void handle(String jobName, String connectionName, Map&lt;String, Object&gt; results);
}</pre>
</div>
</div>
</div>
<div class="sect2">
<h3 id="configuration_using_a_json_or_yaml_file">Configuration using a <code>JSON</code> or <code>YAML</code> file</h3>
<div class="paragraph">
<p>As an alternative to using the Java API, the Scraper Configuration can also be read from a <code>JSON</code> or <code>YAML</code> document.</p>
</div>
<div class="paragraph">
<p>Here come some examples:</p>
</div>
<div class="paragraph">
<p>JSON:</p>
</div>
<div class="listingblock">
<div class="content">
<pre>{
"sources": {
"connectionName": "connectionString"
},
"jobs": [
{
"name": "jobName",
"triggerConfig": "(SCHEDULED,10000)",
"sources": [
"connectionName"
],
"fields": {
"a": "{address-a}",
"b": "{address-b}"
}
}
]
}</pre>
</div>
</div>
<div class="paragraph">
<p>YAML:</p>
</div>
<div class="listingblock">
<div class="content">
<pre>---
sources:
connectionName: connectionString
jobs:
- name: jobName
triggerConfig: (SCHEDULED,10000)
sources:
- connectionName
fields:
a: {address-a}
b: {address-b}</pre>
</div>
</div>
<div class="paragraph">
<p>In both cases, you can create the <code>ScraperConfiguration</code> with the following code:</p>
</div>
<div class="listingblock">
<div class="content">
<pre>ScraperConfiguration conf = ScraperConfiguration.fromFile("{path to the JSON or YAML file}", ScraperConfigurationTriggeredImpl.class);</pre>
</div>
</div>
</div>
</div>
</div>
</main>
<footer class="pt-4 my-md-5 pt-md-5 w-100 border-top">
<div class="row justify-content-md-center" style="font-size: 13px">
<div class="col col-6 text-center">
Copyright &#169; 2017&#x2013;2022 <a href="https://www.apache.org/">The Apache Software Foundation</a>.
All rights reserved.<br/>
Apache PLC4X, PLC4X, Apache, the Apache feather logo, and the Apache PLC4X project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries. All other marks mentioned may be trademarks or registered trademarks of their respective owners.
<br/><div style="text-align:center;">Home screen image taken from <a
href="https://flic.kr/p/chEftd">Flickr</a>, "Tesla Robot Dance" by Steve Jurvetson, licensed
under <a href="https://creativecommons.org/licenses/by/2.0/">CC BY 2.0 Generic</a>, image cropped
and blur effect added.</div>
</div>
</div>
</footer>
</div>
</div>
<!-- Bootstrap core JavaScript
================================================== -->
<!-- Placed at the end of the document so the pages load faster -->
<script src="../../js/jquery.slim.min.js"></script>
<script src="../../js/popper.min.js"></script>
<script src="../../js/bootstrap.min.js"></script>
<script type="text/javascript">
// For every carousel item, append clones of the first child of each of the
// four items that follow it, wrapping around to the first sibling once the
// end of the list is reached.
// NOTE(review): presumably this lets several items show per slide - confirm
// against the carousel CSS. On pages without a `.carousel` this is a no-op.
$('.carousel .carousel-item').each(function () {
const item = $(this);
let neighbour = item;
for (let step = 0; step < 4; step++) {
neighbour = neighbour.next();
if (neighbour.length === 0) {
// Ran off the end of the list: continue from the first sibling.
neighbour = item.siblings(':first');
}
neighbour.children(':first-child').clone().appendTo(item);
}
});
</script>
</body>
</html>