blob: 5512d5a8c8857fb485596fcba17e061a19c63958 [file] [log] [blame]
<!doctype html><html lang=en class=no-js><head><meta charset=utf-8><meta http-equiv=x-ua-compatible content="IE=edge"><meta name=viewport content="width=device-width,initial-scale=1"><title>Nexmark benchmark suite</title><meta name=description content="Apache Beam is an open source, unified model and set of language-specific SDKs for defining and executing data processing workflows, and also data ingestion and integration flows, supporting Enterprise Integration Patterns (EIPs) and Domain Specific Languages (DSLs). Dataflow pipelines simplify the mechanics of large-scale batch and streaming data processing and can run on a number of runtimes like Apache Flink, Apache Spark, and Google Cloud Dataflow (a cloud service). Beam also brings DSL in different languages, allowing users to easily implement their data integration processes."><link href="https://fonts.googleapis.com/css?family=Roboto:100,300,400,500,700" rel=stylesheet><link rel=preload href=/scss/main.min.408fddfe3e8a45f87a5a8c9a839d77db667c1c534e5e5cd0d957ffc3dd6c14cf.css as=style><link href=/scss/main.min.408fddfe3e8a45f87a5a8c9a839d77db667c1c534e5e5cd0d957ffc3dd6c14cf.css rel=stylesheet integrity><script src=https://code.jquery.com/jquery-2.2.4.min.js></script><style>.body__contained img{max-width:100%}</style><script type=text/javascript src=/js/bootstrap.min.2979f9a6e32fc42c3e7406339ee9fe76b31d1b52059776a02b4a7fa6a4fd280a.js defer></script>
<!-- Site behaviour scripts; every one is loaded with defer. -->
<script defer src="/js/language-switch-v2.min.121952b7980b920320ab229551857669209945e39b05ba2b433a565385ca44c6.js"></script>
<script defer src="/js/fix-menu.min.039174b67107465f2090a493f91e126f7aa797f29420f9edab8a54d9dd4b3d2d.js"></script>
<script defer src="/js/section-nav.min.1405fd5e70fab5f6c54037c269b1d137487d8f3d1b3009032525f6db3fbce991.js"></script>
<script defer src="/js/page-nav.min.af231204c9c52c5089d53a4c02739eacbb7f939e3be1c6ffcc212e0ac4dbf879.js"></script>
<script defer src="/js/expandable-list.min.75a4526624a3b8898fe7fb9e3428c205b581f8b38c7926922467aef17eac69f2.js"></script>
<script defer src="/js/copy-to-clipboard.min.364c06423d7e8993fc42bb4abc38c03195bc8386db26d18774ce775d08d5b18d.js"></script>
<script defer src="/js/calendar.min.336664054fa0f52b08bbd4e3c59b5cb6d63dcfb2b4d602839746516b0817446b.js"></script>
<script defer src="/js/fix-playground-nested-scroll.min.0283f1037cb1b9d5074c6eaf041292b524a8148a7cdb803d5ccd6d1fc4eb3253.js"></script>
<script defer src="/js/anchor-content-jump-fix.min.22d3240f81632e4c11179b9d2aaf37a40da9414333c43aa97344e8b21a7df0e4.js"></script>
<link rel=alternate type=application/rss+xml title="Apache Beam" href=/feed.xml><link rel=canonical href=/documentation/sdks/java/testing/nexmark/ data-proofer-ignore><link rel="shortcut icon" type=image/x-icon href=/images/favicon.ico><link rel=stylesheet href=https://use.fontawesome.com/releases/v5.4.1/css/all.css integrity=sha384-5sAR7xN1Nv6T6+dT2mhtzEpVJvfS3NScPQTrOxhwjIuvcA67KV2R5Jz6kr4abQsz crossorigin=anonymous><link rel=stylesheet href=https://unpkg.com/swiper@8/swiper-bundle.min.css><script async src=https://platform.twitter.com/widgets.js></script>
<script>(function(e,t,n,s,o,i,a){e.GoogleAnalyticsObject=o,e[o]=e[o]||function(){(e[o].q=e[o].q||[]).push(arguments)},e[o].l=1*new Date,i=t.createElement(n),a=t.getElementsByTagName(n)[0],i.async=1,i.src=s,a.parentNode.insertBefore(i,a)})(window,document,"script","//www.google-analytics.com/analytics.js","ga"),ga("create","UA-73650088-1","auto"),ga("send","pageview")</script><script>(function(e,t,n,s,o,i){e.hj=e.hj||function(){(e.hj.q=e.hj.q||[]).push(arguments)},e._hjSettings={hjid:2182187,hjsv:6},o=t.getElementsByTagName("head")[0],i=t.createElement("script"),i.async=1,i.src=n+e._hjSettings.hjid+s+e._hjSettings.hjsv,o.appendChild(i)})(window,document,"https://static.hotjar.com/c/hotjar-",".js?sv=")</script></head><body class=body data-spy=scroll data-target=.page-nav data-offset=0><nav class="navigation-bar-mobile header navbar navbar-fixed-top"><div class=navbar-header><a href=/ class=navbar-brand><img alt=Brand style=height:46px;width:43px src=/images/beam_logo_navbar_mobile.png></a>
<a class=navbar-link href=/get-started/>Get Started</a>
<a class=navbar-link href=/documentation/>Documentation</a>
<button type=button class="navbar-toggle menu-open" aria-expanded=false aria-controls=navbar onclick=openMenu()>
<span class=sr-only>Toggle navigation</span>
<span class=icon-bar></span>
<span class=icon-bar></span>
<span class=icon-bar></span></button></div><div class="navbar-mask closed"></div><div id=navbar class="navbar-container closed"><button type=button class=navbar-toggle aria-expanded=false aria-controls=navbar id=closeMenu>
<span class=sr-only>Toggle navigation</span>
<span class=icon-bar></span>
<span class=icon-bar></span>
<span class=icon-bar></span></button><ul class="nav navbar-nav"><li><div class=searchBar-mobile><script>(function(){var t,n="012923275103528129024:4emlchv9wzi",e=document.createElement("script");e.type="text/javascript",e.async=!0,e.src="https://cse.google.com/cse.js?cx="+n,t=document.getElementsByTagName("script")[0],t.parentNode.insertBefore(e,t)})()</script><gcse:search></gcse:search></div></li><li><a class=navbar-link href=/about>About</a></li><li><a class=navbar-link href=/get-started/>Get Started</a></li><li><span class=navbar-link>Documentation</span><ul><li><a href=/documentation/>General</a></li><li><a href=/documentation/sdks/java/>Languages</a></li><li><a href=/documentation/runners/capability-matrix/>Runners</a></li><li><a href=/documentation/io/connectors/>I/O Connectors</a></li></ul></li><li><a class=navbar-link href=/roadmap/>Roadmap</a></li><li><a class=navbar-link href=/community/>Community</a></li><li><a class=navbar-link href=/contribute/>Contribute</a></li><li><a class=navbar-link href=/blog/>Blog</a></li><li><a class=navbar-link href=/case-studies/>Case Studies</a></li></ul><ul class="nav navbar-nav navbar-right"><li><a href=https://github.com/apache/beam/edit/master/website/www/site/content/en/documentation/sdks/java/testing/nexmark.md data-proofer-ignore><svg xmlns="http://www.w3.org/2000/svg" width="25" height="24" fill="none" viewBox="0 0 25 24"><path stroke="#ff6d00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2.75" d="M4.543 20h4l10.5-10.5c.53-.53.828-1.25.828-2s-.298-1.47-.828-2-1.25-.828-2-.828-1.47.298-2 .828L4.543 16v4zm9.5-13.5 4 4"/></svg></a></li><li class=dropdown><a href=# class=dropdown-toggle id=apache-dropdown data-toggle=dropdown role=button aria-haspopup=true aria-expanded=false><img src=https://www.apache.org/foundation/press/kit/feather_small.png alt="Apache Logo" style=height:20px>
&nbsp;Apache
<span class=arrow-icon><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="none" viewBox="0 0 20 20"><circle cx="10" cy="10" r="10" fill="#ff6d00"/><path stroke="#fff" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8.535 5.28l4.573 4.818-4.573 4.403"/></svg></span></a><ul class="dropdown-menu dropdown-menu-right"><li><a target=_blank href=https://www.apache.org/>ASF Homepage</a></li><li><a target=_blank href=https://www.apache.org/licenses/>License</a></li><li><a target=_blank href=https://www.apache.org/security/>Security</a></li><li><a target=_blank href=https://www.apache.org/foundation/thanks.html>Thanks</a></li><li><a target=_blank href=https://www.apache.org/foundation/sponsorship.html>Sponsorship</a></li><li><a target=_blank href=https://www.apache.org/foundation/policies/conduct>Code of Conduct</a></li></ul></li></ul></div></nav><nav class=navigation-bar-desktop><a href=/ class=navbar-logo><img src=/images/beam_logo_navbar.png alt="Beam Logo"></a><div class=navbar-bar-left><div class=navbar-links><a class=navbar-link href=/about>About</a>
<a class=navbar-link href=/get-started/>Get Started</a><li class="dropdown navbar-dropdown navbar-dropdown-documentation"><a href=# class="dropdown-toggle navbar-link" role=button aria-haspopup=true aria-expanded=false>Documentation
<span><svg xmlns="http://www.w3.org/2000/svg" width="12" height="11" fill="none" viewBox="0 0 12 11"><path stroke="#ff6d00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10.666 4.535 5.847 9.108 1.444 4.535"/></svg></span></a><ul class=dropdown-menu><li><a class=navbar-dropdown-menu-link href=/documentation/>General</a></li><li><a class=navbar-dropdown-menu-link href=/documentation/sdks/java/>Languages</a></li><li><a class=navbar-dropdown-menu-link href=/documentation/runners/capability-matrix/>Runners</a></li><li><a class=navbar-dropdown-menu-link href=/documentation/io/connectors/>I/O Connectors</a></li></ul></li><a class=navbar-link href=/roadmap/>Roadmap</a>
<a class=navbar-link href=/community/>Community</a>
<a class=navbar-link href=/contribute/>Contribute</a>
<a class=navbar-link href=/blog/>Blog</a>
<a class=navbar-link href=/case-studies/>Case Studies</a></div><div id=iconsBar><a type=button onclick=showSearch()><svg xmlns="http://www.w3.org/2000/svg" width="25" height="24" fill="none" viewBox="0 0 25 24"><path stroke="#ff6d00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2.75" d="M10.191 17c3.866.0 7-3.134 7-7s-3.134-7-7-7-7 3.134-7 7 3.134 7 7 7zm11 4-6-6"/></svg></a><a target=_blank href=https://github.com/apache/beam/edit/master/website/www/site/content/en/documentation/sdks/java/testing/nexmark.md data-proofer-ignore><svg xmlns="http://www.w3.org/2000/svg" width="25" height="24" fill="none" viewBox="0 0 25 24"><path stroke="#ff6d00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2.75" d="M4.543 20h4l10.5-10.5c.53-.53.828-1.25.828-2s-.298-1.47-.828-2-1.25-.828-2-.828-1.47.298-2 .828L4.543 16v4zm9.5-13.5 4 4"/></svg></a><li class="dropdown navbar-dropdown navbar-dropdown-apache"><a href=# class=dropdown-toggle role=button aria-haspopup=true aria-expanded=false><img src=https://www.apache.org/foundation/press/kit/feather_small.png alt="Apache Logo" style=height:20px>
&nbsp;Apache
<span class=arrow-icon><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" fill="none" viewBox="0 0 20 20"><circle cx="10" cy="10" r="10" fill="#ff6d00"/><path stroke="#fff" stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M8.535 5.28l4.573 4.818-4.573 4.403"/></svg></span></a><ul class=dropdown-menu><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/>ASF Homepage</a></li><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/licenses/>License</a></li><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/security/>Security</a></li><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/foundation/thanks.html>Thanks</a></li><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/foundation/sponsorship.html>Sponsorship</a></li><li><a class=navbar-dropdown-menu-link target=_blank href=https://www.apache.org/foundation/policies/conduct>Code of Conduct</a></li></ul></li></div><div class="searchBar disappear"><script>(function(){var t,n="012923275103528129024:4emlchv9wzi",e=document.createElement("script");e.type="text/javascript",e.async=!0,e.src="https://cse.google.com/cse.js?cx="+n,t=document.getElementsByTagName("script")[0],t.parentNode.insertBefore(e,t)})()</script><gcse:search></gcse:search>
<a type=button onclick=endSearch()><svg xmlns="http://www.w3.org/2000/svg" width="25" height="25" fill="none" viewBox="0 0 25 25"><path stroke="#ff6d00" stroke-linecap="round" stroke-linejoin="round" stroke-width="2.75" d="M21.122 20.827 4.727 4.432M21.122 4.43 4.727 20.827"/></svg></a></div></div></nav><div class=header-push></div><div class="top-banners swiper"><div class=swiper-wrapper><div class=swiper-slide><a href=https://tour.beam.apache.org><img class=banner-img-desktop src=/images/banners/tour-of-beam/tour-of-beam-desktop.png alt="Start Tour of Beam">
<img class=banner-img-mobile src=/images/banners/tour-of-beam/tour-of-beam-mobile.png alt="Start Tour of Beam"></a></div><div class=swiper-slide><a href=https://beam.apache.org/documentation/ml/overview/><img class=banner-img-desktop src=/images/banners/machine-learning/machine-learning-desktop.jpg alt="Machine Learning">
<img class=banner-img-mobile src=/images/banners/machine-learning/machine-learning-mobile.jpg alt="Machine Learning"></a></div></div><div class=swiper-pagination></div><div class=swiper-button-prev></div><div class=swiper-button-next></div></div><script src=/js/swiper-bundle.min.min.e0e8f81b0b15728d35ff73c07f42ddbb17a108d6f23df4953cb3e60df7ade675.js></script>
<script src=/js/sliders/top-banners.min.afa7d0a19acf7a3b28ca369490b3d401a619562a2a4c9612577be2f66a4b9855.js></script>
<script>
// Search-bar and mobile-menu handlers. They are called from inline onclick
// attributes in the navigation markup, so they must stay global functions.

// Sets the hint text on every element matched by jQuery's "input:text".
function addPlaceholder() {
  $("input:text").attr("placeholder", "What are you looking for?");
}

// Reveals the search bar first, then hides the icon bar.
function showSearch() {
  addPlaceholder();
  var searchBar = document.querySelector(".searchBar");
  searchBar.classList.remove("disappear");
  var iconsBar = document.querySelector("#iconsBar");
  iconsBar.classList.add("disappear");
}

// Hides the search bar first, then brings the icon bar back.
function endSearch() {
  var searchBar = document.querySelector(".searchBar");
  searchBar.classList.add("disappear");
  var iconsBar = document.querySelector("#iconsBar");
  iconsBar.classList.remove("disappear");
}

// Toggles the "fixedPosition" class on <body>.
function blockScroll() {
  $("body").toggleClass("fixedPosition");
}

// Mobile menu button handler: set the placeholder, then toggle the body class.
function openMenu() {
  addPlaceholder();
  blockScroll();
}
</script><div class="clearfix container-main-content"><div class="section-nav closed" data-offset-top=90 data-offset-bottom=500><span class="section-nav-back glyphicon glyphicon-menu-left"></span><nav><ul class=section-nav-list data-section-nav><li><span class=section-nav-list-main-title>Languages</span></li><li><span class=section-nav-list-title>Java</span><ul class=section-nav-list><li><a href=/documentation/sdks/java/>Java SDK overview</a></li><li><a href=https://beam.apache.org/releases/javadoc/2.56.0/ target=_blank>Java SDK API reference <img src=/images/external-link-icon.png width=14 height=14 alt="External link."></a></li><li><a href=/documentation/sdks/java-dependencies/>Java SDK dependencies</a></li><li><a href=/documentation/sdks/java-extensions/>Java SDK extensions</a></li><li><a href=/documentation/sdks/java-thirdparty/>Java 3rd party extensions</a></li><li><a href=/documentation/sdks/java/testing/nexmark/>Nexmark benchmark suite</a></li><li><a href=/documentation/sdks/java/testing/tpcds/>TPC-DS benchmark suite</a></li><li><a href=/documentation/sdks/java-multi-language-pipelines/>Java multi-language pipelines quickstart</a></li></ul></li><li><span class=section-nav-list-title>Python</span><ul class=section-nav-list><li><a href=/documentation/sdks/python/>Python SDK overview</a></li><li><a href=https://beam.apache.org/releases/pydoc/2.56.0/ target=_blank>Python SDK API reference <img 
src=/images/external-link-icon.png width=14 height=14 alt="External link."></a></li><li><a href=/documentation/sdks/python-dependencies/>Python SDK dependencies</a></li><li><a href=/documentation/sdks/python-streaming/>Python streaming pipelines</a></li><li><a href=/documentation/sdks/python-type-safety/>Ensuring Python type safety</a></li><li><a href=/documentation/sdks/python-machine-learning/>Machine Learning</a></li><li><a href=/documentation/sdks/python-pipeline-dependencies/>Managing pipeline dependencies</a></li><li><a href=/documentation/sdks/python-multi-language-pipelines/>Python multi-language pipelines quickstart</a></li><li><a href=/documentation/sdks/python-unrecoverable-errors/>Python Unrecoverable Errors</a></li></ul></li><li><span class=section-nav-list-title>Go</span><ul class=section-nav-list><li><a href=/documentation/sdks/go/>Go SDK overview</a></li><li><a href=https://pkg.go.dev/github.com/apache/beam/sdks/v2/go/pkg/beam target=_blank>Go SDK API reference <img src=/images/external-link-icon.png width=14 height=14 alt="External link."></a><li><a href=/documentation/sdks/go-dependencies/>Go SDK dependencies</a></li><li><a href=/documentation/sdks/go-cross-compilation/>Cross compilation</a></li></li></ul></li><li><span class=section-nav-list-title>Typescript</span><ul class=section-nav-list><li><a href=/documentation/sdks/typescript/>Typescript SDK overview</a></li><li><a href=https://beam.apache.org/releases/typedoc/current/ target=_blank>Typescript SDK API reference <img src=/images/external-link-icon.png width=14 height=14 alt="External link."></a></li></ul></li><li><span class=section-nav-list-title>Scala</span><ul class=section-nav-list><li><a href=/documentation/sdks/scala/>Scio</a></li><li><a href=https://spotify.github.io/scio/api/com/spotify/scio/index.html target=_blank>Scio SDK API reference <img src=/images/external-link-icon.png width=14 height=14 alt="External link."></a></li></ul></li><li><span 
class=section-nav-list-title>Yaml</span><ul class=section-nav-list><li><a href=/documentation/sdks/yaml/>Yaml overview</a></li><li><a href=/documentation/sdks/yaml-udf/>Yaml User Defined Functions</a></li><li><a href=/documentation/sdks/yaml-combine/>Yaml Aggregation</a></li><li><a href=/documentation/sdks/yaml-errors/>Error handling</a></li><li><a href=/documentation/sdks/yaml-inline-python/>Inlining Python</a></li><li><a href=https://beam.apache.org/releases/yamldoc/current/ target=_blank>YAML API reference <img src=/images/external-link-icon.png width=14 height=14 alt="External link."></a></ul></li><li><span class=section-nav-list-title>SQL</span><ul class=section-nav-list><li><a href=/documentation/dsls/sql/overview/>Overview</a></li><li><a href=/documentation/dsls/sql/walkthrough/>Walkthrough</a></li><li><a href=/documentation/dsls/sql/shell/>Shell</a></li><li class=section-nav-item--collapsible><span class=section-nav-list-title>Apache Calcite dialect</span><ul class=section-nav-list><li><a href=/documentation/dsls/sql/calcite/overview/>Calcite support overview</a></li><li><a href=/documentation/dsls/sql/calcite/query-syntax/>Query syntax</a></li><li><a href=/documentation/dsls/sql/calcite/lexical/>Lexical structure</a></li><li><a href=/documentation/dsls/sql/calcite/data-types/>Data types</a></li><li><a href=/documentation/dsls/sql/calcite/scalar-functions/>Scalar functions</a></li><li><a href=/documentation/dsls/sql/calcite/aggregate-functions/>Aggregate functions</a></li></ul></li><li class=section-nav-item--collapsible><span class=section-nav-list-title>ZetaSQL dialect</span><ul class=section-nav-list><li><a href=/documentation/dsls/sql/zetasql/overview/>ZetaSQL support overview</a></li><li><a href=/documentation/dsls/sql/zetasql/syntax/>Function call rules</a></li><li><a href=/documentation/dsls/sql/zetasql/conversion-rules/>Conversion rules</a></li><li><a href=/documentation/dsls/sql/zetasql/query-syntax/>Query syntax</a></li><li><a 
href=/documentation/dsls/sql/zetasql/lexical/>Lexical structure</a></li><li><a href=/documentation/dsls/sql/zetasql/data-types/>Data types</a></li><li><a href=/documentation/dsls/sql/zetasql/operators/>Operators</a></li><li class=section-nav-item--collapsible><span class=section-nav-list-title>Scalar functions</span><ul class=section-nav-list><li><a href=/documentation/dsls/sql/zetasql/string-functions/>String functions</a></li><li><a href=/documentation/dsls/sql/zetasql/math-functions/>Mathematical functions</a></li><li><a href=/documentation/dsls/sql/zetasql/conditional-expressions/>Conditional expressions</a></li></ul></li><li><a href=/documentation/dsls/sql/zetasql/aggregate-functions/>Aggregate functions</a></li></ul></li><li class=section-nav-item--collapsible><span class=section-nav-list-title>Beam SQL extensions</span><ul class=section-nav-list><li><a href=/documentation/dsls/sql/extensions/create-external-table/>CREATE EXTERNAL TABLE</a></li><li><a href=/documentation/dsls/sql/extensions/windowing-and-triggering/>Windowing & triggering</a></li><li><a href=/documentation/dsls/sql/extensions/joins/>Joins</a></li><li><a href=/documentation/dsls/sql/extensions/user-defined-functions/>User-defined functions</a></li><li><a href=/documentation/dsls/sql/extensions/set/>SET pipeline options</a></li></ul></li></ul></li><li><span class=section-nav-list-title>DataFrames</span><ul class=section-nav-list><li><a href=/documentation/dsls/dataframes/overview/>Overview</a></li><li><a href=/documentation/dsls/dataframes/differences-from-pandas/>Differences from pandas</a></li><li><a href=https://github.com/apache/beam/tree/master/sdks/python/apache_beam/examples/dataframe target=_blank>Example pipelines <img src=/images/external-link-icon.png width=14 height=14 alt="External link."></a></li><li><a href=https://beam.apache.org/releases/pydoc/2.56.0/apache_beam.dataframe.html target=_blank>DataFrame API reference <img src=/images/external-link-icon.png width=14 height=14 
alt="External link."></a></li></ul></li></ul></nav></div><nav class="page-nav clearfix" data-offset-top=90 data-offset-bottom=500><nav id=TableOfContents><ul><li><a href=#what-it-is>What it is</a></li><li><a href=#the-queries>The queries</a></li><li><a href=#benchmark-workload-configuration>Benchmark workload configuration</a><ul><li><a href=#events-generation-defaults>Events generation (defaults)</a></li><li><a href=#windows-defaults>Windows (defaults)</a></li><li><a href=#events-proportions-defaults>Events Proportions (defaults)</a></li><li><a href=#technical>Technical</a></li></ul></li><li><a href=#nexmark-output>Nexmark output</a></li><li><a href=#benchmark-launch-configuration>Benchmark launch configuration</a><ul><li><a href=#common-configuration-parameters>Common configuration parameters</a></li><li><a href=#available-suites>Available Suites</a></li><li><a href=#google-cloud-dataflow-runner-specific-configuration>Google Cloud Dataflow runner specific configuration</a></li><li><a href=#direct-runner-specific-configuration>Direct runner specific configuration</a></li><li><a href=#flink-runner-specific-configuration>Flink runner specific configuration</a></li><li><a href=#spark-runner-specific-configuration>Spark runner specific configuration</a></li><li><a href=#kafka-sourcesink-configuration-parameters>Kafka source/sink configuration parameters</a></li></ul></li><li><a href=#current-status>Current status</a><ul><li><a href=#batch--synthetic--local>Batch / Synthetic / Local</a></li><li><a href=#streaming--synthetic--local>Streaming / Synthetic / Local</a></li><li><a href=#batch--synthetic--cluster>Batch / Synthetic / Cluster</a></li><li><a href=#streaming--synthetic--cluster>Streaming / Synthetic / Cluster</a></li></ul></li><li><a href=#running-nexmark>Running Nexmark</a><ul><li><a href=#running-smoke-suite-on-the-directrunner-local>Running SMOKE suite on the DirectRunner (local)</a></li><li><a href=#running-smoke-suite-on-the-sparkrunner-local>Running SMOKE 
suite on the SparkRunner (local)</a></li><li><a href=#running-smoke-suite-on-the-flinkrunner-local>Running SMOKE suite on the FlinkRunner (local)</a></li><li><a href=#running-smoke-suite-on-google-cloud-dataflow>Running SMOKE suite on Google Cloud Dataflow</a></li><li><a href=#running-query-0-on-a-spark-cluster-with-apache-hadoop-yarn>Running query 0 on a Spark cluster with Apache Hadoop YARN</a></li></ul></li><li><a href=#nexmark-dashboards>Nexmark dashboards</a><ul><li><a href=#dashboards-content>Dashboards content</a></li></ul></li></ul></nav></nav><div class="body__contained body__section-nav"><h1 id=nexmark-benchmark-suite>Nexmark benchmark suite</h1><h2 id=what-it-is>What it is</h2><p>Nexmark is a suite of pipelines inspired by the &lsquo;continuous data stream&rsquo;
queries in the <a href=https://web.archive.org/web/20100620010601/http://datalab.cs.pdx.edu/niagaraST/NEXMark/>Nexmark research
paper</a>.</p><p>These are multiple queries over a three-entity model representing an online
auction system:</p><ul><li><strong>Person</strong> represents a person submitting an item for auction and/or making
a bid on an auction.</li><li><strong>Auction</strong> represents an item under auction.</li><li><strong>Bid</strong> represents a bid for an item under auction.</li></ul><h2 id=the-queries>The queries</h2><p>The queries exercise many aspects of the Beam model:</p><ul><li><strong>Query1</strong> or <strong>CURRENCY_CONVERSION</strong>: What are the bid values in Euros?
Illustrates a simple map.</li><li><strong>Query2</strong> or <strong>SELECTION</strong>: What are the auctions with particular auction numbers?
Illustrates a simple filter.</li><li><strong>Query3</strong> or <strong>LOCAL_ITEM_SUGGESTION</strong>: Who is selling in particular US states?
Illustrates an incremental join (using per-key state and timer) and filter.</li><li><strong>Query4</strong> or <strong>AVERAGE_PRICE_FOR_CATEGORY</strong>: What is the average selling price for each auction
category?
Illustrates complex join (using custom window functions) and
aggregation.</li><li><strong>Query5</strong> or <strong>HOT_ITEMS</strong>: Which auctions have seen the most bids in the last period?
Illustrates sliding windows and combiners.</li><li><strong>Query6</strong> or <strong>AVERAGE_SELLING_PRICE_BY_SELLER</strong>: What is the average selling price per seller for their
last 10 closed auctions?
Shares the same &lsquo;winning bids&rsquo; core as for <strong>Query4</strong>, and
illustrates a specialized combiner.</li><li><strong>Query7</strong> or <strong>HIGHEST_BID</strong>: What are the highest bids per period?
Deliberately implemented using a side input to illustrate fanout.</li><li><strong>Query8</strong> or <strong>MONITOR_NEW_USERS</strong>: Who has entered the system and created an auction in
the last period?
Illustrates a simple join.</li></ul><p>We have augmented the original queries with six more:</p><ul><li><strong>Query0</strong> or <strong>PASSTHROUGH</strong>: Pass-through.
Allows us to measure the monitoring overhead.</li><li><strong>Query9</strong> or <strong>WINNING_BIDS</strong>: Winning-bids.
A common sub-query shared by <strong>Query4</strong> and <strong>Query6</strong>.</li><li><strong>Query10</strong> or <strong>LOG_TO_SHARDED_FILES</strong>: Log all events to GCS files.
Illustrates windows with large side effects on firing.</li><li><strong>Query11</strong> or <strong>USER_SESSIONS</strong>: How many bids did a user make in each session they
were active?
Illustrates session windows.</li><li><strong>Query12</strong> or <strong>PROCESSING_TIME_WINDOWS</strong>: How many bids does a user make within a fixed
processing time limit?
Illustrates working in processing time in the Global window, as
compared with event time in non-Global windows for all the other
queries.</li><li><strong>BOUNDED_SIDE_INPUT_JOIN</strong>: Joins a stream to a bounded side input, modeling basic stream enrichment.</li></ul><h2 id=benchmark-workload-configuration>Benchmark workload configuration</h2><p>Here are some of the knobs of the benchmark workload (see
<a href=https://github.com/apache/beam/blob/master/sdks/java/testing/nexmark/src/main/java/org/apache/beam/sdk/nexmark/NexmarkConfiguration.java>NexmarkConfiguration.java</a>).</p><p>These configuration items can be passed to the launch command line.</p><h3 id=events-generation-defaults>Events generation (defaults)</h3><ul><li>100 000 events generated</li><li>100 generator threads</li><li>Event rate in SIN curve</li><li>Initial event rate of 10 000</li><li>Event rate step of 10 000</li><li>100 concurrent auctions</li><li>1000 concurrent persons bidding / creating auctions</li></ul><h3 id=windows-defaults>Windows (defaults)</h3><ul><li>size 10s</li><li>sliding period 5s</li><li>watermark hold for 0s</li></ul><h3 id=events-proportions-defaults>Events Proportions (defaults)</h3><ul><li>Hot Auctions = ½</li><li>Hot Bidders = ¼</li><li>Hot Sellers = ¼</li></ul><h3 id=technical>Technical</h3><ul><li>Artificial CPU load</li><li>Artificial IO load</li></ul><h2 id=nexmark-output>Nexmark output</h2><p>Here is an example output of the Nexmark benchmark run in streaming mode with
the SMOKE suite on the (local) direct runner:</p><pre>
Performance:
Conf Runtime(sec) Events(/sec) Results
0000 5,5 18138,9 100000
0001 4,2 23657,4 92000
0002 2,2 45683,0 351
0003 3,9 25348,5 444
0004 1,6 6207,3 40
0005 5,0 20173,5 12
0006 0,9 11376,6 401
0007 121,4 823,5 1
0008 2,5 40273,9 6000
0009 0,9 10695,2 298
0010 4,0 25025,0 1
0011 4,4 22655,2 1919
0012 3,5 28208,7 1919
</pre><h2 id=benchmark-launch-configuration>Benchmark launch configuration</h2><p>The Nexmark launcher accepts the <code>--runner</code> argument as usual for programs that
use Beam PipelineOptions to manage their command line arguments. In addition
to this, the necessary dependencies must be configured.</p><p>When running via Gradle, the following two parameters control the execution:</p><pre><code>-P nexmark.args
The command line to pass to the Nexmark main program.
-P nexmark.runner
The Gradle project name of the runner, such as &quot;:runners:direct-java&quot; or
&quot;:runners:flink:1.13&quot;. The project names can be found in the root
`settings.gradle.kts`.
</code></pre><p>Test data is deterministically synthesized on demand. The test
data may be synthesized in the same pipeline as the query itself,
or may be published to Pub/Sub or Kafka.</p><p>The query results may be:</p><ul><li>Published to Pub/Sub or Kafka.</li><li>Written to text files as plain text.</li><li>Written to text files using an Avro encoding.</li><li>Sent to BigQuery.</li><li>Discarded.</li></ul><h3 id=common-configuration-parameters>Common configuration parameters</h3><p>Decide if batch or streaming:</p><pre><code>--streaming=true
</code></pre><p>Number of event generators:</p><pre><code>--numEventGenerators=4
</code></pre><p>Queries can be run by their name or by their number (number is still there for backward compatibility, only the queries 0 to 12 have a number)</p><p>Run query <strong>N</strong>:</p><pre><code>--query=N
</code></pre><p>Run query called <strong>PASSTHROUGH</strong>:</p><pre><code>--query=PASSTHROUGH
</code></pre><h3 id=available-suites>Available Suites</h3><p>The suite to run can be chosen using this configuration parameter:</p><pre><code>--suite=SUITE
</code></pre><p>Available suites are:</p><ul><li>DEFAULT: Test default configuration with query 0.</li><li>SMOKE: Run all the queries with the default configuration.</li><li>STRESS: Like SMOKE but for 1m events.</li><li>FULL_THROTTLE: Like SMOKE but for 100m events.</li></ul><h3 id=google-cloud-dataflow-runner-specific-configuration>Google Cloud Dataflow runner specific configuration</h3><pre><code>--manageResources=false --monitorJobs=true \
--enforceEncodability=false --enforceImmutability=false \
--project=&lt;your project&gt; \
--zone=&lt;your zone&gt; \
--workerMachineType=n1-highmem-8 \
--stagingLocation=gs://&lt;a gs path for staging&gt; \
--runner=DataflowRunner \
--tempLocation=gs://&lt;a gs path for temporary files&gt; \
--filesToStage=target/beam-sdks-java-nexmark-2.56.0.jar
</code></pre><h3 id=direct-runner-specific-configuration>Direct runner specific configuration</h3><pre><code>--manageResources=false --monitorJobs=true \
--enforceEncodability=false --enforceImmutability=false
</code></pre><h3 id=flink-runner-specific-configuration>Flink runner specific configuration</h3><pre><code>--manageResources=false --monitorJobs=true \
--flinkMaster=[local] --parallelism=#numcores
</code></pre><h3 id=spark-runner-specific-configuration>Spark runner specific configuration</h3><pre><code>--manageResources=false --monitorJobs=true \
--sparkMaster=local \
-Dspark.ui.enabled=false -DSPARK_LOCAL_IP=localhost -Dsun.io.serialization.extendedDebugInfo=true
</code></pre><h3 id=kafka-sourcesink-configuration-parameters>Kafka source/sink configuration parameters</h3><p>Set Kafka host/ip (for example, &ldquo;localhost:9092&rdquo;):</p><pre><code>--bootstrapServers=&lt;kafka host/ip&gt;
</code></pre><p>Write results into Kafka topic:</p><pre><code>--sinkType=KAFKA
</code></pre><p>Set topic name which will be used for benchmark results:</p><pre><code>--kafkaResultsTopic=&lt;topic name&gt;
</code></pre><p>Write and/or read events to/from a Kafka topic:</p><pre><code>--sourceType=KAFKA
</code></pre><p>Set topic name which will be used for benchmark events:</p><pre><code>--kafkaTopic=&lt;topic name&gt;
</code></pre><h2 id=current-status>Current status</h2><p>These tables show the status of the query runs on the different runners. Google Cloud Dataflow status is yet to come.</p><h3 id=batch--synthetic--local>Batch / Synthetic / Local</h3><table class="table table-bordered"><tr><th>Query</th><th>Direct</th><th>Spark</th><th>Flink</th></tr><tr><td>0</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>1</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>2</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>3</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>4</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>5</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>6</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>7</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>8</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>9</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>10</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>11</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>12</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>BOUNDED_SIDE_INPUT_JOIN</td><td>ok</td><td>ok</td><td>ok</td></tr></table><h3 id=streaming--synthetic--local>Streaming / Synthetic / Local</h3><table class="table table-bordered"><tr><th>Query</th><th>Direct</th><th>Spark <a href=https://github.com/apache/beam/issues/18416>Issue 18416</a></th><th>Flink</th></tr><tr><td>0</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>1</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>2</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>3</td><td>ok</td><td><a href=https://github.com/apache/beam/issues/18074>Issue 18074</a>, <a href=https://issues.apache.org/jira/browse/BEAM-3961>BEAM-3961</a></td><td>ok</td></tr><tr><td>4</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>5</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>6</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>7</td><td>ok</td><td><a 
href=https://issues.apache.org/jira/browse/BEAM-2112>BEAM-2112</a></td><td>ok</td></tr><tr><td>8</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>9</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>10</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>11</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>12</td><td>ok</td><td>ok</td><td>ok</td></tr><tr><td>BOUNDED_SIDE_INPUT_JOIN</td><td>ok</td><td><a href=https://issues.apache.org/jira/browse/BEAM-2112>BEAM-2112</a></td><td>ok</td></tr></table><h3 id=batch--synthetic--cluster>Batch / Synthetic / Cluster</h3><p>Yet to come</p><h3 id=streaming--synthetic--cluster>Streaming / Synthetic / Cluster</h3><p>Yet to come</p><h2 id=running-nexmark>Running Nexmark</h2><h3 id=running-smoke-suite-on-the-directrunner-local>Running SMOKE suite on the DirectRunner (local)</h3><p>The DirectRunner is default, so it is not required to pass <code>-Pnexmark.runner</code>.
Here we do it for maximum clarity.</p><p>The direct runner does not have separate batch and streaming modes, but the
Nexmark launch does.</p><p>These parameters leave on many of the DirectRunner&rsquo;s extra safety checks so the
SMOKE suite can make sure there is nothing broken in the Nexmark suite.</p><p>Batch Mode:</p><pre><code>./gradlew :sdks:java:testing:nexmark:run \
-Pnexmark.runner=&quot;:runners:direct-java&quot; \
-Pnexmark.args=&quot;
--runner=DirectRunner
--streaming=false
--suite=SMOKE
--manageResources=false
--monitorJobs=true
--enforceEncodability=true
--enforceImmutability=true&quot;
</code></pre><p>Streaming Mode:</p><pre><code>./gradlew :sdks:java:testing:nexmark:run \
-Pnexmark.runner=&quot;:runners:direct-java&quot; \
-Pnexmark.args=&quot;
--runner=DirectRunner
--streaming=true
--suite=SMOKE
--manageResources=false
--monitorJobs=true
--enforceEncodability=true
--enforceImmutability=true&quot;
</code></pre><h3 id=running-smoke-suite-on-the-sparkrunner-local>Running SMOKE suite on the SparkRunner (local)</h3><p>The SparkRunner is special-cased in the Nexmark gradle launch. The task will
provide the version of Spark that the SparkRunner is built against, and
configure logging.</p><p>Batch Mode:</p><pre><code>./gradlew :sdks:java:testing:nexmark:run \
-Pnexmark.runner=&quot;:runners:spark:3&quot; \
-Pnexmark.args=&quot;
--runner=SparkRunner
--suite=SMOKE
--streamTimeout=60
--streaming=false
--manageResources=false
--monitorJobs=true&quot;
</code></pre><p>Streaming Mode:</p><pre><code>./gradlew :sdks:java:testing:nexmark:run \
-Pnexmark.runner=&quot;:runners:spark:3&quot; \
-Pnexmark.args=&quot;
--runner=SparkRunner
--suite=SMOKE
--streamTimeout=60
--streaming=true
--manageResources=false
--monitorJobs=true&quot;
</code></pre><h3 id=running-smoke-suite-on-the-flinkrunner-local>Running SMOKE suite on the FlinkRunner (local)</h3><p>Batch Mode:</p><pre><code>./gradlew :sdks:java:testing:nexmark:run \
-Pnexmark.runner=&quot;:runners:flink:1.13&quot; \
-Pnexmark.args=&quot;
--runner=FlinkRunner
--suite=SMOKE
--streamTimeout=60
--streaming=false
--manageResources=false
--monitorJobs=true
--flinkMaster=[local]&quot;
</code></pre><p>Streaming Mode:</p><pre><code>./gradlew :sdks:java:testing:nexmark:run \
-Pnexmark.runner=&quot;:runners:flink:1.13&quot; \
-Pnexmark.args=&quot;
--runner=FlinkRunner
--suite=SMOKE
--streamTimeout=60
--streaming=true
--manageResources=false
--monitorJobs=true
--flinkMaster=[local]&quot;
</code></pre><h3 id=running-smoke-suite-on-google-cloud-dataflow>Running SMOKE suite on Google Cloud Dataflow</h3><p>Set these up first so the below command is valid</p><pre><code>PROJECT=&lt;your project&gt;
ZONE=&lt;your zone&gt;
STAGING_LOCATION=gs://&lt;a GCS path for staging&gt;
PUBSUB_TOPIC=&lt;existing pubsub topic&gt;
</code></pre><p>Launch:</p><pre><code>./gradlew :sdks:java:testing:nexmark:run \
-Pnexmark.runner=&quot;:runners:google-cloud-dataflow-java&quot; \
-Pnexmark.args=&quot;
--runner=DataflowRunner
--suite=SMOKE
--streamTimeout=60
--streaming=true
--manageResources=false
--monitorJobs=true
--project=${PROJECT}
--zone=${ZONE}
--workerMachineType=n1-highmem-8
--stagingLocation=${STAGING_LOCATION}
--sourceType=PUBSUB
--pubSubMode=PUBLISH_ONLY
--pubsubTopic=${PUBSUB_TOPIC}
--resourceNameMode=VERBATIM
--manageResources=false
--numEventGenerators=64
--numWorkers=16
--maxNumWorkers=16
--firstEventRate=100000
--nextEventRate=100000
--ratePeriodSec=3600
--isRateLimited=true
--avgPersonByteSize=500
--avgAuctionByteSize=500
--avgBidByteSize=500
--probDelayedEvent=0.000001
--occasionalDelaySec=3600
--numEvents=0
--useWallclockEventTime=true
--usePubsubPublishTime=true
--experiments=enable_custom_pubsub_sink&quot;
</code></pre><h3 id=running-query-0-on-a-spark-cluster-with-apache-hadoop-yarn>Running query 0 on a Spark cluster with Apache Hadoop YARN</h3><p>Building package:</p><pre><code>./gradlew :sdks:java:testing:nexmark:assemble
</code></pre><p>Submit to the cluster:</p><pre><code>spark-submit \
--class org.apache.beam.sdk.nexmark.Main \
--master yarn-client \
--driver-memory 512m \
--executor-memory 512m \
--executor-cores 1 \
sdks/java/testing/nexmark/build/libs/beam-sdks-java-nexmark-2.56.0-spark.jar \
--runner=SparkRunner \
--query=0 \
--streamTimeout=60 \
--streaming=false \
--manageResources=false \
--monitorJobs=true
</code></pre><h2 id=nexmark-dashboards>Nexmark dashboards</h2><p>The dashboards below are used as a CI mechanism to detect regressions in the Beam components. They are not meant to be a benchmark comparison of the runners or engines, especially because:</p><ul><li>Parameters of the runners are not the same</li><li>Nexmark is run with the runners in local (most of the time embedded) mode</li><li>Nexmark runs on a shared machine that also runs all the CI and builds.</li><li>Runners have different support of the Beam model</li><li>Runners have different strengths that make comparison difficult:<ul><li>Some runners were designed to be batch oriented, others streaming oriented</li><li>Some are designed towards sub-second latency, others support auto-scaling</li></ul></li></ul><h3 id=dashboards-content>Dashboards content</h3><p>At each commit on master, Nexmark suites are run and plots are created on the graphs. All metrics dashboards are hosted at <a href=http://metrics.beam.apache.org/>metrics.beam.apache.org</a>.</p><p>There are 2 kinds of dashboards:</p><ul><li>one for performance (run times of the queries)</li><li>one for the size of the output PCollection (which should be constant)</li></ul><p>There are dashboards for these runners (others to come):</p><ul><li>spark</li><li>flink</li><li>direct runner</li><li>Dataflow</li></ul><p>Each dashboard contains:</p><ul><li>graphs in batch mode</li><li>graphs in streaming mode</li><li>graphs for all the queries.</li></ul></div></div><footer class=footer><div class=footer__contained><div class=footer__cols><div class="footer__cols__col footer__cols__col__logos"><div class=footer__cols__col__logo><img src=/images/beam_logo_circle.svg class=footer__logo alt="Beam logo"></div><div class=footer__cols__col__logo><img src=/images/apache_logo_circle.svg class=footer__logo alt="Apache logo"></div></div><div class=footer-wrapper><div class=wrapper-grid><div class=footer__cols__col><div class=footer__cols__col__title>Start</div><div 
class=footer__cols__col__link><a href=/get-started/beam-overview/>Overview</a></div><div class=footer__cols__col__link><a href=/get-started/quickstart-java/>Quickstart (Java)</a></div><div class=footer__cols__col__link><a href=/get-started/quickstart-py/>Quickstart (Python)</a></div><div class=footer__cols__col__link><a href=/get-started/quickstart-go/>Quickstart (Go)</a></div><div class=footer__cols__col__link><a href=/get-started/downloads/>Downloads</a></div></div><div class=footer__cols__col><div class=footer__cols__col__title>Docs</div><div class=footer__cols__col__link><a href=/documentation/programming-guide/>Concepts</a></div><div class=footer__cols__col__link><a href=/documentation/pipelines/design-your-pipeline/>Pipelines</a></div><div class=footer__cols__col__link><a href=/documentation/runners/capability-matrix/>Runners</a></div></div><div class=footer__cols__col><div class=footer__cols__col__title>Community</div><div class=footer__cols__col__link><a href=/contribute/>Contribute</a></div><div class=footer__cols__col__link><a href=https://projects.apache.org/committee.html?beam target=_blank>Team<img src=/images/external-link-icon.png width=14 height=14 alt="External link."></a></div><div class=footer__cols__col__link><a href=/community/presentation-materials/>Media</a></div><div class=footer__cols__col__link><a href=/community/in-person/>Events/Meetups</a></div><div class=footer__cols__col__link><a href=/community/contact-us/>Contact Us</a></div></div><div class=footer__cols__col><div class=footer__cols__col__title>Resources</div><div class=footer__cols__col__link><a href=/blog/>Blog</a></div><div class=footer__cols__col__link><a href=https://github.com/apache/beam>GitHub</a></div></div></div><div class=footer__bottom>&copy;
<a href=https://www.apache.org>The Apache Software Foundation</a>
| <a href=/privacy_policy>Privacy Policy</a>
| <a href=/feed.xml>RSS Feed</a><br><br>Apache Beam, Apache, Beam, the Beam logo, and the Apache feather logo are either registered trademarks or trademarks of The Apache Software Foundation. All other products or name brands are trademarks of their respective holders, including The Apache Software Foundation.</div></div><div class="footer__cols__col footer__cols__col__logos"><div class=footer__cols__col--group><div class=footer__cols__col__logo><a href=https://github.com/apache/beam><img src=/images/logos/social-icons/github-logo-150.png class=footer__logo alt="Github logo"></a></div><div class=footer__cols__col__logo><a href=https://www.linkedin.com/company/apache-beam/><img src=/images/logos/social-icons/linkedin-logo-150.png class=footer__logo alt="Linkedin logo"></a></div></div><div class=footer__cols__col--group><div class=footer__cols__col__logo><a href=https://twitter.com/apachebeam><img src=/images/logos/social-icons/twitter-logo-150.png class=footer__logo alt="Twitter logo"></a></div><div class=footer__cols__col__logo><a href=https://www.youtube.com/channel/UChNnb_YO_7B0HlW6FhAXZZQ><img src=/images/logos/social-icons/youtube-logo-150.png class=footer__logo alt="Youtube logo"></a></div></div></div></div></div></footer></body></html>