blob: 26ed881addf3cfbe86645fbe16503cbbf12b2150 [file] [log] [blame]
<!doctype html>
<html lang="en" dir="ltr" class="docs-wrapper plugin-docs plugin-id-default docs-version-current docs-doc-page docs-doc-id-guide/getting-started" data-has-hydrated="false">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v3.1.1">
<title data-rh="true">Getting started | Apache Wayang (incubating)</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:url" content="https://wayang.apache.org/docs/guide/getting-started"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docusaurus_version" content="current"><meta data-rh="true" name="docusaurus_tag" content="docs-default-current"><meta data-rh="true" name="docsearch:version" content="current"><meta data-rh="true" name="docsearch:docusaurus_tag" content="docs-default-current"><meta data-rh="true" property="og:title" content="Getting started | Apache Wayang (incubating)"><meta data-rh="true" name="description" content="Requirements, Maven setup, configuration settings, and cost function learning for getting started with Apache Wayang (incubating)."><meta data-rh="true" property="og:description" content="Requirements, Maven setup, configuration settings, and cost function learning for getting started with Apache Wayang (incubating)."><link data-rh="true" rel="icon" href="/img/wayang-logo.jpg"><link data-rh="true" rel="canonical" href="https://wayang.apache.org/docs/guide/getting-started"><link data-rh="true" rel="alternate" href="https://wayang.apache.org/docs/guide/getting-started" hreflang="en"><link data-rh="true" rel="alternate" href="https://wayang.apache.org/docs/guide/getting-started" hreflang="x-default"><link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="Apache Wayang (incubating) RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="Apache Wayang (incubating) Atom Feed"><link rel="stylesheet" href="/assets/css/styles.ecf70413.css">
<script src="/assets/js/runtime~main.db1fac0d.js" defer="defer"></script>
<script src="/assets/js/main.f50bad53.js" defer="defer"></script>
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return localStorage.getItem("theme")}catch(t){}}();t(null!==e?e:"light")}(),function(){try{const a=new URLSearchParams(window.location.search).entries();for(var[t,e]of a)if(t.startsWith("docusaurus-data-")){var n=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(n,e)}}catch(t){}}(),document.documentElement.setAttribute("data-announcement-bar-initially-dismissed",function(){try{return"true"===localStorage.getItem("docusaurus.announcement.dismiss")}catch(t){}return!1}())</script><div id="__docusaurus"><div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><div class="announcementBar_mb4j" style="background-color:#fafbfc;color:#091E42" role="banner"><div class="announcementBarPlaceholder_vyr4"></div><div class="content_knG7 announcementBarContent_xLdY">⭐️ If you like Apache Wayang, give it a star on <a target="_blank" href="https://github.com/apache/incubator-wayang">GitHub</a>! 
⭐ </div><button type="button" aria-label="Close" class="clean-btn close closeButton_CVFx announcementBarClose_gvF7"><svg viewBox="0 0 15 15" width="14" height="14"><g stroke="currentColor" stroke-width="3.1"><path d="M.75.75l13.5 13.5M14.25.75L.75 14.25"></path></g></svg></button></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/wayang.png" alt="Wayang Logo" class="themedComponent_mlkZ themedComponent--light_NVdE"><img src="/img/wayang.png" alt="Wayang Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU"></div><b class="navbar__title text--truncate"></b></a></div><div class="navbar__items navbar__items--right"><a class="navbar__item navbar__link" href="/docs/start/download">Download</a><a class="navbar__item navbar__link" href="/docs/introduction/about">About</a><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/docs/guide/installation">Developers</a><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Community</a><ul class="dropdown__menu"><li><a class="dropdown__link" href="/blog/">Blog</a></li><li><a class="dropdown__link" href="/docs/community/mailinglist">Project</a></li></ul></div><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">ASF</a><ul class="dropdown__menu"><li><a href="https://www.apache.org/" target="_blank" rel="noopener noreferrer" 
class="dropdown__link">Foundation</a></li><li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener noreferrer" class="dropdown__link">License</a></li><li><a href="https://www.apache.org/events/current-event.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Events</a></li><li><a href="https://privacy.apache.org/policies/privacy-policy-public.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Privacy</a></li><li><a href="https://www.apache.org/security/" target="_blank" rel="noopener noreferrer" class="dropdown__link">Security</a></li><li><a href="https://www.apache.org/foundation/sponsorship.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Sponsorship</a></li><li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Thanks</a></li><li><a href="https://www.apache.org/foundation/policies/conduct.html" target="_blank" rel="noopener noreferrer" class="dropdown__link">Code of Conduct</a></li></ul></div><a href="https://github.com/apache/incubator-wayang" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type="button" disabled="" title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)" aria-live="polite"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_pyhR"><path fill="currentColor" d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z 
M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_wfgR"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="navbarSearchContainer_Bca1"><div class="navbar__search"><span aria-label="expand searchbar" role="button" class="search-icon" tabindex="0"></span><input id="search_input_react" type="search" placeholder="Loading..." 
aria-label="Search" class="navbar__search-input search-bar" disabled=""></div></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0"><div class="docsWrapper_hBAB"><button aria-label="Scroll back to top" class="clean-btn theme-back-to-top-button backToTopButton_sjWU" type="button"></button><div class="docRoot_UBD9"><aside class="theme-doc-sidebar-container docSidebarContainer_YfHR"><div class="sidebarViewport_aRkj"><div class="sidebar_njMd"><nav aria-label="Docs sidebar" class="menu thin-scrollbar menu_SIkG menuWithAnnouncementBar_GW3s"><ul class="theme-doc-sidebar-menu menu__list"><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/guide/installation">How to build Wayang</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link menu__link--active" aria-current="page" href="/docs/guide/getting-started">Getting started</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/guide/ml4all">Scalable Machine Learning</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/guide/api-documentation">API documentation</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/guide/examples">Usage Examples</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/guide/developing-in-wayang">Developing in Wayang</a></li><li class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/guide/adding-operators">Adding an operator in Wayang</a></li><li 
class="theme-doc-sidebar-item-link theme-doc-sidebar-item-link-level-1 menu__list-item"><a class="menu__link" href="/docs/guide/optimizer">Data processing optimizer</a></li></ul></nav></div></div></aside><main class="docMainContainer_TBSr"><div class="container padding-top--md padding-bottom--lg"><div class="row"><div class="col docItemCol_VOVn"><div class="docItemContainer_Djhp"><article><nav class="theme-doc-breadcrumbs breadcrumbsContainer_Z_bl" aria-label="Breadcrumbs"><ul class="breadcrumbs" itemscope="" itemtype="https://schema.org/BreadcrumbList"><li class="breadcrumbs__item"><a aria-label="Home page" class="breadcrumbs__link" href="/"><svg viewBox="0 0 24 24" class="breadcrumbHomeIcon_YNFT"><path d="M10 19v-5h4v5c0 .55.45 1 1 1h3c.55 0 1-.45 1-1v-7h1.7c.46 0 .68-.57.33-.87L12.67 3.6c-.38-.34-.96-.34-1.34 0l-8.36 7.53c-.34.3-.13.87.33.87H5v7c0 .55.45 1 1 1h3c.55 0 1-.45 1-1z" fill="currentColor"></path></svg></a></li><li itemscope="" itemprop="itemListElement" itemtype="https://schema.org/ListItem" class="breadcrumbs__item breadcrumbs__item--active"><span class="breadcrumbs__link" itemprop="name">Getting started</span><meta itemprop="position" content="1"></li></ul></nav><div class="tocCollapsible_ETCw theme-doc-toc-mobile tocMobile_ITEo"><button type="button" class="clean-btn tocCollapsibleButton_TO0P">On this page</button></div><div class="theme-doc-markdown markdown"><header><h1>Getting started</h1></header><h2 class="anchor anchorWithStickyNavbar_LWe7" id="requirements">Requirements<a href="#requirements" class="hash-link" aria-label="Direct link to Requirements" title="Direct link to Requirements"></a></h2>
<p>Apache Wayang (incubating) is built upon the foundations of Java 11 and Scala 2.12, providing a robust and versatile platform for data processing applications. If you intend to build Wayang from source, you will also need to have Apache Maven, the popular build automation tool, installed on your system. Additionally, be mindful that some of the processing platforms supported by Wayang may have their own specific installation requirements.</p>
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="get-wayang">Get Wayang<a href="#get-wayang" class="hash-link" aria-label="Direct link to Get Wayang" title="Direct link to Get Wayang"></a></h3>
<p>Apache Wayang is readily available through Maven Central, facilitating seamless integration into your development workflow. For instance, to utilize Wayang in your Maven-based project, simply add the following dependency to your project&#x27;s POM file:</p>
<div class="language-xml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-xml codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token tag punctuation" style="color:#393A34">&lt;</span><span class="token tag" style="color:#00009f">dependency</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token tag punctuation" style="color:#393A34">&lt;</span><span class="token tag" style="color:#00009f">groupId</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain">org.apache.wayang</span><span class="token tag punctuation" style="color:#393A34">&lt;/</span><span class="token tag" style="color:#00009f">groupId</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token tag punctuation" style="color:#393A34">&lt;</span><span class="token tag" style="color:#00009f">artifactId</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain">wayang-***</span><span class="token tag punctuation" style="color:#393A34">&lt;/</span><span class="token tag" style="color:#00009f">artifactId</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token tag punctuation" style="color:#393A34">&lt;</span><span class="token tag" style="color:#00009f">version</span><span class="token tag 
punctuation" style="color:#393A34">&gt;</span><span class="token plain">0.7.1</span><span class="token tag punctuation" style="color:#393A34">&lt;/</span><span class="token tag" style="color:#00009f">version</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token tag punctuation" style="color:#393A34">&lt;/</span><span class="token tag" style="color:#00009f">dependency</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<p>Note the <code>***</code>: Wayang ships with multiple modules that can be included in your app, depending on how you want to use it:</p>
<ul>
<li><code>wayang-core</code>: provides core data structures and the optimizer (required)</li>
<li><code>wayang-basic</code>: provides common operators and data types for your apps (recommended)</li>
<li><code>wayang-api</code>: provides an easy-to-use Scala and Java API to assemble Wayang plans (recommended)</li>
<li><code>wayang-java</code>, <code>wayang-spark</code>, <code>wayang-graphchi</code>, <code>wayang-sqlite3</code>, <code>wayang-postgres</code>: adapters for the various supported processing platforms</li>
<li><code>wayang-profiler</code>: provides functionality to learn operator and UDF cost functions from historical execution data</li>
</ul>
<p>For the sake of version flexibility, you still have to include your Hadoop (<code>hadoop-hdfs</code> and <code>hadoop-common</code>) and Spark (<code>spark-core</code> and <code>spark-graphx</code>) version of choice.</p>
<p>In addition, you can obtain the most recent snapshot version of Wayang via Sonatype&#x27;s snapshot repository. Just include the following repository in your project&#x27;s POM file:</p>
<div class="language-xml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-xml codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token tag punctuation" style="color:#393A34">&lt;</span><span class="token tag" style="color:#00009f">repositories</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token tag punctuation" style="color:#393A34">&lt;</span><span class="token tag" style="color:#00009f">repository</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token tag punctuation" style="color:#393A34">&lt;</span><span class="token tag" style="color:#00009f">id</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain">sonatype-snapshots</span><span class="token tag punctuation" style="color:#393A34">&lt;/</span><span class="token tag" style="color:#00009f">id</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token tag punctuation" style="color:#393A34">&lt;</span><span class="token tag" style="color:#00009f">name</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain">Sonatype Snapshot Repository</span><span class="token tag punctuation" style="color:#393A34">&lt;/</span><span class="token tag" style="color:#00009f">name</span><span class="token tag 
punctuation" style="color:#393A34">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token tag punctuation" style="color:#393A34">&lt;</span><span class="token tag" style="color:#00009f">url</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain">https://oss.sonatype.org/content/repositories/snapshots</span><span class="token tag punctuation" style="color:#393A34">&lt;/</span><span class="token tag" style="color:#00009f">url</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token tag punctuation" style="color:#393A34">&lt;/</span><span class="token tag" style="color:#00009f">repository</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token tag punctuation" style="color:#393A34">&lt;/</span><span class="token tag" style="color:#00009f">repositories</span><span class="token tag punctuation" style="color:#393A34">&gt;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<p>If you need to rebuild Wayang, e.g., to use a different Scala version, you can simply do so via Maven:</p>
<ol>
<li>Adapt the version variables (e.g., <code>spark.version</code>) in the main <code>pom.xml</code> file.</li>
<li>Build Wayang with the adapted versions.<!-- -->
<div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">$ mvn clean </span><span class="token function" style="color:#d73a49">install</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<!-- -->Note the <code>standalone</code> profile, which fixes the Hadoop and Spark versions so that Wayang apps do not need to declare the corresponding dependencies explicitly.
Also, note the <code>distro</code> profile, which assembles a binary Wayang distribution.
To activate these profiles, you need to specify them when running Maven, i.e.,<!-- -->
<div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">mvn clean </span><span class="token function" style="color:#d73a49">install</span><span class="token plain"> -P</span><span class="token operator" style="color:#393A34">&lt;</span><span class="token plain">profile name</span><span class="token operator" style="color:#393A34">&gt;</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
</li>
</ol>
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="configure-wayang">Configure Wayang<a href="#configure-wayang" class="hash-link" aria-label="Direct link to Configure Wayang" title="Direct link to Configure Wayang"></a></h3>
<p>To enable Apache Wayang&#x27;s smooth operation, you need to equip it with details about your processing platforms&#x27; capabilities and how to interact with them. A default configuration is available for initial testing, but creating a properties file is generally preferable for fine-tuning the configuration to suit your specific requirements. To harness this personalized configuration effortlessly, launch your application via</p>
<div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">$ </span><span class="token function" style="color:#d73a49">java</span><span class="token plain"> </span><span class="token parameter variable" style="color:#36acaa">-Dwayang.configuration</span><span class="token operator" style="color:#393A34">=</span><span class="token plain">url://to/my/wayang.properties </span><span class="token punctuation" style="color:#393A34">..</span><span class="token plain">.</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<p>Essential configuration settings:</p>
<ul>
<li>General settings<!-- -->
<ul>
<li><code>wayang.core.log.enabled (= true)</code>: whether to log execution statistics to allow learning better cardinality and cost estimators for the optimizer</li>
<li><code>wayang.core.log.executions (= ~/.wayang/executions.json)</code>: where to log execution times of operator groups</li>
<li><code>wayang.core.log.cardinalities (= ~/.wayang/cardinalities.json)</code>: where to log cardinality measurements</li>
<li><code>wayang.core.optimizer.instrumentation (= org.apache.wayang.core.profiling.OutboundInstrumentationStrategy)</code>: where to measure cardinalities in Wayang plans; other options are <code>org.apache.wayang.core.profiling.NoInstrumentationStrategy</code> and <code>org.apache.wayang.core.profiling.FullInstrumentationStrategy</code></li>
<li><code>wayang.core.optimizer.reoptimize (= false)</code>: whether to progressively optimize Wayang plans</li>
<li><code>wayang.basic.tempdir (= file:///tmp)</code>: where to store temporary files, in particular for inter-platform communication</li>
</ul>
</li>
<li>Java Streams<!-- -->
<ul>
<li><code>wayang.java.cpu.mhz (= 2700)</code>: clock frequency of processor the JVM runs on in MHz</li>
<li><code>wayang.java.hdfs.ms-per-mb (= 2.7)</code>: average throughput from HDFS to JVM in ms/MB</li>
</ul>
</li>
<li>Apache Spark<!-- -->
<ul>
<li><code>spark.master (= local)</code>: Spark master<!-- -->
<ul>
<li>various other Spark settings are supported, e.g., <code>spark.executor.memory</code>, <code>spark.serializer</code>, ...</li>
</ul>
</li>
<li><code>wayang.spark.cpu.mhz (= 2700)</code>: clock frequency of processor the Spark workers run on in MHz</li>
<li><code>wayang.spark.hdfs.ms-per-mb (= 2.7)</code>: average throughput from HDFS to the Spark workers in ms/MB</li>
<li><code>wayang.spark.network.ms-per-mb (= 8.6)</code>: average network throughput of the Spark workers in ms/MB</li>
<li><code>wayang.spark.init.ms (= 4500)</code>: time it takes Spark to initialize in ms</li>
</ul>
</li>
<li>GraphChi<!-- -->
<ul>
<li><code>wayang.graphchi.cpu.mhz (= 2700)</code>: clock frequency of processor GraphChi runs on in MHz</li>
<li><code>wayang.graphchi.cpu.cores (= 2)</code>: number of cores GraphChi runs on</li>
<li><code>wayang.graphchi.hdfs.ms-per-mb (= 2.7)</code>: average throughput from HDFS to GraphChi in ms/MB</li>
</ul>
</li>
<li>SQLite<!-- -->
<ul>
<li><code>wayang.sqlite3.jdbc.url</code>: JDBC URL to use SQLite</li>
<li><code>wayang.sqlite3.jdbc.user</code>: optional user name</li>
<li><code>wayang.sqlite3.jdbc.password</code>: optional password</li>
<li><code>wayang.sqlite3.cpu.mhz (= 2700)</code>: clock frequency of processor SQLite runs on in MHz</li>
<li><code>wayang.sqlite3.cpu.cores (= 2)</code>: number of cores SQLite runs on</li>
</ul>
</li>
<li>PostgreSQL<!-- -->
<ul>
<li><code>wayang.postgres.jdbc.url</code>: JDBC URL to use PostgreSQL</li>
<li><code>wayang.postgres.jdbc.user</code>: optional user name</li>
<li><code>wayang.postgres.jdbc.password</code>: optional password</li>
<li><code>wayang.postgres.cpu.mhz (= 2700)</code>: clock frequency of processor PostgreSQL runs on in MHz</li>
<li><code>wayang.postgres.cpu.cores (= 2)</code>: number of cores PostgreSQL runs on</li>
</ul>
</li>
</ul>
<p>To effectively define your applications with Apache Wayang, utilize its Scala or Java API, conveniently found within the <code>wayang-api</code> module. For clear illustrations, refer to the provided examples below.</p>
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="cost-functions">Cost Functions<a href="#cost-functions" class="hash-link" aria-label="Direct link to Cost Functions" title="Direct link to Cost Functions"></a></h2>
<p>Wayang provides a utility to learn cost functions from historical execution data. Specifically, Wayang can learn configurations for load profile estimators (that estimate CPU load, disk load etc.) for both operators and UDFs, as long as the configuration provides a template for those estimators.</p>
<p>As an example, the <code>JavaMapOperator</code> draws its load profile estimator configuration via the configuration key <code>wayang.java.map.load</code>.
Now, it is possible to specify a load profile estimator template in the configuration under the key <code>&lt;original key&gt;.template</code>, e.g.:</p>
<div class="language-xml codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-xml codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token plain">wayang.java.map.load.template = {\</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> &quot;in&quot;:1, &quot;out&quot;:1,\</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> &quot;cpu&quot;:&quot;?*in0&quot;\</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<p>This template encapsulates a load profile estimator that requires at minimum one input cardinality and one output cardinality. Furthermore, it simulates CPU load by assuming a direct relationship with the input cardinality. However, more complex functions are possible.</p>
<p>In particular, you can use</p>
<ul>
<li>the variables <code>in0</code>, <code>in1</code>, ... and <code>out0</code>, <code>out1</code>, ... to incorporate the input and output cardinalities, respectively;</li>
<li>operator properties, such as <code>numIterations</code> for the <code>PageRankOperator</code> implementations;</li>
<li>the operators <code>+</code>, <code>-</code>, <code>*</code>, <code>/</code>, <code>%</code>, <code>^</code>, and parentheses;</li>
<li>the functions <code>min(x0, x1, ...)</code>, <code>max(x0, x1, ...)</code>, <code>abs(x)</code>, <code>log(x, base)</code>, <code>ln(x)</code>, <code>ld(x)</code>;</li>
<li>and the constants <code>e</code> and <code>pi</code>.</li>
</ul>
<p>While Apache Wayang provides templates for all execution operators, you will need to explicitly define your user-defined functions (UDFs) by specifying their cost functions, which are based on configuration parameters. This involves creating an initial specification and template for each UDF.
As soon as execution data has been collected, you can start the learning process with:</p>
<div class="language-shell codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-shell codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token function" style="color:#d73a49">java</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">..</span><span class="token plain">. org.apache.wayang.profiler.ga.GeneticOptimizerApp </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain">configuration URL </span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain">execution log</span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">]</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<p>This tool will attempt to determine suitable values for the question marks (<code>?</code>) within the load profile estimator templates, aligning them with the collected execution data and pre-defined configuration entries for the load profile estimators. These optimized values can then be directly incorporated into your configuration.</p>
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="examples">Examples<a href="#examples" class="hash-link" aria-label="Direct link to Examples" title="Direct link to Examples"></a></h2>
<p>For some executable examples, have a look at the <a href="https://github.com/sekruse/rheem-examples" target="_blank" rel="noopener noreferrer">rheem-examples repository</a>.</p>
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="wordcount">WordCount<a href="#wordcount" class="hash-link" aria-label="Direct link to WordCount" title="Direct link to WordCount"></a></h3>
<h4 class="anchor anchorWithStickyNavbar_LWe7" id="java-api">Java API<a href="#java-api" class="hash-link" aria-label="Direct link to Java API" title="Direct link to Java API"></a></h4>
<div class="language-java codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-java codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token import namespace" style="opacity:0.7">org</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">apache</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">wayang</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">api</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import class-name">JavaPlanBuilder</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token import namespace" style="opacity:0.7">org</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">apache</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">wayang</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" 
style="opacity:0.7">basic</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">data</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import class-name">Tuple2</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token import namespace" style="opacity:0.7">org</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">apache</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">wayang</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">core</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">api</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import class-name">Configuration</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token import namespace" style="opacity:0.7">org</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">apache</span><span 
class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">wayang</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">core</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">api</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import class-name">WayangContext</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token import namespace" style="opacity:0.7">org</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">apache</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">wayang</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">core</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">optimizer</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">cardinality</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import class-name">DefaultCardinalityEstimator</span><span class="token punctuation" 
style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token import namespace" style="opacity:0.7">org</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">apache</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">wayang</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">java</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import class-name">Java</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token import namespace" style="opacity:0.7">org</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">apache</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">wayang</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">spark</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import class-name">Spark</span><span class="token punctuation" style="color:#393A34">;</span><span class="token 
plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token import namespace" style="opacity:0.7">java</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">util</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import class-name">Collection</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token import namespace" style="opacity:0.7">java</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import namespace" style="opacity:0.7">util</span><span class="token import namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token import class-name">Arrays</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">public</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">class</span><span class="token plain"> </span><span class="token class-name">WordcountJava</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" 
style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">public</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">static</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">void</span><span class="token plain"> </span><span class="token function" style="color:#d73a49">main</span><span class="token punctuation" style="color:#393A34">(</span><span class="token class-name">String</span><span class="token punctuation" style="color:#393A34">[</span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"> args</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Settings</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token class-name">String</span><span class="token plain"> inputUrl </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;file:/tmp.txt&quot;</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Get a plan builder.</span><span 
class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token class-name">WayangContext</span><span class="token plain"> wayangContext </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> </span><span class="token class-name">WayangContext</span><span class="token punctuation" style="color:#393A34">(</span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> </span><span class="token class-name">Configuration</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">withPlugin</span><span class="token punctuation" style="color:#393A34">(</span><span class="token class-name">Java</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">basicPlugin</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">withPlugin</span><span class="token punctuation" style="color:#393A34">(</span><span class="token class-name">Spark</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" 
style="color:#d73a49">basicPlugin</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token class-name">JavaPlanBuilder</span><span class="token plain"> planBuilder </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> </span><span class="token class-name">JavaPlanBuilder</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">wayangContext</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">withJobName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token class-name">String</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">format</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;WordCount (%s)&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> inputUrl</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" 
style="color:#d73a49">withUdfJarOf</span><span class="token punctuation" style="color:#393A34">(</span><span class="token class-name">WordcountJava</span><span class="token punctuation" style="color:#393A34">.</span><span class="token keyword" style="color:#00009f">class</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Start building the WayangPlan.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token class-name">Collection</span><span class="token generics punctuation" style="color:#393A34">&lt;</span><span class="token generics class-name">Tuple2</span><span class="token generics punctuation" style="color:#393A34">&lt;</span><span class="token generics class-name">String</span><span class="token generics punctuation" style="color:#393A34">,</span><span class="token generics"> </span><span class="token generics class-name">Integer</span><span class="token generics punctuation" style="color:#393A34">&gt;</span><span class="token generics punctuation" style="color:#393A34">&gt;</span><span class="token plain"> wordcounts </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> planBuilder</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Read the text file.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" 
style="color:#393A34">.</span><span class="token function" style="color:#d73a49">readTextFile</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">inputUrl</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Load file&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Split each line by non-word characters.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">flatMap</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">line </span><span class="token operator" style="color:#393A34">-&gt;</span><span class="token plain"> </span><span class="token class-name">Arrays</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">asList</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">line</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">split</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;\\W+&quot;</span><span class="token punctuation" 
style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">withSelectivity</span><span class="token punctuation" style="color:#393A34">(</span><span class="token number" style="color:#36acaa">10</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">100</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0.9</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Split words&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Filter empty tokens.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">filter</span><span class="token punctuation" 
style="color:#393A34">(</span><span class="token plain">token </span><span class="token operator" style="color:#393A34">-&gt;</span><span class="token plain"> </span><span class="token operator" style="color:#393A34">!</span><span class="token plain">token</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">isEmpty</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">withSelectivity</span><span class="token punctuation" style="color:#393A34">(</span><span class="token number" style="color:#36acaa">0.99</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0.99</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0.99</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Filter empty words&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" 
style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Attach counter to each word.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">map</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">word </span><span class="token operator" style="color:#393A34">-&gt;</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> </span><span class="token class-name">Tuple2</span><span class="token generics punctuation" style="color:#393A34">&lt;</span><span class="token generics punctuation" style="color:#393A34">&gt;</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">word</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">toLowerCase</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">1</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;To lower case, add counter&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" 
style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Sum up counters for every word.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">reduceByKey</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token class-name">Tuple2</span><span class="token operator" style="color:#393A34">::</span><span class="token function" style="color:#d73a49">getField0</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">t1</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> t2</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token operator" style="color:#393A34">-&gt;</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> </span><span class="token class-name">Tuple2</span><span class="token generics punctuation" style="color:#393A34">&lt;</span><span class="token generics punctuation" style="color:#393A34">&gt;</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">t1</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">getField0</span><span class="token punctuation" 
style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> t1</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">getField1</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token operator" style="color:#393A34">+</span><span class="token plain"> t2</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">getField1</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">withCardinalityEstimator</span><span class="token punctuation" style="color:#393A34">(</span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> </span><span class="token class-name">DefaultCardinalityEstimator</span><span class="token punctuation" style="color:#393A34">(</span><span class="token number" style="color:#36acaa">0.9</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">1</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token boolean" 
style="color:#36acaa">false</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> in </span><span class="token operator" style="color:#393A34">-&gt;</span><span class="token plain"> </span><span class="token class-name">Math</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">round</span><span class="token punctuation" style="color:#393A34">(</span><span class="token number" style="color:#36acaa">0.01</span><span class="token plain"> </span><span class="token operator" style="color:#393A34">*</span><span class="token plain"> in</span><span class="token punctuation" style="color:#393A34">[</span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Add counters&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Execute the plan and collect the results.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" 
style="color:#393A34">.</span><span class="token function" style="color:#d73a49">collect</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token class-name">System</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">out</span><span class="token punctuation" style="color:#393A34">.</span><span class="token function" style="color:#d73a49">println</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">wordcounts</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" 
d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<h4 class="anchor anchorWithStickyNavbar_LWe7" id="scala-api">Scala API<a href="#scala-api" class="hash-link" aria-label="Direct link to Scala API" title="Direct link to Scala API"></a></h4>
<div class="language-scala codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-scala codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">org</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">apache</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">wayang</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">api</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token plain">_</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">org</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">apache</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">wayang</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">core</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">api</span><span class="token namespace punctuation" 
style="opacity:0.7;color:#393A34">.</span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain">Configuration</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> WayangContext</span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">org</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">apache</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">wayang</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">java</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token plain">Java</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">org</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">apache</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">wayang</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">spark</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token plain">Spark</span><br></span><span 
class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">object</span><span class="token plain"> WordcountScala </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">def</span><span class="token plain"> main</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">args</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> Array</span><span class="token punctuation" style="color:#393A34">[</span><span class="token builtin">String</span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Settings</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> inputUrl </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;file:/tmp.txt&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" 
style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Get a plan builder.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> wayangContext </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> WayangContext</span><span class="token punctuation" style="color:#393A34">(</span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> Configuration</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withPlugin</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">Java</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">basicPlugin</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withPlugin</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">Spark</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">basicPlugin</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token 
plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> planBuilder </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> PlanBuilder</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">wayangContext</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withJobName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string-interpolation id function" style="color:#d73a49">s</span><span class="token string-interpolation string" style="color:#e3116c">&quot;WordCount (</span><span class="token string-interpolation interpolation punctuation" style="color:#393A34">$</span><span class="token string-interpolation interpolation expression">inputUrl</span><span class="token string-interpolation string" style="color:#e3116c">)&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withUdfJarsOf</span><span class="token punctuation" style="color:#393A34">(</span><span class="token keyword" style="color:#00009f">this</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">getClass</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" 
style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> wordcounts </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> planBuilder</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Read the text file.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">readTextFile</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">inputUrl</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Load file&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Split each line by non-word characters.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">flatMap</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">_</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">split</span><span class="token punctuation" 
style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;\\W+&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> selectivity </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">10</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Split words&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Filter empty tokens.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">filter</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">_</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">nonEmpty</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> selectivity </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0.99</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span 
class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Filter empty words&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Attach counter to each word.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">map</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">word </span><span class="token keyword" style="color:#00009f">=&gt;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">word</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">toLowerCase</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">1</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;To lower case, add counter&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span 
class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Sum up counters for every word.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">reduceByKey</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">_</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">_1</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">c1</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> c2</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">=&gt;</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">c1</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">_1</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> c1</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">_2 </span><span class="token operator" style="color:#393A34">+</span><span class="token plain"> c2</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">_2</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Add 
counters&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withCardinalityEstimator</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">in</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token builtin">Long</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">=&gt;</span><span class="token plain"> math</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">round</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">in </span><span class="token operator" style="color:#393A34">*</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0.01</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Execute the plan and collect the results.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">collect</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" 
style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> println</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">wordcounts</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div>
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="k-means">k-means<a href="#k-means" class="hash-link" aria-label="Direct link to k-means" title="Direct link to k-means"></a></h3>
<p>Wayang also supports iterative processing, which is very important for machine learning algorithms such as k-means.</p>
<h4 class="anchor anchorWithStickyNavbar_LWe7" id="scala-api-1">Scala API<a href="#scala-api-1" class="hash-link" aria-label="Direct link to Scala API" title="Direct link to Scala API"></a></h4>
<div class="language-scala codeBlockContainer_Ckt0 theme-code-block" style="--prism-color:#393A34;--prism-background-color:#f6f8fa"><div class="codeBlockContent_biex"><pre tabindex="0" class="prism-code language-scala codeBlock_bY9V thin-scrollbar" style="color:#393A34;background-color:#f6f8fa"><code class="codeBlockLines_e6Vv"><span class="token-line" style="color:#393A34"><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">org</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">apache</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">wayang</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">api</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token plain">_</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">org</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">apache</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">wayang</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">core</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">api</span><span class="token namespace punctuation" 
style="opacity:0.7;color:#393A34">.</span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain">Configuration</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> WayangContext</span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">org</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">apache</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">wayang</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">core</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">function</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token plain">FunctionDescriptor</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">ExtendedSerializableFunction</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">org</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">apache</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">wayang</span><span 
class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">core</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">function</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token plain">ExecutionContext</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">org</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">apache</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">wayang</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">core</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">optimizer</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">costs</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token plain">LoadProfileEstimators</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">org</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">apache</span><span class="token namespace punctuation" 
style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">wayang</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">java</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token plain">Java</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">org</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">apache</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">wayang</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">spark</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token plain">Spark</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> </span><span class="token namespace" style="opacity:0.7">scala</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">util</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token plain">Random</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">import</span><span class="token plain"> 
</span><span class="token namespace" style="opacity:0.7">scala</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token namespace" style="opacity:0.7">collection</span><span class="token namespace punctuation" style="opacity:0.7;color:#393A34">.</span><span class="token plain">JavaConversions</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">_</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"></span><span class="token keyword" style="color:#00009f">object</span><span class="token plain"> kmeans </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">def</span><span class="token plain"> main</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">args</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> Array</span><span class="token punctuation" style="color:#393A34">[</span><span class="token builtin">String</span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Settings</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span 
class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> inputUrl </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;file:/kmeans.txt&quot;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> k </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">5</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> iterations </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">100</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> configuration </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> Configuration</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Get a plan builder.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span 
class="token plain"> wayangContext </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> WayangContext</span><span class="token punctuation" style="color:#393A34">(</span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> Configuration</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withPlugin</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">Java</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">basicPlugin</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withPlugin</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">Spark</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">basicPlugin</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> planBuilder </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> PlanBuilder</span><span class="token punctuation" style="color:#393A34">(</span><span class="token 
plain">wayangContext</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withJobName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string-interpolation id function" style="color:#d73a49">s</span><span class="token string-interpolation string" style="color:#e3116c">&quot;k-means (</span><span class="token string-interpolation interpolation punctuation" style="color:#393A34">$</span><span class="token string-interpolation interpolation expression">inputUrl</span><span class="token string-interpolation string" style="color:#e3116c">, k=</span><span class="token string-interpolation interpolation punctuation" style="color:#393A34">$</span><span class="token string-interpolation interpolation expression">k</span><span class="token string-interpolation string" style="color:#e3116c">, </span><span class="token string-interpolation interpolation punctuation" style="color:#393A34">$</span><span class="token string-interpolation interpolation expression">iterations</span><span class="token string-interpolation string" style="color:#e3116c"> iterations)&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withUdfJarsOf</span><span class="token punctuation" style="color:#393A34">(</span><span class="token keyword" style="color:#00009f">this</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">getClass</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" 
style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">case</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">class</span><span class="token plain"> Point</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">x</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token builtin">Double</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> y</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token builtin">Double</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">case</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">class</span><span class="token plain"> TaggedPoint</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">x</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token builtin">Double</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> y</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token builtin">Double</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> cluster</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token builtin">Int</span><span class="token punctuation" style="color:#393A34">)</span><span class="token 
plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">case</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">class</span><span class="token plain"> TaggedPointCounter</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">x</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token builtin">Double</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> y</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token builtin">Double</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> cluster</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token builtin">Int</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> count</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> </span><span class="token builtin">Long</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">def</span><span class="token plain"> add_points</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">that</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> TaggedPointCounter</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token operator" 
style="color:#393A34">=</span><span class="token plain"> TaggedPointCounter</span><span class="token punctuation" style="color:#393A34">(</span><span class="token keyword" style="color:#00009f">this</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">x </span><span class="token operator" style="color:#393A34">+</span><span class="token plain"> that</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">x</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">this</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">y </span><span class="token operator" style="color:#393A34">+</span><span class="token plain"> that</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">y</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">this</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">cluster</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">this</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">count </span><span class="token operator" style="color:#393A34">+</span><span class="token plain"> that</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">count</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">def</span><span class="token plain"> average </span><span class="token operator" 
style="color:#393A34">=</span><span class="token plain"> TaggedPointCounter</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">x </span><span class="token operator" style="color:#393A34">/</span><span class="token plain"> count</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> y </span><span class="token operator" style="color:#393A34">/</span><span class="token plain"> count</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> cluster</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Read and parse the input file(s).</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> points </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> planBuilder</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">readTextFile</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">inputUrl</span><span class="token punctuation" 
style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Read file&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">map </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"> line </span><span class="token keyword" style="color:#00009f">=&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> fields </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> line</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">split</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;,&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> Point</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">fields</span><span class="token punctuation" style="color:#393A34">(</span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">toDouble</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> fields</span><span class="token punctuation" 
style="color:#393A34">(</span><span class="token number" style="color:#36acaa">1</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">toDouble</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Create points&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Create initial centroids.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> random </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> Random</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> initialCentroids </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> 
planBuilder</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">loadCollection</span><span class="token punctuation" style="color:#393A34">(</span><span class="token keyword" style="color:#00009f">for</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">i </span><span class="token keyword" style="color:#00009f">&lt;-</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">1</span><span class="token plain"> to </span><span class="token namespace" style="opacity:0.7">k</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">yield</span><span class="token plain"> TaggedPointCounter</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">random</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">nextGaussian</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> random</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">nextGaussian</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> i</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token 
punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Load random centroids&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Declare UDF to select centroid for each data point.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">class</span><span class="token plain"> SelectNearestCentroid </span><span class="token keyword" style="color:#00009f">extends</span><span class="token plain"> ExtendedSerializableFunction</span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain">Point</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> TaggedPointCounter</span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">/** Keeps the broadcasted centroids. 
*/</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">var</span><span class="token plain"> centroids</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> Iterable</span><span class="token punctuation" style="color:#393A34">[</span><span class="token plain">TaggedPointCounter</span><span class="token punctuation" style="color:#393A34">]</span><span class="token plain"> </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> _</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">override</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">def</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">open</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">executionCtx</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> ExecutionContext</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> centroids </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> executionCtx</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">getBroadcast</span><span class="token punctuation" style="color:#393A34">[</span><span 
class="token plain">TaggedPointCounter</span><span class="token punctuation" style="color:#393A34">]</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;centroids&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">override</span><span class="token plain"> </span><span class="token keyword" style="color:#00009f">def</span><span class="token plain"> apply</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">point</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> Point</span><span class="token punctuation" style="color:#393A34">)</span><span class="token operator" style="color:#393A34">:</span><span class="token plain"> TaggedPointCounter </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">var</span><span class="token plain"> minDistance </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token builtin">Double</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">PositiveInfinity</span><br></span><span class="token-line" 
style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">var</span><span class="token plain"> nearestCentroidId </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> </span><span class="token operator" style="color:#393A34">-</span><span class="token number" style="color:#36acaa">1</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">for</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">centroid </span><span class="token keyword" style="color:#00009f">&lt;-</span><span class="token plain"> centroids</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> distance </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> Math</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">pow</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">Math</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">pow</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">point</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">x </span><span class="token operator" style="color:#393A34">-</span><span class="token plain"> centroid</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">x</span><span class="token 
punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">2</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token operator" style="color:#393A34">+</span><span class="token plain"> Math</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">pow</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">point</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">y </span><span class="token operator" style="color:#393A34">-</span><span class="token plain"> centroid</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">y</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">2</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">0.5</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">if</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">distance </span><span class="token operator" style="color:#393A34">&lt;</span><span class="token plain"> minDistance</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> minDistance </span><span class="token 
operator" style="color:#393A34">=</span><span class="token plain"> distance</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> nearestCentroidId </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> centroid</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">cluster</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> TaggedPointCounter</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">point</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">x</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> point</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">y</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> nearestCentroidId</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token number" style="color:#36acaa">1</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> 
</span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Do the k-means loop.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token keyword" style="color:#00009f">val</span><span class="token plain"> finalCentroids </span><span class="token operator" style="color:#393A34">=</span><span class="token plain"> initialCentroids</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">repeat</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">iterations</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">{</span><span class="token plain"> currentCentroids </span><span class="token keyword" style="color:#00009f">=&gt;</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> points</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">mapJava</span><span class="token punctuation" style="color:#393A34">(</span><span class="token keyword" style="color:#00009f">new</span><span class="token plain"> SelectNearestCentroid</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> udfLoad </span><span class="token operator" style="color:#393A34">=</span><span 
class="token plain"> LoadProfileEstimators</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">createFromSpecification</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;my.udf.costfunction.key&quot;</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> configuration</span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withBroadcast</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">currentCentroids</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> </span><span class="token string" style="color:#e3116c">&quot;centroids&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Find nearest centroid&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">reduceByKey</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">_</span><span 
class="token punctuation" style="color:#393A34">.</span><span class="token plain">cluster</span><span class="token punctuation" style="color:#393A34">,</span><span class="token plain"> _</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">add_points</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">_</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Add up points&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withCardinalityEstimator</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">k</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">map</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">_</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">average</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Average points&quot;</span><span class="token punctuation" 
style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token punctuation" style="color:#393A34">)</span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">withName</span><span class="token punctuation" style="color:#393A34">(</span><span class="token string" style="color:#e3116c">&quot;Loop&quot;</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token comment" style="color:#999988;font-style:italic">// Collect the results.</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">.</span><span class="token plain">collect</span><span class="token punctuation" style="color:#393A34">(</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain" style="display:inline-block"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> println</span><span class="token punctuation" style="color:#393A34">(</span><span class="token plain">finalCentroids</span><span class="token punctuation" style="color:#393A34">)</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token plain"> </span><span class="token punctuation" style="color:#393A34">}</span><span class="token plain"></span><br></span><span class="token-line" style="color:#393A34"><span class="token 
plain"></span><span class="token punctuation" style="color:#393A34">}</span><br></span></code></pre><div class="buttonGroup__atx"><button type="button" aria-label="Copy code to clipboard" title="Copy" class="clean-btn"><span class="copyButtonIcons_eSgA" aria-hidden="true"><svg viewBox="0 0 24 24" class="copyButtonIcon_y97N"><path fill="currentColor" d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"></path></svg><svg viewBox="0 0 24 24" class="copyButtonSuccessIcon_LjdS"><path fill="currentColor" d="M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z"></path></svg></span></button></div></div></div></div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Docs pages"><a class="pagination-nav__link pagination-nav__link--prev" href="/docs/guide/installation"><div class="pagination-nav__sublabel">Previous</div><div class="pagination-nav__label">How to build Wayang</div></a><a class="pagination-nav__link pagination-nav__link--next" href="/docs/guide/ml4all"><div class="pagination-nav__sublabel">Next</div><div class="pagination-nav__label">Scalable Machine Learning</div></a></nav></div></div><div class="col col--3"><div class="tableOfContents_bqdL thin-scrollbar theme-doc-toc-desktop"><ul class="table-of-contents table-of-contents__left-border"><li><a href="#requirements" class="table-of-contents__link toc-highlight">Requirements</a><ul><li><a href="#get-wayang" class="table-of-contents__link toc-highlight">Get Wayang</a></li><li><a href="#configure-wayang" class="table-of-contents__link toc-highlight">Configure Wayang</a></li></ul></li><li><a href="#cost-functions" class="table-of-contents__link toc-highlight">Cost Functions</a></li><li><a href="#examples" class="table-of-contents__link toc-highlight">Examples</a><ul><li><a href="#wordcount" class="table-of-contents__link toc-highlight">WordCount</a></li><li><a href="#k-means" class="table-of-contents__link 
toc-highlight">k-means</a></li></ul></li></ul></div></div></div></div></main></div></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://lists.apache.org/list.html?dev@wayang.apache.org" target="_blank" rel="noopener noreferrer" class="footer__link-item">Mailing list<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://www.youtube.com/@apachewayang" target="_blank" rel="noopener noreferrer" class="footer__link-item">YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/apachewayang" target="_blank" rel="noopener noreferrer" class="footer__link-item">LinkedIn<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://www.reddit.com/r/ApacheWayang" target="_blank" rel="noopener noreferrer" class="footer__link-item">Reddit<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li 
class="footer__item"><a href="https://twitter.com/apachewayang" target="_blank" rel="noopener noreferrer" class="footer__link-item">Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">Docs</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/docs/start/download">Install</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/introduction/features">Features</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/introduction/benchmark">Benchmark</a></li></ul></div><div class="col footer__col"><div class="footer__title">Repositories</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://github.com/apache/incubator-wayang" target="_blank" rel="noopener noreferrer" class="footer__link-item">Wayang<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://github.com/apache/incubator-wayang-website" target="_blank" rel="noopener noreferrer" class="footer__link-item">Website<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="margin-bottom--sm"><a href="https://incubator.apache.org/" rel="noopener noreferrer" class="footerLogoLink_BH7S"><img 
src="/img/apache-incubator.svg" alt="Apache Incubator logo" class="footer__logo themedComponent_mlkZ themedComponent--light_NVdE" width="200"><img src="/img/apache-incubator.svg" alt="Apache Incubator logo" class="footer__logo themedComponent_mlkZ themedComponent--dark_xIcU" width="200"></a></div><div class="footer__copyright"><div>
<p> Apache Wayang is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF. </p>
<p>
Copyright © 2024 The Apache Software Foundation. Licensed under the Apache License, Version 2.0.<br>
Apache, the names of Apache projects, and the feather logo are either registered trademarks or trademarks of the Apache Software Foundation in the United States and/or other countries.
</p>
</div></div></div></div></footer></div>
</body>
</html>