blob: b613cf959d7fd9f954421f615cf8af7f45f69654 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=UA-61232409-1"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'UA-61232409-1');
</script>
<!-- Yandex.Metrika counter -->
<script type="text/javascript" >
(function(m,e,t,r,i,k,a){m[i]=m[i]||function(){(m[i].a=m[i].a||[]).push(arguments)};
m[i].l=1*new Date();k=e.createElement(t),a=e.getElementsByTagName(t)[0],k.async=1,k.src=r,a.parentNode.insertBefore(k,a)})
(window, document, "script", "https://mc.yandex.ru/metrika/tag.js", "ym");
ym(72949126, "init", {
clickmap:true,
trackLinks:true,
accurateTrackBounce:true,
webvisor:true
});
</script>
<noscript><div><img src="https://mc.yandex.ru/watch/72949126" style="position:absolute; left:-9999px;" alt="" /></div></noscript>
<!-- /Yandex.Metrika counter -->
<!-- LuckyOrange code -->
<script type='text/javascript'>
window.__lo_site_id = 284467;
(function() {
var wa = document.createElement('script'); wa.type = 'text/javascript'; wa.async = true;
wa.src = 'https://d10lpsik1i8c69.cloudfront.net/w.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(wa, s);
})();
</script>
<!-- /LuckyOrange code -->
<!-- Bugyard widget embed -->
<script type="text/javascript">!function(){if("function"!=typeof window.bugyard){var a=function(){a.c(arguments)};a.q=[],a.c=function(b){a.q.push(b)},window.bugyard=a;var b=document.createElement("script");b.setAttribute("data-bugyard","610961912c35ff001493163a"),b.setAttribute("async","async"),b.setAttribute("defer","defer"),b.setAttribute("src","https://widget.bugyard.io/bugyard.min.js"),document.getElementsByTagName("head")[0].appendChild(b)}}(); window.bugyard("hideButton"); </script>
<link rel="preload" href='/assets/js/code-tabs.js?1' as="script" crossorigin>
<link rel="preload" href='/assets/js/page-nav.js' as="script" crossorigin>
<link rel="preload" href='/assets/js/docs-menu.js?20201005' as="script" crossorigin>
<style>:root{--gg-red:#ec1c24;--gg-orange:#ec1c24;--gg-orange-dark:#bc440b;--gg-orange-filter:invert(47%) sepia(61%) saturate(1950%) hue-rotate(345deg) brightness(100%) contrast(95%);--gg-dark-gray:#333333;--orange-line-thickness:3px;--block-code-background:rgba(241, 241, 241, 20%);--inline-code-background:rgba(241, 241, 241, 90%);--padding-top:25px;--link-color:#ec1c24;--body-background:#fcfcfc}header{min-height:var(--header-height);background:#fff;box-shadow:0 4px 10px 0 #eee,0 0 4px 0 #d5d5d5;z-index:1}header>.container{display:grid;grid-template-columns:auto auto 1fr auto auto auto;grid-template-areas:'left-toggle home nav ver api search lang';grid-template-rows:40px;flex-direction:row;align-items:center;justify-content:flex-start;padding:12px 20px;max-width:1400px;margin:0 auto}header nav>ul{padding:0;margin:0;list-style:none;display:inherit}header .dropdown{display:none;position:fixed;top:calc(var(--header-height) - 12px);width:auto;background:#fff;box-shadow:0 4px 4px 0 rgba(0,0,0,.24),0 0 4px 0 rgba(0,0,0,.12);border-radius:4px;padding-top:10px;padding-bottom:12px;z-index:2}header .dropdown li{display:flex}header .dropdown a{color:grey!important;font-size:16px;padding-top:5px;padding-bottom:4px}header .menu{border:none;background:0 0;width:40px;height:40px;margin-right:12px;grid-area:left-toggle}header .menu img{width:18px;height:12px}header .search-close,header .top-nav-toggle{background:0 0;border:none;padding:0;width:36px;height:36px;display:inline-flex;align-items:center;justify-content:center;color:var(--gg-dark-gray);font-size:26px}header .search-toggle{grid-area:search}header .top-nav-toggle{grid-area:top-toggle}header .home{grid-area:home;margin-right:auto}header .home img{height:36px}header #api-docs{grid-area:api;margin:0;display:flex}header #api-docs .dropdown{padding:.5em 0}header #api-docs a{padding:9px 14px;color:var(--gg-dark-gray)!important;text-decoration:none;white-space:nowrap}header #api-docs .dropdown-item a{font-weight:400;display:block;width:100%;min-width:150px}header #lang-selector li{list-style:none;display:flex;padding:9px 14px}header #lang-selector li a{display:flex;color:#000;align-items:center}header #lang-selector li a span{font-size:10px;margin-left:5px}header #lang-selector li a img{width:25px}header #lang-selector li .dropdown{margin-left:-70px}header #lang-selector li .dropdown .dropdown-item{padding:0 1em;margin-bottom:8px}header #lang-selector li .dropdown .dropdown-item a span{font-size:14px}header .search{margin-left:auto;margin-right:20px;grid-area:search}header .search input[type=search]{color:var(--gg-dark-gray);background:rgba(255,255,255,.8);border:1px solid #ccc;padding:10px 15px;font-family:inherit;max-width:148px;height:37px;font-size:14px;-webkit-appearance:unset;appearance:unset}header #version-selector{list-style:none;grid-area:ver;line-height:28px;border-radius:0;margin-right:10px;border:none;color:var(--gg-dark-gray);padding:5px 16px 5px 10px;white-space:nowrap;font-size:14px;width:auto;text-align:right;box-sizing:border-box;text-align-last:right;-moz-appearance:none;-webkit-appearance:none;appearance:none;direction:rtl}header #version-selector option{direction:ltr}header>nav{grid-area:nav;font-size:18px;display:flex;flex-direction:row;margin:0 20px}header #lang-selector{grid-area:lang}header .search-close{margin-right:10px}@media (max-width:600px){header .search{margin-right:5px}header .search input[type=search]{max-width:110px}}header:not(.narrow-header) .search-close,header:not(.narrow-header) .top-nav-toggle{display:none}@media (max-width:670px){header>.container{grid-template-columns:auto 1fr auto;grid-template-areas:'left-toggle home search' 'ver api lang'}header #lang-selector li{justify-content:flex-end}}pre,pre.rouge{padding:8px 15px;background:var(--block-code-background)!important;border-radius:5px;border:1px solid #e5e5e5;overflow-x:auto;min-height:36px;line-height:18px;color:#545454}code{color:#545454}pre.rouge code{background:0 0!important}:not(pre)>code{background:var(--inline-code-background);padding:.1em .5em;background-clip:padding-box;border-radius:3px;color:#545454;font-size:90%}.listingblock .content{position:relative}.highlight{color:#586e75}.highlight .c1{color:#657b83}.highlight .nt{color:#b58900}.highlight .o{color:#93a1a1}.highlight .k{color:#6c71c4}.highlight .kt{color:#cb4b16}.highlight .s,.highlight .s1{color:#859900}.highlight .nc{color:#b58900}.highlight .na{color:#268bd2}body{font-family:'Open Sans',sans-serif}h1,h2{color:#000;font-weight:400;font-family:'Open Sans'}h1{font-size:36px;line-height:40px}a{text-decoration:none;color:var(--link-color)}section{color:#545454}.admonitionblock .icon .title{display:none}body{--header-height:64px;--promotion-bar-height:35px;--footer-height:104px;--footer-gap:60px;padding:0;margin:0;display:flex;flex-direction:column;min-height:100vh;background-color:var(--body-background);font-family:'Open Sans',sans-serif}body>section{flex:1}header{position:-webkit-sticky;position:sticky;top:0;z-index:2}*{box-sizing:border-box}@media (max-width:670px){body{--header-height:97px}}.left-nav{padding:10px 20px;width:289px;overflow-y:auto;top:calc(var(--header-height) + var(--promotion-bar-height));height:calc(100vh - var(--header-height) - var(--promotion-bar-height));font-family:'Open Sans';padding-top:var(--padding-top);background-color:var(--body-background)}.left-nav li{list-style:none}.left-nav a,.left-nav button{text-decoration:none;color:#757575;font-size:16px;display:inline-flex;width:100%;margin:2px 0;padding:.25em .375em;background:0 0;border:none;font:inherit;text-align:left}.left-nav a.active{color:var(--link-color)}.left-nav .nav-group{margin-left:6px;font-size:14px}.left-nav nav{border-left:2px solid #ddd;margin-bottom:5px}.left-nav nav.collapsed{display:none}.left-nav nav>li>a,.left-nav nav>li>button{padding-left:20px;text-align:left}.left-nav nav>li>a.active{border-left:var(--orange-line-thickness) solid var(--active-color);padding-left:calc(20px - var(--orange-line-thickness))}.left-nav nav.sub_pages{border:none}.left-nav nav.sub_pages a{padding-left:32px}.left-nav .state-indicator{margin-left:auto;margin-top:5px;width:6.2px;height:10px;flex:0 0 auto;filter:invert(49%) sepia(4%) saturate(5%) hue-rotate(23deg) brightness(92%) contrast(90%)}.left-nav button.expanded .state-indicator{transform:rotate(90deg)}.right-nav{width:289px;padding:12px 26px;overflow-y:auto;height:calc(100vh - var(--header-height));top:0;position:-webkit-sticky;position:sticky;display:flex;flex-direction:column;font-family:'Open sans';padding-top:var(--padding-top);background-color:#fff}.right-nav ul{list-style:none;padding:0;margin:0}.right-nav li{padding:0}.right-nav a{--border-width:0px;font-size:14px;color:#757575;padding-left:calc(15px * var(--nesting-level) + 8px - var(--border-width));margin:.3em 0;display:inline-block}.right-nav .sectlevel1{border-left:2px solid #ddd}.right-nav .sectlevel1{--nesting-level:0}.right-nav .sectlevel2{--nesting-level:1}.right-nav .sectlevel3{--nesting-level:2}@media (max-width:1200px){.right-nav{width:230px}}.right-nav footer{font-size:12px;padding:calc(var(--footer-gap) * .3) 0 5px;text-align:left;margin:auto 0 0}section.page-docs{display:grid;grid-template-columns:auto 1fr auto;grid-template-rows:100%;grid-template-areas:'left-nav content right-nav';line-height:20px;max-width:1440px;margin:auto;width:100%}section.page-docs>article{border-left:1px solid #eee;background-color:#fff;padding:0 50px 30px;grid-area:content;overflow:hidden;font-family:sans-serif;font-size:16px;color:#545454;line-height:1.6em}section.page-docs>article h1,section.page-docs>article h2{font-family:'Open Sans'}@media (max-width:800px){section.page-docs>article{padding-left:15px;padding-right:15px}}section.page-docs .edit-link{position:relative;top:10px;right:10px;float:right;padding-top:calc(var(--header-height) + var(--padding-top));margin-top:calc((-1 * var(--header-height)))}section.page-docs h1,section.page-docs h2{margin-bottom:0}section.page-docs h2[id]{margin-top:var(--margin-top);margin-bottom:calc(var(--margin-top) * .5);z-index:-1}section.page-docs .title{font-style:italic}section.page-docs h2[id]{--margin-top:1.2em}.left-nav{bottom:0;position:-webkit-sticky;position:sticky}.left-nav{grid-area:left-nav}.right-nav{grid-area:right-nav}.left-nav__overlay{display:none;background:rgba(0,0,0,.5);z-index:1;position:fixed;top:var(--header-height);bottom:0;left:0;right:0}@media (max-width:990px){body:not(.hide-left-nav) .left-nav__overlay{display:block}nav.left-nav{background:#fafafa;grid-area:left-nav;box-shadow:0 4px 4px 0 rgba(0,0,0,.24),0 0 4px 0 rgba(0,0,0,.12);min-height:calc(100vh - var(--header-height));max-height:calc(100vh - var(--header-height));position:fixed;bottom:0;top:var(--header-height);z-index:2}section.page-docs>article{grid-column-start:left-nav;grid-column-end:content;grid-row:content}}@media (max-width:800px){nav.right-nav{display:none}}:target:before{content:"";display:block;margin-top:calc(var(--header-height) * -1);height:var(--header-height);width:1px}@media (min-width:600px) and (max-width:900px){:target:before{content:"";display:block;width:1px;margin-top:-150px;height:150px}}
#header #promotion-bar { background-color: #333333; padding: 8px; }
#header #promotion-bar p { font-size: 14px; line-height: 1.4em; font-weight: 600; padding: 0; margin: 0; color: #f0f0f0; text-align: center;}
#header #promotion-bar p a { color: #FCB903; } </style>
<meta name="ignite-version" content="2.11.0" />
<title>Preprocessing | Ignite Documentation</title>
<link rel="canonical" href="/docs/latest/machine-learning/preprocessing" />
<link rel="shortcut icon" href="/favicon.ico">
<meta name='viewport' content='width=device-width, height=device-height, initial-scale=1.0, minimum-scale=1.0'>
<link rel="preload" as="style" href="https://fonts.googleapis.com/css2?family=Open+Sans:wght@300;400;600;700&display=swap" />
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Open+Sans:wght@300;400;600;700&display=swap" media="print" onload="this.media='all'">
<noscript>
<link href="https://fonts.googleapis.com/css2?family=Open+Sans:wght@300;400;600;700&display=swap" rel="stylesheet">
</noscript>
<script>
// AnchorJS - v4.2.0 - 2019-01-01
// https://github.com/bryanbraun/anchorjs
// Copyright (c) 2019 Bryan Braun; Licensed MIT
!function(A,e){"use strict";"function"==typeof define&&define.amd?define([],e):"object"==typeof module&&module.exports?module.exports=e():(A.AnchorJS=e(),A.anchors=new A.AnchorJS)}(this,function(){"use strict";return function(A){function f(A){A.icon=A.hasOwnProperty("icon")?A.icon:"",A.visible=A.hasOwnProperty("visible")?A.visible:"hover",A.placement=A.hasOwnProperty("placement")?A.placement:"right",A.ariaLabel=A.hasOwnProperty("ariaLabel")?A.ariaLabel:"Anchor",A.class=A.hasOwnProperty("class")?A.class:"",A.base=A.hasOwnProperty("base")?A.base:"",A.truncate=A.hasOwnProperty("truncate")?Math.floor(A.truncate):64,A.titleText=A.hasOwnProperty("titleText")?A.titleText:""}function p(A){var e;if("string"==typeof A||A instanceof String)e=[].slice.call(document.querySelectorAll(A));else{if(!(Array.isArray(A)||A instanceof NodeList))throw new Error("The selector provided to AnchorJS was invalid.");e=[].slice.call(A)}return e}this.options=A||{},this.elements=[],f(this.options),this.isTouchDevice=function(){return!!("ontouchstart"in window||window.DocumentTouch&&document instanceof DocumentTouch)},this.add=function(A){var e,t,i,n,o,s,a,r,c,h,l,u,d=[];if(f(this.options),"touch"===(l=this.options.visible)&&(l=this.isTouchDevice()?"always":"hover"),A||(A="h2, h3, h4, h5, h6"),0===(e=p(A)).length)return this;for(function(){if(null===document.head.querySelector("style.anchorjs")){var A,e=document.createElement("style");e.className="anchorjs",e.appendChild(document.createTextNode("")),void 0===(A=document.head.querySelector('[rel="stylesheet"], style'))?document.head.appendChild(e):document.head.insertBefore(e,A),e.sheet.insertRule(" .anchorjs-link { opacity: 0; text-decoration: none; -webkit-font-smoothing: antialiased; -moz-osx-font-smoothing: grayscale; }",e.sheet.cssRules.length),e.sheet.insertRule(" *:hover > .anchorjs-link, .anchorjs-link:focus { opacity: 1; }",e.sheet.cssRules.length),e.sheet.insertRule(" [data-anchorjs-icon]::after { content: attr(data-anchorjs-icon); }",e.sheet.cssRules.length),e.sheet.insertRule(' @font-face { font-family: "anchorjs-icons"; src: url(data:n/a;base64,AAEAAAALAIAAAwAwT1MvMg8yG2cAAAE4AAAAYGNtYXDp3gC3AAABpAAAAExnYXNwAAAAEAAAA9wAAAAIZ2x5ZlQCcfwAAAH4AAABCGhlYWQHFvHyAAAAvAAAADZoaGVhBnACFwAAAPQAAAAkaG10eASAADEAAAGYAAAADGxvY2EACACEAAAB8AAAAAhtYXhwAAYAVwAAARgAAAAgbmFtZQGOH9cAAAMAAAAAunBvc3QAAwAAAAADvAAAACAAAQAAAAEAAHzE2p9fDzz1AAkEAAAAAADRecUWAAAAANQA6R8AAAAAAoACwAAAAAgAAgAAAAAAAAABAAADwP/AAAACgAAA/9MCrQABAAAAAAAAAAAAAAAAAAAAAwABAAAAAwBVAAIAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAMCQAGQAAUAAAKZAswAAACPApkCzAAAAesAMwEJAAAAAAAAAAAAAAAAAAAAARAAAAAAAAAAAAAAAAAAAAAAQAAg//0DwP/AAEADwABAAAAAAQAAAAAAAAAAAAAAIAAAAAAAAAIAAAACgAAxAAAAAwAAAAMAAAAcAAEAAwAAABwAAwABAAAAHAAEADAAAAAIAAgAAgAAACDpy//9//8AAAAg6cv//f///+EWNwADAAEAAAAAAAAAAAAAAAAACACEAAEAAAAAAAAAAAAAAAAxAAACAAQARAKAAsAAKwBUAAABIiYnJjQ3NzY2MzIWFxYUBwcGIicmNDc3NjQnJiYjIgYHBwYUFxYUBwYGIwciJicmNDc3NjIXFhQHBwYUFxYWMzI2Nzc2NCcmNDc2MhcWFAcHBgYjARQGDAUtLXoWOR8fORYtLTgKGwoKCjgaGg0gEhIgDXoaGgkJBQwHdR85Fi0tOAobCgoKOBoaDSASEiANehoaCQkKGwotLXoWOR8BMwUFLYEuehYXFxYugC44CQkKGwo4GkoaDQ0NDXoaShoKGwoFBe8XFi6ALjgJCQobCjgaShoNDQ0NehpKGgobCgoKLYEuehYXAAAADACWAAEAAAAAAAEACAAAAAEAAAAAAAIAAwAIAAEAAAAAAAMACAAAAAEAAAAAAAQACAAAAAEAAAAAAAUAAQALAAEAAAAAAAYACAAAAAMAAQQJAAEAEAAMAAMAAQQJAAIABgAcAAMAAQQJAAMAEAAMAAMAAQQJAAQAEAAMAAMAAQQJAAUAAgAiAAMAAQQJAAYAEAAMYW5jaG9yanM0MDBAAGEAbgBjAGgAbwByAGoAcwA0ADAAMABAAAAAAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAAH//wAP) format("truetype"); }',e.sheet.cssRules.length)}}(),t=document.querySelectorAll("[id]"),i=[].map.call(t,function(A){return A.id}),o=0;o<e.length;o++)if(this.hasAnchorJSLink(e[o]))d.push(o);else{if(e[o].hasAttribute("id"))n=e[o].getAttribute("id");else if(e[o].hasAttribute("data-anchor-id"))n=e[o].getAttribute("data-anchor-id");else{for(c=r=this.urlify(e[o].textContent),a=0;void 0!==s&&(c=r+"-"+a),a+=1,-1!==(s=i.indexOf(c)););s=void 0,i.push(c),e[o].setAttribute("id",c),n=c}n.replace(/-/g," "),(h=document.createElement("a")).className="anchorjs-link "+this.options.class,h.setAttribute("aria-label",this.options.ariaLabel),h.setAttribute("data-anchorjs-icon",this.options.icon),this.options.titleText&&(h.title=this.options.titleText),u=document.querySelector("base")?window.location.pathname+window.location.search:"",u=this.options.base||u,h.href=u+"#"+n,"always"===l&&(h.style.opacity="1"),""===this.options.icon&&(h.style.font="1em/1 anchorjs-icons","left"===this.options.placement&&(h.style.lineHeight="inherit")),"left"===this.options.placement?(h.style.position="absolute",h.style.marginLeft="-1em",h.style.paddingRight="0.5em",e[o].insertBefore(h,e[o].firstChild)):(h.style.paddingLeft="0.375em",e[o].appendChild(h))}for(o=0;o<d.length;o++)e.splice(d[o]-o,1);return this.elements=this.elements.concat(e),this},this.remove=function(A){for(var e,t,i=p(A),n=0;n<i.length;n++)(t=i[n].querySelector(".anchorjs-link"))&&(-1!==(e=this.elements.indexOf(i[n]))&&this.elements.splice(e,1),i[n].removeChild(t));return this},this.removeAll=function(){this.remove(this.elements)},this.urlify=function(A){return this.options.truncate||f(this.options),A.trim().replace(/\'/gi,"").replace(/[& +$,:;=?@"#{}|^~[`%!'<>\]\.\/\(\)\*\\\n\t\b\v]/g,"-").replace(/-{2,}/g,"-").substring(0,this.options.truncate).replace(/^-+|-+$/gm,"").toLowerCase()},this.hasAnchorJSLink=function(A){var e=A.firstChild&&-1<(" "+A.firstChild.className+" ").indexOf(" anchorjs-link "),t=A.lastChild&&-1<(" "+A.lastChild.className+" ").indexOf(" anchorjs-link ");return e||t||!1}}});
</script>
</head>
<body>
<header>
<!--#include virtual="/includes/promotion_banner.html" -->
<div class="container">
<button type='button' class='menu' title='Docs menu'>
<img src="/assets/images/menu-icon.svg" width="18" height="12" alt="menu icon" />
</button>
<div class='home'>
<a href="/" class='home' title='Apache Ignite home'>
<img src="/assets/images/apache_ignite_logo.svg" alt="Apache Ignite logo" width="103" height="36" >
</a>
</div>
<select id="version-selector">
<option value="2.11.0">2.11.0</option>
</select>
<nav id="api-docs"><ul>
<li><a href="#">APIs</a>
<nav class='dropdown'>
<ul>
<li class="dropdown-item"><a href="/releases/latest/javadoc/index.html">Java</a></li>
<li class="dropdown-item"><a href="/releases/latest/dotnetdoc/api/">C#/.NET</a></li>
<li class="dropdown-item"><a href="/releases/latest/cppdoc/index.html">C++</a></li>
<li class="dropdown-item"><a href="/releases/latest/scaladoc/scalar/index.html#org.apache.ignite.scalar.scalar$">Scala</a></li>
</ul>
</nav>
</li>
<li><a href="#">Examples</a>
<nav class="dropdown">
<ul>
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/examples" target="_blank" rel="noopener" title="Apache Ignite Java examples">Java</a></li>
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/modules/platforms/dotnet/examples" target="_blank" rel="noopener" title="Apache Ignite C#/.NET examples">C#/.NET</a></li>
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/modules/platforms/cpp/examples" target="_blank" rel="noopener" title="Apache Ignite C++ examples">C++</a></li>
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/modules/platforms/python/examples" target="_blank" rel="noopener" title="Apache Ignite Python examples">Python</a></li>
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/modules/platforms/nodejs/examples" target="_blank" rel="noopener" title="Apache Ignite NodeJS examples">NodeJS</a></li>
<li class="dropdown-item"><a href="https://github.com/apache/ignite/tree/master/modules/platforms/php/examples" target="_blank" rel="noopener" title="Apache Ignite PHP examples">PHP</a></li>
</ul>
</nav>
</li></ul>
</nav>
<form class='search'>
<button class="search-close" type='button'><img src='/assets/images/cancel.svg' alt="close" width="10" height="10" /></button>
<input type="search" placeholder="Search…" id="search-input">
</form>
<nav id="lang-selector"><ul>
<li><a href="#"><img src="/assets/images/icon_lang_en_75x75.jpg" alt="English language icon" width="25" height="25" /><span></span></a>
<nav class="dropdown">
<li class="dropdown-item"><a href="/docs/latest/" ><img src="/assets/images/icon_lang_en_75x75.jpg" alt="English language icon" width="25" height="25" /><span>English</span></a></li>
<li class="dropdown-item"><a href="https://www.ignite-service.cn/doc/java/" target="_blank" rel="noopener"><img src="/assets/images/icon_lang_cn_75x75.jpg" width="25" height="25" alt="Chinese language icon" /><span>Chinese</span></a></li>
</nav>
</li></ul>
</nav>
<button type='button' class='top-nav-toggle'></button>
</div>
</header>
<link rel="stylesheet" href="/assets/css/docs.css">
<section class='page-docs'>
<nav class='left-nav' data-swiftype-index='false'>
<li>
<a href="/docs/latest/index" class='' >Documentation Overview</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Quick Start Guides<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/quick-start/java"
class=''
>Java</a>
</li>
<li>
<a href="/docs/latest/quick-start/dotnet"
class=''
>.NET/C#</a>
</li>
<li>
<a href="/docs/latest/quick-start/cpp"
class=''
>C++</a>
</li>
<li>
<a href="/docs/latest/quick-start/python"
class=''
>Python</a>
</li>
<li>
<a href="/docs/latest/quick-start/nodejs"
class=''
>Node.JS</a>
</li>
<li>
<a href="/docs/latest/quick-start/sql"
class=''
>SQL</a>
</li>
<li>
<a href="/docs/latest/quick-start/php"
class=''
>PHP</a>
</li>
<li>
<a href="/docs/latest/quick-start/restapi"
class=''
>REST API</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Installation<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/installation/installing-using-zip"
class=''
>Installing Using ZIP Archive</a>
</li>
<li>
<a href="/docs/latest/installation/installing-using-docker"
class=''
>Installing Using Docker</a>
</li>
<li>
<a href="/docs/latest/installation/deb-rpm"
class=''
>Installing DEB or RPM package</a>
</li>
<li>
<button
type='button'
class='collapsed '>Kubernetes<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/installation/kubernetes/amazon-eks-deployment" class=''>Amazon EKS</a></li>
<li><a href="/docs/latest/installation/kubernetes/azure-deployment" class=''>Azure Kubernetes Service</a></li>
<li><a href="/docs/latest/installation/kubernetes/gke-deployment" class=''>Google Kubernetes Engine</a></li>
</nav>
</li>
<li>
<a href="/docs/latest/installation/vmware-installation"
class=''
>VMWare</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Setting Up<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/understanding-configuration"
class=''
>Understanding Configuration</a>
</li>
<li>
<a href="/docs/latest/setup"
class=''
>Setting Up</a>
</li>
<li>
<a href="/docs/latest/logging"
class=''
>Configuring Logging</a>
</li>
<li>
<a href="/docs/latest/resources-injection"
class=''
>Resources Injection</a>
</li>
</nav>
</li>
<li>
<a href="/docs/latest/starting-nodes" class='' >Starting and Stopping Nodes</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Clustering<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/clustering/clustering"
class=''
>Overview</a>
</li>
<li>
<a href="/docs/latest/clustering/tcp-ip-discovery"
class=''
>TCP/IP Discovery</a>
</li>
<li>
<a href="/docs/latest/clustering/zookeeper-discovery"
class=''
>ZooKeeper Discovery</a>
</li>
<li>
<a href="/docs/latest/clustering/discovery-in-the-cloud"
class=''
>Discovery in the Cloud</a>
</li>
<li>
<a href="/docs/latest/clustering/network-configuration"
class=''
>Network Configuration</a>
</li>
<li>
<a href="/docs/latest/clustering/connect-client-nodes"
class=''
>Connecting Client Nodes</a>
</li>
<li>
<a href="/docs/latest/clustering/baseline-topology"
class=''
>Baseline Topology</a>
</li>
<li>
<a href="/docs/latest/clustering/running-client-nodes-behind-nat"
class=''
>Running Client Nodes Behind NAT</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Thin Clients<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/thin-clients/getting-started-with-thin-clients"
class=''
>Thin Clients Overview</a>
</li>
<li>
<a href="/docs/latest/thin-clients/java-thin-client"
class=''
>Java Thin Client</a>
</li>
<li>
<a href="/docs/latest/thin-clients/dotnet-thin-client"
class=''
>.NET Thin Client</a>
</li>
<li>
<a href="/docs/latest/thin-clients/cpp-thin-client"
class=''
>C++ Thin Client</a>
</li>
<li>
<a href="/docs/latest/thin-clients/python-thin-client"
class=''
>Python Thin Client</a>
</li>
<li>
<a href="/docs/latest/thin-clients/php-thin-client"
class=''
>PHP Thin Client</a>
</li>
<li>
<a href="/docs/latest/thin-clients/nodejs-thin-client"
class=''
>Node.js Thin Client</a>
</li>
<li>
<button
type='button'
class='collapsed '>Binary Client Protocol<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/binary-client-protocol/binary-client-protocol" class=''>Binary Client Protocol</a></li>
<li><a href="/docs/latest/binary-client-protocol/data-format" class=''>Data Format</a></li>
<li><a href="/docs/latest/binary-client-protocol/key-value-queries" class=''>Key-Value Queries</a></li>
<li><a href="/docs/latest/binary-client-protocol/sql-and-scan-queries" class=''>SQL and Scan Queries</a></li>
<li><a href="/docs/latest/binary-client-protocol/binary-type-metadata" class=''>Binary Types Metadata</a></li>
<li><a href="/docs/latest/binary-client-protocol/cache-configuration" class=''>Cache Configuration</a></li>
</nav>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Data Modeling<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/data-modeling/data-modeling"
class=''
>Introduction</a>
</li>
<li>
<a href="/docs/latest/data-modeling/data-partitioning"
class=''
>Data Partitioning</a>
</li>
<li>
<a href="/docs/latest/data-modeling/affinity-collocation"
class=''
>Affinity Colocation</a>
</li>
<li>
<a href="/docs/latest/data-modeling/binary-marshaller"
class=''
>Binary Marshaller</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Configuring Memory<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/memory-architecture"
class=''
>Memory Architecture</a>
</li>
<li>
<a href="/docs/latest/memory-configuration/data-regions"
class=''
>Configuring Data Regions</a>
</li>
<li>
<a href="/docs/latest/memory-configuration/eviction-policies"
class=''
>Eviction Policies</a>
</li>
<li>
<a href="/docs/latest/memory-configuration/replacement-policies"
class=''
>Replacement Policies</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Configuring Persistence<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/persistence/native-persistence"
class=''
>Ignite Persistence</a>
</li>
<li>
<a href="/docs/latest/persistence/external-storage"
class=''
>External Storage</a>
</li>
<li>
<a href="/docs/latest/persistence/swap"
class=''
>Swapping</a>
</li>
<li>
<a href="/docs/latest/persistence/custom-cache-store"
class=''
>Implementing Custom Cache Store</a>
</li>
<li>
<a href="/docs/latest/persistence/snapshot-directory"
class=''
>Configuring Snapshot Directory</a>
</li>
<li>
<a href="/docs/latest/persistence/disk-compression"
class=''
>Disk Compression</a>
</li>
<li>
<a href="/docs/latest/persistence/persistence-tuning"
class=''
>Tuning Persistence</a>
</li>
</nav>
</li>
<li>
<a href="/docs/latest/snapshots/snapshots" class='' >Cluster Snapshots</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Configuring Caches<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/configuring-caches/configuration-overview"
class=''
>Cache Configuration</a>
</li>
<li>
<a href="/docs/latest/configuring-caches/configuring-backups"
class=''
>Configuring Partition Backups</a>
</li>
<li>
<a href="/docs/latest/configuring-caches/partition-loss-policy"
class=''
>Partition Loss Policy</a>
</li>
<li>
<a href="/docs/latest/configuring-caches/atomicity-modes"
class=''
>Atomicity Modes</a>
</li>
<li>
<a href="/docs/latest/configuring-caches/expiry-policies"
class=''
>Expiry Policy</a>
</li>
<li>
<a href="/docs/latest/configuring-caches/on-heap-caching"
class=''
>On-Heap Caching</a>
</li>
<li>
<a href="/docs/latest/configuring-caches/cache-groups"
class=''
>Cache Groups</a>
</li>
<li>
<a href="/docs/latest/configuring-caches/near-cache"
class=''
>Near Caches</a>
</li>
</nav>
</li>
<li>
<a href="/docs/latest/data-rebalancing" class='' >Data Rebalancing</a>
</li>
<li>
<a href="/docs/latest/data-streaming" class='' >Data Streaming</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Using Key-Value API<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/key-value-api/basic-cache-operations"
class=''
>Basic Cache Operations</a>
</li>
<li>
<a href="/docs/latest/key-value-api/binary-objects"
class=''
>Working with Binary Objects</a>
</li>
<li>
<a href="/docs/latest/key-value-api/using-scan-queries"
class=''
>Using Scan Queries</a>
</li>
<li>
<a href="/docs/latest/read-repair"
class=''
>Read Repair</a>
</li>
</nav>
</li>
<li>
<a href="/docs/latest/key-value-api/transactions" class='' >Performing Transactions</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Working with SQL<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/SQL/sql-introduction"
class=''
>Introduction</a>
</li>
<li>
<a href="/docs/latest/SQL/schemas"
class=''
>Understanding Schemas</a>
</li>
<li>
<a href="/docs/latest/SQL/indexes"
class=''
>Defining Indexes</a>
</li>
<li>
<a href="/docs/latest/SQL/sql-api"
class=''
>Using SQL API</a>
</li>
<li>
<a href="/docs/latest/SQL/distributed-joins"
class=''
>Distributed Joins</a>
</li>
<li>
<a href="/docs/latest/SQL/sql-transactions"
class=''
>SQL Transactions</a>
</li>
<li>
<a href="/docs/latest/SQL/custom-sql-func"
class=''
>Custom SQL Functions</a>
</li>
<li>
<a href="/docs/latest/SQL/JDBC/jdbc-driver"
class=''
>JDBC Driver</a>
</li>
<li>
<a href="/docs/latest/SQL/JDBC/jdbc-client-driver"
class=''
>JDBC Client Driver</a>
</li>
<li>
<button
type='button'
class='collapsed '>ODBC Driver<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/SQL/ODBC/odbc-driver" class=''>ODBC Driver</a></li>
<li><a href="/docs/latest//SQL/ODBC/connection-string-dsn" class=''>Connection String and DSN</a></li>
<li><a href="/docs/latest/SQL/ODBC/querying-modifying-data" class=''>Querying and Modifying Data</a></li>
<li><a href="/docs/latest/SQL/ODBC/specification" class=''>Specification</a></li>
<li><a href="/docs/latest/SQL/ODBC/data-types" class=''>Data Types</a></li>
<li><a href="/docs/latest/SQL/ODBC/error-codes" class=''>Error Codes</a></li>
</nav>
</li>
<li>
<a href="/docs/latest/transactions/mvcc"
class=''
>Multiversion Concurrency Control</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>SQL Reference<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/sql-reference/sql-conformance"
class=''
>SQL Conformance</a>
</li>
<li>
<a href="/docs/latest/sql-reference/ddl"
class=''
>Data Definition Language (DDL)</a>
</li>
<li>
<a href="/docs/latest/sql-reference/dml"
class=''
>Data Manipulation Language (DML)</a>
</li>
<li>
<a href="/docs/latest/sql-reference/transactions"
class=''
>Transactions</a>
</li>
<li>
<a href="/docs/latest/sql-reference/operational-commands"
class=''
>Operational Commands</a>
</li>
<li>
<a href="/docs/latest/sql-reference/aggregate-functions"
class=''
>Aggregate functions</a>
</li>
<li>
<a href="/docs/latest/sql-reference/numeric-functions"
class=''
>Numeric Functions</a>
</li>
<li>
<a href="/docs/latest/sql-reference/string-functions"
class=''
>String Functions</a>
</li>
<li>
<a href="/docs/latest/sql-reference/date-time-functions"
class=''
>Data and Time Functions</a>
</li>
<li>
<a href="/docs/latest/sql-reference/system-functions"
class=''
>System Functions</a>
</li>
<li>
<a href="/docs/latest/sql-reference/data-types"
class=''
>Data Types</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Distributed Computing<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/distributed-computing/distributed-computing"
class=''
>Distributed Computing API</a>
</li>
<li>
<a href="/docs/latest/distributed-computing/cluster-groups"
class=''
>Cluster Groups</a>
</li>
<li>
<a href="/docs/latest/distributed-computing/executor-service"
class=''
>Executor Service</a>
</li>
<li>
<a href="/docs/latest/distributed-computing/map-reduce"
class=''
>MapReduce API</a>
</li>
<li>
<a href="/docs/latest/distributed-computing/load-balancing"
class=''
>Load Balancing</a>
</li>
<li>
<a href="/docs/latest/distributed-computing/fault-tolerance"
class=''
>Fault Tolerance</a>
</li>
<li>
<a href="/docs/latest/distributed-computing/job-scheduling"
class=''
>Job Scheduling</a>
</li>
<li>
<a href="/docs/latest/distributed-computing/collocated-computations"
class=''
>Colocating Computations with Data</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Code Deployment<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/code-deployment/deploying-user-code"
class=''
>Deploying User Code</a>
</li>
<li>
<a href="/docs/latest/code-deployment/peer-class-loading"
class=''
>Peer Class Loading</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle expanded '>Machine Learning<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group expanded'>
<li>
<a href="/docs/latest/machine-learning/machine-learning"
class=''
>Machine Learning</a>
</li>
<li>
<a href="/docs/latest/machine-learning/partition-based-dataset"
class=''
>Partition Based Dataset</a>
</li>
<li>
<a href="/docs/latest/machine-learning/updating-trained-models"
class=''
>Updating Trained Models</a>
</li>
<li>
<button
type='button'
class='collapsed '>Binary Classification<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/machine-learning/binary-classification/introduction" class=''>Introduction</a></li>
<li><a href="/docs/latest/machine-learning/binary-classification/linear-svm" class=''>Linear SVM (Support Vector Machine)</a></li>
<li><a href="/docs/latest/machine-learning/binary-classification/decision-trees" class=''>Decision Trees</a></li>
<li><a href="/docs/latest/machine-learning/binary-classification/multilayer-perceptron" class=''>Multilayer Perceptron</a></li>
<li><a href="/docs/latest/machine-learning/binary-classification/logistic-regression" class=''>Logistic Regression</a></li>
<li><a href="/docs/latest/machine-learning/binary-classification/knn-classification" class=''>k-NN Classification</a></li>
<li><a href="/docs/latest/machine-learning/binary-classification/ann" class=''>ANN (Approximate Nearest Neighbor)</a></li>
<li><a href="/docs/latest/machine-learning/binary-classification/naive-bayes" class=''>Naive Bayes</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>Regression<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/machine-learning/regression/introduction" class=''>Introduction</a></li>
<li><a href="/docs/latest/machine-learning/regression/linear-regression" class=''>Linear Regression</a></li>
<li><a href="/docs/latest/machine-learning/regression/decision-trees-regression" class=''>Decision Trees Regression</a></li>
<li><a href="/docs/latest/machine-learning/regression/knn-regression" class=''>k-NN Regression</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>Clustering<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/machine-learning/clustering/introduction" class=''>Introduction</a></li>
<li><a href="/docs/latest/machine-learning/clustering/k-means-clustering" class=''>K-Means Clustering</a></li>
<li><a href="/docs/latest/machine-learning/clustering/gaussian-mixture" class=''>Gaussian mixture (GMM)</a></li>
</nav>
</li>
<li>
<a href="/docs/latest/machine-learning/preprocessing"
class='active'
>Preprocessing</a>
</li>
<li>
<button
type='button'
class='collapsed '>Model Selection<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/machine-learning/model-selection/introduction" class=''>Introduction</a></li>
<li><a href="/docs/latest/machine-learning/model-selection/evaluator" class=''>Evaluator</a></li>
<li><a href="/docs/latest/machine-learning/model-selection/split-the-dataset-on-test-and-train-datasets" class=''>Split the dataset on test and train datasets</a></li>
<li><a href="/docs/latest/machine-learning/model-selection/hyper-parameter-tuning" class=''>Hyper-parameter tuning</a></li>
<li><a href="/docs/latest/machine-learning/model-selection/pipeline-api" class=''>Pipeline API</a></li>
</nav>
</li>
<li>
<a href="/docs/latest/machine-learning/multiclass-classification"
class=''
>Multiclass Classification</a>
</li>
<li>
<button
type='button'
class='collapsed '>Ensemble Methods<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/machine-learning/ensemble-methods/introduction" class=''></a></li>
<li><a href="/docs/latest/machine-learning/ensemble-methods/stacking" class=''>Stacking</a></li>
<li><a href="/docs/latest/machine-learning/ensemble-methods/baggin" class=''>Bagging</a></li>
<li><a href="/docs/latest/machine-learning/ensemble-methods/random-forest" class=''>Random Forest</a></li>
<li><a href="/docs/latest/machine-learning/ensemble-methods/gradient-boosting" class=''>Gradient Boosting</a></li>
</nav>
</li>
<li>
<a href="/docs/latest/machine-learning/recommendation-systems"
class=''
>Recommendation Systems</a>
</li>
<li>
<button
type='button'
class='collapsed '>Importing Model<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/machine-learning/importing-model/introduction" class=''>Introduction</a></li>
<li><a href="/docs/latest/machine-learning/importing-model/model-import-from-gxboost" class=''>Import Model from XGBoost</a></li>
<li><a href="/docs/latest/machine-learning/importing-model/model-import-from-apache-spark" class=''>Import Model from Apache Spark</a></li>
</nav>
</li>
</nav>
</li>
<li>
<a href="/docs/latest/key-value-api/continuous-queries" class='' >Using Continuous Queries</a>
</li>
<li>
<a href="/docs/latest/services/services" class='' >Using Ignite Services</a>
</li>
<li>
<a href="/docs/latest/messaging" class='' >Using Ignite Messaging</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Distributed Data Structures<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/data-structures/queue-and-set"
class=''
>Queue and Set</a>
</li>
<li>
<a href="/docs/latest/data-structures/atomic-types"
class=''
>Atomic Types</a>
</li>
<li>
<a href="/docs/latest/data-structures/countdownlatch"
class=''
>CountDownLatch</a>
</li>
<li>
<a href="/docs/latest/data-structures/atomic-sequence"
class=''
>Atomic Sequence</a>
</li>
<li>
<a href="/docs/latest/data-structures/semaphore"
class=''
>Semaphore</a>
</li>
<li>
<a href="/docs/latest/data-structures/id-generator"
class=''
>ID Generator</a>
</li>
</nav>
</li>
<li>
<a href="/docs/latest/distributed-locks" class='' >Distributed Locks</a>
</li>
<li>
<a href="/docs/latest/restapi" class='' >REST API</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>.NET Specific<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/net-specific/net-configuration-options"
class=''
>Configuration Options</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-deployment-options"
class=''
>Deployment Options</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-standalone-nodes"
class=''
>Standalone Nodes</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-async"
class=''
>Asynchronous APIs</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-logging"
class=''
>Logging</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-linq"
class=''
>LINQ</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-java-services-execution"
class=''
>Java Services Execution</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-platform-cache"
class=''
>.NET Platform Cache</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-plugins"
class=''
>Plugins</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-serialization"
class=''
>Serialization</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-cross-platform-support"
class=''
>Cross-Platform Support</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-platform-interoperability"
class=''
>Platform Interoperability</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-remote-assembly-loading"
class=''
>Remote Assembly Loading</a>
</li>
<li>
<a href="/docs/latest/net-specific/net-troubleshooting"
class=''
>Troubleshooting</a>
</li>
<li>
<button
type='button'
class='collapsed '>Integrations<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/net-specific/asp-net-output-caching" class=''>ASP.NET Output Caching</a></li>
<li><a href="/docs/latest/net-specific/asp-net-session-state-caching" class=''>ASP.NET Session State Caching</a></li>
<li><a href="/docs/latest/net-specific/net-entity-framework-cache" class=''>Entity Framework 2nd Level Cache</a></li>
</nav>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>C++ Specific<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/cpp-specific/cpp-serialization"
class=''
>Serialization</a>
</li>
<li>
<a href="/docs/latest/cpp-specific/cpp-platform-interoperability"
class=''
>Platform Interoperability</a>
</li>
<li>
<a href="/docs/latest/cpp-specific/cpp-objects-lifetime"
class=''
>Objects Lifetime</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Monitoring<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/monitoring-metrics/intro"
class=''
>Introduction</a>
</li>
<li>
<a href="/docs/latest/monitoring-metrics/cluster-id"
class=''
>Cluster ID and Tag</a>
</li>
<li>
<a href="/docs/latest/monitoring-metrics/cluster-states"
class=''
>Cluster States</a>
</li>
<li>
<button
type='button'
class='collapsed '>Metrics<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/monitoring-metrics/configuring-metrics" class=''>Configuring Metrics</a></li>
<li><a href="/docs/latest/monitoring-metrics/metrics" class=''>JMX Metrics</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>New Metrics System<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/monitoring-metrics/new-metrics-system" class=''>Introduction</a></li>
<li><a href="/docs/latest/monitoring-metrics/new-metrics" class=''>Metrics</a></li>
</nav>
</li>
<li>
<a href="/docs/latest/monitoring-metrics/system-views"
class=''
>System Views</a>
</li>
<li>
<a href="/docs/latest/monitoring-metrics/performance-statistics"
class=''
>Performance Statistics</a>
</li>
<li>
<a href="/docs/latest/monitoring-metrics/tracing"
class=''
>Tracing</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Working with Events<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/events/listening-to-events"
class=''
>Enabling and Listenting to Events</a>
</li>
<li>
<a href="/docs/latest/events/events"
class=''
>Events</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Tools<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/tools/control-script"
class=''
>Control Script</a>
</li>
<li>
<a href="/docs/latest/tools/visor-cmd"
class=''
>Visor CMD</a>
</li>
<li>
<a href="/docs/latest/tools/gg-control-center"
class=''
>GridGain Control Center</a>
</li>
<li>
<a href="/docs/latest/tools/sqlline"
class=''
>SQLLine</a>
</li>
<li>
<a href="/docs/latest/tools/tableau"
class=''
>Tableau</a>
</li>
<li>
<a href="/docs/latest/tools/informatica"
class=''
>Informatica</a>
</li>
<li>
<a href="/docs/latest/tools/pentaho"
class=''
>Pentaho</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Security<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/security/authentication"
class=''
>Authentication</a>
</li>
<li>
<a href="/docs/latest/security/ssl-tls"
class=''
>SSL/TLS</a>
</li>
<li>
<button
type='button'
class='collapsed '>Transparent Data Encryption<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/security/tde" class=''>Introduction</a></li>
<li><a href="/docs/latest/security/master-key-rotation" class=''>Master key rotation</a></li>
<li><a href="/docs/latest/security/cache-encryption-key-rotation" class=''>Cache encryption key rotation</a></li>
</nav>
</li>
<li>
<a href="/docs/latest/security/sandbox"
class=''
>Sandbox</a>
</li>
</nav>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Extensions and Integrations<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<button
type='button'
class='collapsed '>Spring<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/extensions-and-integrations/spring/spring-boot" class=''>Spring Boot</a></li>
<li><a href="/docs/latest/extensions-and-integrations/spring/spring-data" class=''>Spring Data</a></li>
<li><a href="/docs/latest/extensions-and-integrations/spring/spring-caching" class=''>Spring Caching</a></li>
<li><a href="/docs/latest/extensions-and-integrations/spring/spring-tx" class=''>Spring Transactions</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>Ignite for Spark<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/extensions-and-integrations/ignite-for-spark/overview" class=''>Overview</a></li>
<li><a href="/docs/latest/extensions-and-integrations/ignite-for-spark/ignitecontext-and-rdd" class=''>IgniteContext and IgniteRDD</a></li>
<li><a href="/docs/latest/extensions-and-integrations/ignite-for-spark/ignite-dataframe" class=''>Ignite DataFrame</a></li>
<li><a href="/docs/latest/extensions-and-integrations/ignite-for-spark/installation" class=''>Installation</a></li>
<li><a href="/docs/latest/extensions-and-integrations/ignite-for-spark/spark-shell" class=''>Test Ignite with Spark-shell</a></li>
<li><a href="/docs/latest/extensions-and-integrations/ignite-for-spark/troubleshooting" class=''>Troubleshooting</a></li>
</nav>
</li>
<li>
<a href="/docs/latest/extensions-and-integrations/hibernate-l2-cache"
class=''
>Hibernate L2 Cache</a>
</li>
<li>
<a href="/docs/latest/extensions-and-integrations/mybatis-l2-cache"
class=''
>MyBatis L2 Cache</a>
</li>
<li>
<button
type='button'
class='collapsed '>Streaming<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/extensions-and-integrations/streaming/kafka-streamer" class=''>Kafka Streamer</a></li>
<li><a href="/docs/latest/extensions-and-integrations/streaming/camel-streamer" class=''>Camel Streamer</a></li>
<li><a href="/docs/latest/extensions-and-integrations/streaming/flink-streamer" class=''>Flink Streamer</a></li>
<li><a href="/docs/latest/extensions-and-integrations/streaming/flume-sink" class=''>Flume Sink</a></li>
<li><a href="/docs/latest/extensions-and-integrations/streaming/jms-streamer" class=''>JMS Streamer</a></li>
<li><a href="/docs/latest/extensions-and-integrations/streaming/mqtt-streamer" class=''>MQTT Streamer</a></li>
<li><a href="/docs/latest/extensions-and-integrations/streaming/rocketmq-streamer" class=''>RocketMQ Streamer</a></li>
<li><a href="/docs/latest/extensions-and-integrations/streaming/storm-streamer" class=''>Storm Streamer</a></li>
<li><a href="/docs/latest/extensions-and-integrations/streaming/zeromq-streamer" class=''>ZeroMQ Streamer</a></li>
<li><a href="/docs/latest/extensions-and-integrations/streaming/twitter-streamer" class=''>Twitter Streamer</a></li>
</nav>
</li>
<li>
<button
type='button'
class='collapsed '>Cassandra Integration<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class="sub_pages collapsed">
<li><a href="/docs/latest/extensions-and-integrations/cassandra/overview" class=''>Overview</a></li>
<li><a href="/docs/latest/extensions-and-integrations/cassandra/configuration" class=''>Configuration</a></li>
<li><a href="/docs/latest/extensions-and-integrations/cassandra/usage-examples" class=''>Usage Examples</a></li>
<li><a href="/docs/latest/extensions-and-integrations/cassandra/ddl-generator" class=''>DDL Generator</a></li>
</nav>
</li>
<li>
<a href="/docs/latest/extensions-and-integrations/php-pdo"
class=''
>PHP PDO</a>
</li>
<li>
<a href="/docs/latest/extensions-and-integrations/performance-statistics"
class=''
>Performance Statistics</a>
</li>
</nav>
</li>
<li>
<a href="/docs/latest/plugins" class='' >Plugins</a>
</li>
<li>
<button type='button' class='group-toggle collapsed '>Performance and Troubleshooting<img class="state-indicator" src="/assets/images/left-nav-arrow.svg" width="6" height="10"></button>
<nav class='nav-group collapsed'>
<li>
<a href="/docs/latest/perf-and-troubleshooting/general-perf-tips"
class=''
>General Performance Tips</a>
</li>
<li>
<a href="/docs/latest/perf-and-troubleshooting/memory-tuning"
class=''
>Memory and JVM Tuning</a>
</li>
<li>
<a href="/docs/latest/perf-and-troubleshooting/persistence-tuning"
class=''
>Persistence Tuning</a>
</li>
<li>
<a href="/docs/latest/perf-and-troubleshooting/sql-tuning"
class=''
>SQL Tuning</a>
</li>
<li>
<a href="/docs/latest/perf-and-troubleshooting/thread-pools-tuning"
class=''
>Thread Pools Tuning</a>
</li>
<li>
<a href="/docs/latest/perf-and-troubleshooting/troubleshooting"
class=''
>Troubleshooting and Debugging</a>
</li>
<li>
<a href="/docs/latest/perf-and-troubleshooting/handling-exceptions"
class=''
>Handling Exceptions</a>
</li>
<li>
<a href="/docs/latest/perf-and-troubleshooting/yardstick-benchmarking"
class=''
>Benchmarking With Yardstick</a>
</li>
</nav>
</li>
</nav>
<div class="left-nav__overlay"></div>
<article data-swiftype-index='true'>
<a class='edit-link' href="https://github.com/apache/ignite/tree/IGNITE-7595/docs/_docs/machine-learning/preprocessing.adoc" target="_blank">Edit</a>
<h1>Preprocessing</h1>
<div id="preamble">
<div class="sectionbody">
<div class="paragraph">
<p>Preprocessing is required to transform raw data stored in an Ignite cache to the dataset of feature vectors suitable for further use in a machine learning pipeline.</p>
</div>
<div class="paragraph">
<p>This section covers algorithms for working with features, roughly divided into the following groups:</p>
</div>
<div class="ulist">
<ul>
<li>
<p>Extracting features from “raw” data</p>
</li>
<li>
<p>Scaling features</p>
</li>
<li>
<p>Converting features</p>
</li>
<li>
<p>Modifying features</p>
</li>
</ul>
</div>
<div class="admonitionblock note">
<table>
<tr>
<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">
Usually it starts from label and feature extraction via vectorizer usage and can be complicated with other preprocessing stages.
</td>
</tr>
</table>
</div>
</div>
</div>
<div class="sect1">
<h2 id="normalization-preprocessor">Normalization preprocessor</h2>
<div class="sectionbody">
<div class="paragraph">
<p>The normal flow is to extract features and labels from Ignite data via a vectorizer​, transform the features and then normalize them.</p>
</div>
<div class="paragraph">
<p>In addition to the ability to build any custom preprocessor, Apache Ignite provides a built-in normalization preprocessor. This preprocessor makes normalization on each vector using p-norm.</p>
</div>
<div class="paragraph">
<p>For normalization, you need to create a NormalizationTrainer and fit a normalization preprocessor as follows:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="java"><span class="c1">// Train the preprocessor on the given data</span>
<span class="nc">Preprocessor</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Vector</span><span class="o">&gt;</span> <span class="n">preprocessor</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">NormalizationTrainer</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Vector</span><span class="o">&gt;()</span>
<span class="o">.</span><span class="na">withP</span><span class="o">(</span><span class="mi">1</span><span class="o">)</span>
<span class="o">.</span><span class="na">fit</span><span class="o">(</span><span class="n">ignite</span><span class="o">,</span> <span class="n">data</span><span class="o">,</span> <span class="n">vectorizer</span><span class="o">);</span>
<span class="c1">// Create linear regression trainer.</span>
<span class="nc">LinearRegressionLSQRTrainer</span> <span class="n">trainer</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">LinearRegressionLSQRTrainer</span><span class="o">();</span>
<span class="c1">// Train model.</span>
<span class="nc">LinearRegressionModel</span> <span class="n">mdl</span> <span class="o">=</span> <span class="n">trainer</span><span class="o">.</span><span class="na">fit</span><span class="o">(</span>
<span class="n">ignite</span><span class="o">,</span>
<span class="n">upstreamCache</span><span class="o">,</span>
<span class="n">preprocessor</span>
<span class="o">);</span></code></pre>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="examples">Examples</h2>
<div class="sectionbody">
<div class="paragraph">
<p>To see how the Normalization Preprocessor can be used in practice, try this <a href="https://github.com/apache/ignite/blob/master/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/NormalizationExample.java">example</a> that is available on GitHub and delivered with every Apache Ignite distribution.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="binarization-preprocessor">Binarization preprocessor</h2>
<div class="sectionbody">
<div class="paragraph">
<p>Binarization is the process of thresholding numerical features to binary (0/1) features.
Feature values greater than the threshold are binarized to 1.0; values equal to or less than the threshold are binarized to 0.0.</p>
</div>
<div class="paragraph">
<p>It contains only one significant parameter, which is the threshold.</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="java"><span class="c1">// Create binarization trainer.</span>
<span class="nc">BinarizationTrainer</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Vector</span><span class="o">&gt;</span> <span class="n">binarizationTrainer</span>
<span class="o">=</span> <span class="k">new</span> <span class="nc">BinarizationTrainer</span><span class="o">&lt;&gt;().</span><span class="na">withThreshold</span><span class="o">(</span><span class="mi">40</span><span class="o">);</span>
<span class="c1">// Build the preprocessor.</span>
<span class="nc">Preprocessor</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Vector</span><span class="o">&gt;</span> <span class="n">preprocessor</span> <span class="o">=</span> <span class="n">binarizationTrainer</span>
<span class="o">.</span><span class="na">fit</span><span class="o">(</span><span class="n">ignite</span><span class="o">,</span> <span class="n">data</span><span class="o">,</span> <span class="n">vectorizer</span><span class="o">);</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>To see how the Binarization Preprocessor can be used in practice, try this <a href="https://github.com/apache/ignite/blob/master/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/BinarizationExample.java">example</a>.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="imputer-preprocessor">Imputer preprocessor</h2>
<div class="sectionbody">
<div class="paragraph">
<p>The Imputer preprocessor completes missing values in a dataset, either using the mean or another statistic of the column in which the missing values are located. The missing values should be presented as Double.NaN. The input dataset column should be of Double. Currently, the Imputer preprocessor does not support categorical features and possibly creates incorrect values for columns containing categorical features.</p>
</div>
<div class="paragraph">
<p>During the training phase, the Imputer Trainer collects statistics about the preprocessing dataset and in the preprocessing phase it changes the data according to the collected statistics.</p>
</div>
<div class="paragraph">
<p>The Imputer Trainer contains only one parameter: <code>imputingStgy</code> that is presented as enum <strong>ImputingStrategy</strong> with two available values (NOTE: future releases may support more values):</p>
</div>
<div class="ulist">
<ul>
<li>
<p>MEAN: The default strategy. If this strategy is chosen, then replace missing values using the mean for the numeric features along the axis.</p>
</li>
<li>
<p>MOST_FREQUENT: If this strategy is chosen, then replace missing values using the most frequent value along the axis.</p>
</li>
</ul>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="java"><span class="c1">// Create imputer trainer.</span>
<span class="nc">ImputerTrainer</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Vector</span><span class="o">&gt;()</span> <span class="n">imputerTrainer</span> <span class="o">=</span>
<span class="k">new</span> <span class="nc">ImputerTrainer</span><span class="o">&lt;&gt;().</span><span class="na">withImputingStrategy</span><span class="o">(</span><span class="nc">ImputingStrategy</span><span class="o">.</span><span class="na">MOST_FREQUENT</span><span class="o">);</span>
<span class="c1">// Train imputer preprocessor.</span>
<span class="nc">Preprocessor</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Vector</span><span class="o">&gt;</span> <span class="n">preprocessor</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">ImputerTrainer</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Vector</span><span class="o">&gt;()</span>
<span class="o">.</span><span class="na">fit</span><span class="o">(</span><span class="n">ignite</span><span class="o">,</span> <span class="n">data</span><span class="o">,</span> <span class="n">vectorizer</span><span class="o">);</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>To see how the Imputer Preprocessor can be used in practice, try <a href="https://github.com/apache/ignite/blob/master/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/ImputingExample.java">this</a>.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="one-hot-encoder-preprocessor">One-Hot Encoder preprocessor</h2>
<div class="sectionbody">
<div class="paragraph">
<p>One-hot encoding maps a categorical feature, represented as a label index (Double or String value), to a binary vector with at most a single one-value indicating the presence of a specific feature value from among the set of all feature values.</p>
</div>
<div class="paragraph">
<p>This preprocessor can transform multiple columns in which indices are handled during the training process. These indexes could be defined via a <code>withEncodedFeature(featureIndex)</code> call.</p>
</div>
<div class="admonitionblock note">
<table>
<tr>
<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">
<div class="paragraph">
<p>Each one-hot encoded binary vector adds its cells to the end of the current feature vector.</p>
</div>
<div class="ulist">
<ul>
<li>
<p>This preprocessor always creates a separate column for NULL values.</p>
</li>
<li>
<p>The index value associated with NULL will be located in a binary vector according to the frequency of NULL values.</p>
</li>
</ul>
</div>
</td>
</tr>
</table>
</div>
<div class="paragraph">
<p><code>StringEncoderPreprocessor</code> and <code>OneHotEncoderPreprocessor</code> use the same EncoderTraining to collect data about categorial features during the training phase. To preprocess the dataset with the One-Hot Encoder preprocessor, set the <code>encoderType</code> with the value <code>EncoderType.ONE_HOT_ENCODER</code> as shown in the code snippet below:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="java"><span class="nc">Preprocessor</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Object</span><span class="o">[]&gt;</span> <span class="n">encoderPreprocessor</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">EncoderTrainer</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Object</span><span class="o">[]&gt;()</span>
<span class="o">.</span><span class="na">withEncoderType</span><span class="o">(</span><span class="nc">EncoderType</span><span class="o">.</span><span class="na">ONE_HOT_ENCODER</span><span class="o">)</span>
<span class="o">.</span><span class="na">withEncodedFeature</span><span class="o">(</span><span class="mi">0</span><span class="o">)</span>
<span class="o">.</span><span class="na">withEncodedFeature</span><span class="o">(</span><span class="mi">1</span><span class="o">)</span>
<span class="o">.</span><span class="na">withEncodedFeature</span><span class="o">(</span><span class="mi">4</span><span class="o">)</span>
<span class="o">.</span><span class="na">fit</span><span class="o">(</span><span class="n">ignite</span><span class="o">,</span>
<span class="n">dataCache</span><span class="o">,</span>
<span class="n">vectorizer</span>
<span class="o">);</span></code></pre>
</div>
</div>
</div>
</div>
<div class="sect1">
<h2 id="string-encoder-preprocessor">String Encoder preprocessor</h2>
<div class="sectionbody">
<div class="paragraph">
<p>The String Encoder encodes string values (categories) to double values in the range [0.0, amountOfCategories) where the most popular value will be presented as 0.0 and the least popular value presented with amountOfCategories-1 value.</p>
</div>
<div class="paragraph">
<p>This preprocessor can transform multiple columns in which indices are handled during the training process. These indexes could be defined via a <code>withEncodedFeature(featureIndex)</code> call.</p>
</div>
<div class="admonitionblock note">
<table>
<tr>
<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">
It doesn’t add a new column but changes data in-place.
</td>
</tr>
</table>
</div>
<div class="paragraph">
<p><strong>Example</strong></p>
</div>
<div class="paragraph">
<p>Assume that we have the following Dataset with features id and category:</p>
</div>
<table class="tableblock frame-all grid-all stripes-even stretch">
<colgroup>
<col style="width: 50%;">
<col style="width: 50%;">
</colgroup>
<thead>
<tr>
<th class="tableblock halign-left valign-top">Id</th>
<th class="tableblock halign-left valign-top">Category</th>
</tr>
</thead>
<tbody>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">0</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">a</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">b</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">2</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">c</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">3</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">a</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">4</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">a</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">5</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">c</p></td>
</tr>
</tbody>
</table>
<table class="tableblock frame-all grid-all stripes-even stretch">
<colgroup>
<col style="width: 50%;">
<col style="width: 50%;">
</colgroup>
<thead>
<tr>
<th class="tableblock halign-left valign-top">Id</th>
<th class="tableblock halign-left valign-top">Category</th>
</tr>
</thead>
<tbody>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">0</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">0.0</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">2.0</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">2</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">1.0</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">3</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">0.0</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">4</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">0.0</p></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">5</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">1.0</p></td>
</tr>
</tbody>
</table>
<div class="paragraph">
<p>“a” gets index 0 because it is the most frequent, followed by “c” with index 1 and “b” with index 2.</p>
</div>
<div class="admonitionblock note">
<table>
<tr>
<td class="icon">
<div class="title">Note</div>
</td>
<td class="content">
<div class="paragraph">
<p>There is only one strategy regarding how StringEncoder will handle unseen labels when you have to fit a StringEncoder on one dataset and then use it to transform another: put unseen labels in a special additional bucket, at the index equal to <code>amountOfCategories</code>.</p>
</div>
</td>
</tr>
</table>
</div>
<div class="paragraph">
<p><code>StringEncoderPreprocessor</code> and <code>OneHotEncoderPreprocessor</code> use the same EncoderTraining to collect data about categorial features during the training phase. To preprocess the dataset with the <code>StringEncoderPreprocessor</code>, set the <code>encoderType</code> with the value <code>EncoderType.STRING_ENCODER</code> as shown below in the code snippet:</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="java"><span class="nc">Preprocessor</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Object</span><span class="o">[]&gt;</span> <span class="n">encoderPreprocessor</span>
<span class="o">=</span> <span class="k">new</span> <span class="nc">EncoderTrainer</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Object</span><span class="o">[]&gt;()</span>
<span class="o">.</span><span class="na">withEncoderType</span><span class="o">(</span><span class="nc">EncoderType</span><span class="o">.</span><span class="na">STRING_ENCODER</span><span class="o">)</span>
<span class="o">.</span><span class="na">withEncodedFeature</span><span class="o">(</span><span class="mi">1</span><span class="o">)</span>
<span class="o">.</span><span class="na">withEncodedFeature</span><span class="o">(</span><span class="mi">4</span><span class="o">)</span>
<span class="o">.</span><span class="na">fit</span><span class="o">(</span><span class="n">ignite</span><span class="o">,</span>
<span class="n">dataCache</span><span class="o">,</span>
<span class="n">vectorizer</span>
<span class="o">);</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>To see how the String Encoder or OHE can be used in practice, try <a href="https://github.com/apache/ignite/tree/master/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/encoding">this</a> example.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="minmax-scaler-preprocessor">MinMax Scaler preprocessor</h2>
<div class="sectionbody">
<div class="paragraph">
<p>The MinMax Scaler transforms the given dataset, rescaling each feature to a specific range.</p>
</div>
<div class="paragraph">
<p>From a mathematical point of view, it is the following function which is applied to every element in the dataset:</p>
</div>
<div class="imageblock">
<div class="content">
<img src="/docs/2.11.0/images/preprocessing.png" alt="preprocessing">
</div>
</div>
<div class="paragraph">
<p>for all i, where i is a number of column, max_i is the value of the maximum element in this column, min_i is the value of the minimal element in this column.</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="java"><span class="c1">// Create min-max scaler trainer.</span>
<span class="nc">MinMaxScalerTrainer</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Vector</span><span class="o">&gt;</span> <span class="n">trainer</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">MinMaxScalerTrainer</span><span class="o">&lt;&gt;();</span>
<span class="c1">// Build the preprocessor.</span>
<span class="nc">Preprocessor</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Vector</span><span class="o">&gt;</span> <span class="n">preprocessor</span> <span class="o">=</span> <span class="n">trainer</span>
<span class="o">.</span><span class="na">fit</span><span class="o">(</span><span class="n">ignite</span><span class="o">,</span> <span class="n">data</span><span class="o">,</span> <span class="n">vectorizer</span><span class="o">);</span></code></pre>
</div>
</div>
<div class="paragraph">
<p><code>MinMaxScalerTrainer</code> computes summary statistics on a data set and produces a <code>MinMaxScalerPreprocessor</code>
The preprocessor can then transform each feature individually such that it is in the given range.</p>
</div>
<div class="paragraph">
<p>To see how the <code>MinMaxScalerPreprocessor</code> can be used in practice, try <a href="https://github.com/apache/ignite/blob/master/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MinMaxScalerExample.java">this</a> tutorial example.</p>
</div>
</div>
</div>
<div class="sect1">
<h2 id="maxabsscaler-preprocessor">MaxAbsScaler Preprocessor</h2>
<div class="sectionbody">
<div class="paragraph">
<p>The MaxAbsScaler transforms the given dataset, rescaling each feature to the range [-1, 1] by dividing through the maximum absolute value in each feature.</p>
</div>
<div class="paragraph">
<p>NOTE:It does not shift or center the data, and thus does not destroy any sparsity.</p>
</div>
<div class="listingblock">
<div class="content">
<pre class="rouge highlight"><code data-lang="java"><span class="c1">// Create max-abs trainer.</span>
<span class="nc">MaxAbsScalerTrainer</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Vector</span><span class="o">&gt;</span> <span class="n">trainer</span> <span class="o">=</span> <span class="k">new</span> <span class="nc">MaxAbsScalerTrainer</span><span class="o">&lt;&gt;();</span>
<span class="c1">// Build the preprocessor.</span>
<span class="nc">Preprocessor</span><span class="o">&lt;</span><span class="nc">Integer</span><span class="o">,</span> <span class="nc">Vector</span><span class="o">&gt;</span> <span class="n">preprocessor</span> <span class="o">=</span> <span class="n">trainer</span>
<span class="o">.</span><span class="na">fit</span><span class="o">(</span><span class="n">ignite</span><span class="o">,</span> <span class="n">data</span><span class="o">,</span> <span class="n">vectorizer</span><span class="o">);</span></code></pre>
</div>
</div>
<div class="paragraph">
<p>From a mathematical point of view it is the following function which is applied to every element in a dataset:</p>
</div>
<div class="imageblock">
<div class="content">
<img src="/docs/2.11.0/images/preprocessing2.png" alt="preprocessing2">
</div>
</div>
<div class="paragraph">
<p>for all i, where i is a number of column, maxabs_i is the value of the absolute maximum element in this column.</p>
</div>
<div class="paragraph">
<p><code>MaxAbsScalerTrainer</code> computes summary statistics on a data set and produces a <code>MaxAbsScalerPreprocessor</code></p>
</div>
<div class="paragraph">
<p>To see how the <code>MaxAbsScalerPreprocessor</code> can be used in practice, try <a href="https://github.com/apache/ignite/blob/master/examples/src/main/java/org/apache/ignite/examples/ml/preprocessing/MaxAbsScalerExample.java">this</a> tutorial example.</p>
</div>
</div>
</div>
<div class="copyright">
© 2021 The Apache Software Foundation.<br/>
Apache, Apache Ignite, the Apache feather and the Apache Ignite logo are either registered trademarks or trademarks of The Apache Software Foundation.
</div>
</article>
<nav class="right-nav" data-swiftype-index='false'>
<div class="toc-wrapper">
<ul class="sectlevel1">
<li><a href="#normalization-preprocessor">Normalization preprocessor</a></li>
<li><a href="#examples">Examples</a></li>
<li><a href="#binarization-preprocessor">Binarization preprocessor</a></li>
<li><a href="#imputer-preprocessor">Imputer preprocessor</a></li>
<li><a href="#one-hot-encoder-preprocessor">One-Hot Encoder preprocessor</a></li>
<li><a href="#string-encoder-preprocessor">String Encoder preprocessor</a></li>
<li><a href="#minmax-scaler-preprocessor">MinMax Scaler preprocessor</a></li>
<li><a href="#maxabsscaler-preprocessor">MaxAbsScaler Preprocessor</a></li>
</ul>
</div>
<nav class="promo-nav">
<!--#include virtual="/includes/docs_rightnav_promotion.html" -->
<a href="#" data-trigger-bugyard-feedback="true" id="doc-feedback-btn">Docs Feedback</a>
</nav>
</nav>
</section>
<script type='module' src='/assets/js/code-copy-to-clipboard.js' async crossorigin></script>
<script>
// inits deep anchors -- needs to be done here because of https://www.bryanbraun.com/anchorjs/#dont-run-it-too-late
anchors.add('.page-docs h1, .page-docs h2, .page-docs h3:not(.discrete), .page-docs h4, .page-docs h5');
anchors.options = {
placement: 'right',
visible: 'always'
};
</script>
<script src="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.js"></script>
<script>
docsearch({
// Your apiKey and indexName will be given to you once
// we create your config
apiKey: '3eee686c0ebe39eff3baeb18c56fa5f8',
indexName: 'apache_ignite',
// Replace inputSelector with a CSS selector
// matching your search input
inputSelector: '#search-input',
// algoliaOptions: { 'facetFilters': ["version:$VERSION"] },
// Set debug to true to inspect the dropdown
debug: false,
});
</script>
<script type='module' src='/assets/js/index.js?1634150559' async crossorigin></script>
<script type='module' src='/assets/js/versioning.js?1634150559' async crossorigin></script>
<link rel="stylesheet" href="/assets/css/styles.css?1634150559" media="print" onload="this.media='all'">
<noscript><link media="all" rel="stylesheet" href="/assets/css/styles.css?1634150559"></noscript>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css" media="print" onload="this.media='all'">
<noscript><link media="all" rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css"></noscript>
</body>
</html>