blob: 34f229d5d3fd4a370076863abf7213b713c42222 [file] [log] [blame]
<!DOCTYPE html><html><head><title>DASE Components Explained (E-Commerce Recommendation (Java))</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="DASE Components Explained (E-Commerce Recommendation (Java))"/><link rel="canonical" href="https://docs.prediction.io/templates/javaecommercerecommendation/dase/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-3598c7d7.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="header-nav-options-wrapper"><ul><li><a href="/">Install & Doc</a></li> <li><a href="/support">Support</a></li> </ul></div><div id="pill-wrapper"><a class="pill left" href="//templates.prediction.io/">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" 
src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>DASE Components Explained (E-Commerce Recommendation (Java))</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" 
href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your App</span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>Engine Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine 
Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="http://templates.prediction.io"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a 
class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Community Support</span></a></li><li class="level-2"><a class="final" href="/support/#enterprise-support"><span>Enterprise Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="page-title"><h1>DASE Components Explained (E-Commerce Recommendation (Java))</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#the-engine-design">The Engine Design</a> </li> <li> <a href="#data">Data</a> </li> <li> <a href="#algorithm">Algorithm</a> </li> <li> <a href="#serving">Serving</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/templates/javaecommercerecommendation/dase.html.md.erb"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="page-title"><h1>DASE Components Explained (E-Commerce Recommendation (Java))</h1></div></div><div class="content"><p>PredictionIO&#39;s DASE architecture brings the separation-of-concerns design principle to predictive engine development. 
DASE stands for the following components of an engine:</p> <ul> <li><strong>D</strong>ata - includes Data Source and Data Preparator</li> <li><strong>A</strong>lgorithm(s)</li> <li><strong>S</strong>erving</li> <li><strong>E</strong>valuator</li> </ul> <p>Let&#39;s look at the code and see how you can customize the engine you built from the E-Commerce Recommendation Engine Template.</p><div class="alert-message note"><p>Evaluator will not be covered in this tutorial.</p></div><h2 id='the-engine-design' class='header-anchors'>The Engine Design</h2><p>As you can see from the Quick Start, <em>MyECommerceRecommendation</em> takes a JSON prediction query, e.g. <code>{ &quot;userEntityId&quot;: &quot;u1&quot;, &quot;number&quot;: 4 }</code>, and returns a JSON predicted result. The <code>Query</code> class defines the format of such a <strong>query</strong>:</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12</pre></td><td class="code"><pre><span class="kd">public</span> <span class="kd">class</span> <span class="nc">Query</span> <span class="kd">implements</span> <span class="n">Serializable</span> <span class="o">{</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">String</span> <span class="n">userEntityId</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="kt">int</span> <span class="n">number</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">Set</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;</span> <span class="n">categories</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">Set</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;</span> <span class="n">whitelist</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">Set</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;</span> <span class="n">blacklist</span><span class="o">;</span>
<span class="o">...</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>The <code>PredictedResult</code> and <code>ItemScore</code> classes define the format of <strong>predicted result</strong>, such as</p><div class="highlight json"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6</pre></td><td class="code"><pre><span class="p">{</span><span class="s2">"itemScores"</span><span class="p">:[</span><span class="w">
</span><span class="p">{</span><span class="s2">"itemEntityId"</span><span class="p">:</span><span class="s2">"22"</span><span class="p">,</span><span class="s2">"score"</span><span class="p">:</span><span class="mf">4.07</span><span class="p">},</span><span class="w">
</span><span class="p">{</span><span class="s2">"itemEntityId"</span><span class="p">:</span><span class="s2">"62"</span><span class="p">,</span><span class="s2">"score"</span><span class="p">:</span><span class="mf">4.05</span><span class="p">},</span><span class="w">
</span><span class="p">{</span><span class="s2">"itemEntityId"</span><span class="p">:</span><span class="s2">"75"</span><span class="p">,</span><span class="s2">"score"</span><span class="p">:</span><span class="mf">4.04</span><span class="p">},</span><span class="w">
</span><span class="p">{</span><span class="s2">"itemEntityId"</span><span class="p">:</span><span class="s2">"68"</span><span class="p">,</span><span class="s2">"score"</span><span class="p">:</span><span class="mf">3.81</span><span class="p">}</span><span class="w">
</span><span class="p">]}</span><span class="w">
</span></pre></td></tr></tbody></table> </div> <p>with:</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12</pre></td><td class="code"><pre><span class="kd">public</span> <span class="kd">class</span> <span class="nc">PredictedResult</span> <span class="kd">implements</span> <span class="n">Serializable</span> <span class="o">{</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">List</span><span class="o">&lt;</span><span class="n">ItemScore</span><span class="o">&gt;</span> <span class="n">itemScores</span><span class="o">;</span>
<span class="o">...</span>
<span class="o">}</span>
<span class="kd">public</span> <span class="kd">class</span> <span class="nc">ItemScore</span> <span class="kd">implements</span> <span class="n">Serializable</span><span class="o">,</span> <span class="n">Comparable</span><span class="o">&lt;</span><span class="n">ItemScore</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">String</span> <span class="n">itemEntityId</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="kt">double</span> <span class="n">score</span><span class="o">;</span>
<span class="o">...</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>Finally, <code>RecommendationEngine</code> is the <em>Engine Factory</em> class that defines the components this engine will use: Data Source, Data Preparator, Algorithm(s) and Serving components.</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12</pre></td><td class="code"><pre><span class="kd">public</span> <span class="kd">class</span> <span class="nc">RecommendationEngine</span> <span class="kd">extends</span> <span class="n">EngineFactory</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="n">BaseEngine</span><span class="o">&lt;</span><span class="n">EmptyParams</span><span class="o">,</span> <span class="n">Query</span><span class="o">,</span> <span class="n">PredictedResult</span><span class="o">,</span> <span class="n">Object</span><span class="o">&gt;</span> <span class="n">apply</span><span class="o">()</span> <span class="o">{</span>
<span class="k">return</span> <span class="k">new</span> <span class="n">Engine</span><span class="o">&lt;&gt;(</span>
<span class="n">DataSource</span><span class="o">.</span><span class="na">class</span><span class="o">,</span>
<span class="n">Preparator</span><span class="o">.</span><span class="na">class</span><span class="o">,</span>
<span class="n">Collections</span><span class="o">.&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">Class</span><span class="o">&lt;?</span> <span class="kd">extends</span> <span class="n">BaseAlgorithm</span><span class="o">&lt;</span><span class="n">PreparedData</span><span class="o">,</span> <span class="o">?,</span> <span class="n">Query</span><span class="o">,</span> <span class="n">PredictedResult</span><span class="o">&gt;&gt;&gt;</span><span class="n">singletonMap</span><span class="o">(</span><span class="s">"algo"</span><span class="o">,</span> <span class="n">Algorithm</span><span class="o">.</span><span class="na">class</span><span class="o">),</span>
<span class="n">Serving</span><span class="o">.</span><span class="na">class</span>
<span class="o">);</span>
<span class="o">}</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <h3 id='spark-mllib' class='header-anchors'>Spark MLlib</h3><p>The PredictionIO E-Commerce Recommendation Engine Template integrates Spark&#39;s MLlib ALS algorithm under the DASE architecture. We will take a closer look at the DASE code below.</p><p>The MLlib ALS algorithm takes training data of RDD type, i.e. <code>RDD[Rating]</code>, and trains a model, which is a <code>MatrixFactorizationModel</code> object.</p><p>See the <a href="https://spark.apache.org/docs/latest/mllib-collaborative-filtering.html">Spark MLlib collaborative filtering guide</a> to learn more about MLlib&#39;s ALS collaborative filtering algorithm.</p><h2 id='data' class='header-anchors'>Data</h2><p>In the DASE architecture, data is prepared by two components sequentially: <em>DataSource</em> and <em>DataPreparator</em>. They take data from the data store and prepare it for the Algorithm.</p><h3 id='data-source' class='header-anchors'>Data Source</h3><p>In the <strong><em>DataSource</em></strong> class, the <code>readTraining</code> method reads and selects data from the <em>Event Store</em>. It returns <code>TrainingData</code>.</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16</pre></td><td class="code"><pre> <span class="kd">public</span> <span class="n">TrainingData</span> <span class="nf">readTraining</span><span class="p">(</span><span class="n">SparkContext</span> <span class="n">sc</span><span class="o">)</span> <span class="o">{</span>
<span class="c1">// create a JavaPairRDD of (entityID, User)</span>
<span class="n">JavaPairRDD</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span><span class="n">User</span><span class="o">&gt;</span> <span class="n">usersRDD</span> <span class="o">=</span> <span class="n">PJavaEventStore</span><span class="o">.</span><span class="na">aggregateProperties</span><span class="o">(...)</span>
<span class="c1">// create a JavaPairRDD of (entityID, Item)</span>
<span class="n">JavaPairRDD</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">Item</span><span class="o">&gt;</span> <span class="n">itemsRDD</span> <span class="o">=</span> <span class="n">PJavaEventStore</span><span class="o">.</span><span class="na">aggregateProperties</span><span class="o">(...)</span>
<span class="c1">// find all view events</span>
<span class="n">JavaRDD</span><span class="o">&lt;</span><span class="n">UserItemEvent</span><span class="o">&gt;</span> <span class="n">viewEventsRDD</span> <span class="o">=</span> <span class="n">PJavaEventStore</span><span class="o">.</span><span class="na">find</span><span class="o">(...)</span>
<span class="c1">// find all buy events</span>
<span class="n">JavaRDD</span><span class="o">&lt;</span><span class="n">UserItemEvent</span><span class="o">&gt;</span> <span class="n">buyEventsRDD</span> <span class="o">=</span> <span class="n">PJavaEventStore</span><span class="o">.</span><span class="na">find</span><span class="o">(...)</span>
<span class="k">return</span> <span class="k">new</span> <span class="n">TrainingData</span><span class="o">(</span><span class="n">usersRDD</span><span class="o">,</span> <span class="n">itemsRDD</span><span class="o">,</span> <span class="n">viewEventsRDD</span><span class="o">,</span> <span class="n">buyEventsRDD</span><span class="o">);</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>PredictionIO automatically loads the parameters of <em>datasource</em> specified in MyECommerceRecommendation/<strong><em>engine.json</em></strong>, including <em>appName</em>.</p><p>In <strong><em>engine.json</em></strong>:</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9</pre></td><td class="code"><pre><span class="o">{</span>
...
<span class="s2">"datasource"</span>: <span class="o">{</span>
<span class="s2">"params"</span> : <span class="o">{</span>
<span class="s2">"appName"</span>: <span class="s2">"MyApp1"</span>
<span class="o">}</span>
<span class="o">}</span>,
...
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>In <code>readTraining()</code>, <code>PJavaEventStore</code> is an object which provides functions to access data that is collected by the PredictionIO Event Server.</p><p>This E-Commerce Recommendation Engine Template requires &quot;user&quot; and &quot;item&quot; entities that are set by events.</p><p><code>PJavaEventStore.aggregateProperties(...)</code> aggregates properties of the <code>user</code> and <code>item</code> that are set, unset, or deleted by special events <strong>$set</strong>, <strong>$unset</strong> and <strong>$delete</strong>. Please refer to <a href="/datacollection/eventapi/#note-about-properties">Event API</a> for more details of using these events.</p><p>The following code aggregates the properties of <code>user</code> and then maps each result to a <code>User</code> object.</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25</pre></td><td class="code"><pre>
<span class="n">JavaPairRDD</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span><span class="n">User</span><span class="o">&gt;</span> <span class="n">usersRDD</span> <span class="o">=</span> <span class="n">PJavaEventStore</span><span class="o">.</span><span class="na">aggregateProperties</span><span class="o">(</span>
<span class="n">dsp</span><span class="o">.</span><span class="na">getAppName</span><span class="o">(),</span>
<span class="s">"user"</span><span class="o">,</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">String</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">DateTime</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">DateTime</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">List</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">sc</span><span class="o">)</span>
<span class="o">.</span><span class="na">mapToPair</span><span class="o">(</span><span class="k">new</span> <span class="n">PairFunction</span><span class="o">&lt;</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">PropertyMap</span><span class="o">&gt;,</span> <span class="n">String</span><span class="o">,</span> <span class="n">User</span><span class="o">&gt;()</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">User</span><span class="o">&gt;</span> <span class="n">call</span><span class="o">(</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">PropertyMap</span><span class="o">&gt;</span> <span class="n">entityIdProperty</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span>
<span class="n">Set</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;</span> <span class="n">keys</span> <span class="o">=</span> <span class="n">JavaConversions</span><span class="err">$</span><span class="o">.</span><span class="na">MODULE</span><span class="err">$</span><span class="o">.</span><span class="na">setAsJavaSet</span><span class="o">(</span><span class="n">entityIdProperty</span><span class="o">.</span><span class="na">_2</span><span class="o">().</span><span class="na">keySet</span><span class="o">());</span>
<span class="n">Map</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">String</span><span class="o">&gt;</span> <span class="n">properties</span> <span class="o">=</span> <span class="k">new</span> <span class="n">HashMap</span><span class="o">&lt;&gt;();</span>
<span class="k">for</span> <span class="o">(</span><span class="n">String</span> <span class="n">key</span> <span class="o">:</span> <span class="n">keys</span><span class="o">)</span> <span class="o">{</span>
<span class="n">properties</span><span class="o">.</span><span class="na">put</span><span class="o">(</span><span class="n">key</span><span class="o">,</span> <span class="n">entityIdProperty</span><span class="o">.</span><span class="na">_2</span><span class="o">().</span><span class="na">get</span><span class="o">(</span><span class="n">key</span><span class="o">,</span> <span class="n">String</span><span class="o">.</span><span class="na">class</span><span class="o">));</span>
<span class="o">}</span>
<span class="n">User</span> <span class="n">user</span> <span class="o">=</span> <span class="k">new</span> <span class="n">User</span><span class="o">(</span><span class="n">entityIdProperty</span><span class="o">.</span><span class="na">_1</span><span class="o">(),</span> <span class="n">ImmutableMap</span><span class="o">.</span><span class="na">copyOf</span><span class="o">(</span><span class="n">properties</span><span class="o">));</span>
<span class="k">return</span> <span class="k">new</span> <span class="n">Tuple2</span><span class="o">&lt;&gt;(</span><span class="n">user</span><span class="o">.</span><span class="na">getEntityId</span><span class="o">(),</span> <span class="n">user</span><span class="o">);</span>
<span class="o">}</span>
<span class="o">});</span>
</pre></td></tr></tbody></table> </div> <p>In the template, the <code>User</code> object is a placeholder for you to customize and expand.</p><p>Similarly, the following code aggregates <code>item</code> properties and then maps each result to an <code>Item</code> object. By default, this template assumes each item has an optional property <code>categories</code>, which is a list of strings.</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18</pre></td><td class="code"><pre> <span class="n">JavaPairRDD</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">Item</span><span class="o">&gt;</span> <span class="n">itemsRDD</span> <span class="o">=</span> <span class="n">PJavaEventStore</span><span class="o">.</span><span class="na">aggregateProperties</span><span class="o">(</span>
<span class="n">dsp</span><span class="o">.</span><span class="na">getAppName</span><span class="o">(),</span>
<span class="s">"item"</span><span class="o">,</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">String</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">DateTime</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">DateTime</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">List</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">sc</span><span class="o">)</span>
<span class="o">.</span><span class="na">mapToPair</span><span class="o">(</span><span class="k">new</span> <span class="n">PairFunction</span><span class="o">&lt;</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">PropertyMap</span><span class="o">&gt;,</span> <span class="n">String</span><span class="o">,</span> <span class="n">Item</span><span class="o">&gt;()</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">Item</span><span class="o">&gt;</span> <span class="n">call</span><span class="o">(</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">PropertyMap</span><span class="o">&gt;</span> <span class="n">entityIdProperty</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span>
<span class="n">List</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;</span> <span class="n">categories</span> <span class="o">=</span> <span class="n">entityIdProperty</span><span class="o">.</span><span class="na">_2</span><span class="o">().</span><span class="na">getStringList</span><span class="o">(</span><span class="s">"categories"</span><span class="o">);</span>
<span class="n">Item</span> <span class="n">item</span> <span class="o">=</span> <span class="k">new</span> <span class="n">Item</span><span class="o">(</span><span class="n">entityIdProperty</span><span class="o">.</span><span class="na">_1</span><span class="o">(),</span> <span class="n">ImmutableSet</span><span class="o">.</span><span class="na">copyOf</span><span class="o">(</span><span class="n">categories</span><span class="o">));</span>
<span class="k">return</span> <span class="k">new</span> <span class="n">Tuple2</span><span class="o">&lt;&gt;(</span><span class="n">item</span><span class="o">.</span><span class="na">getEntityId</span><span class="o">(),</span> <span class="n">item</span><span class="o">);</span>
<span class="o">}</span>
<span class="o">});</span>
</pre></td></tr></tbody></table> </div> <p><code>PJavaEventStore.find(...)</code> specifies the events that you want to read. In this case, &quot;user view item&quot; and &quot;user buy item&quot; events are read. The view events are read first:</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19</pre></td><td class="code"><pre>
<span class="n">JavaRDD</span><span class="o">&lt;</span><span class="n">UserItemEvent</span><span class="o">&gt;</span> <span class="n">viewEventsRDD</span> <span class="o">=</span> <span class="n">PJavaEventStore</span><span class="o">.</span><span class="na">find</span><span class="o">(</span>
<span class="n">dsp</span><span class="o">.</span><span class="na">getAppName</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">String</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">DateTime</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">DateTime</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.</span><span class="na">some</span><span class="o">(</span><span class="s">"user"</span><span class="o">),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">String</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.</span><span class="na">some</span><span class="o">(</span><span class="n">Collections</span><span class="o">.</span><span class="na">singletonList</span><span class="o">(</span><span class="s">"view"</span><span class="o">)),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">Option</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">Option</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">sc</span><span class="o">)</span>
<span class="o">.</span><span class="na">map</span><span class="o">(</span><span class="k">new</span> <span class="n">Function</span><span class="o">&lt;</span><span class="n">Event</span><span class="o">,</span> <span class="n">UserItemEvent</span><span class="o">&gt;()</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="n">UserItemEvent</span> <span class="n">call</span><span class="o">(</span><span class="n">Event</span> <span class="n">event</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span>
<span class="k">return</span> <span class="k">new</span> <span class="n">UserItemEvent</span><span class="o">(</span><span class="n">event</span><span class="o">.</span><span class="na">entityId</span><span class="o">(),</span> <span class="n">event</span><span class="o">.</span><span class="na">targetEntityId</span><span class="o">().</span><span class="na">get</span><span class="o">(),</span> <span class="n">event</span><span class="o">.</span><span class="na">eventTime</span><span class="o">().</span><span class="na">getMillis</span><span class="o">(),</span> <span class="n">UserItemEventType</span><span class="o">.</span><span class="na">VIEW</span><span class="o">);</span>
<span class="o">}</span>
<span class="o">});</span>
</pre></td></tr></tbody></table> </div> <p>Similarly, we read buy events from Event Server.</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19</pre></td><td class="code"><pre>
<span class="n">JavaRDD</span><span class="o">&lt;</span><span class="n">UserItemEvent</span><span class="o">&gt;</span> <span class="n">buyEventsRDD</span> <span class="o">=</span> <span class="n">PJavaEventStore</span><span class="o">.</span><span class="na">find</span><span class="o">(</span>
<span class="n">dsp</span><span class="o">.</span><span class="na">getAppName</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">String</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">DateTime</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">DateTime</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.</span><span class="na">some</span><span class="o">(</span><span class="s">"user"</span><span class="o">),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">String</span><span class="o">&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.</span><span class="na">some</span><span class="o">(</span><span class="n">Collections</span><span class="o">.</span><span class="na">singletonList</span><span class="o">(</span><span class="s">"buy"</span><span class="o">)),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">Option</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">OptionHelper</span><span class="o">.&lt;</span><span class="n">Option</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;&gt;</span><span class="n">none</span><span class="o">(),</span>
<span class="n">sc</span><span class="o">)</span>
<span class="o">.</span><span class="na">map</span><span class="o">(</span><span class="k">new</span> <span class="n">Function</span><span class="o">&lt;</span><span class="n">Event</span><span class="o">,</span> <span class="n">UserItemEvent</span><span class="o">&gt;()</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="n">UserItemEvent</span> <span class="n">call</span><span class="o">(</span><span class="n">Event</span> <span class="n">event</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span>
<span class="k">return</span> <span class="k">new</span> <span class="n">UserItemEvent</span><span class="o">(</span><span class="n">event</span><span class="o">.</span><span class="na">entityId</span><span class="o">(),</span> <span class="n">event</span><span class="o">.</span><span class="na">targetEntityId</span><span class="o">().</span><span class="na">get</span><span class="o">(),</span> <span class="n">event</span><span class="o">.</span><span class="na">eventTime</span><span class="o">().</span><span class="na">getMillis</span><span class="o">(),</span> <span class="n">UserItemEventType</span><span class="o">.</span><span class="na">BUY</span><span class="o">);</span>
<span class="o">}</span>
<span class="o">});</span>
</pre></td></tr></tbody></table> </div> <div class="alert-message info"><p>For flexibility, this template is designed to support user ID and item ID in String.</p></div><p><code>TrainingData</code> contains Java RDD of <code>User</code>, <code>Item</code>, <code>View Event</code>, and <code>Buy Event</code>. The class definition of <code>TrainingData</code> is:</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9</pre></td><td class="code"><pre><span class="kd">public</span> <span class="kd">class</span> <span class="nc">TrainingData</span> <span class="kd">implements</span> <span class="n">Serializable</span><span class="o">,</span> <span class="n">SanityCheck</span> <span class="o">{</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">JavaPairRDD</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">User</span><span class="o">&gt;</span> <span class="n">users</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">JavaPairRDD</span><span class="o">&lt;</span><span class="n">String</span><span class="o">,</span> <span class="n">Item</span><span class="o">&gt;</span> <span class="n">items</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">JavaRDD</span><span class="o">&lt;</span><span class="n">UserItemEvent</span><span class="o">&gt;</span> <span class="n">viewEvents</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">JavaRDD</span><span class="o">&lt;</span><span class="n">UserItemEvent</span><span class="o">&gt;</span> <span class="n">buyEvents</span><span class="o">;</span>
<span class="o">...</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>PredictionIO then passes the returned <code>TrainingData</code> object to <em>Data Preparator</em>.</p><div class="alert-message note"><p>You could modify the DataSource to read events other than the default <strong>view</strong> or <strong>buy</strong>.</p></div><h3 id='data-preparator' class='header-anchors'>Data Preparator</h3><p>In <strong><em>Preparator</em></strong>, the <code>prepare</code> method takes <code>TrainingData</code> as its input and performs any necessary feature selection and data processing tasks. At the end, it returns <code>PreparedData</code> which should contain the data <em>Algorithm</em> needs.</p><p>By default, <code>prepare</code> simply includes the unprocessed <code>TrainingData</code> in <code>PreparedData</code>:</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8</pre></td><td class="code"><pre><span class="kd">public</span> <span class="kd">class</span> <span class="nc">Preparator</span> <span class="kd">extends</span> <span class="n">PJavaPreparator</span><span class="o">&lt;</span><span class="n">TrainingData</span><span class="o">,</span> <span class="n">PreparedData</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="n">PreparedData</span> <span class="n">prepare</span><span class="o">(</span><span class="n">SparkContext</span> <span class="n">sc</span><span class="o">,</span> <span class="n">TrainingData</span> <span class="n">trainingData</span><span class="o">)</span> <span class="o">{</span>
<span class="k">return</span> <span class="k">new</span> <span class="n">PreparedData</span><span class="o">(</span><span class="n">trainingData</span><span class="o">);</span>
<span class="o">}</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>PredictionIO passes the returned <code>PreparedData</code> object to Algorithm&#39;s <code>train</code> function.</p><h2 id='algorithm' class='header-anchors'>Algorithm</h2><p>In the <strong><em>Algorithm</em></strong> class, the two methods of interest are <code>train</code> and <code>predict</code>. <code>train</code> is responsible for training the predictive model; <code>predict</code> is responsible for using this model to make a prediction.</p><h3 id='algorithm-parameters' class='header-anchors'>Algorithm parameters</h3><p>The algorithm takes the following parameters, as defined by the <code>AlgorithmParams</code> class:</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12</pre></td><td class="code"><pre><span class="kd">public</span> <span class="kd">class</span> <span class="nc">AlgorithmParams</span> <span class="kd">implements</span> <span class="n">Params</span><span class="o">{</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="kt">long</span> <span class="n">seed</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="kt">int</span> <span class="n">rank</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="kt">int</span> <span class="n">iteration</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="kt">double</span> <span class="n">lambda</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">String</span> <span class="n">appName</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">List</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;</span> <span class="n">similarItemEvents</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="kt">boolean</span> <span class="n">unseenOnly</span><span class="o">;</span>
<span class="kd">private</span> <span class="kd">final</span> <span class="n">List</span><span class="o">&lt;</span><span class="n">String</span><span class="o">&gt;</span> <span class="n">seenItemEvents</span><span class="o">;</span>
<span class="o">...</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>Parameter description:</p> <ul> <li><strong>appName</strong>: Your App name. Events defined by &quot;seenItemEvents&quot; and &quot;similarItemEvents&quot; will be read from this app during <code>predict</code>.</li> <li><strong>unseenOnly</strong>: true or false. Set to true if you want to recommend unseen items only. Seen items are defined by <em>seenItemEvents</em>, which means that if the user has any of these events on an item, the item is treated as <em>seen</em>.</li> <li><strong>seenItemEvents</strong>: A list of user-to-item events which will be treated as <em>seen</em> events. Used when <em>unseenOnly</em> is set to true.</li> <li><strong>similarItemEvents</strong>: A list of user-item-item events which will be used to find similar items to the items which the user has performed these events on.</li> <li><strong>rank</strong>: Parameter of the MLlib ALS algorithm. Number of latent features.</li> <li><strong>iteration</strong>: Parameter of the MLlib ALS algorithm. Number of iterations.</li> <li><strong>lambda</strong>: Regularization parameter of the MLlib ALS algorithm.</li> <li><strong>seed</strong>: A random seed of the MLlib ALS algorithm.</li> </ul> <h3 id='train(...)' class='header-anchors'>train(...)</h3><p><code>train</code> is called when you run <strong>pio train</strong>. This is where the MLlib ALS algorithm, i.e. <code>ALS.trainImplicit()</code>, is used to train a predictive model. In addition, we also count the number of times each item has been bought. These counts serve as a default model, which is used when neither an ALS model nor other useful information about the user is available during <code>predict</code>.</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11</pre></td><td class="code"><pre>
<span class="kd">public</span> <span class="n">Model</span> <span class="nf">train</span><span class="p">(</span><span class="n">SparkContext</span> <span class="n">sc</span><span class="o">,</span> <span class="n">PreparedData</span> <span class="n">preparedData</span><span class="o">)</span> <span class="o">{</span>
<span class="o">...</span>
<span class="n">MatrixFactorizationModel</span> <span class="n">matrixFactorizationModel</span> <span class="o">=</span> <span class="n">ALS</span><span class="o">.</span><span class="na">trainImplicit</span><span class="o">(</span><span class="n">JavaRDD</span><span class="o">.</span><span class="na">toRDD</span><span class="o">(</span><span class="n">ratings</span><span class="o">),</span> <span class="n">ap</span><span class="o">.</span><span class="na">getRank</span><span class="o">(),</span> <span class="n">ap</span><span class="o">.</span><span class="na">getIteration</span><span class="o">(),</span> <span class="n">ap</span><span class="o">.</span><span class="na">getLambda</span><span class="o">(),</span> <span class="o">-</span><span class="mi">1</span><span class="o">,</span> <span class="mf">1.0</span><span class="o">,</span> <span class="n">ap</span><span class="o">.</span><span class="na">getSeed</span><span class="o">());</span>
<span class="o">...</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <h4 id='working-with-spark-mllib&#39;s-als.trainimplicit(....)' class='header-anchors'>Working with Spark MLlib&#39;s ALS.trainImplicit(....)</h4><p>The MLlib ALS algorithm does not support <code>String</code> user IDs and item IDs. <code>ALS.trainImplicit</code> thus also assumes int-only <code>Rating</code> objects. A view event is an implicit event that does not have an explicit rating value. <code>ALS.trainImplicit()</code> supports implicit preference. If a <code>Rating</code> has a higher rating value, it means higher confidence that the user prefers the item. Hence we can aggregate how many times the user has viewed the item to indicate the confidence level that the user may prefer the item.</p><p>Here are the steps to use the MLlib ALS algorithm.</p> <ol> <li>Map the user and item string IDs of the view event to integer IDs, as required by <code>Rating</code>.</li> <li>Filter out the events with an invalid user or item ID.</li> <li>Use <code>reduceByKey()</code> to add up all values for events with the same user-item combination.</li> <li>Create a <code>Rating</code> object using the user index, item index, and summed-up score.</li> </ol> <div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27</pre></td><td class="code"><pre>
<span class="n">JavaRDD</span><span class="o">&lt;</span><span class="n">Rating</span><span class="o">&gt;</span> <span class="n">ratings</span> <span class="o">=</span> <span class="n">data</span><span class="o">.</span><span class="na">getViewEvents</span><span class="o">().</span><span class="na">mapToPair</span><span class="o">(</span><span class="k">new</span> <span class="n">PairFunction</span><span class="o">&lt;</span><span class="n">UserItemEvent</span><span class="o">,</span> <span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;,</span> <span class="n">Integer</span><span class="o">&gt;()</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;,</span> <span class="n">Integer</span><span class="o">&gt;</span> <span class="n">call</span><span class="o">(</span><span class="n">UserItemEvent</span> <span class="n">viewEvent</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span>
<span class="n">Integer</span> <span class="n">userIndex</span> <span class="o">=</span> <span class="n">userIndexMap</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">viewEvent</span><span class="o">.</span><span class="na">getUser</span><span class="o">());</span>
<span class="n">Integer</span> <span class="n">itemIndex</span> <span class="o">=</span> <span class="n">itemIndexMap</span><span class="o">.</span><span class="na">get</span><span class="o">(</span><span class="n">viewEvent</span><span class="o">.</span><span class="na">getItem</span><span class="o">());</span>
<span class="k">return</span> <span class="o">(</span><span class="n">userIndex</span> <span class="o">==</span> <span class="kc">null</span> <span class="o">||</span> <span class="n">itemIndex</span> <span class="o">==</span> <span class="kc">null</span><span class="o">)</span> <span class="o">?</span> <span class="kc">null</span> <span class="o">:</span> <span class="k">new</span> <span class="n">Tuple2</span><span class="o">&lt;&gt;(</span><span class="k">new</span> <span class="n">Tuple2</span><span class="o">&lt;&gt;(</span><span class="n">userIndex</span><span class="o">,</span> <span class="n">itemIndex</span><span class="o">),</span> <span class="mi">1</span><span class="o">);</span>
<span class="o">}</span>
<span class="o">}).</span><span class="na">filter</span><span class="o">(</span><span class="k">new</span> <span class="n">Function</span><span class="o">&lt;</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;,</span> <span class="n">Integer</span><span class="o">&gt;,</span> <span class="n">Boolean</span><span class="o">&gt;()</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="n">Boolean</span> <span class="n">call</span><span class="o">(</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;,</span> <span class="n">Integer</span><span class="o">&gt;</span> <span class="n">element</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span>
<span class="k">return</span> <span class="o">(</span><span class="n">element</span> <span class="o">!=</span> <span class="kc">null</span><span class="o">);</span>
<span class="o">}</span>
<span class="o">}).</span><span class="na">reduceByKey</span><span class="o">(</span><span class="k">new</span> <span class="n">Function2</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">,</span> <span class="n">Integer</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;()</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="n">Integer</span> <span class="n">call</span><span class="o">(</span><span class="n">Integer</span> <span class="n">integer</span><span class="o">,</span> <span class="n">Integer</span> <span class="n">integer2</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span>
<span class="k">return</span> <span class="n">integer</span> <span class="o">+</span> <span class="n">integer2</span><span class="o">;</span>
<span class="o">}</span>
<span class="o">}).</span><span class="na">map</span><span class="o">(</span><span class="k">new</span> <span class="n">Function</span><span class="o">&lt;</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;,</span> <span class="n">Integer</span><span class="o">&gt;,</span> <span class="n">Rating</span><span class="o">&gt;()</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="n">Rating</span> <span class="n">call</span><span class="o">(</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">Tuple2</span><span class="o">&lt;</span><span class="n">Integer</span><span class="o">,</span> <span class="n">Integer</span><span class="o">&gt;,</span> <span class="n">Integer</span><span class="o">&gt;</span> <span class="n">userItemCount</span><span class="o">)</span> <span class="kd">throws</span> <span class="n">Exception</span> <span class="o">{</span>
<span class="k">return</span> <span class="k">new</span> <span class="n">Rating</span><span class="o">(</span><span class="n">userItemCount</span><span class="o">.</span><span class="na">_1</span><span class="o">().</span><span class="na">_1</span><span class="o">(),</span> <span class="n">userItemCount</span><span class="o">.</span><span class="na">_1</span><span class="o">().</span><span class="na">_2</span><span class="o">(),</span> <span class="n">userItemCount</span><span class="o">.</span><span class="na">_2</span><span class="o">().</span><span class="na">doubleValue</span><span class="o">());</span>
<span class="o">}</span>
<span class="o">});</span>
</pre></td></tr></tbody></table> </div> <p>In addition to <code>RDD[Rating]</code>, <code>ALS.trainImplicit</code> takes the following parameters: <em>rank</em>, <em>iterations</em>, <em>lambda</em> and <em>seed</em>.</p><p>The values of these parameters are specified in <em>algorithms</em> section of <strong><em>engine.json</em></strong>:</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19</pre></td><td class="code"><pre><span class="o">{</span>
...
<span class="s2">"algorithms"</span>: <span class="o">[</span>
<span class="o">{</span>
<span class="s2">"name"</span>: <span class="s2">"als"</span>,
<span class="s2">"params"</span>: <span class="o">{</span>
<span class="s2">"appName"</span>: <span class="s2">"MyApp1"</span>,
<span class="s2">"unseenOnly"</span>: <span class="nb">true</span>,
<span class="s2">"seenItemEvents"</span>: <span class="o">[</span><span class="s2">"buy"</span>, <span class="s2">"view"</span><span class="o">]</span>,
<span class="s2">"similarItemEvents"</span> : <span class="o">[</span><span class="s2">"view"</span><span class="o">]</span>,
<span class="s2">"rank"</span>: 10,
<span class="s2">"iteration"</span> : 20,
<span class="s2">"lambda"</span>: 0.01,
<span class="s2">"seed"</span>: 3
<span class="o">}</span>
<span class="o">}</span>
<span class="o">]</span>
...
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>The parameters <code>appName</code>, <code>unseenOnly</code>, <code>seenItemEvents</code> and <code>similarItemEvents</code> are used during <code>predict()</code>, which will be explained later.</p><p>PredictionIO will automatically load these values into the <code>AlgorithmParams</code> field of the <code>Algorithm</code>.</p><p>The <code>seed</code> parameter is used by the MLlib ALS algorithm internally to generate random values. Specify a fixed value for the <code>seed</code> if you want to have a deterministic result (for example, when you are testing).</p><p><code>ALS.trainImplicit()</code> returns a <code>MatrixFactorizationModel</code> model which contains two RDDs: userFeatures and productFeatures. They correspond to the user X latent features matrix and item X latent features matrix, respectively.</p><p>In addition to the latent feature vector, the item properties (e.g. categories) and popular count are also used during <code>predict()</code>. Hence, we also save these data along with the feature vector.</p><p>PredictionIO will store the returned model after training.</p><h3 id='predict(...)' class='header-anchors'>predict(...)</h3><p><code>predict</code> is called when you send a JSON query to <a href="http://localhost:8000/queries.json">http://localhost:8000/queries.json</a>. 
PredictionIO converts the query, such as <code>{ &quot;userEntityId&quot;: &quot;u1&quot;, &quot;number&quot;: 4 }</code>, to the <code>Query</code> class we defined previously.</p><p>We can use the user features and item features stored in the model to calculate the scores of items for the user.</p><p>This template also supports additional business logic features, such as filtering items by categories, recommending items in the whitelist, excluding items in the blacklist, recommending unseen items only, and excluding unavailable items defined in the constraint event.</p><p>The <code>predict()</code> function does the following:</p> <ol> <li>Get the user feature vector from the model.</li> <li>If there is a feature vector for the user, recommend top N items based on the user feature and item features.</li> <li>If there is no feature vector for the user, use the recent items acted on by the user (defined by <code>similarItemEvents</code> parameter) to recommend similar items.</li> <li>If there are no recent <code>similarItemEvents</code> available for the user, popular items are recommended.</li> </ol> <p>Only items which satisfy the following conditions will be recommended. By default, an item will be recommended if:</p> <ul> <li>it belongs to one of the categories defined in the query.</li> <li>it is one of the whitelisted items if a whitelist is defined.</li> <li>it is not on the blacklist.</li> <li>it is available.</li> </ul> <div class="alert-message info"><p>You can easily modify <code>validScores()</code> if you have different requirements or conditions to determine if an item should be recommended.</p></div><p>PredictionIO passes the returned <code>PredictedResult</code> object to <em>Serving</em>.</p><h2 id='serving' class='header-anchors'>Serving</h2><p>The <code>serve</code> method of class <code>Serving</code> processes predicted result. It is also responsible for combining multiple predicted results into one if you have more than one predictive model. 
<em>Serving</em> then returns the final predicted result. PredictionIO will convert it to a JSON response automatically.</p><div class="highlight java"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8</pre></td><td class="code"><pre><span class="kd">public</span> <span class="kd">class</span> <span class="nc">Serving</span> <span class="kd">extends</span> <span class="n">LJavaServing</span><span class="o">&lt;</span><span class="n">Query</span><span class="o">,</span> <span class="n">PredictedResult</span><span class="o">&gt;</span> <span class="o">{</span>
<span class="nd">@Override</span>
<span class="kd">public</span> <span class="n">PredictedResult</span> <span class="n">serve</span><span class="o">(</span><span class="n">Query</span> <span class="n">query</span><span class="o">,</span> <span class="n">Seq</span><span class="o">&lt;</span><span class="n">PredictedResult</span><span class="o">&gt;</span> <span class="n">predictions</span><span class="o">)</span> <span class="o">{</span>
<span class="k">return</span> <span class="n">predictions</span><span class="o">.</span><span class="na">head</span><span class="o">();</span>
<span class="o">}</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>When you send a JSON query to <a href="http://localhost:8000/queries.json">http://localhost:8000/queries.json</a>, <code>PredictedResult</code> from all models will be passed to <code>serve</code> as a sequence, i.e. <code>Seq&lt;PredictedResult&gt;</code>.</p> <blockquote> <p>An engine can train multiple models if you specify more than one Algorithm component in <code>RecommendationEngine</code>. Since we only have one algorithm, this <code>Seq</code> contains one element.</p></blockquote> </div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//docs.prediction.io/install/" target="_blank" rel="noopener">Download</a></li><li><a href="//docs.prediction.io/" target="_blank" rel="noopener">Docs</a></li><li><a href="//github.com/apache/incubator-predictionio" target="_blank" rel="noopener">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.incubator.apache.org" target="_blank" rel="noopener">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="_blank" rel="noopener">Stack Overflow</a></li></ul></div></div><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.incubator.apache.org/community/contribute-code/" target="_blank" rel="noopener">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="_blank" rel="noopener">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="_blank" rel="noopener">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.incubator.apache.org" target="_blank" rel="noopener">Subscribe to Development Mailing List</a></li></ul></div></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div id="social-icons-wrapper"><a 
class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="//twitter.com/predictionio" target="_blank" rel="noopener"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="//www.facebook.com/predictionio" target="_blank" rel="noopener"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
_st('install','HaUfpXXV87xoB_zzCQ45');</script><script src="/javascripts/application-5a24945b.js"></script></body></html>