blob: 33657784d13ce3df2bdb69fa37146570b49d9f82 [file] [log] [blame]
<!DOCTYPE html><html><head><title>Implementing DASE</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Implementing DASE"/><link rel="canonical" href="https://docs.prediction.io/customize/dase/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-3598c7d7.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="header-nav-options-wrapper"><ul><li><a href="/">Install & Doc</a></li> <li><a href="/support">Support</a></li> </ul></div><div id="pill-wrapper"><a class="pill left" href="//templates.prediction.io/">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div id="search-bar-row-wrapper"><div 
class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Implement DASE</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" href="/start/customize/"><span>Customizing the 
Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your App</span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>Engine Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final active" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and Analyzing Data</span></a><ul><li 
class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricchoose/"><span>Choosing Evaluation 
Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="http://templates.prediction.io"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li 
class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Community Support</span></a></li><li class="level-2"><a class="final" href="/support/#enterprise-support"><span>Enterprise Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">Customizing an Engine</a><span class="spacer">&gt;</span></li><li><span class="last">Implement DASE</span></li></ul></div><div id="page-title"><h1>Implementing DASE</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#datasource">DataSource</a> <ul> <li> <a href="#readtraining">readTraining()</a> </li> <li> <a href="#using-peventstore-engine-api">Using PEventStore Engine API</a> </li> </ul> </li> <li> <a href="#preparator">Preparator</a> <ul> <li> <a href="#prepare">prepare()</a> </li> </ul> </li> <li> <a href="#algorithm">Algorithm</a> <ul> <li> <a href="#train">train()</a> </li> <li> <a href="#predict">predict()</a> </li> <li> <a href="#p2lalgorithm">P2LAlgorithm</a> </li> <li> <a href="#palgorithm">PAlgorithm</a> </li> <li> <a href="#using-leventstore-engine-api-in-predict">using LEventStore Engine API in predict()</a> </li> </ul> </li> <li> <a href="#serving">Serving</a> <ul> <li> <a href="#serve">serve()</a> </li> </ul> </li> </ul> </aside><hr/><a id="edit-page-link" 
href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/customize/dase.html.md.erb"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">Customizing an Engine</a><span class="spacer">&gt;</span></li><li><span class="last">Implement DASE</span></li></ul></div><div id="page-title"><h1>Implementing DASE</h1></div></div><div class="content"><p>This section gives you an overview of DASE components and how to implement them. You will find links to some engine templates for more concrete examples.</p><h1 id='datasource' class='header-anchors'>DataSource</h1><p>DataSource reads and selects useful data from the Event Store (data store of the Event Server) and returns TrainingData.</p><h2 id='readtraining' class='header-anchors'>readTraining()</h2><p>You need to implement readTraining() of <a href="https://docs.prediction.io/api/current/#io.prediction.controller.PDataSource">PDataSource</a>, where you can use the <a href="https://docs.prediction.io/api/current/#io.prediction.data.store.PEventStore$">PEventStore Engine API</a> to read the events and create the TrainingData based on the events.</p><p>The following code example reads user &quot;view&quot; and &quot;buy&quot; item events, filters specific types of events for future processing and returns TrainingData accordingly.</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27</pre></td><td class="code"><pre><span class="k">class</span> <span class="nc">DataSource</span><span class="o">(</span><span class="k">val</span> <span class="n">dsp</span><span class="k">:</span> <span class="kt">DataSourceParams</span><span class="o">)</span>
<span class="k">extends</span> <span class="nc">PDataSource</span><span class="o">[</span><span class="kt">TrainingData</span>,
<span class="kt">EmptyEvaluationInfo</span>, <span class="kt">Query</span>, <span class="kt">EmptyActualResult</span><span class="o">]</span> <span class="o">{</span>
<span class="nd">@transient</span> <span class="k">lazy</span> <span class="k">val</span> <span class="n">logger</span> <span class="k">=</span> <span class="nc">Logger</span><span class="o">[</span><span class="kt">this.</span><span class="k">type</span><span class="o">]</span>
<span class="k">override</span>
<span class="k">def</span> <span class="n">readTraining</span><span class="o">(</span><span class="n">sc</span><span class="k">:</span> <span class="kt">SparkContext</span><span class="o">)</span><span class="k">:</span> <span class="kt">TrainingData</span> <span class="o">=</span> <span class="o">{</span>
<span class="k">val</span> <span class="n">eventsRDD</span><span class="k">:</span> <span class="kt">RDD</span><span class="o">[</span><span class="kt">Event</span><span class="o">]</span> <span class="k">=</span> <span class="nc">PEventStore</span><span class="o">.</span><span class="n">find</span><span class="o">(</span>
<span class="n">appName</span> <span class="k">=</span> <span class="n">dsp</span><span class="o">.</span><span class="n">appName</span><span class="o">,</span>
<span class="n">entityType</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="s">"user"</span><span class="o">),</span>
<span class="n">eventNames</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="nc">List</span><span class="o">(</span><span class="s">"view"</span><span class="o">,</span> <span class="s">"buy"</span><span class="o">)),</span>
<span class="c1">// targetEntityType is optional field of an event.
</span> <span class="n">targetEntityType</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="nc">Some</span><span class="o">(</span><span class="s">"item"</span><span class="o">)))(</span><span class="n">sc</span><span class="o">)</span>
<span class="o">.</span><span class="n">cache</span><span class="o">()</span>
<span class="k">val</span> <span class="n">viewEventsRDD</span><span class="k">:</span> <span class="kt">RDD</span><span class="o">[</span><span class="kt">ViewEvent</span><span class="o">]</span> <span class="k">=</span> <span class="n">eventsRDD</span>
<span class="o">.</span><span class="n">filter</span> <span class="o">{</span> <span class="n">event</span> <span class="k">=&gt;</span> <span class="n">event</span><span class="o">.</span><span class="n">event</span> <span class="o">==</span> <span class="s">"view"</span> <span class="o">}</span>
<span class="o">.</span><span class="n">map</span> <span class="o">{</span> <span class="o">...</span> <span class="o">}</span>
<span class="o">...</span>
<span class="k">new</span> <span class="nc">TrainingData</span><span class="o">(...)</span>
<span class="o">}</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <h2 id='using-peventstore-engine-api' class='header-anchors'>Using PEventStore Engine API</h2><p>Please see <a href="https://docs.prediction.io/datacollection/">Event Server Overview</a> to understand <a href="https://docs.prediction.io/datacollection/eventapi/">EventAPI</a> and <a href="https://docs.prediction.io/datacollection/eventmodel/">event modeling</a>.</p><p>With <a href="https://docs.prediction.io/api/current/#io.prediction.data.store.PEventStore$">PEventStore Engine API</a>, you can easily read different events in DataSource and get the information you need.</p><p>For example, let&#39;s say you have events like the following:</p><div class="highlight json"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14</pre></td><td class="code"><pre><span class="p">{</span><span class="w">
</span><span class="s2">"event"</span><span class="p">:</span><span class="w"> </span><span class="s2">"myEvent"</span><span class="p">,</span><span class="w">
</span><span class="s2">"entityType"</span><span class="p">:</span><span class="w"> </span><span class="s2">"user"</span><span class="p">,</span><span class="w">
</span><span class="s2">"entityId"</span><span class="p">:</span><span class="w"> </span><span class="s2">"u0"</span><span class="p">,</span><span class="w">
</span><span class="s2">"targetEntityType"</span><span class="p">:</span><span class="w"> </span><span class="s2">"item"</span><span class="p">,</span><span class="w">
</span><span class="s2">"targetEntityId"</span><span class="p">:</span><span class="w"> </span><span class="s2">"i0"</span><span class="p">,</span><span class="w">
</span><span class="s2">"properties"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="s2">"a"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="mi">3</span><span class="p">,</span><span class="w">
</span><span class="s2">"b"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="s2">"some_string"</span><span class="p">,</span><span class="w">
</span><span class="s2">"c"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="s2">"a"</span><span class="p">,</span><span class="w"> </span><span class="s2">"b"</span><span class="p">,</span><span class="w"> </span><span class="s2">"c"</span><span class="p">],</span><span class="w">
</span><span class="s2">"d"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="mf">1.2</span><span class="p">,</span><span class="w"> </span><span class="mf">3.4</span><span class="p">,</span><span class="w"> </span><span class="mf">5.6</span><span class="p">],</span><span class="w">
</span><span class="s2">"e"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="mi">6</span><span class="w">
</span><span class="p">}</span><span class="w">
</span><span class="p">}</span><span class="w">
</span></pre></td></tr></tbody></table> </div> <p>Then the following code could read these events, extract the properties field of each event, and convert it to a <code>MyEvent</code> object.</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23</pre></td><td class="code"><pre> <span class="k">val</span> <span class="n">myEvents</span><span class="k">:</span> <span class="kt">RDD</span><span class="o">[</span><span class="kt">MyEvent</span><span class="o">]</span> <span class="k">=</span> <span class="nc">PEventStore</span><span class="o">.</span><span class="n">find</span><span class="o">(</span>
<span class="n">appName</span> <span class="k">=</span> <span class="n">dsp</span><span class="o">.</span><span class="n">appName</span><span class="o">,</span>
<span class="n">entityType</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="s">"user"</span><span class="o">),</span>
<span class="n">eventNames</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="nc">List</span><span class="o">(</span><span class="s">"myEvent"</span><span class="o">)),</span>
<span class="c1">// targetEntityType is optional field of an event.
</span> <span class="n">targetEntityType</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="nc">Some</span><span class="o">(</span><span class="s">"item"</span><span class="o">)))(</span><span class="n">sc</span><span class="o">)</span>
<span class="o">.</span><span class="n">map</span> <span class="o">{</span> <span class="n">event</span> <span class="k">=&gt;</span>
<span class="k">try</span> <span class="o">{</span>
<span class="nc">MyEvent</span><span class="o">(</span>
<span class="n">entityId</span> <span class="k">=</span> <span class="n">event</span><span class="o">.</span><span class="n">entityId</span><span class="o">,</span>
<span class="n">targetEntityId</span> <span class="k">=</span> <span class="n">event</span><span class="o">.</span><span class="n">targetEntityId</span><span class="o">.</span><span class="n">get</span><span class="o">,</span>
<span class="n">a</span> <span class="k">=</span> <span class="n">event</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">Int</span><span class="o">](</span><span class="s">"a"</span><span class="o">),</span>
<span class="n">b</span> <span class="k">=</span> <span class="n">event</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">String</span><span class="o">](</span><span class="s">"b"</span><span class="o">),</span>
<span class="n">c</span> <span class="k">=</span> <span class="n">event</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">List</span><span class="o">[</span><span class="kt">String</span><span class="o">]](</span><span class="s">"c"</span><span class="o">),</span>
<span class="n">d</span> <span class="k">=</span> <span class="n">event</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">List</span><span class="o">[</span><span class="kt">Double</span><span class="o">]](</span><span class="s">"d"</span><span class="o">),</span>
<span class="n">e</span> <span class="k">=</span> <span class="n">event</span><span class="o">.</span><span class="n">properties</span><span class="o">.</span><span class="n">getOpt</span><span class="o">[</span><span class="kt">Int</span><span class="o">](</span><span class="s">"e"</span><span class="o">)</span> <span class="c1">// use getOpt for optional data
</span> <span class="o">)</span>
<span class="o">}</span> <span class="k">catch</span> <span class="o">{</span>
<span class="k">case</span> <span class="n">e</span><span class="k">:</span> <span class="kt">Exception</span> <span class="o">=&gt;</span>
<span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="o">(</span><span class="n">s</span><span class="s">"Cannot convert ${event}. Exception: ${e}."</span><span class="o">)</span>
<span class="k">throw</span> <span class="n">e</span>
<span class="o">}</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>If you have used the special events <code>$set/$unset/$delete</code> to set an entity&#39;s properties, you can retrieve them with <code>PEventStore.aggregateProperties()</code>.</p><p>Please see <a href="https://docs.prediction.io/datacollection/eventmodel/">event modeling</a> to understand the usage of the special <code>$set/$unset/$delete</code> events.</p><p>For example, the following code shows how you could retrieve properties of the &quot;item&quot; entities:</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23</pre></td><td class="code"><pre> <span class="c1">// create a RDD of (entityID, Item)
</span> <span class="k">val</span> <span class="n">itemsRDD</span><span class="k">:</span> <span class="kt">RDD</span><span class="o">[(</span><span class="kt">String</span>, <span class="kt">Item</span><span class="o">)]</span> <span class="k">=</span> <span class="nc">PEventStore</span><span class="o">.</span><span class="n">aggregateProperties</span><span class="o">(</span>
<span class="n">appName</span> <span class="k">=</span> <span class="n">dsp</span><span class="o">.</span><span class="n">appName</span><span class="o">,</span>
<span class="n">entityType</span> <span class="k">=</span> <span class="s">"item"</span>
<span class="o">)(</span><span class="n">sc</span><span class="o">).</span><span class="n">map</span> <span class="o">{</span> <span class="k">case</span> <span class="o">(</span><span class="n">entityId</span><span class="o">,</span> <span class="n">properties</span><span class="o">)</span> <span class="k">=&gt;</span>
<span class="k">try</span> <span class="o">{</span>
<span class="k">val</span> <span class="n">item</span> <span class="k">=</span> <span class="nc">Item</span><span class="o">(</span>
<span class="n">a</span> <span class="k">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">Int</span><span class="o">](</span><span class="s">"a"</span><span class="o">),</span>
<span class="n">b</span> <span class="k">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">String</span><span class="o">](</span><span class="s">"b"</span><span class="o">),</span>
<span class="n">c</span> <span class="k">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">List</span><span class="o">[</span><span class="kt">String</span><span class="o">]](</span><span class="s">"c"</span><span class="o">),</span>
<span class="n">d</span> <span class="k">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">get</span><span class="o">[</span><span class="kt">List</span><span class="o">[</span><span class="kt">Double</span><span class="o">]](</span><span class="s">"d"</span><span class="o">),</span>
<span class="n">e</span> <span class="k">=</span> <span class="n">properties</span><span class="o">.</span><span class="n">getOpt</span><span class="o">[</span><span class="kt">Int</span><span class="o">](</span><span class="s">"e"</span><span class="o">)</span> <span class="c1">// use getOpt for optional data
</span> <span class="o">)</span>
<span class="o">(</span><span class="n">entityId</span><span class="o">,</span> <span class="n">item</span><span class="o">)</span>
<span class="o">}</span> <span class="k">catch</span> <span class="o">{</span>
<span class="k">case</span> <span class="n">e</span><span class="k">:</span> <span class="kt">Exception</span> <span class="o">=&gt;</span>
<span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="o">(</span><span class="n">s</span><span class="s">"Failed to get properties ${properties} of ${entityId}. Exception: ${e}."</span><span class="o">)</span>
<span class="k">throw</span> <span class="n">e</span>
<span class="o">}</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>Example:</p> <ul> <li><a href="/templates/similarproduct/dase/#data">DataSource of Similar Product Template</a></li> </ul> <h1 id='preparator' class='header-anchors'>Preparator</h1><p>Preparator is responsible for pre-processing <code>TrainingData</code> for any necessary feature selection and data processing tasks and for generating <code>PreparedData</code>, which contains the data the Algorithm needs.</p><p>A few example usages of Preparator:</p> <ul> <li>Feature extraction</li> <li>Common pre-processing logic if you have multiple algorithms</li> <li>For simple cases, the Preparator may simply pass the same <code>TrainingData</code> as <code>PreparedData</code> for Algorithm.</li> </ul> <h2 id='prepare' class='header-anchors'>prepare()</h2><p>You need to implement the <code>prepare()</code> method of <a href="https://docs.prediction.io/api/current/#io.prediction.controller.PPreparator">PPreparator</a> to perform such tasks.</p><p>Example:</p> <ul> <li><a href="/templates/leadscoring/dase/#data">Preparator of Lead Scoring Template</a>: it pre-processes the TrainingData and generates the feature vectors needed for the algorithm.</li> <li><a href="/templates/similarproduct/dase/#data">Preparator of Similar Product Template</a>: it simply passes the TrainingData as PreparedData for the algorithm.</li> </ul> <h1 id='algorithm' class='header-anchors'>Algorithm</h1><p>The two methods of the Algorithm class are train() and predict():</p><h2 id='train' class='header-anchors'>train()</h2><p>train() is responsible for training a predictive model. It is called when you run <code>pio train</code>. Apache PredictionIO (incubating) will store this model.</p><h2 id='predict' class='header-anchors'>predict()</h2><p>predict() is responsible for using this model to make predictions. It is called when you send a JSON query to the engine. 
Note that predict() is called in real time.</p><p>Apache PredictionIO (incubating) supports two types of algorithms:</p> <ul> <li><strong><a href="https://docs.prediction.io/api/current/#io.prediction.controller.P2LAlgorithm">P2LAlgorithm</a></strong>: trains a Model which does not contain RDD</li> <li><strong><a href="https://docs.prediction.io/api/current/#io.prediction.controller.PAlgorithm">PAlgorithm</a></strong>: trains a Model which contains RDD</li> </ul> <h2 id='p2lalgorithm' class='header-anchors'>P2LAlgorithm</h2><p>For <code>P2LAlgorithm</code>, the Model is automatically serialized and persisted by Apache PredictionIO (incubating) after training.</p><p>Implementing <code>IPersistentModel</code> and <code>IPersistentModelLoader</code> is optional for P2LAlgorithm.</p><p>Example:</p> <ul> <li><a href="/templates/similarproduct/dase/#algorithm">Algorithm of Similar Product Template</a></li> </ul> <h2 id='palgorithm' class='header-anchors'>PAlgorithm</h2><p><code>PAlgorithm</code> should be used when your Model contains RDD. The model produced by <code>PAlgorithm</code> is not persisted by default. To persist the model, you need to do the following:</p> <ul> <li>The Model class should extend the <code>IPersistentModel</code> trait and implement the <code>save()</code> method for saving the model. The trait <code>IPersistentModel</code> requires a type parameter which is the class type of algorithm parameter.</li> <li>Implement a Model factory object which extends the <code>IPersistentModelLoader</code> trait and implement the <code>apply()</code> for loading the model. 
The trait <code>IPersistentModelLoader</code> requires two type parameters which are the types of algorithm parameter and the model produced by the algorithm.</li> </ul> <p>Example:</p> <ul> <li><a href="/templates/recommendation/dase/#algorithm">Algorithm of Recommendation Template</a>: it implements PAlgorithm and the IPersistentModel and IPersistentModelLoader.</li> <li><a href="/templates/vanilla/dase">Algorithm of Vanilla Template</a>: it walks through examples of P2LAlgorithm and PAlgorithm.</li> </ul> <h2 id='using-leventstore-engine-api-in-predict()' class='header-anchors'>using LEventStore Engine API in predict()</h2><p>You may use <a href="https://docs.prediction.io/api/current/#io.prediction.data.store.LEventStore$">LEventStore.findByEntity()</a> to retrieve events of a specific entity (for example, the recent events of the user specified in the query) and use these recent events to make predictions in real time.</p><p>For example, the following code reads the 10 most recent view events of <code>query.user</code>:</p><div class="highlight scala"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22</pre></td><td class="code"><pre> <span class="k">val</span> <span class="n">recentEvents</span> <span class="k">=</span> <span class="k">try</span> <span class="o">{</span>
<span class="nc">LEventStore</span><span class="o">.</span><span class="n">findByEntity</span><span class="o">(</span>
<span class="n">appName</span> <span class="k">=</span> <span class="n">ap</span><span class="o">.</span><span class="n">appName</span><span class="o">,</span>
<span class="c1">// entityType and entityId is specified for fast lookup
</span> <span class="n">entityType</span> <span class="k">=</span> <span class="s">"user"</span><span class="o">,</span>
<span class="n">entityId</span> <span class="k">=</span> <span class="n">query</span><span class="o">.</span><span class="n">user</span><span class="o">,</span>
<span class="n">eventNames</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="nc">List</span><span class="o">(</span><span class="s">"view"</span><span class="o">)),</span>
<span class="n">targetEntityType</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="nc">Some</span><span class="o">(</span><span class="s">"item"</span><span class="o">)),</span>
<span class="n">limit</span> <span class="k">=</span> <span class="nc">Some</span><span class="o">(</span><span class="mi">10</span><span class="o">),</span>
<span class="n">latest</span> <span class="k">=</span> <span class="kc">true</span><span class="o">,</span>
<span class="c1">// set time limit to avoid super long DB access
</span> <span class="n">timeout</span> <span class="k">=</span> <span class="nc">Duration</span><span class="o">(</span><span class="mi">200</span><span class="o">,</span> <span class="s">"millis"</span><span class="o">)</span>
<span class="o">)</span>
<span class="o">}</span> <span class="k">catch</span> <span class="o">{</span>
<span class="k">case</span> <span class="n">e</span><span class="k">:</span> <span class="kt">scala.concurrent.TimeoutException</span> <span class="o">=&gt;</span>
<span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="o">(</span><span class="n">s</span><span class="s">"Timeout when read recent events."</span> <span class="o">+</span>
<span class="n">s</span><span class="s">" Empty list is used. ${e}"</span><span class="o">)</span>
<span class="nc">Iterator</span><span class="o">[</span><span class="kt">Event</span><span class="o">]()</span>
<span class="k">case</span> <span class="n">e</span><span class="k">:</span> <span class="kt">Exception</span> <span class="o">=&gt;</span>
<span class="n">logger</span><span class="o">.</span><span class="n">error</span><span class="o">(</span><span class="n">s</span><span class="s">"Error when read recent events: ${e}"</span><span class="o">)</span>
<span class="k">throw</span> <span class="n">e</span>
<span class="o">}</span>
</pre></td></tr></tbody></table> </div> <p>Example:</p> <ul> <li><a href="/templates/ecommercerecommendation/dase#algorithm">Algorithm of E-Commerce Recommendation template</a>: LEventStore.findByEntity() is used to retrieve all items seen by the user and filter them out of the recommendations in predict().</li> </ul> <h1 id='serving' class='header-anchors'>Serving</h1><h2 id='serve()' class='header-anchors'>serve()</h2><p>You need to implement the serve() method of the class <a href="https://docs.prediction.io/api/current/#io.prediction.controller.LServing">LServing</a>. The serve() method processes the predicted result. It is also responsible for combining multiple predicted results into one if you have more than one predictive model.</p><p>Example:</p> <ul> <li><a href="/templates/similarproduct/dase/#serving">Serving of Similar Product Template</a>: It simply returns the predicted result.</li> <li><a href="/templates/similarproduct/multi-events-multi-algos/">Serving of multi-algorithm examples of Similar Product Template</a>: It combines the results of multiple algorithms and returns the combined result.</li> </ul> </div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a href="//docs.prediction.io/" target="blank">Docs</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.incubator.apache.org/community/contribute-code/" 
target="blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
_st('install','HaUfpXXV87xoB_zzCQ45');</script><script src="/javascripts/application-5a24945b.js"></script></body></html>