blob: d8db4b73a069ec98d0a9e1572a24e4493a996ab5 [file] [log] [blame]
<!DOCTYPE html><html><head><title>Using Another Data Store</title><meta charset="utf-8"/><meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/><meta name="viewport" content="width=device-width, initial-scale=1.0"/><meta class="swiftype" name="title" data-type="string" content="Using Another Data Store"/><link rel="canonical" href="https://docs.prediction.io/system/anotherdatastore/"/><link href="/images/favicon/normal-b330020a.png" rel="shortcut icon"/><link href="/images/favicon/apple-c0febcf2.png" rel="apple-touch-icon"/><link href="//fonts.googleapis.com/css?family=Open+Sans:300italic,400italic,600italic,700italic,800italic,400,300,600,700,800" rel="stylesheet"/><link href="//maxcdn.bootstrapcdn.com/font-awesome/4.2.0/css/font-awesome.min.css" rel="stylesheet"/><link href="/stylesheets/application-3598c7d7.css" rel="stylesheet" type="text/css"/><script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.2/html5shiv.min.js"></script><script src="//cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script><script src="//use.typekit.net/pqo0itb.js"></script><script>try{Typekit.load({ async: true });}catch(e){}</script></head><body><div id="global"><header><div class="container" id="header-wrapper"><div class="row"><div class="col-sm-12"><div id="logo-wrapper"><span id="drawer-toggle"></span><a href="#"></a><a href="http://predictionio.incubator.apache.org/"><img alt="PredictionIO" id="logo" src="/images/logos/logo-ee2b9bb3.png"/></a></div><div id="menu-wrapper"><div id="header-nav-options-wrapper"><ul><li><a href="/">Install & Doc</a></li> <li><a href="/support">Support</a></li> </ul></div><div id="pill-wrapper"><a class="pill left" href="//templates.prediction.io/">TEMPLATES</a> <a class="pill right" href="//github.com/apache/incubator-predictionio/">OPEN SOURCE</a></div></div><img class="mobile-search-bar-toggler hidden-md hidden-lg" src="/images/icons/search-glass-704bd4ff.png"/></div></div></div></header><div 
id="search-bar-row-wrapper"><div class="container-fluid" id="search-bar-row"><div class="row"><div class="col-md-9 col-sm-11 col-xs-11"><div class="hidden-md hidden-lg" id="mobile-page-heading-wrapper"><p>PredictionIO Docs</p><h4>Using Another Data Store</h4></div><h4 class="hidden-sm hidden-xs">PredictionIO Docs</h4></div><div class="col-md-3 col-sm-1 col-xs-1 hidden-md hidden-lg"><img id="left-menu-indicator" src="/images/icons/down-arrow-dfe9f7fe.png"/></div><div class="col-md-3 col-sm-12 col-xs-12 swiftype-wrapper"><div class="swiftype"><form class="search-form"><img class="search-box-toggler hidden-xs hidden-sm" src="/images/icons/search-glass-704bd4ff.png"/><div class="search-box"><img src="/images/icons/search-glass-704bd4ff.png"/><input type="text" id="st-search-input" class="st-search-input" placeholder="Search Doc..."/></div><img class="swiftype-row-hider hidden-md hidden-lg" src="/images/icons/drawer-toggle-active-fcbef12a.png"/></form></div></div><div class="mobile-left-menu-toggler hidden-md hidden-lg"></div></div></div></div><div id="page" class="container-fluid"><div class="row"><div id="left-menu-wrapper" class="col-md-3"><nav id="nav-main"><ul><li class="level-1"><a class="expandible" href="/"><span>Apache PredictionIO (incubating) Documentation</span></a><ul><li class="level-2"><a class="final" href="/"><span>Welcome to Apache PredictionIO (incubating)</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Getting Started</span></a><ul><li class="level-2"><a class="final" href="/start/"><span>A Quick Intro</span></a></li><li class="level-2"><a class="final" href="/install/"><span>Installing Apache PredictionIO (incubating)</span></a></li><li class="level-2"><a class="final" href="/start/download/"><span>Downloading an Engine Template</span></a></li><li class="level-2"><a class="final" href="/start/deploy/"><span>Deploying Your First Engine</span></a></li><li class="level-2"><a class="final" 
href="/start/customize/"><span>Customizing the Engine</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Integrating with Your App</span></a><ul><li class="level-2"><a class="final" href="/appintegration/"><span>App Integration Overview</span></a></li><li class="level-2"><a class="expandible" href="/sdk/"><span>List of SDKs</span></a><ul><li class="level-3"><a class="final" href="/sdk/java/"><span>Java & Android SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/php/"><span>PHP SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/python/"><span>Python SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/ruby/"><span>Ruby SDK</span></a></li><li class="level-3"><a class="final" href="/sdk/community/"><span>Community Powered SDKs</span></a></li></ul></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Deploying an Engine</span></a><ul><li class="level-2"><a class="final" href="/deploy/"><span>Deploying as a Web Service</span></a></li><li class="level-2"><a class="final" href="/cli/#engine-commands"><span>Engine Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/deploy/engineparams/"><span>Setting Engine Parameters</span></a></li><li class="level-2"><a class="final" href="/deploy/enginevariants/"><span>Deploying Multiple Engine Variants</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Customizing an Engine</span></a><ul><li class="level-2"><a class="final" href="/customize/"><span>Learning DASE</span></a></li><li class="level-2"><a class="final" href="/customize/dase/"><span>Implement DASE</span></a></li><li class="level-2"><a class="final" href="/customize/troubleshooting/"><span>Troubleshooting Engine Development</span></a></li><li class="level-2"><a class="final" href="/api/current/#package"><span>Engine Scala APIs</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Collecting and 
Analyzing Data</span></a><ul><li class="level-2"><a class="final" href="/datacollection/"><span>Event Server Overview</span></a></li><li class="level-2"><a class="final" href="/cli/#event-server-commands"><span>Event Server Command-line Interface</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventapi/"><span>Collecting Data with REST/SDKs</span></a></li><li class="level-2"><a class="final" href="/datacollection/eventmodel/"><span>Events Modeling</span></a></li><li class="level-2"><a class="final" href="/datacollection/webhooks/"><span>Unifying Multichannel Data with Webhooks</span></a></li><li class="level-2"><a class="final" href="/datacollection/channel/"><span>Channel</span></a></li><li class="level-2"><a class="final" href="/datacollection/batchimport/"><span>Importing Data in Batch</span></a></li><li class="level-2"><a class="final" href="/datacollection/analytics/"><span>Using Analytics Tools</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Choosing an Algorithm(s)</span></a><ul><li class="level-2"><a class="final" href="/algorithm/"><span>Built-in Algorithm Libraries</span></a></li><li class="level-2"><a class="final" href="/algorithm/switch/"><span>Switching to Another Algorithm</span></a></li><li class="level-2"><a class="final" href="/algorithm/multiple/"><span>Combining Multiple Algorithms</span></a></li><li class="level-2"><a class="final" href="/algorithm/custom/"><span>Adding Your Own Algorithms</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>ML Tuning and Evaluation</span></a><ul><li class="level-2"><a class="final" href="/evaluation/"><span>Overview</span></a></li><li class="level-2"><a class="final" href="/evaluation/paramtuning/"><span>Hyperparameter Tuning</span></a></li><li class="level-2"><a class="final" href="/evaluation/evaluationdashboard/"><span>Evaluation Dashboard</span></a></li><li class="level-2"><a class="final" 
href="/evaluation/metricchoose/"><span>Choosing Evaluation Metrics</span></a></li><li class="level-2"><a class="final" href="/evaluation/metricbuild/"><span>Building Evaluation Metrics</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>System Architecture</span></a><ul><li class="level-2"><a class="final" href="/system/"><span>Architecture Overview</span></a></li><li class="level-2"><a class="final active" href="/system/anotherdatastore/"><span>Using Another Data Store</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Engine Template Gallery</span></a><ul><li class="level-2"><a class="final" href="http://templates.prediction.io"><span>Browse</span></a></li><li class="level-2"><a class="final" href="/community/submit-template/"><span>Submit your Engine as a Template</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Demo Tutorials</span></a><ul><li class="level-2"><a class="final" href="/demo/tapster/"><span>Comics Recommendation Demo</span></a></li><li class="level-2"><a class="final" href="/demo/community/"><span>Community Contributed Demo</span></a></li><li class="level-2"><a class="final" href="/demo/textclassification/"><span>Text Classification Engine Tutorial</span></a></li></ul></li><li class="level-1"><a class="expandible" href="/community/"><span>Getting Involved</span></a><ul><li class="level-2"><a class="final" href="/community/contribute-code/"><span>Contribute Code</span></a></li><li class="level-2"><a class="final" href="/community/contribute-documentation/"><span>Contribute Documentation</span></a></li><li class="level-2"><a class="final" href="/community/contribute-sdk/"><span>Contribute a SDK</span></a></li><li class="level-2"><a class="final" href="/community/contribute-webhook/"><span>Contribute a Webhook</span></a></li><li class="level-2"><a class="final" href="/community/projects/"><span>Community Projects</span></a></li></ul></li><li 
class="level-1"><a class="expandible" href="#"><span>Getting Help</span></a><ul><li class="level-2"><a class="final" href="/resources/faq/"><span>FAQs</span></a></li><li class="level-2"><a class="final" href="/support/"><span>Community Support</span></a></li><li class="level-2"><a class="final" href="/support/#enterprise-support"><span>Enterprise Support</span></a></li></ul></li><li class="level-1"><a class="expandible" href="#"><span>Resources</span></a><ul><li class="level-2"><a class="final" href="/resources/intellij/"><span>Developing Engines with IntelliJ IDEA</span></a></li><li class="level-2"><a class="final" href="/resources/upgrade/"><span>Upgrade Instructions</span></a></li><li class="level-2"><a class="final" href="/resources/glossary/"><span>Glossary</span></a></li></ul></li></ul></nav></div><div class="col-md-9 col-sm-12"><div class="content-header hidden-md hidden-lg"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">System Architecture</a><span class="spacer">&gt;</span></li><li><span class="last">Using Another Data Store</span></li></ul></div><div id="page-title"><h1>Using Another Data Store</h1></div></div><div id="table-of-content-wrapper"><h5>On this page</h5><aside id="table-of-contents"><ul> <li> <a href="#concepts">Concepts</a> </li> <li> <a href="#data-store-configuration">Data Store Configuration</a> </li> <li> <a href="#adding-support-of-other-backends">Adding Support of Other Backends</a> </li> </ul> </aside><hr/><a id="edit-page-link" href="https://github.com/apache/incubator-predictionio/tree/livedoc/docs/manual/source/system/anotherdatastore.html.md"><img src="/images/icons/edit-pencil-d6c1bb3d.png"/>Edit this page</a></div><div class="content-header hidden-sm hidden-xs"><div id="breadcrumbs" class="hidden-sm hidden xs"><ul><li><a href="#">System Architecture</a><span class="spacer">&gt;</span></li><li><span class="last">Using Another Data Store</span></li></ul></div><div id="page-title"><h1>Using Another Data 
Store</h1></div></div><div class="content"><p>PredictionIO has a thin storage layer to abstract meta data, event data, and model data access. The layer defines a set of standard interfaces to support multiple data store backends. PredictionIO users can configure the backend of choice through configuration files or environment variables. Engine developers need not worry about the actual underlying storage architecture. Advanced developers can implement their own backend driver as an external library.</p><h2 id='concepts' class='header-anchors'>Concepts</h2><p>In this section, we will visit some storage layer concepts that are common to users, engine developers, and advanced developers:</p> <ul> <li><p><strong>Repository</strong> is the highest level of data access abstraction and is the interface through which all engines and PredictionIO itself access data.</p></li> <li><p><strong>Source</strong> is the actual data store backend that provides data access. A source is an implementation of the set of data access interfaces defined by <em>repositories</em>.</p></li> </ul> <p>Each of them will be explained in detail below:</p><h3 id='repositories' class='header-anchors'>Repositories</h3><p><em>Repository</em> is the highest level of data access abstraction and is the interface through which all engines and PredictionIO itself access data.</p><p>The storage layer currently defines three mandatory data repositories: <em>meta data</em>, <em>event data</em>, and <em>model data</em>. Each repository has its own set of data access interfaces.</p> <ul> <li><p><strong>Meta data</strong> is used by PredictionIO to store engine training and evaluation information. 
Commands like <code>pio build</code>, <code>pio train</code>, <code>pio deploy</code>, and <code>pio eval</code> all access meta data.</p></li> <li><p><strong>Event data</strong> is used by the Event Server to collect events, and by engines to source data.</p></li> <li><p><strong>Model data</strong> is used by PredictionIO for automatic persistence of trained models.</p></li> </ul> <p>The following configuration variables are used to configure these repositories:</p> <ul> <li><em>Meta data</em> is configured by the <code>PIO_STORAGE_REPOSITORIES_METADATA_XXX</code> variables.</li> <li><em>Event data</em> is configured by the <code>PIO_STORAGE_REPOSITORIES_EVENTDATA_XXX</code> variables.</li> <li><em>Model data</em> is configured by the <code>PIO_STORAGE_REPOSITORIES_MODELDATA_XXX</code> variables.</li> </ul> <p>Configuration variables will be explained in more detail in a later section (see <a href="#data-store-configuration">Data Store Configuration</a>).</p><p>For example, you may see the following configuration variables defined in <code>conf/pio-env.sh</code>:</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4
5
6
7
8</pre></td><td class="code"><pre><span class="nv">PIO_STORAGE_REPOSITORIES_METADATA_NAME</span><span class="o">=</span>predictionio_metadata
<span class="nv">PIO_STORAGE_REPOSITORIES_METADATA_SOURCE</span><span class="o">=</span>ELASTICSEARCH
<span class="nv">PIO_STORAGE_REPOSITORIES_EVENTDATA_NAME</span><span class="o">=</span>predictionio_eventdata
<span class="nv">PIO_STORAGE_REPOSITORIES_EVENTDATA_SOURCE</span><span class="o">=</span>HBASE
<span class="nv">PIO_STORAGE_REPOSITORIES_MODELDATA_NAME</span><span class="o">=</span>pio_
<span class="nv">PIO_STORAGE_REPOSITORIES_MODELDATA_SOURCE</span><span class="o">=</span>LOCALFS
</pre></td></tr></tbody></table> </div> <p>The configuration variable with the <em>NAME</em> suffix controls the namespace used by the <em>source</em>.</p><p>The configuration variable with the <em>SOURCE</em> suffix points to the actual <strong>source</strong> that will back this repository. <em>Source</em> will be explained below.</p><h3 id='sources' class='header-anchors'>Sources</h3><p><em>Sources</em> are actual data store backends that provide data access. A source is an implementation of the set of data access interfaces defined by <em>repositories</em>.</p><p>PredictionIO comes with the following sources:</p> <ul> <li><p><strong>JDBC</strong> (tested on MySQL and PostgreSQL):</p> <ul> <li>Type name is <strong>jdbc</strong>.</li> <li>Can be used for <em>Meta Data</em>, <em>Event Data</em> and <em>Model Data</em> repositories</li> </ul></li> <li><p><strong>Elasticsearch</strong>:</p> <ul> <li>Type name is <strong>elasticsearch</strong></li> <li>Can be used for <em>Meta Data</em> repository</li> </ul></li> <li><p><strong>Apache HBase</strong>:</p> <ul> <li>Type name is <strong>hbase</strong></li> <li>Can be used for <em>Event Data</em> repository</li> </ul></li> <li><p><strong>Local file system</strong>:</p> <ul> <li>Type name is <strong>localfs</strong></li> <li>Can be used for <em>Model Data</em> repository</li> </ul></li> <li><p><strong>HDFS</strong>:</p> <ul> <li>Type name is <strong>hdfs</strong>.</li> <li>Can be used for <em>Model Data</em> repository</li> </ul></li> </ul> <p>Each repository can be configured to use different sources as shown above.</p><p>Each source has its own set of configuration parameters. 
Configuration variables will be explained in more detail in a later section (see <a href="#data-store-configuration">Data Store Configuration</a>).</p><p>The following is an example source configuration named &quot;PGSQL&quot; with type <code>jdbc</code>:</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3
4</pre></td><td class="code"><pre><span class="nv">PIO_STORAGE_SOURCES_PGSQL_TYPE</span><span class="o">=</span>jdbc
<span class="nv">PIO_STORAGE_SOURCES_PGSQL_URL</span><span class="o">=</span>jdbc:postgresql:predictionio
<span class="nv">PIO_STORAGE_SOURCES_PGSQL_USERNAME</span><span class="o">=</span>pio
<span class="nv">PIO_STORAGE_SOURCES_PGSQL_PASSWORD</span><span class="o">=</span>pio
</pre></td></tr></tbody></table> </div> <p>The following is an example of using this source &quot;PGSQL&quot; for the <em>meta data</em> repository:</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2</pre></td><td class="code"><pre><span class="nv">PIO_STORAGE_REPOSITORIES_METADATA_NAME</span><span class="o">=</span>predictionio_metadata
<span class="nv">PIO_STORAGE_REPOSITORIES_METADATA_SOURCE</span><span class="o">=</span>PGSQL
</pre></td></tr></tbody></table> </div> <h2 id='data-store-configuration' class='header-anchors'>Data Store Configuration</h2><p>Data store configuration is done by setting environment variables. If you set them inside <code>conf/pio-env.sh</code>, they will be automatically available whenever you run a <code>pio</code> command, e.g. <code>pio train</code>.</p><p>Notice that all variables are prefixed by <code>PIO_STORAGE_</code>.</p><h3 id='repositories-configuration' class='header-anchors'>Repositories Configuration</h3><p>Variable Format: <code>PIO_STORAGE_REPOSITORIES_&lt;REPO&gt;_&lt;KEY&gt;</code></p><p>Configuration variables of repositories are prefixed by <code>PIO_STORAGE_REPOSITORIES_</code>, followed by the repository name (e.g. <code>METADATA</code>), and then either <code>NAME</code> or <code>SOURCE</code>.</p><p>Consider the following example:</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2</pre></td><td class="code"><pre><span class="nv">PIO_STORAGE_REPOSITORIES_METADATA_NAME</span><span class="o">=</span>predictionio_metadata
<span class="nv">PIO_STORAGE_REPOSITORIES_METADATA_SOURCE</span><span class="o">=</span>PGSQL
</pre></td></tr></tbody></table> </div> <p>The above configures PredictionIO to look for a source configured with the name <code>PGSQL</code>, and use <code>predictionio_metadata</code> as the namespace within such source. There is no restriction on namespace usage by the source, so behavior may vary. As an example, the official JDBC source uses the namespace as database table prefix.</p><h3 id='sources-configuration' class='header-anchors'>Sources Configuration</h3><p>Variable Format: <code>PIO_STORAGE_SOURCES_&lt;NAME&gt;_&lt;KEY&gt;</code></p><p>Configuration variables of sources are prefixed by <code>PIO_STORAGE_SOURCES_</code>, followed by the source name of choice (e.g. <code>PGSQL</code>, <code>MYSQL</code>, <code>HBASE</code>, etc), and a configuration <code>KEY</code>.</p><div class="alert-message info"><p>The <code>TYPE</code> configuration key is mandatory. It is used by PredictionIO to determine the actual driver type to load.</p></div><p>Depending on what the source <code>TYPE</code> is, different configuration keys are required.</p><h4 id='jdbc-configuration' class='header-anchors'>JDBC Configuration</h4><p>Variable Format: <code>PIO_STORAGE_SOURCES_[NAME]_TYPE=jdbc</code></p><p>Supported Repositories: <strong>meta</strong>, <strong>event</strong>, <strong>model</strong></p><p>Tested on: MySQL 5.1+, PostgreSQL 9.1+</p><p>When <code>TYPE</code> is set to <code>jdbc</code>, the following configuration keys are supported.</p> <ul> <li><p> URL (mandatory)</p><p>The value must be a valid JDBC URL that points to a database, e.g. <code>PIO_STORAGE_SOURCES_PGSQL_URL=jdbc:postgresql:predictionio</code></p></li> <li><p> USERNAME (mandatory)</p><p>The value must be a valid, non-empty username for the JDBC connection, e.g. <code>PIO_STORAGE_SOURCES_PGSQL_USERNAME=pio_user</code></p></li> <li><p> PASSWORD (mandatory)</p><p>The value must be a valid, non-empty password for the JDBC connection, e.g. 
<code>PIO_STORAGE_SOURCES_PGSQL_PASSWORD=pio_user_password</code></p></li> <li><p> PARTITIONS (optional, default to 4)</p><p>This value is used by Apache Spark to determine the number of partitions to use when it reads from the JDBC connection, e.g. <code>PIO_STORAGE_SOURCES_PGSQL_PARTITIONS=4</code></p></li> <li><p> CONNECTIONS (optional, default to 8)</p><p>This value is used by the scalikejdbc library to determine the maximum size of the connection pool, e.g. <code>PIO_STORAGE_SOURCES_PGSQL_CONNECTIONS=8</code></p></li> <li><p> INDEX (optional since v0.9.6, default to disabled)</p><p>This value is used to enable the creation of indexes on the entityId and entityType columns to improve performance when the findByEntity function is called. Note that the entityId and entityType columns will be created as varchar(255), e.g. <code>PIO_STORAGE_SOURCES_PGSQL_INDEX=enabled</code></p></li> </ul> <h4 id='apache-hbase-configuration' class='header-anchors'>Apache HBase Configuration</h4><p>Variable Format: <code>PIO_STORAGE_SOURCES_[NAME]_TYPE=hbase</code></p><p>Supported Repositories: <strong>event</strong></p><p>Tested on: Apache HBase 0.98.5+, 1.0.0+</p><p>When <code>TYPE</code> is set to <code>hbase</code>, no other configuration keys are required. Other client-side HBase configuration must be done through the <code>hbase-site.xml</code> pointed to by the <code>HBASE_CONF_DIR</code> configuration variable.</p><h4 id='elasticsearch-configuration' class='header-anchors'>Elasticsearch Configuration</h4><p>Variable Format: <code>PIO_STORAGE_SOURCES_[NAME]_TYPE=elasticsearch</code></p><p>Supported Repositories: <strong>meta</strong></p><p>When <code>TYPE</code> is set to <code>elasticsearch</code>, the following configuration keys are supported.</p> <ul> <li><p> HOSTS (mandatory)</p><p>Comma-separated list of hostnames, e.g. <code>PIO_STORAGE_SOURCES_ES_HOSTS=es1,es2,es3</code></p></li> <li><p> PORTS (mandatory)</p><p>Comma-separated list of ports that corresponds to <code>HOSTS</code>, e.g. 
<code>PIO_STORAGE_SOURCES_ES_PORTS=9200,9200,9222</code></p></li> <li><p> CLUSTERNAME (optional, default to <code>elasticsearch</code>)</p><p>Elasticsearch cluster name, e.g. <code>PIO_STORAGE_SOURCES_ES_CLUSTERNAME=myescluster</code></p></li> </ul> <div class="alert-message info"><p>Other advanced Elasticsearch parameters can be set by pointing the <code>ES_CONF_DIR</code> configuration variable to the location of <code>elasticsearch.yml</code>.</p></div><h4 id='local-file-system-configuration' class='header-anchors'>Local File System Configuration</h4><p>Variable Format: <code>PIO_STORAGE_SOURCES_[NAME]_TYPE=localfs</code></p><p>Supported Repositories: <strong>model</strong></p><p>When <code>TYPE</code> is set to <code>localfs</code>, the following configuration keys are supported.</p> <ul> <li><p> PATH (mandatory)</p><p>File system path where models are stored, e.g. <code>PIO_STORAGE_SOURCES_FS_PATH=/mymodels</code></p></li> </ul> <h4 id='hdfs-configuration' class='header-anchors'>HDFS Configuration</h4><p>Variable Format: <code>PIO_STORAGE_SOURCES_[NAME]_TYPE=hdfs</code></p><p>Supported Repositories: <strong>model</strong></p><p>When <code>TYPE</code> is set to <code>hdfs</code>, the following configuration keys are supported.</p> <ul> <li><p> PATH (mandatory)</p><p>HDFS path where models are stored, e.g. <code>PIO_STORAGE_SOURCES_HDFS_PATH=/mymodels</code></p></li> </ul> <h2 id='adding-support-of-other-backends' class='header-anchors'>Adding Support of Other Backends</h2><p>It is quite straightforward to implement support of other backends. A good starting point is to reference the JDBC implementation inside the <a href="https://github.com/PredictionIO/PredictionIO/tree/develop/data/src/main/scala/io/prediction/data/storage/jdbc">io.prediction.data.storage.jdbc package</a>.</p><p>Contributions of different backend implementations are highly encouraged. 
To start contributing, please refer to <a href="/community/contribute-code/">this guide</a>.</p><h3 id='deploying-your-custom-backend-support-as-a-plugin' class='header-anchors'>Deploying Your Custom Backend Support as a Plugin</h3><p>It is possible to deploy your custom backend implementation as a standalone JAR apart from the main PredictionIO binary distribution. The following is an outline of how this can be achieved.</p> <ol> <li><p> Create an SBT project with a library dependency on PredictionIO&#39;s data access base traits (inside the <code>data</code> artifact).</p></li> <li><p> Implement traits that you intend to support, and package everything into a big fat JAR (e.g. sbt-assembly).</p></li> <li><p> Create a directory named <code>plugins</code> inside PredictionIO binary installation.</p></li> <li><p> Copy the JAR from step 2 to <code>plugins</code>.</p></li> <li><p> In storage configuration, specify <code>TYPE</code> as your complete package name. As an example, if you have implemented all your traits under the package name <code>org.mystorage.jdbc</code>, use something like</p><div class="highlight shell"><table style="border-spacing: 0"><tbody><tr><td class="gutter gl" style="text-align: right"><pre class="lineno">1
2
3</pre></td><td class="code"><pre><span class="nv">PIO_STORAGE_SOURCES_MYJDBC_TYPE</span><span class="o">=</span>org.mystorage.jdbc
...
<span class="nv">PIO_STORAGE_REPOSITORIES_METADATA_SOURCE</span><span class="o">=</span>MYJDBC
</pre></td></tr></tbody></table> </div> <p>to instruct PredictionIO to pick up <code>StorageClient</code> from the appropriate package.</p></li> <li><p> Now you should be able to use your custom source and assign it to different repositories as you wish.</p></li> </ol> </div></div></div></div><footer><div class="container"><div class="seperator"></div><div class="row"><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Community</h4><ul><li><a href="//docs.prediction.io/install/" target="blank">Download</a></li><li><a href="//docs.prediction.io/" target="blank">Docs</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">GitHub</a></li><li><a href="mailto:user-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to User Mailing List</a></li><li><a href="//stackoverflow.com/questions/tagged/predictionio" target="blank">Stackoverflow</a></li></ul></div></div><div class="col-md-6 col-xs-6 footer-link-column"><div class="footer-link-column-row"><h4>Contribute</h4><ul><li><a href="//predictionio.incubator.apache.org/community/contribute-code/" target="blank">Contribute</a></li><li><a href="//github.com/apache/incubator-predictionio" target="blank">Source Code</a></li><li><a href="//issues.apache.org/jira/browse/PIO" target="blank">Bug Tracker</a></li><li><a href="mailto:dev-subscribe@predictionio.incubator.apache.org" target="blank">Subscribe to Development Mailing List</a></li></ul></div></div></div></div><div id="footer-bottom"><div class="container"><div class="row"><div class="col-md-12"><div id="footer-logo-wrapper"><img alt="PredictionIO" src="/images/logos/logo-white-d1e9c6e6.png"/></div><div id="social-icons-wrapper"><a class="github-button" href="https://github.com/apache/incubator-predictionio" data-style="mega" data-count-href="/apache/incubator-predictionio/stargazers" data-count-api="/repos/apache/incubator-predictionio#stargazers_count" data-count-aria-label="# stargazers on 
GitHub" aria-label="Star apache/incubator-predictionio on GitHub">Star</a> <a class="github-button" href="https://github.com/apache/incubator-predictionio/fork" data-icon="octicon-git-branch" data-style="mega" data-count-href="/apache/incubator-predictionio/network" data-count-api="/repos/apache/incubator-predictionio#forks_count" data-count-aria-label="# forks on GitHub" aria-label="Fork apache/incubator-predictionio on GitHub">Fork</a> <script id="github-bjs" async="" defer="" src="https://buttons.github.io/buttons.js"></script><a href="//twitter.com/predictionio" target="blank"><img alt="PredictionIO on Twitter" src="/images/icons/twitter-ea9dc152.png"/></a> <a href="//www.facebook.com/predictionio" target="blank"><img alt="PredictionIO on Facebook" src="/images/icons/facebook-5c57939c.png"/></a> </div></div></div></div></div></footer></div><script>(function(w,d,t,u,n,s,e){w['SwiftypeObject']=n;w[n]=w[n]||function(){
(w[n].q=w[n].q||[]).push(arguments);};s=d.createElement(t);
e=d.getElementsByTagName(t)[0];s.async=1;s.src=u;e.parentNode.insertBefore(s,e);
})(window,document,'script','//s.swiftypecdn.com/install/v1/st.js','_st');
_st('install','HaUfpXXV87xoB_zzCQ45');</script><script src="/javascripts/application-5a24945b.js"></script></body></html>