blob: 6c28825f7c52b7578842d3a192e74ee033f8d26f [file] [log] [blame]
---
title: Quick Start - Complementary Purchase Engine Template
---
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
## Overview
This engine template recommends the complementary items which most users frequently buy at the same time together with one or more items in the query.
## Usage
### Event Data Requirements
By default, the template requires the following events to be collected:
- user 'buy' item events
INFO: A correct eventTime should be used in order for the engine to determine if the items being bought are in the same 'basket'.
NOTE: You can customize the template to use other events.
### Input Query
- set of items
- number of recommended items per condition
### Output PredictedResult
- array of condition and top n recommended items given the condition. The engine will use each combination of the query items as condition.
## 1. Install and Run PredictionIO
<%= partial 'shared/quickstart/install' %>
## 2. Create a new Engine from an Engine Template
<%= partial 'shared/quickstart/create_engine', locals: { engine_name: 'MyComplementaryPurchase', template_name: 'Complementary Purchase Engine Template', template_repo: 'PredictionIO/template-scala-parallel-complementarypurchase' } %>
## 3. Generate an App ID and Access Key
<%= partial 'shared/quickstart/create_app' %>
## 4. Collecting Data
Next, let's collect training data for this Engine. By default,
Complementary Purchase Engine Template supports the following entities: **user**, **item**. A user buys an item. This template requires user-buy-item events.
Note that the engine requires the correct buy event time to be used in order to determine if the items being bought are in the same 'basket', which is configured by the 'basketWindow' parameter. Using an inaccurate event time for the buy events will result in an incorrect model. If you use an SDK, the current time is used as event time by default.
WARNING: In particular, make sure correct event time is specified if you import data in batch (i.e. not in real time). If the event time is omitted, the SDK will use **current time** as event time which is not the actual time of the buy event in this case!
<%= partial 'shared/quickstart/collect_data' %>
When a user u0 buys item i0 at time `2014-11-02T09:39:45.618-08:00` (current time will be used if eventTime is not specified), you can send a buy event. Run the following `curl` command:
<div class="tabs">
<div data-tab="REST API" data-lang="json">
```
$ curl -i -X POST http://localhost:7070/events.json?accessKey=$ACCESS_KEY \
-H "Content-Type: application/json" \
-d '{
"event" : "buy",
"entityType" : "user",
"entityId" : "u0",
"targetEntityType" : "item",
"targetEntityId" : "i0",
"eventTime" : "2014-11-02T09:39:45.618-08:00"
}'
```
</div>
<div data-tab="Python SDK" data-lang="python">
```python
import predictionio
client = predictionio.EventClient(
access_key=<ACCESS KEY>,
url=<URL OF EVENTSERVER>,
threads=5,
qsize=500
)
# A user buys an item (use current time as event time)
client.create_event(
event="buy",
entity_type="user",
entity_id=<USER ID>,
target_entity_type="item",
target_entity_id=<ITEM ID>
)
# A user buys an item (explicitly specify event time)
client.create_event(
event="buy",
entity_type="user",
entity_id=<USER ID>,
target_entity_type="item",
target_entity_id=<ITEM ID>,
event_time=<EVENT_TIME>
)
```
</div>
<div data-tab="PHP SDK" data-lang="php">
```php
<?php
require_once("vendor/autoload.php");
use predictionio\EventClient;
$client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);
// A user buys an item (use current time as event time)
$client->createEvent(array(
'event' => 'buy',
'entityType' => 'user',
'entityId' => <USER ID>,
'targetEntityType' => 'item',
'targetEntityId' => <ITEM ID>
));
// A user buys an item (explicitly specify event time)
$client->createEvent(array(
'event' => 'buy',
'entityType' => 'user',
'entityId' => <USER ID>,
'targetEntityType' => 'item',
'targetEntityId' => <ITEM ID>,
'eventTime' => <EVENT_TIME>
));
?>
```
</div>
<div data-tab="Ruby SDK" data-lang="ruby">
```ruby
# Create a client object.
client = PredictionIO::EventClient.new(<ACCESS KEY>, <URL OF EVENTSERVER>)
# A user buys an item (use current time as event time)
client.create_event(
'buy',
'user',
<USER ID>, {
'targetEntityType' => 'item',
'targetEntityId' => <ITEM ID>
}
)
# A user buys an item (explicitly specify event time)
client.create_event(
'buy',
'user',
<USER ID>, {
'targetEntityType' => 'item',
'targetEntityId' => <ITEM ID>,
'eventTime' => <EVENT_TIME>
}
)
```
</div>
<div data-tab="Java SDK" data-lang="java">
```java
import org.apache.predictionio.Event;
import org.apache.predictionio.EventClient;
import com.google.common.collect.ImmutableList;
EventClient client = new EventClient(<ACCESS KEY>, <URL OF EVENTSERVER>);
// A user buys an item (use current time as event time)
Event buyEvent = new Event()
.event("buy")
.entityType("user")
.entityId(<USER_ID>)
.targetEntityType("item")
.targetEntityId(<ITEM_ID>)
client.createEvent(buyEvent);
// A user buys an item (explicitly specify event time)
Event buyEvent = new Event()
.event("buy")
.entityType("user")
.entityId(<USER_ID>)
.targetEntityType("item")
.targetEntityId(<ITEM_ID>)
.eventTime(<EVENT_TIME>)
client.createEvent(buyEvent);
```
</div>
</div>
<%= partial 'shared/quickstart/query_eventserver' %>
### Import More Sample Data
<%= partial 'shared/quickstart/import_sample_data' %>
A Python import script `import_eventserver.py` is provided to import sample data. The script generates some frequent item sets (prefixed with "s"), some other random items (prefixed with "i") and a few popular items (prefixed with "p"). Then each user (with user ID "u1" to "u10") performs 5 buy transactions (buy events are within 10 seconds in each transaction). In each transaction, the user may or may not buy some random items, always buys one of the popular items, and buys 2 or more items in one of the frequent item sets.
<%= partial 'shared/quickstart/install_python_sdk' %>
Make sure you are under the `MyComplementaryPurchase` directory. Execute the following to import the data:
```
$ cd MyComplementaryPurchase
$ python data/import_eventserver.py --access_key $ACCESS_KEY
```
You should see the following output:
```
...
User u10 buys item i20 at 2014-10-19 15:42:35.618000-07:53
User u10 buys item i5 at 2014-10-19 15:42:45.618000-07:53
User u10 buys item p3 at 2014-10-19 15:42:55.618000-07:53
User u10 buys item s2i3 at 2014-10-19 15:43:05.618000-07:53
User u10 buys item s2i1 at 2014-10-19 15:43:15.618000-07:53
225 events are imported.
```
<%= partial 'shared/quickstart/query_eventserver_short' %>
## 5. Deploy the Engine as a Service
<%= partial 'shared/quickstart/deploy_enginejson', locals: { engine_name: 'MyComplementaryPurchase' } %>
<%= partial 'shared/quickstart/deploy', locals: { engine_name: 'MyComplementaryPurchase' } %>
## 6. Use the Engine
Now, you can query the engine. For example, return the top 3 items which are frequently bought with item "s2i1". You can send this JSON '{ "items" : ["s2i1"], "num" : 3 }' to the deployed engine. The engine will return a JSON with the recommended items.
If you include one or more items in the query, the engine will use each combination of the query items as a condition, and return recommended items if there are any for this condition. For example, if your query items are ["A", "B"], then the engine will use ["A"], ["B"], and ["A", "B"] as conditions and try to find the top n recommended items for each combination.
You can simply send a query by making a HTTP request or through the `EngineClient` of an SDK.
With the deployed engine running, open another terminal and run the following `curl` command or use SDK to send the query:
<div class="tabs">
<div data-tab="REST API" data-lang="json">
```
$ curl -H "Content-Type: application/json" \
-d '{
"items" : ["s2i1"],
"num" : 3
}' \
http://localhost:8000/queries.json
```
</div>
<div data-tab="Python SDK" data-lang="python">
```python
import predictionio
engine_client = predictionio.EngineClient(url="http://localhost:8000")
print engine_client.send_query({
"items" : ["s2i1"],
"num" : 3
})
```
</div>
<div data-tab="PHP SDK" data-lang="php">
```php
<?php
require_once("vendor/autoload.php");
use predictionio\EngineClient;
$client = new EngineClient('http://localhost:8000');
$response = $client->sendQuery(array(
'items' => array('s2i1'),
'num' => 3
));
print_r($response);
?>
```
</div>
<div data-tab="Ruby SDK" data-lang="ruby">
```ruby
# Create client object.
client = PredictionIO::EngineClient.new('http://localhost:8000')
# Query PredictionIO.
response = client.send_query(
'items' => ['s2i1'],
'num' => 3
)
puts response
```
</div>
<div data-tab="Java SDK" data-lang="java">
```java
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableList;
import com.google.gson.JsonObject;
import org.apache.predictionio.EngineClient;
// create client object
EngineClient engineClient = new EngineClient("http://localhost:8000");
// query
JsonObject response = engineClient.sendQuery(ImmutableMap.<String, Object>of(
"items", ImmutableList.of("s2i1"),
"num", 3
));
```
</div>
</div>
The following is a sample JSON response. The `cond` field is one of the combinations of query items used as the condition to determine other items frequently bought with this condition, followed by the top items. If there are multiple conditions with recommended items found, the `rules` array will contain multiple elements, each corresponding to one condition.
```
{
"rules":[
{
"cond":["s2i1"],
"itemScores":[
{
"item":"s2i2",
"support":0.2,
"confidence":0.9090909090909091,
"lift":3.787878787878788
},
{
"item":"s2i3",
"support":0.14,
"confidence":0.6363636363636364,
"lift":3.535353535353535
}
]
}
]
}
```
*MyComplementaryPurchase* is now running.
<%= partial 'shared/quickstart/production' %>
#### [Next: DASE Components Explained](/templates/complementarypurchase/dase/)