Merge branch 'develop'
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 40647a0..8007edf 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -54,7 +54,7 @@
# The short X.Y version.
version = '0.8'
# The full version, including alpha/beta/rc tags.
-release = '0.8.0'
+release = '0.8.2'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/docs/source/predictionio.rst b/docs/source/predictionio.rst
index e7b90e7..a31f155 100644
--- a/docs/source/predictionio.rst
+++ b/docs/source/predictionio.rst
@@ -10,7 +10,7 @@
and extract prediction results.
Please read `PredictionIO Quick Start
-<http://docs.prediction.io/0.8.0/tutorials/engines/quickstart.html>`_ for
+<http://docs.prediction.io/0.8.2/recommendation/quickstart.html>`_ for
detailed explanation.
predictionio.EventClient Class
@@ -104,7 +104,7 @@
For example, to import 100000 of user records::
>>> # generate 100000 asynchronous requests and store the AsyncRequest objects
- >>> event_client = EventClient(app_id=1)
+ >>> event_client = EventClient(access_key=<YOUR_ACCESS_KEY>)
>>> for i in range(100000):
>>> event_client.aset_user(user_record[i].uid)
>>>
diff --git a/examples/demo-movielens/README.md b/examples/demo-movielens/README.md
index 4f7f47e..b555417 100644
--- a/examples/demo-movielens/README.md
+++ b/examples/demo-movielens/README.md
@@ -5,12 +5,12 @@
Step 1. Get sample data and unzip it.
```
-$ curl -o ml-100k.zip http://www.grouplens.org/system/files/ml-100k.zip
+$ curl -o ml-100k.zip http://files.grouplens.org/datasets/movielens/ml-100k.zip
$ unzip ml-100k.zip
```
Step 2. Run this app:
```
-$ python -m examples.demo-movielens.batch_import <app_id> <server_url>
+$ python -m examples.demo-movielens.batch_import <access_key> <server_url>
```
diff --git a/examples/demo-movielens/batch_import.py b/examples/demo-movielens/batch_import.py
index fa2e1c0..762a1f3 100644
--- a/examples/demo-movielens/batch_import.py
+++ b/examples/demo-movielens/batch_import.py
@@ -3,11 +3,27 @@
import predictionio
import sys
import pytz
+import datetime
-def batch_import_task(app_id, app_data, client, all_info=False):
+def batch_import_task(app_data, client, all_info=False):
+ # event_time is an important property used by the PredictionIO platform. It
+ # is particularly useful for generating training and testing sets, which are
+ # split by event_time. Hence, when we import data, it is better to make the
+ # event_time as close to the actual time of the event as possible.
+ #
+ # However, in many cases, the data doesn't come with a time. Movie-lens' user
+ # data, for example, only reveals the age, gender, occupation, and zip code of
+ # a user. It doesn't report when the user is "created". Likewise, for items,
+ # it only reports the release date.
+ #
+ # To remedy this problem, we have to make some assumptions about the data. In
+ # this import script, the event_time for a user is set to epoch=0, and the
+ # event_time for an item is set to the release_date + 00:00:00 UTC.
print "[Info] Importing users to PredictionIO..."
+ user_create_time = datetime.datetime.fromtimestamp(0, tz=pytz.utc)
count = 0
+ set_user_request_list = []
for k, v in app_data.get_users().iteritems():
count += 1
if all_info:
@@ -17,13 +33,20 @@
sys.stdout.write('\r[Info] %s' % count)
sys.stdout.flush()
- client.aset_user(uid=v.uid)
+ set_user_request_list.append(
+ client.aset_user(uid=v.uid, event_time=user_create_time))
+ [r.get_response() for r in set_user_request_list]
sys.stdout.write('\r[Info] %s users were imported.\n' % count)
sys.stdout.flush()
print "[Info] Importing items to PredictionIO..."
count = 0
+ set_item_request_list = []
+ # event_time is a datetime, so a time component must be added to the release
+ # date.
+ midnight_utc = datetime.time(0, 0, 0, tzinfo=pytz.utc)
+ epoch = datetime.datetime.fromtimestamp(0, tz=pytz.utc)
for k, v in app_data.get_items().iteritems():
count += 1
if all_info:
@@ -34,18 +57,34 @@
sys.stdout.flush()
itypes = ("movie",) + v.genres
- client.aset_item(iid=v.iid,
- properties={
- "pio_itypes" : list(itypes),
- "pio_starttime" : v.release_date.isoformat() + 'Z',
- "name" : v.name,
- "year" : v.year } )
+ release_datetime = datetime.datetime.combine(
+ v.release_date,
+ midnight_utc)
+
+ # event_time must not be earlier than the epoch.
+ event_time = release_datetime if release_datetime > epoch else epoch
+
+ utf8_name = v.name.decode('utf-8', 'ignore')
+
+ set_item_request = client.aset_item(
+ iid=v.iid,
+ event_time=event_time,
+ properties={
+ "pio_itypes": list(itypes),
+ "pio_starttime": release_datetime.isoformat(),
+ "name": utf8_name,
+ "year": v.year } )
+
+ set_item_request_list.append(set_item_request)
+
+ [r.get_response() for r in set_item_request_list]
sys.stdout.write('\r[Info] %s items were imported.\n' % count)
sys.stdout.flush()
print "[Info] Importing rate actions to PredictionIO..."
count = 0
+ create_event_request_list = []
for v in app_data.get_rate_actions():
count += 1
if all_info:
@@ -66,6 +105,9 @@
event_time=v.t.replace(tzinfo=pytz.utc),
)
+ create_event_request_list.append(req)
+
+ [r.get_response() for r in create_event_request_list]
sys.stdout.write('\r[Info] %s rate actions were imported.\n' % count)
sys.stdout.flush()
@@ -73,12 +115,12 @@
if __name__ == '__main__':
if len(sys.argv) < 3:
sys.exit("Usage: python -m examples.demo-movielens.batch_import "
- "<app_id> <url>")
+ "<access_key> <url>")
- app_id = int(sys.argv[1])
+ access_key = sys.argv[1]
client = predictionio.EventClient(
- app_id=app_id,
+ access_key=access_key,
url=sys.argv[2],
threads=5,
qsize=500)
@@ -87,5 +129,5 @@
print "Status:", client.get_status()
app_data = AppData()
- batch_import_task(app_id, app_data, client)
+ batch_import_task(app_data, client)
client.close()
diff --git a/examples/event_sample.py b/examples/event_sample.py
index e2a1411..0a7a339 100644
--- a/examples/event_sample.py
+++ b/examples/event_sample.py
@@ -4,7 +4,10 @@
import pytz
import sys
-client = EventClient(app_id=4, url="http://localhost:7070")
+access_key = None
+assert access_key is not None, "Please create an access key with 'pio app new'"
+
+client = EventClient(access_key=access_key, url="http://localhost:7070")
# Check status
print("Check status")
diff --git a/examples/import_yahoo.py b/examples/import_yahoo.py
index 69421d4..93b367d 100644
--- a/examples/import_yahoo.py
+++ b/examples/import_yahoo.py
@@ -2,13 +2,14 @@
Import historical stock data from yahoo finance.
"""
-import argparse
from datetime import datetime
+from pandas.io import data as pdata
+import argparse
+import numpy
import predictionio
import pytz
+import sys
import time
-from pandas.io import data as pdata
-import numpy
EPOCH = datetime(1970, 1, 1, tzinfo=pytz.utc)
@@ -70,7 +71,7 @@
return (dt - EPOCH).total_seconds()
-def import_data(client, app_id, ticker, start_time, end_time, event_time):
+def import_data(client, access_key, ticker, start_time, end_time, event_time):
print "Importing:", ticker, start_time, end_time
try:
@@ -114,7 +115,7 @@
print(response)
-def import_all(app_id):
+def import_all(access_key):
"""This method import all SP500 stocks and some SPDR ETFs."""
time_slices = [
(datetime(1999, 1, 1), datetime(2004, 1, 1), datetime(2004, 1, 2)),
@@ -123,17 +124,17 @@
]
url = 'http://localhost:7070'
- client = predictionio.EventClient(app_id=app_id, threads=1, url=url)
+ client = predictionio.EventClient(access_key=access_key, threads=1, url=url)
tickers = SP500_LIST + ETF_LIST
for ticker in tickers:
for time_slice in time_slices:
- import_data(client, app_id, ticker,
+ import_data(client, access_key, ticker,
time_slice[0], time_slice[1], time_slice[2])
-def import_data_with_gaps(app_id):
+def import_data_with_gaps(access_key):
"""This method import data with time gaps.
Data imported by this method is used by stock engine, it demonsrates how it
@@ -154,11 +155,11 @@
tickers = ['SPY', 'AAPL', 'IBM', 'MSFT']
url = 'http://localhost:7070'
- client = predictionio.EventClient(app_id=app_id, threads=1, url=url)
+ client = predictionio.EventClient(access_key=access_key, threads=1, url=url)
for ticker in tickers:
for time_slice in time_slices:
- import_data(client, app_id, ticker,
+ import_data(client, access_key, ticker,
time_slice[0], time_slice[1], time_slice[2])
# below are data with holes
@@ -171,7 +172,7 @@
tickers = ['AMZN']
for ticker in tickers:
for time_slice in time_slices:
- import_data(client, app_id, ticker,
+ import_data(client, access_key, ticker,
time_slice[0], time_slice[1], time_slice[2])
time_slices = [
@@ -181,11 +182,11 @@
tickers = ['FB']
for ticker in tickers:
for time_slice in time_slices:
- import_data(client, app_id, ticker,
+ import_data(client, access_key, ticker,
time_slice[0], time_slice[1], time_slice[2])
-def import_one(app_id):
+def import_one(access_key):
"""Import TSLA.
Import data with from 2014-01-01 until 2014-03-01. event_time specifies when
@@ -197,12 +198,16 @@
ticker = 'TSLA'
url = 'http://localhost:7070'
- client = predictionio.EventClient(app_id=app_id, threads=1, url=url)
+ client = predictionio.EventClient(access_key=access_key, threads=1, url=url)
- import_data(client, app_id, ticker, start_time, end_time, event_time)
+ import_data(client, access_key, ticker, start_time, end_time, event_time)
if __name__ == '__main__':
- #import_all(app_id=2)
- import_data_with_gaps(app_id=1)
- #import_one(app_id=1)
+ if len(sys.argv) < 2:
+ sys.exit("Usage: python -m examples.import_yahoo <access_key>")
+
+ access_key = sys.argv[1]
+ import_all(access_key=access_key)
+ #import_data_with_gaps(access_key=access_key)
+ #import_one(access_key=access_key)
diff --git a/examples/itemrank_quick_start.py b/examples/itemrank_quick_start.py
index b76abc1..846d1d5 100644
--- a/examples/itemrank_quick_start.py
+++ b/examples/itemrank_quick_start.py
@@ -5,12 +5,13 @@
import predictionio
import random
+import sys
-def import_itemrank(app_id):
+def import_itemrank(access_key):
random.seed()
- client = predictionio.EventClient(app_id=app_id)
+ client = predictionio.EventClient(access_key)
print client.get_status()
@@ -39,4 +40,6 @@
if __name__ == '__main__':
- import_itemrank(7)
+ if len(sys.argv) < 2:
+ sys.exit("Usage: python -m examples.itemrank_quick_start <access_key>")
+ import_itemrank(sys.argv[1])
diff --git a/predictionio/__init__.py b/predictionio/__init__.py
index 3b6fe00..ecb6f27 100644
--- a/predictionio/__init__.py
+++ b/predictionio/__init__.py
@@ -5,7 +5,7 @@
"""
-__version__ = "0.8.1"
+__version__ = "0.8.2"
# import deprecated libraries.
from predictionio.obsolete import Client
@@ -151,7 +151,10 @@
class EventClient(BaseClient):
"""Client for importing data into PredictionIO Event Server.
- :param app_id: the id used to identify application data.
+ Notice that app_id has been deprecated as of 0.8.2. Please use access_key
+ instead.
+
+ :param access_key: the access key for your application.
:param url: the url of PredictionIO Event Server.
:param threads: number of threads to handle PredictionIO API requests.
Must be >= 1.
@@ -162,13 +165,24 @@
:param timeout: timeout for HTTP connection attempts and requests in
seconds (optional).
Default value is 5.
-
"""
- def __init__(self, app_id, url="http://localhost:7070",
+ def __init__(self, access_key,
+ url="http://localhost:7070",
threads=1, qsize=0, timeout=5):
+ assert type(access_key) is str, ("access_key must be string. "
+ "Notice that app_id has been deprecated in Prediction.IO 0.8.2. "
+ "Please use access_key instead.")
+
super(EventClient, self).__init__(url, threads, qsize, timeout)
- self.app_id = app_id
+
+ if len(access_key) <= 8:
+ raise DeprecationWarning(
+ "It seems like you are specifying an app_id. It is deprecated in "
+ "Prediction.IO 0.8.2. Please use access_key instead. Or, "
+ "you may use an earlier version of this sdk.")
+
+ self.access_key = access_key
def acreate_event(self, event, entity_type, entity_id,
target_entity_type=None, target_entity_id=None, properties=None,
@@ -194,7 +208,6 @@
object to get the final resuls or status of this asynchronous request.
"""
data = {
- "appId": self.app_id,
"event": event,
"entityType": entity_type,
"entityId": entity_id,
@@ -215,7 +228,7 @@
et_str = et.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + et.strftime("%z")
data["eventTime"] = et_str
- path = "/events.json"
+ path = "/events.json?accessKey=" + self.access_key
request = AsyncRequest("POST", path, **data)
request.set_rfunc(self._acreate_resp)
self._connection.make_request(request)
diff --git a/setup.py b/setup.py
index 8a98273..48ce914 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
setup(
name='PredictionIO',
- version="0.8.1",
+ version="0.8.2",
author=__author__,
author_email=__email__,
packages=['predictionio'],