Implement an OpenCL component implementation evaluator. Add OpenCL tests, working on Ubuntu and MacOS with both CPU and NVIDIA and ATI GPUs.

git-svn-id: https://svn.apache.org/repos/asf/tuscany/sca-cpp/trunk@1162473 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/.gitignore b/.gitignore
index 792e5b6..c2b43cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,4 +141,6 @@
 modules/edit/apps/*/nuvem
 modules/edit/apps/*/lib
 chat-send
+opencl-shell
+opencl-test
 
diff --git a/INSTALL b/INSTALL
index 5e2cb83..ca73ef5 100644
--- a/INSTALL
+++ b/INSTALL
@@ -131,6 +131,10 @@
 Python 2.6.6 (http://www.python.org/)
 Google AppEngine 1.4.0 (http://code.google.com/appengine/)
 
+OpenCL:
+an OpenCL SDK (http://software.intel.com/en-us/articles/opencl-sdk/,
+http://developer.nvidia.com/, http://developer.amd.com/sdks/amdappsdk)
+
 Java:
 a Java 5+ JDK (http://openjdk.java.net/, http://harmony.apache.org/)
 
@@ -178,6 +182,9 @@
 To enable support for Python component implementations:
 --enable-python
 
+To enable support for OpenCL component implementations:
+--enable-opencl
+
 To enable support for Java component implementations:
 --enable-java
 
@@ -229,6 +236,7 @@
 --with-libcloud=$HOME/libcloud-0.3.1-bin \
 --enable-threads \
 --enable-python --with-python=/usr \
+--enable-opencl --with-opencl-include=/usr/include --with-opencl-lib=/usr/lib \
 --enable-gae --with-gae=$HOME/google_appengine \
 --enable-java --with-java=/usr/lib/jvm/default-java \
 --enable-webservice --with-axis2c=$HOME/axis2c-1.6.0-bin \
diff --git a/README b/README
index 6e7b99c..b4d257d 100644
--- a/README
+++ b/README
@@ -64,6 +64,7 @@
  |   |   |-- java             Support for Java components
  |   |   |-- json             JSON data encoding
  |   |   |-- oauth            User signin using OAuth
+ |   |   |-- opencl           Support for OpenCL components
  |   |   |-- openid           User signin using OpenID
  |   |   |-- python           Support for Python components
  |   |   |-- rss              RSS data encoding
diff --git a/configure.ac b/configure.ac
index 28f93fd..66d4e64 100644
--- a/configure.ac
+++ b/configure.ac
@@ -388,6 +388,53 @@
   AM_CONDITIONAL([WANT_PYTHON], false)
 fi
 
+# Enable OpenCL support.
+AC_MSG_CHECKING([whether to enable OpenCL support])
+AC_ARG_ENABLE(opencl, [AS_HELP_STRING([--enable-opencl], [enable OpenCL support [default=yes]])],
+[ case "${enableval}" in
+  no)
+    AC_MSG_RESULT(no)
+    ;;
+  *)
+    AC_MSG_RESULT(yes)
+    want_opencl=true
+    ;;
+  esac ],
+[
+  AC_MSG_RESULT(yes)
+  want_opencl=true
+])
+if test "${want_opencl}" = "true"; then
+
+  # Configure path to OpenCL includes and lib.
+  AC_MSG_CHECKING([for opencl])
+  AC_ARG_WITH([opencl-include], [AC_HELP_STRING([--with-opencl-include=PATH], [path to installed OpenCL 1.1 include dir [default=/usr/include]])], [
+    OPENCL_INCLUDE="${withval}"
+    AC_MSG_RESULT("${withval}")
+  ], [
+    OPENCL_INCLUDE="/usr/include"
+    AC_MSG_RESULT(/usr/include)
+  ])
+  AC_SUBST(OPENCL_INCLUDE)
+  AC_ARG_WITH([opencl-lib], [AC_HELP_STRING([--with-opencl-lib=PATH], [path to installed OpenCL 1.1 lib dir [default=/usr/lib]])], [
+    OPENCL_LIB="${withval}"
+    AC_MSG_RESULT("${withval}")
+  ], [
+    OPENCL_LIB="/usr/lib"
+    AC_MSG_RESULT(/usr/lib)
+  ])
+  AC_SUBST(OPENCL_LIB)
+  if test "${UNAME}" != "Darwin"; then
+    LIBS="-L${OPENCL_LIB} ${defaultlibs}"
+    AC_CHECK_LIB([OpenCL], [clGetDeviceIDs], [], [AC_MSG_ERROR([couldn't find a suitable libOpenCL, use --with-opencl=PATH])])
+  fi
+  AM_CONDITIONAL([WANT_OPENCL], true)
+  AC_DEFINE([WANT_OPENCL], 1, [enable OpenCL support])
+
+else
+  AM_CONDITIONAL([WANT_OPENCL], false)
+fi
+
 # Enable Java support.
 AC_MSG_CHECKING([whether to enable Java support])
 AC_ARG_ENABLE(java, [AS_HELP_STRING([--enable-java], [enable Java support [default=no]])],
@@ -929,6 +976,7 @@
                  modules/http/Makefile
                  modules/server/Makefile
                  modules/python/Makefile
+                 modules/opencl/Makefile
                  modules/java/Makefile
                  modules/openid/Makefile
                  modules/oauth/Makefile
diff --git a/kernel/perf.hpp b/kernel/perf.hpp
index f5004d0..82d0dda 100644
--- a/kernel/perf.hpp
+++ b/kernel/perf.hpp
@@ -64,5 +64,17 @@
     return (double)t / (double)count;
 }
 
+const unsigned long timems() {
+    struct timeval t;
+    gettimeofday(&t, NULL);
+    return (unsigned long)(t.tv_sec * 1000 + t.tv_usec / 1000);
+}
+
+const unsigned long timens() {
+    struct timeval t;
+    gettimeofday(&t, NULL);
+    return (unsigned long)(t.tv_sec * 1000000000 + t.tv_usec * 1000);
+}
+
 }
 #endif /* tuscany_perf_hpp */
diff --git a/macos/macos-install b/macos/macos-install
index 6d7b90c..d7b2feb 100755
--- a/macos/macos-install
+++ b/macos/macos-install
@@ -285,7 +285,7 @@
 git clone git://git.apache.org/tuscany-sca-cpp.git
 cd tuscany-sca-cpp
 ./bootstrap
-./configure CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ --prefix=$build/tuscany-sca-cpp-bin --with-curl=$build/curl-7.19.5-bin --with-apr=$build/apr-1.4.x-bin --with-httpd=$build/httpd-2.3.10-bin --with-memcached=$build/memcached-1.4.5-bin --with-tinycdb=$build/tinycdb-bin --with-js-include=$build/js-1.8.5-bin/include/js --with-js-lib=$build/js-1.8.5-bin/lib --with-libcloud=$build/libcloud-0.4.2-bin --enable-threads --enable-python --with-libxml2=$build/libxml2-2.7.7-bin --enable-chat --with-libstrophe=$build/libstrophe-bin --enable-openid --with-mod-auth-openid=$build/mod-auth-openid-bin --enable-oauth --with-liboauth=$build/liboauth-0.9.1-bin --enable-mod-security --with-mod-security=$build/modsecurity-apache-2.6.0-bin
+./configure CC=/usr/local/bin/gcc CXX=/usr/local/bin/g++ --prefix=$build/tuscany-sca-cpp-bin --with-curl=$build/curl-7.19.5-bin --with-apr=$build/apr-1.4.x-bin --with-httpd=$build/httpd-2.3.10-bin --with-memcached=$build/memcached-1.4.5-bin --with-tinycdb=$build/tinycdb-bin --with-js-include=$build/js-1.8.5-bin/include/js --with-js-lib=$build/js-1.8.5-bin/lib --with-libcloud=$build/libcloud-0.4.2-bin --enable-threads --enable-python --enable-opencl --with-libxml2=$build/libxml2-2.7.7-bin --enable-chat --with-libstrophe=$build/libstrophe-bin --enable-openid --with-mod-auth-openid=$build/mod-auth-openid-bin --enable-oauth --with-liboauth=$build/liboauth-0.9.1-bin --enable-mod-security --with-mod-security=$build/modsecurity-apache-2.6.0-bin
 make
 make install
 if [ "$?" != "0" ]; then
diff --git a/modules/Makefile.am b/modules/Makefile.am
index 278d8b4..f4195dc 100644
--- a/modules/Makefile.am
+++ b/modules/Makefile.am
@@ -15,5 +15,5 @@
 #  specific language governing permissions and limitations
 #  under the License.
 
-SUBDIRS = scheme atom rss js json scdl http server python java openid oauth wsgi edit
+SUBDIRS = scheme atom rss js json scdl http server python opencl java openid oauth wsgi edit
 
diff --git a/modules/opencl/Makefile.am b/modules/opencl/Makefile.am
new file mode 100644
index 0000000..95e30d2
--- /dev/null
+++ b/modules/opencl/Makefile.am
@@ -0,0 +1,57 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#  
+#    http://www.apache.org/licenses/LICENSE-2.0
+#    
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+
+if WANT_OPENCL
+
+INCLUDES = -I${OPENCL_INCLUDE}
+
+incl_HEADERS = *.hpp
+incldir = $(prefix)/include/modules/opencl
+
+dist_mod_SCRIPTS = opencl-conf
+moddir = $(prefix)/modules/opencl
+
+EXTRA_DIST = domain-test.composite server-test.cl
+
+if DARWIN
+OPENCL_FLAGS = -framework OpenCL
+else
+OPENCL_FLAGS = -L${OPENCL_LIB} -R${OPENCL_LIB} -lOpenCL
+endif
+
+#mod_LTLIBRARIES = libmod_tuscany_opencl.la
+#libmod_tuscany_opencl_la_SOURCES = mod-opencl.cpp
+#libmod_tuscany_opencl_la_LDFLAGS = -lxml2 -lcurl -lmozjs -framework OpenCL
+#noinst_DATA = libmod_tuscany_opencl${libsuffix}
+#libmod_tuscany_opencl${libsuffix}:
+#	ln -s .libs/libmod_tuscany_opencl${libsuffix}
+
+opencl_test_SOURCES = opencl-test.cpp
+opencl_test_LDFLAGS = ${OPENCL_FLAGS}
+
+opencl_shell_SOURCES = opencl-shell.cpp
+opencl_shell_LDFLAGS = ${OPENCL_FLAGS}
+
+client_test_SOURCES = client-test.cpp
+client_test_LDFLAGS = -lxml2 -lcurl -lmozjs
+
+dist_noinst_SCRIPTS = server-test
+noinst_PROGRAMS = opencl-test opencl-shell client-test
+TESTS = opencl-test
+
+endif
diff --git a/modules/opencl/client-test.cpp b/modules/opencl/client-test.cpp
new file mode 100644
index 0000000..7af3cc7
--- /dev/null
+++ b/modules/opencl/client-test.cpp
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/* $Rev$ $Date$ */
+
+/**
+ * Test HTTP client functions.
+ */
+
+#include "stream.hpp"
+#include "string.hpp"
+#include "../server/client-test.hpp"
+
+int main() {
+    tuscany::cout << "Testing..." << tuscany::endl;
+    tuscany::server::testURI = "http://localhost:8090/opencl";
+
+    tuscany::server::testServer();
+
+    tuscany::cout << "OK" << tuscany::endl;
+
+    return 0;
+}
diff --git a/modules/opencl/domain-test.composite b/modules/opencl/domain-test.composite
new file mode 100644
index 0000000..e69399d
--- /dev/null
+++ b/modules/opencl/domain-test.composite
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.    
+-->
+<composite xmlns="http://docs.oasis-open.org/ns/opencsa/sca/200912"
+  targetNamespace="http://domain/test"
+  name="domain-test">
+        
+    <component name="opencl-test">
+        <implementation.opencl kernel="server-test.cl"/>
+        <service name="test">
+            <binding.http uri="opencl"/>
+        </service>
+    </component>     
+
+</composite>
diff --git a/modules/opencl/driver.hpp b/modules/opencl/driver.hpp
new file mode 100644
index 0000000..b4b6c28
--- /dev/null
+++ b/modules/opencl/driver.hpp
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/* $Rev$ $Date$ */
+
+#ifndef tuscany_opencl_driver_hpp
+#define tuscany_opencl_driver_hpp
+
+/**
+ * OpenCL evaluator main driver loop.
+ */
+
+#include "string.hpp"
+#include "stream.hpp"
+#include "monad.hpp"
+#include "../scheme/driver.hpp"
+#include "eval.hpp"
+
+namespace tuscany {
+namespace opencl {
+
+const value evalDriverLoop(const OpenCLProgram& clprog, istream& in, ostream& out, const OpenCLContext& cl) {
+    scheme::promptForInput(scheme::evalInputPrompt, out);
+    value input = scheme::readValue(in);
+    if (isNil(input))
+        return input;
+    const failable<value> output = evalKernel(createKernel(input, clprog), input, 1, value::String, 512, cl);
+    scheme::announceOutput(scheme::evalOutputPrompt, out);
+    scheme::userPrint(content(output), out);
+    return evalDriverLoop(clprog, in, out, cl);
+}
+
+const bool evalDriverRun(const char* path, istream& in, ostream& out) {
+    OpenCLContext cl(OpenCLContext::DEFAULT);
+    scheme::setupDisplay(out);
+    ifstream is(path);
+    failable<OpenCLProgram> clprog = readProgram(path, is, cl);
+    if (!hasContent(clprog))
+        return true;
+    evalDriverLoop(content(clprog), in, out, cl);
+    return true;
+}
+
+}
+}
+#endif /* tuscany_opencl_driver_hpp */
diff --git a/modules/opencl/eval.hpp b/modules/opencl/eval.hpp
new file mode 100644
index 0000000..ef0e028
--- /dev/null
+++ b/modules/opencl/eval.hpp
@@ -0,0 +1,725 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/* $Rev$ $Date$ */
+
+#ifndef tuscany_opencl_eval_hpp
+#define tuscany_opencl_eval_hpp
+
+/**
+ * OpenCL kernel evaluation logic.
+ */
+#ifdef IS_DARWIN
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#include "list.hpp"
+#include "value.hpp"
+#include "perf.hpp"
+
+namespace tuscany {
+namespace opencl {
+
+/**
+ * Convert an OpenCL error code to a string.
+ */
+const string clError(const cl_int e) {
+    ostringstream s;
+    s << "error " << e;
+    return str(s);
+}
+
+#ifdef WANT_MAINTAINER_MODE
+
+/**
+ * OpenCL profiling counters.
+ */
+cl_ulong memtime = 0;
+cl_ulong kernelqtime = 0;
+cl_ulong kerneltime = 0;
+cl_ulong preptime = 0;
+cl_ulong evaltime = 0;
+
+/**
+ * Reset the OpenCL profiling counters.
+ */
+bool resetOpenCLCounters() {
+    memtime = kernelqtime = kerneltime = preptime = evaltime = 0;
+    return true;
+}
+
+/**
+ * Print the OpenCL profiling counters.
+ */
+bool printOpenCLCounters(const long n) {
+    cout << " kernelq " << ((double)kernelqtime / 1000000.0) / (double)n << " ms kernel " << ((double)kerneltime / 1000000.0) / (double)n << " ms memory " << ((double)memtime / 1000000.0) / (double)n << " ms prep " << ((double)preptime / 1000000.0) / (double)n << " ms eval " << ((double)evaltime / 1000000.0) / (double)n << " ms";
+    return true;
+}
+
+/**
+ * Profile a memory event.
+ */
+failable<cl_ulong> profileMemEvent(const cl_event evt) {
+    cl_ulong start = 0;
+    const cl_int serr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
+    if (serr != CL_SUCCESS)
+        return mkfailure<cl_ulong>("Couldn't profile memory event start: " + clError(serr));
+    cl_ulong end = 0;
+    const cl_int eerr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
+    if (eerr != CL_SUCCESS)
+        return mkfailure<cl_ulong>("Couldn't profile memory event end: " + clError(eerr));
+    const cl_ulong t = end - start;
+    memtime += t;
+    return t;
+}
+
+/**
+ * Profile a kernel event.
+ */
+failable<cl_ulong> profileKernelEvent(const cl_event evt) {
+    cl_ulong queued = 0;
+    const cl_int qerr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, NULL);
+    if (qerr != CL_SUCCESS)
+        return mkfailure<cl_ulong>("Couldn't profile kernel event queue: " + clError(qerr));
+    cl_ulong start = 0;
+    const cl_int serr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
+    if (serr != CL_SUCCESS)
+        return mkfailure<cl_ulong>("Couldn't profile kernel event start: " + clError(serr));
+    cl_ulong end = 0;
+    const cl_int eerr = clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
+    if (eerr != CL_SUCCESS)
+        return mkfailure<cl_ulong>("Couldn't profile kernel event end: " + clError(eerr));
+    const cl_ulong q = start - queued;
+    kernelqtime += q;
+    const cl_ulong t = end - start;
+    kerneltime += t;
+    return t;
+}
+
+/**
+ * Profile an array of memory events.
+ */
+failable<cl_ulong> profileMemEvents(const cl_uint n, const cl_event* evt) {
+    if (n == 0)
+        return 0;
+    const failable<cl_ulong> t = profileMemEvent(*evt);
+    if (!hasContent(t))
+        return t;
+    const failable<cl_ulong> r = profileMemEvents(n - 1, evt + 1);
+    if (!hasContent(r))
+        return r;
+    return content(t) + content(r);
+}
+
+#else
+
+#define resetOpenCLCounters()
+#define printOpenCLCounters(n)
+
+#endif
+
+class OpenCLContext;
+class OpenCLProgram;
+class OpenCLKernel;
+class OpenCLBuffer;
+
+/**
+ * Represent an OpenCL context.
+ */
+class OpenCLContext {
+public:
+#define OPENCL_MAX_DEVICES 64
+
+    enum DeviceType {
+        DEFAULT = 0, CPU = 1, GPU = 2
+    };
+
+    OpenCLContext(const OpenCLContext::DeviceType devtype) : dev(OpenCLContext::DEFAULT), ndevs(0), ctx(0) {
+        debug("opencl::OpenCLContext");
+        for (int i = 0; i < OPENCL_MAX_DEVICES; i++)
+            cq[i] = 0;
+
+        // Get the available platforms
+        cl_uint nplatforms;
+        cl_platform_id platforms[16];
+        const cl_int gperr = clGetPlatformIDs(16, platforms, &nplatforms);
+        if(nplatforms == 0 || gperr != CL_SUCCESS) {
+            mkfailure<bool>("Couldn't get OpenCL platforms: " + clError(gperr));
+            return;
+        }
+        for(cl_uint i = 0; i < nplatforms; ++i) {
+            char vendor[256];
+            const cl_int gverr = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, NULL);
+            if(gverr != CL_SUCCESS) {
+                mkfailure<bool>("Couldn't get OpenCL platform: " + clError(gverr));
+                return;
+            }
+            debug(vendor, "opencl::OpenCLContext::vendor");
+        }
+
+        // Get the available devices of the requested type
+        if (devtype == OpenCLContext::DEFAULT || devtype == OpenCLContext::GPU) {
+            for(cl_uint i = 0; i < nplatforms; ++i) {
+                const cl_int err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, OPENCL_MAX_DEVICES, devid, &ndevs);
+                if (err == CL_SUCCESS) {
+                    debug(ndevs, "opencl::OpenCLContext::gcpus");
+                    dev = OpenCLContext::GPU;
+                    break;
+                }
+            }
+        }
+        if ((devtype == OpenCLContext::DEFAULT && ndevs == 0) || devtype == OpenCLContext::CPU) {
+            for(cl_uint i = 0; i < nplatforms; ++i) {
+                const cl_int err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU, OPENCL_MAX_DEVICES, devid, &ndevs);
+                if (err == CL_SUCCESS) {
+                    debug(ndevs, "opencl::OpenCLContext::ncpus");
+                    dev = OpenCLContext::CPU;
+                    break;
+                }
+            }
+        }
+        if (ndevs == 0)
+            return;
+
+        // Initialize OpenCL context and command queues
+        cl_int ccerr;
+        ctx = clCreateContext(0, ndevs, devid, NULL, NULL, &ccerr);
+        if(!ctx || ccerr != CL_SUCCESS) {
+            mkfailure<bool>("Couldn't create OpenCL context: " + clError(ccerr));
+            return;
+        }
+
+        for (cl_uint i = 0; i < ndevs; i++) {
+            cl_int cqerr;
+#ifdef WANT_MAINTAINER_MODE
+            cq[i] = clCreateCommandQueue(ctx, devid[i], CL_QUEUE_PROFILING_ENABLE, &cqerr);
+#else
+            cq[i] = clCreateCommandQueue(ctx, devid[i], 0, &cqerr);
+#endif
+            if (!cq[i] || cqerr != CL_SUCCESS) {
+                mkfailure<bool>("Couldn't create OpenCL command queue: " + clError(cqerr));
+                return;
+            }
+        }
+    }
+
+    ~OpenCLContext() {
+        debug("opencl::~OpenCLContext");
+        for (cl_uint i = 0; i < ndevs; i++) {
+            if (cq[i] != 0)
+                clReleaseCommandQueue(cq[i]);
+        }
+        if (ctx != 0)
+            clReleaseContext(ctx);
+    }
+
+private:
+    OpenCLContext::DeviceType dev;
+    cl_uint ndevs;
+    cl_device_id devid[OPENCL_MAX_DEVICES];
+    cl_context ctx;
+    cl_command_queue cq[OPENCL_MAX_DEVICES];
+
+    friend const cl_uint devices(const OpenCLContext& cl);
+    friend const cl_command_queue commandq(const OpenCLContext& cl);
+    friend const failable<OpenCLBuffer> readOnlyBuffer(const size_t size, const void* p, const OpenCLContext& cl, cl_command_queue cq);
+    friend const failable<OpenCLBuffer> writeOnlyBuffer(const size_t size, const OpenCLContext& cl);
+    friend const failable<value> evalKernel(const failable<OpenCLKernel>& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl);
+    friend const failable<OpenCLProgram> readProgram(const string& path, istream& is, const OpenCLContext& cl);
+};
+
+/**
+ * Return the number of computing devices available in a context.
+ */
+const cl_uint devices(const OpenCLContext& cl) {
+    return cl.ndevs;
+}
+
+/**
+ * Return a command queue from a context.
+ */
+const cl_command_queue commandq(const OpenCLContext& cl) {
+    return cl.cq[0];
+}
+
+/**
+ * Represents an OpenCL program.
+ */
+class OpenCLProgram {
+public:
+    OpenCLProgram() : prog(0) {
+    }
+
+    OpenCLProgram(const cl_program prog) : prog(prog) {
+    }
+
+    OpenCLProgram(const OpenCLProgram& c) : prog(c.prog) {
+        const cl_int rperr = clRetainProgram(prog);
+        if (rperr != CL_SUCCESS)
+            mkfailure<bool>(string("Couldn't retain OpenCL program: ") + clError(rperr));
+    }
+
+    ~OpenCLProgram() {
+        if (!prog)
+            return;
+        const cl_int rperr = clReleaseProgram(prog);
+        if (rperr != CL_SUCCESS)
+            mkfailure<bool>(string("Couldn't release OpenCL program: ") + clError(rperr));
+    }
+
+private:
+    const cl_program prog;
+
+    friend const failable<OpenCLKernel> createKernel(const value& expr, const OpenCLProgram& clprog);
+    friend const failable<value> evalKernel(const failable<OpenCLKernel>& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl);
+};
+
+/**
+ * Represents an OpenCL kernel.
+ */
+class OpenCLKernel {
+public:
+    OpenCLKernel() : k(0) {
+    }
+
+    OpenCLKernel(const cl_kernel k) : k(k) {
+    }
+
+    OpenCLKernel(const OpenCLKernel& c) : k(c.k) {
+        const cl_int rkerr = clRetainKernel(k);
+        if (rkerr != CL_SUCCESS)
+            mkfailure<bool>(string("Couldn't retain OpenCL kernel: ") + clError(rkerr));
+    }
+
+    ~OpenCLKernel() {
+        if (!k)
+            return;
+        const cl_int rkerr = clReleaseKernel(k);
+        if (rkerr != CL_SUCCESS)
+            mkfailure<bool>(string("Couldn't release OpenCL kernel: ") + clError(rkerr));
+    }
+
+private:
+    const cl_kernel k;
+
+    friend const failable<OpenCLBuffer> valueToKernelArg(const cl_uint i, const size_t size, const void* p, const failable<OpenCLBuffer>& buf, const OpenCLKernel& kernel);
+    friend const failable<value> evalKernel(const failable<OpenCLKernel>& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl);
+};
+
+/**
+ * Represents an OpenCL buffer.
+ */
+class OpenCLBuffer {
+public:
+    OpenCLBuffer() : mem(0), evt(0) {
+    }
+
+    OpenCLBuffer(const cl_mem mem, const cl_event evt) : mem(mem), evt(evt) {
+    }
+
+    OpenCLBuffer(const OpenCLBuffer& c) : mem(c.mem), evt(c.evt) {
+        if (mem != 0) {
+            const cl_int rmerr = clRetainMemObject(mem);
+            if (rmerr != CL_SUCCESS)
+                mkfailure<bool>(string("Couldn't retain OpenCL buffer: ") + clError(rmerr));
+        }
+        if (evt != 0) {
+            const cl_int reerr = clRetainEvent(evt);
+            if (reerr != CL_SUCCESS)
+                mkfailure<bool>(string("Couldn't retain OpenCL event: ") + clError(reerr));
+        }
+    }
+
+    ~OpenCLBuffer() {
+        if (mem != 0) {
+            const cl_int rmerr = clReleaseMemObject(mem);
+            if (rmerr != CL_SUCCESS)
+                mkfailure<bool>(string("Couldn't release OpenCL buffer: ") + clError(rmerr));
+        }
+        if (evt != 0) {
+            const cl_int reerr = clReleaseEvent(evt);
+            if (reerr != CL_SUCCESS)
+                mkfailure<bool>(string("Couldn't release OpenCL event: ") + clError(reerr));
+        }
+    }
+
+private:
+    const cl_mem mem;
+    const cl_event evt;
+
+    friend const cl_uint writeBufferEvents(const list<OpenCLBuffer>& buf, cl_event* evt);
+    friend const failable<OpenCLBuffer> valueToKernelArg(const cl_uint i, const size_t size, const void* p, const failable<OpenCLBuffer>& buf, const OpenCLKernel& kernel);
+    friend const failable<value> evalKernel(const failable<OpenCLKernel>& kernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl);
+};
+
+/**
+ * Return a read-only memory buffer.
+ */
+const failable<OpenCLBuffer> readOnlyBuffer(const size_t size, const void* p, const OpenCLContext& cl, const cl_command_queue cq) {
+    if (cl.dev == OpenCLContext::CPU) {
+        cl_int err;
+        const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, size, const_cast<void*>(p), &err);
+        if (!buf || err != CL_SUCCESS)
+            return mkfailure<OpenCLBuffer>(string("Couldn't map OpenCL host memory: ") + clError(err));
+        return OpenCLBuffer(buf, 0);
+    }
+    cl_int berr;
+    const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_READ_ONLY, size, NULL, &berr);
+    if (!buf || berr != CL_SUCCESS)
+        return mkfailure<OpenCLBuffer>(string("Couldn't allocate OpenCL device memory: ") + clError(berr));
+    cl_event wevt;
+    const cl_int werr = clEnqueueWriteBuffer(cq, buf, CL_FALSE, 0, size, p, 0, NULL, &wevt);
+    if (werr != CL_SUCCESS) {
+        clReleaseMemObject(buf);
+        return mkfailure<OpenCLBuffer>(string("Couldn't enqueue write to device memory: ") + clError(werr));
+    }
+    return OpenCLBuffer(buf, wevt);
+}
+
+/**
+ * Fill an array of write events for a given list of buffers.
+ */
+const cl_uint writeBufferEvents(const list<OpenCLBuffer>& buf, cl_event* evt) {
+    if (isNil(buf))
+        return 0;
+    const cl_event e = car(buf).evt;
+    if (e == 0)
+        return writeBufferEvents(cdr(buf), evt);
+    *evt = e;
+    return 1 + writeBufferEvents(cdr(buf), evt + 1);
+}
+
+/**
+ * Return a write-only memory buffer.
+ */
+const failable<OpenCLBuffer> writeOnlyBuffer(const size_t size, const OpenCLContext& cl) {
+    if (cl.dev == OpenCLContext::CPU) {
+        cl_int err;
+        const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, size, NULL, &err);
+        if (!buf || err != CL_SUCCESS)
+            return mkfailure<OpenCLBuffer>(string("Couldn't map OpenCL host memory: ") + clError(err));
+        return OpenCLBuffer(buf, 0);
+    }
+    cl_int err;
+    const cl_mem buf = clCreateBuffer(cl.ctx, CL_MEM_WRITE_ONLY, size, NULL, &err);
+    if (!buf || err != CL_SUCCESS)
+        return mkfailure<OpenCLBuffer>(string("Couldn't allocate OpenCL device memory: ") + clError(err));
+    return OpenCLBuffer(buf, 0);
+}
+
+/**
+ * Convert a value to a kernel arg.
+ */
+const failable<OpenCLBuffer> valueToKernelArg(const cl_uint i, const size_t size, const void* p, const failable<OpenCLBuffer>& buf, const OpenCLKernel& kernel) {
+    if (!hasContent(buf))
+        return buf;
+    if (p != NULL) {
+        const cl_int err = clSetKernelArg(kernel.k, (cl_uint)i, size, p);
+        if (err != CL_SUCCESS)
+            return mkfailure<OpenCLBuffer>(string("Couldn't set OpenCL simple kernel arg: ") + clError(err));
+        return buf;
+    }
+    const OpenCLBuffer b = content(buf);
+    const cl_int err = clSetKernelArg(kernel.k, i, sizeof(cl_mem), &b.mem);
+    if (err != CL_SUCCESS)
+        return mkfailure<OpenCLBuffer>(string("Couldn't set OpenCL buffer kernel arg: ") + clError(err));
+    return buf;
+}
+
+const failable<OpenCLBuffer> valueToKernelArg(const value& v, const cl_uint i, const OpenCLKernel& kernel, const OpenCLContext& cl, const cl_command_queue cq) {
+    switch (type(v)) {
+    case value::Symbol: {
+        const string s = string("'") + v;
+        return valueToKernelArg(i, 0, NULL, readOnlyBuffer(length(s) + 1, c_str(s), cl, cq), kernel);
+    }
+    case value::String: {
+        const string s = (string)v;
+        return valueToKernelArg(i, 0, NULL, readOnlyBuffer(length(s) + 1, c_str(s), cl, cq), kernel);
+    }
+    case value::Number: {
+        const cl_float d = (cl_float)((double)v);
+        return valueToKernelArg(i, sizeof(cl_float), &d, OpenCLBuffer(), kernel);
+    }
+    case value::Bool: {
+        const cl_int b = (cl_int)((bool)v);
+        return valueToKernelArg(i, sizeof(cl_int), &b, OpenCLBuffer(), kernel);
+    }
+    default: {
+        return valueToKernelArg(i, sizeof(cl_mem), NULL, readOnlyBuffer(sizeof(value), &v, cl, cq), kernel);
+    }
+    }
+}
+
+/**
+ * Convert a list of values to kernel args.
+ */
+const failable<list<OpenCLBuffer>> valuesToKernelArgsListHelper(const list<value>& v, const cl_uint i, const OpenCLKernel& kernel, const OpenCLContext& cl, const cl_command_queue cq) {
+    if (isNil(v))
+        return list<OpenCLBuffer>();
+    const failable<OpenCLBuffer> a = valueToKernelArg(car(v), i, kernel, cl, cq);
+    if (!hasContent(a))
+        return mkfailure<list<OpenCLBuffer>>(reason(a));
+    const failable<list<OpenCLBuffer>> al = valuesToKernelArgsListHelper(cdr(v), i + 1, kernel, cl, cq);
+    if (!hasContent(al))
+        return al;
+    return cons<OpenCLBuffer>(content(a), content(al));
+}
+
+const failable<list<OpenCLBuffer>> valuesToKernelArgs(const list<value>& v, const OpenCLKernel& kernel, const OpenCLContext& cl, const cl_command_queue cq) {
+    return valuesToKernelArgsListHelper(v, 0, kernel, cl, cq);
+}
+
+/**
+ * Convert a kernel result to a value.
+ */
+const value kernelResultToValue(const void* p, const value::ValueType type) {
+    switch(type) {
+    case value::Symbol: {
+        const char* s = static_cast<const char*>(p);
+        const size_t l = strlen(s);
+        if (l != 0 && *s == '\'')
+            return value(s + 1);
+        return value(s);
+    }
+    case value::String: {
+        const char* s = static_cast<const char*>(p);
+        const size_t l = strlen(s);
+        if (l != 0 && *s == '\'')
+            return value(s + 1);
+        return value(string(s, l));
+    }
+    case value::Number:
+        return (double)(*(static_cast<const cl_float*>(p)));
+    case value::Bool:
+        return (bool)(*(static_cast<const cl_int*>(p)));
+    default:
+        return *(static_cast<const value*>(p));
+    }
+}
+
+/**
+ * Return the value type corresponding to a C99 type name.
+ */
+const value::ValueType valueType(const string& t) {
+    if (t == "float")
+        return value::Number;
+    if (t == "int")
+        return value::Bool;
+    if (t == "char")
+        return value::String;
+    return value::Nil;
+}
+
+/**
+ * Return the size of a C99 type corresponding to a value type.
+ */
+const size_t valueSize(const value::ValueType type) {
+    switch(type) {
+    case value::Number:
+        return sizeof(cl_float);
+    case value::Bool:
+        return sizeof(cl_int);
+    case value::Symbol:
+        return sizeof(cl_char);
+    case value::String:
+        return sizeof(cl_char);
+    default:
+        return sizeof(value);
+    }
+}
+
+/**
+ * Return the result type of a kernel.
+ */
+class OpenCLResultType {
+public:
+    OpenCLResultType(const value::ValueType type, const size_t n, const size_t size) : type(type), n(n), size(size) {}
+    const value::ValueType type;
+    const size_t n;
+    const size_t size;
+};
+
+const OpenCLResultType kernelResultType(const string& fn, value::ValueType type, const size_t n) {
+    if (type != value::Nil)
+        return OpenCLResultType(type, n, valueSize(type));
+    const string s = car(tokenize("_", fn));
+    const size_t d = find_first_of(s, "0123456789");
+    if (d == length(s)) {
+        const value::ValueType vt = valueType(s);
+        return OpenCLResultType(vt, 1, valueSize(vt));
+    }
+    const value::ValueType vt = valueType(substr(s, 0, d));
+    return OpenCLResultType(vt, atoi(c_str(substr(s, d))), valueSize(vt));
+}
+
+/**
+ * Create the kernel implementing an expression.
+ */
+const failable<OpenCLKernel> createKernel(const value& expr, const OpenCLProgram& clprog) {
+
+    // Create an OpenCL kernel for the requested function
+    const value fn = car<value>(expr);
+    cl_int ckerr;
+    const cl_kernel k = clCreateKernel(clprog.prog, c_str(fn), &ckerr);
+    if (k == NULL || ckerr != CL_SUCCESS) {
+
+        // The start, stop, and restart functions are optional
+        //if (fn == "start" || fn == "stop")
+            //return value(lambda<value(const list<value>&)>());
+
+        return mkfailure<OpenCLKernel>(string("Couldn't find function: ") + car<value>(expr) + " : " + clError(ckerr));
+    }
+    return OpenCLKernel(k);
+}
+
+/**
+ * Evaluate an expression implemented by a kernel.
+ */
+const failable<value> evalKernel(const failable<OpenCLKernel>& fkernel, const value& expr, const size_t gwsize, const value::ValueType type, const size_t n, const OpenCLContext& cl) {
+
+#ifdef WANT_MAINTAINER_MODE
+    const cl_uint estart = (cl_uint)timens();
+    const cl_uint pstart = estart;
+#endif
+
+    if (!hasContent(fkernel))
+        return mkfailure<value>(reason(fkernel));
+    const OpenCLKernel kernel = content(fkernel);
+
+    // Get a command queue for the specified device type
+    const cl_command_queue cq = commandq(cl);
+
+    // Set the kernel input args
+    const failable<list<OpenCLBuffer>> args = valuesToKernelArgs(cdr<value>(expr), kernel, cl, cq);
+    if (!hasContent(args)) {
+        return mkfailure<value>(reason(args));
+    }
+
+    // Allocate result buffer in device memory
+    const value fn = car<value>(expr);
+    const OpenCLResultType rtype = kernelResultType(fn, type, n);
+    const size_t rsize = rtype.n * rtype.size;
+    const failable<OpenCLBuffer> rbuf = writeOnlyBuffer(rsize, cl);
+    if (!hasContent(rbuf))
+        return mkfailure<value>(reason(rbuf));
+
+    // Set it as a kernel output arg
+    const cl_mem rmem = content(rbuf).mem;
+    const failable<OpenCLBuffer> rarg = valueToKernelArg((cl_uint)length(cdr<value>(expr)), sizeof(cl_mem), &rmem, rbuf, kernel);
+    if (!hasContent(rarg))
+        return mkfailure<value>(reason(rarg));
+
+    // Enqueue the kernel, to be executed after all the writes complete
+    cl_event wevt[32];
+    const cl_uint nwevt = writeBufferEvents(content(args), wevt);
+    cl_event kevt;
+    const cl_int qerr = clEnqueueNDRangeKernel(cq, kernel.k, 1, NULL, &gwsize, NULL, nwevt, nwevt != 0? wevt : NULL, &kevt);
+    if (qerr != CL_SUCCESS)
+        return mkfailure<value>(string("Couldn't enqueue kernel task: ") + clError(qerr));
+
+    // Enqueue result buffer read, to be executed after the kernel completes
+    char res[rsize];
+    cl_event revt;
+    const cl_int rerr = clEnqueueReadBuffer(cq, rmem, CL_FALSE, 0, rsize, res, 1, &kevt, &revt);  
+    if (rerr != CL_SUCCESS) {
+        clReleaseEvent(kevt);
+        return mkfailure<value>(string("Couldn't read from OpenCL device memory: ") + clError(rerr));
+    }
+
+#ifdef WANT_MAINTAINER_MODE
+    const cl_uint pend = (cl_uint)timens();
+    preptime += (pend - pstart);
+#endif
+
+    // Wait for completion
+    const cl_int werr = clWaitForEvents(1, &revt);
+    if (werr != CL_SUCCESS) {
+        clReleaseEvent(revt);
+        clReleaseEvent(kevt);
+        return mkfailure<value>(string("Couldn't wait for kernel completion: ") + clError(werr));
+    }
+
+#ifdef WANT_MAINTAINER_MODE
+    profileMemEvents(nwevt, wevt);
+    profileKernelEvent(kevt);
+    profileMemEvent(revt);
+#endif
+
+    // Convert the result to a value
+    const value v = kernelResultToValue(res, rtype.type);
+
+    // Release OpenCL resources
+    clReleaseEvent(revt);
+    clReleaseEvent(kevt);
+
+#ifdef WANT_MAINTAINER_MODE
+    const cl_uint eend = (cl_uint)timens();
+    evaltime += (eend - estart);
+#endif
+
+    return v;
+}
+
+const failable<value> evalKernel(const failable<OpenCLKernel>& kernel, const value& expr, const OpenCLContext& cl) {
+    return evalKernel(kernel, expr, 1, value::Nil, 0, cl);
+}
+
+/**
+ * Read an opencl program from an input stream.
+ */
+const failable<OpenCLProgram> readProgram(const string& path, istream& is, const OpenCLContext& cl) {
+
+    // Read the program source
+    const list<string> ls = streamList(is);
+    ostringstream os;
+    write(ls, os);
+    const char* cs = c_str(str(os));
+
+    // Create the OpenCL program
+    cl_int cperr;
+    const cl_program prog = clCreateProgramWithSource(cl.ctx, 1, (const char **)&cs, NULL, &cperr);
+    if (!prog || cperr != CL_SUCCESS)
+        return mkfailure<OpenCLProgram>(string("Couldn't create OpenCL program from source: ") + path + " : " + clError(cperr));
+
+    // Built it
+    const cl_int bperr = clBuildProgram(prog, 0, NULL, NULL, NULL, NULL);
+    if(bperr != CL_SUCCESS) {
+        size_t l;
+        char b[2048];
+        clGetProgramBuildInfo(prog, cl.devid[0], CL_PROGRAM_BUILD_LOG, sizeof(b), b, &l);
+        return mkfailure<OpenCLProgram>(string("Couldn't build OpenCL program: ") + path + " : " + clError(bperr) + "\n" + string(b));
+    }
+    return OpenCLProgram(prog);
+}
+
+/**
+ * Evaluate an expression against an OpenCL program provided as an input stream.
+ */
+const failable<value> evalKernel(const value& expr, istream& is, const OpenCLContext& cl) {
+    failable<OpenCLProgram> clprog = readProgram("program.cl", is, cl);
+    if (!hasContent(clprog))
+        return mkfailure<value>(reason(clprog));
+    return evalKernel(createKernel(expr, content(clprog)), expr, 1, value::Nil, 0, cl);
+}
+
+}
+}
+#endif /* tuscany_opencl_eval_hpp */
diff --git a/modules/opencl/opencl-conf b/modules/opencl/opencl-conf
new file mode 100755
index 0000000..1ba2c33
--- /dev/null
+++ b/modules/opencl/opencl-conf
@@ -0,0 +1,37 @@
+#!/bin/sh
+
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#  
+#    http://www.apache.org/licenses/LICENSE-2.0
+#    
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+# Generate an OpenCL server conf
+here=`echo "import os; print os.path.realpath('$0')" | python`; here=`dirname $here`
+mkdir -p $1
+root=`echo "import os; print os.path.realpath('$1')" | python`
+
+uname=`uname -s`
+if [ $uname = "Darwin" ]; then
+    libsuffix=".dylib"
+else
+    libsuffix=".so"
+fi
+
+cat >>$root/conf/modules.conf <<EOF
+# Generated by: opencl-conf $*
+# Support for OpenCL SCA components
+LoadModule mod_tuscany_eval $here/libmod_tuscany_opencl$libsuffix
+
+EOF
diff --git a/modules/opencl/opencl-shell.cpp b/modules/opencl/opencl-shell.cpp
new file mode 100644
index 0000000..1dfeaee
--- /dev/null
+++ b/modules/opencl/opencl-shell.cpp
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/* $Rev$ $Date$ */
+
+/**
+ * OpenCL script evaluator shell, used for interactive testing of scripts.
+ */
+
+#include <assert.h>
+#include "gc.hpp"
+#include "stream.hpp"
+#include "string.hpp"
+#include "driver.hpp"
+
+int main(const int argc, char** argv) {
+    tuscany::gc_scoped_pool pool;
+    if (argc != 2) {
+        tuscany::cerr << "Usage: opencl-shell <kernel.cl>" << tuscany::endl;
+        return 1;
+    }
+    tuscany::opencl::evalDriverRun(argv[1], tuscany::cin, tuscany::cout);
+    return 0;
+}
diff --git a/modules/opencl/opencl-test.cpp b/modules/opencl/opencl-test.cpp
new file mode 100644
index 0000000..17bc5cc
--- /dev/null
+++ b/modules/opencl/opencl-test.cpp
@@ -0,0 +1,332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/* $Rev$ $Date$ */
+
+/**
+ * Test OpenCL kernel evaluator.
+ */
+
+#include <assert.h>
+#include "stream.hpp"
+#include "string.hpp"
+#include "driver.hpp"
+#include "parallel.hpp"
+#include "perf.hpp"
+
+namespace tuscany {
+namespace opencl {
+
+const string testFloatsCPU =
+        "kernel void add(const float x, const float y, global float* r) {\n"
+        "    float l_x;\n"
+        "    float l_y;\n"
+        "    l_x = x;\n"
+        "    l_y = y;\n"
+        "    *r = l_x + l_y;\n"
+        "}\n"
+        "kernel void float_add(const float x, const float y, global float* r) {\n"
+        "    add(x, y, r);\n"
+        "}\n";
+
+const string testFloatsGPU =
+        "kernel void add(const float x, const float y, global float* r) {\n"
+        "    local float l_x;\n"
+        "    local float l_y;\n"
+        "    l_x = x;\n"
+        "    l_y = y;\n"
+        "    barrier(CLK_LOCAL_MEM_FENCE);\n"
+        "    *r = l_x + l_y;\n"
+        "}\n"
+        "kernel void float_add(const float x, const float y, global float* r) {\n"
+        "    add(x, y, r);\n"
+        "}\n";
+
+const string testBoolsCPU =
+        "kernel void int_or(const int x, const int y, global int* r) {\n"
+        "    int l_x;\n"
+        "    int l_y;\n"
+        "    l_x = x;\n"
+        "    l_y = y;\n"
+        "    *r = l_x || l_y;\n"
+        "}\n";
+
+const string testBoolsGPU =
+        "kernel void int_or(const int x, const int y, global int* r) {\n"
+        "    local int l_x;\n"
+        "    local int l_y;\n"
+        "    l_x = x;\n"
+        "    l_y = y;\n"
+        "    barrier(CLK_LOCAL_MEM_FENCE);\n"
+        "    *r = l_x || l_y;\n"
+        "}\n";
+
+const string testCharsCPU =
+        "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+        "    const int ixl = (int)xl;\n"
+        "    const int iyl = (int)yl;\n"
+        "    for (int i = 0; i < ixl; i++)\n"
+        "        r[i] = x[i];\n"
+        "    for (int i = 0; i < iyl; i++)\n"
+        "        r[ixl + i] = y[i];\n"
+        "    r[ixl + iyl] = '\\0';\n"
+        "}\n"
+        "kernel void char128_add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+        "    add(xl, x, yl, y, r);\n"
+        "}\n";
+
+const string testCharsCopyGPU =
+        "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+        "    const int ixl = (int)xl;\n"
+        "    const int iyl = (int)yl;\n"
+        "    local char l_x[128];\n"
+        "    local char l_y[128];\n"
+        "    event_t re = async_work_group_copy(l_x, x, ixl, 0);\n"
+        "    async_work_group_copy(l_y, y, iyl, re);\n"
+        "    wait_group_events(1, &re);\n"
+        "    local char l_r[128];\n"
+        "    for (int i = 0; i < ixl; i++)\n"
+        "        l_r[i] = l_x[i];\n"
+        "    for (int i = 0; i < iyl; i++)\n"
+        "        l_r[ixl + i] = l_y[i];\n"
+        "    l_r[ixl + iyl] = '\\0';\n"
+        "    event_t we = async_work_group_copy(r, l_r, ixl + iyl + 1, 0);\n"
+        "    wait_group_events(1, &we);\n"
+        "}\n"
+        "kernel void char128(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+        "    add(xl, x, yl, y, r);\n"
+        "}\n";
+
+const string testCharsGPU =
+        "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+        "    const int ixl = (int)xl;\n"
+        "    const int iyl = (int)yl;\n"
+        "    for (int i = 0; i < ixl; i++)\n"
+        "        r[i] = x[i];\n"
+        "    for (int i = 0; i < iyl; i++)\n"
+        "        r[ixl + i] = y[i];\n"
+        "    r[ixl + iyl] = '\\0';\n"
+        "}\n"
+        "kernel void char128(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+        "    add(xl, x, yl, y, r);\n"
+        "}\n";
+
+const string testCharsParallelCPU =
+        "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+        "    const int i = get_global_id(0);\n"
+        "    const int ixl = (int)xl;\n"
+        "    const int iyl = (int)yl;\n"
+        "    r[i] = i < ixl? x[i] : i < ixl + iyl? y[i - ixl] : '\\0';\n"
+        "}\n";
+
+const string testCharsParallelCopyGPU =
+        "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+        "    const int i = get_global_id(0);\n"
+        "    const int ixl = (int)xl;\n"
+        "    const int iyl = (int)yl;\n"
+        "    local char l_x[128];\n"
+        "    local char l_y[128];\n"
+        "    event_t re = async_work_group_copy(l_x, x, ixl, 0);\n"
+        "    async_work_group_copy(l_y, y, iyl, re);\n"
+        "    wait_group_events(1, &re);\n"
+        "    local char l_r[128];\n"
+        "    l_r[i] = i < ixl? l_x[i] : i < ixl + iyl? l_y[i - ixl] : '\\0';\n"
+        "    event_t we = async_work_group_copy(r, l_r, ixl + iyl + 1, 0);\n"
+        "    wait_group_events(1, &we);\n"
+        "}\n";
+
+const string testCharsParallelGPU =
+        "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n"
+        "    const int i = get_global_id(0);\n"
+        "    const int ixl = (int)xl;\n"
+        "    const int iyl = (int)yl;\n"
+        "    r[i] = i < ixl? x[i] : i < ixl + iyl? y[i - ixl] : '\\0';\n"
+        "}\n";
+
+bool testTaskParallel(const OpenCLContext::DeviceType dev) {
+    gc_scoped_pool pool;
+    OpenCLContext cl(dev);
+    if (!devices(cl) != 0)
+        return true;
+
+    {
+        istringstream is(dev == OpenCLContext::CPU? testFloatsCPU : testFloatsGPU);
+        failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+        assert(hasContent(clprog));
+        const value exp = mklist<value>("float_add", 2, 3);
+        const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, cl);
+        assert(hasContent(r));
+        assert(content(r) == value(5));
+    }
+    if (true) return true;
+    {
+        istringstream is(dev == OpenCLContext::CPU? testFloatsCPU : testFloatsGPU);
+        failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+        assert(hasContent(clprog));
+        const value exp = mklist<value>("add", 2, 3);
+        const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, 1, value::Number, 1, cl);
+        assert(hasContent(r));
+        assert(content(r) == value(5));
+    }
+    {
+        istringstream is(dev == OpenCLContext::CPU? testBoolsCPU : testBoolsGPU);
+        failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+        assert(hasContent(clprog));
+
+        const value exp = mklist<value>("int_or", true, false);
+        const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, cl);
+        assert(hasContent(r));
+        assert(content(r) == value(true));
+
+        const value exp2 = mklist<value>("int_or", false, false);
+        const failable<value> r2 = evalKernel(createKernel(exp2, content(clprog)), exp2, cl);
+        assert(hasContent(r2));
+        assert(content(r2) == value(false));
+    }
+    {
+        istringstream is(dev == OpenCLContext::CPU? testCharsCPU : testCharsGPU);
+        failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+        assert(hasContent(clprog));
+
+        const value exp = mklist<value>("char128", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "));
+        const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, cl);
+        assert(hasContent(r));
+        assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World ")));
+    }
+    {
+        istringstream is(dev == OpenCLContext::CPU? testCharsCPU : testCharsGPU);
+        failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+        assert(hasContent(clprog));
+
+        const value exp = mklist<value>("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "));
+        const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, 1, value::String, 128, cl);
+        assert(hasContent(r));
+        assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World ")));
+    }
+
+    return true;
+}
+
+struct evalTaskParallelLoop {
+    evalTaskParallelLoop(const OpenCLContext& cl, const OpenCLProgram& clprog) : cl(cl), clprog(clprog), exp(mklist<value>("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "))) {
+    }
+    const bool operator()() const {
+        const failable<value> r = evalKernel(createKernel(exp, clprog), exp, 1, value::String, 128, cl);
+        assert(hasContent(r));
+        assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World ")));
+        return true;
+    }
+
+    const OpenCLContext& cl;
+    const OpenCLProgram& clprog;
+    const value exp;
+};
+
+const bool testTaskParallelPerf(const OpenCLContext::DeviceType dev, const bool copy) {
+    gc_scoped_pool pool;
+    OpenCLContext cl(dev);
+    if (!devices(cl) != 0)
+        return true;
+
+    istringstream is(dev == OpenCLContext::CPU? testCharsCPU : copy? testCharsCopyGPU : testCharsGPU);
+    failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+    assert(hasContent(clprog));
+
+    resetOpenCLCounters();
+    const lambda<bool()> el = evalTaskParallelLoop(cl, content(clprog));
+    cout << "OpenCL task-parallel eval " << (dev == OpenCLContext::CPU? "CPU" : "GPU") << (copy? " copy" : "") << " test " << time(el, 5, 500) << " ms";
+    printOpenCLCounters(500);
+    cout << endl;
+    return true;
+}
+
+bool testDataParallel(const OpenCLContext::DeviceType dev) {
+    gc_scoped_pool pool;
+    OpenCLContext cl(dev);
+    if (!devices(cl) != 0)
+        return true;
+
+    {
+        istringstream is(dev == OpenCLContext::CPU? testCharsParallelCPU : testCharsParallelGPU);
+        failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+        assert(hasContent(clprog));
+
+        const value exp = mklist<value>("add", 6, string("Hello "), 5, string("World"));
+        const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, 121, value::String, 128, cl);
+        assert(hasContent(r));
+        assert(content(r) == value(string("Hello World")));
+    }
+
+    return true;
+}
+
+struct evalDataParallelLoop {
+    evalDataParallelLoop(const OpenCLContext& cl, const OpenCLProgram& clprog) : cl(cl), clprog(clprog), exp(mklist<value>("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "))) {
+    }
+    const bool operator()() const {
+        const failable<value> r = evalKernel(createKernel(exp, clprog), exp, 121, value::String, 128, cl);
+        assert(hasContent(r));
+        assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World ")));
+        return true;
+    }
+
+    const OpenCLContext& cl;
+    const OpenCLProgram& clprog;
+    const value exp;
+};
+
+const bool testDataParallelPerf(const OpenCLContext::DeviceType dev, const bool copy) {
+    gc_scoped_pool pool;
+    OpenCLContext cl(dev);
+    if (!devices(cl) != 0)
+        return true;
+
+    istringstream is(dev == OpenCLContext::CPU? testCharsParallelCPU : copy? testCharsParallelCopyGPU : testCharsParallelGPU);
+    failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl);
+    assert(hasContent(clprog));
+
+    resetOpenCLCounters();
+    const lambda<bool()> el = evalDataParallelLoop(cl, content(clprog));
+    cout << "OpenCL data-parallel eval " << (dev == OpenCLContext::CPU? "CPU" : "GPU") << (copy? " copy" : "") << " test " << time(el, 5, 500) << " ms";
+    printOpenCLCounters(500);
+    cout << endl;
+    return true;
+}
+
+}
+}
+
+int main() {
+    tuscany::cout << "Testing..." << tuscany::endl;
+
+    tuscany::opencl::testTaskParallel(tuscany::opencl::OpenCLContext::CPU);
+    tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::CPU, false);
+    tuscany::opencl::testDataParallel(tuscany::opencl::OpenCLContext::CPU);
+    tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::CPU, false);
+
+    tuscany::opencl::testTaskParallel(tuscany::opencl::OpenCLContext::GPU);
+    tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::GPU, false);
+    tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::GPU, true);
+    tuscany::opencl::testDataParallel(tuscany::opencl::OpenCLContext::GPU);
+    tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::GPU, false);
+    tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::GPU, true);
+
+    tuscany::cout << "OK" << tuscany::endl;
+    return 0;
+}
diff --git a/modules/opencl/server-test b/modules/opencl/server-test
new file mode 100755
index 0000000..e623599
--- /dev/null
+++ b/modules/opencl/server-test
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#  
+#    http://www.apache.org/licenses/LICENSE-2.0
+#    
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+# Setup
+../http/httpd-conf tmp localhost 8090 ../server/htdocs
+../server/server-conf tmp
+./opencl-conf tmp
+cat >>tmp/conf/httpd.conf <<EOF
+SCAContribution `pwd`/
+SCAComposite domain-test.composite
+EOF
+
+../http/httpd-start tmp
+sleep 2
+
+# Test
+./client-test 2>/dev/null
+rc=$?
+
+# Cleanup
+../http/httpd-stop tmp
+sleep 2
+exit $rc
diff --git a/modules/opencl/server-test.cl b/modules/opencl/server-test.cl
new file mode 100644
index 0000000..de5c2d1
--- /dev/null
+++ b/modules/opencl/server-test.cl
@@ -0,0 +1,17 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#  
+#    http://www.apache.org/licenses/LICENSE-2.0
+#    
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+