| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /* $Rev$ $Date$ */ |
| |
| /** |
| * Test OpenCL kernel evaluator. |
| */ |
| |
| #include <assert.h> |
| #include "stream.hpp" |
| #include "string.hpp" |
| #include "driver.hpp" |
| #include "parallel.hpp" |
| #include "perf.hpp" |
| |
| namespace tuscany { |
| namespace opencl { |
| |
| const string testFloatsCPU = |
| "kernel void add(const float x, const float y, global float* r) {\n" |
| " float l_x;\n" |
| " float l_y;\n" |
| " l_x = x;\n" |
| " l_y = y;\n" |
| " *r = l_x + l_y;\n" |
| "}\n" |
| "kernel void float_add(const float x, const float y, global float* r) {\n" |
| " add(x, y, r);\n" |
| "}\n"; |
| |
| const string testFloatsGPU = |
| "kernel void add(const float x, const float y, global float* r) {\n" |
| " local float l_x;\n" |
| " local float l_y;\n" |
| " l_x = x;\n" |
| " l_y = y;\n" |
| " barrier(CLK_LOCAL_MEM_FENCE);\n" |
| " *r = l_x + l_y;\n" |
| "}\n" |
| "kernel void float_add(const float x, const float y, global float* r) {\n" |
| " add(x, y, r);\n" |
| "}\n"; |
| |
| const string testBoolsCPU = |
| "kernel void int_or(const int x, const int y, global int* r) {\n" |
| " int l_x;\n" |
| " int l_y;\n" |
| " l_x = x;\n" |
| " l_y = y;\n" |
| " *r = l_x || l_y;\n" |
| "}\n"; |
| |
| const string testBoolsGPU = |
| "kernel void int_or(const int x, const int y, global int* r) {\n" |
| " local int l_x;\n" |
| " local int l_y;\n" |
| " l_x = x;\n" |
| " l_y = y;\n" |
| " barrier(CLK_LOCAL_MEM_FENCE);\n" |
| " *r = l_x || l_y;\n" |
| "}\n"; |
| |
| const string testCharsCPU = |
| "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" |
| " const int ixl = (int)xl;\n" |
| " const int iyl = (int)yl;\n" |
| " for (int i = 0; i < ixl; i++)\n" |
| " r[i] = x[i];\n" |
| " for (int i = 0; i < iyl; i++)\n" |
| " r[ixl + i] = y[i];\n" |
| " r[ixl + iyl] = '\\0';\n" |
| "}\n" |
| "kernel void char128_add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" |
| " add(xl, x, yl, y, r);\n" |
| "}\n"; |
| |
| const string testCharsCopyGPU = |
| "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" |
| " const int ixl = (int)xl;\n" |
| " const int iyl = (int)yl;\n" |
| " local char l_x[128];\n" |
| " local char l_y[128];\n" |
| " event_t re = async_work_group_copy(l_x, x, ixl, 0);\n" |
| " async_work_group_copy(l_y, y, iyl, re);\n" |
| " wait_group_events(1, &re);\n" |
| " local char l_r[128];\n" |
| " for (int i = 0; i < ixl; i++)\n" |
| " l_r[i] = l_x[i];\n" |
| " for (int i = 0; i < iyl; i++)\n" |
| " l_r[ixl + i] = l_y[i];\n" |
| " l_r[ixl + iyl] = '\\0';\n" |
| " event_t we = async_work_group_copy(r, l_r, ixl + iyl + 1, 0);\n" |
| " wait_group_events(1, &we);\n" |
| "}\n" |
| "kernel void char128(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" |
| " add(xl, x, yl, y, r);\n" |
| "}\n"; |
| |
| const string testCharsGPU = |
| "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" |
| " const int ixl = (int)xl;\n" |
| " const int iyl = (int)yl;\n" |
| " for (int i = 0; i < ixl; i++)\n" |
| " r[i] = x[i];\n" |
| " for (int i = 0; i < iyl; i++)\n" |
| " r[ixl + i] = y[i];\n" |
| " r[ixl + iyl] = '\\0';\n" |
| "}\n" |
| "kernel void char128(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" |
| " add(xl, x, yl, y, r);\n" |
| "}\n"; |
| |
| const string testCharsParallelCPU = |
| "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" |
| " const int i = get_global_id(0);\n" |
| " const int ixl = (int)xl;\n" |
| " const int iyl = (int)yl;\n" |
| " r[i] = i < ixl? x[i] : i < ixl + iyl? y[i - ixl] : '\\0';\n" |
| "}\n"; |
| |
| const string testCharsParallelCopyGPU = |
| "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" |
| " const int i = get_global_id(0);\n" |
| " const int ixl = (int)xl;\n" |
| " const int iyl = (int)yl;\n" |
| " local char l_x[128];\n" |
| " local char l_y[128];\n" |
| " event_t re = async_work_group_copy(l_x, x, ixl, 0);\n" |
| " async_work_group_copy(l_y, y, iyl, re);\n" |
| " wait_group_events(1, &re);\n" |
| " local char l_r[128];\n" |
| " l_r[i] = i < ixl? l_x[i] : i < ixl + iyl? l_y[i - ixl] : '\\0';\n" |
| " event_t we = async_work_group_copy(r, l_r, ixl + iyl + 1, 0);\n" |
| " wait_group_events(1, &we);\n" |
| "}\n"; |
| |
| const string testCharsParallelGPU = |
| "kernel void add(const float xl, global const char* x, const float yl, global const char* y, global char* r) {\n" |
| " const int i = get_global_id(0);\n" |
| " const int ixl = (int)xl;\n" |
| " const int iyl = (int)yl;\n" |
| " r[i] = i < ixl? x[i] : i < ixl + iyl? y[i - ixl] : '\\0';\n" |
| "}\n"; |
| |
| bool testTaskParallel(const OpenCLContext::DeviceType dev) { |
| gc_scoped_pool pool; |
| OpenCLContext cl(dev); |
| if (!devices(cl) != 0) |
| return true; |
| |
| { |
| istringstream is(dev == OpenCLContext::CPU? testFloatsCPU : testFloatsGPU); |
| failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl); |
| assert(hasContent(clprog)); |
| const value exp = mklist<value>("float_add", 2, 3); |
| const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, cl); |
| assert(hasContent(r)); |
| assert(content(r) == value(5)); |
| } |
| if (true) return true; |
| { |
| istringstream is(dev == OpenCLContext::CPU? testFloatsCPU : testFloatsGPU); |
| failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl); |
| assert(hasContent(clprog)); |
| const value exp = mklist<value>("add", 2, 3); |
| const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, 1, value::Number, 1, cl); |
| assert(hasContent(r)); |
| assert(content(r) == value(5)); |
| } |
| { |
| istringstream is(dev == OpenCLContext::CPU? testBoolsCPU : testBoolsGPU); |
| failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl); |
| assert(hasContent(clprog)); |
| |
| const value exp = mklist<value>("int_or", true, false); |
| const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, cl); |
| assert(hasContent(r)); |
| assert(content(r) == value(true)); |
| |
| const value exp2 = mklist<value>("int_or", false, false); |
| const failable<value> r2 = evalKernel(createKernel(exp2, content(clprog)), exp2, cl); |
| assert(hasContent(r2)); |
| assert(content(r2) == value(false)); |
| } |
| { |
| istringstream is(dev == OpenCLContext::CPU? testCharsCPU : testCharsGPU); |
| failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl); |
| assert(hasContent(clprog)); |
| |
| const value exp = mklist<value>("char128", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World ")); |
| const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, cl); |
| assert(hasContent(r)); |
| assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); |
| } |
| { |
| istringstream is(dev == OpenCLContext::CPU? testCharsCPU : testCharsGPU); |
| failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl); |
| assert(hasContent(clprog)); |
| |
| const value exp = mklist<value>("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World ")); |
| const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, 1, value::String, 128, cl); |
| assert(hasContent(r)); |
| assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); |
| } |
| |
| return true; |
| } |
| |
| struct evalTaskParallelLoop { |
| evalTaskParallelLoop(const OpenCLContext& cl, const OpenCLProgram& clprog) : cl(cl), clprog(clprog), exp(mklist<value>("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "))) { |
| } |
| const bool operator()() const { |
| const failable<value> r = evalKernel(createKernel(exp, clprog), exp, 1, value::String, 128, cl); |
| assert(hasContent(r)); |
| assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); |
| return true; |
| } |
| |
| const OpenCLContext& cl; |
| const OpenCLProgram& clprog; |
| const value exp; |
| }; |
| |
| const bool testTaskParallelPerf(const OpenCLContext::DeviceType dev, const bool copy) { |
| gc_scoped_pool pool; |
| OpenCLContext cl(dev); |
| if (!devices(cl) != 0) |
| return true; |
| |
| istringstream is(dev == OpenCLContext::CPU? testCharsCPU : copy? testCharsCopyGPU : testCharsGPU); |
| failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl); |
| assert(hasContent(clprog)); |
| |
| resetOpenCLCounters(); |
| const lambda<bool()> el = evalTaskParallelLoop(cl, content(clprog)); |
| cout << "OpenCL task-parallel eval " << (dev == OpenCLContext::CPU? "CPU" : "GPU") << (copy? " copy" : "") << " test " << time(el, 5, 500) << " ms"; |
| printOpenCLCounters(500); |
| cout << endl; |
| return true; |
| } |
| |
| bool testDataParallel(const OpenCLContext::DeviceType dev) { |
| gc_scoped_pool pool; |
| OpenCLContext cl(dev); |
| if (!devices(cl) != 0) |
| return true; |
| |
| { |
| istringstream is(dev == OpenCLContext::CPU? testCharsParallelCPU : testCharsParallelGPU); |
| failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl); |
| assert(hasContent(clprog)); |
| |
| const value exp = mklist<value>("add", 6, string("Hello "), 5, string("World")); |
| const failable<value> r = evalKernel(createKernel(exp, content(clprog)), exp, 121, value::String, 128, cl); |
| assert(hasContent(r)); |
| assert(content(r) == value(string("Hello World"))); |
| } |
| |
| return true; |
| } |
| |
| struct evalDataParallelLoop { |
| evalDataParallelLoop(const OpenCLContext& cl, const OpenCLProgram& clprog) : cl(cl), clprog(clprog), exp(mklist<value>("add", 60, string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello "), 60, string("World World World World World World World World World World "))) { |
| } |
| const bool operator()() const { |
| const failable<value> r = evalKernel(createKernel(exp, clprog), exp, 121, value::String, 128, cl); |
| assert(hasContent(r)); |
| assert(content(r) == value(string("Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello World World World World World World World World World World "))); |
| return true; |
| } |
| |
| const OpenCLContext& cl; |
| const OpenCLProgram& clprog; |
| const value exp; |
| }; |
| |
| const bool testDataParallelPerf(const OpenCLContext::DeviceType dev, const bool copy) { |
| gc_scoped_pool pool; |
| OpenCLContext cl(dev); |
| if (!devices(cl) != 0) |
| return true; |
| |
| istringstream is(dev == OpenCLContext::CPU? testCharsParallelCPU : copy? testCharsParallelCopyGPU : testCharsParallelGPU); |
| failable<OpenCLProgram> clprog = readProgram("kernel.cl", is, cl); |
| assert(hasContent(clprog)); |
| |
| resetOpenCLCounters(); |
| const lambda<bool()> el = evalDataParallelLoop(cl, content(clprog)); |
| cout << "OpenCL data-parallel eval " << (dev == OpenCLContext::CPU? "CPU" : "GPU") << (copy? " copy" : "") << " test " << time(el, 5, 500) << " ms"; |
| printOpenCLCounters(500); |
| cout << endl; |
| return true; |
| } |
| |
| } |
| } |
| |
| int main() { |
| tuscany::cout << "Testing..." << tuscany::endl; |
| |
| tuscany::opencl::testTaskParallel(tuscany::opencl::OpenCLContext::CPU); |
| tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::CPU, false); |
| tuscany::opencl::testDataParallel(tuscany::opencl::OpenCLContext::CPU); |
| tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::CPU, false); |
| |
| tuscany::opencl::testTaskParallel(tuscany::opencl::OpenCLContext::GPU); |
| tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::GPU, false); |
| tuscany::opencl::testTaskParallelPerf(tuscany::opencl::OpenCLContext::GPU, true); |
| tuscany::opencl::testDataParallel(tuscany::opencl::OpenCLContext::GPU); |
| tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::GPU, false); |
| tuscany::opencl::testDataParallelPerf(tuscany::opencl::OpenCLContext::GPU, true); |
| |
| tuscany::cout << "OK" << tuscany::endl; |
| return 0; |
| } |