// mesa/src/gallium/state_trackers/clover/api/kernel.cpp
// Francisco Jerez 369419f761 clover: Define a few convenience equality operators.
// Tested-by: Tom Stellard <thomas.stellard@amd.com>
// 2013-10-21 10:47:03 -07:00
//
// 315 lines
// 8.5 KiB
// C++

//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#include "api/util.hpp"
#include "core/kernel.hpp"
#include "core/event.hpp"
using namespace clover;
///
/// Create a kernel object for the function called \a name, using the
/// symbol table of the first binary stored in program \a d_prog.
///
/// The status written through \a r_errcode is CL_INVALID_VALUE for a
/// null \a name, CL_INVALID_PROGRAM_EXECUTABLE when the program has no
/// binaries, CL_INVALID_KERNEL_NAME when the symbol lookup raises
/// module::noent_error, and CL_SUCCESS otherwise.  NULL is returned
/// whenever one of the handled errors is raised.
///
PUBLIC cl_kernel
clCreateKernel(cl_program d_prog, const char *name, cl_int *r_errcode) try {
   auto &prog = obj(d_prog);

   if (!name)
      throw error(CL_INVALID_VALUE);

   if (prog.binaries().empty())
      throw error(CL_INVALID_PROGRAM_EXECUTABLE);

   // Only the first binary is consulted for the kernel's argument list.
   auto entry = prog.binaries().begin()->second.sym(name);

   ret_error(r_errcode, CL_SUCCESS);
   return new kernel(prog, name, range(entry.args));

} catch (module::noent_error &) {
   // The requested name is not among the symbols of the binary.
   ret_error(r_errcode, CL_INVALID_KERNEL_NAME);
   return NULL;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}
///
/// Create one kernel object for each symbol of the first binary stored
/// in program \a d_prog.
///
/// When \a rd_kerns is non-null it must have room for every symbol
/// (\a count entries), otherwise CL_INVALID_VALUE is returned.  When
/// \a r_count is non-null it receives the number of symbols, whether or
/// not any kernel was created.
///
PUBLIC cl_int
clCreateKernelsInProgram(cl_program d_prog, cl_uint count,
                         cl_kernel *rd_kerns, cl_uint *r_count) try {
   auto &prog = obj(d_prog);

   if (prog.binaries().empty())
      throw error(CL_INVALID_PROGRAM_EXECUTABLE);

   auto &syms = prog.binaries().begin()->second.syms;

   if (rd_kerns) {
      // The caller's array has to be large enough for all the kernels.
      if (count < syms.size())
         throw error(CL_INVALID_VALUE);

      copy(map([&prog](const module::symbol &sym) {
               return desc(new kernel(prog, compat::string(sym.name),
                                      range(sym.args)));
            }, syms),
           rd_kerns);
   }

   if (r_count)
      *r_count = syms.size();

   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
///
/// Implements clRetainKernel by calling retain() on the kernel object
/// behind \a d_kern.  Any clover::error raised while resolving the
/// handle is translated into its error code.
///
PUBLIC cl_int
clRetainKernel(cl_kernel d_kern) try {
   auto &kern = obj(d_kern);

   kern.retain();
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
///
/// Implements clReleaseKernel: calls release() on the kernel object
/// behind \a d_kern and deletes the object when release() reports true.
///
PUBLIC cl_int
clReleaseKernel(cl_kernel d_kern) try {
   auto &kern = obj(d_kern);

   // A true result from release() means the object has to be destroyed.
   if (kern.release())
      delete pobj(d_kern);

   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
///
/// Set argument number \a idx of kernel \a d_kern from the \a size
/// bytes pointed to by \a value.
///
/// Returns CL_INVALID_ARG_INDEX when \a idx is not smaller than the
/// number of kernel arguments; any clover::error raised by the
/// argument's own set() method is returned as its error code.
///
PUBLIC cl_int
clSetKernelArg(cl_kernel d_kern, cl_uint idx, size_t size,
               const void *value) try {
   auto &args = obj(d_kern).args;

   if (!(idx < args.size()))
      throw error(CL_INVALID_ARG_INDEX);

   auto &arg = args[idx];
   arg->set(size, value);

   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
///
/// Query the property \a param of kernel \a d_kern, writing the result
/// through the (\a r_buf, \a size, \a r_size) property buffer.
///
/// Unknown values of \a param yield CL_INVALID_VALUE.
///
PUBLIC cl_int
clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param,
                size_t size, void *r_buf, size_t *r_size) try {
   property_buffer buf { r_buf, size, r_size };
   auto &kern = obj(d_kern);
   auto &prog = kern.prog;

   switch (param) {
   case CL_KERNEL_FUNCTION_NAME: {
      buf.as_string() = kern.name();
      break;
   }
   case CL_KERNEL_NUM_ARGS: {
      buf.as_scalar<cl_uint>() = kern.args.size();
      break;
   }
   case CL_KERNEL_REFERENCE_COUNT: {
      buf.as_scalar<cl_uint>() = kern.ref_count();
      break;
   }
   case CL_KERNEL_CONTEXT: {
      // The context is the one of the program the kernel belongs to.
      buf.as_scalar<cl_context>() = desc(prog.ctx);
      break;
   }
   case CL_KERNEL_PROGRAM: {
      buf.as_scalar<cl_program>() = desc(prog);
      break;
   }
   default:
      throw error(CL_INVALID_VALUE);
   }

   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
///
/// Query the work-group related property \a param of kernel \a d_kern,
/// writing the result through the (\a r_buf, \a size, \a r_size)
/// property buffer.
///
/// A null \a d_dev is accepted only when the kernel's program holds
/// exactly one binary; a non-null device must have a binary in the
/// program.  Otherwise CL_INVALID_DEVICE is returned.  Unknown values
/// of \a param yield CL_INVALID_VALUE.
///
PUBLIC cl_int
clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
                         cl_kernel_work_group_info param,
                         size_t size, void *r_buf, size_t *r_size) try {
   property_buffer buf { r_buf, size, r_size };
   auto &kern = obj(d_kern);
   auto pdev = pobj(d_dev);

   const auto &bins = kern.prog.binaries();
   const bool dev_ok = pdev ? bins.count(pdev) != 0
                            : bins.size() == 1;

   if (!dev_ok)
      throw error(CL_INVALID_DEVICE);

   switch (param) {
   case CL_KERNEL_WORK_GROUP_SIZE: {
      buf.as_scalar<size_t>() = kern.max_block_size();
      break;
   }
   case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: {
      buf.as_vector<size_t>() = kern.block_size();
      break;
   }
   case CL_KERNEL_LOCAL_MEM_SIZE: {
      buf.as_scalar<cl_ulong>() = kern.mem_local();
      break;
   }
   case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
      // Hard-coded: no device-specific multiple is queried here.
      buf.as_scalar<size_t>() = 1;
      break;
   }
   case CL_KERNEL_PRIVATE_MEM_SIZE: {
      buf.as_scalar<cl_ulong>() = kern.mem_private();
      break;
   }
   default:
      throw error(CL_INVALID_VALUE);
   }

   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
namespace {
///
/// Argument checks shared by the kernel invocation commands.
///
/// Throws CL_INVALID_CONTEXT when the kernel's program or any event in
/// \a deps belongs to a context other than the queue's,
/// CL_INVALID_KERNEL_ARGS when some kernel argument reports itself as
/// unset, and CL_INVALID_PROGRAM_EXECUTABLE when the program has no
/// binary for the queue's device.
///
void
validate_common(command_queue &q, kernel &kern,
                const ref_vector<event> &deps) {
   if (kern.prog.ctx != q.ctx)
      throw error(CL_INVALID_CONTEXT);

   const auto foreign_event = [&](const event &ev) {
      return ev.ctx != q.ctx;
   };

   if (any_of(foreign_event, deps))
      throw error(CL_INVALID_CONTEXT);

   // kern.args holds pointers, hence the derefs() adaptor.
   if (any_of([](kernel::argument &arg) {
            return !arg.set();
         }, map(derefs(), kern.args)))
      throw error(CL_INVALID_KERNEL_ARGS);

   if (kern.prog.binaries().count(&q.dev) == 0)
      throw error(CL_INVALID_PROGRAM_EXECUTABLE);
}
///
/// Check the work dimensions, global size and (optional) local size of
/// a kernel launch against the limits of the device of queue \a q.
///
/// \a d_grid_size and \a d_block_size are arrays of \a dims elements;
/// \a d_block_size may be null, in which case only the dimension count
/// and the global size are checked.
///
void
validate_grid(command_queue &q, cl_uint dims,
              const size_t *d_grid_size, const size_t *d_block_size) {
   auto grid_size = range(d_grid_size, dims);

   if (dims == 0 || dims > q.dev.max_block_size().size())
      throw error(CL_INVALID_WORK_DIMENSION);

   // The range is only traversed once the pointer is known to be non-null.
   if (!d_grid_size || any_of(is_zero(), grid_size))
      throw error(CL_INVALID_GLOBAL_WORK_SIZE);

   if (!d_block_size)
      return;

   auto block_size = range(d_block_size, dims);

   // Every local dimension must be non-zero and within the device limit.
   if (any_of(is_zero(), block_size) ||
       any_of(greater(), block_size, q.dev.max_block_size()))
      throw error(CL_INVALID_WORK_ITEM_SIZE);

   // The global size must be a multiple of the local size.
   if (any_of(modulus(), grid_size, block_size))
      throw error(CL_INVALID_WORK_GROUP_SIZE);

   // NOTE(review): the product is seeded with the unsigned literal 1u; if
   // fold() accumulates in the type of its seed, very large local sizes
   // could wrap around before this comparison -- confirm against fold().
   if (fold(multiplies(), 1u, block_size) >
       q.dev.max_threads_per_block())
      throw error(CL_INVALID_WORK_GROUP_SIZE);
}
///
/// Return a vector with a copy of the \a n elements starting at \a p,
/// or, when \a p is null, a vector made of \a n copies of \a x.
///
std::vector<size_t>
pad_vector(const size_t *p, unsigned n, size_t x) {
   if (p)
      return std::vector<size_t>(p, p + n);

   // Parentheses are required here: the braced form { n, x } selects the
   // initializer_list constructor and produces the two-element vector
   // [n, x] instead of n copies of x.
   return std::vector<size_t>(n, x);
}
}
///
/// Enqueue the execution of kernel \a d_kern on queue \a d_q over a
/// \a dims dimensional grid.
///
/// The arguments are checked with validate_common() and
/// validate_grid().  Null \a d_grid_offset, \a d_grid_size and
/// \a d_block_size are replaced through pad_vector() with fill values
/// of 0, 1 and 1 respectively.  The launch itself is deferred to the
/// action of a new hard_event depending on the events in \a d_deps;
/// that event is handed back through \a rd_ev.
///
PUBLIC cl_int
clEnqueueNDRangeKernel(cl_command_queue d_q, cl_kernel d_kern,
                       cl_uint dims, const size_t *d_grid_offset,
                       const size_t *d_grid_size, const size_t *d_block_size,
                       cl_uint num_deps, const cl_event *d_deps,
                       cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &kern = obj(d_kern);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, kern, deps);
   validate_grid(q, dims, d_grid_size, d_block_size);

   auto offset = pad_vector(d_grid_offset, dims, 0);
   auto global = pad_vector(d_grid_size, dims, 1);
   auto local = pad_vector(d_block_size, dims, 1);

   // The geometry vectors are copied into the closure; the queue and the
   // kernel are captured by reference.
   auto *hev = new hard_event(
      q, CL_COMMAND_NDRANGE_KERNEL, deps,
      [&kern, &q, offset, global, local](event &) {
         kern.launch(q, offset, global, local);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
///
/// Enqueue the execution of kernel \a d_kern on queue \a d_q as a
/// single work item: the kernel is launched with offset { 0 }, global
/// size { 1 } and local size { 1 }.
///
/// The arguments are checked with validate_common().  The launch is
/// deferred to the action of a new hard_event depending on the events
/// in \a d_deps; that event is handed back through \a rd_ev.
///
PUBLIC cl_int
clEnqueueTask(cl_command_queue d_q, cl_kernel d_kern,
              cl_uint num_deps, const cl_event *d_deps,
              cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &kern = obj(d_kern);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, kern, deps);

   // The queue and the kernel are captured by reference.
   auto *hev = new hard_event(
      q, CL_COMMAND_TASK, deps,
      [&kern, &q](event &) {
         kern.launch(q, { 0 }, { 1 }, { 1 });
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
///
/// Native kernels are not implemented: every call is rejected with
/// CL_INVALID_OPERATION and none of the arguments is inspected.
///
PUBLIC cl_int
clEnqueueNativeKernel(cl_command_queue d_q, void (*func)(void *),
                      void *args, size_t args_size,
                      cl_uint num_mems, const cl_mem *d_mems,
                      const void **mem_handles, cl_uint num_deps,
                      const cl_event *d_deps, cl_event *rd_ev) {
   const cl_int status = CL_INVALID_OPERATION;

   return status;
}