Files
rocm-systems/opencl/amdocl/cl_program.cpp
T

1986 строки
80 KiB
C++

//
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
//
#include "cl_common.hpp"
#include "vdi_common.hpp"
#include "platform/context.hpp"
#include "platform/program.hpp"
#include "platform/kernel.hpp"
#include "platform/sampler.hpp"
#include "cl_semaphore_amd.h"
#include <vector>
/*! \brief Allocate an empty amd::Program for \a context and register a device
 *  program for every requested device.
 *
 *  \param context OpenCL context handle. It is converted with as_amd() and
 *  dereferenced without validation, so the caller must have validated it.
 *
 *  \param num_devices number of entries in \a device_list. It is not read
 *  when \a device_list is NULL.
 *
 *  \param device_list devices to attach to the program, or NULL to attach
 *  every device of \a context.
 *
 *  \param errcode_ret written through not_null() on every return path:
 *  CL_SUCCESS, CL_INVALID_DEVICE (an entry of \a device_list is not a valid
 *  handle or does not belong to \a context) or CL_OUT_OF_HOST_MEMORY.
 *
 *  \return the new program on success, NULL otherwise. On failure the
 *  partially initialised program has already been released.
 */
static amd::Program* createProgram(cl_context context, cl_uint num_devices,
                                   const cl_device_id* device_list, cl_int* errcode_ret) {
  // Create the program
  amd::Program* program = new amd::Program(*as_amd(context));
  if (program == NULL) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    return NULL;
  }

  if (device_list == NULL) {
    // No explicit list: add programs for all devices in the context.
    const std::vector<amd::Device*>& devices = as_amd(context)->devices();
    for (const auto& it : devices) {
      if (program->addDeviceProgram(*it) == CL_OUT_OF_HOST_MEMORY) {
        *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
        program->release();
        return NULL;
      }
    }
    // Report success here as well, so both paths leave errcode_ret defined.
    *not_null(errcode_ret) = CL_SUCCESS;
    return program;
  }

  for (cl_uint i = 0; i < num_devices; ++i) {
    cl_device_id device = device_list[i];
    if (!is_valid(device) || !as_amd(context)->containsDevice(as_amd(device))) {
      *not_null(errcode_ret) = CL_INVALID_DEVICE;
      program->release();
      return NULL;
    }
    // Only an out-of-memory status aborts the creation; other statuses are
    // ignored here, exactly as in the all-devices path above.
    if (program->addDeviceProgram(*as_amd(device)) == CL_OUT_OF_HOST_MEMORY) {
      *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
      program->release();
      return NULL;
    }
  }
  *not_null(errcode_ret) = CL_SUCCESS;
  return program;
}
/*! \addtogroup API
* @{
*
* \addtogroup CL_Programs
*
* An OpenCL program consists of a set of kernels that are identified as
* functions declared with the __kernel qualifier in the program source.
* OpenCL programs may also contain auxiliary functions and constant data that
* can be used by __kernel functions. The program executable can be generated
* online or offline by the OpenCL compiler for the appropriate
* target device(s).
*
* @{
*
* \addtogroup CL_CreatingPrograms
* @{
*/
/*! \brief Create a program object for a context, and loads the source code
* specified by the text strings in the strings array into the program object.
*
* \param context must be a valid OpenCL context.
*
* \param count is the number of pointers in \a strings
*
* \param strings is an array of \a count pointers to optionally
* null-terminated character strings that make up the source code.
*
* \param lengths is an array with the number of chars in each string (the
* string length). If an element in lengths is zero, its accompanying string
* is null-terminated. If lengths is NULL, all strings in the strings argument
* are considered null-terminated.
*
* \param errcode_ret will return an appropriate error code. If \a errcode_ret
* is NULL, no error code is returned.
*
* \return A valid non-zero program object and errcode_ret is set to
* \a CL_SUCCESS if the program object is created successfully. It returns a
* NULL value with one of the following error values returned in
* \a errcode_ret:
* - CL_INVALID_CONTEXT if \a context is not a valid context.
* - CL_INVALID_VALUE if \a count is zero or if \a strings or any entry in
* \a strings is NULL.
* - CL_COMPILER_NOT_AVAILABLE if a compiler is not available.
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
* by the runtime.
*
* \version 1.0r33
*/
RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithSource,
                  (cl_context context, cl_uint count, const char** strings, const size_t* lengths,
                   cl_int* errcode_ret)) {
  // Shared early-exit path: record the error code and hand back a null handle.
  const auto reject = [errcode_ret](cl_int code) -> cl_program {
    *not_null(errcode_ret) = code;
    return (cl_program)0;
  };

  if (!is_valid(context)) {
    return reject(CL_INVALID_CONTEXT);
  }
  if (strings == NULL || count == 0) {
    return reject(CL_INVALID_VALUE);
  }

  // Concatenate every fragment into one source string.
  std::string sourceText;
  for (cl_uint idx = 0; idx < count; ++idx) {
    const char* fragment = strings[idx];
    if (fragment == NULL) {
      return reject(CL_INVALID_VALUE);
    }
    // A missing lengths array, or a zero entry, means "null-terminated".
    const size_t explicitLength = (lengths != NULL) ? lengths[idx] : 0;
    if (explicitLength == 0) {
      sourceText.append(fragment);
    } else {
      sourceText.append(fragment, explicitLength);
    }
  }
  if (sourceText.empty()) {
    return reject(CL_INVALID_VALUE);
  }

  // Create the program object that owns the concatenated source.
  amd::Program* program = new amd::Program(*as_amd(context), sourceText, amd::Program::OpenCL_C);
  if (program == NULL) {
    return reject(CL_OUT_OF_HOST_MEMORY);
  }

  // Register a device program for each device of the context; only an
  // out-of-memory status aborts the creation.
  for (amd::Device* device : as_amd(context)->devices()) {
    if (program->addDeviceProgram(*device) != CL_OUT_OF_HOST_MEMORY) {
      continue;
    }
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    program->release();
    return (cl_program)0;
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return as_cl(program);
}
RUNTIME_EXIT
/*! \brief Create a program object for a context, and loads the IL into the
* program object.
*
* \param context must be a valid OpenCL context.
*
* \param il is a pointer to IL.
*
* \param length is the size in bytes of IL.
*
* \param errcode_ret will return an appropriate error code. If \a errcode_ret
* is NULL, no error code is returned.
*
* \return A valid non-zero program object and errcode_ret is set to
* \a CL_SUCCESS if the program object is created successfully. It returns a
* NULL value with one of the following error values returned in
* \a errcode_ret:
* - CL_INVALID_CONTEXT if \a context is not a valid context.
* - CL_INVALID_VALUE if \a il is NULL or \a length is zero.
* - CL_INVALID_VALUE if the \a length-byte memory pointed to by \a il does
* not contain well-formed intermediate language input appropriate for the
* deployment environment in which the OpenCL platform is running.
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
* by the OpenCL implementation on the device.
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
* required by the OpenCL implementation on the host.
*
* \version 1.0r33
*/
RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithIL,
                  (cl_context context, const void* il, size_t length, cl_int* errcode_ret)) {
  // Argument validation: the context is checked first, then the IL buffer.
  cl_int argumentError = CL_SUCCESS;
  if (!is_valid(context)) {
    argumentError = CL_INVALID_CONTEXT;
  } else if (il == NULL || length == 0) {
    argumentError = CL_INVALID_VALUE;
  }
  if (argumentError != CL_SUCCESS) {
    *not_null(errcode_ret) = argumentError;
    return (cl_program)0;
  }

  // Create the program object, tagged as SPIR-V.
  amd::Program* ilProgram = new amd::Program(*as_amd(context), amd::Program::SPIRV);
  if (ilProgram == NULL) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    return (cl_program)0;
  }

  // Hand the IL image to every device of the context; only an out-of-memory
  // status aborts the creation.
  for (amd::Device* device : as_amd(context)->devices()) {
    const bool outOfMemory =
        (ilProgram->addDeviceProgram(*device, il, length) == CL_OUT_OF_HOST_MEMORY);
    if (outOfMemory) {
      *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
      ilProgram->release();
      return (cl_program)0;
    }
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return as_cl(ilProgram);
}
RUNTIME_EXIT
/*! \brief Create a program object for a context, and loads the binary images
* into the program object.
*
* \param context must be a valid OpenCL context.
*
* \param device_list is a pointer to a list of devices that are in context.
* \a device_list must be a non-NULL value. The binaries are loaded for devices
* specified in this list. These devices become the devices associated with the
* program object, and each of them must be a device associated with
* \a context.
*
* \param num_devices is the number of devices listed in \a device_list.
*
* \param lengths is an array of the size in bytes of the program binaries to
* be loaded for devices specified by \a device_list.
*
* \param binaries is an array of pointers to program binaries to be loaded
* for devices specified by \a device_list. For each device given by
* \a device_list[i], the pointer to the program binary for that device is
* given by \a binaries[i] and the length of this corresponding binary is given
* by \a lengths[i]. \a lengths[i] cannot be zero and \a binaries[i] cannot be
* a NULL pointer. The program binaries specified by binaries contain the bits
* that describe the program executable that will be run on the device(s)
* associated with context. The program binary can consist of either or both:
* - Device-specific executable(s)
* - Implementation specific intermediate representation (IR) which will be
* converted to the device-specific executable.
*
* \param binary_status returns whether the program binary for each device
* specified in \a device_list was loaded successfully or not. It is an array
* of \a num_devices entries and returns CL_SUCCESS in \a binary_status[i] if
* binary was successfully loaded for device specified by \a device_list[i];
* otherwise returns CL_INVALID_VALUE if \a lengths[i] is zero or if
* \a binaries[i] is a NULL value or CL_INVALID_BINARY in \a binary_status[i]
* if program binary is not a valid binary for the specified device.
* If \a binary_status is NULL, it is ignored.
*
* \param errcode_ret will return an appropriate error code. If \a errcode_ret
* is NULL, no error code is returned.
*
* \return A valid non-zero program object and \a errcode_ret is set to
* CL_SUCCESS if the program object is created successfully. It returns a NULL
* value with one of the following error values returned in \a errcode_ret:
* - CL_INVALID_CONTEXT if \a context is not a valid context.
* - CL_INVALID_VALUE if \a device_list is NULL or \a num_devices is zero.
* - CL_INVALID_DEVICE if OpenCL devices listed in \a device_list are not in
* the list of devices associated with \a context
* - CL_INVALID_VALUE if \a lengths or \a binaries are NULL or if any entry
* in \a lengths[i] is zero or \a binaries[i] is NULL.
* - CL_INVALID_BINARY if an invalid program binary was encountered for any
* device. \a binary_status will return specific status for each device.
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
* by the runtime.
*
* \version 1.0r33
*/
RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithBinary,
                  (cl_context context, cl_uint num_devices, const cl_device_id* device_list,
                   const size_t* lengths, const unsigned char** binaries, cl_int* binary_status,
                   cl_int* errcode_ret)) {
  if (!is_valid(context)) {
    *not_null(errcode_ret) = CL_INVALID_CONTEXT;
    return (cl_program)0;
  }
  if (num_devices == 0 || device_list == NULL || binaries == NULL || lengths == NULL) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return (cl_program)0;
  }

  amd::Program* program = new amd::Program(*as_amd(context));
  if (program == NULL) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    return (cl_program)0;
  }

  // First per-device failure seen so far. It is kept sticky so that a later
  // device loading successfully cannot overwrite an earlier error.
  cl_int firstError = CL_SUCCESS;

  for (cl_uint i = 0; i < num_devices; ++i) {
    cl_device_id device = device_list[i];
    if (!is_valid(device) || !as_amd(context)->containsDevice(as_amd(device))) {
      *not_null(errcode_ret) = CL_INVALID_DEVICE;
      program->release();
      return (cl_program)0;
    }

    cl_int status;
    if (binaries[i] == NULL || lengths[i] == 0) {
      // Missing image for this device: record it and keep going so that
      // binary_status is filled in for the remaining devices too.
      status = CL_INVALID_VALUE;
    } else {
      status = program->addDeviceProgram(*as_amd(device), binaries[i], lengths[i]);
      if (status == CL_OUT_OF_HOST_MEMORY) {
        // Fatal: stop immediately (binary_status[i] is left untouched).
        *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
        program->release();
        return (cl_program)0;
      }
    }

    if (binary_status != NULL) {
      binary_status[i] = status;
    }
    if (firstError == CL_SUCCESS) {
      firstError = status;
    }
  }

  *not_null(errcode_ret) = firstError;
  if (firstError != CL_SUCCESS) {
    // An error code must be paired with a NULL program; releasing here also
    // keeps callers that discard the handle on error from leaking it.
    program->release();
    return (cl_program)0;
  }
  return as_cl(program);
}
RUNTIME_EXIT
/*! \brief Create a program object for a context from assembly text.
 *
 * \param context must be a valid OpenCL context.
 *
 * \param count is the number of pointers in \a strings.
 *
 * \param strings is an array of \a count pointers to the text fragments.
 *
 * \param lengths is an array with the number of chars in each fragment. A
 * zero entry, or a NULL \a lengths, means the fragment is null-terminated.
 *
 * \param errcode_ret receives the error code.
 *
 * \return A program object with \a errcode_ret set to CL_SUCCESS, or a NULL
 * value with one of the following error values in \a errcode_ret:
 * - CL_INVALID_CONTEXT if \a context is not a valid context.
 * - CL_INVALID_VALUE if \a count is zero, if \a strings or any entry in
 * \a strings is NULL, or if the concatenated text is empty.
 * - CL_OUT_OF_HOST_MEMORY if the program object could not be allocated or a
 * device program reported an out-of-memory condition.
 */
RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithAssemblyAMD,
                  (cl_context context, cl_uint count, const char** strings, const size_t* lengths,
                   cl_int* errcode_ret)) {
  // Common failure exit used by all validation steps below.
  const auto bail = [errcode_ret](cl_int code) -> cl_program {
    *not_null(errcode_ret) = code;
    return (cl_program)0;
  };

  if (!is_valid(context)) {
    return bail(CL_INVALID_CONTEXT);
  }
  if (strings == NULL || count == 0) {
    return bail(CL_INVALID_VALUE);
  }

  // Concatenate the fragments into one assembly listing.
  std::string listing;
  for (cl_uint n = 0; n < count; ++n) {
    const char* piece = strings[n];
    if (piece == NULL) {
      return bail(CL_INVALID_VALUE);
    }
    const bool hasExplicitLength = (lengths != NULL) && (lengths[n] != 0);
    if (hasExplicitLength) {
      listing.append(piece, lengths[n]);
    } else {
      listing.append(piece);
    }
  }
  if (listing.empty()) {
    return bail(CL_INVALID_VALUE);
  }

  // Create the program
  amd::Program* asmProgram = new amd::Program(*as_amd(context), listing, amd::Program::Assembly);
  if (asmProgram == NULL) {
    return bail(CL_OUT_OF_HOST_MEMORY);
  }

  // Add programs for all devices in the context.
  for (amd::Device* device : as_amd(context)->devices()) {
    if (asmProgram->addDeviceProgram(*device) != CL_OUT_OF_HOST_MEMORY) {
      continue;
    }
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    asmProgram->release();
    return (cl_program)0;
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return as_cl(asmProgram);
}
RUNTIME_EXIT
/*! \brief Increment the program reference count.
*
* clCreateProgram does an implicit retain.
*
* \return CL_SUCCESS if the function is executed successfully. It returns
* CL_INVALID_PROGRAM if \a program is not a valid program object.
*
* \version 1.0r33
*/
RUNTIME_ENTRY(cl_int, clRetainProgram, (cl_program program)) {
  // Only a valid handle may have retain() called on it.
  if (is_valid(program)) {
    as_amd(program)->retain();
    return CL_SUCCESS;
  }
  return CL_INVALID_PROGRAM;
}
RUNTIME_EXIT
/*! \brief Decrement the program reference count.
*
* The program object is deleted after all kernel objects associated with
* \a program have been deleted and the program reference count becomes zero.
*
* \return CL_SUCCESS if the function is executed successfully. It returns
* CL_INVALID_PROGRAM if \a program is not a valid program object.
*
* \version 1.0r33
*/
RUNTIME_ENTRY(cl_int, clReleaseProgram, (cl_program program)) {
  // Only a valid handle may have release() called on it.
  if (is_valid(program)) {
    as_amd(program)->release();
    return CL_SUCCESS;
  }
  return CL_INVALID_PROGRAM;
}
RUNTIME_EXIT
/*! @}
* \addtogroup CL_Build
* @{
*/
/*! \brief Build (compile & link) a program executable from the program source
* or binary for all the devices or a specific device(s) in the OpenCL context
* associated with program.
*
* OpenCL allows program executables to be built using the sources or binaries.
*
* \param program is the program object.
*
* \param device_list is a pointer to a list of devices associated with
* \a program. If \a device_list is a NULL value, the program executable is
* built for all devices associated with \a program for which a source or
* binary has been loaded. If \a device_list is a non-NULL value, the program
* executable is built for devices specified in this list for which a source
* or binary has been loaded.
*
* \param num_devices is the number of devices listed in \a device_list.
*
* \param options is a pointer to a string that describes the build options to
* be used for building the program executable.
*
* \param pfn_notify is a function pointer to a notification routine. The
* notification routine allows an application to register a callback function
* which will be called when the program executable has been built
* (successfully or unsuccessfully). If \a pfn_notify is not NULL,
* clBuildProgram does not need to wait for the build to complete and can
* return immediately. If \a pfn_notify is NULL, clBuildProgram does not
* return until the build has completed. This callback function may be called
* asynchronously by the OpenCL implementation. It is the application's
* responsibility to ensure that the callback function is thread-safe.
*
* \param user_data will be passed as the argument when \a pfn_notify is
* called. \a user_data can be NULL.
*
* \return One of the following values:
* - CL_SUCCESS if the function is executed successfully
* - CL_INVALID_PROGRAM if \a program is not a valid program object
* - CL_INVALID_VALUE if \a device_list is NULL and \a num_devices is greater
* than zero, or if \a device_list is not NULL and \a num_devices is zero,
* - CL_INVALID_DEVICE if OpenCL devices listed in \a device_list are not in
* the list of devices associated with \a program
* - CL_INVALID_BINARY if \a program is created with clCreateProgramWithBinary
* and devices listed in \a device_list do not have a valid program binary
* loaded
* - CL_INVALID_BUILD_OPTIONS if the build options specified by \a options are
* invalid
* - CL_INVALID_OPERATION if the build of a program executable for any of the
* devices listed in \a device_list by a previous call to clBuildProgram for
* \a program has not completed
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
* by the runtime.
*
* \version 1.0r33
*/
RUNTIME_ENTRY(cl_int, clBuildProgram,
              (cl_program program, cl_uint num_devices, const cl_device_id* device_list,
               const char* options,
               void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data),
               void* user_data)) {
  if (!is_valid(program)) {
    return CL_INVALID_PROGRAM;
  }
  // num_devices and device_list must be both set or both empty.
  if ((num_devices > 0 && device_list == NULL) || (num_devices == 0 && device_list != NULL)) {
    return CL_INVALID_VALUE;
  }

  amd::Program* amdProgram = as_amd(program);
  if (device_list == NULL) {
    // build for all devices in the context.
    return amdProgram->build(amdProgram->context().devices(), options, pfn_notify, user_data);
  }

  std::vector<amd::Device*> devices(num_devices);
  for (cl_uint i = 0; i < num_devices; ++i) {
    // Reject invalid handles before converting them, matching the device
    // validation done by the program-creation entry points in this file.
    if (!is_valid(device_list[i])) {
      return CL_INVALID_DEVICE;
    }
    amd::Device* device = as_amd(device_list[i]);
    if (!amdProgram->context().containsDevice(device)) {
      return CL_INVALID_DEVICE;
    }
    devices[i] = device;
  }
  return amdProgram->build(devices, options, pfn_notify, user_data);
}
RUNTIME_EXIT
/*! \brief compiles a program's source for all the devices or a specific
* device(s) in the OpenCL context associated with program. The pre-processor
* runs before the program sources are compiled.
* The compiled binary is built for all devices associated with program or
* the list of devices specified. The compiled binary can be queried using
* \a clGetProgramInfo(program, CL_PROGRAM_BINARIES, ...) and can be specified
* to \a clCreateProgramWithBinary to create a new program object.
*
* \param program is the program object that is the compilation target.
*
* \param device_list is a pointer to a list of devices associated with program.
* If device_list is a NULL value, the compile is performed for all devices
* associated with program. If device_list is a non-NULL value, the compile is
* performed for devices specified in this list.
*
* \param num_devices is the number of devices listed in \a device_list.
*
* \param options is a pointer to a null-terminated string of characters that
* describes the compilation options to be used for building the program
* executable. The list of supported options is as described in section 5.6.4.
*
* \param num_input_headers specifies the number of programs that describe
* headers in the array referenced by input_headers.
*
* \param input_headers is an array of program embedded headers created with
* \a clCreateProgramWithSource.
*
* \param header_include_names is an array that has a one to one correspondence
* with input_headers.
* Each entry in \a header_include_names specifies the include name used by
* source in program that comes from an embedded header. The corresponding entry
* in input_headers identifies the program object which contains the header
* source to be used. The embedded headers are first searched before the headers
* in the list of directories specified by the -I compile option (as described in
* section 5.6.4.1). If multiple entries in header_include_names refer to the same
* header name, the first one encountered will be used.
*
* \param pfn_notify is a function pointer to a notification routine. The
* notification routine is a callback function that an application can register
* and which will be called when the program executable has been built
* (successfully or unsuccessfully). If pfn_notify is not NULL,
* \a clCompileProgram does not need to wait for the compiler to complete and can
* return immediately. If \a pfn_notify is NULL, \a clCompileProgram does not
* return until the compiler has completed. This callback function may be called
* asynchronously by the OpenCL implementation. It is the application's
* responsibility to ensure that the callback function is thread-safe.
*
* \param user_data will be passed as an argument when pfn_notify is called.
* \a user_data can be NULL.
*
* \return CL_SUCCESS if the function is executed successfully. Otherwise, it
* returns one of the following errors:
* - CL_INVALID_PROGRAM if program is not a valid program object.
* - CL_INVALID_VALUE if device_list is NULL and num_devices is greater than
* zero, or if \a device_list is not NULL and \a num_devices is zero.
* - CL_INVALID_VALUE if num_input_headers is zero and \a header_include_names
* or input_headers are not NULL or if num_input_headers is not zero and
* \a header_include_names or input_headers are NULL.
* - CL_INVALID_VALUE if \a pfn_notify is NULL but \a user_data is not NULL.
* - CL_INVALID_DEVICE if OpenCL devices listed in device_list are not in the
* list of devices associated with program
* - CL_INVALID_COMPILER_OPTIONS if the compiler options specified by options
* are invalid.
* - CL_INVALID_OPERATION if the compilation or build of a program executable
* for any of the devices listed in device_list by a previous call to
* \a clCompileProgram or \a clBuildProgram for program has not completed.
* - CL_COMPILER_NOT_AVAILABLE if a compiler is not available, i.e.
* CL_DEVICE_COMPILER_AVAILABLE specified in table 4.3 is set to CL_FALSE.
* - CL_COMPILE_PROGRAM_FAILURE if there is a failure to compile the program
* source. This error will be returned if clCompileProgram does not return
* until the compile has completed.
* - CL_INVALID_OPERATION if there are kernel objects attached to program.
* - CL_INVALID_OPERATION if program has no source i.e. it has not been created
* with \a clCreateProgramWithSource.
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
* by the OpenCL implementation on the device.
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
* by the OpenCL implementation on the host.
*
* \version 1.2r07
*/
RUNTIME_ENTRY(cl_int, clCompileProgram,
              (cl_program program, cl_uint num_devices, const cl_device_id* device_list,
               const char* options, cl_uint num_input_headers, const cl_program* input_headers,
               const char** header_include_names,
               void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data),
               void* user_data)) {
  if (!is_valid(program)) {
    return CL_INVALID_PROGRAM;
  }
  // num_devices and device_list must be both set or both empty.
  if ((num_devices > 0 && device_list == NULL) || (num_devices == 0 && device_list != NULL)) {
    return CL_INVALID_VALUE;
  }
  // Likewise the header count must agree with both header arrays.
  if ((num_input_headers > 0 && (input_headers == NULL || header_include_names == NULL)) ||
      (num_input_headers == 0 && (input_headers != NULL || header_include_names != NULL))) {
    return CL_INVALID_VALUE;
  }
  if (pfn_notify == NULL && user_data != NULL) {
    return CL_INVALID_VALUE;
  }

  amd::Program* amdProgram = as_amd(program);
  // NOTE(review): presumably a reference count above one stands in for
  // "kernel objects are attached to program" -- confirm; an extra
  // clRetainProgram by the application would trip this check as well.
  if (amdProgram->referenceCount() > 1) {
    return CL_INVALID_OPERATION;
  }

  // Convert the embedded-header handles, rejecting invalid ones.
  std::vector<const amd::Program*> headerPrograms(num_input_headers);
  for (cl_uint i = 0; i < num_input_headers; ++i) {
    if (!is_valid(input_headers[i])) {
      return CL_INVALID_OPERATION;
    }
    headerPrograms[i] = as_amd(input_headers[i]);
  }

  if (device_list == NULL) {
    // compile for all devices in the context.
    return amdProgram->compile(amdProgram->context().devices(), num_input_headers, headerPrograms,
                               header_include_names, options, pfn_notify, user_data);
  }

  std::vector<amd::Device*> devices(num_devices);
  for (cl_uint i = 0; i < num_devices; ++i) {
    // Reject invalid handles before converting them, matching the device
    // validation done by the program-creation entry points in this file.
    if (!is_valid(device_list[i])) {
      return CL_INVALID_DEVICE;
    }
    amd::Device* device = as_amd(device_list[i]);
    if (!amdProgram->context().containsDevice(device)) {
      return CL_INVALID_DEVICE;
    }
    devices[i] = device;
  }
  return amdProgram->compile(devices, num_input_headers, headerPrograms, header_include_names,
                             options, pfn_notify, user_data);
}
RUNTIME_EXIT
/*! \brief links a set of compiled program objects and libraries for all
* the devices or a specific device(s) in the OpenCL context and creates
* an executable. clLinkProgram creates a new program object which contains
* this executable. The executable binary can be queried using
* \a clGetProgramInfo(program, CL_PROGRAM_BINARIES, ...) and can be specified
* to \a clCreateProgramWithBinary to create a new program object.
* The devices associated with the returned program object will be the list
* of devices specified by device_list or if device_list is NULL it will be
* the list of devices associated with context.
*
* \param context must be a valid OpenCL context.
*
* \param device_list is a pointer to a list of devices that are in context.
* If device_list is a NULL value, the link is performed for all devices
* associated with context for which a compiled object is available.
* If device_list is a non-NULL value, the link is performed for devices
* specified in this list for which a compiled object is available.
*
* \param num_devices is the number of devices listed in device_list.
*
* \param options is a pointer to a null-terminated string of characters
* that describes the link options to be used for building the program
* executable. The list of supported options is as described in section 5.6.5.
*
* \param num_input_programs specifies the number of programs in array
* referenced by input_programs.
*
* \param input_programs is an array of program objects that are compiled
* binaries or libraries that are to be linked to create the program executable.
* For each device in device_list or if device_list is NULL the list of devices
* associated with context, the following cases occur:
* All programs specified by input_programs contain a compiled binary or
* library for the device. In this case, a link is performed to generate
* a program executable for this device. None of the programs contain
* a compiled binary or library for that device. In this case, no link is
* performed and there will be no program executable generated for this device.
* All other cases will return a CL_INVALID_OPERATION error.
*
* \param pfn_notify is a function pointer to a notification routine.
* The notification routine is a callback function that an application can
* register and which will be called when the program executable has been built
* (successfully or unsuccessfully). If \a pfn_notify is not NULL,
* \a clLinkProgram does not need to wait for the linker to complete and can
* return immediately. Once the linker has completed, the \a pfn_notify
* callback function is called with a valid program object (if the link was
* successful) or NULL (if the link encountered a failure). This callback
* function may be called asynchronously by the OpenCL implementation. It is
* the application's responsibility to ensure that the callback function is
* thread-safe. If \a pfn_notify is NULL, \a clLinkProgram does not return
* until the linker has completed. clLinkProgram returns a valid non-zero
* program object (if the link was successful) or NULL (if the link
* encountered a failure).
*
* \a user_data will be passed as an argument when \a pfn_notify is called.
* user_data can be NULL.
*
* \return a valid non-zero program object and errcode_ret is set to CL_SUCCESS
* if the link was successful in generating a program executable for at least
* one device and the program object was created successfully. If \a pfn_notify
* is not NULL, \a clLinkProgram returns a NULL program object and
* \a errcode_ret is set to CL_SUCCESS if the function was executed
* successfully. Otherwise, it returns one of the following errors:
* - CL_INVALID_CONTEXT if context is not a valid context.
* - CL_INVALID_VALUE if device_list is NULL and num_devices is greater than
* zero, or if \a device_list is not NULL and \a num_devices is zero.
* - CL_INVALID_VALUE if \a num_input_programs is zero and \a input_programs
* is NULL or if \a num_input_programs is zero and \a input_programs is not
* NULL or if \a num_input_programs is not zero and \a input_programs is NULL.
* - CL_INVALID_PROGRAM if programs specified in \a input_programs are not
* valid program objects.
* - CL_INVALID_VALUE if \a pfn_notify is NULL but \a user_data is not NULL.
* - CL_INVALID_DEVICE if OpenCL devices listed in \a device_list are not in
* the list of devices associated with context
* - CL_INVALID_LINKER_OPTIONS if the linker options specified by options are
* invalid.
* - CL_INVALID_OPERATION if the compilation or build of a program executable
* for any of the devices listed in \a device_list by a previous call to
* clCompileProgram or clBuildProgram for program has not completed.
* - CL_INVALID_OPERATION if the rules for devices containing compiled binaries
* or libraries as described in \a input_programs argument above are
* not followed.
* - CL_LINKER_NOT_AVAILABLE if a linker is not available, i.e.
* CL_DEVICE_LINKER_AVAILABLE specified in table 4.3 is set to CL_FALSE.
* - CL_LINK_PROGRAM_FAILURE if there is a failure to link the compiled
* binaries and/or libraries.
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
* by the OpenCL implementation on the device.
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
* by the OpenCL implementation on the host.
*
* \version 1.2r07
*/
RUNTIME_ENTRY_RET(cl_program, clLinkProgram,
                  (cl_context context, cl_uint num_devices, const cl_device_id* device_list,
                   const char* options, cl_uint num_input_programs,
                   const cl_program* input_programs,
                   void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data),
                   void* user_data, cl_int* errcode_ret)) {
  // Early-exit helper for the validation phase (nothing to clean up yet).
  const auto refuse = [errcode_ret](cl_int code) -> cl_program {
    *not_null(errcode_ret) = code;
    return (cl_program)0;
  };

  if (!is_valid(context)) {
    return refuse(CL_INVALID_CONTEXT);
  }
  const bool countWithoutList = (num_devices > 0) && (device_list == NULL);
  const bool listWithoutCount = (num_devices == 0) && (device_list != NULL);
  if (countWithoutList || listWithoutCount) {
    return refuse(CL_INVALID_VALUE);
  }
  if (input_programs == NULL || num_input_programs == 0) {
    return refuse(CL_INVALID_VALUE);
  }
  if (user_data != NULL && pfn_notify == NULL) {
    return refuse(CL_INVALID_VALUE);
  }

  // Convert the input handles, rejecting invalid ones.
  std::vector<amd::Program*> linkInputs(num_input_programs);
  for (cl_uint n = 0; n < num_input_programs; ++n) {
    if (!is_valid(input_programs[n])) {
      return refuse(CL_INVALID_PROGRAM);
    }
    linkInputs[n] = as_amd(input_programs[n]);
  }

  // The output program; createProgram reports its own failure code.
  amd::Program* linked = createProgram(context, num_devices, device_list, errcode_ret);
  if (linked == NULL) {
    return (cl_program)0;
  }
  *not_null(errcode_ret) = CL_SUCCESS;

  cl_int status;
  if (device_list != NULL) {
    // link for the explicitly requested devices only.
    std::vector<amd::Device*> targets(num_devices);
    for (cl_uint n = 0; n < num_devices; ++n) {
      amd::Device* candidate = as_amd(device_list[n]);
      if (!as_amd(context)->containsDevice(candidate)) {
        linked->release();
        *not_null(errcode_ret) = CL_INVALID_DEVICE;
        return (cl_program)0;
      }
      targets[n] = candidate;
    }
    status = linked->link(targets, num_input_programs, linkInputs, options, pfn_notify, user_data);
  } else {
    // link for all devices in the context.
    status = linked->link(as_amd(context)->devices(), num_input_programs, linkInputs, options,
                          pfn_notify, user_data);
  }

  *not_null(errcode_ret) = status;
  if (status != CL_SUCCESS) {
    linked->release();
    return (cl_program)0;
  }
  return as_cl(linked);
}
RUNTIME_EXIT
/*! \brief creates a program object for a context, and loads the information
* related to the built-in kernels into a program object.
*
* \param context must be a valid OpenCL context.
*
* \param num_devices is the number of devices listed in device_list.
*
* \param device_list is a pointer to a list of devices that are in context.
* \a device_list must be a non-NULL value. The built-in kernels are loaded
* for devices specified in this list. The devices associated with the
* program object will be the list of devices specified by \a device_list.
* The list of devices specified by \a device_list must be devices associated
* with context.
*
* \param kernel_names is a semi-colon separated list of built-in kernel names.
*
* \return a valid non-zero program object and \a errcode_ret is set to
* CL_SUCCESS if the program object is created successfully. Otherwise, it
* returns a NULL value with one of the following error values returned
* in errcode_ret:
* - CL_INVALID_CONTEXT if context is not a valid context.
* - CL_INVALID_VALUE if device_list is NULL or num_devices is zero.
* - CL_INVALID_VALUE if kernel_names is NULL or kernel_names contains a kernel
* name that is not supported by any of the devices in \a device_list.
* - CL_INVALID_DEVICE if devices listed in device_list are not in the list
* of devices associated with context.
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
* by the OpenCL implementation on the device.
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
* by the OpenCL implementation on the host.
*
* \version 1.2r07
*/
RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithBuiltInKernels,
                  (cl_context context, cl_uint num_devices, const cl_device_id* device_list,
                   const char* kernel_names, cl_int* errcode_ret)) {
  //!@todo Add implementation
  // No built-in kernel is supported yet, so every requested name is
  // unsupported. Report that as CL_INVALID_VALUE up front so the caller never
  // reads an unset error code next to the NULL program returned below.
  // Unimplemented() is defined elsewhere; this assumes it may return.
  *not_null(errcode_ret) = CL_INVALID_VALUE;
  amd::Program* program = NULL;
  Unimplemented();
  return as_cl(program);
}
RUNTIME_EXIT
/*! @}
* \addtogroup CL_Unloading
* @{
*/
/*! \brief Allows the implementation to release the resources allocated by
* the OpenCL compiler for platform. This is a hint from the application
* and does not guarantee that the compiler will not be used in the future
* or that the compiler will actually be unloaded by the implementation.
* Calls to \a clBuildProgram, \a clCompileProgram or \a clLinkProgram after
* \a clUnloadPlatformCompiler will reload the compiler,
* if necessary, to build the appropriate program executable.
*
* \return CL_SUCCESS if the function is executed successfully.
* Otherwise, it returns one of the following errors:
* - CL_INVALID_PLATFORM if platform is not a valid platform.
*
* \version 1.2r07
*/
RUNTIME_ENTRY(cl_int, clUnloadPlatformCompiler, (cl_platform_id platform)) {
  // Both a NULL handle and AMD_PLATFORM are accepted; anything else is rejected.
  const bool platformAccepted = (platform == NULL) || (platform == AMD_PLATFORM);
  if (!platformAccepted) {
    return CL_INVALID_PLATFORM;
  }
  //! @todo: Implement Compiler::unload()
  // Nothing is unloaded at present; the hint is simply acknowledged.
  return CL_SUCCESS;
}
RUNTIME_EXIT
/*! \brief Allows the runtime to release the resources allocated by the OpenCL
* compiler.
*
* This is a hint from the application and does not guarantee that the compiler
* will not be used in the future or that the compiler will actually be
* unloaded by the implementation.
*
* Calls to clBuildProgram after clUnloadCompiler may reload the compiler,
* if necessary, to build the appropriate program executable.
*
* \return This call currently always returns CL_SUCCESS
*
* \version 1.0r33
*/
RUNTIME_ENTRY(cl_int, clUnloadCompiler, (void)) {
  //! @todo: Implement Compiler::unload()
  // The hint is acknowledged without releasing anything.
  const cl_int status = CL_SUCCESS;
  return status;
}
RUNTIME_EXIT
/*! @}
* \addtogroup CL_ProgramQueries
* @{
*/
/*! \brief Return information about the program object.
*
* \param program specifies the program object being queried.
*
* \param param_name specifies the information to query.
*
* \param param_value is a pointer to memory where the appropriate result
* being queried is returned. If \a param_value is NULL, it is ignored.
*
* \param param_value_size is used to specify the size in bytes of memory
* pointed to by \a param_value. This size must be >= size of return type.
*
* \param param_value_size_ret returns the actual size in bytes of data copied
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
*
* \return One of the following values:
* - CL_SUCCESS if the function is executed successfully
* - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
* specified by \a param_value_size is < size of return type and
* \a param_value is not NULL
* - CL_INVALID_PROGRAM_EXECUTABLE if param_name is
* CL_PROGRAM_NUM_KERNELS or CL_PROGRAM_KERNEL_NAMES and a successful
* program executable has not been built for at least one device in the list
* of devices associated with program.
* - CL_INVALID_PROGRAM if \a program is not a valid program object
*
* \version 1.2r07
*/
RUNTIME_ENTRY(cl_int, clGetProgramInfo,
              (cl_program program, cl_program_info param_name, size_t param_value_size,
               void* param_value, size_t* param_value_size_ret)) {
  if (!is_valid(program)) {
    return CL_INVALID_PROGRAM;
  }

  amd::Program* amdProgram = as_amd(program);

  // For the array-valued queries: clear whatever part of the caller's buffer
  // lies beyond the bytes that were actually produced.
  const auto zeroTail = [param_value, param_value_size](size_t valueSize) {
    if (param_value_size > valueSize) {
      ::memset(static_cast<address>(param_value) + valueSize, '\0',
               param_value_size - valueSize);
    }
  };

  switch (param_name) {
    case CL_PROGRAM_REFERENCE_COUNT: {
      cl_uint count = amdProgram->referenceCount();
      return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_CONTEXT: {
      cl_context context = const_cast<cl_context>(as_cl(&amdProgram->context()));
      return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_NUM_DEVICES: {
      cl_uint numDevices = static_cast<cl_uint>(amdProgram->deviceList().size());
      return amd::clGetInfo(numDevices, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_DEVICES: {
      const amd::Program::devicelist_t& devices = amdProgram->deviceList();
      const size_t valueSize = devices.size() * sizeof(cl_device_id);
      if (param_value != NULL && param_value_size < valueSize) {
        return CL_INVALID_VALUE;
      }
      *not_null(param_value_size_ret) = valueSize;
      if (param_value != NULL) {
        cl_device_id* device_list = static_cast<cl_device_id*>(param_value);
        for (const auto& it : devices) {
          *device_list++ = const_cast<cl_device_id>(as_cl(it));
        }
        zeroTail(valueSize);
      }
      return CL_SUCCESS;
    }
    case CL_PROGRAM_SOURCE: {
      const char* source = amdProgram->sourceCode().c_str();
      return amd::clGetInfo(source, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_BINARY_SIZES: {
      const amd::Program::devicelist_t& devices = amdProgram->deviceList();
      const size_t valueSize = devices.size() * sizeof(size_t);
      if (param_value != NULL && param_value_size < valueSize) {
        return CL_INVALID_VALUE;
      }
      *not_null(param_value_size_ret) = valueSize;
      if (param_value != NULL) {
        size_t* binary_sizes = static_cast<size_t*>(param_value);
        for (const auto& it : devices) {
          *binary_sizes++ = amdProgram->getDeviceProgram(*it)->binary().second;
        }
        zeroTail(valueSize);
      }
      return CL_SUCCESS;
    }
    case CL_PROGRAM_BINARIES: {
      const amd::Program::devicelist_t& devices = amdProgram->deviceList();
      const size_t valueSize = devices.size() * sizeof(char*);
      if (param_value != NULL && param_value_size < valueSize) {
        return CL_INVALID_VALUE;
      }
      *not_null(param_value_size_ret) = valueSize;
      if (param_value != NULL) {
        // param_value is an array of caller-owned destination buffers, one per device.
        char** binaries = static_cast<char**>(param_value);
        for (const auto& it : devices) {
          const device::Program::binary_t& binary = amdProgram->getDeviceProgram(*it)->binary();
          // If an entry value in the array is NULL,
          // then runtime should skip copying the program binary.
          // Empty binaries are skipped as well so that memcpy is never
          // handed a zero-length (and possibly NULL) source.
          if (*binaries != NULL && binary.second != 0) {
            ::memcpy(*binaries, binary.first, binary.second);
          }
          binaries++;
        }
        zeroTail(valueSize);
      }
      return CL_SUCCESS;
    }
    case CL_PROGRAM_NUM_KERNELS: {
      if (amdProgram->symbolsPtr() == NULL) {
        return CL_INVALID_PROGRAM_EXECUTABLE;
      }
      size_t numKernels = amdProgram->symbols().size();
      return amd::clGetInfo(numKernels, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_KERNEL_NAMES: {
      // Same precondition as CL_PROGRAM_NUM_KERNELS: without a symbol table
      // there is no executable to enumerate kernel names from.
      if (amdProgram->symbolsPtr() == NULL) {
        return CL_INVALID_PROGRAM_EXECUTABLE;
      }
      const char* kernelNames = amdProgram->kernelNames().c_str();
      return amd::clGetInfo(kernelNames, param_value_size, param_value, param_value_size_ret);
    }
    default:
      break;
  }
  // Unrecognized param_name.
  return CL_INVALID_VALUE;
}
RUNTIME_EXIT
/*! \brief Return build information for each device in the program object.
*
* \param program specifies the program object being queried.
*
* \param device specifies the device for which build information is being
* queried. device must be a valid device associated with \a program.
*
* \param param_name specifies the information to query.
*
* \param param_value is a pointer to memory where the appropriate result being
* queried is returned. If \a param_value is NULL, it is ignored.
*
* \param param_value_size is used to specify the size in bytes of memory
* pointed to by \a param_value. This size must be >= size of return type
*
* \param param_value_size_ret returns the actual size in bytes of data copied
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
*
* \return One of the following values:
* - CL_SUCCESS if the function is executed successfully
* - CL_INVALID_DEVICE if \a device is not in the list of devices associated
* with \a program
* - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
* specified by \a param_value_size is < size of return type and
* \a param_value is not NULL
* - CL_INVALID_PROGRAM if \a program is not a valid program object
*
* \version 1.0r33
*/
RUNTIME_ENTRY(cl_int, clGetProgramBuildInfo,
              (cl_program program, cl_device_id device, cl_program_build_info param_name,
               size_t param_value_size, void* param_value, size_t* param_value_size_ret)) {
  // Validate both handles before touching either object.
  if (!is_valid(program)) {
    return CL_INVALID_PROGRAM;
  }
  if (!is_valid(device)) {
    return CL_INVALID_DEVICE;
  }

  // A device without a per-device program in this cl_program is reported
  // as an invalid device.
  const device::Program* devProgram = as_amd(program)->getDeviceProgram(*as_amd(device));
  if (devProgram == NULL) {
    return CL_INVALID_DEVICE;
  }

  switch (param_name) {
    case CL_PROGRAM_BUILD_STATUS: {
      cl_build_status buildStatus = devProgram->buildStatus();
      return amd::clGetInfo(buildStatus, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_BUILD_OPTIONS: {
      // Hold a local copy so the C string stays alive for the clGetInfo call.
      const std::string buildOptions = devProgram->lastBuildOptionsArg();
      const char* optionsCStr = buildOptions.c_str();
      return amd::clGetInfo(optionsCStr, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_BUILD_LOG: {
      // Program-level log first, followed by the device build log.
      const std::string combinedLog =
          as_amd(program)->programLog() + devProgram->buildLog().c_str();
      const char* logCStr = combinedLog.c_str();
      return amd::clGetInfo(logCStr, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_BINARY_TYPE: {
      // Translate the device-layer program type into its OpenCL constant.
      cl_uint binaryType;
      switch (devProgram->type()) {
        case device::Program::TYPE_NONE:
          binaryType = CL_PROGRAM_BINARY_TYPE_NONE;
          break;
        case device::Program::TYPE_COMPILED:
          binaryType = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
          break;
        case device::Program::TYPE_LIBRARY:
          binaryType = CL_PROGRAM_BINARY_TYPE_LIBRARY;
          break;
        case device::Program::TYPE_EXECUTABLE:
          binaryType = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
          break;
        case device::Program::TYPE_INTERMEDIATE:
          binaryType = CL_PROGRAM_BINARY_TYPE_INTERMEDIATE;
          break;
        default:
          return CL_INVALID_VALUE;
      }
      return amd::clGetInfo(binaryType, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: {
      size_t totalSize = devProgram->globalVariableTotalSize();
      return amd::clGetInfo(totalSize, param_value_size, param_value, param_value_size_ret);
    }
    default:
      break;
  }
  // Unrecognized param_name.
  return CL_INVALID_VALUE;
}
RUNTIME_EXIT
/*! \brief Sets the value of a SPIR-V specialization constant.
*
* \param program must be a valid OpenCL program created from a SPIR-V module.
*
* \param spec_id identifies the SPIR-V specialization constant whose value will be set.
*
* \param spec_size specifies the size in bytes of the data pointed to by spec_value. This should
* be 1 for boolean constants. For all other constant types this should match the size of the
* specialization constant in the SPIR-V module.
*
* \param spec_value is a pointer to the memory location that contains the value of the
* specialization constant. The data pointed to by \a spec_value are copied and can be safely
* reused by the application after \a clSetProgramSpecializationConstant returns. This
* specialization value will be used by subsequent calls to \a clBuildProgram until another call to
* \a clSetProgramSpecializationConstant changes it. If a specialization constant is a boolean
* constant, \a spec_value should be a pointer to a cl_uchar value. A value of zero will set the
* specialization constant to false; any other value will set it to true.
*
* Calling this function multiple times for the same specialization constant shall cause the last
* provided value to override any previously specified value. The values are used by a subsequent
* \a clBuildProgram call for the program.
*
* Application is not required to provide values for every specialization constant contained in
* SPIR-V module. SPIR-V provides default values for all specialization constants.
*
* \return One of the following values:
* - CL_SUCCESS if the function is executed successfully.
* - CL_INVALID_PROGRAM if program is not a valid program object created from a SPIR-V module.
* - CL_INVALID_SPEC_ID if spec_id is not a valid specialization constant ID
* - CL_INVALID_VALUE if spec_size does not match the size of the specialization constant in the
* SPIR-V module, or if spec_value is NULL.
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL
* implementation on the device.
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL
* implementation on the host.
*
* \version 2.2-3
*/
RUNTIME_ENTRY(cl_int, clSetProgramSpecializationConstant,
              (cl_program program, cl_uint spec_id, size_t spec_size, const void* spec_value)) {
  // Only the program handle is inspected. No specialization value is ever
  // recorded, so a valid program still yields CL_INVALID_VALUE.
  const cl_int status = is_valid(program) ? CL_INVALID_VALUE : CL_INVALID_PROGRAM;
  return status;
}
RUNTIME_EXIT
/*! \brief registers a user callback function with a program object. Each call to
* \a clSetProgramReleaseCallback registers the specified user callback function on a callback stack
* associated with program. The registered user callback functions are called in the reverse order
* in which they were registered. The user callback functions are called after destructors (if any)
* for program scope global variables (if any) are called and before the program is released.
* This provides a mechanism for the application (and libraries) to be notified when destructors
* are complete.
*
* \param program is a valid program object
*
* \param pfn_notify is the callback function that can be registered by the application. This
* callback function may be called asynchronously by the OpenCL implementation. It is the
* application's responsibility to ensure that the callback function is thread safe. The parameters
* to this callback function are:
* - \a prog is the program object whose destructors are being called. When the user callback is
* called by the implementation, this program object is no longer valid. \a prog is only provided
* for reference purposes.
* - \a user_data is a pointer to user supplied data. \a user_data will be passed as the
* \a user_data argument when pfn_notify is called. \a user_data can be NULL.
*
* \return One of the following values:
* - CL_SUCCESS if the function is executed successfully.
* - CL_INVALID_PROGRAM if program is not a valid program object.
* - CL_INVALID_VALUE if pfn_notify is NULL.
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL
* implementation on the device.
*
* \version 2.2-3
*/
RUNTIME_ENTRY(cl_int, clSetProgramReleaseCallback,
              (cl_program program,
               void (CL_CALLBACK *pfn_notify)(cl_program program, void *user_data),
               void *user_data)) {
  // Only the program handle is inspected. No callback is ever registered,
  // so a valid program still yields CL_INVALID_VALUE.
  const cl_int status = is_valid(program) ? CL_INVALID_VALUE : CL_INVALID_PROGRAM;
  return status;
}
RUNTIME_EXIT
/*! @}
* @}
*
* \addtogroup CL_Kernels
*
* A kernel is a function declared in a program. A kernel is identified by the
* __kernel qualifier applied to any function in a program. A kernel object
* encapsulates the specific __kernel function declared in a program and
* the argument values to be used when executing this __kernel function.
*
* @{
*
* \addtogroup CL_CreateKernel
* @{
*/
/*! \brief Create a kernel object.
*
* \param program is a program object with a successfully built executable.
*
* \param kernel_name is a function name in the program declared with the
* __kernel qualifier.
*
* \param errcode_ret will return an appropriate error code. If \a errcode_ret
* is NULL, no error code is returned.
*
* \return A valid non-zero kernel object and \a errcode_ret is set to
* CL_SUCCESS if the kernel object is created successfully. It returns a NULL
* value with one of the following error values returned in \a errcode_ret:
* - CL_INVALID_PROGRAM if \a program is not a valid program object
* - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable
* for \a program.
* - CL_INVALID_KERNEL_NAME if \a kernel_name is not found in \a program.
* - CL_INVALID_KERNEL_DEFINITION if the function definition for __kernel
* function given by \a kernel_name such as the number of arguments, the
* argument types are not the same for all devices for which the program
* executable has been built.
* - CL_INVALID_VALUE if \a kernel_name is NULL.
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
* by the runtime.
*
* \version 1.0r33
*/
RUNTIME_ENTRY_RET(cl_kernel, clCreateKernel,
                  (cl_program program, const char* kernel_name, cl_int* errcode_ret)) {
  cl_int status = CL_SUCCESS;
  amd::Kernel* newKernel = NULL;

  if (!is_valid(program)) {
    status = CL_INVALID_PROGRAM;
  } else if (kernel_name == NULL) {
    status = CL_INVALID_VALUE;
  } else {
    /* FIXME_lmoriche, FIXME_spec: What are we supposed to do here?
     * if (!as_amd(program)->containsOneSuccesfullyBuiltProgram())
     * {
     *     *NotNull(errcode) = CL_INVALID_PROGRAM_EXECUTABLE;
     *     return (cl_kernel) 0;
     * }
     */
    amd::Program* owner = as_amd(program);

    // The kernel name must resolve to a symbol of the program.
    const amd::Symbol* kernelSymbol = owner->findSymbol(kernel_name);
    if (kernelSymbol == NULL) {
      status = CL_INVALID_KERNEL_NAME;
    } else {
      newKernel = new amd::Kernel(*owner, *kernelSymbol, kernel_name);
      if (newKernel == NULL) {
        status = CL_OUT_OF_HOST_MEMORY;
      }
    }
  }

  // Single exit: report the outcome, then hand back the kernel or NULL.
  *not_null(errcode_ret) = status;
  if (status != CL_SUCCESS) {
    return (cl_kernel)0;
  }
  return as_cl(newKernel);
}
RUNTIME_EXIT
/*! \brief Create kernel objects for all kernel functions in program.
*
* Kernel objects may not be created for any __kernel functions in program
* that do not have the same function definition across all devices for which
* a program executable has been successfully built.
*
* \param program is a program object with a successfully built executable.
*
* \param num_kernels is the size of memory pointed to by \a kernels specified
* as the number of cl_kernel entries.
*
* \param kernels is the buffer where the kernel objects for kernels in
* \a program will be returned. If \a kernels is NULL, it is ignored.
* If \a kernels is not NULL, \a num_kernels must be greater than or equal
* to the number of kernels in program.
*
* \param num_kernels_ret is the number of kernels in program. If
* \a num_kernels_ret is NULL, it is ignored.
*
* \return One of the following values:
* - CL_SUCCESS if the kernel objects were successfully allocated
* - CL_INVALID_PROGRAM if \a program is not a valid program object
* - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable
* for any device in \a program
* - CL_INVALID_VALUE if \a kernels is not NULL and \a num_kernels is less
* than the number of kernels in program
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
* by the runtime.
*
* Kernel objects can only be created once you have a program object with a
* valid program source or binary loaded into the program object and the
* program executable has been successfully built for one or more devices
* associated with \a program. No changes to the program executable are
* allowed while there are kernel objects associated with a program object.
* This means that calls to clBuildProgram return CL_INVALID_OPERATION if there
* are kernel objects attached to a program object. The OpenCL context
* associated with program will be the context associated with kernel.
* Devices associated with a program object for which a valid program
* executable has been built can be used to execute kernels declared in the
* program object.
*
* \version 1.0r33
*/
RUNTIME_ENTRY(cl_int, clCreateKernelsInProgram, (cl_program program, cl_uint num_kernels,
                                                 cl_kernel* kernels, cl_uint* num_kernels_ret)) {
  if (!is_valid(program)) {
    return CL_INVALID_PROGRAM;
  }

  amd::Program* amdProgram = as_amd(program);

  // Without a symbol table there is no executable to create kernels from
  // (same precondition clGetProgramInfo applies to CL_PROGRAM_NUM_KERNELS).
  if (amdProgram->symbolsPtr() == NULL) {
    return CL_INVALID_PROGRAM_EXECUTABLE;
  }

  const amd::Program::symbols_t& symbols = amdProgram->symbols();
  const cl_uint numKernels = static_cast<cl_uint>(symbols.size());

  // The output array, when supplied, must have room for every kernel.
  if (kernels != NULL && num_kernels < numKernels) {
    return CL_INVALID_VALUE;
  }
  *not_null(num_kernels_ret) = numKernels;

  // Count-only query.
  if (kernels == NULL) {
    return CL_SUCCESS;
  }

  cl_kernel* result = kernels;
  for (const auto& it : symbols) {
    amd::Kernel* kernel = new amd::Kernel(*amdProgram, it.second, it.first);
    if (kernel == NULL) {
      // Roll back the kernels created so far, newest first. The cursor is
      // compared before it is decremented so it never moves in front of the
      // caller's array.
      while (result != kernels) {
        --result;
        as_amd(*result)->release();
      }
      return CL_OUT_OF_HOST_MEMORY;
    }
    *result++ = as_cl(kernel);
  }
  return CL_SUCCESS;
}
RUNTIME_EXIT
/*! \brief Increment the kernel reference count.
*
* \return CL_SUCCESS if the function is executed successfully. It returns
* CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
*
* clCreateKernel or clCreateKernelsInProgram do an implicit retain.
*
* \version 1.0r33
*/
RUNTIME_ENTRY(cl_int, clRetainKernel, (cl_kernel kernel)) {
  // Reject handles that do not name a kernel object.
  if (!is_valid(kernel)) {
    return CL_INVALID_KERNEL;
  }

  amd::Kernel* amdKernel = as_amd(kernel);
  amdKernel->retain();
  return CL_SUCCESS;
}
RUNTIME_EXIT
/*! \brief Decrement the kernel reference count.
*
* \return CL_SUCCESS if the function is executed successfully. It returns
* CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
*
* The kernel object is deleted once the number of instances that are retained
* to \a kernel become zero and after all queued execution instances of
* \a kernel have finished.
*
* \version 1.0r33
*/
RUNTIME_ENTRY(cl_int, clReleaseKernel, (cl_kernel kernel)) {
  // Reject handles that do not name a kernel object.
  if (!is_valid(kernel)) {
    return CL_INVALID_KERNEL;
  }

  amd::Kernel* amdKernel = as_amd(kernel);
  amdKernel->release();
  return CL_SUCCESS;
}
RUNTIME_EXIT
/*! \brief Makes a shallow copy of the kernel object, its arguments and any
* information passed to the kernel object using \a clSetKernelExecInfo. If
* the kernel object was ready to be enqueued before copying it, the clone of
* the kernel object is ready to enqueue.
*
* \param source_kernel is a valid cl_kernel object that will be copied.
* source_kernel will not be modified in any way by this function.
*
* \param errcode_ret will be assigned an appropriate error code. If
* errcode_ret is NULL, no error code is returned.
*
* \return a valid non-zero kernel object and errcode_ret is set to
* CL_SUCCESS if the kernel is successfully copied. Otherwise it returns a
* NULL value with one of the following error values returned in errcode_ret:
* - CL_INVALID_KERNEL if kernel is not a valid kernel object.
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
* by the OpenCL implementation on the device.
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
* required by the OpenCL implementation on the host.
*
* \version 2.1r01
*/
RUNTIME_ENTRY_RET(cl_kernel, clCloneKernel,
                  (cl_kernel source_kernel, cl_int* errcode_ret)) {
  cl_int status = CL_SUCCESS;
  amd::Kernel* clone = NULL;

  if (!is_valid(source_kernel)) {
    status = CL_INVALID_KERNEL;
  } else {
    // Copy-construct the new kernel from the source kernel.
    clone = new amd::Kernel(*as_amd(source_kernel));
    if (clone == NULL) {
      status = CL_OUT_OF_HOST_MEMORY;
    }
  }

  // Single exit: report the outcome, then hand back the clone or NULL.
  *not_null(errcode_ret) = status;
  if (status != CL_SUCCESS) {
    return (cl_kernel)0;
  }
  return as_cl(clone);
}
RUNTIME_EXIT
/*! @}
* \addtogroup CL_SettingArgs
* @{
*/
/*! \brief Set the argument value for a specific argument of a kernel.
*
* \param kernel is a valid kernel object.
*
* \param arg_index is the argument index. Arguments to the kernel are referred
* by indices that go from 0 for the leftmost argument to n - 1, where n is the
* total number of arguments declared by a kernel.
*
* \param arg_value is a pointer to data that should be used as the argument
* value for argument specified by \a arg_index. The argument data pointed to
* by \a arg_value is copied and the \a arg_value pointer can therefore be
* reused by the application after clSetKernelArg returns. If the argument is
* a memory object (buffer or image), the \a arg_value entry will be a pointer
* to the appropriate buffer or image object. The memory object must be created
* with the context associated with the kernel object. If the argument is
* declared with the __local qualifier, the \a arg_value entry must be NULL.
* For all other kernel arguments, the \a arg_value entry must be a pointer to
* the actual data to be used as argument value. The memory object specified
* as argument value must be a buffer object if the argument is declared to be
* a pointer of a built-in or user defined type with the __global or __constant
* qualifier. If the argument is declared with the __constant qualifier, the
* size in bytes of the memory object cannot exceed
* CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE and the number of arguments declared
* with the __constant qualifier cannot exceed CL_DEVICE_MAX_CONSTANT_ARGS. The
* memory object specified as argument value must be a 2D image object if the
* argument is declared to be of type image2d_t. The memory object specified as
* argument value must be a 3D image object if argument is declared to be of
* type image3d_t. If the argument is of type sampler_t, the arg_value entry
* must be a pointer to the sampler object.
*
* \param arg_size specifies the size of the argument value. If the argument is
* a memory object, the size is the size of the buffer or image object type.
* For arguments declared with the __local qualifier, the size specified will
* be the size in bytes of the buffer that must be allocated for the __local
* argument. If the argument is of type sampler_t, the arg_size value must be
* equal to sizeof(cl_sampler). For all other arguments, the size will be the
* size of argument type.
*
* \return One of the following values:
* - CL_SUCCESS if the function was executed successfully
* - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
* - CL_INVALID_ARG_INDEX if \a arg_index is not a valid argument index.
* - CL_INVALID_ARG_VALUE if \a arg_value specified is NULL for an argument
* that is not declared with the __local qualifier or vice-versa.
* - CL_INVALID_MEM_OBJECT for an argument declared to be a memory object but
* the specified \a arg_value is not a valid memory object.
* - CL_INVALID_SAMPLER for an argument declared to be of type sampler_t but
* the specified \a arg_value is not a valid sampler object.
* - CL_INVALID_ARG_SIZE if \a arg_size does not match the size of the data
* type for an argument that is not a memory object or if the argument is a
* memory object and \a arg_size != sizeof(cl_mem) or if \a arg_size is zero
* and the argument is declared with the __local qualifier or if the
* argument is a sampler and arg_size != sizeof(cl_sampler).
*
* \version 1.0r33
*/
RUNTIME_ENTRY(cl_int, clSetKernelArg,
(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void* arg_value)) {
// Validates the kernel handle, the argument index, the argument value and the
// argument size (in that order) and, if everything checks out, stores the
// value in the kernel's parameter block.
if (!is_valid(kernel)) {
return CL_INVALID_KERNEL;
}
// The index must address one of the parameters in the kernel signature.
const amd::KernelSignature& signature = as_amd(kernel)->signature();
if (arg_index >= signature.numParameters()) {
return CL_INVALID_ARG_INDEX;
}
const amd::KernelParameterDescriptor& desc = signature.at(arg_index);
const bool is_local = (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL);
// A NULL arg_value is accepted only for __local arguments and for arguments
// of type T_POINTER; a non-NULL arg_value is rejected for __local arguments.
if (((arg_value == NULL) && !is_local && (desc.type_ != T_POINTER)) ||
((arg_value != NULL) && is_local)) {
// reset() is called on the argument slot before failing (presumably marks
// the argument as unset -- confirm against the parameters() implementation).
as_amd(kernel)->parameters().reset(static_cast<size_t>(arg_index));
return CL_INVALID_ARG_VALUE;
}
if (!is_local && (desc.type_ == T_POINTER) && (arg_value != NULL)) {
// Pointer argument: arg_value is read as a cl_mem handle. A NULL handle is
// let through; any other handle must be an object of type ObjectTypeMemory.
cl_mem memObj = *static_cast<const cl_mem*>(arg_value);
amd::RuntimeObject* pObject = as_amd(memObj);
if (NULL != memObj && amd::RuntimeObject::ObjectTypeMemory != pObject->objectType()) {
as_amd(kernel)->parameters().reset(static_cast<size_t>(arg_index));
return CL_INVALID_MEM_OBJECT;
}
} else if ((desc.type_ == T_SAMPLER) && !is_valid(*static_cast<const cl_sampler*>(arg_value))) {
// NOTE(review): unlike the other failure paths, this one returns without
// calling reset() on the argument slot.
return CL_INVALID_SAMPLER;
} else if (desc.type_ == T_QUEUE) {
// Queue argument: the handle must be valid and asDeviceQueue() must return
// a non-NULL result.
cl_command_queue queue = *static_cast<const cl_command_queue*>(arg_value);
if (!is_valid(queue)) {
as_amd(kernel)->parameters().reset(static_cast<size_t>(arg_index));
return CL_INVALID_DEVICE_QUEUE;
}
if (NULL == as_amd(queue)->asDeviceQueue()) {
as_amd(kernel)->parameters().reset(static_cast<size_t>(arg_index));
return CL_INVALID_DEVICE_QUEUE;
}
}
// Size check: non-local arguments are expected to match desc.size_, local
// arguments must have a non-zero size.
if ((!is_local && (arg_size != desc.size_)) || (is_local && (arg_size == 0))) {
// NOTE(review): LP64_ONLY(true ||) presumably expands to `true ||` on
// 64-bit builds, making every mismatch an error there, while other builds
// tolerate pointer/sampler arguments whose size equals sizeof(void*) --
// confirm against the macro definition.
if (LP64_ONLY(true ||) ((desc.type_ != T_POINTER) && (desc.type_ != T_SAMPLER)) ||
(arg_size != sizeof(void*))) {
as_amd(kernel)->parameters().reset(static_cast<size_t>(arg_index));
return CL_INVALID_ARG_SIZE;
}
}
// All checks passed: hand the value to the kernel's parameter block.
as_amd(kernel)->parameters().set(static_cast<size_t>(arg_index), arg_size, arg_value);
return CL_SUCCESS;
}
RUNTIME_EXIT
/*! @}
* \addtogroup CL_KernelQuery
* @{
*/
/*! \brief Return information about the kernel object.
*
* \param kernel specifies the kernel object being queried.
*
* \param param_name specifies the information to query.
*
* \param param_value is a pointer to memory where the appropriate result
* being queried is returned. If \a param_value is NULL, it is ignored.
*
* \param param_value_size is used to specify the size in bytes of memory
* pointed to by \a param_value. This size must be >= size of return type.
*
* \param param_value_size_ret returns the actual size in bytes of data copied
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
*
* \return One of the following values:
* - CL_SUCCESS if the function is executed successfully
* - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
* specified by \a param_value_size is < size of return type and
* \a param_value is not NULL
* - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
*
* \version 1.0r33
*/
RUNTIME_ENTRY(cl_int, clGetKernelInfo,
              (cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size,
               void* param_value, size_t* param_value_size_ret)) {
  // The handle must name a kernel object.
  if (!is_valid(kernel)) {
    return CL_INVALID_KERNEL;
  }

  const amd::Kernel* amdKernel = as_amd(kernel);

  // Every recognized query returns straight from its case.
  switch (param_name) {
    case CL_KERNEL_FUNCTION_NAME: {
      // Name of the kernel function.
      const char* functionName = amdKernel->name().c_str();
      return amd::clGetInfo(functionName, param_value_size, param_value, param_value_size_ret);
    }
    case CL_KERNEL_NUM_ARGS: {
      // Number of parameters in the kernel signature.
      cl_uint argCount = static_cast<cl_uint>(amdKernel->signature().numParameters());
      return amd::clGetInfo(argCount, param_value_size, param_value, param_value_size_ret);
    }
    case CL_KERNEL_REFERENCE_COUNT: {
      // Current reference count of the kernel object.
      cl_uint refCount = amdKernel->referenceCount();
      return amd::clGetInfo(refCount, param_value_size, param_value, param_value_size_ret);
    }
    case CL_KERNEL_CONTEXT: {
      // Context of the program the kernel belongs to.
      cl_context owningContext = const_cast<cl_context>(as_cl(&amdKernel->program().context()));
      return amd::clGetInfo(owningContext, param_value_size, param_value, param_value_size_ret);
    }
    case CL_KERNEL_PROGRAM: {
      // Program the kernel belongs to.
      cl_program owningProgram = const_cast<cl_program>(as_cl(&amdKernel->program()));
      return amd::clGetInfo(owningProgram, param_value_size, param_value, param_value_size_ret);
    }
    case CL_KERNEL_ATTRIBUTES: {
      // Attribute string taken from the kernel signature.
      const char* attributes = amdKernel->signature().attributes().c_str();
      return amd::clGetInfo(attributes, param_value_size, param_value, param_value_size_ret);
    }
    default:
      break;
  }
  // Unrecognized param_name.
  return CL_INVALID_VALUE;
}
RUNTIME_EXIT
/*! \brief Returns information about the arguments of a kernel. Kernel
* argument information is only available if the program object associated
* with kernel is created with \a clCreateProgramWithSource and the program
* executable is built with the -cl-kernel-arg-info option specified in
* options argument to clBuildProgram or clCompileProgram.
*
* \param kernel specifies the kernel object being queried.
*
* \param param_name specifies the information to query.
*
* \param param_value is a pointer to memory where the appropriate result
* being queried is returned. If \a param_value is NULL, it is ignored.
*
* \param param_value_size is used to specify the size in bytes of memory
* pointed to by \a param_value. This size must be >= size of return type.
*
* \param param_value_size_ret returns the actual size in bytes of data copied
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
*
* \return One of the following values:
* - CL_SUCCESS if the function is executed successfully
* - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
* specified by \a param_value_size is < size of return type and
* \a param_value is not NULL
* - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
*
* \version 1.2r07
*/
RUNTIME_ENTRY(cl_int, clGetKernelArgInfo,
              (cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name,
               size_t param_value_size, void* param_value, size_t* param_value_size_ret)) {
  // The handle must name a kernel object.
  if (!is_valid(kernel)) {
    return CL_INVALID_KERNEL;
  }

  amd::Kernel* amdKernel = as_amd(kernel);

  // The index must address one of the parameters in the kernel signature.
  const amd::KernelSignature& kernelSignature = amdKernel->signature();
  if (arg_indx >= kernelSignature.numParameters()) {
    return CL_INVALID_ARG_INDEX;
  }
  const amd::KernelParameterDescriptor& argDesc = kernelSignature.at(arg_indx);

  // Every recognized query returns straight from its case.
  switch (param_name) {
    case CL_KERNEL_ARG_ADDRESS_QUALIFIER: {
      cl_kernel_arg_address_qualifier addressQualifier = argDesc.addressQualifier_;
      return amd::clGetInfo(addressQualifier, param_value_size, param_value,
                            param_value_size_ret);
    }
    case CL_KERNEL_ARG_ACCESS_QUALIFIER: {
      cl_kernel_arg_access_qualifier accessQualifier = argDesc.accessQualifier_;
      return amd::clGetInfo(accessQualifier, param_value_size, param_value,
                            param_value_size_ret);
    }
    case CL_KERNEL_ARG_TYPE_NAME: {
      // Type name of the argument as recorded in its descriptor.
      const char* argTypeName = argDesc.typeName_.c_str();
      return amd::clGetInfo(argTypeName, param_value_size, param_value, param_value_size_ret);
    }
    case CL_KERNEL_ARG_TYPE_QUALIFIER: {
      cl_kernel_arg_type_qualifier typeQualifier = argDesc.typeQualifier_;
      return amd::clGetInfo(typeQualifier, param_value_size, param_value, param_value_size_ret);
    }
    case CL_KERNEL_ARG_NAME: {
      // Name of the argument as recorded in its descriptor.
      const char* argName = argDesc.name_.c_str();
      return amd::clGetInfo(argName, param_value_size, param_value, param_value_size_ret);
    }
    default:
      break;
  }
  // Unrecognized param_name.
  return CL_INVALID_VALUE;
}
RUNTIME_EXIT
/*! \brief Return information about the kernel object that may be specific
* to a device.
*
* \param kernel specifies the kernel object being queried.
*
* \param device identifies a specific device in the list of devices associated
* with \a kernel. The list of devices is the list of devices in the OpenCL
* context that is associated with \a kernel. If the list of devices associated
* with kernel is a single device, \a device can be a NULL value.
*
* \param param_name specifies the information to query
*
* \param param_value is a pointer to memory where the appropriate result being
* queried is returned. If \a param_value is NULL, it is ignored.
*
* \param param_value_size is used to specify the size in bytes of memory
* pointed to by \a param_value. This size must be >= size of return type.
*
* \param param_value_size_ret returns the actual size in bytes of data copied
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
*
* \return One of the following values:
* - CL_SUCCESS if the function is executed successfully,
* - CL_INVALID_DEVICE if \a device is not in the list of devices associated
* with \a kernel or if \a device is NULL but there is more than one
* device associated with \a kernel
* - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
* specified by \a param_value_size is < size of return type and
* \a param_value is not NULL
* - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
*
* \version 1.2r15
*/
RUNTIME_ENTRY(cl_int, clGetKernelWorkGroupInfo,
              (cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name,
               size_t param_value_size, void* param_value, size_t* param_value_size_ret)) {
  // Resolves the device-specific kernel for 'device' and forwards the requested
  // work-group property to amd::clGetInfo, whose result is returned.

  // Handles are validated in this order: device first, then kernel.
  // NOTE(review): a NULL 'device' presumably fails is_valid() and is rejected
  // here even when the kernel has a single device - confirm the intended
  // behaviour.
  if (!is_valid(device)) {
    return CL_INVALID_DEVICE;
  }
  if (!is_valid(kernel)) {
    return CL_INVALID_KERNEL;
  }

  amd::Kernel* amdKernel = as_amd(kernel);
  const amd::Device& targetDevice = *as_amd(device);

  // No device kernel for this device means there is nothing to query.
  // NOTE(review): reported as CL_INVALID_KERNEL, not CL_INVALID_DEVICE - confirm.
  const device::Kernel* deviceKernel = amdKernel->getDeviceKernel(targetDevice);
  if (NULL == deviceKernel) {
    return CL_INVALID_KERNEL;
  }

  switch (param_name) {
    case CL_KERNEL_WORK_GROUP_SIZE:
      // Work-group size stored in the device kernel's work-group info
      return amd::clGetInfo(deviceKernel->workGroupInfo()->size_, param_value_size, param_value,
                            param_value_size_ret);

    case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
      // Compile-time work-group size stored in the work-group info
      return amd::clGetInfo(deviceKernel->workGroupInfo()->compileSize_, param_value_size,
                            param_value, param_value_size_ret);

    case CL_KERNEL_LOCAL_MEM_SIZE: {
      // Sum of parameters().localMemSize() (presumably the local memory
      // requested through kernel arguments) and the local memory recorded in
      // the work-group info, the latter aligned up to the device's
      // minDataTypeAlignSize_
      const size_t alignment = targetDevice.info().minDataTypeAlignSize_;
      const auto argumentBytes = amdKernel->parameters().localMemSize(alignment);
      const auto kernelBytes =
          amd::alignUp(deviceKernel->workGroupInfo()->localMemSize_, alignment);
      cl_ulong totalBytes = argumentBytes + kernelBytes;
      return amd::clGetInfo(totalBytes, param_value_size, param_value, param_value_size_ret);
    }

    case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
      // Preferred work-group size multiple stored in the work-group info
      return amd::clGetInfo(deviceKernel->workGroupInfo()->preferredSizeMultiple_,
                            param_value_size, param_value, param_value_size_ret);

    case CL_KERNEL_PRIVATE_MEM_SIZE:
      // Private memory size stored in the work-group info
      return amd::clGetInfo(deviceKernel->workGroupInfo()->privateMemSize_, param_value_size,
                            param_value, param_value_size_ret);

    case CL_KERNEL_MAX_SEMAPHORE_SIZE_AMD:
      // Device-wide limit, taken from the device info rather than the kernel
      return amd::clGetInfo(targetDevice.info().maxSemaphoreSize_, param_value_size, param_value,
                            param_value_size_ret);

    case CL_KERNEL_GLOBAL_WORK_SIZE:
      // Recognised, but rejected exactly like an unknown query
    default:
      break;
  }

  return CL_INVALID_VALUE;
}
RUNTIME_EXIT
/*! \brief Returns information about the kernel object.
*
* \param kernel specifies the kernel object being queried.
*
* \param device identifies a specific device in the list of devices associated
* with kernel. The list of devices is the list of devices in the OpenCL context
* that is associated with kernel. If the list of devices associated with kernel
* is a single device, device can be a NULL value.
*
* \param param_name specifies the information to query. The list of supported
* param_name types and the information returned in param_value by
* clGetKernelSubGroupInfo is described in the table below.
*
* \param input_value_size is used to specify the size in bytes of memory
* pointed to by input_value. This size must be == size of input type as
* described in the table below.
*
* \param input_value is a pointer to memory where the appropriate
* parameterization of the query is passed from. If input_value is NULL, it is
* ignored.
*
* \param param_value is a pointer to memory where the appropriate result being
* queried is returned. If param_value is NULL, it is ignored.
*
* \param param_value_size is used to specify the size in bytes of memory
* pointed to by param_value. This size must be >= size of return type as
* described in the table below.
*
* \param param_value_size_ret returns the actual size in bytes of data copied
* to param_value. If param_value_size_ret is NULL, it is ignored.
*
* \return CL_SUCCESS if the function is executed successfully.
* Otherwise, it returns one of the following errors:
*
* - CL_INVALID_DEVICE if device is not in the list of devices associated with
* kernel or if device is NULL but there is more than one device associated
* with kernel.
* - CL_INVALID_VALUE if param_name is not valid, or if size in bytes specified
* by param_value_size is < size of return type as described in the table
* above and param_value is not NULL.
* - CL_INVALID_VALUE if param_name is CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE
* or CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE and the size in bytes specified by
* input_value_size is not valid or if input_value is NULL.
* - CL_INVALID_KERNEL if kernel is not a valid kernel object.
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by
* the OpenCL implementation on the device.
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
* by the OpenCL implementation on the host.
*
* \version 2.0r12
*/
RUNTIME_ENTRY(cl_int, clGetKernelSubGroupInfo,
              (cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name,
               size_t input_value_size, const void* input_value, size_t param_value_size,
               void* param_value, size_t* param_value_size_ret)) {
  // Answers sub-group queries for 'kernel' on 'device'. The sub-group size used
  // throughout is the device's wavefront width (info().wavefrontWidth_).
  //
  // Queries that read 'input_value' return CL_INVALID_VALUE when it is NULL or
  // when 'input_value_size' does not describe a whole number of size_t values
  // in the accepted range. CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT only writes
  // its result when 'param_value' is not NULL.

  // Handles are validated in this order: device first, then kernel.
  // NOTE(review): a NULL 'device' presumably fails is_valid() and is rejected
  // here even when the kernel has a single device - confirm the intended
  // behaviour.
  if (!is_valid(device)) {
    return CL_INVALID_DEVICE;
  }
  if (!is_valid(kernel)) {
    return CL_INVALID_KERNEL;
  }

  const amd::Device& amdDevice = *as_amd(device);

  // Find the kernel, associated with the specified device
  const device::Kernel* devKernel = as_amd(kernel)->getDeviceKernel(amdDevice);
  if (devKernel == NULL) {
    return CL_INVALID_KERNEL;
  }

  switch (param_name) {
    case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE:
    case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE: {
      // The input is an array of 1 to 3 size_t local sizes; the number of
      // dimensions is inferred from 'input_value_size'. A NULL input is
      // rejected before it is read.
      const size_t dims = input_value_size / sizeof(size_t);
      if (input_value == NULL || dims == 0 || dims > 3 ||
          input_value_size != dims * sizeof(size_t)) {
        return CL_INVALID_VALUE;
      }

      // Get the linear workgroup size
      const size_t* localSizes = static_cast<const size_t*>(input_value);
      size_t workGroupSize = localSizes[0];
      for (size_t i = 1; i < dims; ++i) {
        workGroupSize *= localSizes[i];
      }

      // Sub-groups are wavefronts: the count is the work-group size divided by
      // the wavefront width, rounded up
      size_t subGroupSize = amdDevice.info().wavefrontWidth_;
      size_t numSubGroups = (workGroupSize + subGroupSize - 1) / subGroupSize;
      return amd::clGetInfo(
          (param_name == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE) ? subGroupSize : numSubGroups,
          param_value_size, param_value, param_value_size_ret);
    }

    case CL_KERNEL_COMPILE_NUM_SUB_GROUPS: {
      // This implementation always reports 0 for this query
      size_t numSubGroups = 0;
      return amd::clGetInfo(numSubGroups, param_value_size, param_value, param_value_size_ret);
    }

    case CL_KERNEL_MAX_NUM_SUB_GROUPS: {
      // Work-group size from the work-group info divided by the wavefront
      // width, rounded up
      size_t waveSize = amdDevice.info().wavefrontWidth_;
      size_t numSubGroups = (devKernel->workGroupInfo()->size_ + waveSize - 1) / waveSize;
      return amd::clGetInfo(numSubGroups, param_value_size, param_value, param_value_size_ret);
    }

    case CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT: {
      // The input is exactly one size_t: the requested number of sub-groups
      if (input_value == NULL || input_value_size != sizeof(size_t)) {
        return CL_INVALID_VALUE;
      }
      const size_t numSubGroups = *static_cast<const size_t*>(input_value);

      // The output is an array of 1 to 3 size_t values; the number of
      // dimensions is inferred from 'param_value_size'
      const size_t dims = param_value_size / sizeof(size_t);
      if (dims == 0 || dims > 3 || param_value_size != dims * sizeof(size_t)) {
        return CL_INVALID_VALUE;
      }
      *not_null(param_value_size_ret) = param_value_size;

      // A NULL output buffer is ignored: only the size is reported
      if (param_value == NULL) {
        return CL_SUCCESS;
      }
      size_t* result = static_cast<size_t*>(param_value);

      const size_t localSize = numSubGroups * amdDevice.info().wavefrontWidth_;
      if (localSize > devKernel->workGroupInfo()->size_) {
        // The requested count does not fit in a work-group: report all zeros
        ::memset(result, '\0', dims * sizeof(size_t));
        return CL_SUCCESS;
      }

      // One-dimensional answer: everything in dimension 0, 1 in the others
      result[0] = localSize;
      for (size_t i = 1; i < dims; ++i) {
        result[i] = 1;
      }
      return CL_SUCCESS;
    }

    default:
      break;
  }

  // Every other value of 'param_name' is rejected
  return CL_INVALID_VALUE;
}
RUNTIME_EXIT
/*! @}
* @}
* @}
*/