1986 строки
80 KiB
C++
1986 строки
80 KiB
C++
//
|
|
// Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved.
|
|
//
|
|
|
|
#include "cl_common.hpp"
|
|
#include "vdi_common.hpp"
|
|
#include "platform/context.hpp"
|
|
#include "platform/program.hpp"
|
|
#include "platform/kernel.hpp"
|
|
#include "platform/sampler.hpp"
|
|
#include "cl_semaphore_amd.h"
|
|
|
|
#include <vector>
|
|
|
|
/*! \brief Allocate an empty program object for \a context and attach a device
 * program for each requested device.
 *
 * \param context context the program is created in; it is dereferenced through
 * as_amd() without being validated here.
 *
 * \param num_devices number of entries in \a device_list; only read when
 * \a device_list is not NULL.
 *
 * \param device_list devices to attach, or NULL to attach every device of
 * \a context.
 *
 * \param errcode_ret receives CL_SUCCESS, CL_INVALID_DEVICE or
 * CL_OUT_OF_HOST_MEMORY (written through not_null()).
 *
 * \return the new program, or NULL on failure. On failure the partially
 * initialized program is released before returning.
 */
static amd::Program* createProgram(cl_context context, cl_uint num_devices,
                                   const cl_device_id* device_list, cl_int* errcode_ret) {
  // Create the program
  amd::Program* program = new amd::Program(*as_amd(context));
  if (program == NULL) {
    *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
    return NULL;
  }

  if (device_list == NULL) {
    // No explicit list: add programs for all devices in the context.
    const std::vector<amd::Device*>& devices = as_amd(context)->devices();
    for (const auto& it : devices) {
      // Only an out-of-host-memory result is treated as fatal here.
      if (program->addDeviceProgram(*it) == CL_OUT_OF_HOST_MEMORY) {
        *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
        program->release();
        return NULL;
      }
    }
    // Report success on this path as well, so the caller never observes an
    // unwritten error code next to a valid program pointer.
    *not_null(errcode_ret) = CL_SUCCESS;
    return program;
  }

  for (cl_uint i = 0; i < num_devices; ++i) {
    cl_device_id device = device_list[i];

    // Every listed device must be a valid handle that belongs to the context.
    if (!is_valid(device) || !as_amd(context)->containsDevice(as_amd(device))) {
      *not_null(errcode_ret) = CL_INVALID_DEVICE;
      program->release();
      return NULL;
    }

    cl_int status = program->addDeviceProgram(*as_amd(device));
    if (status == CL_OUT_OF_HOST_MEMORY) {
      *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
      program->release();
      return NULL;
    }
  }

  *not_null(errcode_ret) = CL_SUCCESS;
  return program;
}
|
|
|
|
/*! \addtogroup API
|
|
* @{
|
|
*
|
|
* \addtogroup CL_Programs
|
|
*
|
|
* An OpenCL program consists of a set of kernels that are identified as
|
|
* functions declared with the __kernel qualifier in the program source.
|
|
* OpenCL programs may also contain auxiliary functions and constant data that
|
|
* can be used by __kernel functions. The program executable can be generated
|
|
* online or offline by the OpenCL compiler for the appropriate
|
|
* target device(s).
|
|
*
|
|
* @{
|
|
*
|
|
* \addtogroup CL_CreatingPrograms
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Create a program object for a context, and loads the source code
|
|
* specified by the text strings in the strings array into the program object.
|
|
*
|
|
* \param context must be a valid OpenCL context.
|
|
*
|
|
* \param count is the number of pointers in \a strings
|
|
*
|
|
* \param strings is an array of \a count pointers to optionally
|
|
* null-terminated character strings that make up the source code.
|
|
*
|
|
* \param lengths is an array with the number of chars in each string (the
|
|
* string length). If an element in lengths is zero, its accompanying string
|
|
* is null-terminated. If lengths is NULL, all strings in the strings argument
|
|
* are considered null-terminated.
|
|
*
|
|
* \param errcode_ret will return an appropriate error code. If \a errcode_ret
|
|
* is NULL, no error code is returned.
|
|
*
|
|
* \return A valid non-zero program object and errcode_ret is set to
|
|
* \a CL_SUCCESS if the program object is created successfully. It returns a
|
|
* NULL value with one of the following error values returned in
|
|
* \a errcode_ret:
|
|
* - CL_INVALID_CONTEXT if \a context is not a valid context.
|
|
* - CL_INVALID_VALUE if \a count is zero or if \a strings or any entry in
|
|
* \a strings is NULL.
|
|
* - CL_COMPILER_NOT_AVAILABLE if a compiler is not available.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithSource,
|
|
(cl_context context, cl_uint count, const char** strings, const size_t* lengths,
|
|
cl_int* errcode_ret)) {
|
|
if (!is_valid(context)) {
|
|
*not_null(errcode_ret) = CL_INVALID_CONTEXT;
|
|
return (cl_program)0;
|
|
}
|
|
if (count == 0 || strings == NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
std::string sourceCode;
|
|
for (cl_uint i = 0; i < count; ++i) {
|
|
if (strings[i] == NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_program)0;
|
|
}
|
|
if (lengths && lengths[i] != 0) {
|
|
sourceCode.append(strings[i], lengths[i]);
|
|
} else {
|
|
sourceCode.append(strings[i]);
|
|
}
|
|
}
|
|
if (sourceCode.empty()) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
// Create the program
|
|
amd::Program* program = new amd::Program(*as_amd(context), sourceCode, amd::Program::OpenCL_C);
|
|
if (program == NULL) {
|
|
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
// Add programs for all devices in the context.
|
|
const std::vector<amd::Device*>& devices = as_amd(context)->devices();
|
|
for (const auto& it : devices) {
|
|
if (program->addDeviceProgram(*it) == CL_OUT_OF_HOST_MEMORY) {
|
|
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
|
|
program->release();
|
|
return (cl_program)0;
|
|
}
|
|
}
|
|
|
|
*not_null(errcode_ret) = CL_SUCCESS;
|
|
return as_cl(program);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Create a program object for a context, and loads the IL into the
|
|
* program object.
|
|
*
|
|
* \param context must be a valid OpenCL context.
|
|
*
|
|
* \param il is a pointer to the IL.
|
|
*
|
|
* \param length is the size in bytes of IL.
|
|
*
|
|
* \param errcode_ret will return an appropriate error code. If \a errcode_ret
|
|
* is NULL, no error code is returned.
|
|
*
|
|
* \return A valid non-zero program object and errcode_ret is set to
|
|
* \a CL_SUCCESS if the program object is created successfully. It returns a
|
|
* NULL value with one of the following error values returned in
|
|
* \a errcode_ret:
|
|
* - CL_INVALID_CONTEXT if \a context is not a valid context.
|
|
* - CL_INVALID_VALUE if \a il is NULL or \a length is zero.
|
|
* - CL_INVALID_VALUE if the \a length-byte memory pointed to by \a il does
|
|
* not contain well-formed intermediate language input appropriate for the
|
|
* deployment environment in which the OpenCL platform is running.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
|
|
* by the OpenCL implementation on the device.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
|
|
* required by the OpenCL implementation on the host.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithIL,
|
|
(cl_context context, const void* il, size_t length, cl_int* errcode_ret)) {
|
|
if (!is_valid(context)) {
|
|
*not_null(errcode_ret) = CL_INVALID_CONTEXT;
|
|
return (cl_program)0;
|
|
}
|
|
if (length == 0 || il == NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
// Create the program
|
|
amd::Program* program = new amd::Program(*as_amd(context), amd::Program::SPIRV);
|
|
if (program == NULL) {
|
|
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
// Add programs for all devices in the context.
|
|
const std::vector<amd::Device*>& devices = as_amd(context)->devices();
|
|
for (const auto& it : devices) {
|
|
if (program->addDeviceProgram(*it, il, length) == CL_OUT_OF_HOST_MEMORY) {
|
|
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
|
|
program->release();
|
|
return (cl_program)0;
|
|
}
|
|
}
|
|
|
|
*not_null(errcode_ret) = CL_SUCCESS;
|
|
return as_cl(program);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Create a program object for a context, and loads the binary images
|
|
* into the program object.
|
|
*
|
|
* \param context must be a valid OpenCL context.
|
|
*
|
|
* \param device_list is a pointer to a list of devices that are in context.
|
|
* \a device_list must be a non-NULL value. The binaries are loaded for devices
|
|
* specified in this list.
|
|
*
|
|
* \param num_devices is the number of devices listed in \a device_list.
|
|
*
|
|
* \param device_list The devices associated with the program object. The
|
|
* list of devices specified by \a device_list must be devices associated with
|
|
* \a context.
|
|
*
|
|
* \param lengths is an array of the size in bytes of the program binaries to
|
|
* be loaded for devices specified by \a device_list.
|
|
*
|
|
* \param binaries is an array of pointers to program binaries to be loaded
|
|
* for devices specified by \a device_list. For each device given by
|
|
* \a device_list[i], the pointer to the program binary for that device is
|
|
* given by \a binaries[i] and the length of this corresponding binary is given
|
|
* by \a lengths[i]. \a lengths[i] cannot be zero and \a binaries[i] cannot be
|
|
* a NULL pointer. The program binaries specified by binaries contain the bits
|
|
* that describe the program executable that will be run on the device(s)
|
|
* associated with context. The program binary can consist of either or both:
|
|
* - Device-specific executable(s)
|
|
* - Implementation specific intermediate representation (IR) which will be
|
|
* converted to the device-specific executable.
|
|
*
|
|
* \param binary_status returns whether the program binary for each device
|
|
* specified in \a device_list was loaded successfully or not. It is an array
|
|
* of \a num_devices entries and returns CL_SUCCESS in \a binary_status[i] if
|
|
* binary was successfully loaded for device specified by \a device_list[i];
|
|
* otherwise returns CL_INVALID_VALUE if \a lengths[i] is zero or if
|
|
* \a binaries[i] is a NULL value or CL_INVALID_BINARY in \a binary_status[i]
|
|
* if program binary is not a valid binary for the specified device.
|
|
* If \a binary_status is NULL, it is ignored.
|
|
*
|
|
* \param errcode_ret will return an appropriate error code. If \a errcode_ret
|
|
* is NULL, no error code is returned.
|
|
*
|
|
* \return A valid non-zero program object and \a errcode_ret is set to
|
|
* CL_SUCCESS if the program object is created successfully. It returns a NULL
|
|
* value with one of the following error values returned in \a errcode_ret:
|
|
* - CL_INVALID_CONTEXT if \a context is not a valid context.
|
|
* - CL_INVALID_VALUE if \a device_list is NULL or \a num_devices is zero.
|
|
* - CL_INVALID_DEVICE if OpenCL devices listed in \a device_list are not in
|
|
* the list of devices associated with \a context
|
|
* - CL_INVALID_VALUE if \a lengths or \a binaries are NULL or if any entry
|
|
* in \a lengths[i] is zero or \a binaries[i] is NULL.
|
|
* - CL_INVALID_BINARY if an invalid program binary was encountered for any
|
|
* device. \a binary_status will return specific status for each device.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithBinary,
|
|
(cl_context context, cl_uint num_devices, const cl_device_id* device_list,
|
|
const size_t* lengths, const unsigned char** binaries, cl_int* binary_status,
|
|
cl_int* errcode_ret)) {
|
|
if (!is_valid(context)) {
|
|
*not_null(errcode_ret) = CL_INVALID_CONTEXT;
|
|
return (cl_program)0;
|
|
}
|
|
if (num_devices == 0 || device_list == NULL || binaries == NULL || lengths == NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
amd::Program* program = new amd::Program(*as_amd(context));
|
|
if (program == NULL) {
|
|
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
*not_null(errcode_ret) = CL_SUCCESS;
|
|
for (cl_uint i = 0; i < num_devices; ++i) {
|
|
cl_device_id device = device_list[i];
|
|
|
|
if (!is_valid(device) || !as_amd(context)->containsDevice(as_amd(device))) {
|
|
*not_null(errcode_ret) = CL_INVALID_DEVICE;
|
|
program->release();
|
|
return (cl_program)0;
|
|
}
|
|
if (binaries[i] == NULL || lengths[i] == 0) {
|
|
if (binary_status != NULL) {
|
|
binary_status[i] = CL_INVALID_VALUE;
|
|
}
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
continue;
|
|
}
|
|
|
|
cl_int status = program->addDeviceProgram(*as_amd(device), binaries[i], lengths[i]);
|
|
|
|
*not_null(errcode_ret) = status;
|
|
|
|
if (status == CL_OUT_OF_HOST_MEMORY) {
|
|
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
|
|
program->release();
|
|
return (cl_program)0;
|
|
}
|
|
|
|
if (binary_status != NULL) {
|
|
binary_status[i] = status;
|
|
}
|
|
}
|
|
return as_cl(program);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithAssemblyAMD,
|
|
(cl_context context, cl_uint count, const char** strings, const size_t* lengths,
|
|
cl_int* errcode_ret)) {
|
|
if (!is_valid(context)) {
|
|
*not_null(errcode_ret) = CL_INVALID_CONTEXT;
|
|
return (cl_program)0;
|
|
}
|
|
if (count == 0 || strings == NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
std::string assembly;
|
|
for (cl_uint i = 0; i < count; ++i) {
|
|
if (strings[i] == NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_program)0;
|
|
}
|
|
if (lengths && lengths[i] != 0) {
|
|
assembly.append(strings[i], lengths[i]);
|
|
} else {
|
|
assembly.append(strings[i]);
|
|
}
|
|
}
|
|
if (assembly.empty()) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
// Create the program
|
|
amd::Program* program = new amd::Program(*as_amd(context), assembly, amd::Program::Assembly);
|
|
if (program == NULL) {
|
|
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
// Add programs for all devices in the context.
|
|
const std::vector<amd::Device*>& devices = as_amd(context)->devices();
|
|
for (const auto& it : devices) {
|
|
if (program->addDeviceProgram(*it) == CL_OUT_OF_HOST_MEMORY) {
|
|
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
|
|
program->release();
|
|
return (cl_program)0;
|
|
}
|
|
}
|
|
|
|
*not_null(errcode_ret) = CL_SUCCESS;
|
|
return as_cl(program);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Increment the program reference count.
|
|
*
|
|
* The clCreateProgramWith* functions perform an implicit retain.
|
|
*
|
|
* \return CL_SUCCESS if the function is executed successfully. It returns
|
|
* CL_INVALID_PROGRAM if \a program is not a valid program object.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clRetainProgram, (cl_program program)) {
|
|
if (!is_valid(program)) {
|
|
return CL_INVALID_PROGRAM;
|
|
}
|
|
as_amd(program)->retain();
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Decrement the program reference count.
|
|
*
|
|
* The program object is deleted after all kernel objects associated with
|
|
* \a program have been deleted and the program reference count becomes zero.
|
|
*
|
|
* \return CL_SUCCESS if the function is executed successfully. It returns
|
|
* CL_INVALID_PROGRAM if \a program is not a valid program object.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clReleaseProgram, (cl_program program)) {
|
|
if (!is_valid(program)) {
|
|
return CL_INVALID_PROGRAM;
|
|
}
|
|
as_amd(program)->release();
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
* \addtogroup CL_Build
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Build (compile & link) a program executable from the program source
|
|
* or binary for all the devices or a specific device(s) in the OpenCL context
|
|
* associated with program.
|
|
*
|
|
* OpenCL allows program executables to be built using the sources or binaries.
|
|
*
|
|
* \param program is the program object.
|
|
*
|
|
* \param device_list is a pointer to a list of devices associated with
|
|
* \a program. If \a device_list is a NULL value, the program executable is
|
|
* built for all devices associated with \a program for which a source or
|
|
* binary has been loaded. If \a device_list is a non-NULL value, the program
|
|
* executable is built for devices specified in this list for which a source
|
|
* or binary has been loaded.
|
|
*
|
|
* \param num_devices is the number of devices listed in \a device_list.
|
|
*
|
|
* \param options is a pointer to a string that describes the build options to
|
|
* be used for building the program executable.
|
|
*
|
|
* \param pfn_notify is a function pointer to a notification routine. The
|
|
* notification routine allows an application to register a callback function
|
|
* which will be called when the program executable has been built
|
|
* (successfully or unsuccessfully). If \a pfn_notify is not NULL,
|
|
* clBuildProgram does not need to wait for the build to complete and can
|
|
* return immediately. If \a pfn_notify is NULL, clBuildProgram does not
|
|
* return until the build has completed. This callback function may be called
|
|
* asynchronously by the OpenCL implementation. It is the application's
|
|
* responsibility to ensure that the callback function is thread-safe.
|
|
*
|
|
* \param user_data will be passed as the argument when \a pfn_notify is
|
|
* called. \a user_data can be NULL.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function is executed successfully
|
|
* - CL_INVALID_PROGRAM if \a program is not a valid program object
|
|
* - CL_INVALID_VALUE if \a device_list is NULL and \a num_devices is greater
|
|
* than zero, or if \a device_list is not NULL and \a num_devices is zero,
|
|
* - CL_INVALID_DEVICE if OpenCL devices listed in \a device_list are not in
|
|
* the list of devices associated with \a program
|
|
* - CL_INVALID_BINARY if \a program is created with clCreateProgramWithBinary
|
|
* and devices listed in \a device_list do not have a valid program binary
|
|
* loaded
|
|
* - CL_INVALID_BUILD_OPTIONS if the build options specified by \a options are
|
|
* invalid
|
|
* - CL_INVALID_OPERATION if the build of a program executable for any of the
|
|
* devices listed in \a device_list by a previous call to clBuildProgram for
|
|
* \a program has not completed
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clBuildProgram,
|
|
(cl_program program, cl_uint num_devices, const cl_device_id* device_list,
|
|
const char* options,
|
|
void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data),
|
|
void* user_data)) {
|
|
if (!is_valid(program)) {
|
|
return CL_INVALID_PROGRAM;
|
|
}
|
|
if ((num_devices > 0 && device_list == NULL) || (num_devices == 0 && device_list != NULL)) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
amd::Program* amdProgram = as_amd(program);
|
|
|
|
if (device_list == NULL) {
|
|
// build for all devices in the context.
|
|
return amdProgram->build(amdProgram->context().devices(), options, pfn_notify, user_data);
|
|
}
|
|
|
|
std::vector<amd::Device*> devices(num_devices);
|
|
for (cl_uint i = 0; i < num_devices; ++i) {
|
|
amd::Device* device = as_amd(device_list[i]);
|
|
if (!amdProgram->context().containsDevice(device)) {
|
|
return CL_INVALID_DEVICE;
|
|
}
|
|
devices[i] = device;
|
|
}
|
|
return amdProgram->build(devices, options, pfn_notify, user_data);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief compiles a program's source for all the devices or a specific
|
|
* device(s) in the OpenCL context associated with program. The pre-processor
|
|
* runs before the program sources are compiled.
|
|
* The compiled binary is built for all devices associated with program or
|
|
* the list of devices specified. The compiled binary can be queried using
|
|
* \a clGetProgramInfo(program, CL_PROGRAM_BINARIES, ...) and can be specified
|
|
* to \a clCreateProgramWithBinary to create a new program object.
|
|
*
|
|
* \param program is the program object that is the compilation target.
|
|
*
|
|
* \param device_list is a pointer to a list of devices associated with program.
|
|
* If device_list is a NULL value, the compile is performed for all devices
|
|
* associated with program. If device_list is a non-NULL value, the compile is
|
|
* performed for devices specified in this list.
|
|
*
|
|
* \param num_devices is the number of devices listed in \a device_list.
|
|
*
|
|
* \param options is a pointer to a null-terminated string of characters that
|
|
* describes the compilation options to be used for building the program
|
|
* executable. The list of supported options is as described in section 5.6.4.
|
|
*
|
|
* \param num_input_headers specifies the number of programs that describe
|
|
* headers in the array referenced by input_headers.
|
|
*
|
|
* \param input_headers is an array of program embedded headers created with
|
|
* \a clCreateProgramWithSource.
|
|
*
|
|
* \param header_include_names is an array that has a one to one correspondence
|
|
* with input_headers.
|
|
* Each entry in \a header_include_names specifies the include name used by
|
|
* source in program that comes from an embedded header. The corresponding entry
|
|
* in input_headers identifies the program object which contains the header
|
|
* source to be used. The embedded headers are first searched before the headers
|
|
* in the list of directories specified by the -I compile option (as described in
|
|
* section 5.6.4.1). If multiple entries in header_include_names refer to the same
|
|
* header name, the first one encountered will be used.
|
|
*
|
|
* \param pfn_notify is a function pointer to a notification routine. The
|
|
* notification routine is a callback function that an application can register
|
|
* and which will be called when the program executable has been built
|
|
* (successfully or unsuccessfully). If pfn_notify is not NULL,
|
|
* \a clCompileProgram does not need to wait for the compiler to complete and can
|
|
* return immediately. If \a pfn_notify is NULL, \a clCompileProgram does not
|
|
* return until the compiler has completed. This callback function may be called
|
|
* asynchronously by the OpenCL implementation. It is the application's
|
|
* responsibility to ensure that the callback function is thread-safe.
|
|
*
|
|
* \param user_data will be passed as an argument when pfn_notify is called.
|
|
* \a user_data can be NULL.
|
|
*
|
|
* \return CL_SUCCESS if the function is executed successfully. Otherwise, it
|
|
* returns one of the following errors:
|
|
* - CL_INVALID_PROGRAM if program is not a valid program object.
|
|
* - CL_INVALID_VALUE if device_list is NULL and num_devices is greater than
|
|
* zero, or if \a device_list is not NULL and \a num_devices is zero.
|
|
* - CL_INVALID_VALUE if num_input_headers is zero and \a header_include_names
|
|
* or input_headers are not NULL or if num_input_headers is not zero and
|
|
* \a header_include_names or input_headers are NULL.
|
|
* - CL_INVALID_VALUE if \a pfn_notify is NULL but \a user_data is not NULL.
|
|
* - CL_INVALID_DEVICE if OpenCL devices listed in device_list are not in the
|
|
* list of devices associated with program
|
|
* - CL_INVALID_COMPILER_OPTIONS if the compiler options specified by options
|
|
* are invalid.
|
|
* - CL_INVALID_OPERATION if the compilation or build of a program executable
|
|
* for any of the devices listed in device_list by a previous call to
|
|
* \a clCompileProgram or \a clBuildProgram for program has not completed.
|
|
* - CL_COMPILER_NOT_AVAILABLE if a compiler is not available i.e.
|
|
* CL_DEVICE_COMPILER_AVAILABLE specified in table 4.3 is set to CL_FALSE.
|
|
* - CL_COMPILE_PROGRAM_FAILURE if there is a failure to compile the program
|
|
* source. This error will be returned if clCompileProgram does not return
|
|
* until the compile has completed.
|
|
* - CL_INVALID_OPERATION if there are kernel objects attached to program.
|
|
* - CL_INVALID_OPERATION if program has no source i.e. it has not been created
|
|
* with \a clCreateProgramWithSource.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
|
|
* by the OpenCL implementation on the device.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the OpenCL implementation on the host.
|
|
*
|
|
* \version 1.2r07
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clCompileProgram,
|
|
(cl_program program, cl_uint num_devices, const cl_device_id* device_list,
|
|
const char* options, cl_uint num_input_headers, const cl_program* input_headers,
|
|
const char** header_include_names,
|
|
void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data),
|
|
void* user_data)) {
|
|
if (!is_valid(program)) {
|
|
return CL_INVALID_PROGRAM;
|
|
}
|
|
if ((num_devices > 0 && device_list == NULL) || (num_devices == 0 && device_list != NULL)) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
if ((num_input_headers > 0 && (input_headers == NULL || header_include_names == NULL)) ||
|
|
(num_input_headers == 0 && (input_headers != NULL || header_include_names != NULL))) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
if (pfn_notify == NULL && user_data != NULL) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
amd::Program* amdProgram = as_amd(program);
|
|
if (amdProgram->referenceCount() > 1) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
|
|
std::vector<const amd::Program*> headerPrograms(num_input_headers);
|
|
for (cl_uint i = 0; i < num_input_headers; ++i) {
|
|
if (!is_valid(input_headers[i])) {
|
|
return CL_INVALID_OPERATION;
|
|
}
|
|
const amd::Program* headerProgram = as_amd(input_headers[i]);
|
|
headerPrograms[i] = headerProgram;
|
|
}
|
|
|
|
if (device_list == NULL) {
|
|
// compile for all devices in the context.
|
|
return amdProgram->compile(amdProgram->context().devices(), num_input_headers, headerPrograms,
|
|
header_include_names, options, pfn_notify, user_data);
|
|
}
|
|
|
|
std::vector<amd::Device*> devices(num_devices);
|
|
|
|
for (cl_uint i = 0; i < num_devices; ++i) {
|
|
amd::Device* device = as_amd(device_list[i]);
|
|
if (!amdProgram->context().containsDevice(device)) {
|
|
return CL_INVALID_DEVICE;
|
|
}
|
|
devices[i] = device;
|
|
}
|
|
|
|
return amdProgram->compile(devices, num_input_headers, headerPrograms, header_include_names,
|
|
options, pfn_notify, user_data);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief links a set of compiled program objects and libraries for all
|
|
* the devices or a specific device(s) in the OpenCL context and creates
|
|
* an executable. clLinkProgram creates a new program object which contains
|
|
* this executable. The executable binary can be queried using
|
|
* \a clGetProgramInfo(program, CL_PROGRAM_BINARIES, ...) and can be specified
|
|
* to \a clCreateProgramWithBinary to create a new program object.
|
|
* The devices associated with the returned program object will be the list
|
|
* of devices specified by device_list or if device_list is NULL it will be
|
|
* the list of devices associated with context.
|
|
*
|
|
* \param context must be a valid OpenCL context.
|
|
*
|
|
* \param device_list is a pointer to a list of devices that are in context.
|
|
* If device_list is a NULL value, the link is performed for all devices
|
|
* associated with context for which a compiled object is available.
|
|
* If device_list is a non-NULL value, the compile is performed for devices
|
|
* specified in this list for which a source has been loaded.
|
|
*
|
|
* \param num_devices is the number of devices listed in device_list.
|
|
*
|
|
* \param options is a pointer to a null-terminated string of characters
|
|
* that describes the link options to be used for building the program
|
|
* executable. The list of supported options is as described in section 5.6.5.
|
|
*
|
|
* \param num_input_programs specifies the number of programs in array
|
|
* referenced by input_programs.
|
|
*
|
|
* \param input_programs is an array of program objects that are compiled
|
|
* binaries or libraries that are to be linked to create the program executable.
|
|
* For each device in device_list or if device_list is NULL the list of devices
|
|
* associated with context, the following cases occur:
|
|
* All programs specified by input_programs contain a compiled binary or
|
|
* library for the device. In this case, a link is performed to generate
|
|
* a program executable for this device. None of the programs contain
|
|
* a compiled binary or library for that device. In this case, no link is
|
|
* performed and there will be no program executable generated for this device.
|
|
* All other cases will return a CL_INVALID_OPERATION error.
|
|
*
|
|
* \param pfn_notify is a function pointer to a notification routine.
|
|
* The notification routine is a callback function that an application can
|
|
* register and which will be called when the program executable has been built
|
|
* (successfully or unsuccessfully). If \a pfn_notify is not NULL,
|
|
* \a clLinkProgram does not need to wait for the linker to complete and can
|
|
* return immediately. Once the linker has completed, the \a pfn_notify
|
|
* callback function is called with a valid program object (if the link was
|
|
* successful) or NULL (if the link encountered a failure). This callback
|
|
* function may be called asynchronously by the OpenCL implementation. It is
|
|
* the application's responsibility to ensure that the callback function is
|
|
* thread-safe. If \a pfn_notify is NULL, \a clLinkProgram does not return
|
|
* until the linker has completed. clLinkProgram returns a valid non-zero
|
|
* program object (if the link was successful) or NULL (if the link
|
|
* encountered a failure).
|
|
*
|
|
* \a user_data will be passed as an argument when \a pfn_notify is called.
|
|
* user_data can be NULL.
|
|
*
|
|
* \return a valid non-zero program object and errcode_ret is set to CL_SUCCESS
|
|
* if the link was successful in generating a program executable for at least
|
|
* one device and the program object was created successfully. If \a pfn_notify
|
|
* is not NULL, \a clLinkProgram returns a NULL program object and
|
|
* \a errcode_ret is set to CL_SUCCESS if the function was executed
|
|
* successfully. Otherwise, it returns one of the following errors:
|
|
* - CL_INVALID_CONTEXT if context is not a valid context.
|
|
* - CL_INVALID_VALUE if device_list is NULL and num_devices is greater than
|
|
* zero, or if \a device_list is not NULL and \a num_devices is zero.
|
|
* - CL_INVALID_VALUE if \a num_input_programs is zero and \a input_programs
|
|
* is NULL or if \a num_input_programs is zero and \a input_programs is not
|
|
* NULL or if \a num_input_programs is not zero and \a input_programs is NULL.
|
|
* - CL_INVALID_PROGRAM if programs specified in \a input_programs are not
|
|
* valid program objects.
|
|
* - CL_INVALID_VALUE if \a pfn_notify is NULL but \a user_data is not NULL.
|
|
* - CL_INVALID_DEVICE if OpenCL devices listed in \a device_list are not in
|
|
* the list of devices associated with context
|
|
* - CL_INVALID_LINKER_OPTIONS if the linker options specified by options are
|
|
* invalid.
|
|
* - CL_INVALID_OPERATION if the compilation or build of a program executable
|
|
* for any of the devices listed in \a device_list by a previous call to
|
|
* clCompileProgram or clBuildProgram for program has not completed.
|
|
* - CL_INVALID_OPERATION if the rules for devices containing compiled binaries
|
|
* or libraries as described in \a input_programs argument above are
|
|
* not followed.
|
|
* - CL_LINKER_NOT_AVAILABLE if a linker is not available i.e.
|
|
* CL_DEVICE_LINKER_AVAILABLE specified in table 4.3 is set to CL_FALSE.
|
|
* - CL_LINK_PROGRAM_FAILURE if there is a failure to link the compiled
|
|
* binaries and/or libraries.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
|
|
* by the OpenCL implementation on the device.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the OpenCL implementation on the host.
|
|
*
|
|
* \version 1.2r07
|
|
*/
|
|
RUNTIME_ENTRY_RET(cl_program, clLinkProgram,
|
|
(cl_context context, cl_uint num_devices, const cl_device_id* device_list,
|
|
const char* options, cl_uint num_input_programs,
|
|
const cl_program* input_programs,
|
|
void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data),
|
|
void* user_data, cl_int* errcode_ret)) {
|
|
if (!is_valid(context)) {
|
|
*not_null(errcode_ret) = CL_INVALID_CONTEXT;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
if ((num_devices > 0 && device_list == NULL) || (num_devices == 0 && device_list != NULL)) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
if (num_input_programs == 0 || input_programs == NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
if (pfn_notify == NULL && user_data != NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_program)0;
|
|
}
|
|
|
|
std::vector<amd::Program*> inputPrograms(num_input_programs);
|
|
for (cl_uint i = 0; i < num_input_programs; ++i) {
|
|
if (!is_valid(input_programs[i])) {
|
|
*not_null(errcode_ret) = CL_INVALID_PROGRAM;
|
|
return (cl_program)0;
|
|
}
|
|
amd::Program* inputProgram = as_amd(input_programs[i]);
|
|
inputPrograms[i] = inputProgram;
|
|
}
|
|
|
|
amd::Program* program = createProgram(context, num_devices, device_list, errcode_ret);
|
|
if (program == NULL) return (cl_program)0;
|
|
|
|
*not_null(errcode_ret) = CL_SUCCESS;
|
|
cl_int status;
|
|
|
|
if (device_list == NULL) {
|
|
// compile for all devices in the context.
|
|
status = program->link(as_amd(context)->devices(), num_input_programs, inputPrograms, options,
|
|
pfn_notify, user_data);
|
|
} else {
|
|
std::vector<amd::Device*> devices(num_devices);
|
|
|
|
for (cl_uint i = 0; i < num_devices; ++i) {
|
|
amd::Device* device = as_amd(device_list[i]);
|
|
if (!as_amd(context)->containsDevice(device)) {
|
|
program->release();
|
|
*not_null(errcode_ret) = CL_INVALID_DEVICE;
|
|
return (cl_program)0;
|
|
}
|
|
devices[i] = device;
|
|
}
|
|
|
|
status =
|
|
program->link(devices, num_input_programs, inputPrograms, options, pfn_notify, user_data);
|
|
}
|
|
*not_null(errcode_ret) = status;
|
|
if (status == CL_SUCCESS) {
|
|
return as_cl(program);
|
|
}
|
|
|
|
program->release();
|
|
return (cl_program)0;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief creates a program object for a context, and loads the information
|
|
* related to the built-in kernels into a program object.
|
|
*
|
|
* \param context must be a valid OpenCL context.
|
|
*
|
|
* \param num_devices is the number of devices listed in device_list.
|
|
*
|
|
* \param device_list is a pointer to a list of devices that are in context.
|
|
* \a device_list must be a non-NULL value. The built-in kernels are loaded
|
|
* for devices specified in this list. The devices associated with the
|
|
* program object will be the list of devices specified by \a device_list.
|
|
* The list of devices specified by \a device_list must be devices associated
|
|
* with context.
|
|
*
|
|
* \param kernel_names is a semi-colon separated list of built-in kernel names.
|
|
*
|
|
* \return a valid non-zero program object and \a errcode_ret is set to
|
|
* CL_SUCCESS if the program object is created successfully. Otherwise, it
|
|
* returns a NULL value with one of the following error values returned
|
|
* in errcode_ret:
|
|
* - CL_INVALID_CONTEXT if context is not a valid context.
|
|
* - CL_INVALID_VALUE if device_list is NULL or num_devices is zero.
|
|
* - CL_INVALID_VALUE if kernel_names is NULL or kernel_names contains a kernel
|
|
* name that is not supported by any of the devices in \a device_list.
|
|
* - CL_INVALID_DEVICE if devices listed in device_list are not in the list
|
|
* of devices associated with context.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
|
|
* by the OpenCL implementation on the device.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the OpenCL implementation on the host.
|
|
*
|
|
* \version 1.2r07
|
|
*/
|
|
RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithBuiltInKernels,
|
|
(cl_context context, cl_uint num_devices, const cl_device_id* device_list,
|
|
const char* kernel_names, cl_int* errcode_ret)) {
|
|
//!@todo Add implementation
|
|
amd::Program* program = NULL;
|
|
Unimplemented();
|
|
return as_cl(program);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
* \addtogroup CL_Unloading
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Allows the implementation to release the resources allocated by
|
|
* the OpenCL compiler for platform. This is a hint from the application
|
|
* and does not guarantee that the compiler will not be used in the future
|
|
* or that the compiler will actually be unloaded by the implementation.
|
|
* Calls to \a clBuildProgram, \a clCompileProgram or \a clLinkProgram after
|
|
* \a clUnloadPlatformCompiler will reload the compiler,
|
|
* if necessary, to build the appropriate program executable.
|
|
*
|
|
* \return CL_SUCCESS if the function is executed successfully.
|
|
* Otherwise, it returns one of the following errors:
|
|
* - CL_INVALID_PLATFORM if platform is not a valid platform.
|
|
*
|
|
* \version 1.2r07
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clUnloadPlatformCompiler, (cl_platform_id platform)) {
|
|
if (platform != NULL && platform != AMD_PLATFORM) {
|
|
return CL_INVALID_PLATFORM;
|
|
}
|
|
|
|
//! @todo: Implement Compiler::unload()
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Allows the runtime to release the resources allocated by the OpenCL
|
|
* compiler.
|
|
*
|
|
* This is a hint from the application and does not guarantee that the compiler
|
|
* will not be used in the future or that the compiler will actually be
|
|
* unloaded by the implementation.
|
|
*
|
|
* Calls to clBuildProgram after clUnloadCompiler may reload the compiler,
|
|
* if necessary, to build the appropriate program executable.
|
|
*
|
|
* \return This call currently always returns CL_SUCCESS
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clUnloadCompiler, (void)) {
|
|
//! @todo: Implement Compiler::unload()
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
* \addtogroup CL_ProgramQueries
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Return information about the program object.
|
|
*
|
|
* \param program specifies the program object being queried.
|
|
*
|
|
* \param param_name specifies the information to query.
|
|
*
|
|
* \param param_value is a pointer to memory where the appropriate result
|
|
* being queried is returned. If \a param_value is NULL, it is ignored.
|
|
*
|
|
* \param param_value_size is used to specify the size in bytes of memory
|
|
* pointed to by \a param_value. This size must be >= size of return type.
|
|
*
|
|
* \param param_value_size_ret returns the actual size in bytes of data copied
|
|
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function is executed successfully
|
|
* - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
|
|
* specified by \a param_value_size is < size of return type and
|
|
* \a param_value is not NULL
|
|
* - CL_INVALID_PROGRAM_EXECUTABLE if param_name is
|
|
* CL_PROGRAM_NUM_KERNELS or CL_PROGRAM_KERNEL_NAMES and a successful
|
|
* program executable has not been built for at least one device in the list
|
|
* of devices associated with program.
|
|
* - CL_INVALID_PROGRAM if \a program is not a valid program object
|
|
*
|
|
* \version 1.2r07
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clGetProgramInfo,
|
|
(cl_program program, cl_program_info param_name, size_t param_value_size,
|
|
void* param_value, size_t* param_value_size_ret)) {
|
|
if (!is_valid(program)) {
|
|
return CL_INVALID_PROGRAM;
|
|
}
|
|
|
|
switch (param_name) {
|
|
case CL_PROGRAM_REFERENCE_COUNT: {
|
|
cl_uint count = as_amd(program)->referenceCount();
|
|
return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_PROGRAM_CONTEXT: {
|
|
cl_context context = const_cast<cl_context>(as_cl(&as_amd(program)->context()));
|
|
return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_PROGRAM_NUM_DEVICES: {
|
|
cl_uint numDevices = (cl_uint)as_amd(program)->deviceList().size();
|
|
return amd::clGetInfo(numDevices, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_PROGRAM_DEVICES: {
|
|
const amd::Program::devicelist_t& devices = as_amd(program)->deviceList();
|
|
const size_t numDevices = devices.size();
|
|
const size_t valueSize = numDevices * sizeof(cl_device_id);
|
|
|
|
if (param_value != NULL && param_value_size < valueSize) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
*not_null(param_value_size_ret) = valueSize;
|
|
if (param_value != NULL) {
|
|
cl_device_id* device_list = (cl_device_id*)param_value;
|
|
for (const auto& it : devices) {
|
|
*device_list++ = const_cast<cl_device_id>(as_cl(it));
|
|
}
|
|
if (param_value_size > valueSize) {
|
|
::memset(static_cast<address>(param_value) + valueSize, '\0',
|
|
param_value_size - valueSize);
|
|
}
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
case CL_PROGRAM_SOURCE: {
|
|
const char* source = as_amd(program)->sourceCode().c_str();
|
|
return amd::clGetInfo(source, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_PROGRAM_BINARY_SIZES: {
|
|
amd::Program* amdProgram = as_amd(program);
|
|
const amd::Program::devicelist_t& devices = amdProgram->deviceList();
|
|
const size_t numBinaries = devices.size();
|
|
const size_t valueSize = numBinaries * sizeof(size_t);
|
|
|
|
if (param_value != NULL && param_value_size < valueSize) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
*not_null(param_value_size_ret) = valueSize;
|
|
if (param_value != NULL) {
|
|
size_t* binary_sizes = (size_t*)param_value;
|
|
for (const auto& it : devices) {
|
|
*binary_sizes++ = amdProgram->getDeviceProgram(*it)->binary().second;
|
|
}
|
|
if (param_value_size > valueSize) {
|
|
::memset(static_cast<address>(param_value) + valueSize, '\0',
|
|
param_value_size - valueSize);
|
|
}
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
case CL_PROGRAM_BINARIES: {
|
|
amd::Program* amdProgram = as_amd(program);
|
|
const amd::Program::devicelist_t& devices = amdProgram->deviceList();
|
|
const size_t numBinaries = devices.size();
|
|
const size_t valueSize = numBinaries * sizeof(char*);
|
|
|
|
if (param_value != NULL && param_value_size < valueSize) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
*not_null(param_value_size_ret) = valueSize;
|
|
if (param_value != NULL) {
|
|
char** binaries = (char**)param_value;
|
|
for (const auto& it : devices) {
|
|
const device::Program::binary_t& binary = amdProgram->getDeviceProgram(*it)->binary();
|
|
// If an entry value in the array is NULL,
|
|
// then runtime should skip copying the program binary
|
|
if (*binaries != NULL) {
|
|
::memcpy(*binaries, binary.first, binary.second);
|
|
}
|
|
binaries++;
|
|
}
|
|
if (param_value_size > valueSize) {
|
|
::memset(static_cast<address>(param_value) + valueSize, '\0',
|
|
param_value_size - valueSize);
|
|
}
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
case CL_PROGRAM_NUM_KERNELS: {
|
|
if (as_amd(program)->symbolsPtr() == NULL) {
|
|
return CL_INVALID_PROGRAM_EXECUTABLE;
|
|
}
|
|
size_t numKernels = as_amd(program)->symbols().size();
|
|
return amd::clGetInfo(numKernels, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_PROGRAM_KERNEL_NAMES: {
|
|
const char* kernelNames = as_amd(program)->kernelNames().c_str();
|
|
return amd::clGetInfo(kernelNames, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Return build information for each device in the program object.
|
|
*
|
|
* \param program specifies the program object being queried.
|
|
*
|
|
* \param device specifies the device for which build information is being
|
|
* queried. device must be a valid device associated with \a program.
|
|
*
|
|
* \param param_name specifies the information to query.
|
|
*
|
|
* \param param_value is a pointer to memory where the appropriate result being
|
|
* queried is returned. If \a param_value is NULL, it is ignored.
|
|
*
|
|
* \param param_value_size is used to specify the size in bytes of memory
|
|
* pointed to by \a param_value. This size must be >= size of return type
|
|
*
|
|
* \param param_value_size_ret returns the actual size in bytes of data copied
|
|
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function is executed successfully
|
|
* - CL_INVALID_DEVICE if \a device is not in the list of devices associated
|
|
* with \a program
|
|
* - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
|
|
* specified by \a param_value_size is < size of return type and
|
|
* \a param_value is not NULL
|
|
* - CL_INVALID_PROGRAM if \a program is not a valid program object
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clGetProgramBuildInfo,
|
|
(cl_program program, cl_device_id device, cl_program_build_info param_name,
|
|
size_t param_value_size, void* param_value, size_t* param_value_size_ret)) {
|
|
if (!is_valid(program)) {
|
|
return CL_INVALID_PROGRAM;
|
|
}
|
|
if (!is_valid(device)) {
|
|
return CL_INVALID_DEVICE;
|
|
}
|
|
|
|
const device::Program* devProgram = as_amd(program)->getDeviceProgram(*as_amd(device));
|
|
if (devProgram == NULL) {
|
|
return CL_INVALID_DEVICE;
|
|
}
|
|
|
|
switch (param_name) {
|
|
case CL_PROGRAM_BUILD_STATUS: {
|
|
cl_build_status status = devProgram->buildStatus();
|
|
return amd::clGetInfo(status, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_PROGRAM_BUILD_OPTIONS: {
|
|
const std::string optionsStr = devProgram->lastBuildOptionsArg();
|
|
const char* options = optionsStr.c_str();
|
|
return amd::clGetInfo(options, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_PROGRAM_BUILD_LOG: {
|
|
const std::string logstr = as_amd(program)->programLog() + devProgram->buildLog().c_str();
|
|
const char* log = logstr.c_str();
|
|
return amd::clGetInfo(log, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_PROGRAM_BINARY_TYPE: {
|
|
const device::Program::type_t devProgramType = devProgram->type();
|
|
cl_uint type;
|
|
switch (devProgramType) {
|
|
case device::Program::TYPE_NONE: {
|
|
type = CL_PROGRAM_BINARY_TYPE_NONE;
|
|
break;
|
|
}
|
|
case device::Program::TYPE_COMPILED: {
|
|
type = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
|
|
break;
|
|
}
|
|
case device::Program::TYPE_LIBRARY: {
|
|
type = CL_PROGRAM_BINARY_TYPE_LIBRARY;
|
|
break;
|
|
}
|
|
case device::Program::TYPE_EXECUTABLE: {
|
|
type = CL_PROGRAM_BINARY_TYPE_EXECUTABLE;
|
|
break;
|
|
}
|
|
case device::Program::TYPE_INTERMEDIATE: {
|
|
type = CL_PROGRAM_BINARY_TYPE_INTERMEDIATE;
|
|
break;
|
|
}
|
|
default:
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
return amd::clGetInfo(type, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: {
|
|
size_t size = devProgram->globalVariableTotalSize();
|
|
return amd::clGetInfo(size, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Sets the values of a SPIR-V specialization constants.
|
|
*
|
|
* \param program must be a valid OpenCL program created from a SPIR-V module.
|
|
*
|
|
* \param spec_id identifies the SPIR-V specialization constant whose value will be set.
|
|
*
|
|
* \param spec_size specifies the size in bytes of the data pointed to by spec_value. This should
|
|
* be 1 for boolean constants. For all other constant types this should match the size of the
|
|
* specialization constant in the SPIR-V module.
|
|
*
|
|
* \param spec_value is a pointer to the memory location that contains the value of the
|
|
* specialization constant. The data pointed to by \a spec_value are copied and can be safely
|
|
* reused by the application after \a clSetProgramSpecializationConstant returns. This
|
|
* specialization value will be used by subsequent calls to \a clBuildProgram until another call to
|
|
* \a clSetProgramSpecializationConstant changes it. If a specialization constant is a boolean
|
|
* constant, \a spec_value should be a pointer to a cl_uchar value. A value of zero will set the
|
|
* specialization constant to false; any other value will set it to true.
|
|
*
|
|
* Calling this function multiple times for the same specialization constant shall cause the last
|
|
* provided value to override any previously specified value. The values are used by a subsequent
|
|
* \a clBuildProgram call for the program.
|
|
*
|
|
* Application is not required to provide values for every specialization constant contained in
|
|
* SPIR-V module. SPIR-V provides default values for all specialization constants.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function is executed successfully.
|
|
* - CL_INVALID_PROGRAM if program is not a valid program object created from a SPIR-V module.
|
|
* - CL_INVALID_SPEC_ID if spec_id is not a valid specialization constant ID
|
|
* - CL_INVALID_VALUE if spec_size does not match the size of the specialization constant in the
|
|
* SPIR-V module, or if spec_value is NULL.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL
|
|
* implementation on the device.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL
|
|
* implementation on the host.
|
|
*
|
|
* \version 2.2-3
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clSetProgramSpecializationConstant,
|
|
(cl_program program, cl_uint spec_id, size_t spec_size, const void* spec_value)) {
|
|
if (!is_valid(program)) {
|
|
return CL_INVALID_PROGRAM;
|
|
}
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief registers a user callback function with a program object. Each call to
|
|
* \a clSetProgramReleaseCallback registers the specified user callback function on a callback stack
|
|
* associated with program. The registered user callback functions are called in the reverse order
|
|
* in which they were registered. The user callback functions are called after destructors (if any)
|
|
* for program scope global variables (if any) are called and before the program is released.
|
|
* This provides a mechanism for the application (and libraries) to be notified when destructors
|
|
* are complete.
|
|
*
|
|
* \param program is a valid program object
|
|
*
|
|
* \param pfn_notify is the callback function that can be registered by the application. This
|
|
* callback function may be called asynchronously by the OpenCL implementation. It is the
|
|
* application's responsibility to ensure that the callback function is thread safe. The parameters
|
|
* to this callback function are:
|
|
* - \a prog is the program object whose destructors are being called. When the user callback is
|
|
* called by the implementation, this program object is no longer valid. \a prog is only provided
|
|
* for reference purposes.
|
|
* - \a user_data is a pointer to user supplied data. \a user_data will be passed as the
|
|
* \a user_data argument when pfn_notify is called. user data can be NULL.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function is executed successfully.
|
|
* - CL_INVALID_PROGRAM if program is not a valid program object.
|
|
* - CL_INVALID_VALUE if pfn_notify is NULL.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL
|
|
* implementation on the device.
|
|
*
|
|
* \version 2.2-3
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clSetProgramReleaseCallback,
|
|
(cl_program program, void (CL_CALLBACK *pfn_notify)(
|
|
cl_program program, void *user_data
|
|
), void *user_data)) {
|
|
if (!is_valid(program)) {
|
|
return CL_INVALID_PROGRAM;
|
|
}
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
* @}
|
|
*
|
|
* \addtogroup CL_Kernels
|
|
*
|
|
* A kernel is a function declared in a program. A kernel is identified by the
|
|
* __kernel qualifier applied to any function in a program. A kernel object
|
|
* encapsulates the specific __kernel function declared in a program and
|
|
* the argument values to be used when executing this __kernel function.
|
|
*
|
|
* @{
|
|
*
|
|
* \addtogroup CL_CreateKernel
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Create a kernel object.
|
|
*
|
|
* \param program is a program object with a successfully built executable.
|
|
*
|
|
* \param kernel_name is a function name in the program declared with the
|
|
* __kernel qualifier.
|
|
*
|
|
* \param errcode_ret will return an appropriate error code. If \a errcode_ret
|
|
* is NULL, no error code is returned.
|
|
*
|
|
* \return A valid non-zero kernel object and \a errcode_ret is set to
|
|
* CL_SUCCESS if the kernel object is created successfully. It returns a NULL
|
|
* value with one of the following error values returned in \a errcode_ret:
|
|
* - CL_INVALID_PROGRAM if \a program is not a valid program object
|
|
* - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable
|
|
* for \a program.
|
|
* - CL_INVALID_KERNEL_NAME if \a kernel_name is not found in \a program.
|
|
* - CL_INVALID_KERNEL_DEFINITION if the function definition for __kernel
|
|
* function given by \a kernel_name such as the number of arguments, the
|
|
* argument types are not the same for all devices for which the program
|
|
* executable has been built.
|
|
* - CL_INVALID_VALUE if \a kernel_name is NULL.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY_RET(cl_kernel, clCreateKernel,
|
|
(cl_program program, const char* kernel_name, cl_int* errcode_ret)) {
|
|
if (!is_valid(program)) {
|
|
*not_null(errcode_ret) = CL_INVALID_PROGRAM;
|
|
return (cl_kernel)0;
|
|
}
|
|
if (kernel_name == NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_VALUE;
|
|
return (cl_kernel)0;
|
|
}
|
|
/* FIXME_lmoriche, FIXME_spec: What are we supposed to do here?
|
|
* if (!as_amd(program)->containsOneSuccesfullyBuiltProgram())
|
|
* {
|
|
* *NotNull(errcode) = CL_INVALID_PROGRAM_EXECUTABLE;
|
|
* return (cl_kernel) 0;
|
|
* }
|
|
*/
|
|
amd::Program* amd_program = as_amd(program);
|
|
const amd::Symbol* symbol = amd_program->findSymbol(kernel_name);
|
|
if (symbol == NULL) {
|
|
*not_null(errcode_ret) = CL_INVALID_KERNEL_NAME;
|
|
return (cl_kernel)0;
|
|
}
|
|
|
|
amd::Kernel* kernel = new amd::Kernel(*amd_program, *symbol, kernel_name);
|
|
if (kernel == NULL) {
|
|
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
|
|
return (cl_kernel)0;
|
|
}
|
|
|
|
*not_null(errcode_ret) = CL_SUCCESS;
|
|
return as_cl(kernel);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Create kernel objects for all kernel functions in program.
|
|
*
|
|
* Kernel objects may not be created for any __kernel functions in program
|
|
* that do not have the same function definition across all devices for which
|
|
* a program executable has been successfully built.
|
|
*
|
|
* \param program is a program object with a successfully built executable.
|
|
*
|
|
* \param num_kernels is the size of memory pointed to by \a kernels specified
|
|
* as the number of cl_kernel entries.
|
|
*
|
|
* \param kernels is the buffer where the kernel objects for kernels in
|
|
* \a program will be returned. If \a kernels is NULL, it is ignored.
|
|
* If \a kernels is not NULL, \a num_kernels must be greater than or equal
|
|
* to the number of kernels in program.
|
|
*
|
|
* \param num_kernels_ret is the number of kernels in program. If
|
|
* \a num_kernels_ret is NULL, it is ignored.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the kernel objects were successfully allocated
|
|
* - CL_INVALID_PROGRAM if \a program is not a valid program object
|
|
* - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable
|
|
* for any device in \a program
|
|
* - CL_INVALID_VALUE if \a kernels is not NULL and \a num_kernels is less
|
|
* than the number of kernels in program
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the runtime.
|
|
*
|
|
* Kernel objects can only be created once you have a program object with a
|
|
* valid program source or binary loaded into the program object and the
|
|
* program executable has been successfully built for one or more devices
|
|
* associated with \a program. No changes to the program executable are
|
|
* allowed while there are kernel objects associated with a program object.
|
|
* This means that calls to clBuildProgram return CL_INVALID_OPERATION if there
|
|
* are kernel objects attached to a program object. The OpenCL context
|
|
* associated with program will be the context associated with kernel.
|
|
* Devices associated with a program object for which a valid program
|
|
* executable has been built can be used to execute kernels declared in the
|
|
* program object.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clCreateKernelsInProgram, (cl_program program, cl_uint num_kernels,
|
|
cl_kernel* kernels, cl_uint* num_kernels_ret)) {
|
|
if (!is_valid(program)) {
|
|
return CL_INVALID_PROGRAM;
|
|
}
|
|
|
|
cl_uint numKernels = (cl_uint)as_amd(program)->symbols().size();
|
|
|
|
if (kernels != NULL && num_kernels < numKernels) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
*not_null(num_kernels_ret) = numKernels;
|
|
if (kernels == NULL) {
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
const amd::Program::symbols_t& symbols = as_amd(program)->symbols();
|
|
cl_kernel* result = kernels;
|
|
|
|
for (const auto& it : symbols) {
|
|
amd::Kernel* kernel = new amd::Kernel(*as_amd(program), it.second, it.first);
|
|
if (kernel == NULL) {
|
|
while (--result >= kernels) {
|
|
as_amd(*result)->release();
|
|
}
|
|
return CL_OUT_OF_HOST_MEMORY;
|
|
}
|
|
*result++ = as_cl(kernel);
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Increment the kernel reference count.
|
|
*
|
|
* \return CL_SUCCESS if the function is executed successfully. It returns
|
|
* CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
|
|
*
|
|
* clCreateKernel or clCreateKernelsInProgram do an implicit retain.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clRetainKernel, (cl_kernel kernel)) {
|
|
if (!is_valid(kernel)) {
|
|
return CL_INVALID_KERNEL;
|
|
}
|
|
as_amd(kernel)->retain();
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Decrement the kernel reference count.
|
|
*
|
|
* \return CL_SUCCESS if the function is executed successfully. It returns
|
|
* CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
|
|
*
|
|
* The kernel object is deleted once the number of instances that are retained
|
|
* to \a kernel become zero and after all queued execution instances of
|
|
* \a kernel have finished.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clReleaseKernel, (cl_kernel kernel)) {
|
|
if (!is_valid(kernel)) {
|
|
return CL_INVALID_KERNEL;
|
|
}
|
|
as_amd(kernel)->release();
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Makes a shallow copy of the kernel object, its arguments and any
|
|
* information passed to the kernel object using \a clSetKernelExecInfo. If
|
|
* the kernel object was ready to be enqueued before copying it, the clone of
|
|
* the kernel object is ready to enqueue.
|
|
*
|
|
* \param source_kernel is a valid cl_kernel object that will be copied.
|
|
* source_kernel will not be modified in any way by this function.
|
|
*
|
|
* \param errcode_ret will be assigned an appropriate error code. If
|
|
* errcode_ret is NULL, no error code is returned.
|
|
*
|
|
* \return a valid non-zero kernel object and errcode_ret is set to
|
|
* CL_SUCCESS if the kernel is successfully copied. Otherwise it returns a
|
|
* NULL value with one of the following error values returned in errcode_ret:
|
|
* - CL_INVALID_KERNEL if kernel is not a valid kernel object.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required
|
|
* by the OpenCL implementation on the device.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
|
|
* required by the OpenCL implementation on the host.
|
|
*
|
|
* \version 2.1r01
|
|
*/
|
|
RUNTIME_ENTRY_RET(cl_kernel, clCloneKernel,
|
|
(cl_kernel source_kernel, cl_int* errcode_ret)) {
|
|
if (!is_valid(source_kernel)) {
|
|
*not_null(errcode_ret) = CL_INVALID_KERNEL;
|
|
return (cl_kernel)0;
|
|
}
|
|
|
|
amd::Kernel* kernel = new amd::Kernel(*as_amd(source_kernel));
|
|
if (kernel == NULL) {
|
|
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
|
|
return (cl_kernel)0;
|
|
}
|
|
|
|
*not_null(errcode_ret) = CL_SUCCESS;
|
|
return as_cl(kernel);
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
* \addtogroup CL_SettingArgs
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Set the argument value for a specific argument of a kernel.
|
|
*
|
|
* \param kernel is a valid kernel object.
|
|
*
|
|
* \param arg_index is the argument index. Arguments to the kernel are referred
|
|
* by indices that go from 0 for the leftmost argument to n - 1, where n is the
|
|
* total number of arguments declared by a kernel.
|
|
*
|
|
* \param arg_value is a pointer to data that should be used as the argument
|
|
* value for argument specified by \a arg_index. The argument data pointed to
|
|
* by \a arg_value is copied and the \a arg_value pointer can therefore be
|
|
* reused by the application after clSetKernelArg returns. If the argument is
|
|
* a memory object (buffer or image), the \a arg_value entry will be a pointer
|
|
* to the appropriate buffer or image object. The memory object must be created
|
|
* with the context associated with the kernel object. If the argument is
|
|
* declared with the __local qualifier, the \a arg_value entry must be NULL.
|
|
* For all other kernel arguments, the \a arg_value entry must be a pointer to
|
|
* the actual data to be used as argument value. The memory object specified
|
|
* as argument value must be a buffer object if the argument is declared to be
|
|
* a pointer of a built-in or user defined type with the __global or __constant
|
|
* qualifier. If the argument is declared with the __constant qualifier, the
|
|
* size in bytes of the memory object cannot exceed
|
|
* CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE and the number of arguments declared
|
|
* with the __constant qualifier cannot exceed CL_DEVICE_MAX_CONSTANT_ARGS. The
|
|
* memory object specified as argument value must be a 2D image object if the
|
|
* argument is declared to be of type image2d_t. The memory object specified as
|
|
* argument value must be a 3D image object if argument is declared to be of
|
|
* type image3d_t. If the argument is of type sampler_t, the arg_value entry
|
|
* must be a pointer to the sampler object.
|
|
*
|
|
* \param arg_size specifies the size of the argument value. If the argument is
|
|
* a memory object, the size is the size of the buffer or image object type.
|
|
* For arguments declared with the __local qualifier, the size specified will
|
|
* be the size in bytes of the buffer that must be allocated for the __local
|
|
* argument. If the argument is of type sampler_t, the arg_size value must be
|
|
* equal to sizeof(cl_sampler). For all other arguments, the size will be the
|
|
* size of argument type.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function was executed successfully
|
|
* - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
|
|
* - CL_INVALID_ARG_INDEX if \a arg_index is not a valid argument index.
|
|
* - CL_INVALID_ARG_VALUE if \a arg_value specified is NULL for an argument
|
|
* that is not declared with the __local qualifier or vice-versa.
|
|
* - CL_INVALID_MEM_OBJECT for an argument declared to be a memory object but
|
|
* the specified \a arg_value is not a valid memory object.
|
|
* - CL_INVALID_SAMPLER for an argument declared to be of type sampler_t but
|
|
* the specified \a arg_value is not a valid sampler object.
|
|
* - CL_INVALID_ARG_SIZE if \a arg_size does not match the size of the data
|
|
* type for an argument that is not a memory object or if the argument is a
|
|
* memory object and \a arg_size != sizeof(cl_mem) or if \a arg_size is zero
|
|
* and the argument is declared with the __local qualifier or if the
|
|
* argument is a sampler and arg_size != sizeof(cl_sampler).
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clSetKernelArg,
|
|
(cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void* arg_value)) {
|
|
if (!is_valid(kernel)) {
|
|
return CL_INVALID_KERNEL;
|
|
}
|
|
|
|
const amd::KernelSignature& signature = as_amd(kernel)->signature();
|
|
if (arg_index >= signature.numParameters()) {
|
|
return CL_INVALID_ARG_INDEX;
|
|
}
|
|
|
|
const amd::KernelParameterDescriptor& desc = signature.at(arg_index);
|
|
const bool is_local = (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL);
|
|
if (((arg_value == NULL) && !is_local && (desc.type_ != T_POINTER)) ||
|
|
((arg_value != NULL) && is_local)) {
|
|
as_amd(kernel)->parameters().reset(static_cast<size_t>(arg_index));
|
|
return CL_INVALID_ARG_VALUE;
|
|
}
|
|
if (!is_local && (desc.type_ == T_POINTER) && (arg_value != NULL)) {
|
|
cl_mem memObj = *static_cast<const cl_mem*>(arg_value);
|
|
amd::RuntimeObject* pObject = as_amd(memObj);
|
|
if (NULL != memObj && amd::RuntimeObject::ObjectTypeMemory != pObject->objectType()) {
|
|
as_amd(kernel)->parameters().reset(static_cast<size_t>(arg_index));
|
|
return CL_INVALID_MEM_OBJECT;
|
|
}
|
|
} else if ((desc.type_ == T_SAMPLER) && !is_valid(*static_cast<const cl_sampler*>(arg_value))) {
|
|
return CL_INVALID_SAMPLER;
|
|
} else if (desc.type_ == T_QUEUE) {
|
|
cl_command_queue queue = *static_cast<const cl_command_queue*>(arg_value);
|
|
if (!is_valid(queue)) {
|
|
as_amd(kernel)->parameters().reset(static_cast<size_t>(arg_index));
|
|
return CL_INVALID_DEVICE_QUEUE;
|
|
}
|
|
if (NULL == as_amd(queue)->asDeviceQueue()) {
|
|
as_amd(kernel)->parameters().reset(static_cast<size_t>(arg_index));
|
|
return CL_INVALID_DEVICE_QUEUE;
|
|
}
|
|
}
|
|
if ((!is_local && (arg_size != desc.size_)) || (is_local && (arg_size == 0))) {
|
|
if (LP64_ONLY(true ||) ((desc.type_ != T_POINTER) && (desc.type_ != T_SAMPLER)) ||
|
|
(arg_size != sizeof(void*))) {
|
|
as_amd(kernel)->parameters().reset(static_cast<size_t>(arg_index));
|
|
return CL_INVALID_ARG_SIZE;
|
|
}
|
|
}
|
|
|
|
as_amd(kernel)->parameters().set(static_cast<size_t>(arg_index), arg_size, arg_value);
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
* \addtogroup CL_KernelQuery
|
|
* @{
|
|
*/
|
|
|
|
/*! \brief Return information about the kernel object.
|
|
*
|
|
* \param kernel specifies the kernel object being queried.
|
|
*
|
|
* \param param_name specifies the information to query.
|
|
*
|
|
* \param param_value is a pointer to memory where the appropriate result
|
|
* being queried is returned. If \a param_value is NULL, it is ignored.
|
|
*
|
|
* \param param_value_size is used to specify the size in bytes of memory
|
|
* pointed to by \a param_value. This size must be >= size of return type.
|
|
*
|
|
* \param param_value_size_ret returns the actual size in bytes of data copied
|
|
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function is executed successfully
|
|
* - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
|
|
* specified by \a param_value_size is < size of return type and
|
|
* \a param_value is not NULL
|
|
* - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
|
|
*
|
|
* \version 1.0r33
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clGetKernelInfo,
|
|
(cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size,
|
|
void* param_value, size_t* param_value_size_ret)) {
|
|
// Check if we have a valid kernel
|
|
if (!is_valid(kernel)) {
|
|
return CL_INVALID_KERNEL;
|
|
}
|
|
|
|
const amd::Kernel* amdKernel = as_amd(kernel);
|
|
|
|
// Get the corresponded parameters
|
|
switch (param_name) {
|
|
case CL_KERNEL_FUNCTION_NAME: {
|
|
const char* name = amdKernel->name().c_str();
|
|
// Return the kernel's name
|
|
return amd::clGetInfo(name, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_NUM_ARGS: {
|
|
cl_uint numParam = static_cast<cl_uint>(amdKernel->signature().numParameters());
|
|
// Return the number of kernel's parameters
|
|
return amd::clGetInfo(numParam, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_REFERENCE_COUNT: {
|
|
cl_uint count = amdKernel->referenceCount();
|
|
// Return the reference counter
|
|
return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_CONTEXT: {
|
|
cl_context context = const_cast<cl_context>(as_cl(&amdKernel->program().context()));
|
|
// Return the context, associated with the program
|
|
return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_PROGRAM: {
|
|
cl_program program = const_cast<cl_program>(as_cl(&amdKernel->program()));
|
|
// Return the program, associated with the kernel
|
|
return amd::clGetInfo(program, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_ATTRIBUTES: {
|
|
const char* name = amdKernel->signature().attributes().c_str();
|
|
// Return the kernel attributes
|
|
return amd::clGetInfo(name, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
default:
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Returns information about the arguments of a kernel. Kernel
|
|
* argument information is only available if the program object associated
|
|
* with kernel is created with \a clCreateProgramWithSource and the program
|
|
* executable is built with the -cl-kernel-arg-info option specified in
|
|
* options argument to clBuildProgram or clCompileProgram.
|
|
*
|
|
* \param kernel specifies the kernel object being queried.
|
|
*
|
|
* \param param_name specifies the information to query.
|
|
*
|
|
* \param param_value is a pointer to memory where the appropriate result
|
|
* being queried is returned. If \a param_value is NULL, it is ignored.
|
|
*
|
|
* \param param_value_size is used to specify the size in bytes of memory
|
|
* pointed to by \a param_value. This size must be >= size of return type.
|
|
*
|
|
* \param param_value_size_ret returns the actual size in bytes of data copied
|
|
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function is executed successfully
|
|
* - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
|
|
* specified by \a param_value_size is < size of return type and
|
|
* \a param_value is not NULL
|
|
* - CL_INVALID_ARG_INDEX if \a arg_indx is not a valid argument index.
* - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
|
|
*
|
|
* \version 1.2r07
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clGetKernelArgInfo,
|
|
(cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name,
|
|
size_t param_value_size, void* param_value, size_t* param_value_size_ret)) {
|
|
// Check if we have a valid kernel
|
|
if (!is_valid(kernel)) {
|
|
return CL_INVALID_KERNEL;
|
|
}
|
|
|
|
amd::Kernel* amdKernel = as_amd(kernel);
|
|
|
|
const amd::KernelSignature& signature = amdKernel->signature();
|
|
if (arg_indx >= signature.numParameters()) {
|
|
return CL_INVALID_ARG_INDEX;
|
|
}
|
|
|
|
const amd::KernelParameterDescriptor& desc = signature.at(arg_indx);
|
|
|
|
// Get the corresponded parameters
|
|
switch (param_name) {
|
|
case CL_KERNEL_ARG_ADDRESS_QUALIFIER: {
|
|
cl_kernel_arg_address_qualifier qualifier = desc.addressQualifier_;
|
|
return amd::clGetInfo(qualifier, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_ARG_ACCESS_QUALIFIER: {
|
|
cl_kernel_arg_access_qualifier qualifier = desc.accessQualifier_;
|
|
return amd::clGetInfo(qualifier, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_ARG_TYPE_NAME: {
|
|
const char* typeName = desc.typeName_.c_str();
|
|
// Return the argument's type name
|
|
return amd::clGetInfo(typeName, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_ARG_TYPE_QUALIFIER: {
|
|
cl_kernel_arg_type_qualifier qualifier = desc.typeQualifier_;
|
|
return amd::clGetInfo(qualifier, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_ARG_NAME: {
|
|
const char* name = desc.name_.c_str();
|
|
// Return the argument's name
|
|
return amd::clGetInfo(name, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
default:
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Return information about the kernel object that may be specific
|
|
* to a device.
|
|
*
|
|
* \param kernel specifies the kernel object being queried.
|
|
*
|
|
* \param device identifies a specific device in the list of devices associated
|
|
* with \a kernel. The list of devices is the list of devices in the OpenCL
|
|
* context that is associated with \a kernel. If the list of devices associated
|
|
* with kernel is a single device, \a device can be a NULL value.
|
|
*
|
|
* \param param_name specifies the information to query
|
|
*
|
|
* \param param_value is a pointer to memory where the appropriate result being
|
|
* queried is returned. If \a param_value is NULL, it is ignored.
|
|
*
|
|
* \param param_value_size is used to specify the size in bytes of memory
|
|
* pointed to by \a param_value. This size must be >= size of return type.
|
|
*
|
|
* \param param_value_size_ret returns the actual size in bytes of data copied
|
|
* to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
|
|
*
|
|
* \return One of the following values:
|
|
* - CL_SUCCESS if the function is executed successfully,
|
|
* - CL_INVALID_DEVICE if \a device is not in the list of devices associated
|
|
* with \a kernel or if \a device is NULL but there is more than one
|
|
* device associated with \a kernel
|
|
* - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes
|
|
* specified by \a param_value_size is < size of return type and
|
|
* \a param_value is not NULL
|
|
* - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
|
|
*
|
|
* \version 1.2r15
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clGetKernelWorkGroupInfo,
|
|
(cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name,
|
|
size_t param_value_size, void* param_value, size_t* param_value_size_ret)) {
|
|
// Check if we have a valid device
|
|
if (!is_valid(device)) {
|
|
return CL_INVALID_DEVICE;
|
|
}
|
|
|
|
// Check if we have a valid kernel
|
|
if (!is_valid(kernel)) {
|
|
return CL_INVALID_KERNEL;
|
|
}
|
|
|
|
|
|
const amd::Device& amdDevice = *as_amd(device);
|
|
// Find the kernel, associated with the specified device
|
|
const device::Kernel* devKernel = as_amd(kernel)->getDeviceKernel(amdDevice);
|
|
|
|
// Make sure we found a valid kernel
|
|
if (devKernel == NULL) {
|
|
return CL_INVALID_KERNEL;
|
|
}
|
|
|
|
// Get the corresponded parameters
|
|
switch (param_name) {
|
|
case CL_KERNEL_WORK_GROUP_SIZE: {
|
|
// Return workgroup size
|
|
return amd::clGetInfo(devKernel->workGroupInfo()->size_, param_value_size, param_value,
|
|
param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: {
|
|
// Return the compile workgroup size
|
|
return amd::clGetInfo(devKernel->workGroupInfo()->compileSize_, param_value_size, param_value,
|
|
param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_LOCAL_MEM_SIZE: {
|
|
// Return the amount of used local memory
|
|
const size_t align = amdDevice.info().minDataTypeAlignSize_;
|
|
cl_ulong memSize = as_amd(kernel)->parameters().localMemSize(align) +
|
|
amd::alignUp(devKernel->workGroupInfo()->localMemSize_, align);
|
|
return amd::clGetInfo(memSize, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: {
|
|
// Return the compile workgroup size
|
|
return amd::clGetInfo(devKernel->workGroupInfo()->preferredSizeMultiple_, param_value_size,
|
|
param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_PRIVATE_MEM_SIZE: {
|
|
// Return the compile workgroup size
|
|
return amd::clGetInfo(devKernel->workGroupInfo()->privateMemSize_, param_value_size,
|
|
param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_GLOBAL_WORK_SIZE: {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
case CL_KERNEL_MAX_SEMAPHORE_SIZE_AMD: {
|
|
return amd::clGetInfo(amdDevice.info().maxSemaphoreSize_, param_value_size, param_value,
|
|
param_value_size_ret);
|
|
}
|
|
default:
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! \brief Returns information about the kernel object.
|
|
*
|
|
* \param kernel specifies the kernel object being queried.
|
|
*
|
|
* \param device identifies a specific device in the list of devices associated
|
|
* with kernel. The list of devices is the list of devices in the OpenCL context
|
|
* that is associated with kernel. If the list of devices associated with kernel
|
|
* is a single device, device can be a NULL value.
|
|
*
|
|
* \param param_name specifies the information to query. The list of supported
|
|
* param_name types and the information returned in param_value by
|
|
* clGetKernelSubGroupInfo is described in the table below.
|
|
*
|
|
* \param input_value_size is used to specify the size in bytes of memory
|
|
* pointed to by input_value. This size must be == size of input type as
|
|
* described in the table below.
|
|
*
|
|
* \param input_value is a pointer to memory where the appropriate
|
|
* parameterization of the query is passed from. If input_value is NULL, it is
|
|
* ignored.
|
|
*
|
|
* \param param_value is a pointer to memory where the appropriate result being
|
|
* queried is returned. If param_value is NULL, it is ignored.
|
|
*
|
|
* \param param_value_size is used to specify the size in bytes of memory
|
|
* pointed to by param_value. This size must be >= size of return type as
|
|
* described in the table below.
|
|
*
|
|
* \param param_value_size_ret returns the actual size in bytes of data copied
|
|
* to param_value. If param_value_size_ret is NULL, it is ignored.
|
|
*
|
|
* \return CL_SUCCESS if the function is executed successfully.
|
|
* Otherwise, it returns one of the following errors:
|
|
*
|
|
* - CL_INVALID_DEVICE if device is not in the list of devices associated with
|
|
* kernel or if device is NULL but there is more than one device associated
|
|
* with kernel.
|
|
* - CL_INVALID_VALUE if param_name is not valid, or if size in bytes specified
|
|
* by param_value_size is < size of return type as described in the table
|
|
* above and param_value is not NULL.
|
|
* - CL_INVALID_VALUE if param_name is CL_KERNEL_SUB_GROUP_SIZE_FOR_NDRANGE and
|
|
* the size in bytes specified by input_value_size is not valid or if
|
|
* input_value is NULL.
|
|
* - CL_INVALID_KERNEL if kernel is not a valid kernel object.
|
|
* - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by
|
|
* the OpenCL implementation on the device.
|
|
* - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required
|
|
* by the OpenCL implementation on the host.
|
|
*
|
|
* \version 2.0r12
|
|
*/
|
|
RUNTIME_ENTRY(cl_int, clGetKernelSubGroupInfo,
|
|
(cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name,
|
|
size_t input_value_size, const void* input_value, size_t param_value_size,
|
|
void* param_value, size_t* param_value_size_ret)) {
|
|
// Check if we have a valid device
|
|
if (!is_valid(device)) {
|
|
return CL_INVALID_DEVICE;
|
|
}
|
|
|
|
// Check if we have a valid kernel
|
|
if (!is_valid(kernel)) {
|
|
return CL_INVALID_KERNEL;
|
|
}
|
|
|
|
|
|
const amd::Device& amdDevice = *as_amd(device);
|
|
// Find the kernel, associated with the specified device
|
|
const device::Kernel* devKernel = as_amd(kernel)->getDeviceKernel(amdDevice);
|
|
|
|
// Make sure we found a valid kernel
|
|
if (devKernel == NULL) {
|
|
return CL_INVALID_KERNEL;
|
|
}
|
|
|
|
// Get the corresponded parameters
|
|
switch (param_name) {
|
|
case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE:
|
|
case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE: {
|
|
// Infer the number of dimensions from 'input_value_size'
|
|
size_t dims = input_value_size / sizeof(size_t);
|
|
if (dims == 0 || dims > 3 || input_value_size != dims * sizeof(size_t)) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
// Get the linear workgroup size
|
|
size_t workGroupSize = ((size_t*)input_value)[0];
|
|
for (size_t i = 1; i < dims; ++i) {
|
|
workGroupSize *= ((size_t*)input_value)[i];
|
|
}
|
|
|
|
// Get the subgroup size. GPU devices sub-groups are wavefronts.
|
|
size_t subGroupSize = as_amd(device)->info().wavefrontWidth_;
|
|
|
|
size_t numSubGroups = (workGroupSize + subGroupSize - 1) / subGroupSize;
|
|
|
|
|
|
return amd::clGetInfo((param_name == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR)
|
|
? subGroupSize
|
|
: numSubGroups,
|
|
param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_COMPILE_NUM_SUB_GROUPS: {
|
|
size_t numSubGroups = 0;
|
|
return amd::clGetInfo(numSubGroups, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_MAX_NUM_SUB_GROUPS: {
|
|
size_t waveSize = as_amd(device)->info().wavefrontWidth_;
|
|
size_t numSubGroups = (devKernel->workGroupInfo()->size_ + waveSize - 1) / waveSize;
|
|
return amd::clGetInfo(numSubGroups, param_value_size, param_value, param_value_size_ret);
|
|
}
|
|
case CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT: {
|
|
if (input_value_size != sizeof(size_t)) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
size_t numSubGroups = ((size_t*)input_value)[0];
|
|
|
|
// Infer the number of dimensions from 'param_value_size'
|
|
size_t dims = param_value_size / sizeof(size_t);
|
|
if (dims == 0 || dims > 3 || param_value_size != dims * sizeof(size_t)) {
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
*not_null(param_value_size_ret) = param_value_size;
|
|
|
|
size_t localSize;
|
|
localSize = numSubGroups * as_amd(device)->info().wavefrontWidth_;
|
|
if (localSize > devKernel->workGroupInfo()->size_) {
|
|
::memset(param_value, '\0', dims * sizeof(size_t));
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
switch (dims) {
|
|
case 3:
|
|
((size_t*)param_value)[2] = 1;
|
|
case 2:
|
|
((size_t*)param_value)[1] = 1;
|
|
case 1:
|
|
((size_t*)param_value)[0] = localSize;
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
default:
|
|
return CL_INVALID_VALUE;
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
RUNTIME_EXIT
|
|
|
|
/*! @}
|
|
* @}
|
|
* @}
|
|
*/
|