// // Copyright (c) 2008 Advanced Micro Devices, Inc. All rights reserved. // #include "cl_common.hpp" #include "vdi_common.hpp" #include "platform/context.hpp" #include "platform/program.hpp" #include "platform/kernel.hpp" #include "platform/sampler.hpp" #include "cl_semaphore_amd.h" #include static amd::Program* createProgram(cl_context context, cl_uint num_devices, const cl_device_id* device_list, cl_int* errcode_ret) { // Create the program amd::Program* program = new amd::Program(*as_amd(context)); if (program == NULL) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; return NULL; } // Add programs for all devices in the context. if (device_list == NULL) { const std::vector& devices = as_amd(context)->devices(); for (const auto& it : devices) { if (program->addDeviceProgram(*it) == CL_OUT_OF_HOST_MEMORY) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; program->release(); return NULL; } } return program; } *not_null(errcode_ret) = CL_SUCCESS; for (cl_uint i = 0; i < num_devices; ++i) { cl_device_id device = device_list[i]; if (!is_valid(device) || !as_amd(context)->containsDevice(as_amd(device))) { *not_null(errcode_ret) = CL_INVALID_DEVICE; program->release(); return NULL; } cl_int status = program->addDeviceProgram(*as_amd(device)); if (status == CL_OUT_OF_HOST_MEMORY) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; program->release(); return NULL; } } return program; } /*! \addtogroup API * @{ * * \addtogroup CL_Programs * * An OpenCL program consists of a set of kernels that are identified as * functions declared with the __kernel qualifier in the program source. * OpenCL programs may also contain auxiliary functions and constant data that * can be used by __kernel functions. The program executable can be generated * online or offline by the OpenCL compiler for the appropriate * target device(s). * * @{ * * \addtogroup CL_CreatingPrograms * @{ */ /*! 
\brief Create a program object for a context, and loads the source code * specified by the text strings in the strings array into the program object. * * \param context must be a valid OpenCL context. * * \param count is the number of pointers in \a strings * * \param strings is an array of \a count pointers to optionally * null-terminated character strings that make up the source code. * * \param lengths is an array with the number of chars in each string (the * string length). If an element in lengths is zero, its accompanying string * is null-terminated. If lengths is NULL, all strings in the strings argument * are considered null-terminated. * * \param errcode_ret will return an appropriate error code. If \a errcode_ret * is NULL, no error code is returned. * * \return A valid non-zero program object and errcode_ret is set to * \a CL_SUCCESS if the program object is created successfully. It returns a * NULL value with one of the following error values returned in * \a errcode_ret: * - CL_INVALID_CONTEXT if \a context is not a valid context. * - CL_INVALID_VALUE if \a count is zero or if \a strings or any entry in * \a strings is NULL. * - CL_COMPILER_NOT_AVAILABLE if a compiler is not available. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required * by the runtime. 
* * \version 1.0r33 */ RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithSource, (cl_context context, cl_uint count, const char** strings, const size_t* lengths, cl_int* errcode_ret)) { if (!is_valid(context)) { *not_null(errcode_ret) = CL_INVALID_CONTEXT; return (cl_program)0; } if (count == 0 || strings == NULL) { *not_null(errcode_ret) = CL_INVALID_VALUE; return (cl_program)0; } std::string sourceCode; for (cl_uint i = 0; i < count; ++i) { if (strings[i] == NULL) { *not_null(errcode_ret) = CL_INVALID_VALUE; return (cl_program)0; } if (lengths && lengths[i] != 0) { sourceCode.append(strings[i], lengths[i]); } else { sourceCode.append(strings[i]); } } if (sourceCode.empty()) { *not_null(errcode_ret) = CL_INVALID_VALUE; return (cl_program)0; } // Create the program amd::Program* program = new amd::Program(*as_amd(context), sourceCode, amd::Program::OpenCL_C); if (program == NULL) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; return (cl_program)0; } // Add programs for all devices in the context. const std::vector& devices = as_amd(context)->devices(); for (const auto& it : devices) { if (program->addDeviceProgram(*it) == CL_OUT_OF_HOST_MEMORY) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; program->release(); return (cl_program)0; } } *not_null(errcode_ret) = CL_SUCCESS; return as_cl(program); } RUNTIME_EXIT /*! \brief Create a program object for a context, and loads the IL into the * program object. * * \param context must be a valid OpenCL context. * * \param string is a pointer to IL. * * \param length is the size in bytes of IL. * * \param errcode_ret will return an appropriate error code. If \a errcode_ret * is NULL, no error code is returned. * * \return A valid non-zero program object and errcode_ret is set to * \a CL_SUCCESS if the program object is created successfully. It returns a * NULL value with one of the following error values returned in * \a errcode_ret: * - CL_INVALID_CONTEXT if \a context is not a valid context. 
* - CL_INVALID_VALUE if \a il is NULL or \a length is zero. * - CL_INVALID_VALUE if the \a length-byte memory pointed to by \a il does * not contain well-formed intermediate language input appropriate for the * deployment environment in which the OpenCL platform is running. * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required * by the OpenCL implementation on the device. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources * required by the OpenCL implementation on the host. * * \version 1.0r33 */ RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithIL, (cl_context context, const void* il, size_t length, cl_int* errcode_ret)) { if (!is_valid(context)) { *not_null(errcode_ret) = CL_INVALID_CONTEXT; return (cl_program)0; } if (length == 0 || il == NULL) { *not_null(errcode_ret) = CL_INVALID_VALUE; return (cl_program)0; } // Create the program amd::Program* program = new amd::Program(*as_amd(context), amd::Program::SPIRV); if (program == NULL) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; return (cl_program)0; } // Add programs for all devices in the context. const std::vector& devices = as_amd(context)->devices(); for (const auto& it : devices) { if (program->addDeviceProgram(*it, il, length) == CL_OUT_OF_HOST_MEMORY) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; program->release(); return (cl_program)0; } } *not_null(errcode_ret) = CL_SUCCESS; return as_cl(program); } RUNTIME_EXIT /*! \brief Create a program object for a context, and loads the binary images * into the program object. * * \param context must be a valid OpenCL context. * * \param device_list is a pointer to a list of devices that are in context. * \a device_list must be a non-NULL value. The binaries are loaded for devices * specified in this list. * * \param num_devices is the number of devices listed in \a device_list. * * \param device_list The devices associated with the program object. 
The * list of devices specified by \a device_list must be devices associated with * \a context. * * \param lengths is an array of the size in bytes of the program binaries to * be loaded for devices specified by \a device_list. * * \param binaries is an array of pointers to program binaries to be loaded * for devices specified by \a device_list. For each device given by * \a device_list[i], the pointer to the program binary for that device is * given by \a binaries[i] and the length of this corresponding binary is given * by \a lengths[i]. \a lengths[i] cannot be zero and \a binaries[i] cannot be * a NULL pointer. The program binaries specified by binaries contain the bits * that describe the program executable that will be run on the device(s) * associated with context. The program binary can consist of either or both: * - Device-specific executable(s) * - Implementation specific intermediate representation (IR) which will be * converted to the device-specific executable. * * \param binary_status returns whether the program binary for each device * specified in \a device_list was loaded successfully or not. It is an array * of \a num_devices entries and returns CL_SUCCESS in \a binary_status[i] if * binary was successfully loaded for device specified by \a device_list[i]; * otherwise returns CL_INVALID_VALUE if \a lengths[i] is zero or if * \a binaries[i] is a NULL value or CL_INVALID_BINARY in \a binary_status[i] * if program binary is not a valid binary for the specified device. * If \a binary_status is NULL, it is ignored. * * \param errcode_ret will return an appropriate error code. If \a errcode_ret * is NULL, no error code is returned. * * \return A valid non-zero program object and \a errcode_ret is set to * CL_SUCCESS if the program object is created successfully. It returns a NULL * value with one of the following error values returned in \a errcode_ret: * - CL_INVALID_CONTEXT if \a context is not a valid context. 
* - CL_INVALID_VALUE if \a device_list is NULL or \a num_devices is zero. * - CL_INVALID_DEVICE if OpenCL devices listed in \a device_list are not in * the list of devices associated with \a context * - CL_INVALID_VALUE if \a lengths or \a binaries are NULL or if any entry * in \a lengths[i] is zero or \a binaries[i] is NULL. * - CL_INVALID_BINARY if an invalid program binary was encountered for any * device. \a binary_status will return specific status for each device. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required * by the runtime. * * \version 1.0r33 */ RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithBinary, (cl_context context, cl_uint num_devices, const cl_device_id* device_list, const size_t* lengths, const unsigned char** binaries, cl_int* binary_status, cl_int* errcode_ret)) { if (!is_valid(context)) { *not_null(errcode_ret) = CL_INVALID_CONTEXT; return (cl_program)0; } if (num_devices == 0 || device_list == NULL || binaries == NULL || lengths == NULL) { *not_null(errcode_ret) = CL_INVALID_VALUE; return (cl_program)0; } amd::Program* program = new amd::Program(*as_amd(context)); if (program == NULL) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; return (cl_program)0; } *not_null(errcode_ret) = CL_SUCCESS; for (cl_uint i = 0; i < num_devices; ++i) { cl_device_id device = device_list[i]; if (!is_valid(device) || !as_amd(context)->containsDevice(as_amd(device))) { *not_null(errcode_ret) = CL_INVALID_DEVICE; program->release(); return (cl_program)0; } if (binaries[i] == NULL || lengths[i] == 0) { if (binary_status != NULL) { binary_status[i] = CL_INVALID_VALUE; } *not_null(errcode_ret) = CL_INVALID_VALUE; continue; } cl_int status = program->addDeviceProgram(*as_amd(device), binaries[i], lengths[i]); *not_null(errcode_ret) = status; if (status == CL_OUT_OF_HOST_MEMORY) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; program->release(); return (cl_program)0; } if (binary_status != NULL) { binary_status[i] = status; } } 
return as_cl(program); } RUNTIME_EXIT RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithAssemblyAMD, (cl_context context, cl_uint count, const char** strings, const size_t* lengths, cl_int* errcode_ret)) { if (!is_valid(context)) { *not_null(errcode_ret) = CL_INVALID_CONTEXT; return (cl_program)0; } if (count == 0 || strings == NULL) { *not_null(errcode_ret) = CL_INVALID_VALUE; return (cl_program)0; } std::string assembly; for (cl_uint i = 0; i < count; ++i) { if (strings[i] == NULL) { *not_null(errcode_ret) = CL_INVALID_VALUE; return (cl_program)0; } if (lengths && lengths[i] != 0) { assembly.append(strings[i], lengths[i]); } else { assembly.append(strings[i]); } } if (assembly.empty()) { *not_null(errcode_ret) = CL_INVALID_VALUE; return (cl_program)0; } // Create the program amd::Program* program = new amd::Program(*as_amd(context), assembly, amd::Program::Assembly); if (program == NULL) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; return (cl_program)0; } // Add programs for all devices in the context. const std::vector& devices = as_amd(context)->devices(); for (const auto& it : devices) { if (program->addDeviceProgram(*it) == CL_OUT_OF_HOST_MEMORY) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; program->release(); return (cl_program)0; } } *not_null(errcode_ret) = CL_SUCCESS; return as_cl(program); } RUNTIME_EXIT /*! \brief Increment the program reference count. * * clCreateProgram does an implicit retain. * * \return CL_SUCCESS if the function is executed successfully. It returns * CL_INVALID_PROGRAM if \a program is not a valid program object. * * \version 1.0r33 */ RUNTIME_ENTRY(cl_int, clRetainProgram, (cl_program program)) { if (!is_valid(program)) { return CL_INVALID_PROGRAM; } as_amd(program)->retain(); return CL_SUCCESS; } RUNTIME_EXIT /*! \brief Decrement the program reference count. * * The program object is deleted after all kernel objects associated with * \a program have been deleted and the program reference count becomes zero. 
* * \return CL_SUCCESS if the function is executed successfully. It returns * CL_INVALID_PROGRAM if \a program is not a valid program object. * * \version 1.0r33 */ RUNTIME_ENTRY(cl_int, clReleaseProgram, (cl_program program)) { if (!is_valid(program)) { return CL_INVALID_PROGRAM; } as_amd(program)->release(); return CL_SUCCESS; } RUNTIME_EXIT /*! @} * \addtogroup CL_Build * @{ */ /*! \brief Build (compile & link) a program executable from the program source * or binary for all the devices or a specific device(s) in the OpenCL context * associated with program. * * OpenCL allows program executables to be built using the sources or binaries. * * \param program is the program object. * * \param device_list is a pointer to a list of devices associated with * \a program. If \a device_list is a NULL value, the program executable is * built for all devices associated with \a program for which a source or * binary has been loaded. If \a device_list is a non-NULL value, the program * executable is built for devices specified in this list for which a source * or binary has been loaded. * * \param num_devices is the number of devices listed in \a device_list. * * \param options is a pointer to a string that describes the build options to * be used for building the program executable. * * \param pfn_notify is a function pointer to a notification routine. The * notification routine allows an application to register a callback function * which will be called when the program executable has been built * (successfully or unsuccessfully). If \a pfn_notify is not NULL, * clBuildProgram does not need to wait for the build to complete and can * return immediately. If \a pfn_notify is NULL, clBuildProgram does not * return until the build has completed. This callback function may be called * asynchronously by the OpenCL implementation. It is the application's * responsibility to ensure that the callback function is thread-safe. 
* * \param user_data will be passed as the argument when \a pfn_notify is * called. \a user_data can be NULL. * * \return One of the following values: * - CL_SUCCESS if the function is executed successfully * - CL_INVALID_PROGRAM if \a program is not a valid program object * - CL_INVALID_VALUE if \a device_list is NULL and \a num_devices is greater * than zero, or if \a device_list is not NULL and \a num_devices is zero, * - CL_INVALID_DEVICE if OpenCL devices listed in \a device_list are not in * the list of devices associated with \a program * - CL_INVALID_BINARY if \a program is created with clCreateWithProgramBinary * and devices listed in \a device_list do not have a valid program binary * loaded * - CL_INVALID_BUILD_OPTIONS if the build options specified by \a options are * invalid * - CL_INVALID_OPERATION if the build of a program executable for any of the * devices listed in \a device_list by a previous call to clBuildProgram for * \a program has not completed * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required * by the runtime. * * \version 1.0r33 */ RUNTIME_ENTRY(cl_int, clBuildProgram, (cl_program program, cl_uint num_devices, const cl_device_id* device_list, const char* options, void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), void* user_data)) { if (!is_valid(program)) { return CL_INVALID_PROGRAM; } if ((num_devices > 0 && device_list == NULL) || (num_devices == 0 && device_list != NULL)) { return CL_INVALID_VALUE; } amd::Program* amdProgram = as_amd(program); if (device_list == NULL) { // build for all devices in the context. 
return amdProgram->build(amdProgram->context().devices(), options, pfn_notify, user_data); } std::vector devices(num_devices); for (cl_uint i = 0; i < num_devices; ++i) { amd::Device* device = as_amd(device_list[i]); if (!amdProgram->context().containsDevice(device)) { return CL_INVALID_DEVICE; } devices[i] = device; } return amdProgram->build(devices, options, pfn_notify, user_data); } RUNTIME_EXIT /*! \brief compiles a program's source for all the devices or a specific * device(s) in the OpenCL context associated with program. The pre-processor * runs before the program sources are compiled. * The compiled binary is built for all devices associated with program or * the list of devices specified. The compiled binary can be queried using * \a clGetProgramInfo(program, CL_PROGRAM_BINARIES, ...) and can be specified * to \a clCreateProgramWithBinary to create a new program object. * * \param program is the program object that is the compilation target. * * \param device_list is a pointer to a list of devices associated with program. * If device_list is a NULL value, the compile is performed for all devices * associated with program. If device_list is a non-NULL value, the compile is * performed for devices specified in this list. * * \param num_devices is the number of devices listed in \a device_list. * * \param options is a pointer to a null-terminated string of characters that * describes the compilation options to be used for building the program * executable. The list of supported options is as described in section 5.6.4. * * \param num_input_headers specifies the number of programs that describe * headers in the array referenced by input_headers. * * \param input_headers is an array of program embedded headers created with * \a clCreateProgramWithSource. * * \param header_include_names is an array that has a one to one correspondence * with input_headers. 
* Each entry in \a header_include_names specifies the include name used by * source in program that comes from an embedded header. The corresponding entry * in input_headers identifies the program object which contains the header * source to be used. The embedded headers are first searched before the headers * in the list of directories specified by the -I compile option (as described in * section 5.6.4.1). If multiple entries in header_include_names refer to the same * header name, the first one encountered will be used. * * \param pfn_notify is a function pointer to a notification routine. The * notification routine is a callback function that an application can register * and which will be called when the program executable has been built * (successfully or unsuccessfully). If pfn_notify is not NULL, * \a clCompileProgram does not need to wait for the compiler to complete and can * return immediately. If \a pfn_notify is NULL, \a clCompileProgram does not * return until the compiler has completed. This callback function may be called * asynchronously by the OpenCL implementation. It is the application's * responsibility to ensure that the callback function is thread-safe. * * \param user_data will be passed as an argument when pfn_notify is called. * \a user_data can be NULL. * * \return CL_SUCCESS if the function is executed successfully. Otherwise, it * returns one of the following errors: * - CL_INVALID_PROGRAM if program is not a valid program object. * - CL_INVALID_VALUE if device_list is NULL and num_devices is greater than * zero, or if \a device_list is not NULL and \a num_devices is zero. * - CL_INVALID_VALUE if num_input_headers is zero and \a header_include_names * or input_headers are not NULL or if num_input_headers is not zero and * \a header_include_names or input_headers are NULL. * - CL_INVALID_VALUE if \a pfn_notify is NULL but \a user_data is not NULL. 
* - CL_INVALID_DEVICE if OpenCL devices listed in device_list are not in the * list of devices associated with program * - CL_INVALID_COMPILER_OPTIONS if the compiler options specified by options * are invalid. * - CL_INVALID_OPERATION if the compilation or build of a program executable * for any of the devices listed in device_list by a previous call to * \a clCompileProgram or \a clBuildProgram for program has not completed. * - CL_COMPILER_NOT_AVAILABLE if a compiler is not available i.e. * CL_DEVICE_COMPILER_AVAILABLE specified in table 4.3 is set to CL_FALSE. * - CL_COMPILE_PROGRAM_FAILURE if there is a failure to compile the program * source. This error will be returned if clCompileProgram does not return * until the compile has completed. * - CL_INVALID_OPERATION if there are kernel objects attached to program. * - CL_INVALID_OPERATION if program has no source i.e. it has not been created * with \a clCreateProgramWithSource. * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required * by the OpenCL implementation on the device. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required * by the OpenCL implementation on the host. 
* * \version 1.2r07 */ RUNTIME_ENTRY(cl_int, clCompileProgram, (cl_program program, cl_uint num_devices, const cl_device_id* device_list, const char* options, cl_uint num_input_headers, const cl_program* input_headers, const char** header_include_names, void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data), void* user_data)) { if (!is_valid(program)) { return CL_INVALID_PROGRAM; } if ((num_devices > 0 && device_list == NULL) || (num_devices == 0 && device_list != NULL)) { return CL_INVALID_VALUE; } if ((num_input_headers > 0 && (input_headers == NULL || header_include_names == NULL)) || (num_input_headers == 0 && (input_headers != NULL || header_include_names != NULL))) { return CL_INVALID_VALUE; } if (pfn_notify == NULL && user_data != NULL) { return CL_INVALID_VALUE; } amd::Program* amdProgram = as_amd(program); if (amdProgram->referenceCount() > 1) { return CL_INVALID_OPERATION; } std::vector headerPrograms(num_input_headers); for (cl_uint i = 0; i < num_input_headers; ++i) { if (!is_valid(input_headers[i])) { return CL_INVALID_OPERATION; } const amd::Program* headerProgram = as_amd(input_headers[i]); headerPrograms[i] = headerProgram; } if (device_list == NULL) { // compile for all devices in the context. return amdProgram->compile(amdProgram->context().devices(), num_input_headers, headerPrograms, header_include_names, options, pfn_notify, user_data); } std::vector devices(num_devices); for (cl_uint i = 0; i < num_devices; ++i) { amd::Device* device = as_amd(device_list[i]); if (!amdProgram->context().containsDevice(device)) { return CL_INVALID_DEVICE; } devices[i] = device; } return amdProgram->compile(devices, num_input_headers, headerPrograms, header_include_names, options, pfn_notify, user_data); } RUNTIME_EXIT /*! \brief links a set of compiled program objects and libraries for all * the devices or a specific device(s) in the OpenCL context and creates * an executable. 
clLinkProgram creates a new program object which contains * this executable. The executable binary can be queried using * \a clGetProgramInfo(program, CL_PROGRAM_BINARIES, ...) and can be specified * to \a clCreateProgramWithBinary to create a new program object. * The devices associated with the returned program object will be the list * of devices specified by device_list or if device_list is NULL it will be * the list of devices associated with context. * * \param context must be a valid OpenCL context. * * \param device_list is a pointer to a list of devices that are in context. * If device_list is a NULL value, the link is performed for all devices * associated with context for which a compiled object is available. * If device_list is a non-NULL value, the link is performed for devices * specified in this list for which a compiled object is available. * * \param num_devices is the number of devices listed in device_list. * * \param options is a pointer to a null-terminated string of characters * that describes the link options to be used for building the program * executable. The list of supported options is as described in section 5.6.5. * * \param num_input_programs specifies the number of programs in array * referenced by input_programs. * * \param input_programs is an array of program objects that are compiled * binaries or libraries that are to be linked to create the program executable. * For each device in device_list or if device_list is NULL the list of devices * associated with context, the following cases occur: * All programs specified by input_programs contain a compiled binary or * library for the device. In this case, a link is performed to generate * a program executable for this device. None of the programs contain * a compiled binary or library for that device. In this case, no link is * performed and there will be no program executable generated for this device. * All other cases will return a CL_INVALID_OPERATION error. 
* * \param pfn_notify is a function pointer to a notification routine. * The notification routine is a callback function that an application can * register and which will be called when the program executable has been built * (successfully or unsuccessfully). If \a pfn_notify is not NULL, * \a clLinkProgram does not need to wait for the linker to complete and can * return immediately. Once the linker has completed, the \a pfn_notify * callback function is called with a valid program object (if the link was * successful) or NULL (if the link encountered a failure). This callback * function may be called asynchronously by the OpenCL implementation. It is * the application's responsibility to ensure that the callback function is * thread-safe. If \a pfn_notify is NULL, \a clLinkProgram does not return * until the linker has completed. clLinkProgram returns a valid non-zero * program object (if the link was successful) or NULL (if the link * encountered a failure). * * \a user_data will be passed as an argument when \a pfn_notify is called. * user_data can be NULL. * * \return a valid non-zero program object and errcode_ret is set to CL_SUCCESS * if the link was successful in generating a program executable for at least * one device and the program object was created successfully. If \a pfn_notify * is not NULL, \a clLinkProgram returns a NULL program object and * \a errcode_ret is set to CL_SUCCESS if the function was executed * successfully. Otherwise, it returns one of the following errors: * - CL_INVALID_CONTEXT if context is not a valid context. * - CL_INVALID_VALUE if device_list is NULL and num_devices is greater than * zero, or if \a device_list is not NULL and \a num_devices is zero. * - CL_INVALID_VALUE if \a num_input_programs is zero and \a input_programs * is NULL or if \a num_input_programs is zero and \a input_programs is not * NULL or if \a num_input_programs is not zero and \a input_programs is NULL. 
* - CL_INVALID_PROGRAM if programs specified in \a input_programs are not * valid program objects. * - CL_INVALID_VALUE if \a pfn_notify is NULL but \a user_data is not NULL. * - CL_INVALID_DEVICE if OpenCL devices listed in \a device_list are not in * the list of devices associated with context * - CL_INVALID_LINKER_OPTIONS if the linker options specified by options are * invalid. * - CL_INVALID_OPERATION if the compilation or build of a program executable * for any of the devices listed in \a device_list by a previous call to * clCompileProgram or clBuildProgram for program has not completed. * - CL_INVALID_OPERATION if the rules for devices containing compiled binaries * or libraries as described in \a input_programs argument above are * not followed. * - CL_LINKER_NOT_AVAILABLE if a linker is not available i.e. * CL_DEVICE_LINKER_AVAILABLE specified in table 4.3 is set to CL_FALSE. * - CL_LINK_PROGRAM_FAILURE if there is a failure to link the compiled * binaries and/or libraries. * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required * by the OpenCL implementation on the device. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required * by the OpenCL implementation on the host. 
*
* \version 1.2r07
*/
RUNTIME_ENTRY_RET(cl_program, clLinkProgram,
                  (cl_context context, cl_uint num_devices, const cl_device_id* device_list,
                   const char* options, cl_uint num_input_programs,
                   const cl_program* input_programs,
                   void(CL_CALLBACK* pfn_notify)(cl_program program, void* user_data),
                   void* user_data, cl_int* errcode_ret)) {
  if (!is_valid(context)) {
    *not_null(errcode_ret) = CL_INVALID_CONTEXT;
    return (cl_program)0;
  }

  // num_devices and device_list must be both set or both unset.
  if ((num_devices > 0 && device_list == NULL) || (num_devices == 0 && device_list != NULL)) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return (cl_program)0;
  }

  // At least one input program is required.
  if (num_input_programs == 0 || input_programs == NULL) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return (cl_program)0;
  }

  // user_data without a callback to receive it is rejected.
  if (pfn_notify == NULL && user_data != NULL) {
    *not_null(errcode_ret) = CL_INVALID_VALUE;
    return (cl_program)0;
  }

  // Translate the input handles into runtime objects.
  std::vector<amd::Program*> inputPrograms(num_input_programs);
  for (cl_uint i = 0; i < num_input_programs; ++i) {
    if (!is_valid(input_programs[i])) {
      *not_null(errcode_ret) = CL_INVALID_PROGRAM;
      return (cl_program)0;
    }
    amd::Program* inputProgram = as_amd(input_programs[i]);
    inputPrograms[i] = inputProgram;
  }

  // createProgram() reports its own failure through errcode_ret.
  amd::Program* program = createProgram(context, num_devices, device_list, errcode_ret);
  if (program == NULL) {
    return (cl_program)0;
  }

  // Every path below writes errcode_ret before returning.
  cl_int status;
  if (device_list == NULL) {
    // link for all devices in the context.
    status = program->link(as_amd(context)->devices(), num_input_programs, inputPrograms,
                           options, pfn_notify, user_data);
  } else {
    std::vector<amd::Device*> devices(num_devices);
    for (cl_uint i = 0; i < num_devices; ++i) {
      amd::Device* device = as_amd(device_list[i]);
      if (!as_amd(context)->containsDevice(device)) {
        program->release();
        *not_null(errcode_ret) = CL_INVALID_DEVICE;
        return (cl_program)0;
      }
      devices[i] = device;
    }
    status = program->link(devices, num_input_programs, inputPrograms, options, pfn_notify,
                           user_data);
  }

  *not_null(errcode_ret) = status;
  if (status == CL_SUCCESS) {
    return as_cl(program);
  }

  // The link failed: drop the program created above.
  program->release();
  return (cl_program)0;
}
RUNTIME_EXIT

/*! \brief creates a program object for a context, and loads the information
 *  related to the built-in kernels into a program object.
 *
 *  \param context must be a valid OpenCL context.
 *
 *  \param num_devices is the number of devices listed in device_list.
 *
 *  \param device_list is a pointer to a list of devices that are in context.
 *  \a device_list must be a non-NULL value. The built-in kernels are loaded
 *  for devices specified in this list. The devices associated with the
 *  program object will be the list of devices specified by \a device_list.
 *  The list of devices specified by \a device_list must be devices associated
 *  with context.
 *
 *  \param kernel_names is a semi-colon separated list of built-in kernel names.
 *
 *  \return a valid non-zero program object and \a errcode_ret is set to
 *  CL_SUCCESS if the program object is created successfully. Otherwise, it
 *  returns a NULL value with one of the following error values returned
 *  in errcode_ret:
 *  - CL_INVALID_CONTEXT if context is not a valid context.
 *  - CL_INVALID_VALUE if device_list is NULL or num_devices is zero.
 *  - CL_INVALID_VALUE if kernel_names is NULL or kernel_names contains a kernel
 *    name that is not supported by any of the devices in \a device_list.
* - CL_INVALID_DEVICE if devices listed in device_list are not in the list * of devices associated with context. * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required * by the OpenCL implementation on the device. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required * by the OpenCL implementation on the host. * * \version 1.2r07 */ RUNTIME_ENTRY_RET(cl_program, clCreateProgramWithBuiltInKernels, (cl_context context, cl_uint num_devices, const cl_device_id* device_list, const char* kernel_names, cl_int* errcode_ret)) { //!@todo Add implementation amd::Program* program = NULL; Unimplemented(); return as_cl(program); } RUNTIME_EXIT /*! @} * \addtogroup CL_Unloading * @{ */ /*! \brief Allows the implementation to release the resources allocated by * the OpenCL compiler for platform. This is a hint from the application * and does not guarantee that the compiler will not be used in the future * or that the compiler will actually be unloaded by the implementation. * Calls to \a clBuildProgram, \a clCompileProgram or \a clLinkProgram after * \a clUnloadPlatformCompiler will reload the compiler, * if necessary, to build the appropriate program executable. * * \return CL_SUCCESS if the function is executed successfully. * Otherwise, it returns one of the following errors: * - CL_INVALID_PLATFORM if platform is not a valid platform. * * \version 1.2r07 */ RUNTIME_ENTRY(cl_int, clUnloadPlatformCompiler, (cl_platform_id platform)) { if (platform != NULL && platform != AMD_PLATFORM) { return CL_INVALID_PLATFORM; } //! @todo: Implement Compiler::unload() return CL_SUCCESS; } RUNTIME_EXIT /*! \brief Allow to runtime to release the resources allocated by the OpenCL * compiler. * * This is a hint from the application and does not guarantee that the compiler * will not be used in the future or that the compiler will actually be * unloaded by the implementation. 
* * Calls to clBuildProgram after clUnloadCompiler may reload the compiler, * if necessary, to build the appropriate program executable. * * \return This call currently always returns CL_SUCCESS * * \version 1.0r33 */ RUNTIME_ENTRY(cl_int, clUnloadCompiler, (void)) { //! @todo: Implement Compiler::unload() return CL_SUCCESS; } RUNTIME_EXIT /*! @} * \addtogroup CL_ProgramQueries * @{ */ /*! \brief Return information about the program object. * * \param program specifies the program object being queried. * * \param param_name specifies the information to query. * * \param param_value is a pointer to memory where the appropriate result * being queried is returned. If \a param_value is NULL, it is ignored. * * \param param_value_size is used to specify the size in bytes of memory * pointed to by \a param_value. This size must be >= size of return type. * * \param param_value_size_ret returns the actual size in bytes of data copied * to \a param_value. If \a param_value_size_ret is NULL, it is ignored. * * \return One of the following values: * - CL_SUCCESS if the function is executed successfully * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes * specified by \a param_value_size is < size of return type and * \a param_value is not NULL * - CL_INVALID_PROGRAM_EXECUTABLE if param_name is * CL_PROGRAM_NUM_KERNELS or CL_PROGRAM_KERNEL_NAMES and a successful * program executable has not been built for at least one device in the list * of devices associated with program. 
* - CL_INVALID_PROGRAM if \a program is not a valid program object
 *
 *  \version 1.2r07
 */
RUNTIME_ENTRY(cl_int, clGetProgramInfo,
              (cl_program program, cl_program_info param_name, size_t param_value_size,
               void* param_value, size_t* param_value_size_ret)) {
  if (!is_valid(program)) {
    return CL_INVALID_PROGRAM;
  }

  switch (param_name) {
    case CL_PROGRAM_REFERENCE_COUNT: {
      cl_uint count = as_amd(program)->referenceCount();
      return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_CONTEXT: {
      // The program only exposes a const context; the API hands out a mutable handle.
      cl_context context = const_cast<cl_context>(as_cl(&as_amd(program)->context()));
      return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_NUM_DEVICES: {
      cl_uint numDevices = (cl_uint)as_amd(program)->deviceList().size();
      return amd::clGetInfo(numDevices, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_DEVICES: {
      const amd::Program::devicelist_t& devices = as_amd(program)->deviceList();
      const size_t numDevices = devices.size();
      const size_t valueSize = numDevices * sizeof(cl_device_id);

      if (param_value != NULL && param_value_size < valueSize) {
        return CL_INVALID_VALUE;
      }
      *not_null(param_value_size_ret) = valueSize;
      if (param_value != NULL) {
        cl_device_id* device_list = (cl_device_id*)param_value;
        for (const auto& it : devices) {
          *device_list++ = const_cast<cl_device_id>(as_cl(it));
        }
        // Zero whatever part of the caller's buffer lies beyond the payload.
        if (param_value_size > valueSize) {
          ::memset(static_cast<char*>(param_value) + valueSize, '\0',
                   param_value_size - valueSize);
        }
      }
      return CL_SUCCESS;
    }
    case CL_PROGRAM_SOURCE: {
      const char* source = as_amd(program)->sourceCode().c_str();
      return amd::clGetInfo(source, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_BINARY_SIZES: {
      amd::Program* amdProgram = as_amd(program);
      const amd::Program::devicelist_t& devices = amdProgram->deviceList();
      const size_t numBinaries = devices.size();
      const size_t valueSize = numBinaries * sizeof(size_t);

      if (param_value != NULL && param_value_size < valueSize) {
        return CL_INVALID_VALUE;
      }
      *not_null(param_value_size_ret) = valueSize;
      if (param_value != NULL) {
        // One size_t per device, in deviceList() order.
        size_t* binary_sizes = (size_t*)param_value;
        for (const auto& it : devices) {
          *binary_sizes++ = amdProgram->getDeviceProgram(*it)->binary().second;
        }
        if (param_value_size > valueSize) {
          ::memset(static_cast<char*>(param_value) + valueSize, '\0',
                   param_value_size - valueSize);
        }
      }
      return CL_SUCCESS;
    }
    case CL_PROGRAM_BINARIES: {
      amd::Program* amdProgram = as_amd(program);
      const amd::Program::devicelist_t& devices = amdProgram->deviceList();
      const size_t numBinaries = devices.size();
      const size_t valueSize = numBinaries * sizeof(char*);

      if (param_value != NULL && param_value_size < valueSize) {
        return CL_INVALID_VALUE;
      }
      *not_null(param_value_size_ret) = valueSize;
      if (param_value != NULL) {
        // param_value is an array of caller-owned destination buffers, one per device.
        char** binaries = (char**)param_value;
        for (const auto& it : devices) {
          const device::Program::binary_t& binary = amdProgram->getDeviceProgram(*it)->binary();
          // If an entry value in the array is NULL,
          // then runtime should skip copying the program binary
          if (*binaries != NULL) {
            ::memcpy(*binaries, binary.first, binary.second);
          }
          binaries++;
        }
        if (param_value_size > valueSize) {
          ::memset(static_cast<char*>(param_value) + valueSize, '\0',
                   param_value_size - valueSize);
        }
      }
      return CL_SUCCESS;
    }
    case CL_PROGRAM_NUM_KERNELS: {
      if (as_amd(program)->symbolsPtr() == NULL) {
        return CL_INVALID_PROGRAM_EXECUTABLE;
      }
      size_t numKernels = as_amd(program)->symbols().size();
      return amd::clGetInfo(numKernels, param_value_size, param_value, param_value_size_ret);
    }
    case CL_PROGRAM_KERNEL_NAMES: {
      // The documented contract of this entry point lists
      // CL_INVALID_PROGRAM_EXECUTABLE for this query too, so apply the same
      // "has anything been built" test that CL_PROGRAM_NUM_KERNELS uses.
      if (as_amd(program)->symbolsPtr() == NULL) {
        return CL_INVALID_PROGRAM_EXECUTABLE;
      }
      const char* kernelNames = as_amd(program)->kernelNames().c_str();
      return amd::clGetInfo(kernelNames, param_value_size, param_value, param_value_size_ret);
    }
    default:
      break;
  }

  return CL_INVALID_VALUE;
}
RUNTIME_EXIT

/*! \brief Return build information for each device in the program object.
 *
 *  \param program specifies the program object being queried.
 *
 *  \param device specifies the device for which build information is being
 *  queried. device must be a valid device associated with \a program.
 *
 *  \param param_name specifies the information to query.
 *
 *  \param param_value is a pointer to memory where the appropriate result being
 *  queried is returned. If \a param_value is NULL, it is ignored.
 *
 *  \param param_value_size is used to specify the size in bytes of memory
 *  pointed to by \a param_value. This size must be >= size of return type
 *
 *  \param param_value_size_ret returns the actual size in bytes of data copied
 *  to \a param_value. If \a param_value_size_ret is NULL, it is ignored.
* * \return One of the following values: * - CL_SUCCESS if the function is executed successfully * - CL_INVALID_DEVICE if \a device is not in the list of devices associated * with \a program * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes * specified by \a param_value_size is < size of return type and * \a param_value is not NULL * - CL_INVALID_PROGRAM if \a program is a not a valid program object * * \version 1.0r33 */ RUNTIME_ENTRY(cl_int, clGetProgramBuildInfo, (cl_program program, cl_device_id device, cl_program_build_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { if (!is_valid(program)) { return CL_INVALID_PROGRAM; } if (!is_valid(device)) { return CL_INVALID_DEVICE; } const device::Program* devProgram = as_amd(program)->getDeviceProgram(*as_amd(device)); if (devProgram == NULL) { return CL_INVALID_DEVICE; } switch (param_name) { case CL_PROGRAM_BUILD_STATUS: { cl_build_status status = devProgram->buildStatus(); return amd::clGetInfo(status, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_BUILD_OPTIONS: { const std::string optionsStr = devProgram->lastBuildOptionsArg(); const char* options = optionsStr.c_str(); return amd::clGetInfo(options, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_BUILD_LOG: { const std::string logstr = as_amd(program)->programLog() + devProgram->buildLog().c_str(); const char* log = logstr.c_str(); return amd::clGetInfo(log, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_BINARY_TYPE: { const device::Program::type_t devProgramType = devProgram->type(); cl_uint type; switch (devProgramType) { case device::Program::TYPE_NONE: { type = CL_PROGRAM_BINARY_TYPE_NONE; break; } case device::Program::TYPE_COMPILED: { type = CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT; break; } case device::Program::TYPE_LIBRARY: { type = CL_PROGRAM_BINARY_TYPE_LIBRARY; break; } case device::Program::TYPE_EXECUTABLE: { type = 
CL_PROGRAM_BINARY_TYPE_EXECUTABLE; break; } case device::Program::TYPE_INTERMEDIATE: { type = CL_PROGRAM_BINARY_TYPE_INTERMEDIATE; break; } default: return CL_INVALID_VALUE; } return amd::clGetInfo(type, param_value_size, param_value, param_value_size_ret); } case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: { size_t size = devProgram->globalVariableTotalSize(); return amd::clGetInfo(size, param_value_size, param_value, param_value_size_ret); } default: break; } return CL_INVALID_VALUE; } RUNTIME_EXIT /*! \brief Sets the values of a SPIR-V specialization constants. * * \param program must be a valid OpenCL program created from a SPIR-V module. * * \param spec id_ identifies the SPIR-V specialization constant whose value will be set. * * \param spec_size specifies the size in bytes of the data pointed to by spec_value. This should * be 1 for boolean constants. For all other constant types this should match the size of the * specialization constant in the SPIR-V module. * * \param spec_value is a pointer to the memory location that contains the value of the * specialization constant. The data pointed to by \a spec_value are copied and can be safely * reused by the application after \a clSetProgramSpecializationConstant returns. This * specialization value will be used by subsequent calls to \a clBuildProgram until another call to * \a clSetProgramSpecializationConstant changes it. If a specialization constant is a boolean * constant, _spec value_should be a pointer to a cl_uchar value. A value of zero will set the * specialization constant to false; any other value will set it to true. * * Calling this function multiple times for the same specialization constant shall cause the last * provided value to override any previously specified value. The values are used by a subsequent * \a clBuildProgram call for the program. * * Application is not required to provide values for every specialization constant contained in * SPIR-V module. 
SPIR-V provides default values for all specialization constants. * * \return One of the following values: * - CL_SUCCESS if the function is executed successfully. * - CL_INVALID_PROGRAM if program is not a valid program object created from a SPIR-V module. * - CL_INVALID_SPEC_ID if spec_id is not a valid specialization constant ID * - CL_INVALID_VALUE if spec_size does not match the size of the specialization constant in the * SPIR-V module, or if spec_value is NULL. * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL * implementation on the device. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL * implementation on the host. * * \version 2.2-3 */ RUNTIME_ENTRY(cl_int, clSetProgramSpecializationConstant, (cl_program program, cl_uint spec_id, size_t spec_size, const void* spec_value)) { if (!is_valid(program)) { return CL_INVALID_PROGRAM; } return CL_INVALID_VALUE; } RUNTIME_EXIT /*! \brief registers a user callback function with a program object. Each call to * \a clSetProgramReleaseCallback registers the specified user callback function on a callback stack * associated with program. The registered user callback functions are called in the reverse order * in which they were registered. The user callback functions are called after destructors (if any) * for program scope global variables (if any) are called and before the program is released. * This provides a mechanism for the application (and libraries) to be notified when destructors * are complete. * * \param program is a valid program object * * \param pfn_notify is the callback function that can be registered by the application. This * callback function may be called asynchronously by the OpenCL implementation. It is the * application's responsibility to ensure that the callback function is thread safe. The parameters * to this callback function are: * - \a prog is the program object whose destructors are being called. 
When the user callback is * called by the implementation, this program object is not longer valid. \a prog is only provided * for reference purposes. * - \a user_data is a pointer to user supplied data. \a user_data will be passed as the * \a user_data argument when pfn_notify is called. user data can be NULL. * * \return One of the following values: * - CL_SUCCESS if the function is executed successfully. * - CL_INVALID_PROGRAM if program is not a valid program object. * - CL_INVALID_VALUE if pfn_notify is NULL. * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by the OpenCL * implementation on the device. * * \version 2.2-3 */ RUNTIME_ENTRY(cl_int, clSetProgramReleaseCallback, (cl_program program, void (CL_CALLBACK *pfn_notify)( cl_program program, void *user_data ), void *user_data)) { if (!is_valid(program)) { return CL_INVALID_PROGRAM; } return CL_INVALID_VALUE; } RUNTIME_EXIT /*! @} * @} * * \addtogroup CL_Kernels * * A kernel is a function declared in a program. A kernel is identified by the * __kernel qualifier applied to any function in a program. A kernel object * encapsulates the specific __kernel function declared in a program and * the argument values to be used when executing this __kernel function. * * @{ * * \addtogroup CL_CreateKernel * @{ */ /*! \brief Create a kernel object. * * \param program is a program object with a successfully built executable. * * \param kernel_name is a function name in the program declared with the * __kernel qualifier. * * \param errcode_ret will return an appropriate error code. If \a errcode_ret * is NULL, no error code is returned. * * \return A valid non-zero kernel object and \a errcode_ret is set to * CL_SUCCESS if the kernel object is created successfully. 
It returns a NULL * value with one of the following error values returned in \a errcode_ret: * - CL_INVALID_PROGRAM if \a program is not a valid program object * - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable * for \a program. * - CL_INVALID_KERNEL_NAME if \a kernel_name is not found in \a program. * - CL_INVALID_KERNEL_DEFINITION if the function definition for __kernel * function given by \a kernel_name such as the number of arguments, the * argument types are not the same for all devices for which the program * executable has been built. * - CL_INVALID_VALUE if \a kernel_name is NULL. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required * by the runtime. * * \version 1.0r33 */ RUNTIME_ENTRY_RET(cl_kernel, clCreateKernel, (cl_program program, const char* kernel_name, cl_int* errcode_ret)) { if (!is_valid(program)) { *not_null(errcode_ret) = CL_INVALID_PROGRAM; return (cl_kernel)0; } if (kernel_name == NULL) { *not_null(errcode_ret) = CL_INVALID_VALUE; return (cl_kernel)0; } /* FIXME_lmoriche, FIXME_spec: What are we supposed to do here? * if (!as_amd(program)->containsOneSuccesfullyBuiltProgram()) * { * *NotNull(errcode) = CL_INVALID_PROGRAM_EXECUTABLE; * return (cl_kernel) 0; * } */ amd::Program* amd_program = as_amd(program); const amd::Symbol* symbol = amd_program->findSymbol(kernel_name); if (symbol == NULL) { *not_null(errcode_ret) = CL_INVALID_KERNEL_NAME; return (cl_kernel)0; } amd::Kernel* kernel = new amd::Kernel(*amd_program, *symbol, kernel_name); if (kernel == NULL) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; return (cl_kernel)0; } *not_null(errcode_ret) = CL_SUCCESS; return as_cl(kernel); } RUNTIME_EXIT /*! \brief Create kernel objects for all kernel functions in program. * * Kernel objects may not be created for any __kernel functions in program * that do not have the same function definition across all devices for which * a program executable has been successfully built. 
* * \param program is a program object with a successfully built executable. * * \param num_kernels is the size of memory pointed to by \a kernels specified * as the number of cl_kernel entries. * * \param kernels is the buffer where the kernel objects for kernels in * \a program will be returned. If \a kernels is NULL, it is ignored. * If \a kernels is not NULL, \a num_kernels must be greater than or equal * to the number of kernels in program. * * \param num_kernels_ret is the number of kernels in program. If * \a num_kernels_ret is NULL, it is ignored. * * \return One of the following values: * - CL_SUCCESS if the kernel objects were successfully allocated * - CL_INVALID_PROGRAM if \a program is not a valid program object * - CL_INVALID_PROGRAM_EXECUTABLE if there is no successfully built executable * for any device in \a program * - CL_INVALID_VALUE if \a kernels is not NULL and \a num_kernels is less * than the number of kernels in program * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required * by the runtime. * * Kernel objects can only be created once you have a program object with a * valid program source or binary loaded into the program object and the * program executable has been successfully built for one or more devices * associated with \a program. No changes to the program executable are * allowed while there are kernel objects associated with a program object. * This means that calls to clBuildProgram return CL_INVALID_OPERATION if there * are kernel objects attached to a program object. The OpenCL context * associated with program will be the context associated with kernel. * Devices associated with a program object for which a valid program * executable has been built can be used to execute kernels declared in the * program object. 
* * \version 1.0r33 */ RUNTIME_ENTRY(cl_int, clCreateKernelsInProgram, (cl_program program, cl_uint num_kernels, cl_kernel* kernels, cl_uint* num_kernels_ret)) { if (!is_valid(program)) { return CL_INVALID_PROGRAM; } cl_uint numKernels = (cl_uint)as_amd(program)->symbols().size(); if (kernels != NULL && num_kernels < numKernels) { return CL_INVALID_VALUE; } *not_null(num_kernels_ret) = numKernels; if (kernels == NULL) { return CL_SUCCESS; } const amd::Program::symbols_t& symbols = as_amd(program)->symbols(); cl_kernel* result = kernels; for (const auto& it : symbols) { amd::Kernel* kernel = new amd::Kernel(*as_amd(program), it.second, it.first); if (kernel == NULL) { while (--result >= kernels) { as_amd(*result)->release(); } return CL_OUT_OF_HOST_MEMORY; } *result++ = as_cl(kernel); } return CL_SUCCESS; } RUNTIME_EXIT /*! \brief Increment the kernel reference count. * * \return CL_SUCCESS if the function is executed successfully. It returns * CL_INVALID_KERNEL if \a kernel is not a valid kernel object. * * clCreateKernel or clCreateKernelsInProgram do an implicit retain. * * \version 1.0r33 */ RUNTIME_ENTRY(cl_int, clRetainKernel, (cl_kernel kernel)) { if (!is_valid(kernel)) { return CL_INVALID_KERNEL; } as_amd(kernel)->retain(); return CL_SUCCESS; } RUNTIME_EXIT /*! \brief Decrement the kernel reference count. * * \return CL_SUCCESS if the function is executed successfully. It returns * CL_INVALID_KERNEL if \a kernel is not a valid kernel object. * * The kernel object is deleted once the number of instances that are retained * to \a kernel become zero and after all queued execution instances of * \a kernel have finished. * * \version 1.0r33 */ RUNTIME_ENTRY(cl_int, clReleaseKernel, (cl_kernel kernel)) { if (!is_valid(kernel)) { return CL_INVALID_KERNEL; } as_amd(kernel)->release(); return CL_SUCCESS; } RUNTIME_EXIT /*! \brief Makes a shallow copy of the kernel object, its arguments and any * information passed to the kernel object using \a clSetKernelExecInfo. 
If * the kernel object was ready to be enqueued before copying it, the clone of * the kernel object is ready to enqueue. * * \param source_kernel is a valid cl_kernel object that will be copied. * source_kernel will not be modified in any way by this function. * * \param errcode_ret will be assigned an appropriate error code. If * errcode_ret is NULL, no error code is returned. * * \return a valid non-zero kernel object and errcode_ret is set to * CL_SUCCESS if the kernel is successfully copied. Otherwise it returns a * NULL value with one of the following error values returned in errcode_ret: * - CL_INVALID_KERNEL if kernel is not a valid kernel object. * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required * by the OpenCL implementation on the device. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources * required by the OpenCL implementation on the host. * * \version 2.1r01 */ RUNTIME_ENTRY_RET(cl_kernel, clCloneKernel, (cl_kernel source_kernel, cl_int* errcode_ret)) { if (!is_valid(source_kernel)) { *not_null(errcode_ret) = CL_INVALID_KERNEL; return (cl_kernel)0; } amd::Kernel* kernel = new amd::Kernel(*as_amd(source_kernel)); if (kernel == NULL) { *not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY; return (cl_kernel)0; } *not_null(errcode_ret) = CL_SUCCESS; return as_cl(kernel); } RUNTIME_EXIT /*! @} * \addtogroup CL_SettingArgs * @{ */ /*! \brief Set the argument value for a specific argument of a kernel. * * \param kernel is a valid kernel object. * * \param arg_index is the argument index. Arguments to the kernel are referred * by indices that go from 0 for the leftmost argument to n - 1, where n is the * total number of arguments declared by a kernel. * * \param arg_value is a pointer to data that should be used as the argument * value for argument specified by \a arg_index. 
The argument data pointed to * by \a arg_value is copied and the \a arg_value pointer can therefore be * reused by the application after clSetKernelArg returns. If the argument is * a memory object (buffer or image), the \a arg_value entry will be a pointer * to the appropriate buffer or image object. The memory object must be created * with the context associated with the kernel object. If the argument is * declared with the __local qualifier, the \a arg_value entry must be NULL. * For all other kernel arguments, the \a arg_value entry must be a pointer to * the actual data to be used as argument value. The memory object specified * as argument value must be a buffer object if the argument is declared to be * a pointer of a built-in or user defined type with the __global or __constant * qualifier. If the argument is declared with the __constant qualifier, the * size in bytes of the memory object cannot exceed * CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE and the number of arguments declared * with the __constant qualifier cannot exceed CL_DEVICE_MAX_CONSTANT_ARGS. The * memory object specified as argument value must be a 2D image object if the * argument is declared to be of type image2d_t. The memory object specified as * argument value must be a 3D image object if argument is declared to be of * type image3d_t. If the argument is of type sampler_t, the arg_value entry * must be a pointer to the sampler object. * * \param arg_size specifies the size of the argument value. If the argument is * a memory object, the size is the size of the buffer or image object type. * For arguments declared with the __local qualifier, the size specified will * be the size in bytes of the buffer that must be allocated for the __local * argument. If the argument is of type sampler_t, the arg_size value must be * equal to sizeof(cl_sampler). For all other arguments, the size will be the * size of argument type. 
* * \return One of the following values: * - CL_SUCCESS if the function was executed successfully * - CL_INVALID_KERNEL if \a kernel is not a valid kernel object. * - CL_INVALID_ARG_INDEX if \a arg_index is not a valid argument index. * - CL_INVALID_ARG_VALUE if \a arg_value specified is NULL for an argument * that is not declared with the __local qualifier or vice-versa. * - CL_INVALID_MEM_OBJECT for an argument declared to be a memory object but * the specified \a arg_value is not a valid memory object. * - CL_INVALID_SAMPLER for an argument declared to be of type sampler_t but * the specified \a arg_value is not a valid sampler object. * - CL_INVALID_ARG_SIZE if \a arg_size does not match the size of the data * type for an argument that is not a memory object or if the argument is a * memory object and \a arg_size != sizeof(cl_mem) or if \a arg_size is zero * and the argument is declared with the __local qualifier or if the * argument is a sampler and arg_size != sizeof(cl_sampler). * * \version 1.0r33 */ RUNTIME_ENTRY(cl_int, clSetKernelArg, (cl_kernel kernel, cl_uint arg_index, size_t arg_size, const void* arg_value)) { if (!is_valid(kernel)) { return CL_INVALID_KERNEL; } const amd::KernelSignature& signature = as_amd(kernel)->signature(); if (arg_index >= signature.numParameters()) { return CL_INVALID_ARG_INDEX; } const amd::KernelParameterDescriptor& desc = signature.at(arg_index); const bool is_local = (desc.addressQualifier_ == CL_KERNEL_ARG_ADDRESS_LOCAL); if (((arg_value == NULL) && !is_local && (desc.type_ != T_POINTER)) || ((arg_value != NULL) && is_local)) { as_amd(kernel)->parameters().reset(static_cast(arg_index)); return CL_INVALID_ARG_VALUE; } if (!is_local && (desc.type_ == T_POINTER) && (arg_value != NULL)) { cl_mem memObj = *static_cast(arg_value); amd::RuntimeObject* pObject = as_amd(memObj); if (NULL != memObj && amd::RuntimeObject::ObjectTypeMemory != pObject->objectType()) { as_amd(kernel)->parameters().reset(static_cast(arg_index)); return 
CL_INVALID_MEM_OBJECT; } } else if ((desc.type_ == T_SAMPLER) && !is_valid(*static_cast(arg_value))) { return CL_INVALID_SAMPLER; } else if (desc.type_ == T_QUEUE) { cl_command_queue queue = *static_cast(arg_value); if (!is_valid(queue)) { as_amd(kernel)->parameters().reset(static_cast(arg_index)); return CL_INVALID_DEVICE_QUEUE; } if (NULL == as_amd(queue)->asDeviceQueue()) { as_amd(kernel)->parameters().reset(static_cast(arg_index)); return CL_INVALID_DEVICE_QUEUE; } } if ((!is_local && (arg_size != desc.size_)) || (is_local && (arg_size == 0))) { if (LP64_ONLY(true ||) ((desc.type_ != T_POINTER) && (desc.type_ != T_SAMPLER)) || (arg_size != sizeof(void*))) { as_amd(kernel)->parameters().reset(static_cast(arg_index)); return CL_INVALID_ARG_SIZE; } } as_amd(kernel)->parameters().set(static_cast(arg_index), arg_size, arg_value); return CL_SUCCESS; } RUNTIME_EXIT /*! @} * \addtogroup CL_KernelQuery * @{ */ /*! \brief Return information about the kernel object. * * \param kernel specifies the kernel object being queried. * * \param param_name specifies the information to query. * * \param param_value is a pointer to memory where the appropriate result * being queried is returned. If \a param_value is NULL, it is ignored. * * \param param_value_size is used to specify the size in bytes of memory * pointed to by \a param_value. This size must be >= size of return type. * * \param param_value_size_ret returns the actual size in bytes of data copied * to \a param_value. If \a param_value_size_ret is NULL, it is ignored. * * \return One of the following values: * - CL_SUCCESS if the function is executed successfully * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes * specified by \a param_value_size is < size of return type and * \a param_value is not NULL * - CL_INVALID_KERNEL if \a kernel is a not a valid kernel object. 
* * \version 1.0r33 */ RUNTIME_ENTRY(cl_int, clGetKernelInfo, (cl_kernel kernel, cl_kernel_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { // Check if we have a valid kernel if (!is_valid(kernel)) { return CL_INVALID_KERNEL; } const amd::Kernel* amdKernel = as_amd(kernel); // Get the corresponded parameters switch (param_name) { case CL_KERNEL_FUNCTION_NAME: { const char* name = amdKernel->name().c_str(); // Return the kernel's name return amd::clGetInfo(name, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_NUM_ARGS: { cl_uint numParam = static_cast(amdKernel->signature().numParameters()); // Return the number of kernel's parameters return amd::clGetInfo(numParam, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_REFERENCE_COUNT: { cl_uint count = amdKernel->referenceCount(); // Return the reference counter return amd::clGetInfo(count, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_CONTEXT: { cl_context context = const_cast(as_cl(&amdKernel->program().context())); // Return the context, associated with the program return amd::clGetInfo(context, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_PROGRAM: { cl_program program = const_cast(as_cl(&amdKernel->program())); // Return the program, associated with the kernel return amd::clGetInfo(program, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_ATTRIBUTES: { const char* name = amdKernel->signature().attributes().c_str(); // Return the kernel attributes return amd::clGetInfo(name, param_value_size, param_value, param_value_size_ret); } default: return CL_INVALID_VALUE; } return CL_SUCCESS; } RUNTIME_EXIT /*! \brief Returns information about the arguments of a kernel. 
Kernel * argument information is only available if the program object associated * with kernel is created with \a clCreateProgramWithSource and the program * executable is built with the -cl-kernel-arg-info option specified in * options argument to clBuildProgram or clCompileProgram. * * \param kernel specifies the kernel object being queried. * * \param param_name specifies the information to query. * * \param param_value is a pointer to memory where the appropriate result * being queried is returned. If \a param_value is NULL, it is ignored. * * \param param_value_size is used to specify the size in bytes of memory * pointed to by \a param_value. This size must be >= size of return type. * * \param param_value_size_ret returns the actual size in bytes of data copied * to \a param_value. If \a param_value_size_ret is NULL, it is ignored. * * \return One of the following values: * - CL_SUCCESS if the function is executed successfully * - CL_INVALID_ARG_INDEX if \a arg_indx is not a valid argument index * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes * specified by \a param_value_size is < size of return type and * \a param_value is not NULL * - CL_INVALID_KERNEL if \a kernel is not a valid kernel object. 
* * \version 1.2r07 */ RUNTIME_ENTRY(cl_int, clGetKernelArgInfo, (cl_kernel kernel, cl_uint arg_indx, cl_kernel_arg_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { // Check if we have a valid kernel if (!is_valid(kernel)) { return CL_INVALID_KERNEL; } amd::Kernel* amdKernel = as_amd(kernel); const amd::KernelSignature& signature = amdKernel->signature(); if (arg_indx >= signature.numParameters()) { return CL_INVALID_ARG_INDEX; } const amd::KernelParameterDescriptor& desc = signature.at(arg_indx); // Get the corresponded parameters switch (param_name) { case CL_KERNEL_ARG_ADDRESS_QUALIFIER: { cl_kernel_arg_address_qualifier qualifier = desc.addressQualifier_; return amd::clGetInfo(qualifier, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_ARG_ACCESS_QUALIFIER: { cl_kernel_arg_access_qualifier qualifier = desc.accessQualifier_; return amd::clGetInfo(qualifier, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_ARG_TYPE_NAME: { const char* typeName = desc.typeName_.c_str(); // Return the argument's type name return amd::clGetInfo(typeName, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_ARG_TYPE_QUALIFIER: { cl_kernel_arg_type_qualifier qualifier = desc.typeQualifier_; return amd::clGetInfo(qualifier, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_ARG_NAME: { const char* name = desc.name_.c_str(); // Return the argument's name return amd::clGetInfo(name, param_value_size, param_value, param_value_size_ret); } default: return CL_INVALID_VALUE; } return CL_SUCCESS; } RUNTIME_EXIT /*! \brief Return information about the kernel object that may be specific * to a device. * * \param kernel specifies the kernel object being queried. * * \param device identifies a specific device in the list of devices associated * with \a kernel. The list of devices is the list of devices in the OpenCL * context that is associated with \a kernel. 
If the list of devices associated * with kernel is a single device, \a device can be a NULL value. * * \param param_name specifies the information to query * * \param param_value is a pointer to memory where the appropriate result being * queried is returned. If \a param_value is NULL, it is ignored. * * \param param_value_size is used to specify the size in bytes of memory * pointed to by \a param_value. This size must be >= size of return type. * * \param param_value_size_ret returns the actual size in bytes of data copied * to \a param_value. If \a param_value_size_ret is NULL, it is ignored. * * \return One of the following values: * - CL_SUCCESS if the function is executed successfully, * - CL_INVALID_DEVICE if \a device is not in the list of devices associated * with \a kernel or if \a device is NULL but there is more than one * device associated with \a kernel * - CL_INVALID_VALUE if \a param_name is not valid, or if size in bytes * specified by \a param_value_size is < size of return type and * \a param_value is not NULL * - CL_INVALID_KERNEL if \a kernel is not a valid kernel object.
* * \version 1.2r15 */ RUNTIME_ENTRY(cl_int, clGetKernelWorkGroupInfo, (cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name, size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { // Check if we have a valid device if (!is_valid(device)) { return CL_INVALID_DEVICE; } // Check if we have a valid kernel if (!is_valid(kernel)) { return CL_INVALID_KERNEL; } const amd::Device& amdDevice = *as_amd(device); // Find the kernel, associated with the specified device const device::Kernel* devKernel = as_amd(kernel)->getDeviceKernel(amdDevice); // Make sure we found a valid kernel if (devKernel == NULL) { return CL_INVALID_KERNEL; } // Get the corresponded parameters switch (param_name) { case CL_KERNEL_WORK_GROUP_SIZE: { // Return workgroup size return amd::clGetInfo(devKernel->workGroupInfo()->size_, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: { // Return the compile workgroup size return amd::clGetInfo(devKernel->workGroupInfo()->compileSize_, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_LOCAL_MEM_SIZE: { // Return the amount of used local memory const size_t align = amdDevice.info().minDataTypeAlignSize_; cl_ulong memSize = as_amd(kernel)->parameters().localMemSize(align) + amd::alignUp(devKernel->workGroupInfo()->localMemSize_, align); return amd::clGetInfo(memSize, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: { // Return the compile workgroup size return amd::clGetInfo(devKernel->workGroupInfo()->preferredSizeMultiple_, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_PRIVATE_MEM_SIZE: { // Return the compile workgroup size return amd::clGetInfo(devKernel->workGroupInfo()->privateMemSize_, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_GLOBAL_WORK_SIZE: { return CL_INVALID_VALUE; } case CL_KERNEL_MAX_SEMAPHORE_SIZE_AMD: { return 
amd::clGetInfo(amdDevice.info().maxSemaphoreSize_, param_value_size, param_value, param_value_size_ret); } default: return CL_INVALID_VALUE; } return CL_SUCCESS; } RUNTIME_EXIT /*! \brief Returns information about the kernel object. * * \param kernel specifies the kernel object being queried. * * \param device identifies a specific device in the list of devices associated * with kernel. The list of devices is the list of devices in the OpenCL context * that is associated with kernel. If the list of devices associated with kernel * is a single device, device can be a NULL value. * * \param param_name specifies the information to query. The list of supported * param_name types and the information returned in param_value by * clGetKernelSubGroupInfo is described in the table below. * * \param input_value_size is used to specify the size in bytes of memory * pointed to by input_value. This size must be == size of input type as * described in the table below. * * \param input_value is a pointer to memory where the appropriate * parameterization of the query is passed from. If input_value is NULL, it is * ignored. * * \param param_value is a pointer to memory where the appropriate result being * queried is returned. If param_value is NULL, it is ignored. * * \param param_value_size is used to specify the size in bytes of memory * pointed to by param_value. This size must be >= size of return type as * described in the table below. * * \param param_value_size_ret returns the actual size in bytes of data copied * to param_value. If param_value_size_ret is NULL, it is ignored. * * \return CL_SUCCESS if the function is executed successfully. * Otherwise, it returns one of the following errors: * * - CL_INVALID_DEVICE if device is not in the list of devices associated with * kernel or if device is NULL but there is more than one device associated * with kernel. 
* - CL_INVALID_VALUE if param_name is not valid, or if size in bytes specified * by param_value_size is < size of return type as described in the table * above and param_value is not NULL. * - CL_INVALID_VALUE if param_name is CL_KERNEL_SUB_GROUP_SIZE_FOR_NDRANGE and * the size in bytes specified by input_value_size is not valid or if * input_value is NULL. * - CL_INVALID_KERNEL if kernel is a not a valid kernel object. * - CL_OUT_OF_RESOURCES if there is a failure to allocate resources required by * the OpenCL implementation on the device. * - CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required * by the OpenCL implementation on the host. * * \version 2.0r12 */ RUNTIME_ENTRY(cl_int, clGetKernelSubGroupInfo, (cl_kernel kernel, cl_device_id device, cl_kernel_sub_group_info param_name, size_t input_value_size, const void* input_value, size_t param_value_size, void* param_value, size_t* param_value_size_ret)) { // Check if we have a valid device if (!is_valid(device)) { return CL_INVALID_DEVICE; } // Check if we have a valid kernel if (!is_valid(kernel)) { return CL_INVALID_KERNEL; } const amd::Device& amdDevice = *as_amd(device); // Find the kernel, associated with the specified device const device::Kernel* devKernel = as_amd(kernel)->getDeviceKernel(amdDevice); // Make sure we found a valid kernel if (devKernel == NULL) { return CL_INVALID_KERNEL; } // Get the corresponded parameters switch (param_name) { case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE: case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE: { // Infer the number of dimensions from 'input_value_size' size_t dims = input_value_size / sizeof(size_t); if (dims == 0 || dims > 3 || input_value_size != dims * sizeof(size_t)) { return CL_INVALID_VALUE; } // Get the linear workgroup size size_t workGroupSize = ((size_t*)input_value)[0]; for (size_t i = 1; i < dims; ++i) { workGroupSize *= ((size_t*)input_value)[i]; } // Get the subgroup size. GPU devices sub-groups are wavefronts. 
size_t subGroupSize = as_amd(device)->info().wavefrontWidth_; size_t numSubGroups = (workGroupSize + subGroupSize - 1) / subGroupSize; return amd::clGetInfo((param_name == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR) ? subGroupSize : numSubGroups, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_COMPILE_NUM_SUB_GROUPS: { size_t numSubGroups = 0; return amd::clGetInfo(numSubGroups, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_MAX_NUM_SUB_GROUPS: { size_t waveSize = as_amd(device)->info().wavefrontWidth_; size_t numSubGroups = (devKernel->workGroupInfo()->size_ + waveSize - 1) / waveSize; return amd::clGetInfo(numSubGroups, param_value_size, param_value, param_value_size_ret); } case CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT: { if (input_value_size != sizeof(size_t)) { return CL_INVALID_VALUE; } size_t numSubGroups = ((size_t*)input_value)[0]; // Infer the number of dimensions from 'param_value_size' size_t dims = param_value_size / sizeof(size_t); if (dims == 0 || dims > 3 || param_value_size != dims * sizeof(size_t)) { return CL_INVALID_VALUE; } *not_null(param_value_size_ret) = param_value_size; size_t localSize; localSize = numSubGroups * as_amd(device)->info().wavefrontWidth_; if (localSize > devKernel->workGroupInfo()->size_) { ::memset(param_value, '\0', dims * sizeof(size_t)); return CL_SUCCESS; } switch (dims) { case 3: ((size_t*)param_value)[2] = 1; case 2: ((size_t*)param_value)[1] = 1; case 1: ((size_t*)param_value)[0] = localSize; } return CL_SUCCESS; } default: return CL_INVALID_VALUE; } return CL_SUCCESS; } RUNTIME_EXIT /*! @} * @} * @} */