P4 to Git Change 1475393 by lmoriche@lmoriche_opencl_dev2 on 2017/10/26 17:01:11

SWDEV-126884 - OCL 2.1 Platform APIs
	- Implement clGetKernelSubGroupInfo, clCloneKernel, clGetHostTimer and clGetDeviceAndHostTimer

Affected files ...

... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_device.cpp#68 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_execute.cpp#25 edit
... //depot/stg/opencl/drivers/opencl/api/opencl/amdocl/cl_program.cpp#43 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.cpp#24 edit
... //depot/stg/opencl/drivers/opencl/runtime/platform/kernel.hpp#19 edit
Этот коммит содержится в:
foreman
2017-10-26 17:08:01 -04:00
родитель 33724edae7
Коммит 687e1fdd9c
3 изменённых файлов: 77 добавлений и 54 удалений
+5
Просмотреть файл
@@ -7,6 +7,7 @@
#include "device/device.hpp"
#include "platform/runtime.hpp"
#include "utils/versions.hpp"
#include "os/os.hpp"
#include "cl_semaphore_amd.h"
#include "CL/cl_ext.h"
@@ -129,6 +130,10 @@ RUNTIME_ENTRY(cl_int, clGetPlatformInfo,
size_t max_keys = OCL_MAX_KEYS;
return amd::clGetInfo(max_keys, param_value_size, param_value, param_value_size_ret);
}
case CL_PLATFORM_HOST_TIMER_RESOLUTION: {
cl_ulong resolution = (cl_ulong)amd::Os::timerResolutionNanos();
return amd::clGetInfo(resolution, param_value_size, param_value, param_value_size_ret);
}
default:
break;
}
+13 -6
Просмотреть файл
@@ -8,6 +8,7 @@
#include "platform/ndrange.hpp"
#include "platform/command.hpp"
#include "platform/program.hpp"
#include "os/os.hpp"
#include <icd/icd_dispatch.h>
@@ -967,10 +968,14 @@ RUNTIME_ENTRY(cl_int, clGetDeviceAndHostTimer,
return CL_INVALID_DEVICE;
}
// TODO: Implement get device and host timer logic
LogWarning("Device support for clGetDeviceAndHostTimer() has not been implemented.");
if (!device_timestamp || !host_timestamp) {
return CL_INVALID_VALUE;
}
return CL_INVALID_VALUE;
// The device timestamp and host timestamp use the same timebase.
*device_timestamp = *host_timestamp = amd::Os::timeNanos();
return CL_SUCCESS;
}
RUNTIME_EXIT
@@ -1011,10 +1016,12 @@ RUNTIME_ENTRY(cl_int, clGetHostTimer,
return CL_INVALID_DEVICE;
}
// TODO: Implement get host timer logic
LogWarning("Device support for clGetHostTimer() has not been implemented.");
if (!host_timestamp) {
return CL_INVALID_VALUE;
}
return CL_INVALID_VALUE;
*host_timestamp = amd::Os::timeNanos();
return CL_SUCCESS;
}
RUNTIME_EXIT
+59 -48
Просмотреть файл
@@ -456,12 +456,12 @@ RUNTIME_ENTRY(cl_int, clBuildProgram,
}
RUNTIME_EXIT
/*! \brief compiles a programs source for all the devices or a specific
/*! \brief compiles a program's source for all the devices or a specific
* device(s) in the OpenCL context associated with program. The pre-processor
* runs before the program sources are compiled.
* The compiled binary is built for all devices associated with program or
* the list of devices specified. The compiled binary can be queried using
* \a clGetProgramInfo(program, CL_PROGRAM_BINARIES, ) and can be specified
* \a clGetProgramInfo(program, CL_PROGRAM_BINARIES, ...) and can be specified
* to \a clCreateProgramWithBinary to create a new program object.
*
* \param program is the program object that is the compilation target.
@@ -489,7 +489,7 @@ RUNTIME_EXIT
* source in program that comes from an embedded header. The corresponding entry
* in input_headers identifies the program object which contains the header
* source to be used. The embedded headers are first searched before the headers
* in the list of directories specified by the I compile option (as described in
* in the list of directories specified by the -I compile option (as described in
* section 5.6.4.1). If multiple entries in header_include_names refer to the same
* header name, the first one encountered will be used.
*
@@ -500,7 +500,7 @@ RUNTIME_EXIT
* \a clCompileProgram does not need to wait for the compiler to complete and can
* return immediately. If \a pfn_notify is NULL, \a clCompileProgram does not
* return until the compiler has completed. This callback function may be called
* asynchronously by the OpenCL implementation. It is the applications
* asynchronously by the OpenCL implementation. It is the application's
* responsibility to ensure that the callback function is thread-safe.
*
* \param user_data will be passed as an argument when pfn_notify is called.
@@ -596,7 +596,7 @@ RUNTIME_EXIT
* the devices or a specific device(s) in the OpenCL context and creates
* an executable. clLinkProgram creates a new program object which contains
* this executable. The executable binary can be queried using
* \a clGetProgramInfo(program, CL_PROGRAM_BINARIES, ) and can be specified
* \a clGetProgramInfo(program, CL_PROGRAM_BINARIES, ...) and can be specified
* to \a clCreateProgramWithBinary to create a new program object.
* The devices associated with the returned program object will be the list
* of devices specified by device_list or if device_list is NULL it will be
@@ -639,7 +639,7 @@ RUNTIME_EXIT
* callback function is called with a valid program object (if the link was
* successful) or NULL (if the link encountered a failure). This callback
* function may be called asynchronously by the OpenCL implementation. It is
* the applications responsibility to ensure that the callback function is
* the application's responsibility to ensure that the callback function is
* thread-safe. If \a pfn_notify is NULL, \a clLinkProgram does not return
* until the linker has completed. clLinkProgram returns a valid non-zero
* program object (if the link was successful) or NULL (if the link
@@ -1320,45 +1320,13 @@ RUNTIME_ENTRY_RET(cl_kernel, clCloneKernel,
return (cl_kernel)0;
}
amd::Kernel* srcKernel = as_amd(source_kernel);
amd::Program* program = &(srcKernel->program());
const char* kernelName = srcKernel->name().c_str();
const amd::Symbol* symbol = program->findSymbol(kernelName);
if (symbol == NULL) {
*not_null(errcode_ret) = CL_INVALID_KERNEL_NAME;
return (cl_kernel)0;
}
amd::Kernel* kernel = new amd::Kernel(*program, *symbol, kernelName);
amd::Kernel* kernel = new amd::Kernel(*as_amd(source_kernel));
if (kernel == NULL) {
*not_null(errcode_ret) = CL_OUT_OF_HOST_MEMORY;
return (cl_kernel)0;
}
//TODO: implemente the clone kernel logic
LogWarning("Device support for clCloneKernel() has not been implemented");
#if 0
// clone kernel logic - unverified
// clone the parameter values_, defined_, svmBound_ arrays
amd::KernelParameters* srcParameters = &(srcKernel->parameters());
amd::KernelParameters* parameters = &(kernel->parameters());
const amd::KernelSignature& signature = kernel->signature();
size_t size = signature.paramsSize() + signature.numParameters() * sizeof(bool) * 2;
::memcpy(parameters->values(), srcParameters->values(), size);
// clone the exec info
parameters->setExecInfoOffset(srcParameters->getExecInfoOffset());
parameters->addSvmPtr(srcParameters->getExecSvmPtr(), srcParameters->getNumberOfSvmPtr());
parameters->setSvmSystemPointersSupport(srcParameters->getSvmSystemPointersSupport());
parameters->setValidated(srcParameters->getValidated());
parameters->setExecNewVcop(srcParameters->getExecNewVcop());
parameters->setExecPfpaVcop(srcParameters->getExecPfpaVcop());
#endif
*not_null(errcode_ret) = CL_INVALID_VALUE;
*not_null(errcode_ret) = CL_SUCCESS;
return as_cl(kernel);
}
RUNTIME_EXIT
@@ -1800,8 +1768,8 @@ RUNTIME_ENTRY(cl_int, clGetKernelSubGroupInfo,
// Get the corresponded parameters
switch (param_name) {
case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR:
case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR: {
case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE:
case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE: {
// Infer the number of dimensions from 'input_value_size'
size_t dims = input_value_size / sizeof(size_t);
if (dims == 0 || dims > 3 || input_value_size != dims * sizeof(size_t)) {
@@ -1828,12 +1796,55 @@ RUNTIME_ENTRY(cl_int, clGetKernelSubGroupInfo,
: numSubGroups,
param_value_size, param_value, param_value_size_ret);
}
case CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT:
case CL_KERNEL_MAX_NUM_SUB_GROUPS:
case CL_KERNEL_COMPILE_NUM_SUB_GROUPS:
//TODO: implemente the kernel subgroup info query
LogWarning("Device support for clGetKernelSubGroupInfo() query has not been implemented.");
return CL_INVALID_VALUE;
case CL_KERNEL_COMPILE_NUM_SUB_GROUPS: {
size_t numSubGroups = 0;
return amd::clGetInfo(numSubGroups, param_value_size, param_value, param_value_size_ret);
}
case CL_KERNEL_MAX_NUM_SUB_GROUPS: {
size_t waveSize = as_amd(device)->info().wavefrontWidth_;
size_t numSubGroups = as_amd(device)->type() == CL_DEVICE_TYPE_CPU
? 1 : (devKernel->workGroupInfo()->size_ + waveSize - 1) / waveSize;
return amd::clGetInfo(numSubGroups, param_value_size, param_value, param_value_size_ret);
}
case CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT: {
if (input_value_size != sizeof(size_t)) {
return CL_INVALID_VALUE;
}
size_t numSubGroups = ((size_t*)input_value)[0];
// Infer the number of dimensions from 'param_value_size'
size_t dims = param_value_size / sizeof(size_t);
if (dims == 0 || dims > 3 || param_value_size != dims * sizeof(size_t)) {
return CL_INVALID_VALUE;
}
*not_null(param_value_size_ret) = param_value_size;
size_t localSize;
if (as_amd(device)->type() == CL_DEVICE_TYPE_CPU) {
if (numSubGroups != 1) {
::memset(param_value, '\0', dims * sizeof(size_t));
return CL_SUCCESS;
}
localSize = devKernel->workGroupInfo()->size_;
}
else {
localSize = numSubGroups * as_amd(device)->info().wavefrontWidth_;
if (localSize > devKernel->workGroupInfo()->size_) {
::memset(param_value, '\0', dims * sizeof(size_t));
return CL_SUCCESS;
}
}
switch (dims) {
case 3:
((size_t*)param_value)[2] = 1;
case 2:
((size_t*)param_value)[1] = 1;
case 1:
((size_t*)param_value)[0] = localSize;
}
return CL_SUCCESS;
}
default:
return CL_INVALID_VALUE;
}