SWDEV-257789 - Initial change to skip kernel arg copy
The optimization is controlled with ROCR_SKIP_KERNEL_ARG_COPY. This is an initial check-in for experiments. Extra changes are necessary for full support:
- handle graph capture with the original sysmem alloc
- avoid memobject references; otherwise there is a race condition with reuse of the arg buffer
- remove arg setup from hip

Change-Id: Ib0af710f93e79834711fa4049a7c66093711e68b
Этот коммит содержится в:
@@ -633,7 +633,8 @@ int32_t NDRangeKernelCommand::captureAndValidate() {
|
||||
|
||||
int32_t error;
|
||||
uint64_t lclMemSize = kernel().getDeviceKernel(device)->workGroupInfo()->localMemSize_;
|
||||
parameters_ = kernel().parameters().capture(device, sharedMemBytes_ + lclMemSize, &error);
|
||||
parameters_ = kernel().parameters().capture(*queue()->vdev(),
|
||||
sharedMemBytes_ + lclMemSize, &error);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
Ссылка в новой задаче
Block a user