diff --git a/rocclr/cmake/ROCclrPAL.cmake b/rocclr/cmake/ROCclrPAL.cmake index 304a425669..4c0ec03234 100644 --- a/rocclr/cmake/ROCclrPAL.cmake +++ b/rocclr/cmake/ROCclrPAL.cmake @@ -65,7 +65,7 @@ target_sources(rocclr PRIVATE ${ROCCLR_SRC_DIR}/device/pal/palprintf.cpp ${ROCCLR_SRC_DIR}/device/pal/palprogram.cpp ${ROCCLR_SRC_DIR}/device/pal/palresource.cpp - ${ROCCLR_SRC_DIR}/device/pal/palschedcl.cpp + ${ROCCLR_SRC_DIR}/device/pal/palblitcl.cpp ${ROCCLR_SRC_DIR}/device/pal/palsettings.cpp ${ROCCLR_SRC_DIR}/device/pal/palsignal.cpp ${ROCCLR_SRC_DIR}/device/pal/palthreadtrace.cpp diff --git a/rocclr/device/device.cpp b/rocclr/device/device.cpp index fce513ab96..4900fc9d45 100644 --- a/rocclr/device/device.cpp +++ b/rocclr/device/device.cpp @@ -357,7 +357,7 @@ Device::BlitProgram::~BlitProgram() { } bool Device::BlitProgram::create(amd::Device* device, const std::string& extraKernels, - const char* extraOptions) { + const std::string& extraOptions) { std::vector devices; devices.push_back(device); std::string kernels(device::BlitLinearSourceCode); @@ -383,7 +383,7 @@ bool Device::BlitProgram::create(amd::Device* device, const std::string& extraKe opt += "-Wf,--force_disable_spir -fno-lib-no-inline -fno-sc-keep-calls "; } - if (extraOptions != nullptr) { + if (!extraOptions.empty()) { opt += extraOptions; } if (!GPU_DUMP_BLIT_KERNELS) { diff --git a/rocclr/device/device.hpp b/rocclr/device/device.hpp index e6aae8fa74..7e4a64b449 100644 --- a/rocclr/device/device.hpp +++ b/rocclr/device/device.hpp @@ -1533,7 +1533,7 @@ class Device : public RuntimeObject { //! Creates blit program for this device bool create(Device* device, //!< Device object const std::string& extraKernel, //!< Extra kernels from the device layer - const char* extraOptions = NULL //!< Extra compilation options + const std::string& extraOptions //!< Extra compilation options ); }; diff --git a/rocclr/device/gpu/gpudevice.cpp b/rocclr/device/gpu/gpudevice.cpp index ae764abfe3..654de2d355 100644 --- a/rocclr/device/gpu/gpudevice.cpp +++ b/rocclr/device/gpu/gpudevice.cpp @@ -1118,7 +1118,7 @@ bool Device::initializeHeapResources() { // Delay compilation due to brig_loader memory allocation if (settings().ciPlus_) { std::string CL20extraBlits; - const char* ocl20 = NULL; + const std::string ocl20; if (settings().oclVersion_ >= OpenCL20) { CL20extraBlits = SchedulerSourceCode; ocl20 = "-cl-std=CL2.0"; diff --git a/rocclr/device/pal/palschedcl.cpp b/rocclr/device/pal/palblitcl.cpp similarity index 83% rename from rocclr/device/pal/palschedcl.cpp rename to rocclr/device/pal/palblitcl.cpp index 344ee02d81..954fac9034 100644 --- a/rocclr/device/pal/palschedcl.cpp +++ b/rocclr/device/pal/palblitcl.cpp @@ -23,7 +23,6 @@ namespace pal { #define BLIT_KERNEL(...) #__VA_ARGS__ const char* SchedulerSourceCode = BLIT_KERNEL( -%s \n extern void __amd_scheduler(__global void*, __global void*, uint); \n @@ -32,6 +31,16 @@ __kernel void __amd_rocclr_scheduler(__global void* queue, __global void* params } \n); +const char* SchedulerSourceCode20 = BLIT_KERNEL( +\n +extern void __amd_scheduler_pal(__global void*, __global void*, uint); +\n + __kernel void __amd_rocclr_scheduler(__global void* queue, __global void* params, + uint paramIdx) { + __amd_scheduler_pal(queue, params, paramIdx); +} +\n); + const char* GwsInitSourceCode = BLIT_KERNEL( \n extern void __ockl_gws_init(uint nwm1, uint rid); diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index 2faacfdd6a..5ca95a2e4f 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -802,6 +802,7 @@ Device::~Device() { } extern const char* SchedulerSourceCode; +extern const char* SchedulerSourceCode20; extern const char* GwsInitSourceCode; Pal::IDevice* gDeviceList[Pal::MaxDevices] = {}; uint32_t gStartDevice = 0; @@ -2348,32 +2349,32 @@ bool Device::createBlitProgram() { bool result = true; // Delayed compilation due to brig_loader memory allocation - std::string blits; - const char* ocl20 = nullptr; - - std::string sch = SchedulerSourceCode; - if (settings().oclVersion_ >= OpenCL20) { - size_t loc = sch.find("%s"); - sch.replace(loc, 2, iDev()->GetDispatchKernelSource()); - if (settings().useLightning_) { - // For LC, replace "amd_scheduler" with "amd_scheduler_pal" - static const char AmdScheduler[] = "amd_scheduler"; - static const char AmdSchedulerPal[] = "amd_scheduler_pal"; - loc = sch.find(AmdScheduler); - sch.replace(loc, sizeof(AmdScheduler) - 1, AmdSchedulerPal); - loc = sch.find(AmdScheduler, (loc + sizeof(AmdSchedulerPal) - 1)); - sch.replace(loc, sizeof(AmdScheduler) - 1, AmdSchedulerPal); - if (info().cooperativeGroups_) { - sch.append(GwsInitSourceCode); - } + std::string extraBlits; + std::string ocl20; + if (amd::IS_HIP) { + if (info().cooperativeGroups_) { + extraBlits = GwsInitSourceCode; + } + } + else { + if (settings().oclVersion_ >= OpenCL20) { + extraBlits = iDev()->GetDispatchKernelSource(); + if (settings().useLightning_) { + extraBlits.append(SchedulerSourceCode20); + } + else { + extraBlits.append(SchedulerSourceCode); + } + ocl20 = "-cl-std=CL2.0"; + } + else { + extraBlits = SchedulerSourceCode; } - blits = sch; - ocl20 = "-cl-std=CL2.0"; } blitProgram_ = new BlitProgram(context_); // Create blit programs - if (blitProgram_ == nullptr || !blitProgram_->create(this, blits, ocl20)) { + if (blitProgram_ == nullptr || !blitProgram_->create(this, extraBlits, ocl20)) { delete blitProgram_; blitProgram_ = nullptr; LogError("Couldn't create blit kernels!"); diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 39bc560e22..0ffceffbf4 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -853,7 +853,7 @@ bool Device::createBlitProgram() { blitProgram_ = new BlitProgram(context_); // Create blit programs - if (blitProgram_ == nullptr || !blitProgram_->create(this, extraKernel)) { + if (blitProgram_ == nullptr || !blitProgram_->create(this, extraKernel, "")) { delete blitProgram_; blitProgram_ = nullptr; LogError("Couldn't create blit kernels!");