diff --git a/rocclr/device/blitcl.cpp b/rocclr/device/blitcl.cpp index 22a98e70b6..cfae2b9430 100644 --- a/rocclr/device/blitcl.cpp +++ b/rocclr/device/blitcl.cpp @@ -39,6 +39,14 @@ const char* BlitLinearSourceCode = BLIT_KERNELS( extern void __amd_copyBufferRectAligned(__global uint*, __global uint*, ulong4, ulong4, ulong4); + extern void __amd_streamOpsWrite(__global uint*, __global ulong*, ulong, ulong); + + extern void __amd_streamOpsWait(__global uint*, __global ulong*, ulong, ulong, ulong); + + extern void __ockl_dm_init_v1(ulong, ulong, uint, uint); + + extern void __ockl_gws_init(uint nwm1, uint rid); + // Implementation __kernel void __amd_rocclr_fillBufferAligned(__global uchar* bufUChar, __global ushort* bufUShort, @@ -88,6 +96,24 @@ const char* BlitLinearSourceCode = BLIT_KERNELS( __amd_copyBufferRectAligned(src, dst, srcRect, dstRect, size); } + __kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong, + ulong value, ulong sizeBytes) { + __amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes); + } + + __kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong, + ulong value, ulong flags, ulong mask) { + __amd_streamOpsWait(ptrInt, ptrUlong, value, flags, mask); + } + + __kernel void __amd_rocclr_initHeap(ulong heap_to_initialize, ulong initial_blocks, + uint heap_size, uint number_of_initial_blocks) { + __ockl_dm_init_v1(heap_to_initialize, initial_blocks, heap_size, number_of_initial_blocks); + } + + __kernel void __amd_rocclr_gwsInit(uint value) { + __ockl_gws_init(value, 0); + } ); const char* BlitImageSourceCode = BLIT_KERNELS( diff --git a/rocclr/device/pal/palblitcl.cpp b/rocclr/device/pal/palblitcl.cpp index d85f68e016..a60d929152 100644 --- a/rocclr/device/pal/palblitcl.cpp +++ b/rocclr/device/pal/palblitcl.cpp @@ -22,32 +22,6 @@ namespace pal { #define BLIT_KERNEL(...) #__VA_ARGS__ -const char* palBlitLinearSourceCode = BLIT_KERNEL( -\n -extern void __amd_streamOpsWrite(__global uint*, __global ulong*, ulong, ulong); -\n -extern void __amd_streamOpsWait(__global uint*,__global ulong*, ulong, ulong, ulong); -\n -extern void __ockl_dm_init_v1(ulong, ulong, uint, uint); -\n -__kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong, - ulong value, ulong sizeBytes) { - __amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes); -} -\n -__kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong, - ulong value, ulong flags, ulong mask) { - __amd_streamOpsWait(ptrInt, ptrUlong, value, flags, mask); -} -\n -__kernel void __amd_rocclr_initHeap(ulong heap_to_initialize, ulong initial_blocks, - uint heap_size, uint number_of_initial_blocks) { - __ockl_dm_init_v1(heap_to_initialize, initial_blocks, heap_size, number_of_initial_blocks); -} -\n); - - - const char* SchedulerSourceCode = BLIT_KERNEL( \n extern void __amd_scheduler(__global void*, __global void*, uint); @@ -67,13 +41,4 @@ extern void __amd_scheduler_pal(__global void*, __global void*, uint); } \n); -const char* GwsInitSourceCode = BLIT_KERNEL( -\n -extern void __ockl_gws_init(uint nwm1, uint rid); -\n -__kernel void __amd_rocclr_gwsInit(uint value) { - __ockl_gws_init(value, 0); -} -\n); - } // namespace pal diff --git a/rocclr/device/pal/paldevice.cpp b/rocclr/device/pal/paldevice.cpp index 9b951eb258..c50d80ed3a 100644 --- a/rocclr/device/pal/paldevice.cpp +++ b/rocclr/device/pal/paldevice.cpp @@ -835,8 +835,6 @@ Device::~Device() { extern const char* SchedulerSourceCode; extern const char* SchedulerSourceCode20; -extern const char* GwsInitSourceCode; -extern const char* palBlitLinearSourceCode; Pal::IDevice* gDeviceList[Pal::MaxDevices] = {}; uint32_t gStartDevice = 0; @@ -2506,12 +2504,7 @@ bool Device::createBlitProgram() { // Delayed compilation due to brig_loader memory allocation std::string extraBlits; std::string ocl20; - if (amd::IS_HIP) { - extraBlits = palBlitLinearSourceCode; - if (info().cooperativeGroups_) { - extraBlits.append(GwsInitSourceCode); - } - } else { + if (!amd::IS_HIP) { if (settings().oclVersion_ >= OpenCL20) { extraBlits = iDev()->GetDispatchKernelSource(); if (settings().useLightning_) { diff --git a/rocclr/device/rocm/rocblitcl.cpp b/rocclr/device/rocm/rocblitcl.cpp index 7ec622cbab..d845e53ce1 100644 --- a/rocclr/device/rocm/rocblitcl.cpp +++ b/rocclr/device/rocm/rocblitcl.cpp @@ -22,32 +22,6 @@ namespace roc { #define BLIT_KERNEL(...) #__VA_ARGS__ -const char* rocBlitLinearSourceCode = BLIT_KERNEL( - - // Extern - extern void __amd_streamOpsWrite(__global uint*, __global ulong*, ulong, ulong); - - extern void __amd_streamOpsWait(__global uint*, __global ulong*, ulong, ulong, ulong); - - extern void __ockl_dm_init_v1(ulong, ulong, uint, uint); - // Implementation - __kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong, - ulong value, ulong sizeBytes) { - __amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes); - } - - __kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong, - ulong value, ulong flags, ulong mask) { - __amd_streamOpsWait(ptrInt, ptrUlong, value, flags, mask); - } - - __kernel void __amd_rocclr_initHeap(ulong heap_to_initialize, ulong initial_blocks, - uint heap_size, uint number_of_initial_blocks) { - __ockl_dm_init_v1(heap_to_initialize, initial_blocks, heap_size, number_of_initial_blocks); - } - -); - const char* SchedulerSourceCode = BLIT_KERNEL( extern void __amd_scheduler_rocm(__global void*); @@ -57,13 +31,4 @@ const char* SchedulerSourceCode = BLIT_KERNEL( } ); -const char* GwsInitSourceCode = BLIT_KERNEL( - - extern void __ockl_gws_init(uint nwm1, uint rid); - - __kernel void __amd_rocclr_gwsInit(uint value) { - __ockl_gws_init(value, 0); - } -); - } // namespace roc diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 22018bd945..8e0bd8f6ee 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -546,8 +546,6 @@ bool Device::init() { } extern const char* SchedulerSourceCode; -extern const char* GwsInitSourceCode; -extern const char* rocBlitLinearSourceCode; void Device::tearDown() { NullDevice::tearDown(); @@ -845,13 +843,7 @@ bool Device::createBlitProgram() { #if defined(USE_COMGR_LIBRARY) if (settings().useLightning_) { - if (amd::IS_HIP) { - extraKernel = rocBlitLinearSourceCode; - if (info().cooperativeGroups_) { - extraKernel.append(GwsInitSourceCode); - } - } - else { + if (!amd::IS_HIP) { extraKernel = SchedulerSourceCode; }