diff --git a/projects/clr/rocclr/cmake/ROCclrHSA.cmake b/projects/clr/rocclr/cmake/ROCclrHSA.cmake index ffb3a6b396..810fd5bea8 100644 --- a/projects/clr/rocclr/cmake/ROCclrHSA.cmake +++ b/projects/clr/rocclr/cmake/ROCclrHSA.cmake @@ -37,6 +37,7 @@ endif() target_sources(rocclr PRIVATE ${ROCCLR_SRC_DIR}/device/rocm/rocappprofile.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocblit.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocblitcl.cpp ${ROCCLR_SRC_DIR}/device/rocm/roccounters.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocdevice.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocglinterop.cpp diff --git a/projects/clr/rocclr/device/blitcl.cpp b/projects/clr/rocclr/device/blitcl.cpp index 13df6fcc0f..32bb5c9e90 100644 --- a/projects/clr/rocclr/device/blitcl.cpp +++ b/projects/clr/rocclr/device/blitcl.cpp @@ -35,10 +35,6 @@ const char* BlitLinearSourceCode = BLIT_KERNELS( extern void __amd_copyBufferRectAligned(__global uint*, __global uint*, ulong4, ulong4, ulong4); - extern void __amd_streamOpsWrite(__global uint*, __global ulong*, ulong, ulong); - - extern void __amd_streamOpsWait(__global uint*,__global ulong*, ulong, ulong, ulong); - // Implementation __kernel void __amd_rocclr_fillBufferAligned(__global uchar* bufUChar, __global ushort* bufUShort, @@ -73,16 +69,6 @@ const char* BlitLinearSourceCode = BLIT_KERNELS( __amd_copyBufferRectAligned(src, dst, srcRect, dstRect, size); } - __kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong, - ulong value, ulong sizeBytes) { - __amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes); - } - - __kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong, - ulong value, ulong flags, ulong mask) { - __amd_streamOpsWait(ptrInt, ptrUlong, value, flags, mask); - } - ); const char* BlitImageSourceCode = BLIT_KERNELS( diff --git a/projects/clr/rocclr/device/rocm/rocblitcl.cpp b/projects/clr/rocclr/device/rocm/rocblitcl.cpp new file mode 100755 index 0000000000..943f2ed46c --- /dev/null +++ b/projects/clr/rocclr/device/rocm/rocblitcl.cpp @@ -0,0 +1,45 @@ +/* Copyright (c) 2010 - 2021 Advanced Micro Devices, Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +namespace roc { + +#define BLIT_KERNEL(...) #__VA_ARGS__ + +const char* rocBlitLinearSourceCode = BLIT_KERNEL( + + // Extern + extern void __amd_streamOpsWrite(__global uint*, __global ulong*, ulong, ulong); + + extern void __amd_streamOpsWait(__global uint*,__global ulong*, ulong, ulong, ulong); + + // Implementation + __kernel void __amd_rocclr_streamOpsWrite(__global uint* ptrInt, __global ulong* ptrUlong, + ulong value, ulong sizeBytes) { + __amd_streamOpsWrite(ptrInt, ptrUlong, value, sizeBytes); + } + + __kernel void __amd_rocclr_streamOpsWait(__global uint* ptrInt, __global ulong* ptrUlong, + ulong value, ulong flags, ulong mask) { + __amd_streamOpsWait(ptrInt, ptrUlong, value, flags, mask); + } + +); + +} // namespace roc diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 5577167822..92b54f5c9c 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -518,6 +518,7 @@ bool Device::init() { extern const char* SchedulerSourceCode; extern const char* GwsInitSourceCode; +extern const char* rocBlitLinearSourceCode; void Device::tearDown() { NullDevice::tearDown(); @@ -832,21 +833,28 @@ void Device::ReleaseExclusiveGpuAccess(VirtualGPU& vgpu) const { bool Device::createBlitProgram() { bool result = true; - const char* scheduler = nullptr; + const char* extraKernel = nullptr; #if defined(USE_COMGR_LIBRARY) - std::string sch = SchedulerSourceCode; + std::string rocKernel; if (settings().useLightning_) { - if (info().cooperativeGroups_) { - sch.append(GwsInitSourceCode); + if (amd::IS_HIP) { + rocKernel = rocBlitLinearSourceCode; + if (info().cooperativeGroups_) { + rocKernel.append(GwsInitSourceCode); + } } - scheduler = sch.c_str(); + else { + rocKernel = SchedulerSourceCode; + } + + extraKernel = rocKernel.c_str(); } #endif // USE_COMGR_LIBRARY blitProgram_ = new BlitProgram(context_); // Create blit programs - if (blitProgram_ == nullptr || !blitProgram_->create(this, scheduler)) { + if (blitProgram_ == nullptr || !blitProgram_->create(this, extraKernel)) { delete blitProgram_; blitProgram_ = nullptr; LogError("Couldn't create blit kernels!");