From ea2741f631a2644584fef686dbd62e5b38518b35 Mon Sep 17 00:00:00 2001 From: Julia Jiang Date: Wed, 17 Nov 2021 18:36:08 -0500 Subject: [PATCH] SWDEV-308644 - merge roc blit kernels Change-Id: I378e511959fe17c03fa45066022e9670a4d181f0 [ROCm/clr commit: f5c9ad5b1dc8d8a265922ce2f753571e3657d087] --- projects/clr/rocclr/cmake/ROCclrHSA.cmake | 1 - projects/clr/rocclr/device/device.cpp | 4 +- projects/clr/rocclr/device/device.hpp | 2 +- projects/clr/rocclr/device/gpu/gpudevice.cpp | 2 +- projects/clr/rocclr/device/pal/paldevice.cpp | 4 +- projects/clr/rocclr/device/rocm/rocblitcl.cpp | 17 ++++++++ projects/clr/rocclr/device/rocm/rocdevice.cpp | 14 +++--- .../clr/rocclr/device/rocm/rocschedcl.cpp | 43 ------------------- 8 files changed, 29 insertions(+), 58 deletions(-) mode change 100755 => 100644 projects/clr/rocclr/device/rocm/rocblitcl.cpp delete mode 100644 projects/clr/rocclr/device/rocm/rocschedcl.cpp diff --git a/projects/clr/rocclr/cmake/ROCclrHSA.cmake b/projects/clr/rocclr/cmake/ROCclrHSA.cmake index 810fd5bea8..3104df9620 100644 --- a/projects/clr/rocclr/cmake/ROCclrHSA.cmake +++ b/projects/clr/rocclr/cmake/ROCclrHSA.cmake @@ -45,7 +45,6 @@ target_sources(rocclr PRIVATE ${ROCCLR_SRC_DIR}/device/rocm/rocmemory.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocprintf.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocprogram.cpp - ${ROCCLR_SRC_DIR}/device/rocm/rocschedcl.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocsettings.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocsignal.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocvirtual.cpp diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index 57f6fb5d49..fce513ab96 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -356,7 +356,7 @@ Device::BlitProgram::~BlitProgram() { } } -bool Device::BlitProgram::create(amd::Device* device, const char* extraKernels, +bool Device::BlitProgram::create(amd::Device* device, const std::string& extraKernels, const char* extraOptions) { std::vector devices; devices.push_back(device); @@ -367,7 +367,7 @@ bool Device::BlitProgram::create(amd::Device* device, const char* extraKernels, kernels += image_kernels; } - if (extraKernels != nullptr) { + if (!extraKernels.empty()) { kernels += extraKernels; } diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 5464b1e05d..d1645e49dd 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -1529,7 +1529,7 @@ class Device : public RuntimeObject { //! Creates blit program for this device bool create(Device* device, //!< Device object - const char* extraKernel = NULL, //!< Extra kernels from the device layer + const std::string& extraKernel, //!< Extra kernels from the device layer const char* extraOptions = NULL //!< Extra compilation options ); }; diff --git a/projects/clr/rocclr/device/gpu/gpudevice.cpp b/projects/clr/rocclr/device/gpu/gpudevice.cpp index 1ff1aae971..ae764abfe3 100644 --- a/projects/clr/rocclr/device/gpu/gpudevice.cpp +++ b/projects/clr/rocclr/device/gpu/gpudevice.cpp @@ -1117,7 +1117,7 @@ bool Device::initializeHeapResources() { // Delay compilation due to brig_loader memory allocation if (settings().ciPlus_) { - const char* CL20extraBlits = NULL; + std::string CL20extraBlits; const char* ocl20 = NULL; if (settings().oclVersion_ >= OpenCL20) { CL20extraBlits = SchedulerSourceCode; diff --git a/projects/clr/rocclr/device/pal/paldevice.cpp b/projects/clr/rocclr/device/pal/paldevice.cpp index b03f936d8d..216011f976 100644 --- a/projects/clr/rocclr/device/pal/paldevice.cpp +++ b/projects/clr/rocclr/device/pal/paldevice.cpp @@ -2345,7 +2345,7 @@ bool Device::createBlitProgram() { bool result = true; // Delayed compilation due to brig_loader memory allocation - const char* blits = nullptr; + std::string blits; const char* ocl20 = nullptr; std::string sch = SchedulerSourceCode; @@ -2364,7 +2364,7 @@ bool Device::createBlitProgram() { sch.append(GwsInitSourceCode); } } - blits = sch.c_str(); + blits = sch; ocl20 = "-cl-std=CL2.0"; } diff --git a/projects/clr/rocclr/device/rocm/rocblitcl.cpp b/projects/clr/rocclr/device/rocm/rocblitcl.cpp old mode 100755 new mode 100644 index 943f2ed46c..5de59ca853 --- a/projects/clr/rocclr/device/rocm/rocblitcl.cpp +++ b/projects/clr/rocclr/device/rocm/rocblitcl.cpp @@ -39,7 +39,24 @@ const char* rocBlitLinearSourceCode = BLIT_KERNEL( ulong value, ulong flags, ulong mask) { __amd_streamOpsWait(ptrInt, ptrUlong, value, flags, mask); } +); +const char* SchedulerSourceCode = BLIT_KERNEL( + + extern void __amd_scheduler_rocm(__global void*); + + __kernel void __amd_rocclr_scheduler(__global void* params) { + __amd_scheduler_rocm(params); + } +); + +const char* GwsInitSourceCode = BLIT_KERNEL( + + extern void __ockl_gws_init(uint nwm1, uint rid); + + __kernel void __amd_rocclr_gwsInit(uint value) { + __ockl_gws_init(value, 0); + } ); } // namespace roc diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index 5c8bbcd5e1..1655992786 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -834,22 +834,20 @@ void Device::ReleaseExclusiveGpuAccess(VirtualGPU& vgpu) const { bool Device::createBlitProgram() { bool result = true; - const char* extraKernel = nullptr; + std::string extraKernel; #if defined(USE_COMGR_LIBRARY) - std::string rocKernel; if (settings().useLightning_) { if (amd::IS_HIP) { - rocKernel = rocBlitLinearSourceCode; + extraKernel = rocBlitLinearSourceCode; if (info().cooperativeGroups_) { - rocKernel.append(GwsInitSourceCode); - } + extraKernel.append(GwsInitSourceCode); + } } else { - rocKernel = SchedulerSourceCode; + extraKernel = SchedulerSourceCode; } - - extraKernel = rocKernel.c_str(); + } #endif // USE_COMGR_LIBRARY diff --git a/projects/clr/rocclr/device/rocm/rocschedcl.cpp b/projects/clr/rocclr/device/rocm/rocschedcl.cpp deleted file mode 100644 index 051c28a519..0000000000 --- a/projects/clr/rocclr/device/rocm/rocschedcl.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright (c) 2018 - 2021 Advanced Micro Devices, Inc. - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. */ - -namespace roc { - -#define BLIT_KERNEL(...) #__VA_ARGS__ - -const char* SchedulerSourceCode = BLIT_KERNEL( -\n -extern void __amd_scheduler_rocm(__global void*); -\n -__kernel void __amd_rocclr_scheduler(__global void* params) { - __amd_scheduler_rocm(params); -} -\n); - -const char* GwsInitSourceCode = BLIT_KERNEL( -\n -extern void __ockl_gws_init(uint nwm1, uint rid); -\n -__kernel void __amd_rocclr_gwsInit(uint value) { - __ockl_gws_init(value, 0); -} -\n); - -} // namespace roc