From 1712acdd2e8485f175deabcb3eb5d4dbc6f947aa Mon Sep 17 00:00:00 2001 From: Sourabh Betigeri Date: Mon, 2 Dec 2024 16:38:37 -0500 Subject: [PATCH] Revert "SWDEV-440866 - [hip-roclr] Adds support to batch memory operations APIs" This reverts commit ab0ff9163d9a06f0a39e35911ff6c0b431bd3850. Reason for revert: hipInfo fails on windows. Updating llvm amd-mainline-closed Change-Id: I57e1fa1945188b0bc0a799c4f3d540f2b7713003 [ROCm/clr commit: 2ca644cf2255f12ff187bac7911d671e9523e20b] --- .../include/hip/amd_detail/hip_api_trace.hpp | 7 +-- .../include/hip/amd_detail/hip_prof_str.h | 37 +--------------- projects/clr/hipamd/src/CMakeLists.txt | 2 +- projects/clr/hipamd/src/amdhip.def | 1 - projects/clr/hipamd/src/hip_api_trace.cpp | 10 +---- projects/clr/hipamd/src/hip_hcc.map.in | 1 - projects/clr/hipamd/src/hip_stream_ops.cpp | 41 +---------------- .../clr/hipamd/src/hip_table_interface.cpp | 4 -- projects/clr/rocclr/device/blit.hpp | 6 --- projects/clr/rocclr/device/blitcl.cpp | 7 +-- projects/clr/rocclr/device/device.hpp | 4 -- projects/clr/rocclr/device/pal/palblit.hpp | 7 --- projects/clr/rocclr/device/rocm/rocblit.cpp | 32 -------------- projects/clr/rocclr/device/rocm/rocblit.hpp | 10 ++--- .../clr/rocclr/device/rocm/rocvirtual.cpp | 18 ++------ .../clr/rocclr/device/rocm/rocvirtual.hpp | 1 - projects/clr/rocclr/platform/command.hpp | 44 +------------------ .../clr/rocclr/platform/command_utils.hpp | 1 - 18 files changed, 16 insertions(+), 217 deletions(-) diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp b/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp index acfbe1a2e0..7da13a9805 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp @@ -61,7 +61,7 @@ // - Reset any of the *_STEP_VERSION defines to zero if the corresponding *_MAJOR_VERSION increases #define HIP_API_TABLE_STEP_VERSION 0 #define HIP_COMPILER_API_TABLE_STEP_VERSION 0 -#define HIP_RUNTIME_API_TABLE_STEP_VERSION 7 +#define HIP_RUNTIME_API_TABLE_STEP_VERSION 6 // HIP API interface typedef hipError_t (*t___hipPopCallConfiguration)(dim3* gridDim, dim3* blockDim, size_t* sharedMem, @@ -722,8 +722,6 @@ typedef hipError_t (*t_hipStreamWriteValue32)(hipStream_t stream, void* ptr, uin unsigned int flags); typedef hipError_t (*t_hipStreamWriteValue64)(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags); -typedef hipError_t (*t_hipStreamBatchMemOp)(hipStream_t stream, unsigned int count, - hipStreamBatchMemOpParams* paramArray, unsigned int flags); typedef hipError_t (*t_hipTexObjectCreate)(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc, const HIP_TEXTURE_DESC* pTexDesc, @@ -1521,9 +1519,6 @@ struct HipDispatchTable { // HIP_RUNTIME_API_TABLE_STEP_VERSION == 6 t_hipDeviceGetTexture1DLinearMaxWidth hipDeviceGetTexture1DLinearMaxWidth_fn; - // HIP_RUNTIME_API_TABLE_STEP_VERSION == 7 - t_hipStreamBatchMemOp hipStreamBatchMemOp_fn; - // DO NOT EDIT ABOVE! // HIP_RUNTIME_API_TABLE_STEP_VERSION == 7 diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h b/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h index 9cb1e275e6..e11b1abcf3 100644 --- a/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h +++ b/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h @@ -425,8 +425,7 @@ enum hip_api_id_t { HIP_API_ID_hipMemcpyHtoAAsync = 405, HIP_API_ID_hipSetValidDevices = 406, HIP_API_ID_hipExtHostAlloc = 407, - HIP_API_ID_hipStreamBatchMemOp = 408, - HIP_API_ID_LAST = 408, + HIP_API_ID_LAST = 407, HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice), HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties), @@ -546,6 +545,7 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipEventQuery: return "hipEventQuery"; case HIP_API_ID_hipEventRecord: return "hipEventRecord"; case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize"; + case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError"; case HIP_API_ID_hipExtGetLinkTypeAndHopCount: return "hipExtGetLinkTypeAndHopCount"; case HIP_API_ID_hipExtLaunchKernel: return "hipExtLaunchKernel"; case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: return "hipExtLaunchMultiKernelMultiDevice"; @@ -859,8 +859,6 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipUserObjectRelease: return "hipUserObjectRelease"; case HIP_API_ID_hipUserObjectRetain: return "hipUserObjectRetain"; case HIP_API_ID_hipWaitExternalSemaphoresAsync: return "hipWaitExternalSemaphoresAsync"; - case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError"; - case HIP_API_ID_hipStreamBatchMemOp: return "hipStreamBatchMemOp"; }; return "unknown"; }; @@ -1264,7 +1262,6 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipUserObjectRelease", name) == 0) return HIP_API_ID_hipUserObjectRelease; if (strcmp("hipUserObjectRetain", name) == 0) return HIP_API_ID_hipUserObjectRetain; if (strcmp("hipWaitExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipWaitExternalSemaphoresAsync; - if (strcmp("hipStreamBatchMemOp", name) == 0) return HIP_API_ID_hipStreamBatchMemOp; return HIP_API_ID_NONE; } @@ -3626,13 +3623,6 @@ typedef struct hip_api_data_s { unsigned int numExtSems; hipStream_t stream; } hipWaitExternalSemaphoresAsync; - struct { - hipStream_t stream; - unsigned int count; - hipStreamBatchMemOpParams* paramArray; - hipStreamBatchMemOpParams paramArray__val; - unsigned int flags; - } hipStreamBatchMemOp; } args; uint64_t *phase_data; } hip_api_data_t; @@ -5902,15 +5892,6 @@ typedef struct hip_api_data_s { cb_data.args.hipStreamWriteValue64.value = (uint64_t)value; \ cb_data.args.hipStreamWriteValue64.flags = (unsigned int)flags; \ }; - -// hipStreamBatchMemOp[('hipStream_t', 'stream'), ('unsigned int', 'count'), -// ('hipStreamBatchMemOpParams*', 'paramArray'), ('unsigned int', 'flags')] -#define INIT_hipStreamBatchMemOp_CB_ARGS_DATA(cb_data) { \ - cb_data.args.hipStreamBatchMemOp.stream = (hipStream_t)stream; \ - cb_data.args.hipStreamBatchMemOp.count = (unsigned int)count; \ - cb_data.args.hipStreamBatchMemOp.paramArray= (hipStreamBatchMemOpParams*)paramArray; \ - cb_data.args.hipStreamBatchMemOp.flags = (unsigned int)flags; \ -}; // hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')] #define INIT_hipTexRefGetAddress_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipTexRefGetAddress.dev_ptr = (hipDeviceptr_t*)dptr; \ @@ -7546,11 +7527,6 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // hipStreamWriteValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags')] case HIP_API_ID_hipStreamWriteValue64: break; -// hipStreamBatchMemOp[('hipStream_t', 'stream'), ('unsigned int', 'count'), -// ('hipStreamBatchMemOpParams*', 'paramArray'), ('unsigned int', 'flags')] - case HIP_API_ID_hipStreamBatchMemOp: - if (data->args.hipStreamBatchMemOp.paramArray) data->args.hipStreamBatchMemOp.paramArray__val = *(data->args.hipStreamBatchMemOp.paramArray); - break; // hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')] case HIP_API_ID_hipTexRefGetAddress: if (data->args.hipTexRefGetAddress.dev_ptr) data->args.hipTexRefGetAddress.dev_ptr__val = *(data->args.hipTexRefGetAddress.dev_ptr); @@ -10638,15 +10614,6 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue64.flags); oss << ")"; break; - case HIP_API_ID_hipStreamBatchMemOp: - oss << "hipStreamBatchMemOp("; - oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.stream); - oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.count); - if (data->args.hipStreamBatchMemOp.paramArray == NULL) oss << ", paramArray=NULL"; - else { oss << ", paramArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.paramArray__val); } - oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.flags); - oss << ")"; - break; case HIP_API_ID_hipTexRefGetAddress: oss << "hipTexRefGetAddress("; if (data->args.hipTexRefGetAddress.dev_ptr == NULL) oss << "dev_ptr=NULL"; diff --git a/projects/clr/hipamd/src/CMakeLists.txt b/projects/clr/hipamd/src/CMakeLists.txt index a276588825..3480dbbbfa 100644 --- a/projects/clr/hipamd/src/CMakeLists.txt +++ b/projects/clr/hipamd/src/CMakeLists.txt @@ -299,7 +299,7 @@ if(WIN32 OR NOT BUILD_SHARED_LIBS) # rocprofiler-register is not support on Windows set(HIP_ENABLE_ROCPROFILER_REGISTER OFF) else() - option(HIP_ENABLE_ROCPROFILER_REGISTER "Enable rocprofiler-register support" ON) + option(HIP_ENABLE_ROCPROFILER_REGISTER "Enable rocprofiler-register support" ON) endif() if(HIP_ENABLE_ROCPROFILER_REGISTER) diff --git a/projects/clr/hipamd/src/amdhip.def b/projects/clr/hipamd/src/amdhip.def index 355f180d7b..a119c5e3c4 100644 --- a/projects/clr/hipamd/src/amdhip.def +++ b/projects/clr/hipamd/src/amdhip.def @@ -480,4 +480,3 @@ hipGraphExecNodeSetParams hipDrvGraphMemcpyNodeSetParams hipDrvGraphMemcpyNodeGetParams hipExtHostAlloc -hipStreamBatchMemOp diff --git a/projects/clr/hipamd/src/hip_api_trace.cpp b/projects/clr/hipamd/src/hip_api_trace.cpp index fe88b3079a..c73b9cce60 100644 --- a/projects/clr/hipamd/src/hip_api_trace.cpp +++ b/projects/clr/hipamd/src/hip_api_trace.cpp @@ -602,8 +602,6 @@ hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, uint64_t value, u uint64_t mask); hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags); hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags); -hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count, hipStreamBatchMemOpParams* paramArray, - unsigned int flags); hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc, const HIP_TEXTURE_DESC* pTexDesc, const HIP_RESOURCE_VIEW_DESC* pResViewDesc); @@ -1199,7 +1197,6 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) { ptrDispatchTable->hipStreamWaitValue64_fn = hip::hipStreamWaitValue64; ptrDispatchTable->hipStreamWriteValue32_fn = hip::hipStreamWriteValue32; ptrDispatchTable->hipStreamWriteValue64_fn = hip::hipStreamWriteValue64; - ptrDispatchTable->hipStreamBatchMemOp_fn = hip::hipStreamBatchMemOp; ptrDispatchTable->hipTexObjectCreate_fn = hip::hipTexObjectCreate; ptrDispatchTable->hipTexObjectDestroy_fn = hip::hipTexObjectDestroy; ptrDispatchTable->hipTexObjectGetResourceDesc_fn = hip::hipTexObjectGetResourceDesc; @@ -1890,9 +1887,6 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipDrvGraphMemcpyNodeSetParams_fn, 460) HIP_ENFORCE_ABI(HipDispatchTable, hipExtHostAlloc_fn, 461) // HIP_RUNTIME_API_TABLE_STEP_VERSION == 6 HIP_ENFORCE_ABI(HipDispatchTable, hipDeviceGetTexture1DLinearMaxWidth_fn, 462) -// HIP_RUNTIME_API_TABLE_STEP_VERSION == 7 -HIP_ENFORCE_ABI(HipDispatchTable, hipStreamBatchMemOp_fn, 463); - // if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below // will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.: @@ -1900,9 +1894,9 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipStreamBatchMemOp_fn, 463); // HIP_ENFORCE_ABI(, , 8) // // HIP_ENFORCE_ABI_VERSIONING(
, 9) <- 8 + 1 = 9 -HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 464) +HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 463) -static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 7, +static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 6, "If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function " "pointers and then update this check so it is true"); #endif diff --git a/projects/clr/hipamd/src/hip_hcc.map.in b/projects/clr/hipamd/src/hip_hcc.map.in index 1b7c323563..ea09a35d53 100644 --- a/projects/clr/hipamd/src/hip_hcc.map.in +++ b/projects/clr/hipamd/src/hip_hcc.map.in @@ -583,7 +583,6 @@ local: hip_6.3 { global: hipExtHostAlloc; - hipStreamBatchMemOp; local: *; } hip_6.2; diff --git a/projects/clr/hipamd/src/hip_stream_ops.cpp b/projects/clr/hipamd/src/hip_stream_ops.cpp index 9662a08dd5..481e839ea9 100644 --- a/projects/clr/hipamd/src/hip_stream_ops.cpp +++ b/projects/clr/hipamd/src/hip_stream_ops.cpp @@ -23,36 +23,8 @@ #include "platform/command_utils.hpp" namespace hip { -hipError_t ihipBatchMemOperation(hipStream_t stream, cl_command_type cmdType, unsigned int count, - hipStreamBatchMemOpParams* paramArray, unsigned int flags) { - if (paramArray == nullptr || flags != 0 || count > 256) { - return hipErrorInvalidValue; - } - - if (!hip::isValid(stream)) { - return hipErrorContextIsDestroyed; - } - - hip::Stream* hip_stream = hip::getStream(stream); - amd::Command::EventWaitList waitList; - - amd::BatchMemoryOperationCommand* command = - new amd::BatchMemoryOperationCommand(*hip_stream, cmdType, count, - flags, waitList, paramArray, - sizeof(hipStreamBatchMemOpParams)); - - if (command == nullptr) { - return hipErrorOutOfMemory; - } - command->enqueue(); - command->release(); - HIP_RETURN(hipSuccess); -} - - hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void* ptr, - uint64_t value, uint64_t mask, unsigned int flags, - size_t sizeBytes) { + uint64_t value, uint64_t mask, unsigned int flags, size_t sizeBytes) { size_t offset = 0; unsigned int outFlags = 0; @@ -164,15 +136,4 @@ hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value, 0, // flags un-used for now set it to 0 sizeof(uint64_t))); } - -hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count, - hipStreamBatchMemOpParams* paramArray, unsigned int flags) { - HIP_INIT_API(hipStreamBatchMemOp, count, paramArray, flags); - HIP_RETURN_DURATION(ihipBatchMemOperation( - stream, - ROCCLR_COMMAND_BATCH_STREAM, - count, - paramArray, - flags)); - } } // namespace hip diff --git a/projects/clr/hipamd/src/hip_table_interface.cpp b/projects/clr/hipamd/src/hip_table_interface.cpp index 237f0d8805..7d2ebf6fa2 100644 --- a/projects/clr/hipamd/src/hip_table_interface.cpp +++ b/projects/clr/hipamd/src/hip_table_interface.cpp @@ -1413,10 +1413,6 @@ hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags) { return hip::GetHipDispatchTable()->hipStreamWriteValue64_fn(stream, ptr, value, flags); } -hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count, - hipStreamBatchMemOpParams* paramArray, unsigned int flags) { - return hip::GetHipDispatchTable()->hipStreamBatchMemOp_fn(stream, count, paramArray, flags); -} hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc, const HIP_TEXTURE_DESC* pTexDesc, const HIP_RESOURCE_VIEW_DESC* pResViewDesc) { diff --git a/projects/clr/rocclr/device/blit.hpp b/projects/clr/rocclr/device/blit.hpp index ec98886b38..4cd8c55be4 100644 --- a/projects/clr/rocclr/device/blit.hpp +++ b/projects/clr/rocclr/device/blit.hpp @@ -233,12 +233,6 @@ class BlitManager : public amd::HeapObject { uint64_t mask ) const = 0; - //! Stream batch memory operation - virtual bool batchMemOps(const void* paramArray, - size_t paramSize, - uint32_t count - ) const = 0; - //! Enables synchronization on blit operations void enableSynchronization() { syncOperation_ = true; } diff --git a/projects/clr/rocclr/device/blitcl.cpp b/projects/clr/rocclr/device/blitcl.cpp index 9abfc1e795..897ebf82c8 100644 --- a/projects/clr/rocclr/device/blitcl.cpp +++ b/projects/clr/rocclr/device/blitcl.cpp @@ -43,8 +43,6 @@ const char* BlitLinearSourceCode = BLIT_KERNELS( extern void __amd_streamOpsWait(__global uint*, __global ulong*, ulong, ulong, ulong); - extern void __amd_batchMemOp(__global void*, uint count); - extern void __ockl_dm_init_v1(ulong, ulong, uint, uint); extern void __ockl_gws_init(uint nwm1, uint rid); @@ -164,10 +162,6 @@ const char* BlitLinearSourceCode = BLIT_KERNELS( ulong4 srcRect, ulong4 dstRect, ulong4 size) { __amd_copyBufferRectAligned(src, dst, srcRect, dstRect, size); } - - __kernel void __amd_rocclr_batchMemOp(__global void* params, uint count) { - __amd_batchMemOp(params, count); - } ); const char* HipExtraSourceCode = BLIT_KERNELS( @@ -260,6 +254,7 @@ const char* BlitImageSourceCode = BLIT_KERNELS( __amd_copyImageToBuffer(src, dstUInt, dstUShort, dstUChar, srcOrigin, dstOrigin, size, format, pitch); } + ); } // namespace amd::device diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 28852ca0ca..f5aa124cce 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -92,7 +92,6 @@ class SvmMapMemoryCommand; class SvmUnmapMemoryCommand; class SvmPrefetchAsyncCommand; class StreamOperationCommand; -class BatchMemoryOperationCommand; class VirtualMapCommand; class ExternalSemaphoreCmd; class Isa; @@ -1309,9 +1308,6 @@ class VirtualDevice : public amd::HeapObject { ShouldNotReachHere(); } virtual void submitStreamOperation(amd::StreamOperationCommand& cmd) { ShouldNotReachHere(); } - virtual void submitBatchMemoryOperation(amd::BatchMemoryOperationCommand& cmd) { - ShouldNotReachHere(); - } virtual void submitVirtualMap(amd::VirtualMapCommand& cmd) { ShouldNotReachHere(); } virtual address allocKernelArguments(size_t size, size_t alignment) { return nullptr; } diff --git a/projects/clr/rocclr/device/pal/palblit.hpp b/projects/clr/rocclr/device/pal/palblit.hpp index 7df8731328..9099a9a47c 100644 --- a/projects/clr/rocclr/device/pal/palblit.hpp +++ b/projects/clr/rocclr/device/pal/palblit.hpp @@ -475,13 +475,6 @@ class KernelBlitManager : public DmaBlitManager { uint number_of_initial_blocks ) const; - //! Batch memory ops- Submits batch of streamWaits and streamWrite operations. - virtual bool batchMemOps(const void* paramArray, - size_t paramSize, - uint32_t count) const { - assert(!"Unimplemented"); - return false; - } private: static constexpr size_t MaxXferBuffers = 2; static constexpr uint TransferSplitSize = 3; diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp index 4936cbb3c6..737529aa0e 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.cpp +++ b/projects/clr/rocclr/device/rocm/rocblit.cpp @@ -2583,38 +2583,6 @@ bool KernelBlitManager::streamOpsWait(device::Memory& memory, uint64_t value, si return result; } -// ================================================================================================ -bool KernelBlitManager::batchMemOps(const void* paramArray, size_t paramSize, - uint32_t count) const { - amd::ScopedLock k(lockXferOps_); - bool result = false; - uint blitType = BatchMemOp; - size_t dim = 1; - - size_t globalWorkOffset[1] = { 0 }; - size_t globalWorkSize[1] = { count }; - size_t localWorkSize[1] = { 1 }; - - // Get constant buffer and copy the array of parameters - constexpr bool kDirectVa = true; - auto constBuf = gpu().allocKernArg((count * paramSize), kCBAlignment); - memcpy(constBuf, paramArray, (count * paramSize)); - - setArgument(kernels_[blitType], 0, sizeof(cl_mem), constBuf, 0, nullptr, kDirectVa); - setArgument(kernels_[blitType], 1, sizeof(cl_mem), &count); - - // Create ND range object for the kernel's execution - amd::NDRangeContainer ndrange(dim, globalWorkOffset, globalWorkSize, localWorkSize); - - // Execute the blit - address parameters = captureArguments(kernels_[blitType]); - result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters, nullptr); - releaseArguments(parameters); - synchronize(); - - return result; -} - // ================================================================================================ bool KernelBlitManager::initHeap(device::Memory* heap_to_initialize, device::Memory* initial_blocks, uint heap_size, uint number_of_initial_blocks) const { diff --git a/projects/clr/rocclr/device/rocm/rocblit.hpp b/projects/clr/rocclr/device/rocm/rocblit.hpp index 8b7e095026..537fb4f628 100644 --- a/projects/clr/rocclr/device/rocm/rocblit.hpp +++ b/projects/clr/rocclr/device/rocm/rocblit.hpp @@ -292,7 +292,6 @@ class KernelBlitManager : public DmaBlitManager { BlitCopyBufferRectAligned, StreamOpsWrite, StreamOpsWait, - BatchMemOp, Scheduler, GwsInit, InitHeap, @@ -520,9 +519,6 @@ class KernelBlitManager : public DmaBlitManager { uint64_t mask ) const; - //! Batch memory ops- Submits batch of streamWaits and streamWrite operations. - virtual bool batchMemOps(const void* paramArray, size_t paramSize, uint32_t count) const; - virtual amd::Monitor* lockXfer() const { return &lockXferOps_; } virtual bool initHeap(device::Memory* heap_to_initialize, @@ -603,9 +599,9 @@ static const char* BlitName[KernelBlitManager::BlitTotal] = { "__amd_rocclr_fillBufferAligned", "__amd_rocclr_fillBufferAligned2D", "__amd_rocclr_copyBuffer", "__amd_rocclr_copyBufferAligned", "__amd_rocclr_copyBufferRect", "__amd_rocclr_copyBufferRectAligned", "__amd_rocclr_streamOpsWrite", "__amd_rocclr_streamOpsWait", - "__amd_rocclr_batchMemOp", "__amd_rocclr_scheduler", "__amd_rocclr_gwsInit", - "__amd_rocclr_initHeap", "__amd_rocclr_fillImage", "__amd_rocclr_copyImage", - "__amd_rocclr_copyImage1DA", "__amd_rocclr_copyImageToBuffer", "__amd_rocclr_copyBufferToImage" + "__amd_rocclr_scheduler", "__amd_rocclr_gwsInit", "__amd_rocclr_initHeap", + "__amd_rocclr_fillImage", "__amd_rocclr_copyImage", "__amd_rocclr_copyImage1DA", + "__amd_rocclr_copyImageToBuffer", "__amd_rocclr_copyBufferToImage" }; inline void KernelBlitManager::setArgument(amd::Kernel* kernel, size_t index, diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 3cf82009cf..7f13ae5797 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -2715,7 +2715,8 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) { else { // mask is applied on value before performing // the comparision defined by 'condition' - bool result = blitMgr().streamOpsWait(*memory, value, offset, sizeBytes, flags, mask); + bool result = static_cast(blitMgr()).streamOpsWait(*memory, value, offset, + sizeBytes, flags, mask); ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Waiting for value: 0x%lx." " Flags: 0x%lx mask: 0x%lx", value, flags, mask); if (!result) { @@ -2728,7 +2729,8 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) { // Ensure memory ordering preceding the write dispatchBarrierPacket(kBarrierPacketReleaseHeader); - bool result = blitMgr().streamOpsWrite(*memory, value, offset, sizeBytes); + bool result = static_cast(blitMgr()).streamOpsWrite(*memory, value, + offset, sizeBytes); ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Writing value: 0x%lx", value); if (!result) { LogError("submitStreamOperation: Write failed!"); @@ -2739,18 +2741,6 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) { profilingEnd(cmd); } -// ================================================================================================ -void VirtualGPU::submitBatchMemoryOperation(amd::BatchMemoryOperationCommand& cmd) { - // Make sure VirtualGPU has an exclusive access to the resources - amd::ScopedLock lock(execution()); - profilingBegin(cmd); - - bool result = blitMgr().batchMemOps(cmd.getParamPtr(), cmd.paramSize(), cmd.count()); - if (!result) { - LogError("submitBatchMemoryOperation failed!"); - } - profilingEnd(cmd); -} // ================================================================================================ void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) { // Make sure VirtualGPU has an exclusive access to the resources diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp index c30d334c07..d404ee57ba 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp @@ -353,7 +353,6 @@ class VirtualGPU : public device::VirtualDevice { void flush(amd::Command* list = nullptr, bool wait = false); void submitFillMemory(amd::FillMemoryCommand& cmd); void submitStreamOperation(amd::StreamOperationCommand& cmd); - void submitBatchMemoryOperation(amd::BatchMemoryOperationCommand& cmd); void submitVirtualMap(amd::VirtualMapCommand& cmd); void submitMigrateMemObjects(amd::MigrateMemObjectsCommand& cmd); diff --git a/projects/clr/rocclr/platform/command.hpp b/projects/clr/rocclr/platform/command.hpp index 757bb4a9eb..e825bfff98 100644 --- a/projects/clr/rocclr/platform/command.hpp +++ b/projects/clr/rocclr/platform/command.hpp @@ -845,7 +845,7 @@ class FillMemoryCommand : public OneMemoryArgCommand { class StreamOperationCommand : public OneMemoryArgCommand { private: - uint64_t value_; // !< Value to Wait on or to Write. + uint64_t value_; // !< Value to Wait on or to Write. uint64_t mask_; // !< Mask to be applied on signal value for Wait operation. unsigned int flags_; // !< Flags defining the Wait condition. size_t offset_; // !< Offset into memory for Write @@ -888,47 +888,6 @@ class StreamOperationCommand : public OneMemoryArgCommand { const size_t sizeBytes() const { return sizeBytes_; } }; -/*! \brief A batch memory operation command. - * - * \details Batch operations to synchronize the stream via memory operations - * Operations are either 32-bit stream wait or write. - * Wait: All the commands issued after stream wait are not executed - * until the wait condition is true. - * Write: Writes a 32 or 64 bit vaue to the memory using a GPU Blit. - * The operations are enqueued in the order they appear in the array. - */ - -class BatchMemoryOperationCommand : public Command { - public: - BatchMemoryOperationCommand(HostQueue& queue, cl_command_type cmdType, uint32_t count, - uint32_t flags, EventWaitList& eventWaitList, const void* paramArray, - size_t paramSize) - : Command(queue, cmdType, eventWaitList), - count_(count), - paramArray_(paramArray), - flags_(flags), - paramSize_(paramSize) { - // Sanity check - assert(((cmdType == ROCCLR_COMMAND_BATCH_STREAM)) && - "Invalid batch memory operation"); - } - - virtual void submit(device::VirtualDevice& device) { device.submitBatchMemoryOperation(*this); } - - //! Returns the value - const uint64_t count() const { return count_; } - //! Return the pointer to the paramList - const void* getParamPtr() {return paramArray_; } - //! Return the size of a single mem op param in bytes - const size_t paramSize() const { return paramSize_; } - - private: - uint32_t count_; // !< The number of operations in the array. - uint32_t flags_; // !< Reserved for future expansion. Must be 0. - const void* paramArray_; // !< Pointer to the array of individual operations - size_t paramSize_; // !< size in bytes of the param array passed -}; - /*! \brief A generic copy memory command * * \details Used for both buffers and images. Backends are expected @@ -1899,7 +1858,6 @@ union ComputeCommand { CopyMemoryP2PCommand cmd25; SvmPrefetchAsyncCommand cmd26; VirtualMapCommand cmd27; - BatchMemoryOperationCommand cmd28; ComputeCommand() {} ~ComputeCommand() {} }; diff --git a/projects/clr/rocclr/platform/command_utils.hpp b/projects/clr/rocclr/platform/command_utils.hpp index 2a706050cb..254485bc95 100644 --- a/projects/clr/rocclr/platform/command_utils.hpp +++ b/projects/clr/rocclr/platform/command_utils.hpp @@ -24,7 +24,6 @@ // Dummy command types for Stream Wait and Write commands. #define ROCCLR_COMMAND_STREAM_WAIT_VALUE 0x4501 #define ROCCLR_COMMAND_STREAM_WRITE_VALUE 0x4502 -#define ROCCLR_COMMAND_BATCH_STREAM 0x4503 // Stream Wait Value Conidtions #define ROCCLR_STREAM_WAIT_VALUE_GTE 0x0