diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp b/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp
index acfbe1a2e0..7da13a9805 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp
+++ b/projects/clr/hipamd/include/hip/amd_detail/hip_api_trace.hpp
@@ -61,7 +61,7 @@
// - Reset any of the *_STEP_VERSION defines to zero if the corresponding *_MAJOR_VERSION increases
#define HIP_API_TABLE_STEP_VERSION 0
#define HIP_COMPILER_API_TABLE_STEP_VERSION 0
-#define HIP_RUNTIME_API_TABLE_STEP_VERSION 7
+#define HIP_RUNTIME_API_TABLE_STEP_VERSION 6
// HIP API interface
typedef hipError_t (*t___hipPopCallConfiguration)(dim3* gridDim, dim3* blockDim, size_t* sharedMem,
@@ -722,8 +722,6 @@ typedef hipError_t (*t_hipStreamWriteValue32)(hipStream_t stream, void* ptr, uin
unsigned int flags);
typedef hipError_t (*t_hipStreamWriteValue64)(hipStream_t stream, void* ptr, uint64_t value,
unsigned int flags);
-typedef hipError_t (*t_hipStreamBatchMemOp)(hipStream_t stream, unsigned int count,
- hipStreamBatchMemOpParams* paramArray, unsigned int flags);
typedef hipError_t (*t_hipTexObjectCreate)(hipTextureObject_t* pTexObject,
const HIP_RESOURCE_DESC* pResDesc,
const HIP_TEXTURE_DESC* pTexDesc,
@@ -1521,9 +1519,6 @@ struct HipDispatchTable {
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 6
t_hipDeviceGetTexture1DLinearMaxWidth hipDeviceGetTexture1DLinearMaxWidth_fn;
- // HIP_RUNTIME_API_TABLE_STEP_VERSION == 7
- t_hipStreamBatchMemOp hipStreamBatchMemOp_fn;
-
// DO NOT EDIT ABOVE!
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 7
diff --git a/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h b/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h
index 9cb1e275e6..e11b1abcf3 100644
--- a/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h
+++ b/projects/clr/hipamd/include/hip/amd_detail/hip_prof_str.h
@@ -425,8 +425,7 @@ enum hip_api_id_t {
HIP_API_ID_hipMemcpyHtoAAsync = 405,
HIP_API_ID_hipSetValidDevices = 406,
HIP_API_ID_hipExtHostAlloc = 407,
- HIP_API_ID_hipStreamBatchMemOp = 408,
- HIP_API_ID_LAST = 408,
+ HIP_API_ID_LAST = 407,
HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
@@ -546,6 +545,7 @@ static inline const char* hip_api_name(const uint32_t id) {
case HIP_API_ID_hipEventQuery: return "hipEventQuery";
case HIP_API_ID_hipEventRecord: return "hipEventRecord";
case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize";
+ case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError";
case HIP_API_ID_hipExtGetLinkTypeAndHopCount: return "hipExtGetLinkTypeAndHopCount";
case HIP_API_ID_hipExtLaunchKernel: return "hipExtLaunchKernel";
case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: return "hipExtLaunchMultiKernelMultiDevice";
@@ -859,8 +859,6 @@ static inline const char* hip_api_name(const uint32_t id) {
case HIP_API_ID_hipUserObjectRelease: return "hipUserObjectRelease";
case HIP_API_ID_hipUserObjectRetain: return "hipUserObjectRetain";
case HIP_API_ID_hipWaitExternalSemaphoresAsync: return "hipWaitExternalSemaphoresAsync";
- case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError";
- case HIP_API_ID_hipStreamBatchMemOp: return "hipStreamBatchMemOp";
};
return "unknown";
};
@@ -1264,7 +1262,6 @@ static inline uint32_t hipApiIdByName(const char* name) {
if (strcmp("hipUserObjectRelease", name) == 0) return HIP_API_ID_hipUserObjectRelease;
if (strcmp("hipUserObjectRetain", name) == 0) return HIP_API_ID_hipUserObjectRetain;
if (strcmp("hipWaitExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipWaitExternalSemaphoresAsync;
- if (strcmp("hipStreamBatchMemOp", name) == 0) return HIP_API_ID_hipStreamBatchMemOp;
return HIP_API_ID_NONE;
}
@@ -3626,13 +3623,6 @@ typedef struct hip_api_data_s {
unsigned int numExtSems;
hipStream_t stream;
} hipWaitExternalSemaphoresAsync;
- struct {
- hipStream_t stream;
- unsigned int count;
- hipStreamBatchMemOpParams* paramArray;
- hipStreamBatchMemOpParams paramArray__val;
- unsigned int flags;
- } hipStreamBatchMemOp;
} args;
uint64_t *phase_data;
} hip_api_data_t;
@@ -5902,15 +5892,6 @@ typedef struct hip_api_data_s {
cb_data.args.hipStreamWriteValue64.value = (uint64_t)value; \
cb_data.args.hipStreamWriteValue64.flags = (unsigned int)flags; \
};
-
-// hipStreamBatchMemOp[('hipStream_t', 'stream'), ('unsigned int', 'count'),
-// ('hipStreamBatchMemOpParams*', 'paramArray'), ('unsigned int', 'flags')]
-#define INIT_hipStreamBatchMemOp_CB_ARGS_DATA(cb_data) { \
- cb_data.args.hipStreamBatchMemOp.stream = (hipStream_t)stream; \
- cb_data.args.hipStreamBatchMemOp.count = (unsigned int)count; \
- cb_data.args.hipStreamBatchMemOp.paramArray= (hipStreamBatchMemOpParams*)paramArray; \
- cb_data.args.hipStreamBatchMemOp.flags = (unsigned int)flags; \
-};
// hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')]
#define INIT_hipTexRefGetAddress_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipTexRefGetAddress.dev_ptr = (hipDeviceptr_t*)dptr; \
@@ -7546,11 +7527,6 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
// hipStreamWriteValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags')]
case HIP_API_ID_hipStreamWriteValue64:
break;
-// hipStreamBatchMemOp[('hipStream_t', 'stream'), ('unsigned int', 'count'),
-// ('hipStreamBatchMemOpParams*', 'paramArray'), ('unsigned int', 'flags')]
- case HIP_API_ID_hipStreamBatchMemOp:
- if (data->args.hipStreamBatchMemOp.paramArray) data->args.hipStreamBatchMemOp.paramArray__val = *(data->args.hipStreamBatchMemOp.paramArray);
- break;
// hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')]
case HIP_API_ID_hipTexRefGetAddress:
if (data->args.hipTexRefGetAddress.dev_ptr) data->args.hipTexRefGetAddress.dev_ptr__val = *(data->args.hipTexRefGetAddress.dev_ptr);
@@ -10638,15 +10614,6 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue64.flags);
oss << ")";
break;
- case HIP_API_ID_hipStreamBatchMemOp:
- oss << "hipStreamBatchMemOp(";
- oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.stream);
- oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.count);
- if (data->args.hipStreamBatchMemOp.paramArray == NULL) oss << ", paramArray=NULL";
- else { oss << ", paramArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.paramArray__val); }
- oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.flags);
- oss << ")";
- break;
case HIP_API_ID_hipTexRefGetAddress:
oss << "hipTexRefGetAddress(";
if (data->args.hipTexRefGetAddress.dev_ptr == NULL) oss << "dev_ptr=NULL";
diff --git a/projects/clr/hipamd/src/CMakeLists.txt b/projects/clr/hipamd/src/CMakeLists.txt
index a276588825..3480dbbbfa 100644
--- a/projects/clr/hipamd/src/CMakeLists.txt
+++ b/projects/clr/hipamd/src/CMakeLists.txt
@@ -299,7 +299,7 @@ if(WIN32 OR NOT BUILD_SHARED_LIBS)
# rocprofiler-register is not support on Windows
set(HIP_ENABLE_ROCPROFILER_REGISTER OFF)
else()
- option(HIP_ENABLE_ROCPROFILER_REGISTER "Enable rocprofiler-register support" ON)
+ option(HIP_ENABLE_ROCPROFILER_REGISTER "Enable rocprofiler-register support" ON)
endif()
if(HIP_ENABLE_ROCPROFILER_REGISTER)
diff --git a/projects/clr/hipamd/src/amdhip.def b/projects/clr/hipamd/src/amdhip.def
index 355f180d7b..a119c5e3c4 100644
--- a/projects/clr/hipamd/src/amdhip.def
+++ b/projects/clr/hipamd/src/amdhip.def
@@ -480,4 +480,3 @@ hipGraphExecNodeSetParams
hipDrvGraphMemcpyNodeSetParams
hipDrvGraphMemcpyNodeGetParams
hipExtHostAlloc
-hipStreamBatchMemOp
diff --git a/projects/clr/hipamd/src/hip_api_trace.cpp b/projects/clr/hipamd/src/hip_api_trace.cpp
index fe88b3079a..c73b9cce60 100644
--- a/projects/clr/hipamd/src/hip_api_trace.cpp
+++ b/projects/clr/hipamd/src/hip_api_trace.cpp
@@ -602,8 +602,6 @@ hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, uint64_t value, u
uint64_t mask);
hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags);
hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags);
-hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count, hipStreamBatchMemOpParams* paramArray,
- unsigned int flags);
hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc,
const HIP_TEXTURE_DESC* pTexDesc,
const HIP_RESOURCE_VIEW_DESC* pResViewDesc);
@@ -1199,7 +1197,6 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
ptrDispatchTable->hipStreamWaitValue64_fn = hip::hipStreamWaitValue64;
ptrDispatchTable->hipStreamWriteValue32_fn = hip::hipStreamWriteValue32;
ptrDispatchTable->hipStreamWriteValue64_fn = hip::hipStreamWriteValue64;
- ptrDispatchTable->hipStreamBatchMemOp_fn = hip::hipStreamBatchMemOp;
ptrDispatchTable->hipTexObjectCreate_fn = hip::hipTexObjectCreate;
ptrDispatchTable->hipTexObjectDestroy_fn = hip::hipTexObjectDestroy;
ptrDispatchTable->hipTexObjectGetResourceDesc_fn = hip::hipTexObjectGetResourceDesc;
@@ -1890,9 +1887,6 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipDrvGraphMemcpyNodeSetParams_fn, 460)
HIP_ENFORCE_ABI(HipDispatchTable, hipExtHostAlloc_fn, 461)
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 6
HIP_ENFORCE_ABI(HipDispatchTable, hipDeviceGetTexture1DLinearMaxWidth_fn, 462)
-// HIP_RUNTIME_API_TABLE_STEP_VERSION == 7
-HIP_ENFORCE_ABI(HipDispatchTable, hipStreamBatchMemOp_fn, 463);
-
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
@@ -1900,9 +1894,9 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipStreamBatchMemOp_fn, 463);
// HIP_ENFORCE_ABI(
, , 8)
//
// HIP_ENFORCE_ABI_VERSIONING(, 9) <- 8 + 1 = 9
-HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 464)
+HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 463)
-static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 7,
+static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 6,
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
"pointers and then update this check so it is true");
#endif
diff --git a/projects/clr/hipamd/src/hip_hcc.map.in b/projects/clr/hipamd/src/hip_hcc.map.in
index 1b7c323563..ea09a35d53 100644
--- a/projects/clr/hipamd/src/hip_hcc.map.in
+++ b/projects/clr/hipamd/src/hip_hcc.map.in
@@ -583,7 +583,6 @@ local:
hip_6.3 {
global:
hipExtHostAlloc;
- hipStreamBatchMemOp;
local:
*;
} hip_6.2;
diff --git a/projects/clr/hipamd/src/hip_stream_ops.cpp b/projects/clr/hipamd/src/hip_stream_ops.cpp
index 9662a08dd5..481e839ea9 100644
--- a/projects/clr/hipamd/src/hip_stream_ops.cpp
+++ b/projects/clr/hipamd/src/hip_stream_ops.cpp
@@ -23,36 +23,8 @@
#include "platform/command_utils.hpp"
namespace hip {
-hipError_t ihipBatchMemOperation(hipStream_t stream, cl_command_type cmdType, unsigned int count,
- hipStreamBatchMemOpParams* paramArray, unsigned int flags) {
- if (paramArray == nullptr || flags != 0 || count > 256) {
- return hipErrorInvalidValue;
- }
-
- if (!hip::isValid(stream)) {
- return hipErrorContextIsDestroyed;
- }
-
- hip::Stream* hip_stream = hip::getStream(stream);
- amd::Command::EventWaitList waitList;
-
- amd::BatchMemoryOperationCommand* command =
- new amd::BatchMemoryOperationCommand(*hip_stream, cmdType, count,
- flags, waitList, paramArray,
- sizeof(hipStreamBatchMemOpParams));
-
- if (command == nullptr) {
- return hipErrorOutOfMemory;
- }
- command->enqueue();
- command->release();
- HIP_RETURN(hipSuccess);
-}
-
-
hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void* ptr,
- uint64_t value, uint64_t mask, unsigned int flags,
- size_t sizeBytes) {
+ uint64_t value, uint64_t mask, unsigned int flags, size_t sizeBytes) {
size_t offset = 0;
unsigned int outFlags = 0;
@@ -164,15 +136,4 @@ hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value,
0, // flags un-used for now set it to 0
sizeof(uint64_t)));
}
-
-hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count,
- hipStreamBatchMemOpParams* paramArray, unsigned int flags) {
- HIP_INIT_API(hipStreamBatchMemOp, count, paramArray, flags);
- HIP_RETURN_DURATION(ihipBatchMemOperation(
- stream,
- ROCCLR_COMMAND_BATCH_STREAM,
- count,
- paramArray,
- flags));
- }
} // namespace hip
diff --git a/projects/clr/hipamd/src/hip_table_interface.cpp b/projects/clr/hipamd/src/hip_table_interface.cpp
index 237f0d8805..7d2ebf6fa2 100644
--- a/projects/clr/hipamd/src/hip_table_interface.cpp
+++ b/projects/clr/hipamd/src/hip_table_interface.cpp
@@ -1413,10 +1413,6 @@ hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value,
unsigned int flags) {
return hip::GetHipDispatchTable()->hipStreamWriteValue64_fn(stream, ptr, value, flags);
}
-hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count,
- hipStreamBatchMemOpParams* paramArray, unsigned int flags) {
- return hip::GetHipDispatchTable()->hipStreamBatchMemOp_fn(stream, count, paramArray, flags);
-}
hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc,
const HIP_TEXTURE_DESC* pTexDesc,
const HIP_RESOURCE_VIEW_DESC* pResViewDesc) {
diff --git a/projects/clr/rocclr/device/blit.hpp b/projects/clr/rocclr/device/blit.hpp
index ec98886b38..4cd8c55be4 100644
--- a/projects/clr/rocclr/device/blit.hpp
+++ b/projects/clr/rocclr/device/blit.hpp
@@ -233,12 +233,6 @@ class BlitManager : public amd::HeapObject {
uint64_t mask
) const = 0;
- //! Stream batch memory operation
- virtual bool batchMemOps(const void* paramArray,
- size_t paramSize,
- uint32_t count
- ) const = 0;
-
//! Enables synchronization on blit operations
void enableSynchronization() { syncOperation_ = true; }
diff --git a/projects/clr/rocclr/device/blitcl.cpp b/projects/clr/rocclr/device/blitcl.cpp
index 9abfc1e795..897ebf82c8 100644
--- a/projects/clr/rocclr/device/blitcl.cpp
+++ b/projects/clr/rocclr/device/blitcl.cpp
@@ -43,8 +43,6 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
extern void __amd_streamOpsWait(__global uint*, __global ulong*, ulong, ulong, ulong);
- extern void __amd_batchMemOp(__global void*, uint count);
-
extern void __ockl_dm_init_v1(ulong, ulong, uint, uint);
extern void __ockl_gws_init(uint nwm1, uint rid);
@@ -164,10 +162,6 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
ulong4 srcRect, ulong4 dstRect, ulong4 size) {
__amd_copyBufferRectAligned(src, dst, srcRect, dstRect, size);
}
-
- __kernel void __amd_rocclr_batchMemOp(__global void* params, uint count) {
- __amd_batchMemOp(params, count);
- }
);
const char* HipExtraSourceCode = BLIT_KERNELS(
@@ -260,6 +254,7 @@ const char* BlitImageSourceCode = BLIT_KERNELS(
__amd_copyImageToBuffer(src, dstUInt, dstUShort, dstUChar, srcOrigin, dstOrigin, size, format,
pitch);
}
+
);
} // namespace amd::device
diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp
index 28852ca0ca..f5aa124cce 100644
--- a/projects/clr/rocclr/device/device.hpp
+++ b/projects/clr/rocclr/device/device.hpp
@@ -92,7 +92,6 @@ class SvmMapMemoryCommand;
class SvmUnmapMemoryCommand;
class SvmPrefetchAsyncCommand;
class StreamOperationCommand;
-class BatchMemoryOperationCommand;
class VirtualMapCommand;
class ExternalSemaphoreCmd;
class Isa;
@@ -1309,9 +1308,6 @@ class VirtualDevice : public amd::HeapObject {
ShouldNotReachHere();
}
virtual void submitStreamOperation(amd::StreamOperationCommand& cmd) { ShouldNotReachHere(); }
- virtual void submitBatchMemoryOperation(amd::BatchMemoryOperationCommand& cmd) {
- ShouldNotReachHere();
- }
virtual void submitVirtualMap(amd::VirtualMapCommand& cmd) { ShouldNotReachHere(); }
virtual address allocKernelArguments(size_t size, size_t alignment) { return nullptr; }
diff --git a/projects/clr/rocclr/device/pal/palblit.hpp b/projects/clr/rocclr/device/pal/palblit.hpp
index 7df8731328..9099a9a47c 100644
--- a/projects/clr/rocclr/device/pal/palblit.hpp
+++ b/projects/clr/rocclr/device/pal/palblit.hpp
@@ -475,13 +475,6 @@ class KernelBlitManager : public DmaBlitManager {
uint number_of_initial_blocks
) const;
- //! Batch memory ops- Submits batch of streamWaits and streamWrite operations.
- virtual bool batchMemOps(const void* paramArray,
- size_t paramSize,
- uint32_t count) const {
- assert(!"Unimplemented");
- return false;
- }
private:
static constexpr size_t MaxXferBuffers = 2;
static constexpr uint TransferSplitSize = 3;
diff --git a/projects/clr/rocclr/device/rocm/rocblit.cpp b/projects/clr/rocclr/device/rocm/rocblit.cpp
index 4936cbb3c6..737529aa0e 100644
--- a/projects/clr/rocclr/device/rocm/rocblit.cpp
+++ b/projects/clr/rocclr/device/rocm/rocblit.cpp
@@ -2583,38 +2583,6 @@ bool KernelBlitManager::streamOpsWait(device::Memory& memory, uint64_t value, si
return result;
}
-// ================================================================================================
-bool KernelBlitManager::batchMemOps(const void* paramArray, size_t paramSize,
- uint32_t count) const {
- amd::ScopedLock k(lockXferOps_);
- bool result = false;
- uint blitType = BatchMemOp;
- size_t dim = 1;
-
- size_t globalWorkOffset[1] = { 0 };
- size_t globalWorkSize[1] = { count };
- size_t localWorkSize[1] = { 1 };
-
- // Get constant buffer and copy the array of parameters
- constexpr bool kDirectVa = true;
- auto constBuf = gpu().allocKernArg((count * paramSize), kCBAlignment);
- memcpy(constBuf, paramArray, (count * paramSize));
-
- setArgument(kernels_[blitType], 0, sizeof(cl_mem), constBuf, 0, nullptr, kDirectVa);
- setArgument(kernels_[blitType], 1, sizeof(cl_mem), &count);
-
- // Create ND range object for the kernel's execution
- amd::NDRangeContainer ndrange(dim, globalWorkOffset, globalWorkSize, localWorkSize);
-
- // Execute the blit
- address parameters = captureArguments(kernels_[blitType]);
- result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters, nullptr);
- releaseArguments(parameters);
- synchronize();
-
- return result;
-}
-
// ================================================================================================
bool KernelBlitManager::initHeap(device::Memory* heap_to_initialize, device::Memory* initial_blocks,
uint heap_size, uint number_of_initial_blocks) const {
diff --git a/projects/clr/rocclr/device/rocm/rocblit.hpp b/projects/clr/rocclr/device/rocm/rocblit.hpp
index 8b7e095026..537fb4f628 100644
--- a/projects/clr/rocclr/device/rocm/rocblit.hpp
+++ b/projects/clr/rocclr/device/rocm/rocblit.hpp
@@ -292,7 +292,6 @@ class KernelBlitManager : public DmaBlitManager {
BlitCopyBufferRectAligned,
StreamOpsWrite,
StreamOpsWait,
- BatchMemOp,
Scheduler,
GwsInit,
InitHeap,
@@ -520,9 +519,6 @@ class KernelBlitManager : public DmaBlitManager {
uint64_t mask
) const;
- //! Batch memory ops- Submits batch of streamWaits and streamWrite operations.
- virtual bool batchMemOps(const void* paramArray, size_t paramSize, uint32_t count) const;
-
virtual amd::Monitor* lockXfer() const { return &lockXferOps_; }
virtual bool initHeap(device::Memory* heap_to_initialize,
@@ -603,9 +599,9 @@ static const char* BlitName[KernelBlitManager::BlitTotal] = {
"__amd_rocclr_fillBufferAligned", "__amd_rocclr_fillBufferAligned2D", "__amd_rocclr_copyBuffer",
"__amd_rocclr_copyBufferAligned", "__amd_rocclr_copyBufferRect",
"__amd_rocclr_copyBufferRectAligned", "__amd_rocclr_streamOpsWrite", "__amd_rocclr_streamOpsWait",
- "__amd_rocclr_batchMemOp", "__amd_rocclr_scheduler", "__amd_rocclr_gwsInit",
- "__amd_rocclr_initHeap", "__amd_rocclr_fillImage", "__amd_rocclr_copyImage",
- "__amd_rocclr_copyImage1DA", "__amd_rocclr_copyImageToBuffer", "__amd_rocclr_copyBufferToImage"
+ "__amd_rocclr_scheduler", "__amd_rocclr_gwsInit", "__amd_rocclr_initHeap",
+ "__amd_rocclr_fillImage", "__amd_rocclr_copyImage", "__amd_rocclr_copyImage1DA",
+ "__amd_rocclr_copyImageToBuffer", "__amd_rocclr_copyBufferToImage"
};
inline void KernelBlitManager::setArgument(amd::Kernel* kernel, size_t index,
diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp
index 3cf82009cf..7f13ae5797 100644
--- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp
+++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp
@@ -2715,7 +2715,8 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
else {
// mask is applied on value before performing
// the comparision defined by 'condition'
- bool result = blitMgr().streamOpsWait(*memory, value, offset, sizeBytes, flags, mask);
+ bool result = static_cast(blitMgr()).streamOpsWait(*memory, value, offset,
+ sizeBytes, flags, mask);
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Waiting for value: 0x%lx."
" Flags: 0x%lx mask: 0x%lx", value, flags, mask);
if (!result) {
@@ -2728,7 +2729,8 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
// Ensure memory ordering preceding the write
dispatchBarrierPacket(kBarrierPacketReleaseHeader);
- bool result = blitMgr().streamOpsWrite(*memory, value, offset, sizeBytes);
+ bool result = static_cast(blitMgr()).streamOpsWrite(*memory, value,
+ offset, sizeBytes);
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Writing value: 0x%lx", value);
if (!result) {
LogError("submitStreamOperation: Write failed!");
@@ -2739,18 +2741,6 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
profilingEnd(cmd);
}
-// ================================================================================================
-void VirtualGPU::submitBatchMemoryOperation(amd::BatchMemoryOperationCommand& cmd) {
- // Make sure VirtualGPU has an exclusive access to the resources
- amd::ScopedLock lock(execution());
- profilingBegin(cmd);
-
- bool result = blitMgr().batchMemOps(cmd.getParamPtr(), cmd.paramSize(), cmd.count());
- if (!result) {
- LogError("submitBatchMemoryOperation failed!");
- }
- profilingEnd(cmd);
-}
// ================================================================================================
void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
// Make sure VirtualGPU has an exclusive access to the resources
diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.hpp b/projects/clr/rocclr/device/rocm/rocvirtual.hpp
index c30d334c07..d404ee57ba 100644
--- a/projects/clr/rocclr/device/rocm/rocvirtual.hpp
+++ b/projects/clr/rocclr/device/rocm/rocvirtual.hpp
@@ -353,7 +353,6 @@ class VirtualGPU : public device::VirtualDevice {
void flush(amd::Command* list = nullptr, bool wait = false);
void submitFillMemory(amd::FillMemoryCommand& cmd);
void submitStreamOperation(amd::StreamOperationCommand& cmd);
- void submitBatchMemoryOperation(amd::BatchMemoryOperationCommand& cmd);
void submitVirtualMap(amd::VirtualMapCommand& cmd);
void submitMigrateMemObjects(amd::MigrateMemObjectsCommand& cmd);
diff --git a/projects/clr/rocclr/platform/command.hpp b/projects/clr/rocclr/platform/command.hpp
index 757bb4a9eb..e825bfff98 100644
--- a/projects/clr/rocclr/platform/command.hpp
+++ b/projects/clr/rocclr/platform/command.hpp
@@ -845,7 +845,7 @@ class FillMemoryCommand : public OneMemoryArgCommand {
class StreamOperationCommand : public OneMemoryArgCommand {
private:
- uint64_t value_; // !< Value to Wait on or to Write.
+ uint64_t value_; // !< Value to Wait on or to Write.
uint64_t mask_; // !< Mask to be applied on signal value for Wait operation.
unsigned int flags_; // !< Flags defining the Wait condition.
size_t offset_; // !< Offset into memory for Write
@@ -888,47 +888,6 @@ class StreamOperationCommand : public OneMemoryArgCommand {
const size_t sizeBytes() const { return sizeBytes_; }
};
-/*! \brief A batch memory operation command.
- *
- * \details Batch operations to synchronize the stream via memory operations
- * Operations are either 32-bit stream wait or write.
- * Wait: All the commands issued after stream wait are not executed
- * until the wait condition is true.
- * Write: Writes a 32 or 64 bit vaue to the memory using a GPU Blit.
- * The operations are enqueued in the order they appear in the array.
- */
-
-class BatchMemoryOperationCommand : public Command {
- public:
- BatchMemoryOperationCommand(HostQueue& queue, cl_command_type cmdType, uint32_t count,
- uint32_t flags, EventWaitList& eventWaitList, const void* paramArray,
- size_t paramSize)
- : Command(queue, cmdType, eventWaitList),
- count_(count),
- paramArray_(paramArray),
- flags_(flags),
- paramSize_(paramSize) {
- // Sanity check
- assert(((cmdType == ROCCLR_COMMAND_BATCH_STREAM)) &&
- "Invalid batch memory operation");
- }
-
- virtual void submit(device::VirtualDevice& device) { device.submitBatchMemoryOperation(*this); }
-
- //! Returns the value
- const uint64_t count() const { return count_; }
- //! Return the pointer to the paramList
- const void* getParamPtr() {return paramArray_; }
- //! Return the size of a single mem op param in bytes
- const size_t paramSize() const { return paramSize_; }
-
- private:
- uint32_t count_; // !< The number of operations in the array.
- uint32_t flags_; // !< Reserved for future expansion. Must be 0.
- const void* paramArray_; // !< Pointer to the array of individual operations
- size_t paramSize_; // !< size in bytes of the param array passed
-};
-
/*! \brief A generic copy memory command
*
* \details Used for both buffers and images. Backends are expected
@@ -1899,7 +1858,6 @@ union ComputeCommand {
CopyMemoryP2PCommand cmd25;
SvmPrefetchAsyncCommand cmd26;
VirtualMapCommand cmd27;
- BatchMemoryOperationCommand cmd28;
ComputeCommand() {}
~ComputeCommand() {}
};
diff --git a/projects/clr/rocclr/platform/command_utils.hpp b/projects/clr/rocclr/platform/command_utils.hpp
index 2a706050cb..254485bc95 100644
--- a/projects/clr/rocclr/platform/command_utils.hpp
+++ b/projects/clr/rocclr/platform/command_utils.hpp
@@ -24,7 +24,6 @@
// Dummy command types for Stream Wait and Write commands.
#define ROCCLR_COMMAND_STREAM_WAIT_VALUE 0x4501
#define ROCCLR_COMMAND_STREAM_WRITE_VALUE 0x4502
-#define ROCCLR_COMMAND_BATCH_STREAM 0x4503
// Stream Wait Value Conidtions
#define ROCCLR_STREAM_WAIT_VALUE_GTE 0x0