SWDEV-440866 - [hip-roclr] Adds support to batch memory operations APIs

Change-Id: I449ffca44bbb04d13348d112e896d603c70fd485
This commit is contained in:
Sourabh Betigeri
2024-03-26 10:22:35 -07:00
committed by Sourabh Betigeri
vanhempi c47f9dda58
commit bd5d8e9baf
18 muutettua tiedostoa jossa 217 lisäystä ja 16 poistoa
@@ -61,7 +61,7 @@
// - Reset any of the *_STEP_VERSION defines to zero if the corresponding *_MAJOR_VERSION increases
#define HIP_API_TABLE_STEP_VERSION 0
#define HIP_COMPILER_API_TABLE_STEP_VERSION 0
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 6
#define HIP_RUNTIME_API_TABLE_STEP_VERSION 7
// HIP API interface
typedef hipError_t (*t___hipPopCallConfiguration)(dim3* gridDim, dim3* blockDim, size_t* sharedMem,
@@ -722,6 +722,8 @@ typedef hipError_t (*t_hipStreamWriteValue32)(hipStream_t stream, void* ptr, uin
unsigned int flags);
typedef hipError_t (*t_hipStreamWriteValue64)(hipStream_t stream, void* ptr, uint64_t value,
unsigned int flags);
typedef hipError_t (*t_hipStreamBatchMemOp)(hipStream_t stream, unsigned int count,
hipStreamBatchMemOpParams* paramArray, unsigned int flags);
typedef hipError_t (*t_hipTexObjectCreate)(hipTextureObject_t* pTexObject,
const HIP_RESOURCE_DESC* pResDesc,
const HIP_TEXTURE_DESC* pTexDesc,
@@ -1519,6 +1521,9 @@ struct HipDispatchTable {
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 6
t_hipDeviceGetTexture1DLinearMaxWidth hipDeviceGetTexture1DLinearMaxWidth_fn;
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 7
t_hipStreamBatchMemOp hipStreamBatchMemOp_fn;
// DO NOT EDIT ABOVE!
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 7
@@ -425,7 +425,8 @@ enum hip_api_id_t {
HIP_API_ID_hipMemcpyHtoAAsync = 405,
HIP_API_ID_hipSetValidDevices = 406,
HIP_API_ID_hipExtHostAlloc = 407,
HIP_API_ID_LAST = 407,
HIP_API_ID_hipStreamBatchMemOp = 408,
HIP_API_ID_LAST = 408,
HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
@@ -545,7 +546,6 @@ static inline const char* hip_api_name(const uint32_t id) {
case HIP_API_ID_hipEventQuery: return "hipEventQuery";
case HIP_API_ID_hipEventRecord: return "hipEventRecord";
case HIP_API_ID_hipEventSynchronize: return "hipEventSynchronize";
case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError";
case HIP_API_ID_hipExtGetLinkTypeAndHopCount: return "hipExtGetLinkTypeAndHopCount";
case HIP_API_ID_hipExtLaunchKernel: return "hipExtLaunchKernel";
case HIP_API_ID_hipExtLaunchMultiKernelMultiDevice: return "hipExtLaunchMultiKernelMultiDevice";
@@ -859,6 +859,8 @@ static inline const char* hip_api_name(const uint32_t id) {
case HIP_API_ID_hipUserObjectRelease: return "hipUserObjectRelease";
case HIP_API_ID_hipUserObjectRetain: return "hipUserObjectRetain";
case HIP_API_ID_hipWaitExternalSemaphoresAsync: return "hipWaitExternalSemaphoresAsync";
case HIP_API_ID_hipExtGetLastError: return "hipExtGetLastError";
case HIP_API_ID_hipStreamBatchMemOp: return "hipStreamBatchMemOp";
};
return "unknown";
};
@@ -1262,6 +1264,7 @@ static inline uint32_t hipApiIdByName(const char* name) {
if (strcmp("hipUserObjectRelease", name) == 0) return HIP_API_ID_hipUserObjectRelease;
if (strcmp("hipUserObjectRetain", name) == 0) return HIP_API_ID_hipUserObjectRetain;
if (strcmp("hipWaitExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipWaitExternalSemaphoresAsync;
if (strcmp("hipStreamBatchMemOp", name) == 0) return HIP_API_ID_hipStreamBatchMemOp;
return HIP_API_ID_NONE;
}
@@ -3623,6 +3626,13 @@ typedef struct hip_api_data_s {
unsigned int numExtSems;
hipStream_t stream;
} hipWaitExternalSemaphoresAsync;
struct {
hipStream_t stream;
unsigned int count;
hipStreamBatchMemOpParams* paramArray;
hipStreamBatchMemOpParams paramArray__val;
unsigned int flags;
} hipStreamBatchMemOp;
} args;
uint64_t *phase_data;
} hip_api_data_t;
@@ -5892,6 +5902,15 @@ typedef struct hip_api_data_s {
cb_data.args.hipStreamWriteValue64.value = (uint64_t)value; \
cb_data.args.hipStreamWriteValue64.flags = (unsigned int)flags; \
};
// hipStreamBatchMemOp[('hipStream_t', 'stream'), ('unsigned int', 'count'),
// ('hipStreamBatchMemOpParams*', 'paramArray'), ('unsigned int', 'flags')]
#define INIT_hipStreamBatchMemOp_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipStreamBatchMemOp.stream = (hipStream_t)stream; \
cb_data.args.hipStreamBatchMemOp.count = (unsigned int)count; \
cb_data.args.hipStreamBatchMemOp.paramArray= (hipStreamBatchMemOpParams*)paramArray; \
cb_data.args.hipStreamBatchMemOp.flags = (unsigned int)flags; \
};
// hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')]
#define INIT_hipTexRefGetAddress_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipTexRefGetAddress.dev_ptr = (hipDeviceptr_t*)dptr; \
@@ -7527,6 +7546,11 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
// hipStreamWriteValue64[('hipStream_t', 'stream'), ('void*', 'ptr'), ('uint64_t', 'value'), ('unsigned int', 'flags')]
case HIP_API_ID_hipStreamWriteValue64:
break;
// hipStreamBatchMemOp[('hipStream_t', 'stream'), ('unsigned int', 'count'),
// ('hipStreamBatchMemOpParams*', 'paramArray'), ('unsigned int', 'flags')]
case HIP_API_ID_hipStreamBatchMemOp:
if (data->args.hipStreamBatchMemOp.paramArray) data->args.hipStreamBatchMemOp.paramArray__val = *(data->args.hipStreamBatchMemOp.paramArray);
break;
// hipTexRefGetAddress[('hipDeviceptr_t*', 'dev_ptr'), ('const textureReference*', 'texRef')]
case HIP_API_ID_hipTexRefGetAddress:
if (data->args.hipTexRefGetAddress.dev_ptr) data->args.hipTexRefGetAddress.dev_ptr__val = *(data->args.hipTexRefGetAddress.dev_ptr);
@@ -10614,6 +10638,15 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamWriteValue64.flags);
oss << ")";
break;
case HIP_API_ID_hipStreamBatchMemOp:
oss << "hipStreamBatchMemOp(";
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.stream);
oss << ", count="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.count);
if (data->args.hipStreamBatchMemOp.paramArray == NULL) oss << ", paramArray=NULL";
else { oss << ", paramArray="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.paramArray__val); }
oss << ", flags="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamBatchMemOp.flags);
oss << ")";
break;
case HIP_API_ID_hipTexRefGetAddress:
oss << "hipTexRefGetAddress(";
if (data->args.hipTexRefGetAddress.dev_ptr == NULL) oss << "dev_ptr=NULL";
+1 -1
Näytä tiedosto
@@ -299,7 +299,7 @@ if(WIN32 OR NOT BUILD_SHARED_LIBS)
# rocprofiler-register is not support on Windows
set(HIP_ENABLE_ROCPROFILER_REGISTER OFF)
else()
option(HIP_ENABLE_ROCPROFILER_REGISTER "Enable rocprofiler-register support" ON)
option(HIP_ENABLE_ROCPROFILER_REGISTER "Enable rocprofiler-register support" ON)
endif()
if(HIP_ENABLE_ROCPROFILER_REGISTER)
+1
Näytä tiedosto
@@ -480,3 +480,4 @@ hipGraphExecNodeSetParams
hipDrvGraphMemcpyNodeSetParams
hipDrvGraphMemcpyNodeGetParams
hipExtHostAlloc
hipStreamBatchMemOp
+8 -2
Näytä tiedosto
@@ -602,6 +602,8 @@ hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, uint64_t value, u
uint64_t mask);
hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, uint32_t value, unsigned int flags);
hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value, unsigned int flags);
hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count, hipStreamBatchMemOpParams* paramArray,
unsigned int flags);
hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc,
const HIP_TEXTURE_DESC* pTexDesc,
const HIP_RESOURCE_VIEW_DESC* pResViewDesc);
@@ -1197,6 +1199,7 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
ptrDispatchTable->hipStreamWaitValue64_fn = hip::hipStreamWaitValue64;
ptrDispatchTable->hipStreamWriteValue32_fn = hip::hipStreamWriteValue32;
ptrDispatchTable->hipStreamWriteValue64_fn = hip::hipStreamWriteValue64;
ptrDispatchTable->hipStreamBatchMemOp_fn = hip::hipStreamBatchMemOp;
ptrDispatchTable->hipTexObjectCreate_fn = hip::hipTexObjectCreate;
ptrDispatchTable->hipTexObjectDestroy_fn = hip::hipTexObjectDestroy;
ptrDispatchTable->hipTexObjectGetResourceDesc_fn = hip::hipTexObjectGetResourceDesc;
@@ -1887,6 +1890,9 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipDrvGraphMemcpyNodeSetParams_fn, 460)
HIP_ENFORCE_ABI(HipDispatchTable, hipExtHostAlloc_fn, 461)
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 6
HIP_ENFORCE_ABI(HipDispatchTable, hipDeviceGetTexture1DLinearMaxWidth_fn, 462)
// HIP_RUNTIME_API_TABLE_STEP_VERSION == 7
HIP_ENFORCE_ABI(HipDispatchTable, hipStreamBatchMemOp_fn, 463);
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
@@ -1894,9 +1900,9 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipDeviceGetTexture1DLinearMaxWidth_fn, 462)
// HIP_ENFORCE_ABI(<table>, <functor>, 8)
//
// HIP_ENFORCE_ABI_VERSIONING(<table>, 9) <- 8 + 1 = 9
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 463)
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 464)
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 6,
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 7,
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
"pointers and then update this check so it is true");
#endif
+1
Näytä tiedosto
@@ -583,6 +583,7 @@ local:
hip_6.3 {
global:
hipExtHostAlloc;
hipStreamBatchMemOp;
local:
*;
} hip_6.2;
+40 -1
Näytä tiedosto
@@ -23,8 +23,36 @@
#include "platform/command_utils.hpp"
namespace hip {
hipError_t ihipBatchMemOperation(hipStream_t stream, cl_command_type cmdType, unsigned int count,
hipStreamBatchMemOpParams* paramArray, unsigned int flags) {
if (paramArray == nullptr || flags != 0 || count > 256) {
return hipErrorInvalidValue;
}
if (!hip::isValid(stream)) {
return hipErrorContextIsDestroyed;
}
hip::Stream* hip_stream = hip::getStream(stream);
amd::Command::EventWaitList waitList;
amd::BatchMemoryOperationCommand* command =
new amd::BatchMemoryOperationCommand(*hip_stream, cmdType, count,
flags, waitList, paramArray,
sizeof(hipStreamBatchMemOpParams));
if (command == nullptr) {
return hipErrorOutOfMemory;
}
command->enqueue();
command->release();
HIP_RETURN(hipSuccess);
}
hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void* ptr,
uint64_t value, uint64_t mask, unsigned int flags, size_t sizeBytes) {
uint64_t value, uint64_t mask, unsigned int flags,
size_t sizeBytes) {
size_t offset = 0;
unsigned int outFlags = 0;
@@ -136,4 +164,15 @@ hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value,
0, // flags un-used for now set it to 0
sizeof(uint64_t)));
}
hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count,
hipStreamBatchMemOpParams* paramArray, unsigned int flags) {
HIP_INIT_API(hipStreamBatchMemOp, count, paramArray, flags);
HIP_RETURN_DURATION(ihipBatchMemOperation(
stream,
ROCCLR_COMMAND_BATCH_STREAM,
count,
paramArray,
flags));
}
} // namespace hip
@@ -1413,6 +1413,10 @@ hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, uint64_t value,
unsigned int flags) {
return hip::GetHipDispatchTable()->hipStreamWriteValue64_fn(stream, ptr, value, flags);
}
hipError_t hipStreamBatchMemOp(hipStream_t stream, unsigned int count,
hipStreamBatchMemOpParams* paramArray, unsigned int flags) {
return hip::GetHipDispatchTable()->hipStreamBatchMemOp_fn(stream, count, paramArray, flags);
}
hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject, const HIP_RESOURCE_DESC* pResDesc,
const HIP_TEXTURE_DESC* pTexDesc,
const HIP_RESOURCE_VIEW_DESC* pResViewDesc) {
+6
Näytä tiedosto
@@ -233,6 +233,12 @@ class BlitManager : public amd::HeapObject {
uint64_t mask
) const = 0;
//! Stream batch memory operation
virtual bool batchMemOps(const void* paramArray,
size_t paramSize,
uint32_t count
) const = 0;
//! Enables synchronization on blit operations
void enableSynchronization() { syncOperation_ = true; }
+6 -1
Näytä tiedosto
@@ -43,6 +43,8 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
extern void __amd_streamOpsWait(__global uint*, __global ulong*, ulong, ulong, ulong);
extern void __amd_batchMemOp(__global void*, uint count);
extern void __ockl_dm_init_v1(ulong, ulong, uint, uint);
extern void __ockl_gws_init(uint nwm1, uint rid);
@@ -162,6 +164,10 @@ const char* BlitLinearSourceCode = BLIT_KERNELS(
ulong4 srcRect, ulong4 dstRect, ulong4 size) {
__amd_copyBufferRectAligned(src, dst, srcRect, dstRect, size);
}
__kernel void __amd_rocclr_batchMemOp(__global void* params, uint count) {
__amd_batchMemOp(params, count);
}
);
const char* HipExtraSourceCode = BLIT_KERNELS(
@@ -254,7 +260,6 @@ const char* BlitImageSourceCode = BLIT_KERNELS(
__amd_copyImageToBuffer(src, dstUInt, dstUShort, dstUChar, srcOrigin, dstOrigin, size, format,
pitch);
}
);
} // namespace amd::device
+4
Näytä tiedosto
@@ -92,6 +92,7 @@ class SvmMapMemoryCommand;
class SvmUnmapMemoryCommand;
class SvmPrefetchAsyncCommand;
class StreamOperationCommand;
class BatchMemoryOperationCommand;
class VirtualMapCommand;
class ExternalSemaphoreCmd;
class Isa;
@@ -1308,6 +1309,9 @@ class VirtualDevice : public amd::HeapObject {
ShouldNotReachHere();
}
virtual void submitStreamOperation(amd::StreamOperationCommand& cmd) { ShouldNotReachHere(); }
virtual void submitBatchMemoryOperation(amd::BatchMemoryOperationCommand& cmd) {
ShouldNotReachHere();
}
virtual void submitVirtualMap(amd::VirtualMapCommand& cmd) { ShouldNotReachHere(); }
virtual address allocKernelArguments(size_t size, size_t alignment) { return nullptr; }
+7
Näytä tiedosto
@@ -475,6 +475,13 @@ class KernelBlitManager : public DmaBlitManager {
uint number_of_initial_blocks
) const;
//! Batch memory ops- Submits batch of streamWaits and streamWrite operations.
virtual bool batchMemOps(const void* paramArray,
size_t paramSize,
uint32_t count) const {
assert(!"Unimplemented");
return false;
}
private:
static constexpr size_t MaxXferBuffers = 2;
static constexpr uint TransferSplitSize = 3;
+32
Näytä tiedosto
@@ -2583,6 +2583,38 @@ bool KernelBlitManager::streamOpsWait(device::Memory& memory, uint64_t value, si
return result;
}
// ================================================================================================
bool KernelBlitManager::batchMemOps(const void* paramArray, size_t paramSize,
uint32_t count) const {
amd::ScopedLock k(lockXferOps_);
bool result = false;
uint blitType = BatchMemOp;
size_t dim = 1;
size_t globalWorkOffset[1] = { 0 };
size_t globalWorkSize[1] = { count };
size_t localWorkSize[1] = { 1 };
// Get constant buffer and copy the array of parameters
constexpr bool kDirectVa = true;
auto constBuf = gpu().allocKernArg((count * paramSize), kCBAlignment);
memcpy(constBuf, paramArray, (count * paramSize));
setArgument(kernels_[blitType], 0, sizeof(cl_mem), constBuf, 0, nullptr, kDirectVa);
setArgument(kernels_[blitType], 1, sizeof(cl_mem), &count);
// Create ND range object for the kernel's execution
amd::NDRangeContainer ndrange(dim, globalWorkOffset, globalWorkSize, localWorkSize);
// Execute the blit
address parameters = captureArguments(kernels_[blitType]);
result = gpu().submitKernelInternal(ndrange, *kernels_[blitType], parameters, nullptr);
releaseArguments(parameters);
synchronize();
return result;
}
// ================================================================================================
bool KernelBlitManager::initHeap(device::Memory* heap_to_initialize, device::Memory* initial_blocks,
uint heap_size, uint number_of_initial_blocks) const {
+7 -3
Näytä tiedosto
@@ -292,6 +292,7 @@ class KernelBlitManager : public DmaBlitManager {
BlitCopyBufferRectAligned,
StreamOpsWrite,
StreamOpsWait,
BatchMemOp,
Scheduler,
GwsInit,
InitHeap,
@@ -519,6 +520,9 @@ class KernelBlitManager : public DmaBlitManager {
uint64_t mask
) const;
//! Batch memory ops- Submits batch of streamWaits and streamWrite operations.
virtual bool batchMemOps(const void* paramArray, size_t paramSize, uint32_t count) const;
virtual amd::Monitor* lockXfer() const { return &lockXferOps_; }
virtual bool initHeap(device::Memory* heap_to_initialize,
@@ -599,9 +603,9 @@ static const char* BlitName[KernelBlitManager::BlitTotal] = {
"__amd_rocclr_fillBufferAligned", "__amd_rocclr_fillBufferAligned2D", "__amd_rocclr_copyBuffer",
"__amd_rocclr_copyBufferAligned", "__amd_rocclr_copyBufferRect",
"__amd_rocclr_copyBufferRectAligned", "__amd_rocclr_streamOpsWrite", "__amd_rocclr_streamOpsWait",
"__amd_rocclr_scheduler", "__amd_rocclr_gwsInit", "__amd_rocclr_initHeap",
"__amd_rocclr_fillImage", "__amd_rocclr_copyImage", "__amd_rocclr_copyImage1DA",
"__amd_rocclr_copyImageToBuffer", "__amd_rocclr_copyBufferToImage"
"__amd_rocclr_batchMemOp", "__amd_rocclr_scheduler", "__amd_rocclr_gwsInit",
"__amd_rocclr_initHeap", "__amd_rocclr_fillImage", "__amd_rocclr_copyImage",
"__amd_rocclr_copyImage1DA", "__amd_rocclr_copyImageToBuffer", "__amd_rocclr_copyBufferToImage"
};
inline void KernelBlitManager::setArgument(amd::Kernel* kernel, size_t index,
+14 -4
Näytä tiedosto
@@ -2715,8 +2715,7 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
else {
// mask is applied on value before performing
// the comparision defined by 'condition'
bool result = static_cast<KernelBlitManager&>(blitMgr()).streamOpsWait(*memory, value, offset,
sizeBytes, flags, mask);
bool result = blitMgr().streamOpsWait(*memory, value, offset, sizeBytes, flags, mask);
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Waiting for value: 0x%lx."
" Flags: 0x%lx mask: 0x%lx", value, flags, mask);
if (!result) {
@@ -2729,8 +2728,7 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
// Ensure memory ordering preceding the write
dispatchBarrierPacket(kBarrierPacketReleaseHeader);
bool result = static_cast<KernelBlitManager&>(blitMgr()).streamOpsWrite(*memory, value,
offset, sizeBytes);
bool result = blitMgr().streamOpsWrite(*memory, value, offset, sizeBytes);
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "Writing value: 0x%lx", value);
if (!result) {
LogError("submitStreamOperation: Write failed!");
@@ -2741,6 +2739,18 @@ void VirtualGPU::submitStreamOperation(amd::StreamOperationCommand& cmd) {
profilingEnd(cmd);
}
// ================================================================================================
void VirtualGPU::submitBatchMemoryOperation(amd::BatchMemoryOperationCommand& cmd) {
// Make sure VirtualGPU has an exclusive access to the resources
amd::ScopedLock lock(execution());
profilingBegin(cmd);
bool result = blitMgr().batchMemOps(cmd.getParamPtr(), cmd.paramSize(), cmd.count());
if (!result) {
LogError("submitBatchMemoryOperation failed!");
}
profilingEnd(cmd);
}
// ================================================================================================
void VirtualGPU::submitVirtualMap(amd::VirtualMapCommand& vcmd) {
// Make sure VirtualGPU has an exclusive access to the resources
+1
Näytä tiedosto
@@ -353,6 +353,7 @@ class VirtualGPU : public device::VirtualDevice {
void flush(amd::Command* list = nullptr, bool wait = false);
void submitFillMemory(amd::FillMemoryCommand& cmd);
void submitStreamOperation(amd::StreamOperationCommand& cmd);
void submitBatchMemoryOperation(amd::BatchMemoryOperationCommand& cmd);
void submitVirtualMap(amd::VirtualMapCommand& cmd);
void submitMigrateMemObjects(amd::MigrateMemObjectsCommand& cmd);
+43 -1
Näytä tiedosto
@@ -845,7 +845,7 @@ class FillMemoryCommand : public OneMemoryArgCommand {
class StreamOperationCommand : public OneMemoryArgCommand {
private:
uint64_t value_; // !< Value to Wait on or to Write.
uint64_t value_; // !< Value to Wait on or to Write.
uint64_t mask_; // !< Mask to be applied on signal value for Wait operation.
unsigned int flags_; // !< Flags defining the Wait condition.
size_t offset_; // !< Offset into memory for Write
@@ -888,6 +888,47 @@ class StreamOperationCommand : public OneMemoryArgCommand {
const size_t sizeBytes() const { return sizeBytes_; }
};
/*! \brief A batch memory operation command.
*
* \details Batch operations to synchronize the stream via memory operations
* Operations are either 32-bit stream wait or write.
* Wait: All the commands issued after stream wait are not executed
* until the wait condition is true.
* Write: Writes a 32 or 64 bit vaue to the memory using a GPU Blit.
* The operations are enqueued in the order they appear in the array.
*/
class BatchMemoryOperationCommand : public Command {
public:
BatchMemoryOperationCommand(HostQueue& queue, cl_command_type cmdType, uint32_t count,
uint32_t flags, EventWaitList& eventWaitList, const void* paramArray,
size_t paramSize)
: Command(queue, cmdType, eventWaitList),
count_(count),
paramArray_(paramArray),
flags_(flags),
paramSize_(paramSize) {
// Sanity check
assert(((cmdType == ROCCLR_COMMAND_BATCH_STREAM)) &&
"Invalid batch memory operation");
}
virtual void submit(device::VirtualDevice& device) { device.submitBatchMemoryOperation(*this); }
//! Returns the value
const uint64_t count() const { return count_; }
//! Return the pointer to the paramList
const void* getParamPtr() {return paramArray_; }
//! Return the size of a single mem op param in bytes
const size_t paramSize() const { return paramSize_; }
private:
uint32_t count_; // !< The number of operations in the array.
uint32_t flags_; // !< Reserved for future expansion. Must be 0.
const void* paramArray_; // !< Pointer to the array of individual operations
size_t paramSize_; // !< size in bytes of the param array passed
};
/*! \brief A generic copy memory command
*
* \details Used for both buffers and images. Backends are expected
@@ -1858,6 +1899,7 @@ union ComputeCommand {
CopyMemoryP2PCommand cmd25;
SvmPrefetchAsyncCommand cmd26;
VirtualMapCommand cmd27;
BatchMemoryOperationCommand cmd28;
ComputeCommand() {}
~ComputeCommand() {}
};
+1
Näytä tiedosto
@@ -24,6 +24,7 @@
// Dummy command types for Stream Wait and Write commands.
#define ROCCLR_COMMAND_STREAM_WAIT_VALUE 0x4501
#define ROCCLR_COMMAND_STREAM_WRITE_VALUE 0x4502
#define ROCCLR_COMMAND_BATCH_STREAM 0x4503
// Stream Wait Value Conidtions
#define ROCCLR_STREAM_WAIT_VALUE_GTE 0x0