From 2633e62801e1fd8b1fb4fb2aa9c40278ea3932e2 Mon Sep 17 00:00:00 2001 From: Ioannis Assiouras Date: Wed, 8 Feb 2023 20:18:11 +0000 Subject: [PATCH] SWDEV-381402 - Derive hip::Stream from amd::HostQueue Change-Id: I6c1aca5eb350c32d974ae4ffcc725705355956d8 [ROCm/clr commit: e3633dc8f4608784f32354642f38d71f07e6edcc] --- projects/clr/hipamd/src/hip_code_object.cpp | 16 +- projects/clr/hipamd/src/hip_context.cpp | 14 +- projects/clr/hipamd/src/hip_device.cpp | 30 +- .../clr/hipamd/src/hip_device_runtime.cpp | 6 +- projects/clr/hipamd/src/hip_event.cpp | 22 +- projects/clr/hipamd/src/hip_event.hpp | 12 +- projects/clr/hipamd/src/hip_event_ipc.cpp | 15 +- projects/clr/hipamd/src/hip_gl.cpp | 18 +- projects/clr/hipamd/src/hip_graph_helper.hpp | 12 +- .../clr/hipamd/src/hip_graph_internal.cpp | 243 +-------------- .../clr/hipamd/src/hip_graph_internal.hpp | 179 ++++------- projects/clr/hipamd/src/hip_hmm.cpp | 10 +- projects/clr/hipamd/src/hip_internal.hpp | 46 ++- projects/clr/hipamd/src/hip_memory.cpp | 290 +++++++++--------- projects/clr/hipamd/src/hip_module.cpp | 34 +- projects/clr/hipamd/src/hip_platform.cpp | 8 +- projects/clr/hipamd/src/hip_stream.cpp | 142 ++++----- projects/clr/hipamd/src/hip_stream_ops.cpp | 4 +- projects/clr/hipamd/src/hip_texture.cpp | 34 +- 19 files changed, 403 insertions(+), 732 deletions(-) diff --git a/projects/clr/hipamd/src/hip_code_object.cpp b/projects/clr/hipamd/src/hip_code_object.cpp index 0709c1a05d..778783cfde 100644 --- a/projects/clr/hipamd/src/hip_code_object.cpp +++ b/projects/clr/hipamd/src/hip_code_object.cpp @@ -32,7 +32,7 @@ THE SOFTWARE. #include hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, - amd::HostQueue& queue, bool isAsync = false); + hip::Stream& stream, bool isAsync = false); hipError_t ihipFree(void* ptr); // forward declaration of methods required for managed variables hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0); @@ -635,10 +635,10 @@ hipError_t DynCO::initDynManagedVars(const std::string& managedVar) { it->second->setManagedVarInfo(pointer, dvar->size()); // copy initial value to the managed variable to the managed memory allocated - amd::HostQueue* queue = hip::getNullStream(); - if (queue != nullptr) { + hip::Stream* stream = hip::getNullStream(); + if (stream != nullptr) { status = ihipMemcpy(pointer, reinterpret_cast
(dvar->device_ptr()), dvar->size(), - hipMemcpyDeviceToDevice, *queue); + hipMemcpyDeviceToDevice, *stream); if (status != hipSuccess) { ClPrint(amd::LOG_ERROR, amd::LOG_API, "Status %d, failed to copy device ptr:%s", status, managedVar.c_str()); @@ -658,7 +658,7 @@ hipError_t DynCO::initDynManagedVars(const std::string& managedVar) { } // copy managed memory pointer to the managed device variable status = ihipMemcpy(reinterpret_cast
(dvar->device_ptr()), &pointer, dvar->size(), - hipMemcpyHostToDevice, *queue); + hipMemcpyHostToDevice, *stream); if (status != hipSuccess) { ClPrint(amd::LOG_ERROR, amd::LOG_API, "Status %d, failed to copy device ptr:%s", status, managedVar.c_str()); @@ -895,10 +895,10 @@ hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) { DeviceVar* dvar = nullptr; IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId)); - amd::HostQueue* queue = g_devices.at(deviceId)->NullStream(); - if (queue != nullptr) { + hip::Stream* stream = g_devices.at(deviceId)->NullStream(); + if (stream != nullptr) { err = ihipMemcpy(reinterpret_cast
(dvar->device_ptr()), var->getManagedVarPtr(), - dvar->size(), hipMemcpyHostToDevice, *queue); + dvar->size(), hipMemcpyHostToDevice, *stream); } else { ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL"); return hipErrorInvalidResourceHandle; diff --git a/projects/clr/hipamd/src/hip_context.cpp b/projects/clr/hipamd/src/hip_context.cpp index 307f8452f7..f639d4ff66 100644 --- a/projects/clr/hipamd/src/hip_context.cpp +++ b/projects/clr/hipamd/src/hip_context.cpp @@ -91,21 +91,21 @@ void setCurrentDevice(unsigned int index) { amd::Os::setPreferredNumaNode(preferredNumaNode); } -amd::HostQueue* getQueue(hipStream_t stream) { +hip::Stream* getStream(hipStream_t stream) { if (stream == nullptr) { return getNullStream(); } else { - amd::HostQueue* queue = reinterpret_cast(stream)->asHostQueue(); - if (!(reinterpret_cast(stream)->Flags() & hipStreamNonBlocking)) { + hip::Stream* hip_stream = reinterpret_cast(stream); + if (!(hip_stream->Flags() & hipStreamNonBlocking)) { constexpr bool WaitNullStreamOnly = true; - iHipWaitActiveStreams(queue, WaitNullStreamOnly); + iHipWaitActiveStreams(hip_stream, WaitNullStreamOnly); } - return queue; + return hip_stream; } } // ================================================================================================ -amd::HostQueue* getNullStream(amd::Context& ctx) { +hip::Stream* getNullStream(amd::Context& ctx) { for (auto& it : g_devices) { if (it->asContext() == &ctx) { return it->NullStream(); @@ -131,7 +131,7 @@ int getDeviceID(amd::Context& ctx) { } // ================================================================================================ -amd::HostQueue* getNullStream() { +hip::Stream* getNullStream() { Device* device = getCurrentDevice(); return device ? device->NullStream() : nullptr; } diff --git a/projects/clr/hipamd/src/hip_device.cpp b/projects/clr/hipamd/src/hip_device.cpp index a3d059ac89..2b83616d77 100644 --- a/projects/clr/hipamd/src/hip_device.cpp +++ b/projects/clr/hipamd/src/hip_device.cpp @@ -26,25 +26,31 @@ namespace hip { // ================================================================================================ -amd::HostQueue* Device::NullStream(bool skip_alloc) { - amd::HostQueue* null_queue = null_stream_.asHostQueue(skip_alloc); - if (null_queue == nullptr) { +hip::Stream* Device::NullStream(bool skip_alloc) { + if (null_stream_ == nullptr && !skip_alloc) { + null_stream_ = new Stream(this, Stream::Priority::Normal, 0, true); + } + + if (null_stream_ == nullptr) { return nullptr; } // Wait for all active streams before executing commands on the default - iHipWaitActiveStreams(null_queue); - return null_queue; + iHipWaitActiveStreams(null_stream_); + return null_stream_; } // ================================================================================================ -Stream* Device::GetNullStream() { - amd::HostQueue* null_queue = null_stream_.asHostQueue(); - if (null_queue == nullptr) { +hip::Stream* Device::GetNullStream() { + if (null_stream_ == nullptr) { + null_stream_ = new Stream(this, Stream::Priority::Normal, 0, true); + } + + if (null_stream_ == nullptr) { return nullptr; } // Wait for all active streams before executing commands on the default - iHipWaitActiveStreams(null_queue); - return &null_stream_; + iHipWaitActiveStreams(null_stream_); + return null_stream_; } // ================================================================================================ @@ -128,6 +134,10 @@ Device::~Device() { if (default_mem_pool_ != nullptr) { default_mem_pool_->release(); } + + if (null_stream_!= nullptr) { + delete null_stream_; + } } } diff --git a/projects/clr/hipamd/src/hip_device_runtime.cpp b/projects/clr/hipamd/src/hip_device_runtime.cpp index 0288c885f4..19bed5ef83 100644 --- a/projects/clr/hipamd/src/hip_device_runtime.cpp +++ b/projects/clr/hipamd/src/hip_device_runtime.cpp @@ -512,9 +512,9 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) { hipError_t hipDeviceSynchronize ( void ) { HIP_INIT_API(hipDeviceSynchronize); - amd::HostQueue* queue = hip::getNullStream(); + hip::Stream* stream = hip::getNullStream(); - if (!queue) { + if (!stream) { HIP_RETURN(hipErrorOutOfMemory); } @@ -522,7 +522,7 @@ hipError_t hipDeviceSynchronize ( void ) { HIP_RETURN(hipErrorStreamCaptureUnsupported); } - queue->finish(); + stream->finish(); hip::Stream::syncNonBlockingStreams(hip::getCurrentDevice()->deviceId()); diff --git a/projects/clr/hipamd/src/hip_event.cpp b/projects/clr/hipamd/src/hip_event.cpp index f556cabe9d..83cbb9ef32 100644 --- a/projects/clr/hipamd/src/hip_event.cpp +++ b/projects/clr/hipamd/src/hip_event.cpp @@ -177,12 +177,12 @@ int64_t EventDD::time(bool getStartTs) const { } } -hipError_t Event::streamWaitCommand(amd::Command*& command, amd::HostQueue* queue) { +hipError_t Event::streamWaitCommand(amd::Command*& command, hip::Stream* stream) { amd::Command::EventWaitList eventWaitList; if (event_ != nullptr) { eventWaitList.push_back(event_); } - command = new amd::Marker(*queue, kMarkerDisableFlush, eventWaitList); + command = new amd::Marker(*stream, kMarkerDisableFlush, eventWaitList); if (command == NULL) { return hipErrorOutOfMemory; @@ -196,17 +196,17 @@ hipError_t Event::enqueueStreamWaitCommand(hipStream_t stream, amd::Command* com } hipError_t Event::streamWait(hipStream_t stream, uint flags) { - amd::HostQueue* queue = hip::getQueue(stream); + hip::Stream* hip_stream = hip::getStream(stream); // Access to event_ object must be lock protected amd::ScopedLock lock(lock_); - if ((event_ == nullptr) || (event_->command().queue() == queue) || ready()) { + if ((event_ == nullptr) || (event_->command().queue() == hip_stream) || ready()) { return hipSuccess; } if (!event_->notifyCmdQueue()) { return hipErrorLaunchOutOfResources; } amd::Command* command; - hipError_t status = streamWaitCommand(command, queue); + hipError_t status = streamWaitCommand(command, hip_stream); if (status != hipSuccess) { return status; } @@ -218,7 +218,7 @@ hipError_t Event::streamWait(hipStream_t stream, uint flags) { return hipSuccess; } -hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* queue, +hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* stream, uint32_t ext_flags ) { if (command == nullptr) { int32_t releaseFlags = ((ext_flags == 0) ? flags : ext_flags) & @@ -231,7 +231,7 @@ hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* queue, releaseFlags = amd::Device::kCacheStateIgnore; } // Always submit a EventMarker. - command = new hip::EventMarker(*queue, !kMarkerDisableFlush, true, releaseFlags); + command = new hip::EventMarker(*stream, !kMarkerDisableFlush, true, releaseFlags); } return hipSuccess; } @@ -249,10 +249,10 @@ hipError_t Event::enqueueRecordCommand(hipStream_t stream, amd::Command* command } hipError_t Event::addMarker(hipStream_t stream, amd::Command* command, bool record) { - amd::HostQueue* queue = hip::getQueue(stream); + hip::Stream* hip_stream = hip::getStream(stream); // Keep the lock always at the beginning of this to avoid a race. SWDEV-277847 amd::ScopedLock lock(lock_); - hipError_t status = recordCommand(command, queue); + hipError_t status = recordCommand(command, hip_stream); if (status != hipSuccess) { return hipSuccess; } @@ -379,8 +379,8 @@ hipError_t hipEventRecord_common(hipEvent_t event, hipStream_t stream) { return hipErrorInvalidHandle; } hip::Event* e = reinterpret_cast(event); - amd::HostQueue* queue = hip::getQueue(stream); - if (g_devices[e->deviceId()]->devices()[0] != &queue->device()) { + hip::Stream* hip_stream = hip::getStream(stream); + if (g_devices[e->deviceId()]->devices()[0] != &hip_stream->device()) { return hipErrorInvalidHandle; } return e->addMarker(stream, nullptr, true); diff --git a/projects/clr/hipamd/src/hip_event.hpp b/projects/clr/hipamd/src/hip_event.hpp index e08ea33f66..91a8193d48 100644 --- a/projects/clr/hipamd/src/hip_event.hpp +++ b/projects/clr/hipamd/src/hip_event.hpp @@ -78,9 +78,9 @@ typedef struct ihipIpcEventShmem_s { class EventMarker : public amd::Marker { public: - EventMarker(amd::HostQueue& queue, bool disableFlush, bool markerTs = false, + EventMarker(amd::HostQueue& stream, bool disableFlush, bool markerTs = false, int32_t scope = amd::Device::kCacheStateInvalid) - : amd::Marker(queue, disableFlush) { + : amd::Marker(stream, disableFlush) { profilingInfo_.enabled_ = true; profilingInfo_.callback_ = nullptr; profilingInfo_.marker_ts_ = markerTs; @@ -116,11 +116,11 @@ class Event { virtual hipError_t synchronize(); hipError_t elapsedTime(Event& eStop, float& ms); - virtual hipError_t streamWaitCommand(amd::Command*& command, amd::HostQueue* queue); + virtual hipError_t streamWaitCommand(amd::Command*& command, hip::Stream* stream); virtual hipError_t enqueueStreamWaitCommand(hipStream_t stream, amd::Command* command); virtual hipError_t streamWait(hipStream_t stream, uint flags); - virtual hipError_t recordCommand(amd::Command*& command, amd::HostQueue* queue, + virtual hipError_t recordCommand(amd::Command*& command, amd::HostQueue* stream, uint32_t flags = 0); virtual hipError_t enqueueRecordCommand(hipStream_t stream, amd::Command* command, bool record); hipError_t addMarker(hipStream_t stream, amd::Command* command, bool record); @@ -175,7 +175,7 @@ class Event { protected: amd::Monitor lock_; - amd::HostQueue* stream_; + hip::Stream* stream_; amd::Event* event_; int device_id_; //! Flag to indicate hipEventRecord has not been called. This is needed for @@ -224,7 +224,7 @@ class IPCEvent : public Event { hipError_t synchronize(); hipError_t query(); - hipError_t streamWaitCommand(amd::Command*& command, amd::HostQueue* queue); + hipError_t streamWaitCommand(amd::Command*& command, hip::Stream* stream); hipError_t enqueueStreamWaitCommand(hipStream_t stream, amd::Command* command); hipError_t streamWait(hipStream_t stream, uint flags); diff --git a/projects/clr/hipamd/src/hip_event_ipc.cpp b/projects/clr/hipamd/src/hip_event_ipc.cpp index 7385566b8e..706b3d4448 100644 --- a/projects/clr/hipamd/src/hip_event_ipc.cpp +++ b/projects/clr/hipamd/src/hip_event_ipc.cpp @@ -102,8 +102,8 @@ hipError_t IPCEvent::synchronize() { return hipSuccess; } -hipError_t IPCEvent::streamWaitCommand(amd::Command*& command, amd::HostQueue* queue) { - command = new amd::Marker(*queue, false); +hipError_t IPCEvent::streamWaitCommand(amd::Command*& command, hip::Stream* stream) { + command = new amd::Marker(*stream, false); if (command == NULL) { return hipErrorOutOfMemory; } @@ -125,12 +125,12 @@ hipError_t IPCEvent::enqueueStreamWaitCommand(hipStream_t stream, amd::Command* } hipError_t IPCEvent::streamWait(hipStream_t stream, uint flags) { - amd::HostQueue* queue = hip::getQueue(stream); + hip::Stream* hip_stream = hip::getStream(stream); amd::ScopedLock lock(lock_); if(query() != hipSuccess) { amd::Command* command; - hipError_t status = streamWaitCommand(command, queue); + hipError_t status = streamWaitCommand(command, hip_stream); if (status != hipSuccess) { return status; } @@ -140,18 +140,17 @@ hipError_t IPCEvent::streamWait(hipStream_t stream, uint flags) { return hipSuccess; } -hipError_t IPCEvent::recordCommand(amd::Command*& command, amd::HostQueue* queue, uint32_t flags) { +hipError_t IPCEvent::recordCommand(amd::Command*& command, amd::HostQueue* stream, uint32_t flags) { bool unrecorded = isUnRecorded(); if (unrecorded) { - command = new amd::Marker(*queue, kMarkerDisableFlush); + command = new amd::Marker(*stream, kMarkerDisableFlush); } else { - return Event::recordCommand(command, queue); + return Event::recordCommand(command, stream); } return hipSuccess; } hipError_t IPCEvent::enqueueRecordCommand(hipStream_t stream, amd::Command* command, bool record) { - amd::HostQueue* queue = hip::getQueue(stream); bool unrecorded = isUnRecorded(); if (unrecorded) { amd::Event& tEvent = command->event(); diff --git a/projects/clr/hipamd/src/hip_gl.cpp b/projects/clr/hipamd/src/hip_gl.cpp index 216a2cb401..ce692753e4 100644 --- a/projects/clr/hipamd/src/hip_gl.cpp +++ b/projects/clr/hipamd/src/hip_gl.cpp @@ -637,13 +637,12 @@ hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources, HIP_RETURN(hipErrorUnknown); } - amd::HostQueue* queue = hip::getQueue(stream); - if (nullptr == queue) { + hip::Stream* hip_stream = hip::getStream(stream); + if (nullptr == hip_stream) { HIP_RETURN(hipErrorUnknown); } - amd::HostQueue& hostQueue = *queue; - if (!hostQueue.context().glenv() || !hostQueue.context().glenv()->isAssociated()) { + if (!hip_stream->context().glenv() || !hip_stream->context().glenv()->isAssociated()) { LogWarning("\"amdContext\" is not created from GL context or share list"); HIP_RETURN(hipErrorUnknown); } @@ -658,7 +657,7 @@ hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources, //! Now create command and enqueue amd::AcquireExtObjectsCommand* command = new amd::AcquireExtObjectsCommand( - hostQueue, nullWaitList, count, memObjects, CL_COMMAND_ACQUIRE_GL_OBJECTS); + *hip_stream, nullWaitList, count, memObjects, CL_COMMAND_ACQUIRE_GL_OBJECTS); if (command == nullptr) { HIP_RETURN(hipErrorUnknown); } @@ -712,13 +711,12 @@ hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources } // Wait for the current host queue - hip::getQueue(stream)->finish(); + hip::getStream(stream)->finish(); - amd::HostQueue* queue = hip::getQueue(stream); - if (nullptr == queue) { + hip::Stream* hip_stream = hip::getStream(stream); + if (nullptr == hip_stream) { HIP_RETURN(hipErrorUnknown); } - amd::HostQueue& hostQueue = *queue; std::vector memObjects; hipError_t err = hipSetInteropObjects(count, reinterpret_cast(resources), memObjects); @@ -730,7 +728,7 @@ hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources // Now create command and enqueue amd::ReleaseExtObjectsCommand* command = new amd::ReleaseExtObjectsCommand( - hostQueue, nullWaitList, count, memObjects, CL_COMMAND_RELEASE_GL_OBJECTS); + *hip_stream, nullWaitList, count, memObjects, CL_COMMAND_RELEASE_GL_OBJECTS); if (command == nullptr) { HIP_RETURN(hipErrorUnknown); } diff --git a/projects/clr/hipamd/src/hip_graph_helper.hpp b/projects/clr/hipamd/src/hip_graph_helper.hpp index 69780338b2..20d011658e 100644 --- a/projects/clr/hipamd/src/hip_graph_helper.hpp +++ b/projects/clr/hipamd/src/hip_graph_helper.hpp @@ -5,9 +5,9 @@ hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p); hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind); hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, size_t sizeBytes, - hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync = false); + hipMemcpyKind kind, hip::Stream& stream, bool isAsync = false); -void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, amd::HostQueue& queue); +void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, hip::Stream& stream); bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind); @@ -26,19 +26,19 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f, uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, - amd::HostQueue* queue, void** kernelParams, void** extra, + hip::Stream* stream, void** kernelParams, void** extra, hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags, uint32_t params, uint32_t gridId, uint32_t numGrids, uint64_t prevGridSum, uint64_t allGridSum, uint32_t firstDevice); hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p, - amd::HostQueue* queue); + hip::Stream* stream); hipError_t ihipMemsetCommand(std::vector& commands, void* dst, int64_t value, - size_t valueSize, size_t sizeBytes, amd::HostQueue* queue); + size_t valueSize, size_t sizeBytes, hip::Stream* stream); hipError_t ihipMemset3DCommand(std::vector& commands, hipPitchedPtr pitchedDevPtr, - int value, hipExtent extent, amd::HostQueue* queue, size_t elementSize = 1); + int value, hipExtent extent, hip::Stream* stream, size_t elementSize = 1); hipError_t ihipMemcpySymbol_validate(const void* symbol, size_t sizeBytes, size_t offset, size_t& sym_size, hipDeviceptr_t& device_ptr); diff --git a/projects/clr/hipamd/src/hip_graph_internal.cpp b/projects/clr/hipamd/src/hip_graph_internal.cpp index 68bcab0a39..5733cc5c9d 100644 --- a/projects/clr/hipamd/src/hip_graph_internal.cpp +++ b/projects/clr/hipamd/src/hip_graph_internal.cpp @@ -98,56 +98,6 @@ hipError_t hipGraphMemcpyNode1D::ValidateParams(void* dst, const void* src, size return hipSuccess; } -hipError_t hipGraphMemcpyNode1D::SetCommandParams(void* dst, const void* src, size_t count, - hipMemcpyKind kind) { - hipError_t status = ihipMemcpy_validate(dst, src, count, kind); - if (status != hipSuccess) { - return status; - } - size_t sOffsetOrig = 0; - amd::Memory* origSrcMemory = getMemoryObject(src, sOffsetOrig); - size_t dOffsetOrig = 0; - amd::Memory* origDstMemory = getMemoryObject(dst, dOffsetOrig); - - size_t sOffset = 0; - amd::Memory* srcMemory = getMemoryObject(src, sOffset); - size_t dOffset = 0; - amd::Memory* dstMemory = getMemoryObject(dst, dOffset); - - if ((srcMemory == nullptr) && (dstMemory != nullptr)) { - if (origDstMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) { - return hipErrorInvalidValue; - } - amd::WriteMemoryCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*dstMemory->asBuffer(), dOffset, count, src); - } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { - if (origSrcMemory->getContext().devices()[0] != srcMemory->getContext().devices()[0]) { - return hipErrorInvalidValue; - } - amd::ReadMemoryCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*srcMemory->asBuffer(), sOffset, count, dst); - } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { - if (origDstMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) { - return hipErrorInvalidValue; - } - if (origSrcMemory->getContext().devices()[0] != srcMemory->getContext().devices()[0]) { - return hipErrorInvalidValue; - } - amd::CopyMemoryP2PCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, count); - // Make sure runtime has valid memory for the command execution. P2P access - // requires page table mapping on the current device to another GPU memory - if (!static_cast(command)->validateMemory()) { - delete command; - return hipErrorInvalidValue; - } - } else { - amd::CopyMemoryCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, count); - } - return hipSuccess; -} - hipError_t hipGraphMemcpyNode::ValidateParams(const hipMemcpy3DParms* pNodeParams) { hipError_t status = ihipMemcpy3D_validate(pNodeParams); if (status != hipSuccess) { @@ -297,185 +247,6 @@ hipError_t hipGraphMemcpyNode::ValidateParams(const hipMemcpy3DParms* pNodeParam return hipSuccess; } -hipError_t hipGraphMemcpyNode::SetCommandParams(const hipMemcpy3DParms* pNodeParams) { - hipError_t status = ihipMemcpy3D_validate(pNodeParams); - if (status != hipSuccess) { - return status; - } - const HIP_MEMCPY3D pCopy = hip::getDrvMemcpy3DDesc(*pNodeParams); - // If {src/dst}MemoryType is hipMemoryTypeUnified, {src/dst}Device and {src/dst}Pitch specify the - // (unified virtual address space) base address of the source data and the bytes per row to apply. - // {src/dst}Array is ignored. - hipMemoryType srcMemoryType = pCopy.srcMemoryType; - if (srcMemoryType == hipMemoryTypeUnified) { - srcMemoryType = - amd::MemObjMap::FindMemObj(pCopy.srcDevice) ? hipMemoryTypeDevice : hipMemoryTypeHost; - if (srcMemoryType == hipMemoryTypeHost) { - // {src/dst}Host may be unitialized. Copy over {src/dst}Device into it if we detect system - // memory. - const_cast(&pCopy)->srcHost = pCopy.srcDevice; - } - } - hipMemoryType dstMemoryType = pCopy.dstMemoryType; - if (dstMemoryType == hipMemoryTypeUnified) { - dstMemoryType = - amd::MemObjMap::FindMemObj(pCopy.dstDevice) ? hipMemoryTypeDevice : hipMemoryTypeHost; - if (srcMemoryType == hipMemoryTypeHost) { - const_cast(&pCopy)->dstHost = pCopy.dstDevice; - } - } - - // If {src/dst}MemoryType is hipMemoryTypeHost, check if the memory was prepinned. - // In that case upgrade the copy type to hipMemoryTypeDevice to avoid extra pinning. - if (srcMemoryType == hipMemoryTypeHost) { - amd::Memory* mem = amd::MemObjMap::FindMemObj(pCopy.srcHost); - srcMemoryType = mem ? hipMemoryTypeDevice : hipMemoryTypeHost; - if (srcMemoryType == hipMemoryTypeDevice) { - const_cast(&pCopy)->srcDevice = const_cast(pCopy.srcHost); - } - } - if (dstMemoryType == hipMemoryTypeHost) { - amd::Memory* mem = amd::MemObjMap::FindMemObj(pCopy.dstHost); - dstMemoryType = mem ? hipMemoryTypeDevice : hipMemoryTypeHost; - if (dstMemoryType == hipMemoryTypeDevice) { - const_cast(&pCopy)->dstDevice = const_cast(pCopy.dstDevice); - } - } - - amd::Coord3D srcOrigin = {pCopy.srcXInBytes, pCopy.srcY, pCopy.srcZ}; - amd::Coord3D dstOrigin = {pCopy.dstXInBytes, pCopy.dstY, pCopy.dstZ}; - amd::Coord3D copyRegion = {pCopy.WidthInBytes, pCopy.Height, pCopy.Depth}; - - if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeDevice)) { - // Host to Device. - - amd::Memory* dstMemory; - amd::BufferRect srcRect; - amd::BufferRect dstRect; - - status = - ihipMemcpyHtoDValidate(pCopy.srcHost, pCopy.dstDevice, srcOrigin, dstOrigin, copyRegion, - pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, pCopy.dstPitch, - pCopy.dstPitch * pCopy.dstHeight, dstMemory, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } - amd::WriteMemoryCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*dstMemory, {dstRect.start_, 0, 0}, copyRegion, pCopy.srcHost, dstRect, - srcRect); - } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) { - // Device to Host. - amd::Memory* srcMemory; - amd::BufferRect srcRect; - amd::BufferRect dstRect; - status = - ihipMemcpyDtoHValidate(pCopy.srcDevice, pCopy.dstHost, srcOrigin, dstOrigin, copyRegion, - pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, pCopy.dstPitch, - pCopy.dstPitch * pCopy.dstHeight, srcMemory, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } - amd::ReadMemoryCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*srcMemory, {srcRect.start_, 0, 0}, copyRegion, pCopy.dstHost, srcRect, - dstRect); - command->setSource(*srcMemory); - command->setOrigin({srcRect.start_, 0, 0}); - command->setSize(copyRegion); - command->setDestination(pCopy.dstHost); - command->setBufRect(srcRect); - command->setHostRect(dstRect); - } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) { - // Device to Device. - amd::Memory* srcMemory; - amd::Memory* dstMemory; - amd::BufferRect srcRect; - amd::BufferRect dstRect; - - status = ihipMemcpyDtoDValidate(pCopy.srcDevice, pCopy.dstDevice, srcOrigin, dstOrigin, - copyRegion, pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, - pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight, srcMemory, - dstMemory, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } - amd::CopyMemoryCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*srcMemory, *dstMemory, {srcRect.start_, 0, 0}, {dstRect.start_, 0, 0}, - copyRegion, srcRect, dstRect); - } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) { - amd::Image* dstImage; - amd::BufferRect srcRect; - - status = - ihipMemcpyHtoAValidate(pCopy.srcHost, pCopy.dstArray, srcOrigin, dstOrigin, copyRegion, - pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, dstImage, srcRect); - if (status != hipSuccess) { - return status; - } - amd::WriteMemoryCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*dstImage, dstOrigin, copyRegion, - static_cast(pCopy.srcHost) + srcRect.start_, pCopy.srcPitch, - pCopy.srcPitch * pCopy.srcHeight); - } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) { - // Image to Host. - amd::Image* srcImage; - amd::BufferRect dstRect; - - status = - ihipMemcpyAtoHValidate(pCopy.srcArray, pCopy.dstHost, srcOrigin, dstOrigin, copyRegion, - pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight, srcImage, dstRect); - if (status != hipSuccess) { - return status; - } - amd::ReadMemoryCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*srcImage, srcOrigin, copyRegion, - static_cast(pCopy.dstHost) + dstRect.start_, pCopy.dstPitch, - pCopy.dstPitch * pCopy.dstHeight); - } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) { - // Device to Image. - amd::Image* dstImage; - amd::Memory* srcMemory; - amd::BufferRect dstRect; - amd::BufferRect srcRect; - status = ihipMemcpyDtoAValidate(pCopy.srcDevice, pCopy.dstArray, srcOrigin, dstOrigin, - copyRegion, pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, - dstImage, srcMemory, dstRect, srcRect); - if (status != hipSuccess) { - return status; - } - amd::CopyMemoryCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*srcMemory, *dstImage, srcOrigin, dstOrigin, copyRegion, srcRect, dstRect); - } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) { - // Image to Device. - amd::BufferRect srcRect; - amd::BufferRect dstRect; - amd::Memory* dstMemory; - amd::Image* srcImage; - status = ihipMemcpyAtoDValidate(pCopy.srcArray, pCopy.dstDevice, srcOrigin, dstOrigin, - copyRegion, pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight, - dstMemory, srcImage, srcRect, dstRect); - if (status != hipSuccess) { - return status; - } - amd::CopyMemoryCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*srcImage, *dstMemory, srcOrigin, dstOrigin, copyRegion, srcRect, dstRect); - } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) { - amd::Image* srcImage; - amd::Image* dstImage; - - status = ihipMemcpyAtoAValidate(pCopy.srcArray, pCopy.dstArray, srcOrigin, dstOrigin, - copyRegion, srcImage, dstImage); - if (status != hipSuccess) { - return status; - } - amd::CopyMemoryCommand* command = reinterpret_cast(commands_[0]); - command->setParams(*srcImage, *dstImage, srcOrigin, dstOrigin, copyRegion); - } else { - return hipErrorInvalidValue; - } - return hipSuccess; -} - - bool ihipGraph::isGraphValid(ihipGraph* pGraph) { amd::ScopedLock lock(graphSetLock_); if (graphSet_.find(pGraph) == graphSet_.end()) { @@ -685,7 +456,9 @@ hipError_t hipGraphExec::CreateStreams(uint32_t num_streams) { auto stream = new hip::Stream(hip::getCurrentDevice(), hip::Stream::Priority::Normal, hipStreamNonBlocking); if (stream == nullptr || !stream->Create()) { - delete stream; + if (stream != nullptr) { + stream->release(); + } ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed to create parallel stream!\n"); return hipErrorOutOfMemory; } @@ -708,7 +481,7 @@ hipError_t hipGraphExec::Init() { hipError_t FillCommands(std::vector>& parallelLists, std::unordered_map>& nodeWaitLists, std::vector& levelOrder, std::vector& rootCommands, - amd::Command*& endCommand, amd::HostQueue* queue) { + amd::Command*& endCommand, hip::Stream* stream) { hipError_t status; for (auto& node : levelOrder) { // TODO: clone commands from next launch @@ -758,7 +531,7 @@ hipError_t FillCommands(std::vector>& parallelLists, } } if (!graphLastCmdWaitList.empty()) { - endCommand = new amd::Marker(*queue, false, graphLastCmdWaitList); + endCommand = new amd::Marker(*stream, false, graphLastCmdWaitList); if (endCommand == nullptr) { return hipErrorOutOfMemory; } @@ -787,8 +560,8 @@ void UpdateStream(std::vector>& parallelLists, hip::Stream* st hipError_t hipGraphExec::Run(hipStream_t stream) { hipError_t status; - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + + if (hip::getStream(stream) == nullptr) { return hipErrorInvalidResourceHandle; } if (flags_ == hipGraphInstantiateFlagAutoFreeOnLaunch) { @@ -802,7 +575,7 @@ hipError_t hipGraphExec::Run(hipStream_t stream) { std::vector rootCommands; amd::Command* endCommand = nullptr; status = - FillCommands(parallelLists_, nodeWaitLists_, levelOrder_, rootCommands, endCommand, queue); + FillCommands(parallelLists_, nodeWaitLists_, levelOrder_, rootCommands, endCommand, hip_stream); if (status != hipSuccess) { return status; } diff --git a/projects/clr/hipamd/src/hip_graph_internal.hpp b/projects/clr/hipamd/src/hip_graph_internal.hpp index 952bb8ee26..4f0b5dd31e 100644 --- a/projects/clr/hipamd/src/hip_graph_internal.hpp +++ b/projects/clr/hipamd/src/hip_graph_internal.hpp @@ -38,7 +38,7 @@ typedef hipGraphNode* Node; hipError_t FillCommands(std::vector>& parallelLists, std::unordered_map>& nodeWaitLists, std::vector& levelOrder, std::vector& rootCommands, - amd::Command*& endCommand, amd::HostQueue* queue); + amd::Command*& endCommand, hip::Stream* stream); void UpdateStream(std::vector>& parallelLists, hip::Stream* stream, hipGraphExec* ptr); @@ -155,7 +155,6 @@ struct hipGraphNodeDOTAttribute { struct hipGraphNode : public hipGraphNodeDOTAttribute { protected: hip::Stream* stream_ = nullptr; - amd::HostQueue* queue_; uint32_t level_; unsigned int id_; hipGraphNodeType type_; @@ -222,16 +221,15 @@ struct hipGraphNode : public hipGraphNodeDOTAttribute { return true; } - amd::HostQueue* GetQueue() { return queue_; } + hip::Stream* GetQueue() { return stream_; } virtual void SetStream(hip::Stream* stream, hipGraphExec* ptr = nullptr) { stream_ = stream; - queue_ = stream->asHostQueue(); } /// Create amd::command for the graph node - virtual hipError_t CreateCommand(amd::HostQueue* queue) { + virtual hipError_t CreateCommand(hip::Stream* stream) { commands_.clear(); - queue_ = queue; + stream_ = stream; return hipSuccess; } /// Return node unique ID @@ -350,8 +348,8 @@ struct hipGraphNode : public hipGraphNodeDOTAttribute { (type_ == hipGraphNodeTypeKernel || type_ == hipGraphNodeTypeMemcpy || type_ == hipGraphNodeTypeMemset)) { amd::Command::EventWaitList waitList; - amd::HostQueue* queue = hip::getQueue(stream); - amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList); + hip::Stream* hip_stream = hip::getStream(stream); + amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList); command->enqueue(); command->release(); return; @@ -575,7 +573,9 @@ struct hipGraphExec { // new commands are launched for every launch they are destroyed as and when command is // terminated after it complete execution for (auto stream : parallel_streams_) { - delete stream; + if (stream != nullptr) { + stream->release(); + } } for (auto it = clonedNodes_.begin(); it != clonedNodes_.end(); it++) delete it->second; amd::ScopedLock lock(graphExecSetLock_); @@ -645,7 +645,6 @@ struct hipChildGraphNode : public hipGraphNode { void SetStream(hip::Stream* stream, hipGraphExec* ptr = nullptr) { stream_ = stream; - queue_ = stream->asHostQueue(); UpdateStream(parallelLists_, stream, ptr); } @@ -654,8 +653,8 @@ struct hipChildGraphNode : public hipGraphNode { std::vector& GetCommands() { return parallelLists_[0].back()->GetCommands(); } // Create child graph node commands and set waitlists - hipError_t CreateCommand(amd::HostQueue* queue) { - hipError_t status = hipGraphNode::CreateCommand(queue); + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); if (status != hipSuccess) { return status; } @@ -663,7 +662,7 @@ struct hipChildGraphNode : public hipGraphNode { std::vector rootCommands; amd::Command* endCommand = nullptr; status = FillCommands(parallelLists_, nodeWaitLists_, childGraphlevelOrder_, rootCommands, - endCommand, queue); + endCommand, stream); for (auto& cmd : rootCommands) { commands_.push_back(cmd); } @@ -933,14 +932,14 @@ class hipGraphKernelNode : public hipGraphNode { return new hipGraphKernelNode(static_cast(*this)); } - hipError_t CreateCommand(amd::HostQueue* queue) { + hipError_t CreateCommand(hip::Stream* stream) { hipFunction_t func = nullptr; hipError_t status = validateKernelParams(pKernelParams_, &func, - queue ? hip::getDeviceID(queue->context()) : -1); + stream ? hip::getDeviceID(stream->context()) : -1); if (hipSuccess != status) { return status; } - status = hipGraphNode::CreateCommand(queue); + status = hipGraphNode::CreateCommand(stream); if (status != hipSuccess) { return status; } @@ -951,7 +950,7 @@ class hipGraphKernelNode : public hipGraphNode { pKernelParams_->gridDim.y * pKernelParams_->blockDim.y, pKernelParams_->gridDim.z * pKernelParams_->blockDim.z, pKernelParams_->blockDim.x, pKernelParams_->blockDim.y, pKernelParams_->blockDim.z, pKernelParams_->sharedMemBytes, - queue, pKernelParams_->kernelParams, pKernelParams_->extra, nullptr, nullptr, 0, 0, 0, 0, 0, + stream, pKernelParams_->kernelParams, pKernelParams_->extra, nullptr, nullptr, 0, 0, 0, 0, 0, 0, 0); commands_.emplace_back(command); return status; @@ -1044,22 +1043,6 @@ class hipGraphKernelNode : public hipGraphNode { } return hipSuccess; } - // ToDo: use this when commands are cloned and command params are to be updated - hipError_t SetCommandParams(const hipKernelNodeParams* params) { - // updates kernel params - hipError_t status = validateKernelParams(params); - if (hipSuccess != status) { - return status; - } - size_t globalWorkOffset[3] = {0}; - size_t globalWorkSize[3] = {params->gridDim.x, params->gridDim.y, params->gridDim.z}; - size_t localWorkSize[3] = {params->blockDim.x, params->blockDim.y, params->blockDim.z}; - reinterpret_cast(commands_[0]) - ->setSizes(globalWorkOffset, globalWorkSize, localWorkSize); - reinterpret_cast(commands_[0]) - ->setSharedMemBytes(params->sharedMemBytes); - return hipSuccess; - } hipError_t SetParams(hipGraphNode* node) { const hipGraphKernelNode* kernelNode = static_cast(node); @@ -1110,17 +1093,17 @@ class hipGraphMemcpyNode : public hipGraphNode { return new hipGraphMemcpyNode(static_cast(*this)); } - hipError_t CreateCommand(amd::HostQueue* queue) { + hipError_t CreateCommand(hip::Stream* stream) { if (IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->kind)) { return hipSuccess; } - hipError_t status = hipGraphNode::CreateCommand(queue); + hipError_t status = hipGraphNode::CreateCommand(stream); if (status != hipSuccess) { return status; } commands_.reserve(1); amd::Command* command; - status = ihipMemcpy3DCommand(command, pCopyParams_, queue); + status = ihipMemcpy3DCommand(command, pCopyParams_, stream); commands_.emplace_back(command); return status; } @@ -1129,7 +1112,7 @@ class hipGraphMemcpyNode : public hipGraphNode { if (isEnabled_ && IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->kind)) { ihipHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->extent.width * pCopyParams_->extent.height * - pCopyParams_->extent.depth, *hip::getQueue(stream)); + pCopyParams_->extent.depth, *hip::getStream(stream)); return; } hipGraphNode::EnqueueCommands(stream); @@ -1150,8 +1133,6 @@ class hipGraphMemcpyNode : public hipGraphNode { const hipGraphMemcpyNode* memcpyNode = static_cast(node); return SetParams(memcpyNode->pCopyParams_); } - // ToDo: use this when commands are cloned and command params are to be updated - hipError_t SetCommandParams(const hipMemcpy3DParms* pNodeParams); hipError_t ValidateParams(const hipMemcpy3DParms* pNodeParams); std::string GetLabel(hipGraphDebugDotFlags flag) { const HIP_MEMCPY3D pCopy = hip::getDrvMemcpy3DDesc(*pCopyParams_); @@ -1256,17 +1237,17 @@ class hipGraphMemcpyNode1D : public hipGraphNode { return new hipGraphMemcpyNode1D(static_cast(*this)); } - virtual hipError_t CreateCommand(amd::HostQueue* queue) { + virtual hipError_t CreateCommand(hip::Stream* stream) { if (IsHtoHMemcpy(dst_, src_, kind_)) { return hipSuccess; } - hipError_t status = hipGraphNode::CreateCommand(queue); + hipError_t status = hipGraphNode::CreateCommand(stream); if (status != hipSuccess) { return status; } commands_.reserve(1); amd::Command* command = nullptr; - status = ihipMemcpyCommand(command, dst_, src_, count_, kind_, *queue); + status = ihipMemcpyCommand(command, dst_, src_, count_, kind_, *stream); commands_.emplace_back(command); return status; } @@ -1281,14 +1262,14 @@ class hipGraphMemcpyNode1D : public hipGraphNode { if (isEnabled_) { //HtoH if (isH2H) { - ihipHtoHMemcpy(dst_, src_, count_, *hip::getQueue(stream)); + ihipHtoHMemcpy(dst_, src_, count_, *hip::getStream(stream)); return; } amd::Command* command = commands_[0]; amd::HostQueue* cmdQueue = command->queue(); - amd::HostQueue* queue = hip::getQueue(stream); + hip::Stream* hip_stream = hip::getStream(stream); - if (cmdQueue == queue) { + if (cmdQueue == hip_stream) { command->enqueue(); command->release(); return; @@ -1296,7 +1277,7 @@ class hipGraphMemcpyNode1D : public hipGraphNode { amd::Command::EventWaitList waitList; amd::Command* depdentMarker = nullptr; - amd::Command* cmd = queue->getLastQueuedCommand(true); + amd::Command* cmd = hip_stream->getLastQueuedCommand(true); if (cmd != nullptr) { waitList.push_back(cmd); amd::Command* depdentMarker = new amd::Marker(*cmdQueue, true, waitList); @@ -1313,7 +1294,7 @@ class hipGraphMemcpyNode1D : public hipGraphNode { if (cmd != nullptr) { waitList.clear(); waitList.push_back(cmd); - amd::Command* depdentMarker = new amd::Marker(*queue, true, waitList); + amd::Command* depdentMarker = new amd::Marker(*hip_stream, true, waitList); if (depdentMarker != nullptr) { depdentMarker->enqueue(); // Make sure future commands of queue synced with command depdentMarker->release(); @@ -1322,8 +1303,8 @@ class hipGraphMemcpyNode1D : public hipGraphNode { } } else { amd::Command::EventWaitList waitList; - amd::HostQueue* queue = hip::getQueue(stream); - amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList); + hip::Stream* hip_stream = hip::getStream(stream); + amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList); command->enqueue(); command->release(); } @@ -1346,8 +1327,6 @@ class hipGraphMemcpyNode1D : public hipGraphNode { return SetParams(memcpy1DNode->dst_, memcpy1DNode->src_, memcpy1DNode->count_, memcpy1DNode->kind_); } - // ToDo: use this when commands are cloned and command params are to be updated - hipError_t SetCommandParams(void* dst, const void* src, size_t count, hipMemcpyKind kind); static hipError_t ValidateParams(void* dst, const void* src, size_t count, hipMemcpyKind kind); std::string GetLabel(hipGraphDebugDotFlags flag) { size_t sOffsetOrig = 0; @@ -1414,8 +1393,8 @@ class hipGraphMemcpyNodeFromSymbol : public hipGraphMemcpyNode1D { static_cast(*this)); } - hipError_t CreateCommand(amd::HostQueue* queue) { - hipError_t status = hipGraphNode::CreateCommand(queue); + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); if (status != hipSuccess) { return status; } @@ -1428,7 +1407,7 @@ class hipGraphMemcpyNodeFromSymbol : public hipGraphMemcpyNode1D { if (status != hipSuccess) { return status; } - status = ihipMemcpyCommand(command, dst_, device_ptr, count_, kind_, *queue); + status = ihipMemcpyCommand(command, dst_, device_ptr, count_, kind_, *stream); if (status != hipSuccess) { return status; } @@ -1474,18 +1453,6 @@ class hipGraphMemcpyNodeFromSymbol : public hipGraphMemcpyNode1D { return SetParams(memcpyNode->dst_, memcpyNode->symbol_, memcpyNode->count_, memcpyNode->offset_, memcpyNode->kind_); } - // ToDo: use this when commands are cloned and command params are to be updated - hipError_t SetCommandParams(void* dst, const void* symbol, size_t count, size_t offset, - hipMemcpyKind kind) { - size_t sym_size = 0; - hipDeviceptr_t device_ptr = nullptr; - - hipError_t status = ihipMemcpySymbol_validate(symbol, count, offset, sym_size, device_ptr); - if (status != hipSuccess) { - return status; - } - return hipGraphMemcpyNode1D::SetCommandParams(dst, device_ptr, count, kind); - } }; class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D { const void* symbol_; @@ -1504,8 +1471,8 @@ class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D { return new hipGraphMemcpyNodeToSymbol(static_cast(*this)); } - hipError_t CreateCommand(amd::HostQueue* queue) { - hipError_t status = hipGraphNode::CreateCommand(queue); + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); if (status != hipSuccess) { return status; } @@ -1518,7 +1485,7 @@ class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D { if (status != hipSuccess) { return status; } - status = ihipMemcpyCommand(command, device_ptr, src_, count_, kind_, *queue); + status = ihipMemcpyCommand(command, device_ptr, src_, count_, kind_, *stream); if (status != hipSuccess) { return status; } @@ -1562,18 +1529,6 @@ class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D { return SetParams(memcpyNode->src_, memcpyNode->symbol_, memcpyNode->count_, memcpyNode->offset_, memcpyNode->kind_); } - // ToDo: use this when commands are cloned and command params are to be updated - hipError_t SetCommandParams(const void* symbol, const void* src, size_t count, size_t offset, - hipMemcpyKind kind) { - size_t sym_size = 0; - hipDeviceptr_t device_ptr = nullptr; - - hipError_t status = ihipMemcpySymbol_validate(symbol, count, offset, sym_size, device_ptr); - if (status != hipSuccess) { - return status; - } - return hipGraphMemcpyNode1D::SetCommandParams(device_ptr, src, count, kind); - } }; class hipGraphMemsetNode : public hipGraphNode { @@ -1633,21 +1588,21 @@ class hipGraphMemsetNode : public hipGraphNode { } } - hipError_t CreateCommand(amd::HostQueue* queue) { - hipError_t status = hipGraphNode::CreateCommand(queue); + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); if (status != hipSuccess) { return status; } if (pMemsetParams_->height == 1) { size_t sizeBytes = pMemsetParams_->width * pMemsetParams_->elementSize; hipError_t status = ihipMemsetCommand(commands_, pMemsetParams_->dst, pMemsetParams_->value, - pMemsetParams_->elementSize, sizeBytes, queue); + pMemsetParams_->elementSize, sizeBytes, stream); } else { hipError_t status = ihipMemset3DCommand( commands_, {pMemsetParams_->dst, pMemsetParams_->pitch, pMemsetParams_->width * pMemsetParams_->elementSize, pMemsetParams_->height}, - pMemsetParams_->value, {pMemsetParams_->width * pMemsetParams_->elementSize, pMemsetParams_->height, 1}, queue, pMemsetParams_->elementSize); + pMemsetParams_->value, {pMemsetParams_->width * pMemsetParams_->elementSize, pMemsetParams_->height, 1}, stream, pMemsetParams_->elementSize); } return status; } @@ -1706,15 +1661,15 @@ class hipGraphEventRecordNode : public hipGraphNode { return new hipGraphEventRecordNode(static_cast(*this)); } - hipError_t CreateCommand(amd::HostQueue* queue) { - hipError_t status = hipGraphNode::CreateCommand(queue); + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); if (status != hipSuccess) { return status; } hip::Event* e = reinterpret_cast(event_); commands_.reserve(1); amd::Command* command = nullptr; - status = e->recordCommand(command, queue); + status = e->recordCommand(command, stream); commands_.emplace_back(command); return status; } @@ -1744,16 +1699,6 @@ class hipGraphEventRecordNode : public hipGraphNode { static_cast(node); return SetParams(eventRecordNode->event_); } - // ToDo: use this when commands are cloned and command params are to be updated - hipError_t SetCommandParams(hipEvent_t event) { - amd::HostQueue* queue; - if (!commands_.empty()) { - queue = commands_[0]->queue(); - commands_[0]->release(); - } - commands_.clear(); - return CreateCommand(queue); - } }; class hipGraphEventWaitNode : public hipGraphNode { @@ -1769,15 +1714,15 @@ class hipGraphEventWaitNode : public hipGraphNode { return new hipGraphEventWaitNode(static_cast(*this)); } - hipError_t CreateCommand(amd::HostQueue* queue) { - hipError_t status = hipGraphNode::CreateCommand(queue); + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); if (status != hipSuccess) { return status; } hip::Event* e = reinterpret_cast(event_); commands_.reserve(1); amd::Command* command; - status = e->streamWaitCommand(command, queue); + status = e->streamWaitCommand(command, stream); commands_.emplace_back(command); return status; } @@ -1806,16 +1751,6 @@ class hipGraphEventWaitNode : public hipGraphNode { const hipGraphEventWaitNode* eventWaitNode = static_cast(node); return SetParams(eventWaitNode->event_); } - // ToDo: use this when commands are cloned and command params are to be updated - hipError_t SetCommandParams(hipEvent_t event) { - amd::HostQueue* queue; - if (!commands_.empty()) { - queue = commands_[0]->queue(); - commands_[0]->release(); - } - commands_.clear(); - return CreateCommand(queue); - } }; class hipGraphHostNode : public hipGraphNode { @@ -1836,14 +1771,14 @@ class hipGraphHostNode : public hipGraphNode { return new hipGraphHostNode(static_cast(*this)); } - hipError_t CreateCommand(amd::HostQueue* queue) { - hipError_t status = hipGraphNode::CreateCommand(queue); + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); if (status != hipSuccess) { return status; } amd::Command::EventWaitList waitList; commands_.reserve(1); - amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList); + amd::Command* command = new amd::Marker(*stream, !kMarkerDisableFlush, waitList); commands_.emplace_back(command); return hipSuccess; } @@ -1885,8 +1820,6 @@ class hipGraphHostNode : public hipGraphNode { const hipGraphHostNode* hostNode = static_cast(node); return SetParams(hostNode->pNodeParams_); } - // ToDo: use this when commands are cloned and command params are to be updated - hipError_t SetCommandParams(const hipHostNodeParams* params); }; class hipGraphEmptyNode : public hipGraphNode { @@ -1898,14 +1831,14 @@ class hipGraphEmptyNode : public hipGraphNode { return new hipGraphEmptyNode(static_cast(*this)); } - hipError_t CreateCommand(amd::HostQueue* queue) { - hipError_t status = hipGraphNode::CreateCommand(queue); + hipError_t CreateCommand(hip::Stream* stream) { + hipError_t status = hipGraphNode::CreateCommand(stream); if (status != hipSuccess) { return status; } amd::Command::EventWaitList waitList; commands_.reserve(1); - amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList); + amd::Command* command = new amd::Marker(*stream, !kMarkerDisableFlush, waitList); commands_.emplace_back(command); return hipSuccess; } @@ -1925,8 +1858,8 @@ class hipGraphMemAllocNode : public hipGraphNode { return new hipGraphMemAllocNode(static_cast(*this)); } - virtual hipError_t CreateCommand(amd::HostQueue* queue) { - auto error = hipGraphNode::CreateCommand(queue); + virtual hipError_t CreateCommand(hip::Stream* stream) { + auto error = hipGraphNode::CreateCommand(stream); auto ptr = Execute(stream_); return error; } @@ -1966,8 +1899,8 @@ class hipGraphMemFreeNode : public hipGraphNode { return new hipGraphMemFreeNode(static_cast(*this)); } - virtual hipError_t CreateCommand(amd::HostQueue* queue) { - auto error = hipGraphNode::CreateCommand(queue); + virtual hipError_t CreateCommand(hip::Stream* stream) { + auto error = hipGraphNode::CreateCommand(stream); Execute(stream_); return error; } diff --git a/projects/clr/hipamd/src/hip_hmm.cpp b/projects/clr/hipamd/src/hip_hmm.cpp index be4c6cb4fe..ec201663f6 100644 --- a/projects/clr/hipamd/src/hip_hmm.cpp +++ b/projects/clr/hipamd/src/hip_hmm.cpp @@ -94,7 +94,7 @@ hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device, HIP_RETURN(hipErrorInvalidDevice); } - amd::HostQueue* queue = nullptr; + hip::Stream* hip_stream = nullptr; amd::Device* dev = nullptr; bool cpu_access = false; @@ -106,19 +106,19 @@ hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device, // Pick the specified stream or Null one from the provided device if (device == hipCpuDeviceId) { cpu_access = true; - queue = (stream == nullptr) ? hip::getCurrentDevice()->NullStream() : hip::getQueue(stream); + hip_stream = (stream == nullptr) ? hip::getCurrentDevice()->NullStream() : hip::getStream(stream); } else { dev = g_devices[device]->devices()[0]; - queue = (stream == nullptr) ? g_devices[device]->NullStream() : hip::getQueue(stream); + hip_stream = (stream == nullptr) ? g_devices[device]->NullStream() : hip::getStream(stream); } - if (queue == nullptr) { + if (hip_stream == nullptr) { HIP_RETURN(hipErrorInvalidValue); } amd::Command::EventWaitList waitList; amd::SvmPrefetchAsyncCommand* command = - new amd::SvmPrefetchAsyncCommand(*queue, waitList, dev_ptr, count, dev, cpu_access); + new amd::SvmPrefetchAsyncCommand(*hip_stream, waitList, dev_ptr, count, dev, cpu_access); if (command == nullptr) { return hipErrorOutOfMemory; } diff --git a/projects/clr/hipamd/src/hip_internal.hpp b/projects/clr/hipamd/src/hip_internal.hpp index a416c1e7fb..84782cb6ef 100644 --- a/projects/clr/hipamd/src/hip_internal.hpp +++ b/projects/clr/hipamd/src/hip_internal.hpp @@ -225,12 +225,11 @@ public: namespace hip { class Device; class MemoryPool; - class Stream { + class Stream : public amd::HostQueue { public: enum Priority : int { High = -1, Normal = 0, Low = 1 }; private: - amd::HostQueue* queue_; mutable amd::Monitor lock_; Device* device_; Priority priority_; @@ -260,18 +259,20 @@ namespace hip { /// Capture events std::unordered_set captureEvents_; unsigned long long captureID_; + + static inline CommandQueue::Priority convertToQueuePriority(Priority p){ + return p == Priority::High ? amd::CommandQueue::Priority::High : p == Priority::Low ? + amd::CommandQueue::Priority::Low : amd::CommandQueue::Priority::Normal; + } + public: Stream(Device* dev, Priority p = Priority::Normal, unsigned int f = 0, bool null_stream = false, const std::vector& cuMask = {}, hipStreamCaptureStatus captureStatus = hipStreamCaptureStatusNone); - ~Stream(); + /// Creates the hip stream object, including AMD host queue bool Create(); - - /// Get device AMD host queue object. The method can allocate the queue - amd::HostQueue* asHostQueue(bool skip_alloc = false); - - void Finish() const; + virtual bool terminate() override; /// Get device ID associated with the current stream; int DeviceId() const; /// Get HIP device associated with the stream @@ -378,6 +379,7 @@ namespace hip { parallelCaptureStreams_.erase(it); } } + static bool existsActiveStreamForDevice(hip::Device* device); }; /// HIP Device class @@ -389,7 +391,7 @@ namespace hip { /// Store it here so we don't have to loop through the device list every time int deviceId_; /// ROCclr host queue for default streams - Stream null_stream_; + Stream* null_stream_ = nullptr; /// Store device flags unsigned int flags_; /// Maintain list of user enabled peers @@ -398,7 +400,6 @@ namespace hip { /// True if this device is active bool isActive_; - std::vector queues_; MemoryPool* default_mem_pool_; MemoryPool* current_mem_pool_; @@ -408,7 +409,6 @@ namespace hip { public: Device(amd::Context* ctx, int devId): context_(ctx), deviceId_(devId), - null_stream_(this, Stream::Priority::Normal, 0, true), flags_(hipDeviceScheduleSpin), isActive_(false), default_mem_pool_(nullptr), @@ -445,22 +445,16 @@ namespace hip { void setFlags(unsigned int flags) { flags_ = flags; } void Reset(); - amd::HostQueue* NullStream(bool skip_alloc = false); - Stream* GetNullStream(); + hip::Stream* NullStream(bool skip_alloc = false); + Stream* GetNullStream(); - void SaveQueue(amd::HostQueue* queue) { - amd::ScopedLock lock(lock_); - queues_.push_back(queue); - } bool GetActiveStatus() { amd::ScopedLock lock(lock_); if (isActive_) return true; - for (int i = 0; i < queues_.size(); i++) { - if (queues_[i]->GetQueueStatus()) { - isActive_ = true; - return true; - } + if (Stream::existsActiveStreamForDevice(this)) { + isActive_ = true; + return true; } return false; } @@ -524,11 +518,11 @@ namespace hip { /// Get ROCclr queue associated with hipStream /// Note: This follows the CUDA spec to sync with default streams /// and Blocking streams - extern amd::HostQueue* getQueue(hipStream_t stream); + extern hip::Stream* getStream(hipStream_t stream); /// Get default stream associated with the ROCclr context - extern amd::HostQueue* getNullStream(amd::Context&); + extern hip::Stream* getNullStream(amd::Context&); /// Get default stream of the thread - extern amd::HostQueue* getNullStream(); + extern hip::Stream* getNullStream(); /// Get device ID associated with the ROCclr context int getDeviceID(amd::Context& ctx); /// Check if stream is valid @@ -542,7 +536,7 @@ extern void WaitThenDecrementSignal(hipStream_t stream, hipError_t status, void* /// Wait all active streams on the blocking queue. The method enqueues a wait command and /// doesn't stall the current thread -extern void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream = false); +extern void iHipWaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stream = false); extern std::vector g_devices; extern hipError_t ihipDeviceGetCount(int* count); diff --git a/projects/clr/hipamd/src/hip_memory.cpp b/projects/clr/hipamd/src/hip_memory.cpp index 40f5a52845..d642354ab7 100644 --- a/projects/clr/hipamd/src/hip_memory.cpp +++ b/projects/clr/hipamd/src/hip_memory.cpp @@ -78,9 +78,9 @@ hipError_t ihipFree(void *ptr) { auto dev = g_devices[device_id]; // Skip stream allocation, since if it wasn't allocated until free, then the device wasn't used constexpr bool SkipStreamAlloc = true; - amd::HostQueue* queue = dev->NullStream(SkipStreamAlloc); - if (queue != nullptr) { - queue->finish(); + hip::Stream* stream = dev->NullStream(SkipStreamAlloc); + if (stream != nullptr) { + stream->finish(); } hip::Stream::syncNonBlockingStreams(device_id); // Find out if memory belongs to any memory pool @@ -195,15 +195,15 @@ hipError_t hipSignalExternalSemaphoresAsync( if (extSemArray == nullptr || paramsArray == nullptr) { HIP_RETURN(hipErrorInvalidValue); } - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { HIP_RETURN(hipErrorInvalidValue); } for (unsigned int i = 0; i < numExtSems; i++) { if (extSemArray[i] != nullptr) { amd::ExternalSemaphoreCmd* command = - new amd::ExternalSemaphoreCmd(*queue, extSemArray[i], paramsArray[i].params.fence.value, + new amd::ExternalSemaphoreCmd(*hip_stream, extSemArray[i], paramsArray[i].params.fence.value, amd::ExternalSemaphoreCmd::COMMAND_SIGNAL_EXTSEMAPHORE); if (command == nullptr) { return hipErrorOutOfMemory; @@ -227,15 +227,15 @@ hipError_t hipWaitExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemAr if (extSemArray == nullptr || paramsArray == nullptr) { HIP_RETURN(hipErrorInvalidValue); } - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { HIP_RETURN(hipErrorInvalidValue); } for (unsigned int i = 0; i < numExtSems; i++) { if (extSemArray[i] != nullptr) { amd::ExternalSemaphoreCmd* command = - new amd::ExternalSemaphoreCmd(*queue, extSemArray[i], paramsArray[i].params.fence.value, + new amd::ExternalSemaphoreCmd(*hip_stream, extSemArray[i], paramsArray[i].params.fence.value, amd::ExternalSemaphoreCmd::COMMAND_WAIT_EXTSEMAPHORE); if (command == nullptr) { return hipErrorOutOfMemory; @@ -343,35 +343,35 @@ hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes, } hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, size_t sizeBytes, - hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync) { + hipMemcpyKind kind, hip::Stream& stream, bool isAsync) { amd::Command::EventWaitList waitList; size_t sOffset = 0; amd::Memory* srcMemory = getMemoryObject(src, sOffset); size_t dOffset = 0; amd::Memory* dstMemory = getMemoryObject(dst, dOffset); - amd::Device* queueDevice = &queue.device(); + amd::Device* queueDevice = &stream.device(); amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); if ((srcMemory == nullptr) && (dstMemory != nullptr)) { - amd::HostQueue* pQueue = &queue; + hip::Stream* pStream = &stream; if (queueDevice != dstMemory->getContext().devices()[0]) { - pQueue = hip::getNullStream(dstMemory->getContext()); - amd::Command* cmd = queue.getLastQueuedCommand(true); + pStream = hip::getNullStream(dstMemory->getContext()); + amd::Command* cmd = stream.getLastQueuedCommand(true); if (cmd != nullptr) { waitList.push_back(cmd); } } - command = new amd::WriteMemoryCommand(*pQueue, CL_COMMAND_WRITE_BUFFER, waitList, + command = new amd::WriteMemoryCommand(*pStream, CL_COMMAND_WRITE_BUFFER, waitList, *dstMemory->asBuffer(), dOffset, sizeBytes, src, 0, 0, copyMetadata); } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) { - amd::HostQueue* pQueue = &queue; + hip::Stream* pStream = &stream; if (queueDevice != srcMemory->getContext().devices()[0]) { - pQueue = hip::getNullStream(srcMemory->getContext()); - amd::Command* cmd = queue.getLastQueuedCommand(true); + pStream = hip::getNullStream(srcMemory->getContext()); + amd::Command* cmd = stream.getLastQueuedCommand(true); if (cmd != nullptr) { waitList.push_back(cmd); } } - command = new amd::ReadMemoryCommand(*pQueue, CL_COMMAND_READ_BUFFER, waitList, + command = new amd::ReadMemoryCommand(*pStream, CL_COMMAND_READ_BUFFER, waitList, *srcMemory->asBuffer(), sOffset, sizeBytes, dst, 0, 0, copyMetadata); } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) { // Check if the queue device doesn't match the device on any memory object. @@ -380,7 +380,7 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, if ((srcMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) && ((srcMemory->getContext().devices().size() == 1) && (dstMemory->getContext().devices().size() == 1))) { - command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList, + command = new amd::CopyMemoryP2PCommand(stream, CL_COMMAND_COPY_BUFFER, waitList, *srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, sizeBytes); if (command == nullptr) { return hipErrorOutOfMemory; @@ -392,12 +392,12 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, return hipErrorInvalidValue; } } else { - amd::HostQueue* pQueue = &queue; + hip::Stream* pStream = &stream; if ((srcMemory->getContext().devices()[0] == dstMemory->getContext().devices()[0]) && (queueDevice != srcMemory->getContext().devices()[0])) { copyMetadata.copyEnginePreference_ = amd::CopyMetadata::CopyEnginePreference::NONE; - pQueue = hip::getNullStream(srcMemory->getContext()); - amd::Command* cmd = queue.getLastQueuedCommand(true); + pStream = hip::getNullStream(srcMemory->getContext()); + amd::Command* cmd = stream.getLastQueuedCommand(true); if (cmd != nullptr) { waitList.push_back(cmd); } @@ -405,22 +405,22 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, // Scenarios such as DtoH where dst is pinned memory if ((queueDevice != srcMemory->getContext().devices()[0]) && (dstMemory->getContext().devices().size() != 1)) { - pQueue = hip::getNullStream(srcMemory->getContext()); - amd::Command* cmd = queue.getLastQueuedCommand(true); + pStream = hip::getNullStream(srcMemory->getContext()); + amd::Command* cmd = stream.getLastQueuedCommand(true); if (cmd != nullptr) { waitList.push_back(cmd); } // Scenarios such as HtoD where src is pinned memory } else if ((queueDevice != dstMemory->getContext().devices()[0]) && (srcMemory->getContext().devices().size() != 1)) { - pQueue = hip::getNullStream(dstMemory->getContext()); - amd::Command* cmd = queue.getLastQueuedCommand(true); + pStream = hip::getNullStream(dstMemory->getContext()); + amd::Command* cmd = stream.getLastQueuedCommand(true); if (cmd != nullptr) { waitList.push_back(cmd); } } } - command = new amd::CopyMemoryCommand(*pQueue, CL_COMMAND_COPY_BUFFER, waitList, + command = new amd::CopyMemoryCommand(*pStream, CL_COMMAND_COPY_BUFFER, waitList, *srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, sizeBytes, copyMetadata); } @@ -445,13 +445,13 @@ bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind) { } return false; } -void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, amd::HostQueue& queue) { - queue.finish(); +void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, hip::Stream& stream) { + stream.finish(); memcpy(dst, src, sizeBytes); } // ================================================================================================ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, - amd::HostQueue& queue, bool isAsync = false) { + hip::Stream& stream, bool isAsync = false) { hipError_t status; if (sizeBytes == 0) { // Skip if nothing needs writing. @@ -470,7 +470,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin size_t dOffset = 0; amd::Memory* dstMemory = getMemoryObject(dst, dOffset); if (srcMemory == nullptr && dstMemory == nullptr) { - ihipHtoHMemcpy(dst, src, sizeBytes, queue); + ihipHtoHMemcpy(dst, src, sizeBytes, stream); return hipSuccess; } else if ((srcMemory == nullptr) && (dstMemory != nullptr)) { isAsync = false; @@ -483,7 +483,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin isP2P = true; } amd::Command* command = nullptr; - status = ihipMemcpyCommand(command, dst, src, sizeBytes, kind, queue, isAsync); + status = ihipMemcpyCommand(command, dst, src, sizeBytes, kind, stream, isAsync); if (status != hipSuccess) { return status; } @@ -491,22 +491,22 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin if (!isAsync) { command->awaitCompletion(); } else if (isP2P) { - amd::HostQueue* pQueue = hip::getNullStream(dstMemory->getContext()); + hip::Stream* pStream = hip::getNullStream(dstMemory->getContext()); amd::Command::EventWaitList waitList; waitList.push_back(command); - amd::Command* depdentMarker = new amd::Marker(*pQueue, false, waitList); + amd::Command* depdentMarker = new amd::Marker(*pStream, false, waitList); if (depdentMarker != nullptr) { depdentMarker->enqueue(); depdentMarker->release(); } } else { amd::HostQueue* newQueue = command->queue(); - if (newQueue != &queue) { + if (newQueue != &stream) { amd::Command::EventWaitList waitList; amd::Command* cmd = newQueue->getLastQueuedCommand(true); if (cmd != nullptr) { waitList.push_back(cmd); - amd::Command* depdentMarker = new amd::Marker(queue, true, waitList); + amd::Command* depdentMarker = new amd::Marker(stream, true, waitList); if (depdentMarker != nullptr) { depdentMarker->enqueue(); depdentMarker->release(); @@ -611,18 +611,18 @@ hipError_t hipFree(void* ptr) { hipError_t hipMemcpy_common(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream = nullptr) { CHECK_STREAM_CAPTURING(); - amd::HostQueue* queue = nullptr; + hip::Stream* hip_stream = nullptr; if (stream != nullptr) { - queue = hip::getQueue(stream); + hip_stream = hip::getStream(stream); } else { - queue = hip::getNullStream(); + hip_stream = hip::getNullStream(); } - if (queue == nullptr) { + if (hip_stream == nullptr) { return hipErrorInvalidValue; } - return ihipMemcpy(dst, src, sizeBytes, kind, *queue); + return ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream); } hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) { @@ -643,12 +643,12 @@ hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes, HIP_RETURN(hipErrorContextIsDestroyed); } - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN_DURATION(ihipMemcpy(dst, src, sizeBytes, kind, *queue, false)); + HIP_RETURN_DURATION(ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream, false)); } hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) { @@ -697,9 +697,9 @@ hipError_t ihipArrayDestroy(hipArray* array) { } for (auto& dev : g_devices) { - amd::HostQueue* queue = dev->NullStream(true); - if (queue != nullptr) { - queue->finish(); + hip::Stream* stream = dev->NullStream(true); + if (stream != nullptr) { + stream->finish(); } } @@ -1205,9 +1205,9 @@ hipError_t ihipHostUnregister(void* hostPtr) { // Wait on the device, associated with the current memory object during allocation auto device_id = mem->getUserData().deviceId; - amd::HostQueue* queue = g_devices[device_id]->NullStream(true); - if (queue != nullptr) { - queue->finish(); + hip::Stream* stream = g_devices[device_id]->NullStream(true); + if (stream != nullptr) { + stream->finish(); } amd::MemObjMap::RemoveMemObj(hostPtr); @@ -1392,11 +1392,11 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice, size_t ByteCount) { HIP_INIT_API(hipMemcpyHtoD, dstDevice, srcHost, ByteCount); CHECK_STREAM_CAPTURING(); - amd::HostQueue* queue = hip::getQueue(nullptr); - if (queue == nullptr) { + hip::Stream* stream = hip::getStream(nullptr); + if (stream == nullptr) { HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcHost, ByteCount, hipMemcpyHostToDevice, *queue)); + HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcHost, ByteCount, hipMemcpyHostToDevice, *stream)); } hipError_t hipMemcpyDtoH(void* dstHost, @@ -1404,11 +1404,11 @@ hipError_t hipMemcpyDtoH(void* dstHost, size_t ByteCount) { HIP_INIT_API(hipMemcpyDtoH, dstHost, srcDevice, ByteCount); CHECK_STREAM_CAPTURING(); - amd::HostQueue* queue = hip::getQueue(nullptr); - if (queue == nullptr) { + hip::Stream* stream = hip::getStream(nullptr); + if (stream == nullptr) { HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN_DURATION(ihipMemcpy(dstHost, srcDevice, ByteCount, hipMemcpyDeviceToHost, *queue)); + HIP_RETURN_DURATION(ihipMemcpy(dstHost, srcDevice, ByteCount, hipMemcpyDeviceToHost, *stream)); } hipError_t hipMemcpyDtoD(hipDeviceptr_t dstDevice, @@ -1416,22 +1416,22 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dstDevice, size_t ByteCount) { HIP_INIT_API(hipMemcpyDtoD, dstDevice, srcDevice, ByteCount); CHECK_STREAM_CAPTURING(); - amd::HostQueue* queue = hip::getQueue(nullptr); - if (queue == nullptr) { + hip::Stream* stream = hip::getStream(nullptr); + if (stream == nullptr) { HIP_RETURN(hipErrorInvalidValue); } - HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *queue)); + HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *stream)); } hipError_t hipMemcpyAsync_common(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream) { STREAM_CAPTURE(hipMemcpyAsync, stream, dst, src, sizeBytes, kind); - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { return hipErrorInvalidValue; } - return ihipMemcpy(dst, src, sizeBytes, kind, *queue, true); + return ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream, true); } hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes, @@ -1452,12 +1452,12 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice, void* srcHost, size_t By HIP_INIT_API(hipMemcpyHtoDAsync, dstDevice, srcHost, ByteCount, stream); hipMemcpyKind kind = hipMemcpyHostToDevice; STREAM_CAPTURE(hipMemcpyHtoDAsync, stream, dstDevice, srcHost, ByteCount, kind); - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { HIP_RETURN(hipErrorInvalidValue); } HIP_RETURN_DURATION( - ihipMemcpy(dstDevice, srcHost, ByteCount, kind, *queue, true)); + ihipMemcpy(dstDevice, srcHost, ByteCount, kind, *hip_stream, true)); } hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount, @@ -1465,12 +1465,12 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice HIP_INIT_API(hipMemcpyDtoDAsync, dstDevice, srcDevice, ByteCount, stream); hipMemcpyKind kind = hipMemcpyDeviceToDevice; STREAM_CAPTURE(hipMemcpyDtoDAsync, stream, dstDevice, srcDevice, ByteCount, kind); - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { HIP_RETURN(hipErrorInvalidValue); } HIP_RETURN_DURATION( - ihipMemcpy(dstDevice, srcDevice, ByteCount, kind, *queue, true)); + ihipMemcpy(dstDevice, srcDevice, ByteCount, kind, *hip_stream, true)); } hipError_t hipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t ByteCount, @@ -1478,12 +1478,12 @@ hipError_t hipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t By HIP_INIT_API(hipMemcpyDtoHAsync, dstHost, srcDevice, ByteCount, stream); hipMemcpyKind kind = hipMemcpyDeviceToHost; STREAM_CAPTURE(hipMemcpyDtoHAsync, stream, dstHost, srcDevice, ByteCount, kind); - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { HIP_RETURN(hipErrorInvalidValue); } HIP_RETURN_DURATION( - ihipMemcpy(dstHost, srcDevice, ByteCount, kind, *queue, true)); + ihipMemcpy(dstHost, srcDevice, ByteCount, kind, *hip_stream, true)); } hipError_t ihipMemcpyAtoDValidate(hipArray* srcArray, void* dstDevice, amd::Coord3D& srcOrigin, @@ -1532,7 +1532,7 @@ hipError_t ihipMemcpyAtoDValidate(hipArray* srcArray, void* dstDevice, amd::Coor hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray* srcArray, void* dstDevice, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, - amd::HostQueue* queue) { + hip::Stream* stream) { amd::BufferRect srcRect; amd::BufferRect dstRect; amd::Memory* dstMemory; @@ -1544,7 +1544,7 @@ hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray* srcArray, voi return status; } - amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_IMAGE_TO_BUFFER, + amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_IMAGE_TO_BUFFER, amd::Command::EventWaitList{}, *srcImage, *dstMemory, srcOrigin, dstOrigin, copyRegion, srcRect, dstRect); @@ -1606,7 +1606,7 @@ hipError_t ihipMemcpyDtoAValidate(void* srcDevice, hipArray* dstArray, amd::Coor hipError_t ihipMemcpyDtoACommand(amd::Command*& command, void* srcDevice, hipArray* dstArray, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, - amd::HostQueue* queue) { + hip::Stream* stream) { amd::Image* dstImage; amd::Memory* srcMemory; amd::BufferRect dstRect; @@ -1617,7 +1617,7 @@ hipError_t ihipMemcpyDtoACommand(amd::Command*& command, void* srcDevice, hipArr if (status != hipSuccess) { return status; } - amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_TO_IMAGE, + amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_BUFFER_TO_IMAGE, amd::Command::EventWaitList{}, *srcMemory, *dstImage, srcOrigin, dstOrigin, copyRegion, srcRect, dstRect); @@ -1679,7 +1679,7 @@ hipError_t ihipMemcpyDtoDValidate(void* srcDevice, void* dstDevice, amd::Coord3D hipError_t ihipMemcpyDtoDCommand(amd::Command*& command, void* srcDevice, void* dstDevice, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, - size_t dstRowPitch, size_t dstSlicePitch, amd::HostQueue* queue) { + size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream) { amd::Memory* srcMemory; amd::Memory* dstMemory; amd::BufferRect srcRect; @@ -1694,7 +1694,7 @@ hipError_t ihipMemcpyDtoDCommand(amd::Command*& command, void* srcDevice, void* amd::Coord3D srcStart(srcRect.start_, 0, 0); amd::Coord3D dstStart(dstRect.start_, 0, 0); amd::CopyMemoryCommand* copyCommand = new amd::CopyMemoryCommand( - *queue, CL_COMMAND_COPY_BUFFER_RECT, amd::Command::EventWaitList{}, *srcMemory, *dstMemory, + *stream, CL_COMMAND_COPY_BUFFER_RECT, amd::Command::EventWaitList{}, *srcMemory, *dstMemory, srcStart, dstStart, copyRegion, srcRect, dstRect); if (copyCommand == nullptr) { @@ -1744,7 +1744,7 @@ hipError_t ihipMemcpyDtoHValidate(void* srcDevice, void* dstHost, amd::Coord3D& hipError_t ihipMemcpyDtoHCommand(amd::Command*& command, void* srcDevice, void* dstHost, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, - size_t dstRowPitch, size_t dstSlicePitch, amd::HostQueue* queue, + size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream, bool isAsync = false) { amd::Memory* srcMemory; amd::BufferRect srcRect; @@ -1758,7 +1758,7 @@ hipError_t ihipMemcpyDtoHCommand(amd::Command*& command, void* srcDevice, void* amd::Coord3D srcStart(srcRect.start_, 0, 0); amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); amd::ReadMemoryCommand* readCommand = - new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, amd::Command::EventWaitList{}, + new amd::ReadMemoryCommand(*stream, CL_COMMAND_READ_BUFFER_RECT, amd::Command::EventWaitList{}, *srcMemory, srcStart, copyRegion, dstHost, srcRect, dstRect, copyMetadata); @@ -1809,7 +1809,7 @@ hipError_t ihipMemcpyHtoDValidate(const void* srcHost, void* dstDevice, amd::Coo hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, void* dstDevice, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, - size_t dstRowPitch, size_t dstSlicePitch, amd::HostQueue* queue, + size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream, bool isAsync = false) { amd::Memory* dstMemory; amd::BufferRect srcRect; @@ -1824,7 +1824,7 @@ hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, vo amd::Coord3D dstStart(dstRect.start_, 0, 0); amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); amd::WriteMemoryCommand* writeCommand = new amd::WriteMemoryCommand( - *queue, CL_COMMAND_WRITE_BUFFER_RECT, amd::Command::EventWaitList{}, *dstMemory, dstStart, + *stream, CL_COMMAND_WRITE_BUFFER_RECT, amd::Command::EventWaitList{}, *dstMemory, dstStart, copyRegion, srcHost, dstRect, srcRect, copyMetadata); if (writeCommand == nullptr) { @@ -1842,7 +1842,7 @@ hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, vo hipError_t ihipMemcpyHtoH(const void* srcHost, void* dstHost, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, - amd::HostQueue* queue) { + hip::Stream* stream) { if ((srcHost == nullptr) || (dstHost == nullptr)) { return hipErrorInvalidValue; } @@ -1859,8 +1859,8 @@ hipError_t ihipMemcpyHtoH(const void* srcHost, void* dstHost, amd::Coord3D srcOr return hipErrorInvalidValue; } - if (queue) { - queue->finish(); + if (stream) { + stream->finish(); } for (size_t slice = 0; slice < copyRegion[2]; slice++) { @@ -1909,7 +1909,7 @@ hipError_t ihipMemcpyAtoAValidate(hipArray* srcArray, hipArray* dstArray, amd::C hipError_t ihipMemcpyAtoACommand(amd::Command*& command, hipArray* srcArray, hipArray* dstArray, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, - amd::Coord3D copyRegion, amd::HostQueue* queue) { + amd::Coord3D copyRegion, hip::Stream* stream) { amd::Image* srcImage; amd::Image* dstImage; @@ -1919,7 +1919,7 @@ hipError_t ihipMemcpyAtoACommand(amd::Command*& command, hipArray* srcArray, hip return status; } - amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_IMAGE, + amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_IMAGE, amd::Command::EventWaitList{}, *srcImage, *dstImage, srcOrigin, dstOrigin, copyRegion); @@ -1968,7 +1968,7 @@ hipError_t ihipMemcpyHtoAValidate(const void* srcHost, hipArray* dstArray, hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hipArray* dstArray, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, - amd::HostQueue* queue, bool isAsync = false) { + hip::Stream* stream, bool isAsync = false) { amd::Image* dstImage; amd::BufferRect srcRect; @@ -1980,7 +1980,7 @@ hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hi amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); amd::WriteMemoryCommand* writeMemCmd = new amd::WriteMemoryCommand( - *queue, CL_COMMAND_WRITE_IMAGE, amd::Command::EventWaitList{}, *dstImage, dstOrigin, + *stream, CL_COMMAND_WRITE_IMAGE, amd::Command::EventWaitList{}, *dstImage, dstOrigin, copyRegion, static_cast(srcHost) + srcRect.start_, srcRowPitch, srcSlicePitch, copyMetadata); @@ -2029,7 +2029,7 @@ hipError_t ihipMemcpyAtoHValidate(hipArray* srcArray, void* dstHost, amd::Coord3 hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray* srcArray, void* dstHost, amd::Coord3D srcOrigin, amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, - amd::HostQueue* queue, bool isAsync = false) { + hip::Stream* stream, bool isAsync = false) { amd::Image* srcImage; amd::BufferRect dstRect; amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA); @@ -2041,7 +2041,7 @@ hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray* srcArray, voi } amd::ReadMemoryCommand* readMemCmd = new amd::ReadMemoryCommand( - *queue, CL_COMMAND_READ_IMAGE, amd::Command::EventWaitList{}, *srcImage, srcOrigin, + *stream, CL_COMMAND_READ_IMAGE, amd::Command::EventWaitList{}, *srcImage, srcOrigin, copyRegion, static_cast(dstHost) + dstRect.start_, dstRowPitch, dstSlicePitch, copyMetadata); @@ -2058,7 +2058,7 @@ hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray* srcArray, voi } hipError_t ihipGetMemcpyParam3DCommand(amd::Command*& command, const HIP_MEMCPY3D* pCopy, - amd::HostQueue* queue) { + hip::Stream* stream) { // If {src/dst}MemoryType is hipMemoryTypeUnified, {src/dst}Device and {src/dst}Pitch specify the // (unified virtual address space) base address of the source data and the bytes per row to apply. // {src/dst}Array is ignored. @@ -2106,41 +2106,41 @@ hipError_t ihipGetMemcpyParam3DCommand(amd::Command*& command, const HIP_MEMCPY3 // Host to Device. return ihipMemcpyHtoDCommand(command, pCopy->srcHost, pCopy->dstDevice, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, queue); + pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream); } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) { // Device to Host. return ihipMemcpyDtoHCommand(command, pCopy->srcDevice, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, queue); + pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream); } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) { // Device to Device. return ihipMemcpyDtoDCommand(command, pCopy->srcDevice, pCopy->dstDevice, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, queue); + pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream); } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) { // Host to Image. return ihipMemcpyHtoACommand(command, pCopy->srcHost, pCopy->dstArray, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - queue); + stream); } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) { // Image to Host. return ihipMemcpyAtoHCommand(command, pCopy->srcArray, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, - queue); + stream); } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) { // Device to Image. return ihipMemcpyDtoACommand(command, pCopy->srcDevice, pCopy->dstArray, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, - queue); + stream); } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) { // Image to Device. return ihipMemcpyAtoDCommand(command, pCopy->srcArray, pCopy->dstDevice, srcOrigin, dstOrigin, copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, - queue); + stream); } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) { // Image to Image. return ihipMemcpyAtoACommand(command, pCopy->srcArray, pCopy->dstArray, srcOrigin, dstOrigin, - copyRegion, queue); + copyRegion, stream); } else { ShouldNotReachHere(); } @@ -2212,14 +2212,14 @@ hipError_t ihipMemcpyParam3D(const HIP_MEMCPY3D* pCopy, hipStream_t stream, bool // Host to Host. return ihipMemcpyHtoH(pCopy->srcHost, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch, - pCopy->dstPitch * pCopy->dstHeight, hip::getQueue(stream)); + pCopy->dstPitch * pCopy->dstHeight, hip::getStream(stream)); } else { amd::Command* command; - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { return hipErrorInvalidValue; } - status = ihipGetMemcpyParam3DCommand(command, pCopy, queue); + status = ihipGetMemcpyParam3DCommand(command, pCopy, hip_stream); if (status != hipSuccess) return status; // Transfers from device memory to pageable host memory and transfers from any host memory to any host memory @@ -2507,13 +2507,13 @@ hipError_t ihipMemcpyAtoD(hipArray* srcArray, void* dstDevice, amd::Coord3D srcO amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) { amd::Command* command; - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { return hipErrorInvalidValue; } hipError_t status = ihipMemcpyAtoDCommand(command, srcArray, dstDevice, srcOrigin, dstOrigin, copyRegion, - dstRowPitch, dstSlicePitch, queue); + dstRowPitch, dstSlicePitch, hip_stream); if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } @@ -2521,13 +2521,13 @@ hipError_t ihipMemcpyDtoA(void* srcDevice, hipArray* dstArray, amd::Coord3D srcO amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { amd::Command* command; - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { return hipErrorInvalidValue; } hipError_t status = ihipMemcpyDtoACommand(command, srcDevice, dstArray, srcOrigin, dstOrigin, copyRegion, - srcRowPitch, srcSlicePitch, queue); + srcRowPitch, srcSlicePitch, hip_stream); if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } @@ -2536,13 +2536,13 @@ hipError_t ihipMemcpyDtoD(void* srcDevice, void* dstDevice, amd::Coord3D srcOrig size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) { amd::Command* command; - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { return hipErrorInvalidValue; } hipError_t status = ihipMemcpyDtoDCommand(command, srcDevice, dstDevice, srcOrigin, dstOrigin, copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch, - dstSlicePitch, queue); + dstSlicePitch, hip_stream); if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } @@ -2551,13 +2551,13 @@ hipError_t ihipMemcpyDtoH(void* srcDevice, void* dstHost, amd::Coord3D srcOrigin size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) { amd::Command* command; - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { return hipErrorInvalidValue; } hipError_t status = ihipMemcpyDtoHCommand(command, srcDevice, dstHost, srcOrigin, dstOrigin, copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch, - dstSlicePitch, queue, isAsync); + dstSlicePitch, hip_stream, isAsync); if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } @@ -2566,13 +2566,13 @@ hipError_t ihipMemcpyHtoD(const void* srcHost, void* dstDevice, amd::Coord3D src size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) { amd::Command* command; - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { return hipErrorInvalidValue; } hipError_t status = ihipMemcpyHtoDCommand(command, srcHost, dstDevice, srcOrigin, dstOrigin, copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch, - dstSlicePitch, queue, isAsync); + dstSlicePitch, hip_stream, isAsync); if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } @@ -2580,12 +2580,12 @@ hipError_t ihipMemcpyAtoA(hipArray* srcArray, hipArray* dstArray, amd::Coord3D s amd::Coord3D dstOrigin, amd::Coord3D copyRegion, hipStream_t stream, bool isAsync = false) { amd::Command* command; - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { return hipErrorInvalidValue; } hipError_t status = ihipMemcpyAtoACommand(command, srcArray, dstArray, srcOrigin, dstOrigin, - copyRegion, queue); + copyRegion, hip_stream); if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } @@ -2593,13 +2593,13 @@ hipError_t ihipMemcpyHtoA(const void* srcHost, hipArray* dstArray, amd::Coord3D amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) { amd::Command* command; - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { return hipErrorInvalidValue; } hipError_t status = ihipMemcpyHtoACommand(command, srcHost, dstArray, srcOrigin, dstOrigin, copyRegion, - srcRowPitch, srcSlicePitch, queue, isAsync); + srcRowPitch, srcSlicePitch, hip_stream, isAsync); if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } @@ -2607,13 +2607,13 @@ hipError_t ihipMemcpyAtoH(hipArray* srcArray, void* dstHost, amd::Coord3D srcOri amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) { amd::Command* command; - amd::HostQueue* queue = hip::getQueue(stream); - if (queue == nullptr) { + hip::Stream* hip_stream = hip::getStream(stream); + if (hip_stream == nullptr) { return hipErrorInvalidValue; } hipError_t status = ihipMemcpyAtoHCommand(command, srcArray, dstHost, srcOrigin, dstOrigin, copyRegion, - dstRowPitch, dstSlicePitch, queue, isAsync); + dstRowPitch, dstSlicePitch, hip_stream, isAsync); if (status != hipSuccess) return status; return ihipMemcpyCmdEnqueue(command, isAsync); } @@ -2673,9 +2673,9 @@ hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p) { } hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p, - amd::HostQueue* queue) { + hip::Stream* stream) { const HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*p); - return ihipGetMemcpyParam3DCommand(command, &desc, queue); + return ihipGetMemcpyParam3DCommand(command, &desc, stream); } hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) { @@ -2733,8 +2733,8 @@ hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) { hipError_t packFillMemoryCommand(amd::Command*& command, amd::Memory* memory, size_t offset, int64_t value, size_t valueSize, size_t sizeBytes, - amd::HostQueue* queue) { - if ((memory == nullptr) || (queue == nullptr)) { + hip::Stream* stream) { + if ((memory == nullptr) || (stream == nullptr)) { return hipErrorInvalidValue; } @@ -2744,7 +2744,7 @@ hipError_t packFillMemoryCommand(amd::Command*& command, amd::Memory* memory, si // surface=[pitch, width, height] amd::Coord3D surface(sizeBytes, sizeBytes, 1); amd::FillMemoryCommand* fillMemCommand = - new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), + new amd::FillMemoryCommand(*stream, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(), &value, valueSize, fillOffset, fillSize, surface); if (fillMemCommand == nullptr) { return hipErrorOutOfMemory; @@ -2810,7 +2810,7 @@ hipError_t ihipGraphMemsetParams_validate(const hipMemsetParams* pNodeParams) { } hipError_t ihipMemsetCommand(std::vector& commands, void* dst, int64_t value, - size_t valueSize, size_t sizeBytes, amd::HostQueue* queue) { + size_t valueSize, size_t sizeBytes, hip::Stream* stream) { hipError_t hip_error = hipSuccess; auto aligned_dst = amd::alignUp(reinterpret_cast
(dst), sizeof(uint64_t)); size_t offset = 0; @@ -2820,7 +2820,7 @@ hipError_t ihipMemsetCommand(std::vector& commands, void* dst, in amd::Command* command; hip_error = packFillMemoryCommand(command, memory, offset, value, valueSize, sizeBytes, - queue); + stream); commands.push_back(command); return hip_error; @@ -2854,8 +2854,8 @@ hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeByt } } std::vector commands; - amd::HostQueue* queue = hip::getQueue(stream); - hip_error = ihipMemsetCommand(commands, dst, value, valueSize, sizeBytes, queue); + hip::Stream* hip_stream = hip::getStream(stream); + hip_error = ihipMemsetCommand(commands, dst, value, valueSize, sizeBytes, hip_stream); if (hip_error != hipSuccess) { break; } @@ -2972,13 +2972,13 @@ hipError_t ihipMemset3D_validate(hipPitchedPtr pitchedDevPtr, int value, hipExte } hipError_t ihipMemset3DCommand(std::vector &commands, hipPitchedPtr pitchedDevPtr, - int value, hipExtent extent, amd::HostQueue* queue, size_t elementSize = 1) { + int value, hipExtent extent, hip::Stream* stream, size_t elementSize = 1) { size_t offset = 0; auto sizeBytes = extent.width * extent.height * extent.depth; amd::Memory* memory = getMemoryObject(pitchedDevPtr.ptr, offset); if (pitchedDevPtr.pitch == extent.width) { return ihipMemsetCommand(commands, pitchedDevPtr.ptr, value, elementSize, - static_cast(sizeBytes), queue); + static_cast(sizeBytes), stream); } // Workaround for cases when pitch > row until fill kernel will be updated to support pitch. // Fall back to filling one row at a time. @@ -2994,7 +2994,7 @@ hipError_t ihipMemset3DCommand(std::vector &commands, hipPitchedP } amd::FillMemoryCommand* command; command = new amd::FillMemoryCommand( - *queue, CL_COMMAND_FILL_BUFFER, amd::Command::EventWaitList{}, *memory->asBuffer(), + *stream, CL_COMMAND_FILL_BUFFER, amd::Command::EventWaitList{}, *memory->asBuffer(), &value, elementSize, origin, region, surface); commands.push_back(command); return hipSuccess; @@ -3025,9 +3025,9 @@ hipError_t ihipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent isAsync = true; } } - amd::HostQueue* queue = hip::getQueue(stream); + hip::Stream* hip_stream = hip::getStream(stream); std::vector commands; - status = ihipMemset3DCommand(commands, pitchedDevPtr, value, extent, queue); + status = ihipMemset3DCommand(commands, pitchedDevPtr, value, extent, hip_stream); if (status != hipSuccess) { return status; } @@ -3946,9 +3946,9 @@ hipError_t ihipMipmappedArrayDestroy(hipMipmappedArray_t mipmapped_array_ptr) { } for (auto& dev : g_devices) { - amd::HostQueue* queue = dev->NullStream(true); - if (queue != nullptr) { - queue->finish(); + hip::Stream* stream = dev->NullStream(true); + if (stream != nullptr) { + stream->finish(); } } diff --git a/projects/clr/hipamd/src/hip_module.cpp b/projects/clr/hipamd/src/hip_module.cpp index 41b9038d0c..98cc7fd599 100644 --- a/projects/clr/hipamd/src/hip_module.cpp +++ b/projects/clr/hipamd/src/hip_module.cpp @@ -305,7 +305,7 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f, uint32_t globalWorkSizeX, uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t sharedMemBytes, - amd::HostQueue* queue, void** kernelParams, void** extra, + hip::Stream* stream, void** kernelParams, void** extra, hipEvent_t startEvent = nullptr, hipEvent_t stopEvent = nullptr, uint32_t flags = 0, uint32_t params = 0, uint32_t gridId = 0, uint32_t numGrids = 0, uint64_t prevGridSum = 0, @@ -328,7 +328,7 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f, } amd::NDRangeKernelCommand* kernelCommand = new amd::NDRangeKernelCommand( - *queue, waitList, *kernel, ndrange, sharedMemBytes, params, gridId, numGrids, prevGridSum, + *stream, waitList, *kernel, ndrange, sharedMemBytes, params, gridId, numGrids, prevGridSum, allGridSum, firstDevice, profileNDRange); if (!kernelCommand) { return hipErrorOutOfMemory; @@ -371,9 +371,9 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX, return status; } amd::Command* command = nullptr; - amd::HostQueue* queue = hip::getQueue(hStream); + hip::Stream* hip_stream = hip::getStream(hStream); status = ihipLaunchKernelCommand(command, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ, - blockDimX, blockDimY, blockDimZ, sharedMemBytes, queue, + blockDimX, blockDimY, blockDimZ, sharedMemBytes, hip_stream, kernelParams, extra, startEvent, stopEvent, flags, params, gridId, numGrids, prevGridSum, allGridSum, firstDevice); if (status != hipSuccess) { @@ -544,8 +544,8 @@ hipError_t ihipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* } if (launch.hStream != nullptr) { // Validate devices to make sure it dosn't have duplicates - amd::HostQueue* queue = reinterpret_cast(launch.hStream)->asHostQueue(); - auto device = &queue->vdev()->device(); + hip::Stream* hip_stream = reinterpret_cast(launch.hStream); + auto device = &hip_stream->vdev()->device(); for (int j = 0; j < numDevices; ++j) { if (mgpu_list[j] == device) { return hipErrorInvalidDevice; @@ -562,23 +562,23 @@ hipError_t ihipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* // Sync the execution streams on all devices if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) { for (int i = 0; i < numDevices; ++i) { - amd::HostQueue* queue = - reinterpret_cast(launchParamsList[i].hStream)->asHostQueue(); - queue->finish(); + hip::Stream* hip_stream = + reinterpret_cast(launchParamsList[i].hStream); + hip_stream->finish(); } } for (int i = 0; i < numDevices; ++i) { const hipFunctionLaunchParams& launch = launchParamsList[i]; - amd::HostQueue* queue = reinterpret_cast(launch.hStream)->asHostQueue(); + hip::Stream* hip_stream = reinterpret_cast(launch.hStream); if (i == 0) { // The order of devices in the launch may not match the order in the global array for (size_t dev = 0; dev < g_devices.size(); ++dev) { // Find the matching device - if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) { + if (&hip_stream->vdev()->device() == g_devices[dev]->devices()[0]) { // Save ROCclr index of the first device in the launch - firstDevice = queue->vdev()->device().index(); + firstDevice = hip_stream->vdev()->device().index(); break; } } @@ -608,9 +608,9 @@ hipError_t ihipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams* // Sync the execution streams on all devices if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) { for (int i = 0; i < numDevices; ++i) { - amd::HostQueue* queue = - reinterpret_cast(launchParamsList[i].hStream)->asHostQueue(); - queue->finish(); + hip::Stream* hip_stream = + reinterpret_cast(launchParamsList[i].hStream); + hip_stream->finish(); } } @@ -739,12 +739,12 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL return hipErrorInvalidValue; } - amd::HostQueue* queue = hip::getQueue(launch.stream); + hip::Stream* hip_stream = hip::getStream(launch.stream); hipFunction_t func = nullptr; // The order of devices in the launch may not match the order in the global array for (size_t dev = 0; dev < g_devices.size(); ++dev) { // Find the matching device and request the kernel function - if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) { + if (&hip_stream->vdev()->device() == g_devices[dev]->devices()[0]) { IHIP_RETURN_ONFAIL(PlatformState::instance().getStatFunc(&func, launch.func, dev)); break; } diff --git a/projects/clr/hipamd/src/hip_platform.cpp b/projects/clr/hipamd/src/hip_platform.cpp index 20d1316217..aecf506448 100644 --- a/projects/clr/hipamd/src/hip_platform.cpp +++ b/projects/clr/hipamd/src/hip_platform.cpp @@ -34,7 +34,7 @@ PlatformState* PlatformState::platform_; // Initiaized as nullptr by default // forward declaration of methods required for __hipRegisrterManagedVar hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0); hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, - amd::HostQueue& queue, bool isAsync = false); + hip::Stream& stream, bool isAsync = false); struct __CudaFatBinaryWrapper { unsigned int magic; @@ -146,9 +146,9 @@ extern "C" void __hipRegisterManagedVar( HIP_INIT_VOID(); hipError_t status = ihipMallocManaged(pointer, size, align); if (status == hipSuccess) { - amd::HostQueue* queue = hip::getNullStream(); - if (queue != nullptr) { - status = ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *queue); + hip::Stream* stream = hip::getNullStream(); + if (stream != nullptr) { + status = ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *stream); guarantee((status == hipSuccess), "Error during memcpy to managed memory!"); } else { ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL"); diff --git a/projects/clr/hipamd/src/hip_stream.cpp b/projects/clr/hipamd/src/hip_stream.cpp index a0444334db..e6c2839a5b 100644 --- a/projects/clr/hipamd/src/hip_stream.cpp +++ b/projects/clr/hipamd/src/hip_stream.cpp @@ -31,7 +31,8 @@ namespace hip { // ================================================================================================ Stream::Stream(hip::Device* dev, Priority p, unsigned int f, bool null_stream, const std::vector& cuMask, hipStreamCaptureStatus captureStatus) - : queue_(nullptr), + : amd::HostQueue(*dev->asContext(), *dev->devices()[0], 0, amd::CommandQueue::RealTimeDisabled, + convertToQueuePriority(p), cuMask), lock_("Stream Callback lock"), device_(dev), priority_(p), @@ -40,18 +41,11 @@ Stream::Stream(hip::Device* dev, Priority p, unsigned int f, bool null_stream, cuMask_(cuMask), captureStatus_(captureStatus), originStream_(false), - captureID_(0) {} - -// ================================================================================================ -Stream::~Stream() { - if (queue_ != nullptr) { - amd::ScopedLock lock(streamSetLock); - streamSet.erase(this); - - queue_->release(); - queue_ = nullptr; - } -} + captureID_(0) + { + amd::ScopedLock lock(streamSetLock); + streamSet.insert(this); + } // ================================================================================================ hipError_t Stream::EndCapture() { @@ -77,38 +71,16 @@ hipError_t Stream::EndCapture() { // ================================================================================================ bool Stream::Create() { - amd::CommandQueue::Priority p; - switch (priority_) { - case Priority::High: - p = amd::CommandQueue::Priority::High; - break; - case Priority::Low: - p = amd::CommandQueue::Priority::Low; - break; - case Priority::Normal: - default: - p = amd::CommandQueue::Priority::Normal; - break; - } - amd::HostQueue* queue = new amd::HostQueue(*device_->asContext(), *device_->devices()[0], - 0, amd::CommandQueue::RealTimeDisabled, - p, cuMask_); + return create(); +} - // Create a host queue - bool result = (queue != nullptr) ? queue->create() : false; - // Insert just created stream into the list of the blocking queues - if (result) { +// ================================================================================================ +bool Stream::terminate() { + { amd::ScopedLock lock(streamSetLock); - streamSet.insert(this); - queue_ = queue; - device_->SaveQueue(queue); - } else if (queue != nullptr) { - // Queue creation has failed, and virtual device associated with the queue may not be created. - // Just need to delete the queue instance. - delete queue; + streamSet.erase(this); } - - return result; + return HostQueue::terminate(); } // ================================================================================================ @@ -130,29 +102,6 @@ bool isValid(hipStream_t& stream) { return true; } -// ================================================================================================ -amd::HostQueue* Stream::asHostQueue(bool skip_alloc) { - if (queue_ != nullptr) { - return queue_; - } - // Access to the stream object is lock protected, because possible allocation - amd::ScopedLock l(Lock()); - if (queue_ == nullptr) { - // Create the host queue for the first time - if (!skip_alloc) { - Create(); - } - } - return queue_; -} - -// ================================================================================================ -void Stream::Finish() const { - if (queue_ != nullptr) { - queue_->finish(); - } -} - // ================================================================================================ int Stream::DeviceId() const { return device_->deviceId(); @@ -176,7 +125,7 @@ void Stream::syncNonBlockingStreams(int deviceId) { for (auto& it : streamSet) { if (it->Flags() & hipStreamNonBlocking) { if (it->DeviceId() == deviceId) { - it->asHostQueue()->finish(); + it->finish(); } } } @@ -203,7 +152,7 @@ void Stream::destroyAllStreams(int deviceId) { } } for (auto& it : toBeDeleted) { - delete it; + it->release(); } } @@ -211,36 +160,48 @@ bool Stream::StreamCaptureOngoing(void) { return (g_allCapturingStreams.empty() == true) ? false : true; } +bool Stream::existsActiveStreamForDevice(hip::Device* device) { + + amd::ScopedLock lock(streamSetLock); + + for (const auto& active_stream : streamSet) { + if ((active_stream->GetDevice() == device) && + active_stream->GetQueueStatus()) { + return true; + } + } + return false; +} + };// hip namespace // ================================================================================================ -void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream) { +void iHipWaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stream) { amd::Command::EventWaitList eventWaitList(0); bool submitMarker = 0; { amd::ScopedLock lock(streamSetLock); - for (const auto& stream : streamSet) { - amd::HostQueue* active_queue = stream->asHostQueue(); + for (const auto& active_stream : streamSet) { // If it's the current device - if ((&active_queue->device() == &blocking_queue->device()) && + if ((&active_stream->device() == &blocking_stream->device()) && // Make sure it's a default stream - ((stream->Flags() & hipStreamNonBlocking) == 0) && + ((active_stream->Flags() & hipStreamNonBlocking) == 0) && // and it's not the current stream - (active_queue != blocking_queue) && + (active_stream != blocking_stream) && // check for a wait on the null stream - (stream->Null() == wait_null_stream)) { + (active_stream->Null() == wait_null_stream)) { // Get the last valid command - amd::Command* command = active_queue->getLastQueuedCommand(true); + amd::Command* command = active_stream->getLastQueuedCommand(true); if (command != nullptr) { amd::Event& event = command->event(); // Check HW status of the ROCcrl event. // Note: not all ROCclr modes support HW status - bool ready = active_queue->device().IsHwEventReady(event); + bool ready = active_stream->device().IsHwEventReady(event); if (!ready) { ready = (command->status() == CL_COMPLETE); } - submitMarker |= active_queue->vdev()->isFenceDirty(); + submitMarker |= active_stream->vdev()->isFenceDirty(); // Check the current active status if (!ready) { command->notifyCmdQueue(); @@ -259,7 +220,7 @@ void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream // Check if we have to wait anything if (eventWaitList.size() > 0 || submitMarker) { - amd::Command* command = new amd::Marker(*blocking_queue, kMarkerDisableFlush, eventWaitList); + amd::Command* command = new amd::Marker(*blocking_stream, kMarkerDisableFlush, eventWaitList); if (command != nullptr) { command->enqueue(); command->release(); @@ -288,8 +249,11 @@ static hipError_t ihipStreamCreate(hipStream_t* stream, } hip::Stream* hStream = new hip::Stream(hip::getCurrentDevice(), priority, flags, false, cuMask); - if (hStream == nullptr || !hStream->Create()) { - delete hStream; + if (hStream == nullptr) { + return hipErrorOutOfMemory; + } + else if (!hStream->Create()) { + hStream->release(); return hipErrorOutOfMemory; } @@ -310,7 +274,7 @@ stream_per_thread::stream_per_thread() { stream_per_thread::~stream_per_thread() { for (auto &stream:m_streams) { if (stream != nullptr && hip::isValid(stream)) { - delete reinterpret_cast(stream); + reinterpret_cast(stream)->release(); stream = nullptr; } } @@ -449,7 +413,7 @@ hipError_t hipStreamSynchronize_common(hipStream_t stream) { } } // Wait for the current host queue - hip::getQueue(stream)->finish(); + hip::getStream(stream)->finish(); return hipSuccess; } @@ -498,7 +462,7 @@ hipError_t hipStreamDestroy(hipStream_t stream) { if (l_it != hip::tls.capture_streams_.end()) { hip::tls.capture_streams_.erase(l_it); } - delete s; + s->release(); HIP_RETURN(hipSuccess); } @@ -564,9 +528,9 @@ hipError_t hipStreamQuery_common(hipStream_t stream) { HIP_RETURN(hipErrorStreamCaptureUnsupported); } } - amd::HostQueue* hostQueue = hip::getQueue(stream); + hip::Stream* hip_stream = hip::getStream(stream); - amd::Command* command = hostQueue->getLastQueuedCommand(true); + amd::Command* command = hip_stream->getLastQueuedCommand(true); if (command == nullptr) { // Nothing was submitted to the queue return hipSuccess; @@ -604,13 +568,13 @@ hipError_t streamCallback_common(hipStream_t stream, StreamCallback* cbo, void* return hipErrorContextIsDestroyed; } - amd::HostQueue* hostQueue = hip::getQueue(stream); - amd::Command* last_command = hostQueue->getLastQueuedCommand(true); + hip::Stream* hip_stream = hip::getStream(stream); + amd::Command* last_command = hip_stream->getLastQueuedCommand(true); amd::Command::EventWaitList eventWaitList; if (last_command != nullptr) { eventWaitList.push_back(last_command); } - amd::Command* command = new amd::Marker(*hostQueue, !kMarkerDisableFlush, eventWaitList); + amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, eventWaitList); if (command == nullptr) { return hipErrorInvalidValue; } @@ -630,7 +594,7 @@ hipError_t streamCallback_common(hipStream_t stream, StreamCallback* cbo, void* // Add the new barrier to stall the stream, until the callback is done eventWaitList.clear(); eventWaitList.push_back(command); - amd::Command* block_command = new amd::Marker(*hostQueue, !kMarkerDisableFlush, eventWaitList); + amd::Command* block_command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, eventWaitList); if (block_command == nullptr) { return hipErrorInvalidValue; } diff --git a/projects/clr/hipamd/src/hip_stream_ops.cpp b/projects/clr/hipamd/src/hip_stream_ops.cpp index a3bed6cf53..7032c4c65d 100644 --- a/projects/clr/hipamd/src/hip_stream_ops.cpp +++ b/projects/clr/hipamd/src/hip_stream_ops.cpp @@ -69,11 +69,11 @@ hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void return hipErrorInvalidValue; } - amd::HostQueue* queue = hip::getQueue(stream); + hip::Stream* hip_stream = hip::getStream(stream); amd::Command::EventWaitList waitList; amd::StreamOperationCommand* command = - new amd::StreamOperationCommand(*queue, cmdType, waitList, *memory->asBuffer(), + new amd::StreamOperationCommand(*hip_stream, cmdType, waitList, *memory->asBuffer(), value, mask, outFlags, offset, sizeBytes); if (command == nullptr) { diff --git a/projects/clr/hipamd/src/hip_texture.cpp b/projects/clr/hipamd/src/hip_texture.cpp index 610d93fe46..9fead8d84a 100644 --- a/projects/clr/hipamd/src/hip_texture.cpp +++ b/projects/clr/hipamd/src/hip_texture.cpp @@ -26,7 +26,7 @@ #include "platform/sampler.hpp" hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind, - amd::HostQueue& queue, bool isAsync = false); + hip::Stream& stream, bool isAsync = false); hipError_t ihipFree(void* ptr); @@ -575,8 +575,8 @@ hipError_t hipBindTexture2D(size_t* offset, HIP_RETURN(err); } // Copy to device. - amd::HostQueue* queue = hip::getNullStream(); - HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream)); } hipError_t ihipBindTextureToArray(const textureReference* texref, @@ -624,8 +624,8 @@ hipError_t hipBindTextureToArray(const textureReference* texref, HIP_RETURN(err); } // Copy to device. - amd::HostQueue* queue = hip::getNullStream(); - HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream)); } hipError_t ihipBindTextureToMipmappedArray(const textureReference* texref, @@ -674,8 +674,8 @@ hipError_t hipBindTextureToMipmappedArray(const textureReference* texref, HIP_RETURN(err); } // Copy to device. - amd::HostQueue* queue = hip::getNullStream(); - HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream)); } hipError_t hipUnbindTexture(const textureReference* texref) { @@ -701,8 +701,8 @@ hipError_t hipBindTexture(size_t* offset, HIP_RETURN(err); } // Copy to device. - amd::HostQueue* queue = hip::getNullStream(); - HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue)); + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream)); } hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc, @@ -966,8 +966,8 @@ hipError_t hipTexRefSetArray(textureReference* texRef, HIP_RETURN(err); } // Copy to device. - amd::HostQueue* queue = hip::getNullStream(); - HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream)); } hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr, @@ -1049,8 +1049,8 @@ hipError_t hipTexRefSetAddress(size_t* ByteOffset, HIP_RETURN(err); } // Copy to device. - amd::HostQueue* queue = hip::getNullStream(); - HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream)); } hipError_t hipTexRefSetAddress2D(textureReference* texRef, @@ -1091,8 +1091,8 @@ hipError_t hipTexRefSetAddress2D(textureReference* texRef, HIP_RETURN(err); } // Copy to device. - amd::HostQueue* queue = hip::getNullStream(); - HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream)); } hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) { @@ -1454,8 +1454,8 @@ hipError_t hipTexRefSetMipmappedArray(textureReference* texRef, HIP_RETURN(err); } // Copy to device. - amd::HostQueue* queue = hip::getNullStream(); - HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue)); + hip::Stream* stream = hip::getNullStream(); + HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream)); } hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject,