SWDEV-381402 - Derive hip::Stream from amd::HostQueue

Change-Id: I6c1aca5eb350c32d974ae4ffcc725705355956d8
Этот коммит содержится в:
Ioannis Assiouras
2023-02-08 20:18:11 +00:00
родитель aaf1c19609
Коммит e3633dc8f4
19 изменённых файлов: 403 добавлений и 732 удалений
+8 -8
Просмотреть файл
@@ -32,7 +32,7 @@ THE SOFTWARE.
#include <elf/elf.hpp>
/// Forward declaration of the internal memcpy entry point used for managed variables.
/// @param dst        Destination pointer.
/// @param src        Source pointer.
/// @param sizeBytes  Number of bytes to copy.
/// @param kind       Direction of the copy.
/// @param stream     HIP stream the copy is issued on.
/// @param isAsync    Defaults to false.
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
                      hip::Stream& stream, bool isAsync = false);
hipError_t ihipFree(void* ptr);
// forward declaration of methods required for managed variables
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
@@ -635,10 +635,10 @@ hipError_t DynCO::initDynManagedVars(const std::string& managedVar) {
it->second->setManagedVarInfo(pointer, dvar->size());
// copy initial value to the managed variable to the managed memory allocated
amd::HostQueue* queue = hip::getNullStream();
if (queue != nullptr) {
hip::Stream* stream = hip::getNullStream();
if (stream != nullptr) {
status = ihipMemcpy(pointer, reinterpret_cast<address>(dvar->device_ptr()), dvar->size(),
hipMemcpyDeviceToDevice, *queue);
hipMemcpyDeviceToDevice, *stream);
if (status != hipSuccess) {
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Status %d, failed to copy device ptr:%s", status,
managedVar.c_str());
@@ -658,7 +658,7 @@ hipError_t DynCO::initDynManagedVars(const std::string& managedVar) {
}
// copy managed memory pointer to the managed device variable
status = ihipMemcpy(reinterpret_cast<address>(dvar->device_ptr()), &pointer, dvar->size(),
hipMemcpyHostToDevice, *queue);
hipMemcpyHostToDevice, *stream);
if (status != hipSuccess) {
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Status %d, failed to copy device ptr:%s", status,
managedVar.c_str());
@@ -895,10 +895,10 @@ hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) {
DeviceVar* dvar = nullptr;
IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId));
amd::HostQueue* queue = g_devices.at(deviceId)->NullStream();
if (queue != nullptr) {
hip::Stream* stream = g_devices.at(deviceId)->NullStream();
if (stream != nullptr) {
err = ihipMemcpy(reinterpret_cast<address>(dvar->device_ptr()), var->getManagedVarPtr(),
dvar->size(), hipMemcpyHostToDevice, *queue);
dvar->size(), hipMemcpyHostToDevice, *stream);
} else {
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL");
return hipErrorInvalidResourceHandle;
+7 -7
Просмотреть файл
@@ -91,21 +91,21 @@ void setCurrentDevice(unsigned int index) {
amd::Os::setPreferredNumaNode(preferredNumaNode);
}
amd::HostQueue* getQueue(hipStream_t stream) {
hip::Stream* getStream(hipStream_t stream) {
if (stream == nullptr) {
return getNullStream();
} else {
amd::HostQueue* queue = reinterpret_cast<hip::Stream*>(stream)->asHostQueue();
if (!(reinterpret_cast<hip::Stream*>(stream)->Flags() & hipStreamNonBlocking)) {
hip::Stream* hip_stream = reinterpret_cast<hip::Stream*>(stream);
if (!(hip_stream->Flags() & hipStreamNonBlocking)) {
constexpr bool WaitNullStreamOnly = true;
iHipWaitActiveStreams(queue, WaitNullStreamOnly);
iHipWaitActiveStreams(hip_stream, WaitNullStreamOnly);
}
return queue;
return hip_stream;
}
}
// ================================================================================================
amd::HostQueue* getNullStream(amd::Context& ctx) {
hip::Stream* getNullStream(amd::Context& ctx) {
for (auto& it : g_devices) {
if (it->asContext() == &ctx) {
return it->NullStream();
@@ -131,7 +131,7 @@ int getDeviceID(amd::Context& ctx) {
}
// ================================================================================================
amd::HostQueue* getNullStream() {
hip::Stream* getNullStream() {
Device* device = getCurrentDevice();
return device ? device->NullStream() : nullptr;
}
+20 -10
Просмотреть файл
@@ -26,25 +26,31 @@
namespace hip {
// ================================================================================================
amd::HostQueue* Device::NullStream(bool skip_alloc) {
amd::HostQueue* null_queue = null_stream_.asHostQueue(skip_alloc);
if (null_queue == nullptr) {
hip::Stream* Device::NullStream(bool skip_alloc) {
if (null_stream_ == nullptr && !skip_alloc) {
null_stream_ = new Stream(this, Stream::Priority::Normal, 0, true);
}
if (null_stream_ == nullptr) {
return nullptr;
}
// Wait for all active streams before executing commands on the default
iHipWaitActiveStreams(null_queue);
return null_queue;
iHipWaitActiveStreams(null_stream_);
return null_stream_;
}
// ================================================================================================
Stream* Device::GetNullStream() {
amd::HostQueue* null_queue = null_stream_.asHostQueue();
if (null_queue == nullptr) {
hip::Stream* Device::GetNullStream() {
if (null_stream_ == nullptr) {
null_stream_ = new Stream(this, Stream::Priority::Normal, 0, true);
}
if (null_stream_ == nullptr) {
return nullptr;
}
// Wait for all active streams before executing commands on the default
iHipWaitActiveStreams(null_queue);
return &null_stream_;
iHipWaitActiveStreams(null_stream_);
return null_stream_;
}
// ================================================================================================
@@ -128,6 +134,10 @@ Device::~Device() {
if (default_mem_pool_ != nullptr) {
default_mem_pool_->release();
}
if (null_stream_!= nullptr) {
delete null_stream_;
}
}
}
+3 -3
Просмотреть файл
@@ -512,9 +512,9 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) {
hipError_t hipDeviceSynchronize ( void ) {
HIP_INIT_API(hipDeviceSynchronize);
amd::HostQueue* queue = hip::getNullStream();
hip::Stream* stream = hip::getNullStream();
if (!queue) {
if (!stream) {
HIP_RETURN(hipErrorOutOfMemory);
}
@@ -522,7 +522,7 @@ hipError_t hipDeviceSynchronize ( void ) {
HIP_RETURN(hipErrorStreamCaptureUnsupported);
}
queue->finish();
stream->finish();
hip::Stream::syncNonBlockingStreams(hip::getCurrentDevice()->deviceId());
+11 -11
Просмотреть файл
@@ -177,12 +177,12 @@ int64_t EventDD::time(bool getStartTs) const {
}
}
hipError_t Event::streamWaitCommand(amd::Command*& command, amd::HostQueue* queue) {
hipError_t Event::streamWaitCommand(amd::Command*& command, hip::Stream* stream) {
amd::Command::EventWaitList eventWaitList;
if (event_ != nullptr) {
eventWaitList.push_back(event_);
}
command = new amd::Marker(*queue, kMarkerDisableFlush, eventWaitList);
command = new amd::Marker(*stream, kMarkerDisableFlush, eventWaitList);
if (command == NULL) {
return hipErrorOutOfMemory;
@@ -196,17 +196,17 @@ hipError_t Event::enqueueStreamWaitCommand(hipStream_t stream, amd::Command* com
}
hipError_t Event::streamWait(hipStream_t stream, uint flags) {
amd::HostQueue* queue = hip::getQueue(stream);
hip::Stream* hip_stream = hip::getStream(stream);
// Access to event_ object must be lock protected
amd::ScopedLock lock(lock_);
if ((event_ == nullptr) || (event_->command().queue() == queue) || ready()) {
if ((event_ == nullptr) || (event_->command().queue() == hip_stream) || ready()) {
return hipSuccess;
}
if (!event_->notifyCmdQueue()) {
return hipErrorLaunchOutOfResources;
}
amd::Command* command;
hipError_t status = streamWaitCommand(command, queue);
hipError_t status = streamWaitCommand(command, hip_stream);
if (status != hipSuccess) {
return status;
}
@@ -218,7 +218,7 @@ hipError_t Event::streamWait(hipStream_t stream, uint flags) {
return hipSuccess;
}
hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* queue,
hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* stream,
uint32_t ext_flags ) {
if (command == nullptr) {
int32_t releaseFlags = ((ext_flags == 0) ? flags : ext_flags) &
@@ -231,7 +231,7 @@ hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* queue,
releaseFlags = amd::Device::kCacheStateIgnore;
}
// Always submit a EventMarker.
command = new hip::EventMarker(*queue, !kMarkerDisableFlush, true, releaseFlags);
command = new hip::EventMarker(*stream, !kMarkerDisableFlush, true, releaseFlags);
}
return hipSuccess;
}
@@ -249,10 +249,10 @@ hipError_t Event::enqueueRecordCommand(hipStream_t stream, amd::Command* command
}
hipError_t Event::addMarker(hipStream_t stream, amd::Command* command, bool record) {
amd::HostQueue* queue = hip::getQueue(stream);
hip::Stream* hip_stream = hip::getStream(stream);
// Keep the lock always at the beginning of this to avoid a race. SWDEV-277847
amd::ScopedLock lock(lock_);
hipError_t status = recordCommand(command, queue);
hipError_t status = recordCommand(command, hip_stream);
if (status != hipSuccess) {
return hipSuccess;
}
@@ -379,8 +379,8 @@ hipError_t hipEventRecord_common(hipEvent_t event, hipStream_t stream) {
return hipErrorInvalidHandle;
}
hip::Event* e = reinterpret_cast<hip::Event*>(event);
amd::HostQueue* queue = hip::getQueue(stream);
if (g_devices[e->deviceId()]->devices()[0] != &queue->device()) {
hip::Stream* hip_stream = hip::getStream(stream);
if (g_devices[e->deviceId()]->devices()[0] != &hip_stream->device()) {
return hipErrorInvalidHandle;
}
return e->addMarker(stream, nullptr, true);
+6 -6
Просмотреть файл
@@ -78,9 +78,9 @@ typedef struct ihipIpcEventShmem_s {
class EventMarker : public amd::Marker {
public:
EventMarker(amd::HostQueue& queue, bool disableFlush, bool markerTs = false,
EventMarker(amd::HostQueue& stream, bool disableFlush, bool markerTs = false,
int32_t scope = amd::Device::kCacheStateInvalid)
: amd::Marker(queue, disableFlush) {
: amd::Marker(stream, disableFlush) {
profilingInfo_.enabled_ = true;
profilingInfo_.callback_ = nullptr;
profilingInfo_.marker_ts_ = markerTs;
@@ -116,11 +116,11 @@ class Event {
virtual hipError_t synchronize();
hipError_t elapsedTime(Event& eStop, float& ms);
virtual hipError_t streamWaitCommand(amd::Command*& command, amd::HostQueue* queue);
virtual hipError_t streamWaitCommand(amd::Command*& command, hip::Stream* stream);
virtual hipError_t enqueueStreamWaitCommand(hipStream_t stream, amd::Command* command);
virtual hipError_t streamWait(hipStream_t stream, uint flags);
virtual hipError_t recordCommand(amd::Command*& command, amd::HostQueue* queue,
virtual hipError_t recordCommand(amd::Command*& command, amd::HostQueue* stream,
uint32_t flags = 0);
virtual hipError_t enqueueRecordCommand(hipStream_t stream, amd::Command* command, bool record);
hipError_t addMarker(hipStream_t stream, amd::Command* command, bool record);
@@ -175,7 +175,7 @@ class Event {
protected:
amd::Monitor lock_;
amd::HostQueue* stream_;
hip::Stream* stream_;
amd::Event* event_;
int device_id_;
//! Flag to indicate hipEventRecord has not been called. This is needed for
@@ -224,7 +224,7 @@ class IPCEvent : public Event {
hipError_t synchronize();
hipError_t query();
hipError_t streamWaitCommand(amd::Command*& command, amd::HostQueue* queue);
hipError_t streamWaitCommand(amd::Command*& command, hip::Stream* stream);
hipError_t enqueueStreamWaitCommand(hipStream_t stream, amd::Command* command);
hipError_t streamWait(hipStream_t stream, uint flags);
+7 -8
Просмотреть файл
@@ -102,8 +102,8 @@ hipError_t IPCEvent::synchronize() {
return hipSuccess;
}
hipError_t IPCEvent::streamWaitCommand(amd::Command*& command, amd::HostQueue* queue) {
command = new amd::Marker(*queue, false);
hipError_t IPCEvent::streamWaitCommand(amd::Command*& command, hip::Stream* stream) {
command = new amd::Marker(*stream, false);
if (command == NULL) {
return hipErrorOutOfMemory;
}
@@ -125,12 +125,12 @@ hipError_t IPCEvent::enqueueStreamWaitCommand(hipStream_t stream, amd::Command*
}
hipError_t IPCEvent::streamWait(hipStream_t stream, uint flags) {
amd::HostQueue* queue = hip::getQueue(stream);
hip::Stream* hip_stream = hip::getStream(stream);
amd::ScopedLock lock(lock_);
if(query() != hipSuccess) {
amd::Command* command;
hipError_t status = streamWaitCommand(command, queue);
hipError_t status = streamWaitCommand(command, hip_stream);
if (status != hipSuccess) {
return status;
}
@@ -140,18 +140,17 @@ hipError_t IPCEvent::streamWait(hipStream_t stream, uint flags) {
return hipSuccess;
}
/// Produces the command that records this IPC event on the given queue.
///
/// @param command  Out: receives a new amd::Marker when the event is still unrecorded;
///                 otherwise it is passed through to Event::recordCommand().
/// @param stream   Queue the command is created for.
/// @param flags    Not used by this override.
/// @return hipSuccess, or the status of Event::recordCommand() on the delegated path.
hipError_t IPCEvent::recordCommand(amd::Command*& command, amd::HostQueue* stream,
                                   uint32_t flags) {
  bool unrecorded = isUnRecorded();
  if (unrecorded) {
    command = new amd::Marker(*stream, kMarkerDisableFlush);
  } else {
    // NOTE(review): `flags` is not forwarded, so the base class falls back to its default
    // third argument — confirm this is intended.
    return Event::recordCommand(command, stream);
  }
  return hipSuccess;
}
hipError_t IPCEvent::enqueueRecordCommand(hipStream_t stream, amd::Command* command, bool record) {
amd::HostQueue* queue = hip::getQueue(stream);
bool unrecorded = isUnRecorded();
if (unrecorded) {
amd::Event& tEvent = command->event();
+8 -10
Просмотреть файл
@@ -637,13 +637,12 @@ hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources,
HIP_RETURN(hipErrorUnknown);
}
amd::HostQueue* queue = hip::getQueue(stream);
if (nullptr == queue) {
hip::Stream* hip_stream = hip::getStream(stream);
if (nullptr == hip_stream) {
HIP_RETURN(hipErrorUnknown);
}
amd::HostQueue& hostQueue = *queue;
if (!hostQueue.context().glenv() || !hostQueue.context().glenv()->isAssociated()) {
if (!hip_stream->context().glenv() || !hip_stream->context().glenv()->isAssociated()) {
LogWarning("\"amdContext\" is not created from GL context or share list");
HIP_RETURN(hipErrorUnknown);
}
@@ -658,7 +657,7 @@ hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources,
//! Now create command and enqueue
amd::AcquireExtObjectsCommand* command = new amd::AcquireExtObjectsCommand(
hostQueue, nullWaitList, count, memObjects, CL_COMMAND_ACQUIRE_GL_OBJECTS);
*hip_stream, nullWaitList, count, memObjects, CL_COMMAND_ACQUIRE_GL_OBJECTS);
if (command == nullptr) {
HIP_RETURN(hipErrorUnknown);
}
@@ -712,13 +711,12 @@ hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources
}
// Wait for the current host queue
hip::getQueue(stream)->finish();
hip::getStream(stream)->finish();
amd::HostQueue* queue = hip::getQueue(stream);
if (nullptr == queue) {
hip::Stream* hip_stream = hip::getStream(stream);
if (nullptr == hip_stream) {
HIP_RETURN(hipErrorUnknown);
}
amd::HostQueue& hostQueue = *queue;
std::vector<amd::Memory*> memObjects;
hipError_t err = hipSetInteropObjects(count, reinterpret_cast<void**>(resources), memObjects);
@@ -730,7 +728,7 @@ hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources
// Now create command and enqueue
amd::ReleaseExtObjectsCommand* command = new amd::ReleaseExtObjectsCommand(
hostQueue, nullWaitList, count, memObjects, CL_COMMAND_RELEASE_GL_OBJECTS);
*hip_stream, nullWaitList, count, memObjects, CL_COMMAND_RELEASE_GL_OBJECTS);
if (command == nullptr) {
HIP_RETURN(hipErrorUnknown);
}
+6 -6
Просмотреть файл
@@ -5,9 +5,9 @@ hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p);
hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind);
/// Declaration: builds the command for a 1D memcpy on the given stream.
/// @param command  Out: receives the created command.
/// @param stream   HIP stream the command is created for.
/// @param isAsync  Defaults to false.
hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, size_t sizeBytes,
                             hipMemcpyKind kind, hip::Stream& stream, bool isAsync = false);
/// Declaration: host-to-host copy of sizeBytes bytes from src to dst, associated with `stream`.
void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, hip::Stream& stream);
bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind);
@@ -26,19 +26,19 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY,
uint32_t globalWorkSizeZ, uint32_t blockDimX, uint32_t blockDimY,
uint32_t blockDimZ, uint32_t sharedMemBytes,
amd::HostQueue* queue, void** kernelParams, void** extra,
hip::Stream* stream, void** kernelParams, void** extra,
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags,
uint32_t params, uint32_t gridId, uint32_t numGrids,
uint64_t prevGridSum, uint64_t allGridSum, uint32_t firstDevice);
/// Declaration: builds the command for a 3D memcpy described by `p` on the given stream.
/// @param command  Out: receives the created command.
hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p,
                               hip::Stream* stream);
/// Declaration: builds the command(s) for a memset of `sizeBytes` bytes at `dst`.
/// @param commands  Out: receives the created commands.
/// @param stream    HIP stream the commands are created for.
hipError_t ihipMemsetCommand(std::vector<amd::Command*>& commands, void* dst, int64_t value,
                             size_t valueSize, size_t sizeBytes, hip::Stream* stream);
/// Declaration: builds the command(s) for a 3D memset over `extent` of a pitched allocation.
/// @param commands     Out: receives the created commands.
/// @param stream       HIP stream the commands are created for.
/// @param elementSize  Defaults to 1.
hipError_t ihipMemset3DCommand(std::vector<amd::Command*>& commands, hipPitchedPtr pitchedDevPtr,
                               int value, hipExtent extent, hip::Stream* stream,
                               size_t elementSize = 1);
hipError_t ihipMemcpySymbol_validate(const void* symbol, size_t sizeBytes, size_t offset,
size_t& sym_size, hipDeviceptr_t& device_ptr);
+8 -235
Просмотреть файл
@@ -98,56 +98,6 @@ hipError_t hipGraphMemcpyNode1D::ValidateParams(void* dst, const void* src, size
return hipSuccess;
}
/// Updates the parameters of this node's existing 1D copy command (commands_[0]).
///
/// The kind of command that gets updated is chosen from which of the two pointers resolve
/// to a runtime memory object via getMemoryObject():
///   - only dst resolves:  commands_[0] is treated as an amd::WriteMemoryCommand
///   - only src resolves:  commands_[0] is treated as an amd::ReadMemoryCommand
///   - both resolve:       commands_[0] is treated as an amd::CopyMemoryP2PCommand
///
/// @param dst    Destination pointer.
/// @param src    Source pointer.
/// @param count  Number of bytes to copy.
/// @param kind   Copy direction, checked by ihipMemcpy_validate().
/// @return hipSuccess on success; the status from ihipMemcpy_validate() when validation
///         fails; hipErrorInvalidValue when there is no command to update, when neither
///         pointer resolves to a memory object, or when P2P memory validation fails.
hipError_t hipGraphMemcpyNode1D::SetCommandParams(void* dst, const void* src, size_t count,
                                                  hipMemcpyKind kind) {
  hipError_t status = ihipMemcpy_validate(dst, src, count, kind);
  if (status != hipSuccess) {
    return status;
  }

  // A command must already exist; indexing an empty list would be undefined behaviour.
  if (commands_.empty() || commands_[0] == nullptr) {
    return hipErrorInvalidValue;
  }

  // The previous code looked every pointer up twice and compared the two identical results,
  // a check that could never fail. A single lookup per pointer is sufficient.
  size_t sOffset = 0;
  amd::Memory* srcMemory = getMemoryObject(src, sOffset);
  size_t dOffset = 0;
  amd::Memory* dstMemory = getMemoryObject(dst, dOffset);

  if ((srcMemory == nullptr) && (dstMemory != nullptr)) {
    amd::WriteMemoryCommand* command = reinterpret_cast<amd::WriteMemoryCommand*>(commands_[0]);
    command->setParams(*dstMemory->asBuffer(), dOffset, count, src);
  } else if ((srcMemory != nullptr) && (dstMemory == nullptr)) {
    amd::ReadMemoryCommand* command = reinterpret_cast<amd::ReadMemoryCommand*>(commands_[0]);
    command->setParams(*srcMemory->asBuffer(), sOffset, count, dst);
  } else if ((srcMemory != nullptr) && (dstMemory != nullptr)) {
    amd::CopyMemoryP2PCommand* command =
        reinterpret_cast<amd::CopyMemoryP2PCommand*>(commands_[0]);
    command->setParams(*srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, count);
    // Make sure runtime has valid memory for the command execution. P2P access
    // requires page table mapping on the current device to another GPU memory
    if (!command->validateMemory()) {
      delete command;
      // Drop the entry as well so commands_ never holds a pointer to the destroyed command.
      commands_.erase(commands_.begin());
      return hipErrorInvalidValue;
    }
  } else {
    // Neither pointer maps to a memory object. The old fall-through dereferenced both null
    // pointers here; report the problem instead.
    return hipErrorInvalidValue;
  }
  return hipSuccess;
}
hipError_t hipGraphMemcpyNode::ValidateParams(const hipMemcpy3DParms* pNodeParams) {
hipError_t status = ihipMemcpy3D_validate(pNodeParams);
if (status != hipSuccess) {
@@ -297,185 +247,6 @@ hipError_t hipGraphMemcpyNode::ValidateParams(const hipMemcpy3DParms* pNodeParam
return hipSuccess;
}
/// Updates the parameters of this node's existing 3D copy command (commands_[0]).
///
/// The 3D descriptor is converted with hip::getDrvMemcpy3DDesc(), the source and destination
/// memory types are normalised (unified -> device/host, registered host -> device), and the
/// matching ihipMemcpy*Validate() helper is run before the command's setParams() is called.
///
/// @param pNodeParams  3D copy description.
/// @return hipSuccess on success; the failing status from ihipMemcpy3D_validate() or from the
///         per-direction validate helper; hipErrorInvalidValue when there is no command to
///         update or the source/destination type combination is not handled.
hipError_t hipGraphMemcpyNode::SetCommandParams(const hipMemcpy3DParms* pNodeParams) {
  hipError_t status = ihipMemcpy3D_validate(pNodeParams);
  if (status != hipSuccess) {
    return status;
  }

  // A command must already exist; indexing an empty list would be undefined behaviour.
  if (commands_.empty() || commands_[0] == nullptr) {
    return hipErrorInvalidValue;
  }

  // Local, mutable copy of the descriptor. It was previously declared const and then patched
  // through const_cast, which is undefined behaviour for an object that is really const.
  HIP_MEMCPY3D pCopy = hip::getDrvMemcpy3DDesc(*pNodeParams);

  // If {src/dst}MemoryType is hipMemoryTypeUnified, {src/dst}Device and {src/dst}Pitch specify the
  // (unified virtual address space) base address of the source data and the bytes per row to apply.
  // {src/dst}Array is ignored.
  hipMemoryType srcMemoryType = pCopy.srcMemoryType;
  if (srcMemoryType == hipMemoryTypeUnified) {
    srcMemoryType =
        amd::MemObjMap::FindMemObj(pCopy.srcDevice) ? hipMemoryTypeDevice : hipMemoryTypeHost;
    if (srcMemoryType == hipMemoryTypeHost) {
      // {src/dst}Host may be uninitialized. Copy over {src/dst}Device into it if we detect system
      // memory.
      pCopy.srcHost = pCopy.srcDevice;
    }
  }
  hipMemoryType dstMemoryType = pCopy.dstMemoryType;
  if (dstMemoryType == hipMemoryTypeUnified) {
    dstMemoryType =
        amd::MemObjMap::FindMemObj(pCopy.dstDevice) ? hipMemoryTypeDevice : hipMemoryTypeHost;
    // Fixed: this used to test srcMemoryType, so dstHost was filled in (or not) based on the
    // source side of the copy.
    if (dstMemoryType == hipMemoryTypeHost) {
      pCopy.dstHost = pCopy.dstDevice;
    }
  }

  // If {src/dst}MemoryType is hipMemoryTypeHost, check if the memory was prepinned.
  // In that case upgrade the copy type to hipMemoryTypeDevice to avoid extra pinning.
  if (srcMemoryType == hipMemoryTypeHost) {
    amd::Memory* mem = amd::MemObjMap::FindMemObj(pCopy.srcHost);
    srcMemoryType = mem ? hipMemoryTypeDevice : hipMemoryTypeHost;
    if (srcMemoryType == hipMemoryTypeDevice) {
      pCopy.srcDevice = const_cast<void*>(pCopy.srcHost);
    }
  }
  if (dstMemoryType == hipMemoryTypeHost) {
    amd::Memory* mem = amd::MemObjMap::FindMemObj(pCopy.dstHost);
    dstMemoryType = mem ? hipMemoryTypeDevice : hipMemoryTypeHost;
    if (dstMemoryType == hipMemoryTypeDevice) {
      // Fixed: this used to assign dstDevice to itself, so the pinned host pointer never
      // reached the device-side field (mirrors the source handling above).
      pCopy.dstDevice = pCopy.dstHost;
    }
  }

  amd::Coord3D srcOrigin = {pCopy.srcXInBytes, pCopy.srcY, pCopy.srcZ};
  amd::Coord3D dstOrigin = {pCopy.dstXInBytes, pCopy.dstY, pCopy.dstZ};
  amd::Coord3D copyRegion = {pCopy.WidthInBytes, pCopy.Height, pCopy.Depth};

  if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeDevice)) {
    // Host to Device.
    amd::Memory* dstMemory;
    amd::BufferRect srcRect;
    amd::BufferRect dstRect;
    status =
        ihipMemcpyHtoDValidate(pCopy.srcHost, pCopy.dstDevice, srcOrigin, dstOrigin, copyRegion,
                               pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, pCopy.dstPitch,
                               pCopy.dstPitch * pCopy.dstHeight, dstMemory, srcRect, dstRect);
    if (status != hipSuccess) {
      return status;
    }
    amd::WriteMemoryCommand* command = reinterpret_cast<amd::WriteMemoryCommand*>(commands_[0]);
    command->setParams(*dstMemory, {dstRect.start_, 0, 0}, copyRegion, pCopy.srcHost, dstRect,
                       srcRect);
  } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) {
    // Device to Host.
    amd::Memory* srcMemory;
    amd::BufferRect srcRect;
    amd::BufferRect dstRect;
    status =
        ihipMemcpyDtoHValidate(pCopy.srcDevice, pCopy.dstHost, srcOrigin, dstOrigin, copyRegion,
                               pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, pCopy.dstPitch,
                               pCopy.dstPitch * pCopy.dstHeight, srcMemory, srcRect, dstRect);
    if (status != hipSuccess) {
      return status;
    }
    amd::ReadMemoryCommand* command = reinterpret_cast<amd::ReadMemoryCommand*>(commands_[0]);
    command->setParams(*srcMemory, {srcRect.start_, 0, 0}, copyRegion, pCopy.dstHost, srcRect,
                       dstRect);
    // NOTE(review): these setters repeat the values already given to setParams() above;
    // kept as-is because setParams() is not visible from here.
    command->setSource(*srcMemory);
    command->setOrigin({srcRect.start_, 0, 0});
    command->setSize(copyRegion);
    command->setDestination(pCopy.dstHost);
    command->setBufRect(srcRect);
    command->setHostRect(dstRect);
  } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) {
    // Device to Device.
    amd::Memory* srcMemory;
    amd::Memory* dstMemory;
    amd::BufferRect srcRect;
    amd::BufferRect dstRect;
    status = ihipMemcpyDtoDValidate(pCopy.srcDevice, pCopy.dstDevice, srcOrigin, dstOrigin,
                                    copyRegion, pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight,
                                    pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight, srcMemory,
                                    dstMemory, srcRect, dstRect);
    if (status != hipSuccess) {
      return status;
    }
    amd::CopyMemoryCommand* command = reinterpret_cast<amd::CopyMemoryCommand*>(commands_[0]);
    command->setParams(*srcMemory, *dstMemory, {srcRect.start_, 0, 0}, {dstRect.start_, 0, 0},
                       copyRegion, srcRect, dstRect);
  } else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) {
    // Host to Image.
    amd::Image* dstImage;
    amd::BufferRect srcRect;
    status =
        ihipMemcpyHtoAValidate(pCopy.srcHost, pCopy.dstArray, srcOrigin, dstOrigin, copyRegion,
                               pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, dstImage, srcRect);
    if (status != hipSuccess) {
      return status;
    }
    amd::WriteMemoryCommand* command = reinterpret_cast<amd::WriteMemoryCommand*>(commands_[0]);
    command->setParams(*dstImage, dstOrigin, copyRegion,
                       static_cast<const char*>(pCopy.srcHost) + srcRect.start_, pCopy.srcPitch,
                       pCopy.srcPitch * pCopy.srcHeight);
  } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) {
    // Image to Host.
    amd::Image* srcImage;
    amd::BufferRect dstRect;
    status =
        ihipMemcpyAtoHValidate(pCopy.srcArray, pCopy.dstHost, srcOrigin, dstOrigin, copyRegion,
                               pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight, srcImage, dstRect);
    if (status != hipSuccess) {
      return status;
    }
    amd::ReadMemoryCommand* command = reinterpret_cast<amd::ReadMemoryCommand*>(commands_[0]);
    command->setParams(*srcImage, srcOrigin, copyRegion,
                       static_cast<char*>(pCopy.dstHost) + dstRect.start_, pCopy.dstPitch,
                       pCopy.dstPitch * pCopy.dstHeight);
  } else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) {
    // Device to Image.
    amd::Image* dstImage;
    amd::Memory* srcMemory;
    amd::BufferRect dstRect;
    amd::BufferRect srcRect;
    status = ihipMemcpyDtoAValidate(pCopy.srcDevice, pCopy.dstArray, srcOrigin, dstOrigin,
                                    copyRegion, pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight,
                                    dstImage, srcMemory, dstRect, srcRect);
    if (status != hipSuccess) {
      return status;
    }
    amd::CopyMemoryCommand* command = reinterpret_cast<amd::CopyMemoryCommand*>(commands_[0]);
    command->setParams(*srcMemory, *dstImage, srcOrigin, dstOrigin, copyRegion, srcRect, dstRect);
  } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) {
    // Image to Device.
    amd::BufferRect srcRect;
    amd::BufferRect dstRect;
    amd::Memory* dstMemory;
    amd::Image* srcImage;
    status = ihipMemcpyAtoDValidate(pCopy.srcArray, pCopy.dstDevice, srcOrigin, dstOrigin,
                                    copyRegion, pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight,
                                    dstMemory, srcImage, srcRect, dstRect);
    if (status != hipSuccess) {
      return status;
    }
    amd::CopyMemoryCommand* command = reinterpret_cast<amd::CopyMemoryCommand*>(commands_[0]);
    command->setParams(*srcImage, *dstMemory, srcOrigin, dstOrigin, copyRegion, srcRect, dstRect);
  } else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) {
    // Image to Image.
    amd::Image* srcImage;
    amd::Image* dstImage;
    status = ihipMemcpyAtoAValidate(pCopy.srcArray, pCopy.dstArray, srcOrigin, dstOrigin,
                                    copyRegion, srcImage, dstImage);
    if (status != hipSuccess) {
      return status;
    }
    amd::CopyMemoryCommand* command = reinterpret_cast<amd::CopyMemoryCommand*>(commands_[0]);
    command->setParams(*srcImage, *dstImage, srcOrigin, dstOrigin, copyRegion);
  } else {
    // Remaining combinations (e.g. host to host) are not handled here.
    return hipErrorInvalidValue;
  }
  return hipSuccess;
}
bool ihipGraph::isGraphValid(ihipGraph* pGraph) {
amd::ScopedLock lock(graphSetLock_);
if (graphSet_.find(pGraph) == graphSet_.end()) {
@@ -685,7 +456,9 @@ hipError_t hipGraphExec::CreateStreams(uint32_t num_streams) {
auto stream = new hip::Stream(hip::getCurrentDevice(),
hip::Stream::Priority::Normal, hipStreamNonBlocking);
if (stream == nullptr || !stream->Create()) {
delete stream;
if (stream != nullptr) {
stream->release();
}
ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed to create parallel stream!\n");
return hipErrorOutOfMemory;
}
@@ -708,7 +481,7 @@ hipError_t hipGraphExec::Init() {
hipError_t FillCommands(std::vector<std::vector<Node>>& parallelLists,
std::unordered_map<Node, std::vector<Node>>& nodeWaitLists,
std::vector<Node>& levelOrder, std::vector<amd::Command*>& rootCommands,
amd::Command*& endCommand, amd::HostQueue* queue) {
amd::Command*& endCommand, hip::Stream* stream) {
hipError_t status;
for (auto& node : levelOrder) {
// TODO: clone commands from next launch
@@ -758,7 +531,7 @@ hipError_t FillCommands(std::vector<std::vector<Node>>& parallelLists,
}
}
if (!graphLastCmdWaitList.empty()) {
endCommand = new amd::Marker(*queue, false, graphLastCmdWaitList);
endCommand = new amd::Marker(*stream, false, graphLastCmdWaitList);
if (endCommand == nullptr) {
return hipErrorOutOfMemory;
}
@@ -787,8 +560,8 @@ void UpdateStream(std::vector<std::vector<Node>>& parallelLists, hip::Stream* st
hipError_t hipGraphExec::Run(hipStream_t stream) {
hipError_t status;
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
if (hip::getStream(stream) == nullptr) {
return hipErrorInvalidResourceHandle;
}
if (flags_ == hipGraphInstantiateFlagAutoFreeOnLaunch) {
@@ -802,7 +575,7 @@ hipError_t hipGraphExec::Run(hipStream_t stream) {
std::vector<amd::Command*> rootCommands;
amd::Command* endCommand = nullptr;
status =
FillCommands(parallelLists_, nodeWaitLists_, levelOrder_, rootCommands, endCommand, queue);
FillCommands(parallelLists_, nodeWaitLists_, levelOrder_, rootCommands, endCommand, hip_stream);
if (status != hipSuccess) {
return status;
}
+56 -123
Просмотреть файл
@@ -38,7 +38,7 @@ typedef hipGraphNode* Node;
/// Declaration: creates the commands for the nodes in `levelOrder`.
/// @param rootCommands  Out: commands collected for the graph's root nodes.
/// @param endCommand    Out: set to a marker command when the graph's last commands need
///                      a common wait point.
/// @param stream        HIP stream the end marker is created on.
hipError_t FillCommands(std::vector<std::vector<Node>>& parallelLists,
                        std::unordered_map<Node, std::vector<Node>>& nodeWaitLists,
                        std::vector<Node>& levelOrder, std::vector<amd::Command*>& rootCommands,
                        amd::Command*& endCommand, hip::Stream* stream);
void UpdateStream(std::vector<std::vector<Node>>& parallelLists, hip::Stream* stream,
hipGraphExec* ptr);
@@ -155,7 +155,6 @@ struct hipGraphNodeDOTAttribute {
struct hipGraphNode : public hipGraphNodeDOTAttribute {
protected:
hip::Stream* stream_ = nullptr;
amd::HostQueue* queue_;
uint32_t level_;
unsigned int id_;
hipGraphNodeType type_;
@@ -222,16 +221,15 @@ struct hipGraphNode : public hipGraphNodeDOTAttribute {
return true;
}
amd::HostQueue* GetQueue() { return queue_; }
hip::Stream* GetQueue() { return stream_; }
virtual void SetStream(hip::Stream* stream, hipGraphExec* ptr = nullptr) {
stream_ = stream;
queue_ = stream->asHostQueue();
}
/// Create amd::command for the graph node
virtual hipError_t CreateCommand(amd::HostQueue* queue) {
virtual hipError_t CreateCommand(hip::Stream* stream) {
commands_.clear();
queue_ = queue;
stream_ = stream;
return hipSuccess;
}
/// Return node unique ID
@@ -350,8 +348,8 @@ struct hipGraphNode : public hipGraphNodeDOTAttribute {
(type_ == hipGraphNodeTypeKernel || type_ == hipGraphNodeTypeMemcpy ||
type_ == hipGraphNodeTypeMemset)) {
amd::Command::EventWaitList waitList;
amd::HostQueue* queue = hip::getQueue(stream);
amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList);
hip::Stream* hip_stream = hip::getStream(stream);
amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList);
command->enqueue();
command->release();
return;
@@ -575,7 +573,9 @@ struct hipGraphExec {
// new commands are launched for every launch they are destroyed as and when command is
// terminated after it complete execution
for (auto stream : parallel_streams_) {
delete stream;
if (stream != nullptr) {
stream->release();
}
}
for (auto it = clonedNodes_.begin(); it != clonedNodes_.end(); it++) delete it->second;
amd::ScopedLock lock(graphExecSetLock_);
@@ -645,7 +645,6 @@ struct hipChildGraphNode : public hipGraphNode {
void SetStream(hip::Stream* stream, hipGraphExec* ptr = nullptr) {
stream_ = stream;
queue_ = stream->asHostQueue();
UpdateStream(parallelLists_, stream, ptr);
}
@@ -654,8 +653,8 @@ struct hipChildGraphNode : public hipGraphNode {
std::vector<amd::Command*>& GetCommands() { return parallelLists_[0].back()->GetCommands(); }
// Create child graph node commands and set waitlists
hipError_t CreateCommand(amd::HostQueue* queue) {
hipError_t status = hipGraphNode::CreateCommand(queue);
hipError_t CreateCommand(hip::Stream* stream) {
hipError_t status = hipGraphNode::CreateCommand(stream);
if (status != hipSuccess) {
return status;
}
@@ -663,7 +662,7 @@ struct hipChildGraphNode : public hipGraphNode {
std::vector<amd::Command*> rootCommands;
amd::Command* endCommand = nullptr;
status = FillCommands(parallelLists_, nodeWaitLists_, childGraphlevelOrder_, rootCommands,
endCommand, queue);
endCommand, stream);
for (auto& cmd : rootCommands) {
commands_.push_back(cmd);
}
@@ -933,14 +932,14 @@ class hipGraphKernelNode : public hipGraphNode {
return new hipGraphKernelNode(static_cast<hipGraphKernelNode const&>(*this));
}
hipError_t CreateCommand(amd::HostQueue* queue) {
hipError_t CreateCommand(hip::Stream* stream) {
hipFunction_t func = nullptr;
hipError_t status = validateKernelParams(pKernelParams_, &func,
queue ? hip::getDeviceID(queue->context()) : -1);
stream ? hip::getDeviceID(stream->context()) : -1);
if (hipSuccess != status) {
return status;
}
status = hipGraphNode::CreateCommand(queue);
status = hipGraphNode::CreateCommand(stream);
if (status != hipSuccess) {
return status;
}
@@ -951,7 +950,7 @@ class hipGraphKernelNode : public hipGraphNode {
pKernelParams_->gridDim.y * pKernelParams_->blockDim.y,
pKernelParams_->gridDim.z * pKernelParams_->blockDim.z, pKernelParams_->blockDim.x,
pKernelParams_->blockDim.y, pKernelParams_->blockDim.z, pKernelParams_->sharedMemBytes,
queue, pKernelParams_->kernelParams, pKernelParams_->extra, nullptr, nullptr, 0, 0, 0, 0, 0,
stream, pKernelParams_->kernelParams, pKernelParams_->extra, nullptr, nullptr, 0, 0, 0, 0, 0,
0, 0);
commands_.emplace_back(command);
return status;
@@ -1044,22 +1043,6 @@ class hipGraphKernelNode : public hipGraphNode {
}
return hipSuccess;
}
// ToDo: use this when commands are cloned and command params are to be updated
/// Validates `params` and, on success, pushes its grid/block dimensions and shared-memory
/// size into commands_[0], which is reinterpreted as an amd::NDRangeKernelCommand.
/// Returns the validation status on failure, hipSuccess otherwise.
/// NOTE(review): commands_[0] is accessed without checking that commands_ is non-empty.
hipError_t SetCommandParams(const hipKernelNodeParams* params) {
// updates kernel params
hipError_t status = validateKernelParams(params);
if (hipSuccess != status) {
return status;
}
size_t globalWorkOffset[3] = {0};
// NOTE(review): the global size is filled from gridDim alone (not gridDim * blockDim) -
// confirm that is what setSizes expects.
size_t globalWorkSize[3] = {params->gridDim.x, params->gridDim.y, params->gridDim.z};
size_t localWorkSize[3] = {params->blockDim.x, params->blockDim.y, params->blockDim.z};
reinterpret_cast<amd::NDRangeKernelCommand*>(commands_[0])
->setSizes(globalWorkOffset, globalWorkSize, localWorkSize);
reinterpret_cast<amd::NDRangeKernelCommand*>(commands_[0])
->setSharedMemBytes(params->sharedMemBytes);
return hipSuccess;
}
hipError_t SetParams(hipGraphNode* node) {
const hipGraphKernelNode* kernelNode = static_cast<hipGraphKernelNode const*>(node);
@@ -1110,17 +1093,17 @@ class hipGraphMemcpyNode : public hipGraphNode {
return new hipGraphMemcpyNode(static_cast<hipGraphMemcpyNode const&>(*this));
}
/// Build the command for this 3D memcpy node.
/// When IsHtoHMemcpy() reports true for the copy parameters, no command is created and
/// hipSuccess is returned immediately. Otherwise the base CreateCommand runs first and the
/// command produced by ihipMemcpy3DCommand is appended to commands_.
/// Returns the status of the last step that ran.
hipError_t CreateCommand(amd::HostQueue* queue) {
hipError_t CreateCommand(hip::Stream* stream) {
if (IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->kind)) {
return hipSuccess;
}
hipError_t status = hipGraphNode::CreateCommand(queue);
hipError_t status = hipGraphNode::CreateCommand(stream);
if (status != hipSuccess) {
return status;
}
commands_.reserve(1);
// NOTE(review): `command` starts uninitialised and is stored below regardless of `status`;
// whether ihipMemcpy3DCommand assigns it on every path is not visible here - confirm.
amd::Command* command;
status = ihipMemcpy3DCommand(command, pCopyParams_, queue);
status = ihipMemcpy3DCommand(command, pCopyParams_, stream);
commands_.emplace_back(command);
return status;
}
@@ -1129,7 +1112,7 @@ class hipGraphMemcpyNode : public hipGraphNode {
if (isEnabled_ && IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->kind)) {
ihipHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr,
pCopyParams_->extent.width * pCopyParams_->extent.height *
pCopyParams_->extent.depth, *hip::getQueue(stream));
pCopyParams_->extent.depth, *hip::getStream(stream));
return;
}
hipGraphNode::EnqueueCommands(stream);
@@ -1150,8 +1133,6 @@ class hipGraphMemcpyNode : public hipGraphNode {
const hipGraphMemcpyNode* memcpyNode = static_cast<hipGraphMemcpyNode const*>(node);
return SetParams(memcpyNode->pCopyParams_);
}
// ToDo: use this when commands are cloned and command params are to be updated
hipError_t SetCommandParams(const hipMemcpy3DParms* pNodeParams);
hipError_t ValidateParams(const hipMemcpy3DParms* pNodeParams);
std::string GetLabel(hipGraphDebugDotFlags flag) {
const HIP_MEMCPY3D pCopy = hip::getDrvMemcpy3DDesc(*pCopyParams_);
@@ -1256,17 +1237,17 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
return new hipGraphMemcpyNode1D(static_cast<hipGraphMemcpyNode1D const&>(*this));
}
/// Build the command for this 1D memcpy node.
/// When IsHtoHMemcpy(dst_, src_, kind_) is true, no command is created and hipSuccess is
/// returned immediately. Otherwise the base CreateCommand runs first and the command
/// produced by ihipMemcpyCommand is appended to commands_.
/// Returns the status of the last step that ran.
virtual hipError_t CreateCommand(amd::HostQueue* queue) {
virtual hipError_t CreateCommand(hip::Stream* stream) {
if (IsHtoHMemcpy(dst_, src_, kind_)) {
return hipSuccess;
}
hipError_t status = hipGraphNode::CreateCommand(queue);
hipError_t status = hipGraphNode::CreateCommand(stream);
if (status != hipSuccess) {
return status;
}
commands_.reserve(1);
amd::Command* command = nullptr;
status = ihipMemcpyCommand(command, dst_, src_, count_, kind_, *queue);
status = ihipMemcpyCommand(command, dst_, src_, count_, kind_, *stream);
// Stored regardless of `status`; on failure this may be nullptr if the callee did not
// assign `command`.
commands_.emplace_back(command);
return status;
}
@@ -1281,14 +1262,14 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
if (isEnabled_) {
//HtoH
if (isH2H) {
ihipHtoHMemcpy(dst_, src_, count_, *hip::getQueue(stream));
ihipHtoHMemcpy(dst_, src_, count_, *hip::getStream(stream));
return;
}
amd::Command* command = commands_[0];
amd::HostQueue* cmdQueue = command->queue();
amd::HostQueue* queue = hip::getQueue(stream);
hip::Stream* hip_stream = hip::getStream(stream);
if (cmdQueue == queue) {
if (cmdQueue == hip_stream) {
command->enqueue();
command->release();
return;
@@ -1296,7 +1277,7 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
amd::Command::EventWaitList waitList;
amd::Command* depdentMarker = nullptr;
amd::Command* cmd = queue->getLastQueuedCommand(true);
amd::Command* cmd = hip_stream->getLastQueuedCommand(true);
if (cmd != nullptr) {
waitList.push_back(cmd);
amd::Command* depdentMarker = new amd::Marker(*cmdQueue, true, waitList);
@@ -1313,7 +1294,7 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
if (cmd != nullptr) {
waitList.clear();
waitList.push_back(cmd);
amd::Command* depdentMarker = new amd::Marker(*queue, true, waitList);
amd::Command* depdentMarker = new amd::Marker(*hip_stream, true, waitList);
if (depdentMarker != nullptr) {
depdentMarker->enqueue(); // Make sure future commands of queue synced with command
depdentMarker->release();
@@ -1322,8 +1303,8 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
}
} else {
amd::Command::EventWaitList waitList;
amd::HostQueue* queue = hip::getQueue(stream);
amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList);
hip::Stream* hip_stream = hip::getStream(stream);
amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList);
command->enqueue();
command->release();
}
@@ -1346,8 +1327,6 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
return SetParams(memcpy1DNode->dst_, memcpy1DNode->src_, memcpy1DNode->count_,
memcpy1DNode->kind_);
}
// ToDo: use this when commands are cloned and command params are to be updated
hipError_t SetCommandParams(void* dst, const void* src, size_t count, hipMemcpyKind kind);
static hipError_t ValidateParams(void* dst, const void* src, size_t count, hipMemcpyKind kind);
std::string GetLabel(hipGraphDebugDotFlags flag) {
size_t sOffsetOrig = 0;
@@ -1414,8 +1393,8 @@ class hipGraphMemcpyNodeFromSymbol : public hipGraphMemcpyNode1D {
static_cast<hipGraphMemcpyNodeFromSymbol const&>(*this));
}
hipError_t CreateCommand(amd::HostQueue* queue) {
hipError_t status = hipGraphNode::CreateCommand(queue);
hipError_t CreateCommand(hip::Stream* stream) {
hipError_t status = hipGraphNode::CreateCommand(stream);
if (status != hipSuccess) {
return status;
}
@@ -1428,7 +1407,7 @@ class hipGraphMemcpyNodeFromSymbol : public hipGraphMemcpyNode1D {
if (status != hipSuccess) {
return status;
}
status = ihipMemcpyCommand(command, dst_, device_ptr, count_, kind_, *queue);
status = ihipMemcpyCommand(command, dst_, device_ptr, count_, kind_, *stream);
if (status != hipSuccess) {
return status;
}
@@ -1474,18 +1453,6 @@ class hipGraphMemcpyNodeFromSymbol : public hipGraphMemcpyNode1D {
return SetParams(memcpyNode->dst_, memcpyNode->symbol_, memcpyNode->count_, memcpyNode->offset_,
memcpyNode->kind_);
}
// ToDo: use this when commands are cloned and command params are to be updated
/// Runs ihipMemcpySymbol_validate on (symbol, count, offset) and, on success, forwards to
/// hipGraphMemcpyNode1D::SetCommandParams with `device_ptr` as the copy source.
/// Returns the validation status on failure.
/// NOTE(review): device_ptr is presumably the device address resolved for `symbol` - confirm
/// against ihipMemcpySymbol_validate. sym_size is filled in but not used afterwards.
hipError_t SetCommandParams(void* dst, const void* symbol, size_t count, size_t offset,
hipMemcpyKind kind) {
size_t sym_size = 0;
hipDeviceptr_t device_ptr = nullptr;
hipError_t status = ihipMemcpySymbol_validate(symbol, count, offset, sym_size, device_ptr);
if (status != hipSuccess) {
return status;
}
return hipGraphMemcpyNode1D::SetCommandParams(dst, device_ptr, count, kind);
}
};
class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D {
const void* symbol_;
@@ -1504,8 +1471,8 @@ class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D {
return new hipGraphMemcpyNodeToSymbol(static_cast<hipGraphMemcpyNodeToSymbol const&>(*this));
}
hipError_t CreateCommand(amd::HostQueue* queue) {
hipError_t status = hipGraphNode::CreateCommand(queue);
hipError_t CreateCommand(hip::Stream* stream) {
hipError_t status = hipGraphNode::CreateCommand(stream);
if (status != hipSuccess) {
return status;
}
@@ -1518,7 +1485,7 @@ class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D {
if (status != hipSuccess) {
return status;
}
status = ihipMemcpyCommand(command, device_ptr, src_, count_, kind_, *queue);
status = ihipMemcpyCommand(command, device_ptr, src_, count_, kind_, *stream);
if (status != hipSuccess) {
return status;
}
@@ -1562,18 +1529,6 @@ class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D {
return SetParams(memcpyNode->src_, memcpyNode->symbol_, memcpyNode->count_, memcpyNode->offset_,
memcpyNode->kind_);
}
// ToDo: use this when commands are cloned and command params are to be updated
/// Runs ihipMemcpySymbol_validate on (symbol, count, offset) and, on success, forwards to
/// hipGraphMemcpyNode1D::SetCommandParams with `device_ptr` as the copy destination.
/// Returns the validation status on failure.
/// NOTE(review): device_ptr is presumably the device address resolved for `symbol` - confirm
/// against ihipMemcpySymbol_validate. sym_size is filled in but not used afterwards.
hipError_t SetCommandParams(const void* symbol, const void* src, size_t count, size_t offset,
hipMemcpyKind kind) {
size_t sym_size = 0;
hipDeviceptr_t device_ptr = nullptr;
hipError_t status = ihipMemcpySymbol_validate(symbol, count, offset, sym_size, device_ptr);
if (status != hipSuccess) {
return status;
}
return hipGraphMemcpyNode1D::SetCommandParams(device_ptr, src, count, kind);
}
};
class hipGraphMemsetNode : public hipGraphNode {
@@ -1633,21 +1588,21 @@ class hipGraphMemsetNode : public hipGraphNode {
}
}
/// Build the memset command(s) for this node into commands_.
/// After the base CreateCommand succeeds, a height of 1 is handled by ihipMemsetCommand over
/// width * elementSize bytes; any other height goes through ihipMemset3DCommand.
/// NOTE(review): both branches declare a new local `status`, which shadows the outer one.
/// The result of ihipMemsetCommand / ihipMemset3DCommand therefore never reaches the final
/// `return status;`, which still returns the base CreateCommand result (hipSuccess at that
/// point). Dropping the inner `hipError_t` would propagate the error - confirm and fix.
hipError_t CreateCommand(amd::HostQueue* queue) {
hipError_t status = hipGraphNode::CreateCommand(queue);
hipError_t CreateCommand(hip::Stream* stream) {
hipError_t status = hipGraphNode::CreateCommand(stream);
if (status != hipSuccess) {
return status;
}
if (pMemsetParams_->height == 1) {
size_t sizeBytes = pMemsetParams_->width * pMemsetParams_->elementSize;
hipError_t status = ihipMemsetCommand(commands_, pMemsetParams_->dst, pMemsetParams_->value,
pMemsetParams_->elementSize, sizeBytes, queue);
pMemsetParams_->elementSize, sizeBytes, stream);
} else {
hipError_t status = ihipMemset3DCommand(
commands_,
{pMemsetParams_->dst, pMemsetParams_->pitch, pMemsetParams_->width * pMemsetParams_->elementSize,
pMemsetParams_->height},
pMemsetParams_->value, {pMemsetParams_->width * pMemsetParams_->elementSize, pMemsetParams_->height, 1}, queue, pMemsetParams_->elementSize);
pMemsetParams_->value, {pMemsetParams_->width * pMemsetParams_->elementSize, pMemsetParams_->height, 1}, stream, pMemsetParams_->elementSize);
}
return status;
}
@@ -1706,15 +1661,15 @@ class hipGraphEventRecordNode : public hipGraphNode {
return new hipGraphEventRecordNode(static_cast<hipGraphEventRecordNode const&>(*this));
}
/// Build the event-record command for this node.
/// After the base CreateCommand succeeds, event_ is reinterpreted as a hip::Event and asked
/// for a command through recordCommand(); that command is appended to commands_.
/// Returns the status reported by recordCommand (or the base failure status).
hipError_t CreateCommand(amd::HostQueue* queue) {
hipError_t status = hipGraphNode::CreateCommand(queue);
hipError_t CreateCommand(hip::Stream* stream) {
hipError_t status = hipGraphNode::CreateCommand(stream);
if (status != hipSuccess) {
return status;
}
hip::Event* e = reinterpret_cast<hip::Event*>(event_);
commands_.reserve(1);
amd::Command* command = nullptr;
status = e->recordCommand(command, queue);
status = e->recordCommand(command, stream);
// Stored regardless of `status`; may be nullptr if recordCommand did not assign it.
commands_.emplace_back(command);
return status;
}
@@ -1744,16 +1699,6 @@ class hipGraphEventRecordNode : public hipGraphNode {
static_cast<hipGraphEventRecordNode const*>(node);
return SetParams(eventRecordNode->event_);
}
// ToDo: use this when commands are cloned and command params are to be updated
/// Rebuilds this node's command: takes the queue of commands_[0], releases that command,
/// clears commands_ and calls CreateCommand on the same queue.
/// NOTE(review): `event` is never read in this body.
/// NOTE(review): `queue` is left uninitialised when commands_ is empty, yet it is still
/// passed to CreateCommand - reading it in that case is undefined behaviour.
hipError_t SetCommandParams(hipEvent_t event) {
amd::HostQueue* queue;
if (!commands_.empty()) {
queue = commands_[0]->queue();
commands_[0]->release();
}
commands_.clear();
return CreateCommand(queue);
}
};
class hipGraphEventWaitNode : public hipGraphNode {
@@ -1769,15 +1714,15 @@ class hipGraphEventWaitNode : public hipGraphNode {
return new hipGraphEventWaitNode(static_cast<hipGraphEventWaitNode const&>(*this));
}
/// Build the event-wait command for this node.
/// After the base CreateCommand succeeds, event_ is reinterpreted as a hip::Event and asked
/// for a command through streamWaitCommand(); that command is appended to commands_.
/// Returns the status reported by streamWaitCommand (or the base failure status).
hipError_t CreateCommand(amd::HostQueue* queue) {
hipError_t status = hipGraphNode::CreateCommand(queue);
hipError_t CreateCommand(hip::Stream* stream) {
hipError_t status = hipGraphNode::CreateCommand(stream);
if (status != hipSuccess) {
return status;
}
hip::Event* e = reinterpret_cast<hip::Event*>(event_);
commands_.reserve(1);
// NOTE(review): `command` starts uninitialised and is stored below regardless of `status`;
// whether streamWaitCommand assigns it on every path is not visible here - confirm.
amd::Command* command;
status = e->streamWaitCommand(command, queue);
status = e->streamWaitCommand(command, stream);
commands_.emplace_back(command);
return status;
}
@@ -1806,16 +1751,6 @@ class hipGraphEventWaitNode : public hipGraphNode {
const hipGraphEventWaitNode* eventWaitNode = static_cast<hipGraphEventWaitNode const*>(node);
return SetParams(eventWaitNode->event_);
}
// ToDo: use this when commands are cloned and command params are to be updated
/// Rebuilds this node's command: takes the queue of commands_[0], releases that command,
/// clears commands_ and calls CreateCommand on the same queue.
/// NOTE(review): `event` is never read in this body.
/// NOTE(review): `queue` is left uninitialised when commands_ is empty, yet it is still
/// passed to CreateCommand - reading it in that case is undefined behaviour.
hipError_t SetCommandParams(hipEvent_t event) {
amd::HostQueue* queue;
if (!commands_.empty()) {
queue = commands_[0]->queue();
commands_[0]->release();
}
commands_.clear();
return CreateCommand(queue);
}
};
class hipGraphHostNode : public hipGraphNode {
@@ -1836,14 +1771,14 @@ class hipGraphHostNode : public hipGraphNode {
return new hipGraphHostNode(static_cast<hipGraphHostNode const&>(*this));
}
/// Build the command for this host node: a single amd::Marker with an empty wait list.
/// The marker is only stored in commands_ here; nothing is enqueued by this method.
/// Returns the base CreateCommand status on failure, hipSuccess otherwise.
hipError_t CreateCommand(amd::HostQueue* queue) {
hipError_t status = hipGraphNode::CreateCommand(queue);
hipError_t CreateCommand(hip::Stream* stream) {
hipError_t status = hipGraphNode::CreateCommand(stream);
if (status != hipSuccess) {
return status;
}
amd::Command::EventWaitList waitList;
commands_.reserve(1);
amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList);
amd::Command* command = new amd::Marker(*stream, !kMarkerDisableFlush, waitList);
commands_.emplace_back(command);
return hipSuccess;
}
@@ -1885,8 +1820,6 @@ class hipGraphHostNode : public hipGraphNode {
const hipGraphHostNode* hostNode = static_cast<hipGraphHostNode const*>(node);
return SetParams(hostNode->pNodeParams_);
}
// ToDo: use this when commands are cloned and command params are to be updated
hipError_t SetCommandParams(const hipHostNodeParams* params);
};
class hipGraphEmptyNode : public hipGraphNode {
@@ -1898,14 +1831,14 @@ class hipGraphEmptyNode : public hipGraphNode {
return new hipGraphEmptyNode(static_cast<hipGraphEmptyNode const&>(*this));
}
/// Build the command for this empty node: a single amd::Marker with an empty wait list.
/// The marker is only stored in commands_ here; nothing is enqueued by this method.
/// Returns the base CreateCommand status on failure, hipSuccess otherwise.
hipError_t CreateCommand(amd::HostQueue* queue) {
hipError_t status = hipGraphNode::CreateCommand(queue);
hipError_t CreateCommand(hip::Stream* stream) {
hipError_t status = hipGraphNode::CreateCommand(stream);
if (status != hipSuccess) {
return status;
}
amd::Command::EventWaitList waitList;
commands_.reserve(1);
amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList);
amd::Command* command = new amd::Marker(*stream, !kMarkerDisableFlush, waitList);
commands_.emplace_back(command);
return hipSuccess;
}
@@ -1925,8 +1858,8 @@ class hipGraphMemAllocNode : public hipGraphNode {
return new hipGraphMemAllocNode(static_cast<hipGraphMemAllocNode const&>(*this));
}
/// Runs the base CreateCommand, then calls Execute(stream_) right away instead of recording
/// a command. Returns the base CreateCommand status.
/// NOTE(review): Execute is called even if `error` is not hipSuccess, and its return value
/// (`ptr`) is never used.
virtual hipError_t CreateCommand(amd::HostQueue* queue) {
auto error = hipGraphNode::CreateCommand(queue);
virtual hipError_t CreateCommand(hip::Stream* stream) {
auto error = hipGraphNode::CreateCommand(stream);
auto ptr = Execute(stream_);
return error;
}
@@ -1966,8 +1899,8 @@ class hipGraphMemFreeNode : public hipGraphNode {
return new hipGraphMemFreeNode(static_cast<hipGraphMemFreeNode const&>(*this));
}
/// Runs the base CreateCommand, then calls Execute(stream_) right away instead of recording
/// a command. Returns the base CreateCommand status.
/// NOTE(review): Execute is called even if `error` is not hipSuccess.
virtual hipError_t CreateCommand(amd::HostQueue* queue) {
auto error = hipGraphNode::CreateCommand(queue);
virtual hipError_t CreateCommand(hip::Stream* stream) {
auto error = hipGraphNode::CreateCommand(stream);
Execute(stream_);
return error;
}
+5 -5
Просмотреть файл
@@ -94,7 +94,7 @@ hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device,
HIP_RETURN(hipErrorInvalidDevice);
}
amd::HostQueue* queue = nullptr;
hip::Stream* hip_stream = nullptr;
amd::Device* dev = nullptr;
bool cpu_access = false;
@@ -106,19 +106,19 @@ hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device,
// Pick the specified stream or Null one from the provided device
if (device == hipCpuDeviceId) {
cpu_access = true;
queue = (stream == nullptr) ? hip::getCurrentDevice()->NullStream() : hip::getQueue(stream);
hip_stream = (stream == nullptr) ? hip::getCurrentDevice()->NullStream() : hip::getStream(stream);
} else {
dev = g_devices[device]->devices()[0];
queue = (stream == nullptr) ? g_devices[device]->NullStream() : hip::getQueue(stream);
hip_stream = (stream == nullptr) ? g_devices[device]->NullStream() : hip::getStream(stream);
}
if (queue == nullptr) {
if (hip_stream == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
amd::Command::EventWaitList waitList;
amd::SvmPrefetchAsyncCommand* command =
new amd::SvmPrefetchAsyncCommand(*queue, waitList, dev_ptr, count, dev, cpu_access);
new amd::SvmPrefetchAsyncCommand(*hip_stream, waitList, dev_ptr, count, dev, cpu_access);
if (command == nullptr) {
return hipErrorOutOfMemory;
}
+20 -26
Просмотреть файл
@@ -225,12 +225,11 @@ public:
namespace hip {
class Device;
class MemoryPool;
class Stream {
class Stream : public amd::HostQueue {
public:
enum Priority : int { High = -1, Normal = 0, Low = 1 };
private:
amd::HostQueue* queue_;
mutable amd::Monitor lock_;
Device* device_;
Priority priority_;
@@ -260,18 +259,20 @@ namespace hip {
/// Capture events
std::unordered_set<hipEvent_t> captureEvents_;
unsigned long long captureID_;
/// Translate a hip::Stream priority into the corresponding amd::CommandQueue priority.
/// High and Low map to their namesakes; every other value maps to Normal.
static inline CommandQueue::Priority convertToQueuePriority(Priority p){
  switch (p) {
    case Priority::High:
      return amd::CommandQueue::Priority::High;
    case Priority::Low:
      return amd::CommandQueue::Priority::Low;
    default:
      return amd::CommandQueue::Priority::Normal;
  }
}
public:
Stream(Device* dev, Priority p = Priority::Normal, unsigned int f = 0, bool null_stream = false,
const std::vector<uint32_t>& cuMask = {},
hipStreamCaptureStatus captureStatus = hipStreamCaptureStatusNone);
~Stream();
/// Creates the hip stream object, including AMD host queue
bool Create();
/// Get device AMD host queue object. The method can allocate the queue
amd::HostQueue* asHostQueue(bool skip_alloc = false);
void Finish() const;
virtual bool terminate() override;
/// Get device ID associated with the current stream;
int DeviceId() const;
/// Get HIP device associated with the stream
@@ -378,6 +379,7 @@ namespace hip {
parallelCaptureStreams_.erase(it);
}
}
static bool existsActiveStreamForDevice(hip::Device* device);
};
/// HIP Device class
@@ -389,7 +391,7 @@ namespace hip {
/// Store it here so we don't have to loop through the device list every time
int deviceId_;
/// ROCclr host queue for default streams
Stream null_stream_;
Stream* null_stream_ = nullptr;
/// Store device flags
unsigned int flags_;
/// Maintain list of user enabled peers
@@ -398,7 +400,6 @@ namespace hip {
/// True if this device is active
bool isActive_;
std::vector<amd::HostQueue*> queues_;
MemoryPool* default_mem_pool_;
MemoryPool* current_mem_pool_;
@@ -408,7 +409,6 @@ namespace hip {
public:
Device(amd::Context* ctx, int devId): context_(ctx),
deviceId_(devId),
null_stream_(this, Stream::Priority::Normal, 0, true),
flags_(hipDeviceScheduleSpin),
isActive_(false),
default_mem_pool_(nullptr),
@@ -445,22 +445,16 @@ namespace hip {
void setFlags(unsigned int flags) { flags_ = flags; }
void Reset();
amd::HostQueue* NullStream(bool skip_alloc = false);
Stream* GetNullStream();
hip::Stream* NullStream(bool skip_alloc = false);
Stream* GetNullStream();
/// Append `queue` to queues_ while holding lock_.
void SaveQueue(amd::HostQueue* queue) {
amd::ScopedLock lock(lock_);
queues_.push_back(queue);
}
/// Report whether this device is active, holding lock_ for the duration of the call.
/// Once isActive_ has been set it is returned directly; this method never clears it.
/// NOTE(review): two detection strategies appear back to back below (a scan of queues_ via
/// GetQueueStatus, and Stream::existsActiveStreamForDevice) - presumably the previous and
/// current revision of the same check; confirm which one is live.
bool GetActiveStatus() {
amd::ScopedLock lock(lock_);
if (isActive_) return true;
for (int i = 0; i < queues_.size(); i++) {
if (queues_[i]->GetQueueStatus()) {
isActive_ = true;
return true;
}
if (Stream::existsActiveStreamForDevice(this)) {
isActive_ = true;
return true;
}
return false;
}
@@ -524,11 +518,11 @@ namespace hip {
/// Get ROCclr queue associated with hipStream
/// Note: This follows the CUDA spec to sync with default streams
/// and Blocking streams
extern amd::HostQueue* getQueue(hipStream_t stream);
extern hip::Stream* getStream(hipStream_t stream);
/// Get default stream associated with the ROCclr context
extern amd::HostQueue* getNullStream(amd::Context&);
extern hip::Stream* getNullStream(amd::Context&);
/// Get default stream of the thread
extern amd::HostQueue* getNullStream();
extern hip::Stream* getNullStream();
/// Get device ID associated with the ROCclr context
int getDeviceID(amd::Context& ctx);
/// Check if stream is valid
@@ -542,7 +536,7 @@ extern void WaitThenDecrementSignal(hipStream_t stream, hipError_t status, void*
/// Wait for all active streams on the blocking queue. The method enqueues a wait command and
/// doesn't stall the current thread
extern void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream = false);
extern void iHipWaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stream = false);
extern std::vector<hip::Device*> g_devices;
extern hipError_t ihipDeviceGetCount(int* count);
+145 -145
Просмотреть файл
@@ -78,9 +78,9 @@ hipError_t ihipFree(void *ptr) {
auto dev = g_devices[device_id];
// Skip stream allocation, since if it wasn't allocated until free, then the device wasn't used
constexpr bool SkipStreamAlloc = true;
amd::HostQueue* queue = dev->NullStream(SkipStreamAlloc);
if (queue != nullptr) {
queue->finish();
hip::Stream* stream = dev->NullStream(SkipStreamAlloc);
if (stream != nullptr) {
stream->finish();
}
hip::Stream::syncNonBlockingStreams(device_id);
// Find out if memory belongs to any memory pool
@@ -195,15 +195,15 @@ hipError_t hipSignalExternalSemaphoresAsync(
if (extSemArray == nullptr || paramsArray == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
for (unsigned int i = 0; i < numExtSems; i++) {
if (extSemArray[i] != nullptr) {
amd::ExternalSemaphoreCmd* command =
new amd::ExternalSemaphoreCmd(*queue, extSemArray[i], paramsArray[i].params.fence.value,
new amd::ExternalSemaphoreCmd(*hip_stream, extSemArray[i], paramsArray[i].params.fence.value,
amd::ExternalSemaphoreCmd::COMMAND_SIGNAL_EXTSEMAPHORE);
if (command == nullptr) {
return hipErrorOutOfMemory;
@@ -227,15 +227,15 @@ hipError_t hipWaitExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemAr
if (extSemArray == nullptr || paramsArray == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
for (unsigned int i = 0; i < numExtSems; i++) {
if (extSemArray[i] != nullptr) {
amd::ExternalSemaphoreCmd* command =
new amd::ExternalSemaphoreCmd(*queue, extSemArray[i], paramsArray[i].params.fence.value,
new amd::ExternalSemaphoreCmd(*hip_stream, extSemArray[i], paramsArray[i].params.fence.value,
amd::ExternalSemaphoreCmd::COMMAND_WAIT_EXTSEMAPHORE);
if (command == nullptr) {
return hipErrorOutOfMemory;
@@ -343,35 +343,35 @@ hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes,
}
hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, size_t sizeBytes,
hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync) {
hipMemcpyKind kind, hip::Stream& stream, bool isAsync) {
amd::Command::EventWaitList waitList;
size_t sOffset = 0;
amd::Memory* srcMemory = getMemoryObject(src, sOffset);
size_t dOffset = 0;
amd::Memory* dstMemory = getMemoryObject(dst, dOffset);
amd::Device* queueDevice = &queue.device();
amd::Device* queueDevice = &stream.device();
amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA);
if ((srcMemory == nullptr) && (dstMemory != nullptr)) {
amd::HostQueue* pQueue = &queue;
hip::Stream* pStream = &stream;
if (queueDevice != dstMemory->getContext().devices()[0]) {
pQueue = hip::getNullStream(dstMemory->getContext());
amd::Command* cmd = queue.getLastQueuedCommand(true);
pStream = hip::getNullStream(dstMemory->getContext());
amd::Command* cmd = stream.getLastQueuedCommand(true);
if (cmd != nullptr) {
waitList.push_back(cmd);
}
}
command = new amd::WriteMemoryCommand(*pQueue, CL_COMMAND_WRITE_BUFFER, waitList,
command = new amd::WriteMemoryCommand(*pStream, CL_COMMAND_WRITE_BUFFER, waitList,
*dstMemory->asBuffer(), dOffset, sizeBytes, src, 0, 0, copyMetadata);
} else if ((srcMemory != nullptr) && (dstMemory == nullptr)) {
amd::HostQueue* pQueue = &queue;
hip::Stream* pStream = &stream;
if (queueDevice != srcMemory->getContext().devices()[0]) {
pQueue = hip::getNullStream(srcMemory->getContext());
amd::Command* cmd = queue.getLastQueuedCommand(true);
pStream = hip::getNullStream(srcMemory->getContext());
amd::Command* cmd = stream.getLastQueuedCommand(true);
if (cmd != nullptr) {
waitList.push_back(cmd);
}
}
command = new amd::ReadMemoryCommand(*pQueue, CL_COMMAND_READ_BUFFER, waitList,
command = new amd::ReadMemoryCommand(*pStream, CL_COMMAND_READ_BUFFER, waitList,
*srcMemory->asBuffer(), sOffset, sizeBytes, dst, 0, 0, copyMetadata);
} else if ((srcMemory != nullptr) && (dstMemory != nullptr)) {
// Check if the queue device doesn't match the device on any memory object.
@@ -380,7 +380,7 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
if ((srcMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) &&
((srcMemory->getContext().devices().size() == 1) &&
(dstMemory->getContext().devices().size() == 1))) {
command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
command = new amd::CopyMemoryP2PCommand(stream, CL_COMMAND_COPY_BUFFER, waitList,
*srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, sizeBytes);
if (command == nullptr) {
return hipErrorOutOfMemory;
@@ -392,12 +392,12 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
return hipErrorInvalidValue;
}
} else {
amd::HostQueue* pQueue = &queue;
hip::Stream* pStream = &stream;
if ((srcMemory->getContext().devices()[0] == dstMemory->getContext().devices()[0]) &&
(queueDevice != srcMemory->getContext().devices()[0])) {
copyMetadata.copyEnginePreference_ = amd::CopyMetadata::CopyEnginePreference::NONE;
pQueue = hip::getNullStream(srcMemory->getContext());
amd::Command* cmd = queue.getLastQueuedCommand(true);
pStream = hip::getNullStream(srcMemory->getContext());
amd::Command* cmd = stream.getLastQueuedCommand(true);
if (cmd != nullptr) {
waitList.push_back(cmd);
}
@@ -405,22 +405,22 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
// Scenarios such as DtoH where dst is pinned memory
if ((queueDevice != srcMemory->getContext().devices()[0]) &&
(dstMemory->getContext().devices().size() != 1)) {
pQueue = hip::getNullStream(srcMemory->getContext());
amd::Command* cmd = queue.getLastQueuedCommand(true);
pStream = hip::getNullStream(srcMemory->getContext());
amd::Command* cmd = stream.getLastQueuedCommand(true);
if (cmd != nullptr) {
waitList.push_back(cmd);
}
// Scenarios such as HtoD where src is pinned memory
} else if ((queueDevice != dstMemory->getContext().devices()[0]) &&
(srcMemory->getContext().devices().size() != 1)) {
pQueue = hip::getNullStream(dstMemory->getContext());
amd::Command* cmd = queue.getLastQueuedCommand(true);
pStream = hip::getNullStream(dstMemory->getContext());
amd::Command* cmd = stream.getLastQueuedCommand(true);
if (cmd != nullptr) {
waitList.push_back(cmd);
}
}
}
command = new amd::CopyMemoryCommand(*pQueue, CL_COMMAND_COPY_BUFFER, waitList,
command = new amd::CopyMemoryCommand(*pStream, CL_COMMAND_COPY_BUFFER, waitList,
*srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, sizeBytes,
copyMetadata);
}
@@ -445,13 +445,13 @@ bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind) {
}
return false;
}
/// Host-to-host copy helper: calls finish() on the queue/stream it was given and then copies
/// sizeBytes bytes from src to dst with a plain memcpy.
void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, amd::HostQueue& queue) {
queue.finish();
void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, hip::Stream& stream) {
stream.finish();
memcpy(dst, src, sizeBytes);
}
// ================================================================================================
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
amd::HostQueue& queue, bool isAsync = false) {
hip::Stream& stream, bool isAsync = false) {
hipError_t status;
if (sizeBytes == 0) {
// Skip if nothing needs writing.
@@ -470,7 +470,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
size_t dOffset = 0;
amd::Memory* dstMemory = getMemoryObject(dst, dOffset);
if (srcMemory == nullptr && dstMemory == nullptr) {
ihipHtoHMemcpy(dst, src, sizeBytes, queue);
ihipHtoHMemcpy(dst, src, sizeBytes, stream);
return hipSuccess;
} else if ((srcMemory == nullptr) && (dstMemory != nullptr)) {
isAsync = false;
@@ -483,7 +483,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
isP2P = true;
}
amd::Command* command = nullptr;
status = ihipMemcpyCommand(command, dst, src, sizeBytes, kind, queue, isAsync);
status = ihipMemcpyCommand(command, dst, src, sizeBytes, kind, stream, isAsync);
if (status != hipSuccess) {
return status;
}
@@ -491,22 +491,22 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
if (!isAsync) {
command->awaitCompletion();
} else if (isP2P) {
amd::HostQueue* pQueue = hip::getNullStream(dstMemory->getContext());
hip::Stream* pStream = hip::getNullStream(dstMemory->getContext());
amd::Command::EventWaitList waitList;
waitList.push_back(command);
amd::Command* depdentMarker = new amd::Marker(*pQueue, false, waitList);
amd::Command* depdentMarker = new amd::Marker(*pStream, false, waitList);
if (depdentMarker != nullptr) {
depdentMarker->enqueue();
depdentMarker->release();
}
} else {
amd::HostQueue* newQueue = command->queue();
if (newQueue != &queue) {
if (newQueue != &stream) {
amd::Command::EventWaitList waitList;
amd::Command* cmd = newQueue->getLastQueuedCommand(true);
if (cmd != nullptr) {
waitList.push_back(cmd);
amd::Command* depdentMarker = new amd::Marker(queue, true, waitList);
amd::Command* depdentMarker = new amd::Marker(stream, true, waitList);
if (depdentMarker != nullptr) {
depdentMarker->enqueue();
depdentMarker->release();
@@ -611,18 +611,18 @@ hipError_t hipFree(void* ptr) {
/// Resolve the stream to copy on and forward the request to ihipMemcpy.
/// A non-null `stream` is looked up through the hip:: accessor; a null one selects
/// hip::getNullStream(). If the lookup yields nullptr, hipErrorInvalidValue is returned.
/// ihipMemcpy is called without an isAsync argument, so its default applies.
hipError_t hipMemcpy_common(void* dst, const void* src, size_t sizeBytes,
hipMemcpyKind kind, hipStream_t stream = nullptr) {
CHECK_STREAM_CAPTURING();
amd::HostQueue* queue = nullptr;
hip::Stream* hip_stream = nullptr;
if (stream != nullptr) {
queue = hip::getQueue(stream);
hip_stream = hip::getStream(stream);
} else {
queue = hip::getNullStream();
hip_stream = hip::getNullStream();
}
if (queue == nullptr) {
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
return ihipMemcpy(dst, src, sizeBytes, kind, *queue);
return ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream);
}
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) {
@@ -643,12 +643,12 @@ hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes,
HIP_RETURN(hipErrorContextIsDestroyed);
}
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN_DURATION(ihipMemcpy(dst, src, sizeBytes, kind, *queue, false));
HIP_RETURN_DURATION(ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream, false));
}
hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) {
@@ -697,9 +697,9 @@ hipError_t ihipArrayDestroy(hipArray* array) {
}
for (auto& dev : g_devices) {
amd::HostQueue* queue = dev->NullStream(true);
if (queue != nullptr) {
queue->finish();
hip::Stream* stream = dev->NullStream(true);
if (stream != nullptr) {
stream->finish();
}
}
@@ -1205,9 +1205,9 @@ hipError_t ihipHostUnregister(void* hostPtr) {
// Wait on the device, associated with the current memory object during allocation
auto device_id = mem->getUserData().deviceId;
amd::HostQueue* queue = g_devices[device_id]->NullStream(true);
if (queue != nullptr) {
queue->finish();
hip::Stream* stream = g_devices[device_id]->NullStream(true);
if (stream != nullptr) {
stream->finish();
}
amd::MemObjMap::RemoveMemObj(hostPtr);
@@ -1392,11 +1392,11 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice,
size_t ByteCount) {
HIP_INIT_API(hipMemcpyHtoD, dstDevice, srcHost, ByteCount);
CHECK_STREAM_CAPTURING();
amd::HostQueue* queue = hip::getQueue(nullptr);
if (queue == nullptr) {
hip::Stream* stream = hip::getStream(nullptr);
if (stream == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcHost, ByteCount, hipMemcpyHostToDevice, *queue));
HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcHost, ByteCount, hipMemcpyHostToDevice, *stream));
}
hipError_t hipMemcpyDtoH(void* dstHost,
@@ -1404,11 +1404,11 @@ hipError_t hipMemcpyDtoH(void* dstHost,
size_t ByteCount) {
HIP_INIT_API(hipMemcpyDtoH, dstHost, srcDevice, ByteCount);
CHECK_STREAM_CAPTURING();
amd::HostQueue* queue = hip::getQueue(nullptr);
if (queue == nullptr) {
hip::Stream* stream = hip::getStream(nullptr);
if (stream == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN_DURATION(ihipMemcpy(dstHost, srcDevice, ByteCount, hipMemcpyDeviceToHost, *queue));
HIP_RETURN_DURATION(ihipMemcpy(dstHost, srcDevice, ByteCount, hipMemcpyDeviceToHost, *stream));
}
hipError_t hipMemcpyDtoD(hipDeviceptr_t dstDevice,
@@ -1416,22 +1416,22 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dstDevice,
size_t ByteCount) {
HIP_INIT_API(hipMemcpyDtoD, dstDevice, srcDevice, ByteCount);
CHECK_STREAM_CAPTURING();
amd::HostQueue* queue = hip::getQueue(nullptr);
if (queue == nullptr) {
hip::Stream* stream = hip::getStream(nullptr);
if (stream == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *queue));
HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *stream));
}
hipError_t hipMemcpyAsync_common(void* dst, const void* src, size_t sizeBytes,
hipMemcpyKind kind, hipStream_t stream) {
STREAM_CAPTURE(hipMemcpyAsync, stream, dst, src, sizeBytes, kind);
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
return ihipMemcpy(dst, src, sizeBytes, kind, *queue, true);
return ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream, true);
}
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
@@ -1452,12 +1452,12 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice, void* srcHost, size_t By
HIP_INIT_API(hipMemcpyHtoDAsync, dstDevice, srcHost, ByteCount, stream);
hipMemcpyKind kind = hipMemcpyHostToDevice;
STREAM_CAPTURE(hipMemcpyHtoDAsync, stream, dstDevice, srcHost, ByteCount, kind);
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN_DURATION(
ihipMemcpy(dstDevice, srcHost, ByteCount, kind, *queue, true));
ihipMemcpy(dstDevice, srcHost, ByteCount, kind, *hip_stream, true));
}
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount,
@@ -1465,12 +1465,12 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice
HIP_INIT_API(hipMemcpyDtoDAsync, dstDevice, srcDevice, ByteCount, stream);
hipMemcpyKind kind = hipMemcpyDeviceToDevice;
STREAM_CAPTURE(hipMemcpyDtoDAsync, stream, dstDevice, srcDevice, ByteCount, kind);
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN_DURATION(
ihipMemcpy(dstDevice, srcDevice, ByteCount, kind, *queue, true));
ihipMemcpy(dstDevice, srcDevice, ByteCount, kind, *hip_stream, true));
}
hipError_t hipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t ByteCount,
@@ -1478,12 +1478,12 @@ hipError_t hipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t By
HIP_INIT_API(hipMemcpyDtoHAsync, dstHost, srcDevice, ByteCount, stream);
hipMemcpyKind kind = hipMemcpyDeviceToHost;
STREAM_CAPTURE(hipMemcpyDtoHAsync, stream, dstHost, srcDevice, ByteCount, kind);
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
HIP_RETURN(hipErrorInvalidValue);
}
HIP_RETURN_DURATION(
ihipMemcpy(dstHost, srcDevice, ByteCount, kind, *queue, true));
ihipMemcpy(dstHost, srcDevice, ByteCount, kind, *hip_stream, true));
}
hipError_t ihipMemcpyAtoDValidate(hipArray* srcArray, void* dstDevice, amd::Coord3D& srcOrigin,
@@ -1532,7 +1532,7 @@ hipError_t ihipMemcpyAtoDValidate(hipArray* srcArray, void* dstDevice, amd::Coor
hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray* srcArray, void* dstDevice,
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch,
amd::HostQueue* queue) {
hip::Stream* stream) {
amd::BufferRect srcRect;
amd::BufferRect dstRect;
amd::Memory* dstMemory;
@@ -1544,7 +1544,7 @@ hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray* srcArray, voi
return status;
}
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_IMAGE_TO_BUFFER,
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_IMAGE_TO_BUFFER,
amd::Command::EventWaitList{}, *srcImage, *dstMemory,
srcOrigin, dstOrigin, copyRegion, srcRect, dstRect);
@@ -1606,7 +1606,7 @@ hipError_t ihipMemcpyDtoAValidate(void* srcDevice, hipArray* dstArray, amd::Coor
hipError_t ihipMemcpyDtoACommand(amd::Command*& command, void* srcDevice, hipArray* dstArray,
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch,
amd::HostQueue* queue) {
hip::Stream* stream) {
amd::Image* dstImage;
amd::Memory* srcMemory;
amd::BufferRect dstRect;
@@ -1617,7 +1617,7 @@ hipError_t ihipMemcpyDtoACommand(amd::Command*& command, void* srcDevice, hipArr
if (status != hipSuccess) {
return status;
}
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_TO_IMAGE,
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_BUFFER_TO_IMAGE,
amd::Command::EventWaitList{}, *srcMemory, *dstImage,
srcOrigin, dstOrigin, copyRegion, srcRect, dstRect);
@@ -1679,7 +1679,7 @@ hipError_t ihipMemcpyDtoDValidate(void* srcDevice, void* dstDevice, amd::Coord3D
hipError_t ihipMemcpyDtoDCommand(amd::Command*& command, void* srcDevice, void* dstDevice,
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch, amd::HostQueue* queue) {
size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream) {
amd::Memory* srcMemory;
amd::Memory* dstMemory;
amd::BufferRect srcRect;
@@ -1694,7 +1694,7 @@ hipError_t ihipMemcpyDtoDCommand(amd::Command*& command, void* srcDevice, void*
amd::Coord3D srcStart(srcRect.start_, 0, 0);
amd::Coord3D dstStart(dstRect.start_, 0, 0);
amd::CopyMemoryCommand* copyCommand = new amd::CopyMemoryCommand(
*queue, CL_COMMAND_COPY_BUFFER_RECT, amd::Command::EventWaitList{}, *srcMemory, *dstMemory,
*stream, CL_COMMAND_COPY_BUFFER_RECT, amd::Command::EventWaitList{}, *srcMemory, *dstMemory,
srcStart, dstStart, copyRegion, srcRect, dstRect);
if (copyCommand == nullptr) {
@@ -1744,7 +1744,7 @@ hipError_t ihipMemcpyDtoHValidate(void* srcDevice, void* dstHost, amd::Coord3D&
hipError_t ihipMemcpyDtoHCommand(amd::Command*& command, void* srcDevice, void* dstHost,
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch, amd::HostQueue* queue,
size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream,
bool isAsync = false) {
amd::Memory* srcMemory;
amd::BufferRect srcRect;
@@ -1758,7 +1758,7 @@ hipError_t ihipMemcpyDtoHCommand(amd::Command*& command, void* srcDevice, void*
amd::Coord3D srcStart(srcRect.start_, 0, 0);
amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA);
amd::ReadMemoryCommand* readCommand =
new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, amd::Command::EventWaitList{},
new amd::ReadMemoryCommand(*stream, CL_COMMAND_READ_BUFFER_RECT, amd::Command::EventWaitList{},
*srcMemory, srcStart, copyRegion, dstHost, srcRect, dstRect,
copyMetadata);
@@ -1809,7 +1809,7 @@ hipError_t ihipMemcpyHtoDValidate(const void* srcHost, void* dstDevice, amd::Coo
hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, void* dstDevice,
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch,
size_t dstRowPitch, size_t dstSlicePitch, amd::HostQueue* queue,
size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream,
bool isAsync = false) {
amd::Memory* dstMemory;
amd::BufferRect srcRect;
@@ -1824,7 +1824,7 @@ hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, vo
amd::Coord3D dstStart(dstRect.start_, 0, 0);
amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA);
amd::WriteMemoryCommand* writeCommand = new amd::WriteMemoryCommand(
*queue, CL_COMMAND_WRITE_BUFFER_RECT, amd::Command::EventWaitList{}, *dstMemory, dstStart,
*stream, CL_COMMAND_WRITE_BUFFER_RECT, amd::Command::EventWaitList{}, *dstMemory, dstStart,
copyRegion, srcHost, dstRect, srcRect, copyMetadata);
if (writeCommand == nullptr) {
@@ -1842,7 +1842,7 @@ hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, vo
hipError_t ihipMemcpyHtoH(const void* srcHost, void* dstHost, amd::Coord3D srcOrigin,
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch,
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
amd::HostQueue* queue) {
hip::Stream* stream) {
if ((srcHost == nullptr) || (dstHost == nullptr)) {
return hipErrorInvalidValue;
}
@@ -1859,8 +1859,8 @@ hipError_t ihipMemcpyHtoH(const void* srcHost, void* dstHost, amd::Coord3D srcOr
return hipErrorInvalidValue;
}
if (queue) {
queue->finish();
if (stream) {
stream->finish();
}
for (size_t slice = 0; slice < copyRegion[2]; slice++) {
@@ -1909,7 +1909,7 @@ hipError_t ihipMemcpyAtoAValidate(hipArray* srcArray, hipArray* dstArray, amd::C
hipError_t ihipMemcpyAtoACommand(amd::Command*& command, hipArray* srcArray, hipArray* dstArray,
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
amd::Coord3D copyRegion, amd::HostQueue* queue) {
amd::Coord3D copyRegion, hip::Stream* stream) {
amd::Image* srcImage;
amd::Image* dstImage;
@@ -1919,7 +1919,7 @@ hipError_t ihipMemcpyAtoACommand(amd::Command*& command, hipArray* srcArray, hip
return status;
}
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_IMAGE,
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_IMAGE,
amd::Command::EventWaitList{}, *srcImage, *dstImage,
srcOrigin, dstOrigin, copyRegion);
@@ -1968,7 +1968,7 @@ hipError_t ihipMemcpyHtoAValidate(const void* srcHost, hipArray* dstArray,
hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hipArray* dstArray,
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch,
amd::HostQueue* queue, bool isAsync = false) {
hip::Stream* stream, bool isAsync = false) {
amd::Image* dstImage;
amd::BufferRect srcRect;
@@ -1980,7 +1980,7 @@ hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hi
amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA);
amd::WriteMemoryCommand* writeMemCmd = new amd::WriteMemoryCommand(
*queue, CL_COMMAND_WRITE_IMAGE, amd::Command::EventWaitList{}, *dstImage, dstOrigin,
*stream, CL_COMMAND_WRITE_IMAGE, amd::Command::EventWaitList{}, *dstImage, dstOrigin,
copyRegion, static_cast<const char*>(srcHost) + srcRect.start_, srcRowPitch, srcSlicePitch,
copyMetadata);
@@ -2029,7 +2029,7 @@ hipError_t ihipMemcpyAtoHValidate(hipArray* srcArray, void* dstHost, amd::Coord3
hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray* srcArray, void* dstHost,
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch,
amd::HostQueue* queue, bool isAsync = false) {
hip::Stream* stream, bool isAsync = false) {
amd::Image* srcImage;
amd::BufferRect dstRect;
amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA);
@@ -2041,7 +2041,7 @@ hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray* srcArray, voi
}
amd::ReadMemoryCommand* readMemCmd = new amd::ReadMemoryCommand(
*queue, CL_COMMAND_READ_IMAGE, amd::Command::EventWaitList{}, *srcImage, srcOrigin,
*stream, CL_COMMAND_READ_IMAGE, amd::Command::EventWaitList{}, *srcImage, srcOrigin,
copyRegion, static_cast<char*>(dstHost) + dstRect.start_, dstRowPitch, dstSlicePitch,
copyMetadata);
@@ -2058,7 +2058,7 @@ hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray* srcArray, voi
}
hipError_t ihipGetMemcpyParam3DCommand(amd::Command*& command, const HIP_MEMCPY3D* pCopy,
amd::HostQueue* queue) {
hip::Stream* stream) {
// If {src/dst}MemoryType is hipMemoryTypeUnified, {src/dst}Device and {src/dst}Pitch specify the
// (unified virtual address space) base address of the source data and the bytes per row to apply.
// {src/dst}Array is ignored.
@@ -2106,41 +2106,41 @@ hipError_t ihipGetMemcpyParam3DCommand(amd::Command*& command, const HIP_MEMCPY3
// Host to Device.
return ihipMemcpyHtoDCommand(command, pCopy->srcHost, pCopy->dstDevice, srcOrigin, dstOrigin,
copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight,
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, queue);
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream);
} else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) {
// Device to Host.
return ihipMemcpyDtoHCommand(command, pCopy->srcDevice, pCopy->dstHost, srcOrigin, dstOrigin,
copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight,
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, queue);
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream);
} else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) {
// Device to Device.
return ihipMemcpyDtoDCommand(command, pCopy->srcDevice, pCopy->dstDevice, srcOrigin, dstOrigin,
copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight,
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, queue);
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream);
} else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) {
// Host to Image.
return ihipMemcpyHtoACommand(command, pCopy->srcHost, pCopy->dstArray, srcOrigin, dstOrigin,
copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight,
queue);
stream);
} else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) {
// Image to Host.
return ihipMemcpyAtoHCommand(command, pCopy->srcArray, pCopy->dstHost, srcOrigin, dstOrigin,
copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight,
queue);
stream);
} else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) {
// Device to Image.
return ihipMemcpyDtoACommand(command, pCopy->srcDevice, pCopy->dstArray, srcOrigin, dstOrigin,
copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight,
queue);
stream);
} else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) {
// Image to Device.
return ihipMemcpyAtoDCommand(command, pCopy->srcArray, pCopy->dstDevice, srcOrigin, dstOrigin,
copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight,
queue);
stream);
} else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) {
// Image to Image.
return ihipMemcpyAtoACommand(command, pCopy->srcArray, pCopy->dstArray, srcOrigin, dstOrigin,
copyRegion, queue);
copyRegion, stream);
} else {
ShouldNotReachHere();
}
@@ -2212,14 +2212,14 @@ hipError_t ihipMemcpyParam3D(const HIP_MEMCPY3D* pCopy, hipStream_t stream, bool
// Host to Host.
return ihipMemcpyHtoH(pCopy->srcHost, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion,
pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch,
pCopy->dstPitch * pCopy->dstHeight, hip::getQueue(stream));
pCopy->dstPitch * pCopy->dstHeight, hip::getStream(stream));
} else {
amd::Command* command;
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
status = ihipGetMemcpyParam3DCommand(command, pCopy, queue);
status = ihipGetMemcpyParam3DCommand(command, pCopy, hip_stream);
if (status != hipSuccess) return status;
// Transfers from device memory to pageable host memory and transfers from any host memory to any host memory
@@ -2507,13 +2507,13 @@ hipError_t ihipMemcpyAtoD(hipArray* srcArray, void* dstDevice, amd::Coord3D srcO
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch,
size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) {
amd::Command* command;
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
hipError_t status =
ihipMemcpyAtoDCommand(command, srcArray, dstDevice, srcOrigin, dstOrigin, copyRegion,
dstRowPitch, dstSlicePitch, queue);
dstRowPitch, dstSlicePitch, hip_stream);
if (status != hipSuccess) return status;
return ihipMemcpyCmdEnqueue(command, isAsync);
}
@@ -2521,13 +2521,13 @@ hipError_t ihipMemcpyDtoA(void* srcDevice, hipArray* dstArray, amd::Coord3D srcO
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch,
size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) {
amd::Command* command;
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
hipError_t status =
ihipMemcpyDtoACommand(command, srcDevice, dstArray, srcOrigin, dstOrigin, copyRegion,
srcRowPitch, srcSlicePitch, queue);
srcRowPitch, srcSlicePitch, hip_stream);
if (status != hipSuccess) return status;
return ihipMemcpyCmdEnqueue(command, isAsync);
}
@@ -2536,13 +2536,13 @@ hipError_t ihipMemcpyDtoD(void* srcDevice, void* dstDevice, amd::Coord3D srcOrig
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
hipStream_t stream, bool isAsync = false) {
amd::Command* command;
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
hipError_t status = ihipMemcpyDtoDCommand(command, srcDevice, dstDevice, srcOrigin, dstOrigin,
copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch,
dstSlicePitch, queue);
dstSlicePitch, hip_stream);
if (status != hipSuccess) return status;
return ihipMemcpyCmdEnqueue(command, isAsync);
}
@@ -2551,13 +2551,13 @@ hipError_t ihipMemcpyDtoH(void* srcDevice, void* dstHost, amd::Coord3D srcOrigin
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
hipStream_t stream, bool isAsync = false) {
amd::Command* command;
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
hipError_t status = ihipMemcpyDtoHCommand(command, srcDevice, dstHost, srcOrigin, dstOrigin,
copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch,
dstSlicePitch, queue, isAsync);
dstSlicePitch, hip_stream, isAsync);
if (status != hipSuccess) return status;
return ihipMemcpyCmdEnqueue(command, isAsync);
}
@@ -2566,13 +2566,13 @@ hipError_t ihipMemcpyHtoD(const void* srcHost, void* dstDevice, amd::Coord3D src
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
hipStream_t stream, bool isAsync = false) {
amd::Command* command;
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
hipError_t status = ihipMemcpyHtoDCommand(command, srcHost, dstDevice, srcOrigin, dstOrigin,
copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch,
dstSlicePitch, queue, isAsync);
dstSlicePitch, hip_stream, isAsync);
if (status != hipSuccess) return status;
return ihipMemcpyCmdEnqueue(command, isAsync);
}
@@ -2580,12 +2580,12 @@ hipError_t ihipMemcpyAtoA(hipArray* srcArray, hipArray* dstArray, amd::Coord3D s
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, hipStream_t stream,
bool isAsync = false) {
amd::Command* command;
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
hipError_t status = ihipMemcpyAtoACommand(command, srcArray, dstArray, srcOrigin, dstOrigin,
copyRegion, queue);
copyRegion, hip_stream);
if (status != hipSuccess) return status;
return ihipMemcpyCmdEnqueue(command, isAsync);
}
@@ -2593,13 +2593,13 @@ hipError_t ihipMemcpyHtoA(const void* srcHost, hipArray* dstArray, amd::Coord3D
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch,
size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) {
amd::Command* command;
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
hipError_t status =
ihipMemcpyHtoACommand(command, srcHost, dstArray, srcOrigin, dstOrigin, copyRegion,
srcRowPitch, srcSlicePitch, queue, isAsync);
srcRowPitch, srcSlicePitch, hip_stream, isAsync);
if (status != hipSuccess) return status;
return ihipMemcpyCmdEnqueue(command, isAsync);
}
@@ -2607,13 +2607,13 @@ hipError_t ihipMemcpyAtoH(hipArray* srcArray, void* dstHost, amd::Coord3D srcOri
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch,
size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) {
amd::Command* command;
amd::HostQueue* queue = hip::getQueue(stream);
if (queue == nullptr) {
hip::Stream* hip_stream = hip::getStream(stream);
if (hip_stream == nullptr) {
return hipErrorInvalidValue;
}
hipError_t status =
ihipMemcpyAtoHCommand(command, srcArray, dstHost, srcOrigin, dstOrigin, copyRegion,
dstRowPitch, dstSlicePitch, queue, isAsync);
dstRowPitch, dstSlicePitch, hip_stream, isAsync);
if (status != hipSuccess) return status;
return ihipMemcpyCmdEnqueue(command, isAsync);
}
@@ -2673,9 +2673,9 @@ hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p) {
}
hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p,
amd::HostQueue* queue) {
hip::Stream* stream) {
const HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*p);
return ihipGetMemcpyParam3DCommand(command, &desc, queue);
return ihipGetMemcpyParam3DCommand(command, &desc, stream);
}
hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) {
@@ -2733,8 +2733,8 @@ hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) {
hipError_t packFillMemoryCommand(amd::Command*& command, amd::Memory* memory, size_t offset,
int64_t value, size_t valueSize, size_t sizeBytes,
amd::HostQueue* queue) {
if ((memory == nullptr) || (queue == nullptr)) {
hip::Stream* stream) {
if ((memory == nullptr) || (stream == nullptr)) {
return hipErrorInvalidValue;
}
@@ -2744,7 +2744,7 @@ hipError_t packFillMemoryCommand(amd::Command*& command, amd::Memory* memory, si
// surface=[pitch, width, height]
amd::Coord3D surface(sizeBytes, sizeBytes, 1);
amd::FillMemoryCommand* fillMemCommand =
new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(),
new amd::FillMemoryCommand(*stream, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(),
&value, valueSize, fillOffset, fillSize, surface);
if (fillMemCommand == nullptr) {
return hipErrorOutOfMemory;
@@ -2810,7 +2810,7 @@ hipError_t ihipGraphMemsetParams_validate(const hipMemsetParams* pNodeParams) {
}
hipError_t ihipMemsetCommand(std::vector<amd::Command*>& commands, void* dst, int64_t value,
size_t valueSize, size_t sizeBytes, amd::HostQueue* queue) {
size_t valueSize, size_t sizeBytes, hip::Stream* stream) {
hipError_t hip_error = hipSuccess;
auto aligned_dst = amd::alignUp(reinterpret_cast<address>(dst), sizeof(uint64_t));
size_t offset = 0;
@@ -2820,7 +2820,7 @@ hipError_t ihipMemsetCommand(std::vector<amd::Command*>& commands, void* dst, in
amd::Command* command;
hip_error = packFillMemoryCommand(command, memory, offset, value, valueSize, sizeBytes,
queue);
stream);
commands.push_back(command);
return hip_error;
@@ -2854,8 +2854,8 @@ hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeByt
}
}
std::vector<amd::Command*> commands;
amd::HostQueue* queue = hip::getQueue(stream);
hip_error = ihipMemsetCommand(commands, dst, value, valueSize, sizeBytes, queue);
hip::Stream* hip_stream = hip::getStream(stream);
hip_error = ihipMemsetCommand(commands, dst, value, valueSize, sizeBytes, hip_stream);
if (hip_error != hipSuccess) {
break;
}
@@ -2972,13 +2972,13 @@ hipError_t ihipMemset3D_validate(hipPitchedPtr pitchedDevPtr, int value, hipExte
}
hipError_t ihipMemset3DCommand(std::vector<amd::Command*> &commands, hipPitchedPtr pitchedDevPtr,
int value, hipExtent extent, amd::HostQueue* queue, size_t elementSize = 1) {
int value, hipExtent extent, hip::Stream* stream, size_t elementSize = 1) {
size_t offset = 0;
auto sizeBytes = extent.width * extent.height * extent.depth;
amd::Memory* memory = getMemoryObject(pitchedDevPtr.ptr, offset);
if (pitchedDevPtr.pitch == extent.width) {
return ihipMemsetCommand(commands, pitchedDevPtr.ptr, value, elementSize,
static_cast<size_t>(sizeBytes), queue);
static_cast<size_t>(sizeBytes), stream);
}
// Workaround for cases when pitch > row until fill kernel will be updated to support pitch.
// Fall back to filling one row at a time.
@@ -2994,7 +2994,7 @@ hipError_t ihipMemset3DCommand(std::vector<amd::Command*> &commands, hipPitchedP
}
amd::FillMemoryCommand* command;
command = new amd::FillMemoryCommand(
*queue, CL_COMMAND_FILL_BUFFER, amd::Command::EventWaitList{}, *memory->asBuffer(),
*stream, CL_COMMAND_FILL_BUFFER, amd::Command::EventWaitList{}, *memory->asBuffer(),
&value, elementSize, origin, region, surface);
commands.push_back(command);
return hipSuccess;
@@ -3025,9 +3025,9 @@ hipError_t ihipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent
isAsync = true;
}
}
amd::HostQueue* queue = hip::getQueue(stream);
hip::Stream* hip_stream = hip::getStream(stream);
std::vector<amd::Command*> commands;
status = ihipMemset3DCommand(commands, pitchedDevPtr, value, extent, queue);
status = ihipMemset3DCommand(commands, pitchedDevPtr, value, extent, hip_stream);
if (status != hipSuccess) {
return status;
}
@@ -3946,9 +3946,9 @@ hipError_t ihipMipmappedArrayDestroy(hipMipmappedArray_t mipmapped_array_ptr) {
}
for (auto& dev : g_devices) {
amd::HostQueue* queue = dev->NullStream(true);
if (queue != nullptr) {
queue->finish();
hip::Stream* stream = dev->NullStream(true);
if (stream != nullptr) {
stream->finish();
}
}
+17 -17
Просмотреть файл
@@ -305,7 +305,7 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY,
uint32_t globalWorkSizeZ, uint32_t blockDimX, uint32_t blockDimY,
uint32_t blockDimZ, uint32_t sharedMemBytes,
amd::HostQueue* queue, void** kernelParams, void** extra,
hip::Stream* stream, void** kernelParams, void** extra,
hipEvent_t startEvent = nullptr, hipEvent_t stopEvent = nullptr,
uint32_t flags = 0, uint32_t params = 0, uint32_t gridId = 0,
uint32_t numGrids = 0, uint64_t prevGridSum = 0,
@@ -328,7 +328,7 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
}
amd::NDRangeKernelCommand* kernelCommand = new amd::NDRangeKernelCommand(
*queue, waitList, *kernel, ndrange, sharedMemBytes, params, gridId, numGrids, prevGridSum,
*stream, waitList, *kernel, ndrange, sharedMemBytes, params, gridId, numGrids, prevGridSum,
allGridSum, firstDevice, profileNDRange);
if (!kernelCommand) {
return hipErrorOutOfMemory;
@@ -371,9 +371,9 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
return status;
}
amd::Command* command = nullptr;
amd::HostQueue* queue = hip::getQueue(hStream);
hip::Stream* hip_stream = hip::getStream(hStream);
status = ihipLaunchKernelCommand(command, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
blockDimX, blockDimY, blockDimZ, sharedMemBytes, queue,
blockDimX, blockDimY, blockDimZ, sharedMemBytes, hip_stream,
kernelParams, extra, startEvent, stopEvent, flags, params,
gridId, numGrids, prevGridSum, allGridSum, firstDevice);
if (status != hipSuccess) {
@@ -544,8 +544,8 @@ hipError_t ihipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams*
}
if (launch.hStream != nullptr) {
// Validate devices to make sure the list doesn't have duplicates
amd::HostQueue* queue = reinterpret_cast<hip::Stream*>(launch.hStream)->asHostQueue();
auto device = &queue->vdev()->device();
hip::Stream* hip_stream = reinterpret_cast<hip::Stream*>(launch.hStream);
auto device = &hip_stream->vdev()->device();
for (int j = 0; j < numDevices; ++j) {
if (mgpu_list[j] == device) {
return hipErrorInvalidDevice;
@@ -562,23 +562,23 @@ hipError_t ihipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams*
// Sync the execution streams on all devices
if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) {
for (int i = 0; i < numDevices; ++i) {
amd::HostQueue* queue =
reinterpret_cast<hip::Stream*>(launchParamsList[i].hStream)->asHostQueue();
queue->finish();
hip::Stream* hip_stream =
reinterpret_cast<hip::Stream*>(launchParamsList[i].hStream);
hip_stream->finish();
}
}
for (int i = 0; i < numDevices; ++i) {
const hipFunctionLaunchParams& launch = launchParamsList[i];
amd::HostQueue* queue = reinterpret_cast<hip::Stream*>(launch.hStream)->asHostQueue();
hip::Stream* hip_stream = reinterpret_cast<hip::Stream*>(launch.hStream);
if (i == 0) {
// The order of devices in the launch may not match the order in the global array
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
// Find the matching device
if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) {
if (&hip_stream->vdev()->device() == g_devices[dev]->devices()[0]) {
// Save ROCclr index of the first device in the launch
firstDevice = queue->vdev()->device().index();
firstDevice = hip_stream->vdev()->device().index();
break;
}
}
@@ -608,9 +608,9 @@ hipError_t ihipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams*
// Sync the execution streams on all devices
if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) {
for (int i = 0; i < numDevices; ++i) {
amd::HostQueue* queue =
reinterpret_cast<hip::Stream*>(launchParamsList[i].hStream)->asHostQueue();
queue->finish();
hip::Stream* hip_stream =
reinterpret_cast<hip::Stream*>(launchParamsList[i].hStream);
hip_stream->finish();
}
}
@@ -739,12 +739,12 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
return hipErrorInvalidValue;
}
amd::HostQueue* queue = hip::getQueue(launch.stream);
hip::Stream* hip_stream = hip::getStream(launch.stream);
hipFunction_t func = nullptr;
// The order of devices in the launch may not match the order in the global array
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
// Find the matching device and request the kernel function
if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) {
if (&hip_stream->vdev()->device() == g_devices[dev]->devices()[0]) {
IHIP_RETURN_ONFAIL(PlatformState::instance().getStatFunc(&func, launch.func, dev));
break;
}
+4 -4
Просмотреть файл
@@ -34,7 +34,7 @@ PlatformState* PlatformState::platform_; // Initiaized as nullptr by default
// forward declarations of methods required for __hipRegisterManagedVar
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
amd::HostQueue& queue, bool isAsync = false);
hip::Stream& stream, bool isAsync = false);
struct __CudaFatBinaryWrapper {
unsigned int magic;
@@ -146,9 +146,9 @@ extern "C" void __hipRegisterManagedVar(
HIP_INIT_VOID();
hipError_t status = ihipMallocManaged(pointer, size, align);
if (status == hipSuccess) {
amd::HostQueue* queue = hip::getNullStream();
if (queue != nullptr) {
status = ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *queue);
hip::Stream* stream = hip::getNullStream();
if (stream != nullptr) {
status = ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *stream);
guarantee((status == hipSuccess), "Error during memcpy to managed memory!");
} else {
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL");
+53 -89
Просмотреть файл
@@ -31,7 +31,8 @@ namespace hip {
// ================================================================================================
Stream::Stream(hip::Device* dev, Priority p, unsigned int f, bool null_stream,
const std::vector<uint32_t>& cuMask, hipStreamCaptureStatus captureStatus)
: queue_(nullptr),
: amd::HostQueue(*dev->asContext(), *dev->devices()[0], 0, amd::CommandQueue::RealTimeDisabled,
convertToQueuePriority(p), cuMask),
lock_("Stream Callback lock"),
device_(dev),
priority_(p),
@@ -40,18 +41,11 @@ Stream::Stream(hip::Device* dev, Priority p, unsigned int f, bool null_stream,
cuMask_(cuMask),
captureStatus_(captureStatus),
originStream_(false),
captureID_(0) {}
// ================================================================================================
// Destructor. When a host queue is attached (queue_ is non-null), removes this stream from
// streamSet while holding streamSetLock, calls release() on the queue and clears the pointer.
// When no queue is attached, nothing is done.
Stream::~Stream() {
  if (queue_ == nullptr) {
    // No queue is attached, so there is nothing to unregister or release
    return;
  }

  // The lock is held until the end of the destructor body, covering both the erase and the release
  amd::ScopedLock lock(streamSetLock);
  streamSet.erase(this);
  queue_->release();
  queue_ = nullptr;
}
captureID_(0)
{
amd::ScopedLock lock(streamSetLock);
streamSet.insert(this);
}
// ================================================================================================
hipError_t Stream::EndCapture() {
@@ -77,38 +71,16 @@ hipError_t Stream::EndCapture() {
// ================================================================================================
bool Stream::Create() {
amd::CommandQueue::Priority p;
switch (priority_) {
case Priority::High:
p = amd::CommandQueue::Priority::High;
break;
case Priority::Low:
p = amd::CommandQueue::Priority::Low;
break;
case Priority::Normal:
default:
p = amd::CommandQueue::Priority::Normal;
break;
}
amd::HostQueue* queue = new amd::HostQueue(*device_->asContext(), *device_->devices()[0],
0, amd::CommandQueue::RealTimeDisabled,
p, cuMask_);
return create();
}
// Create a host queue
bool result = (queue != nullptr) ? queue->create() : false;
// Insert just created stream into the list of the blocking queues
if (result) {
// ================================================================================================
bool Stream::terminate() {
{
amd::ScopedLock lock(streamSetLock);
streamSet.insert(this);
queue_ = queue;
device_->SaveQueue(queue);
} else if (queue != nullptr) {
// Queue creation has failed, and virtual device associated with the queue may not be created.
// Just need to delete the queue instance.
delete queue;
streamSet.erase(this);
}
return result;
return HostQueue::terminate();
}
// ================================================================================================
@@ -130,29 +102,6 @@ bool isValid(hipStream_t& stream) {
return true;
}
// ================================================================================================
// Returns the host queue attached to this stream.
// If no queue is attached yet and skip_alloc is false, Create() is invoked while holding Lock().
// The value of queue_ after that step is returned, so the result is nullptr when skip_alloc is
// true and no queue exists.
amd::HostQueue* Stream::asHostQueue(bool skip_alloc) {
  if (queue_ == nullptr) {
    // The stream object is accessed under its lock here because a queue may get allocated
    amd::ScopedLock l(Lock());
    // Re-check under the lock; only create the host queue if the caller allows allocation
    if ((queue_ == nullptr) && !skip_alloc) {
      Create();
    }
    return queue_;
  }
  // Fast path: a queue is already attached, no locking needed
  return queue_;
}
// ================================================================================================
// Forwards to finish() on the attached host queue; does nothing when no queue is attached.
void Stream::Finish() const {
  if (queue_ == nullptr) {
    return;
  }
  queue_->finish();
}
// ================================================================================================
int Stream::DeviceId() const {
return device_->deviceId();
@@ -176,7 +125,7 @@ void Stream::syncNonBlockingStreams(int deviceId) {
for (auto& it : streamSet) {
if (it->Flags() & hipStreamNonBlocking) {
if (it->DeviceId() == deviceId) {
it->asHostQueue()->finish();
it->finish();
}
}
}
@@ -203,7 +152,7 @@ void Stream::destroyAllStreams(int deviceId) {
}
}
for (auto& it : toBeDeleted) {
delete it;
it->release();
}
}
@@ -211,36 +160,48 @@ bool Stream::StreamCaptureOngoing(void) {
return (g_allCapturingStreams.empty() == true) ? false : true;
}
// Scans streamSet (under streamSetLock) for a stream whose GetDevice() equals `device` and whose
// GetQueueStatus() evaluates to true. Returns true on the first such stream, false if none match.
bool Stream::existsActiveStreamForDevice(hip::Device* device) {
  amd::ScopedLock lock(streamSetLock);
  for (const auto& candidate : streamSet) {
    // Skip streams that belong to a different device
    if (candidate->GetDevice() != device) {
      continue;
    }
    // The queue status is only queried for streams on the requested device
    if (candidate->GetQueueStatus()) {
      return true;
    }
  }
  return false;
}
};// hip namespace
// ================================================================================================
void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream) {
void iHipWaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stream) {
amd::Command::EventWaitList eventWaitList(0);
bool submitMarker = 0;
{
amd::ScopedLock lock(streamSetLock);
for (const auto& stream : streamSet) {
amd::HostQueue* active_queue = stream->asHostQueue();
for (const auto& active_stream : streamSet) {
// If it's the current device
if ((&active_queue->device() == &blocking_queue->device()) &&
if ((&active_stream->device() == &blocking_stream->device()) &&
// Make sure it's a default stream
((stream->Flags() & hipStreamNonBlocking) == 0) &&
((active_stream->Flags() & hipStreamNonBlocking) == 0) &&
// and it's not the current stream
(active_queue != blocking_queue) &&
(active_stream != blocking_stream) &&
// check for a wait on the null stream
(stream->Null() == wait_null_stream)) {
(active_stream->Null() == wait_null_stream)) {
// Get the last valid command
amd::Command* command = active_queue->getLastQueuedCommand(true);
amd::Command* command = active_stream->getLastQueuedCommand(true);
if (command != nullptr) {
amd::Event& event = command->event();
// Check HW status of the ROCclr event.
// Note: not all ROCclr modes support HW status
bool ready = active_queue->device().IsHwEventReady(event);
bool ready = active_stream->device().IsHwEventReady(event);
if (!ready) {
ready = (command->status() == CL_COMPLETE);
}
submitMarker |= active_queue->vdev()->isFenceDirty();
submitMarker |= active_stream->vdev()->isFenceDirty();
// Check the current active status
if (!ready) {
command->notifyCmdQueue();
@@ -259,7 +220,7 @@ void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream
// Check if we have to wait anything
if (eventWaitList.size() > 0 || submitMarker) {
amd::Command* command = new amd::Marker(*blocking_queue, kMarkerDisableFlush, eventWaitList);
amd::Command* command = new amd::Marker(*blocking_stream, kMarkerDisableFlush, eventWaitList);
if (command != nullptr) {
command->enqueue();
command->release();
@@ -288,8 +249,11 @@ static hipError_t ihipStreamCreate(hipStream_t* stream,
}
hip::Stream* hStream = new hip::Stream(hip::getCurrentDevice(), priority, flags, false, cuMask);
if (hStream == nullptr || !hStream->Create()) {
delete hStream;
if (hStream == nullptr) {
return hipErrorOutOfMemory;
}
else if (!hStream->Create()) {
hStream->release();
return hipErrorOutOfMemory;
}
@@ -310,7 +274,7 @@ stream_per_thread::stream_per_thread() {
stream_per_thread::~stream_per_thread() {
for (auto &stream:m_streams) {
if (stream != nullptr && hip::isValid(stream)) {
delete reinterpret_cast<hip::Stream*>(stream);
reinterpret_cast<hip::Stream*>(stream)->release();
stream = nullptr;
}
}
@@ -449,7 +413,7 @@ hipError_t hipStreamSynchronize_common(hipStream_t stream) {
}
}
// Wait for the current host queue
hip::getQueue(stream)->finish();
hip::getStream(stream)->finish();
return hipSuccess;
}
@@ -498,7 +462,7 @@ hipError_t hipStreamDestroy(hipStream_t stream) {
if (l_it != hip::tls.capture_streams_.end()) {
hip::tls.capture_streams_.erase(l_it);
}
delete s;
s->release();
HIP_RETURN(hipSuccess);
}
@@ -564,9 +528,9 @@ hipError_t hipStreamQuery_common(hipStream_t stream) {
HIP_RETURN(hipErrorStreamCaptureUnsupported);
}
}
amd::HostQueue* hostQueue = hip::getQueue(stream);
hip::Stream* hip_stream = hip::getStream(stream);
amd::Command* command = hostQueue->getLastQueuedCommand(true);
amd::Command* command = hip_stream->getLastQueuedCommand(true);
if (command == nullptr) {
// Nothing was submitted to the queue
return hipSuccess;
@@ -604,13 +568,13 @@ hipError_t streamCallback_common(hipStream_t stream, StreamCallback* cbo, void*
return hipErrorContextIsDestroyed;
}
amd::HostQueue* hostQueue = hip::getQueue(stream);
amd::Command* last_command = hostQueue->getLastQueuedCommand(true);
hip::Stream* hip_stream = hip::getStream(stream);
amd::Command* last_command = hip_stream->getLastQueuedCommand(true);
amd::Command::EventWaitList eventWaitList;
if (last_command != nullptr) {
eventWaitList.push_back(last_command);
}
amd::Command* command = new amd::Marker(*hostQueue, !kMarkerDisableFlush, eventWaitList);
amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, eventWaitList);
if (command == nullptr) {
return hipErrorInvalidValue;
}
@@ -630,7 +594,7 @@ hipError_t streamCallback_common(hipStream_t stream, StreamCallback* cbo, void*
// Add the new barrier to stall the stream, until the callback is done
eventWaitList.clear();
eventWaitList.push_back(command);
amd::Command* block_command = new amd::Marker(*hostQueue, !kMarkerDisableFlush, eventWaitList);
amd::Command* block_command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, eventWaitList);
if (block_command == nullptr) {
return hipErrorInvalidValue;
}
+2 -2
Просмотреть файл
@@ -69,11 +69,11 @@ hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void
return hipErrorInvalidValue;
}
amd::HostQueue* queue = hip::getQueue(stream);
hip::Stream* hip_stream = hip::getStream(stream);
amd::Command::EventWaitList waitList;
amd::StreamOperationCommand* command =
new amd::StreamOperationCommand(*queue, cmdType, waitList, *memory->asBuffer(),
new amd::StreamOperationCommand(*hip_stream, cmdType, waitList, *memory->asBuffer(),
value, mask, outFlags, offset, sizeBytes);
if (command == nullptr) {
+17 -17
Просмотреть файл
@@ -26,7 +26,7 @@
#include "platform/sampler.hpp"
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
amd::HostQueue& queue, bool isAsync = false);
hip::Stream& stream, bool isAsync = false);
hipError_t ihipFree(void* ptr);
@@ -575,8 +575,8 @@ hipError_t hipBindTexture2D(size_t* offset,
HIP_RETURN(err);
}
// Copy to device.
amd::HostQueue* queue = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue));
hip::Stream* stream = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream));
}
hipError_t ihipBindTextureToArray(const textureReference* texref,
@@ -624,8 +624,8 @@ hipError_t hipBindTextureToArray(const textureReference* texref,
HIP_RETURN(err);
}
// Copy to device.
amd::HostQueue* queue = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue));
hip::Stream* stream = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream));
}
hipError_t ihipBindTextureToMipmappedArray(const textureReference* texref,
@@ -674,8 +674,8 @@ hipError_t hipBindTextureToMipmappedArray(const textureReference* texref,
HIP_RETURN(err);
}
// Copy to device.
amd::HostQueue* queue = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue));
hip::Stream* stream = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream));
}
hipError_t hipUnbindTexture(const textureReference* texref) {
@@ -701,8 +701,8 @@ hipError_t hipBindTexture(size_t* offset,
HIP_RETURN(err);
}
// Copy to device.
amd::HostQueue* queue = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue));
hip::Stream* stream = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream));
}
hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc,
@@ -966,8 +966,8 @@ hipError_t hipTexRefSetArray(textureReference* texRef,
HIP_RETURN(err);
}
// Copy to device.
amd::HostQueue* queue = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue));
hip::Stream* stream = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream));
}
hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr,
@@ -1049,8 +1049,8 @@ hipError_t hipTexRefSetAddress(size_t* ByteOffset,
HIP_RETURN(err);
}
// Copy to device.
amd::HostQueue* queue = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue));
hip::Stream* stream = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream));
}
hipError_t hipTexRefSetAddress2D(textureReference* texRef,
@@ -1091,8 +1091,8 @@ hipError_t hipTexRefSetAddress2D(textureReference* texRef,
HIP_RETURN(err);
}
// Copy to device.
amd::HostQueue* queue = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue));
hip::Stream* stream = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream));
}
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) {
@@ -1454,8 +1454,8 @@ hipError_t hipTexRefSetMipmappedArray(textureReference* texRef,
HIP_RETURN(err);
}
// Copy to device.
amd::HostQueue* queue = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue));
hip::Stream* stream = hip::getNullStream();
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream));
}
hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject,