SWDEV-381402 - Derive hip::Stream from amd::HostQueue
Change-Id: I6c1aca5eb350c32d974ae4ffcc725705355956d8
Этот коммит содержится в:
@@ -32,7 +32,7 @@ THE SOFTWARE.
|
||||
#include <elf/elf.hpp>
|
||||
|
||||
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
amd::HostQueue& queue, bool isAsync = false);
|
||||
hip::Stream& stream, bool isAsync = false);
|
||||
hipError_t ihipFree(void* ptr);
|
||||
// forward declaration of methods required for managed variables
|
||||
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
|
||||
@@ -635,10 +635,10 @@ hipError_t DynCO::initDynManagedVars(const std::string& managedVar) {
|
||||
it->second->setManagedVarInfo(pointer, dvar->size());
|
||||
|
||||
// copy initial value to the managed variable to the managed memory allocated
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
if (queue != nullptr) {
|
||||
hip::Stream* stream = hip::getNullStream();
|
||||
if (stream != nullptr) {
|
||||
status = ihipMemcpy(pointer, reinterpret_cast<address>(dvar->device_ptr()), dvar->size(),
|
||||
hipMemcpyDeviceToDevice, *queue);
|
||||
hipMemcpyDeviceToDevice, *stream);
|
||||
if (status != hipSuccess) {
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Status %d, failed to copy device ptr:%s", status,
|
||||
managedVar.c_str());
|
||||
@@ -658,7 +658,7 @@ hipError_t DynCO::initDynManagedVars(const std::string& managedVar) {
|
||||
}
|
||||
// copy managed memory pointer to the managed device variable
|
||||
status = ihipMemcpy(reinterpret_cast<address>(dvar->device_ptr()), &pointer, dvar->size(),
|
||||
hipMemcpyHostToDevice, *queue);
|
||||
hipMemcpyHostToDevice, *stream);
|
||||
if (status != hipSuccess) {
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Status %d, failed to copy device ptr:%s", status,
|
||||
managedVar.c_str());
|
||||
@@ -895,10 +895,10 @@ hipError_t StatCO::initStatManagedVarDevicePtr(int deviceId) {
|
||||
DeviceVar* dvar = nullptr;
|
||||
IHIP_RETURN_ONFAIL(var->getStatDeviceVar(&dvar, deviceId));
|
||||
|
||||
amd::HostQueue* queue = g_devices.at(deviceId)->NullStream();
|
||||
if (queue != nullptr) {
|
||||
hip::Stream* stream = g_devices.at(deviceId)->NullStream();
|
||||
if (stream != nullptr) {
|
||||
err = ihipMemcpy(reinterpret_cast<address>(dvar->device_ptr()), var->getManagedVarPtr(),
|
||||
dvar->size(), hipMemcpyHostToDevice, *queue);
|
||||
dvar->size(), hipMemcpyHostToDevice, *stream);
|
||||
} else {
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL");
|
||||
return hipErrorInvalidResourceHandle;
|
||||
|
||||
@@ -91,21 +91,21 @@ void setCurrentDevice(unsigned int index) {
|
||||
amd::Os::setPreferredNumaNode(preferredNumaNode);
|
||||
}
|
||||
|
||||
amd::HostQueue* getQueue(hipStream_t stream) {
|
||||
hip::Stream* getStream(hipStream_t stream) {
|
||||
if (stream == nullptr) {
|
||||
return getNullStream();
|
||||
} else {
|
||||
amd::HostQueue* queue = reinterpret_cast<hip::Stream*>(stream)->asHostQueue();
|
||||
if (!(reinterpret_cast<hip::Stream*>(stream)->Flags() & hipStreamNonBlocking)) {
|
||||
hip::Stream* hip_stream = reinterpret_cast<hip::Stream*>(stream);
|
||||
if (!(hip_stream->Flags() & hipStreamNonBlocking)) {
|
||||
constexpr bool WaitNullStreamOnly = true;
|
||||
iHipWaitActiveStreams(queue, WaitNullStreamOnly);
|
||||
iHipWaitActiveStreams(hip_stream, WaitNullStreamOnly);
|
||||
}
|
||||
return queue;
|
||||
return hip_stream;
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
amd::HostQueue* getNullStream(amd::Context& ctx) {
|
||||
hip::Stream* getNullStream(amd::Context& ctx) {
|
||||
for (auto& it : g_devices) {
|
||||
if (it->asContext() == &ctx) {
|
||||
return it->NullStream();
|
||||
@@ -131,7 +131,7 @@ int getDeviceID(amd::Context& ctx) {
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
amd::HostQueue* getNullStream() {
|
||||
hip::Stream* getNullStream() {
|
||||
Device* device = getCurrentDevice();
|
||||
return device ? device->NullStream() : nullptr;
|
||||
}
|
||||
|
||||
@@ -26,25 +26,31 @@
|
||||
namespace hip {
|
||||
|
||||
// ================================================================================================
|
||||
amd::HostQueue* Device::NullStream(bool skip_alloc) {
|
||||
amd::HostQueue* null_queue = null_stream_.asHostQueue(skip_alloc);
|
||||
if (null_queue == nullptr) {
|
||||
hip::Stream* Device::NullStream(bool skip_alloc) {
|
||||
if (null_stream_ == nullptr && !skip_alloc) {
|
||||
null_stream_ = new Stream(this, Stream::Priority::Normal, 0, true);
|
||||
}
|
||||
|
||||
if (null_stream_ == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
// Wait for all active streams before executing commands on the default
|
||||
iHipWaitActiveStreams(null_queue);
|
||||
return null_queue;
|
||||
iHipWaitActiveStreams(null_stream_);
|
||||
return null_stream_;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
Stream* Device::GetNullStream() {
|
||||
amd::HostQueue* null_queue = null_stream_.asHostQueue();
|
||||
if (null_queue == nullptr) {
|
||||
hip::Stream* Device::GetNullStream() {
|
||||
if (null_stream_ == nullptr) {
|
||||
null_stream_ = new Stream(this, Stream::Priority::Normal, 0, true);
|
||||
}
|
||||
|
||||
if (null_stream_ == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
// Wait for all active streams before executing commands on the default
|
||||
iHipWaitActiveStreams(null_queue);
|
||||
return &null_stream_;
|
||||
iHipWaitActiveStreams(null_stream_);
|
||||
return null_stream_;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
@@ -128,6 +134,10 @@ Device::~Device() {
|
||||
if (default_mem_pool_ != nullptr) {
|
||||
default_mem_pool_->release();
|
||||
}
|
||||
|
||||
if (null_stream_!= nullptr) {
|
||||
delete null_stream_;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -512,9 +512,9 @@ hipError_t hipDeviceSetSharedMemConfig ( hipSharedMemConfig config ) {
|
||||
hipError_t hipDeviceSynchronize ( void ) {
|
||||
HIP_INIT_API(hipDeviceSynchronize);
|
||||
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
hip::Stream* stream = hip::getNullStream();
|
||||
|
||||
if (!queue) {
|
||||
if (!stream) {
|
||||
HIP_RETURN(hipErrorOutOfMemory);
|
||||
}
|
||||
|
||||
@@ -522,7 +522,7 @@ hipError_t hipDeviceSynchronize ( void ) {
|
||||
HIP_RETURN(hipErrorStreamCaptureUnsupported);
|
||||
}
|
||||
|
||||
queue->finish();
|
||||
stream->finish();
|
||||
|
||||
hip::Stream::syncNonBlockingStreams(hip::getCurrentDevice()->deviceId());
|
||||
|
||||
|
||||
@@ -177,12 +177,12 @@ int64_t EventDD::time(bool getStartTs) const {
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t Event::streamWaitCommand(amd::Command*& command, amd::HostQueue* queue) {
|
||||
hipError_t Event::streamWaitCommand(amd::Command*& command, hip::Stream* stream) {
|
||||
amd::Command::EventWaitList eventWaitList;
|
||||
if (event_ != nullptr) {
|
||||
eventWaitList.push_back(event_);
|
||||
}
|
||||
command = new amd::Marker(*queue, kMarkerDisableFlush, eventWaitList);
|
||||
command = new amd::Marker(*stream, kMarkerDisableFlush, eventWaitList);
|
||||
|
||||
if (command == NULL) {
|
||||
return hipErrorOutOfMemory;
|
||||
@@ -196,17 +196,17 @@ hipError_t Event::enqueueStreamWaitCommand(hipStream_t stream, amd::Command* com
|
||||
}
|
||||
|
||||
hipError_t Event::streamWait(hipStream_t stream, uint flags) {
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
// Access to event_ object must be lock protected
|
||||
amd::ScopedLock lock(lock_);
|
||||
if ((event_ == nullptr) || (event_->command().queue() == queue) || ready()) {
|
||||
if ((event_ == nullptr) || (event_->command().queue() == hip_stream) || ready()) {
|
||||
return hipSuccess;
|
||||
}
|
||||
if (!event_->notifyCmdQueue()) {
|
||||
return hipErrorLaunchOutOfResources;
|
||||
}
|
||||
amd::Command* command;
|
||||
hipError_t status = streamWaitCommand(command, queue);
|
||||
hipError_t status = streamWaitCommand(command, hip_stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -218,7 +218,7 @@ hipError_t Event::streamWait(hipStream_t stream, uint flags) {
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* queue,
|
||||
hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* stream,
|
||||
uint32_t ext_flags ) {
|
||||
if (command == nullptr) {
|
||||
int32_t releaseFlags = ((ext_flags == 0) ? flags : ext_flags) &
|
||||
@@ -231,7 +231,7 @@ hipError_t Event::recordCommand(amd::Command*& command, amd::HostQueue* queue,
|
||||
releaseFlags = amd::Device::kCacheStateIgnore;
|
||||
}
|
||||
// Always submit a EventMarker.
|
||||
command = new hip::EventMarker(*queue, !kMarkerDisableFlush, true, releaseFlags);
|
||||
command = new hip::EventMarker(*stream, !kMarkerDisableFlush, true, releaseFlags);
|
||||
}
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -249,10 +249,10 @@ hipError_t Event::enqueueRecordCommand(hipStream_t stream, amd::Command* command
|
||||
}
|
||||
|
||||
hipError_t Event::addMarker(hipStream_t stream, amd::Command* command, bool record) {
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
// Keep the lock always at the beginning of this to avoid a race. SWDEV-277847
|
||||
amd::ScopedLock lock(lock_);
|
||||
hipError_t status = recordCommand(command, queue);
|
||||
hipError_t status = recordCommand(command, hip_stream);
|
||||
if (status != hipSuccess) {
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -379,8 +379,8 @@ hipError_t hipEventRecord_common(hipEvent_t event, hipStream_t stream) {
|
||||
return hipErrorInvalidHandle;
|
||||
}
|
||||
hip::Event* e = reinterpret_cast<hip::Event*>(event);
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (g_devices[e->deviceId()]->devices()[0] != &queue->device()) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (g_devices[e->deviceId()]->devices()[0] != &hip_stream->device()) {
|
||||
return hipErrorInvalidHandle;
|
||||
}
|
||||
return e->addMarker(stream, nullptr, true);
|
||||
|
||||
@@ -78,9 +78,9 @@ typedef struct ihipIpcEventShmem_s {
|
||||
|
||||
class EventMarker : public amd::Marker {
|
||||
public:
|
||||
EventMarker(amd::HostQueue& queue, bool disableFlush, bool markerTs = false,
|
||||
EventMarker(amd::HostQueue& stream, bool disableFlush, bool markerTs = false,
|
||||
int32_t scope = amd::Device::kCacheStateInvalid)
|
||||
: amd::Marker(queue, disableFlush) {
|
||||
: amd::Marker(stream, disableFlush) {
|
||||
profilingInfo_.enabled_ = true;
|
||||
profilingInfo_.callback_ = nullptr;
|
||||
profilingInfo_.marker_ts_ = markerTs;
|
||||
@@ -116,11 +116,11 @@ class Event {
|
||||
virtual hipError_t synchronize();
|
||||
hipError_t elapsedTime(Event& eStop, float& ms);
|
||||
|
||||
virtual hipError_t streamWaitCommand(amd::Command*& command, amd::HostQueue* queue);
|
||||
virtual hipError_t streamWaitCommand(amd::Command*& command, hip::Stream* stream);
|
||||
virtual hipError_t enqueueStreamWaitCommand(hipStream_t stream, amd::Command* command);
|
||||
virtual hipError_t streamWait(hipStream_t stream, uint flags);
|
||||
|
||||
virtual hipError_t recordCommand(amd::Command*& command, amd::HostQueue* queue,
|
||||
virtual hipError_t recordCommand(amd::Command*& command, amd::HostQueue* stream,
|
||||
uint32_t flags = 0);
|
||||
virtual hipError_t enqueueRecordCommand(hipStream_t stream, amd::Command* command, bool record);
|
||||
hipError_t addMarker(hipStream_t stream, amd::Command* command, bool record);
|
||||
@@ -175,7 +175,7 @@ class Event {
|
||||
|
||||
protected:
|
||||
amd::Monitor lock_;
|
||||
amd::HostQueue* stream_;
|
||||
hip::Stream* stream_;
|
||||
amd::Event* event_;
|
||||
int device_id_;
|
||||
//! Flag to indicate hipEventRecord has not been called. This is needed for
|
||||
@@ -224,7 +224,7 @@ class IPCEvent : public Event {
|
||||
hipError_t synchronize();
|
||||
hipError_t query();
|
||||
|
||||
hipError_t streamWaitCommand(amd::Command*& command, amd::HostQueue* queue);
|
||||
hipError_t streamWaitCommand(amd::Command*& command, hip::Stream* stream);
|
||||
hipError_t enqueueStreamWaitCommand(hipStream_t stream, amd::Command* command);
|
||||
hipError_t streamWait(hipStream_t stream, uint flags);
|
||||
|
||||
|
||||
@@ -102,8 +102,8 @@ hipError_t IPCEvent::synchronize() {
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t IPCEvent::streamWaitCommand(amd::Command*& command, amd::HostQueue* queue) {
|
||||
command = new amd::Marker(*queue, false);
|
||||
hipError_t IPCEvent::streamWaitCommand(amd::Command*& command, hip::Stream* stream) {
|
||||
command = new amd::Marker(*stream, false);
|
||||
if (command == NULL) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
@@ -125,12 +125,12 @@ hipError_t IPCEvent::enqueueStreamWaitCommand(hipStream_t stream, amd::Command*
|
||||
}
|
||||
|
||||
hipError_t IPCEvent::streamWait(hipStream_t stream, uint flags) {
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
|
||||
amd::ScopedLock lock(lock_);
|
||||
if(query() != hipSuccess) {
|
||||
amd::Command* command;
|
||||
hipError_t status = streamWaitCommand(command, queue);
|
||||
hipError_t status = streamWaitCommand(command, hip_stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -140,18 +140,17 @@ hipError_t IPCEvent::streamWait(hipStream_t stream, uint flags) {
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t IPCEvent::recordCommand(amd::Command*& command, amd::HostQueue* queue, uint32_t flags) {
|
||||
hipError_t IPCEvent::recordCommand(amd::Command*& command, amd::HostQueue* stream, uint32_t flags) {
|
||||
bool unrecorded = isUnRecorded();
|
||||
if (unrecorded) {
|
||||
command = new amd::Marker(*queue, kMarkerDisableFlush);
|
||||
command = new amd::Marker(*stream, kMarkerDisableFlush);
|
||||
} else {
|
||||
return Event::recordCommand(command, queue);
|
||||
return Event::recordCommand(command, stream);
|
||||
}
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t IPCEvent::enqueueRecordCommand(hipStream_t stream, amd::Command* command, bool record) {
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
bool unrecorded = isUnRecorded();
|
||||
if (unrecorded) {
|
||||
amd::Event& tEvent = command->event();
|
||||
|
||||
@@ -637,13 +637,12 @@ hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources,
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (nullptr == queue) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (nullptr == hip_stream) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
amd::HostQueue& hostQueue = *queue;
|
||||
|
||||
if (!hostQueue.context().glenv() || !hostQueue.context().glenv()->isAssociated()) {
|
||||
if (!hip_stream->context().glenv() || !hip_stream->context().glenv()->isAssociated()) {
|
||||
LogWarning("\"amdContext\" is not created from GL context or share list");
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
@@ -658,7 +657,7 @@ hipError_t hipGraphicsMapResources(int count, hipGraphicsResource_t* resources,
|
||||
|
||||
//! Now create command and enqueue
|
||||
amd::AcquireExtObjectsCommand* command = new amd::AcquireExtObjectsCommand(
|
||||
hostQueue, nullWaitList, count, memObjects, CL_COMMAND_ACQUIRE_GL_OBJECTS);
|
||||
*hip_stream, nullWaitList, count, memObjects, CL_COMMAND_ACQUIRE_GL_OBJECTS);
|
||||
if (command == nullptr) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
@@ -712,13 +711,12 @@ hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources
|
||||
}
|
||||
|
||||
// Wait for the current host queue
|
||||
hip::getQueue(stream)->finish();
|
||||
hip::getStream(stream)->finish();
|
||||
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (nullptr == queue) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (nullptr == hip_stream) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
amd::HostQueue& hostQueue = *queue;
|
||||
|
||||
std::vector<amd::Memory*> memObjects;
|
||||
hipError_t err = hipSetInteropObjects(count, reinterpret_cast<void**>(resources), memObjects);
|
||||
@@ -730,7 +728,7 @@ hipError_t hipGraphicsUnmapResources(int count, hipGraphicsResource_t* resources
|
||||
|
||||
// Now create command and enqueue
|
||||
amd::ReleaseExtObjectsCommand* command = new amd::ReleaseExtObjectsCommand(
|
||||
hostQueue, nullWaitList, count, memObjects, CL_COMMAND_RELEASE_GL_OBJECTS);
|
||||
*hip_stream, nullWaitList, count, memObjects, CL_COMMAND_RELEASE_GL_OBJECTS);
|
||||
if (command == nullptr) {
|
||||
HIP_RETURN(hipErrorUnknown);
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@ hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p);
|
||||
hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind);
|
||||
|
||||
hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, size_t sizeBytes,
|
||||
hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync = false);
|
||||
hipMemcpyKind kind, hip::Stream& stream, bool isAsync = false);
|
||||
|
||||
void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, amd::HostQueue& queue);
|
||||
void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, hip::Stream& stream);
|
||||
|
||||
bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind);
|
||||
|
||||
@@ -26,19 +26,19 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
|
||||
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY,
|
||||
uint32_t globalWorkSizeZ, uint32_t blockDimX, uint32_t blockDimY,
|
||||
uint32_t blockDimZ, uint32_t sharedMemBytes,
|
||||
amd::HostQueue* queue, void** kernelParams, void** extra,
|
||||
hip::Stream* stream, void** kernelParams, void** extra,
|
||||
hipEvent_t startEvent, hipEvent_t stopEvent, uint32_t flags,
|
||||
uint32_t params, uint32_t gridId, uint32_t numGrids,
|
||||
uint64_t prevGridSum, uint64_t allGridSum, uint32_t firstDevice);
|
||||
|
||||
hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p,
|
||||
amd::HostQueue* queue);
|
||||
hip::Stream* stream);
|
||||
|
||||
hipError_t ihipMemsetCommand(std::vector<amd::Command*>& commands, void* dst, int64_t value,
|
||||
size_t valueSize, size_t sizeBytes, amd::HostQueue* queue);
|
||||
size_t valueSize, size_t sizeBytes, hip::Stream* stream);
|
||||
|
||||
hipError_t ihipMemset3DCommand(std::vector<amd::Command*>& commands, hipPitchedPtr pitchedDevPtr,
|
||||
int value, hipExtent extent, amd::HostQueue* queue, size_t elementSize = 1);
|
||||
int value, hipExtent extent, hip::Stream* stream, size_t elementSize = 1);
|
||||
|
||||
hipError_t ihipMemcpySymbol_validate(const void* symbol, size_t sizeBytes, size_t offset,
|
||||
size_t& sym_size, hipDeviceptr_t& device_ptr);
|
||||
|
||||
@@ -98,56 +98,6 @@ hipError_t hipGraphMemcpyNode1D::ValidateParams(void* dst, const void* src, size
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipGraphMemcpyNode1D::SetCommandParams(void* dst, const void* src, size_t count,
|
||||
hipMemcpyKind kind) {
|
||||
hipError_t status = ihipMemcpy_validate(dst, src, count, kind);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
size_t sOffsetOrig = 0;
|
||||
amd::Memory* origSrcMemory = getMemoryObject(src, sOffsetOrig);
|
||||
size_t dOffsetOrig = 0;
|
||||
amd::Memory* origDstMemory = getMemoryObject(dst, dOffsetOrig);
|
||||
|
||||
size_t sOffset = 0;
|
||||
amd::Memory* srcMemory = getMemoryObject(src, sOffset);
|
||||
size_t dOffset = 0;
|
||||
amd::Memory* dstMemory = getMemoryObject(dst, dOffset);
|
||||
|
||||
if ((srcMemory == nullptr) && (dstMemory != nullptr)) {
|
||||
if (origDstMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
amd::WriteMemoryCommand* command = reinterpret_cast<amd::WriteMemoryCommand*>(commands_[0]);
|
||||
command->setParams(*dstMemory->asBuffer(), dOffset, count, src);
|
||||
} else if ((srcMemory != nullptr) && (dstMemory == nullptr)) {
|
||||
if (origSrcMemory->getContext().devices()[0] != srcMemory->getContext().devices()[0]) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
amd::ReadMemoryCommand* command = reinterpret_cast<amd::ReadMemoryCommand*>(commands_[0]);
|
||||
command->setParams(*srcMemory->asBuffer(), sOffset, count, dst);
|
||||
} else if ((srcMemory != nullptr) && (dstMemory != nullptr)) {
|
||||
if (origDstMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
if (origSrcMemory->getContext().devices()[0] != srcMemory->getContext().devices()[0]) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
amd::CopyMemoryP2PCommand* command = reinterpret_cast<amd::CopyMemoryP2PCommand*>(commands_[0]);
|
||||
command->setParams(*srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, count);
|
||||
// Make sure runtime has valid memory for the command execution. P2P access
|
||||
// requires page table mapping on the current device to another GPU memory
|
||||
if (!static_cast<amd::CopyMemoryP2PCommand*>(command)->validateMemory()) {
|
||||
delete command;
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
} else {
|
||||
amd::CopyMemoryCommand* command = reinterpret_cast<amd::CopyMemoryCommand*>(commands_[0]);
|
||||
command->setParams(*srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, count);
|
||||
}
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipGraphMemcpyNode::ValidateParams(const hipMemcpy3DParms* pNodeParams) {
|
||||
hipError_t status = ihipMemcpy3D_validate(pNodeParams);
|
||||
if (status != hipSuccess) {
|
||||
@@ -297,185 +247,6 @@ hipError_t hipGraphMemcpyNode::ValidateParams(const hipMemcpy3DParms* pNodeParam
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t hipGraphMemcpyNode::SetCommandParams(const hipMemcpy3DParms* pNodeParams) {
|
||||
hipError_t status = ihipMemcpy3D_validate(pNodeParams);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
const HIP_MEMCPY3D pCopy = hip::getDrvMemcpy3DDesc(*pNodeParams);
|
||||
// If {src/dst}MemoryType is hipMemoryTypeUnified, {src/dst}Device and {src/dst}Pitch specify the
|
||||
// (unified virtual address space) base address of the source data and the bytes per row to apply.
|
||||
// {src/dst}Array is ignored.
|
||||
hipMemoryType srcMemoryType = pCopy.srcMemoryType;
|
||||
if (srcMemoryType == hipMemoryTypeUnified) {
|
||||
srcMemoryType =
|
||||
amd::MemObjMap::FindMemObj(pCopy.srcDevice) ? hipMemoryTypeDevice : hipMemoryTypeHost;
|
||||
if (srcMemoryType == hipMemoryTypeHost) {
|
||||
// {src/dst}Host may be unitialized. Copy over {src/dst}Device into it if we detect system
|
||||
// memory.
|
||||
const_cast<HIP_MEMCPY3D*>(&pCopy)->srcHost = pCopy.srcDevice;
|
||||
}
|
||||
}
|
||||
hipMemoryType dstMemoryType = pCopy.dstMemoryType;
|
||||
if (dstMemoryType == hipMemoryTypeUnified) {
|
||||
dstMemoryType =
|
||||
amd::MemObjMap::FindMemObj(pCopy.dstDevice) ? hipMemoryTypeDevice : hipMemoryTypeHost;
|
||||
if (srcMemoryType == hipMemoryTypeHost) {
|
||||
const_cast<HIP_MEMCPY3D*>(&pCopy)->dstHost = pCopy.dstDevice;
|
||||
}
|
||||
}
|
||||
|
||||
// If {src/dst}MemoryType is hipMemoryTypeHost, check if the memory was prepinned.
|
||||
// In that case upgrade the copy type to hipMemoryTypeDevice to avoid extra pinning.
|
||||
if (srcMemoryType == hipMemoryTypeHost) {
|
||||
amd::Memory* mem = amd::MemObjMap::FindMemObj(pCopy.srcHost);
|
||||
srcMemoryType = mem ? hipMemoryTypeDevice : hipMemoryTypeHost;
|
||||
if (srcMemoryType == hipMemoryTypeDevice) {
|
||||
const_cast<HIP_MEMCPY3D*>(&pCopy)->srcDevice = const_cast<void*>(pCopy.srcHost);
|
||||
}
|
||||
}
|
||||
if (dstMemoryType == hipMemoryTypeHost) {
|
||||
amd::Memory* mem = amd::MemObjMap::FindMemObj(pCopy.dstHost);
|
||||
dstMemoryType = mem ? hipMemoryTypeDevice : hipMemoryTypeHost;
|
||||
if (dstMemoryType == hipMemoryTypeDevice) {
|
||||
const_cast<HIP_MEMCPY3D*>(&pCopy)->dstDevice = const_cast<void*>(pCopy.dstDevice);
|
||||
}
|
||||
}
|
||||
|
||||
amd::Coord3D srcOrigin = {pCopy.srcXInBytes, pCopy.srcY, pCopy.srcZ};
|
||||
amd::Coord3D dstOrigin = {pCopy.dstXInBytes, pCopy.dstY, pCopy.dstZ};
|
||||
amd::Coord3D copyRegion = {pCopy.WidthInBytes, pCopy.Height, pCopy.Depth};
|
||||
|
||||
if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeDevice)) {
|
||||
// Host to Device.
|
||||
|
||||
amd::Memory* dstMemory;
|
||||
amd::BufferRect srcRect;
|
||||
amd::BufferRect dstRect;
|
||||
|
||||
status =
|
||||
ihipMemcpyHtoDValidate(pCopy.srcHost, pCopy.dstDevice, srcOrigin, dstOrigin, copyRegion,
|
||||
pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, pCopy.dstPitch,
|
||||
pCopy.dstPitch * pCopy.dstHeight, dstMemory, srcRect, dstRect);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
amd::WriteMemoryCommand* command = reinterpret_cast<amd::WriteMemoryCommand*>(commands_[0]);
|
||||
command->setParams(*dstMemory, {dstRect.start_, 0, 0}, copyRegion, pCopy.srcHost, dstRect,
|
||||
srcRect);
|
||||
} else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) {
|
||||
// Device to Host.
|
||||
amd::Memory* srcMemory;
|
||||
amd::BufferRect srcRect;
|
||||
amd::BufferRect dstRect;
|
||||
status =
|
||||
ihipMemcpyDtoHValidate(pCopy.srcDevice, pCopy.dstHost, srcOrigin, dstOrigin, copyRegion,
|
||||
pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, pCopy.dstPitch,
|
||||
pCopy.dstPitch * pCopy.dstHeight, srcMemory, srcRect, dstRect);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
amd::ReadMemoryCommand* command = reinterpret_cast<amd::ReadMemoryCommand*>(commands_[0]);
|
||||
command->setParams(*srcMemory, {srcRect.start_, 0, 0}, copyRegion, pCopy.dstHost, srcRect,
|
||||
dstRect);
|
||||
command->setSource(*srcMemory);
|
||||
command->setOrigin({srcRect.start_, 0, 0});
|
||||
command->setSize(copyRegion);
|
||||
command->setDestination(pCopy.dstHost);
|
||||
command->setBufRect(srcRect);
|
||||
command->setHostRect(dstRect);
|
||||
} else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) {
|
||||
// Device to Device.
|
||||
amd::Memory* srcMemory;
|
||||
amd::Memory* dstMemory;
|
||||
amd::BufferRect srcRect;
|
||||
amd::BufferRect dstRect;
|
||||
|
||||
status = ihipMemcpyDtoDValidate(pCopy.srcDevice, pCopy.dstDevice, srcOrigin, dstOrigin,
|
||||
copyRegion, pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight,
|
||||
pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight, srcMemory,
|
||||
dstMemory, srcRect, dstRect);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
amd::CopyMemoryCommand* command = reinterpret_cast<amd::CopyMemoryCommand*>(commands_[0]);
|
||||
command->setParams(*srcMemory, *dstMemory, {srcRect.start_, 0, 0}, {dstRect.start_, 0, 0},
|
||||
copyRegion, srcRect, dstRect);
|
||||
} else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) {
|
||||
amd::Image* dstImage;
|
||||
amd::BufferRect srcRect;
|
||||
|
||||
status =
|
||||
ihipMemcpyHtoAValidate(pCopy.srcHost, pCopy.dstArray, srcOrigin, dstOrigin, copyRegion,
|
||||
pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight, dstImage, srcRect);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
amd::WriteMemoryCommand* command = reinterpret_cast<amd::WriteMemoryCommand*>(commands_[0]);
|
||||
command->setParams(*dstImage, dstOrigin, copyRegion,
|
||||
static_cast<const char*>(pCopy.srcHost) + srcRect.start_, pCopy.srcPitch,
|
||||
pCopy.srcPitch * pCopy.srcHeight);
|
||||
} else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) {
|
||||
// Image to Host.
|
||||
amd::Image* srcImage;
|
||||
amd::BufferRect dstRect;
|
||||
|
||||
status =
|
||||
ihipMemcpyAtoHValidate(pCopy.srcArray, pCopy.dstHost, srcOrigin, dstOrigin, copyRegion,
|
||||
pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight, srcImage, dstRect);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
amd::ReadMemoryCommand* command = reinterpret_cast<amd::ReadMemoryCommand*>(commands_[0]);
|
||||
command->setParams(*srcImage, srcOrigin, copyRegion,
|
||||
static_cast<char*>(pCopy.dstHost) + dstRect.start_, pCopy.dstPitch,
|
||||
pCopy.dstPitch * pCopy.dstHeight);
|
||||
} else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) {
|
||||
// Device to Image.
|
||||
amd::Image* dstImage;
|
||||
amd::Memory* srcMemory;
|
||||
amd::BufferRect dstRect;
|
||||
amd::BufferRect srcRect;
|
||||
status = ihipMemcpyDtoAValidate(pCopy.srcDevice, pCopy.dstArray, srcOrigin, dstOrigin,
|
||||
copyRegion, pCopy.srcPitch, pCopy.srcPitch * pCopy.srcHeight,
|
||||
dstImage, srcMemory, dstRect, srcRect);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
amd::CopyMemoryCommand* command = reinterpret_cast<amd::CopyMemoryCommand*>(commands_[0]);
|
||||
command->setParams(*srcMemory, *dstImage, srcOrigin, dstOrigin, copyRegion, srcRect, dstRect);
|
||||
} else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) {
|
||||
// Image to Device.
|
||||
amd::BufferRect srcRect;
|
||||
amd::BufferRect dstRect;
|
||||
amd::Memory* dstMemory;
|
||||
amd::Image* srcImage;
|
||||
status = ihipMemcpyAtoDValidate(pCopy.srcArray, pCopy.dstDevice, srcOrigin, dstOrigin,
|
||||
copyRegion, pCopy.dstPitch, pCopy.dstPitch * pCopy.dstHeight,
|
||||
dstMemory, srcImage, srcRect, dstRect);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
amd::CopyMemoryCommand* command = reinterpret_cast<amd::CopyMemoryCommand*>(commands_[0]);
|
||||
command->setParams(*srcImage, *dstMemory, srcOrigin, dstOrigin, copyRegion, srcRect, dstRect);
|
||||
} else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) {
|
||||
amd::Image* srcImage;
|
||||
amd::Image* dstImage;
|
||||
|
||||
status = ihipMemcpyAtoAValidate(pCopy.srcArray, pCopy.dstArray, srcOrigin, dstOrigin,
|
||||
copyRegion, srcImage, dstImage);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
amd::CopyMemoryCommand* command = reinterpret_cast<amd::CopyMemoryCommand*>(commands_[0]);
|
||||
command->setParams(*srcImage, *dstImage, srcOrigin, dstOrigin, copyRegion);
|
||||
} else {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
|
||||
bool ihipGraph::isGraphValid(ihipGraph* pGraph) {
|
||||
amd::ScopedLock lock(graphSetLock_);
|
||||
if (graphSet_.find(pGraph) == graphSet_.end()) {
|
||||
@@ -685,7 +456,9 @@ hipError_t hipGraphExec::CreateStreams(uint32_t num_streams) {
|
||||
auto stream = new hip::Stream(hip::getCurrentDevice(),
|
||||
hip::Stream::Priority::Normal, hipStreamNonBlocking);
|
||||
if (stream == nullptr || !stream->Create()) {
|
||||
delete stream;
|
||||
if (stream != nullptr) {
|
||||
stream->release();
|
||||
}
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_CODE, "[hipGraph] Failed to create parallel stream!\n");
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
@@ -708,7 +481,7 @@ hipError_t hipGraphExec::Init() {
|
||||
hipError_t FillCommands(std::vector<std::vector<Node>>& parallelLists,
|
||||
std::unordered_map<Node, std::vector<Node>>& nodeWaitLists,
|
||||
std::vector<Node>& levelOrder, std::vector<amd::Command*>& rootCommands,
|
||||
amd::Command*& endCommand, amd::HostQueue* queue) {
|
||||
amd::Command*& endCommand, hip::Stream* stream) {
|
||||
hipError_t status;
|
||||
for (auto& node : levelOrder) {
|
||||
// TODO: clone commands from next launch
|
||||
@@ -758,7 +531,7 @@ hipError_t FillCommands(std::vector<std::vector<Node>>& parallelLists,
|
||||
}
|
||||
}
|
||||
if (!graphLastCmdWaitList.empty()) {
|
||||
endCommand = new amd::Marker(*queue, false, graphLastCmdWaitList);
|
||||
endCommand = new amd::Marker(*stream, false, graphLastCmdWaitList);
|
||||
if (endCommand == nullptr) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
@@ -787,8 +560,8 @@ void UpdateStream(std::vector<std::vector<Node>>& parallelLists, hip::Stream* st
|
||||
|
||||
hipError_t hipGraphExec::Run(hipStream_t stream) {
|
||||
hipError_t status;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
|
||||
if (hip::getStream(stream) == nullptr) {
|
||||
return hipErrorInvalidResourceHandle;
|
||||
}
|
||||
if (flags_ == hipGraphInstantiateFlagAutoFreeOnLaunch) {
|
||||
@@ -802,7 +575,7 @@ hipError_t hipGraphExec::Run(hipStream_t stream) {
|
||||
std::vector<amd::Command*> rootCommands;
|
||||
amd::Command* endCommand = nullptr;
|
||||
status =
|
||||
FillCommands(parallelLists_, nodeWaitLists_, levelOrder_, rootCommands, endCommand, queue);
|
||||
FillCommands(parallelLists_, nodeWaitLists_, levelOrder_, rootCommands, endCommand, hip_stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
|
||||
@@ -38,7 +38,7 @@ typedef hipGraphNode* Node;
|
||||
hipError_t FillCommands(std::vector<std::vector<Node>>& parallelLists,
|
||||
std::unordered_map<Node, std::vector<Node>>& nodeWaitLists,
|
||||
std::vector<Node>& levelOrder, std::vector<amd::Command*>& rootCommands,
|
||||
amd::Command*& endCommand, amd::HostQueue* queue);
|
||||
amd::Command*& endCommand, hip::Stream* stream);
|
||||
void UpdateStream(std::vector<std::vector<Node>>& parallelLists, hip::Stream* stream,
|
||||
hipGraphExec* ptr);
|
||||
|
||||
@@ -155,7 +155,6 @@ struct hipGraphNodeDOTAttribute {
|
||||
struct hipGraphNode : public hipGraphNodeDOTAttribute {
|
||||
protected:
|
||||
hip::Stream* stream_ = nullptr;
|
||||
amd::HostQueue* queue_;
|
||||
uint32_t level_;
|
||||
unsigned int id_;
|
||||
hipGraphNodeType type_;
|
||||
@@ -222,16 +221,15 @@ struct hipGraphNode : public hipGraphNodeDOTAttribute {
|
||||
return true;
|
||||
}
|
||||
|
||||
amd::HostQueue* GetQueue() { return queue_; }
|
||||
hip::Stream* GetQueue() { return stream_; }
|
||||
|
||||
virtual void SetStream(hip::Stream* stream, hipGraphExec* ptr = nullptr) {
|
||||
stream_ = stream;
|
||||
queue_ = stream->asHostQueue();
|
||||
}
|
||||
/// Create amd::command for the graph node
|
||||
virtual hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
virtual hipError_t CreateCommand(hip::Stream* stream) {
|
||||
commands_.clear();
|
||||
queue_ = queue;
|
||||
stream_ = stream;
|
||||
return hipSuccess;
|
||||
}
|
||||
/// Return node unique ID
|
||||
@@ -350,8 +348,8 @@ struct hipGraphNode : public hipGraphNodeDOTAttribute {
|
||||
(type_ == hipGraphNodeTypeKernel || type_ == hipGraphNodeTypeMemcpy ||
|
||||
type_ == hipGraphNodeTypeMemset)) {
|
||||
amd::Command::EventWaitList waitList;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList);
|
||||
command->enqueue();
|
||||
command->release();
|
||||
return;
|
||||
@@ -575,7 +573,9 @@ struct hipGraphExec {
|
||||
// new commands are launched for every launch they are destroyed as and when command is
|
||||
// terminated after it complete execution
|
||||
for (auto stream : parallel_streams_) {
|
||||
delete stream;
|
||||
if (stream != nullptr) {
|
||||
stream->release();
|
||||
}
|
||||
}
|
||||
for (auto it = clonedNodes_.begin(); it != clonedNodes_.end(); it++) delete it->second;
|
||||
amd::ScopedLock lock(graphExecSetLock_);
|
||||
@@ -645,7 +645,6 @@ struct hipChildGraphNode : public hipGraphNode {
|
||||
|
||||
void SetStream(hip::Stream* stream, hipGraphExec* ptr = nullptr) {
|
||||
stream_ = stream;
|
||||
queue_ = stream->asHostQueue();
|
||||
UpdateStream(parallelLists_, stream, ptr);
|
||||
}
|
||||
|
||||
@@ -654,8 +653,8 @@ struct hipChildGraphNode : public hipGraphNode {
|
||||
std::vector<amd::Command*>& GetCommands() { return parallelLists_[0].back()->GetCommands(); }
|
||||
|
||||
// Create child graph node commands and set waitlists
|
||||
hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(queue);
|
||||
hipError_t CreateCommand(hip::Stream* stream) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -663,7 +662,7 @@ struct hipChildGraphNode : public hipGraphNode {
|
||||
std::vector<amd::Command*> rootCommands;
|
||||
amd::Command* endCommand = nullptr;
|
||||
status = FillCommands(parallelLists_, nodeWaitLists_, childGraphlevelOrder_, rootCommands,
|
||||
endCommand, queue);
|
||||
endCommand, stream);
|
||||
for (auto& cmd : rootCommands) {
|
||||
commands_.push_back(cmd);
|
||||
}
|
||||
@@ -933,14 +932,14 @@ class hipGraphKernelNode : public hipGraphNode {
|
||||
return new hipGraphKernelNode(static_cast<hipGraphKernelNode const&>(*this));
|
||||
}
|
||||
|
||||
hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
hipError_t CreateCommand(hip::Stream* stream) {
|
||||
hipFunction_t func = nullptr;
|
||||
hipError_t status = validateKernelParams(pKernelParams_, &func,
|
||||
queue ? hip::getDeviceID(queue->context()) : -1);
|
||||
stream ? hip::getDeviceID(stream->context()) : -1);
|
||||
if (hipSuccess != status) {
|
||||
return status;
|
||||
}
|
||||
status = hipGraphNode::CreateCommand(queue);
|
||||
status = hipGraphNode::CreateCommand(stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -951,7 +950,7 @@ class hipGraphKernelNode : public hipGraphNode {
|
||||
pKernelParams_->gridDim.y * pKernelParams_->blockDim.y,
|
||||
pKernelParams_->gridDim.z * pKernelParams_->blockDim.z, pKernelParams_->blockDim.x,
|
||||
pKernelParams_->blockDim.y, pKernelParams_->blockDim.z, pKernelParams_->sharedMemBytes,
|
||||
queue, pKernelParams_->kernelParams, pKernelParams_->extra, nullptr, nullptr, 0, 0, 0, 0, 0,
|
||||
stream, pKernelParams_->kernelParams, pKernelParams_->extra, nullptr, nullptr, 0, 0, 0, 0, 0,
|
||||
0, 0);
|
||||
commands_.emplace_back(command);
|
||||
return status;
|
||||
@@ -1044,22 +1043,6 @@ class hipGraphKernelNode : public hipGraphNode {
|
||||
}
|
||||
return hipSuccess;
|
||||
}
|
||||
// ToDo: use this when commands are cloned and command params are to be updated
|
||||
hipError_t SetCommandParams(const hipKernelNodeParams* params) {
|
||||
// updates kernel params
|
||||
hipError_t status = validateKernelParams(params);
|
||||
if (hipSuccess != status) {
|
||||
return status;
|
||||
}
|
||||
size_t globalWorkOffset[3] = {0};
|
||||
size_t globalWorkSize[3] = {params->gridDim.x, params->gridDim.y, params->gridDim.z};
|
||||
size_t localWorkSize[3] = {params->blockDim.x, params->blockDim.y, params->blockDim.z};
|
||||
reinterpret_cast<amd::NDRangeKernelCommand*>(commands_[0])
|
||||
->setSizes(globalWorkOffset, globalWorkSize, localWorkSize);
|
||||
reinterpret_cast<amd::NDRangeKernelCommand*>(commands_[0])
|
||||
->setSharedMemBytes(params->sharedMemBytes);
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
hipError_t SetParams(hipGraphNode* node) {
|
||||
const hipGraphKernelNode* kernelNode = static_cast<hipGraphKernelNode const*>(node);
|
||||
@@ -1110,17 +1093,17 @@ class hipGraphMemcpyNode : public hipGraphNode {
|
||||
return new hipGraphMemcpyNode(static_cast<hipGraphMemcpyNode const&>(*this));
|
||||
}
|
||||
|
||||
hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
hipError_t CreateCommand(hip::Stream* stream) {
|
||||
if (IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->kind)) {
|
||||
return hipSuccess;
|
||||
}
|
||||
hipError_t status = hipGraphNode::CreateCommand(queue);
|
||||
hipError_t status = hipGraphNode::CreateCommand(stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
commands_.reserve(1);
|
||||
amd::Command* command;
|
||||
status = ihipMemcpy3DCommand(command, pCopyParams_, queue);
|
||||
status = ihipMemcpy3DCommand(command, pCopyParams_, stream);
|
||||
commands_.emplace_back(command);
|
||||
return status;
|
||||
}
|
||||
@@ -1129,7 +1112,7 @@ class hipGraphMemcpyNode : public hipGraphNode {
|
||||
if (isEnabled_ && IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->kind)) {
|
||||
ihipHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr,
|
||||
pCopyParams_->extent.width * pCopyParams_->extent.height *
|
||||
pCopyParams_->extent.depth, *hip::getQueue(stream));
|
||||
pCopyParams_->extent.depth, *hip::getStream(stream));
|
||||
return;
|
||||
}
|
||||
hipGraphNode::EnqueueCommands(stream);
|
||||
@@ -1150,8 +1133,6 @@ class hipGraphMemcpyNode : public hipGraphNode {
|
||||
const hipGraphMemcpyNode* memcpyNode = static_cast<hipGraphMemcpyNode const*>(node);
|
||||
return SetParams(memcpyNode->pCopyParams_);
|
||||
}
|
||||
// ToDo: use this when commands are cloned and command params are to be updated
|
||||
hipError_t SetCommandParams(const hipMemcpy3DParms* pNodeParams);
|
||||
hipError_t ValidateParams(const hipMemcpy3DParms* pNodeParams);
|
||||
std::string GetLabel(hipGraphDebugDotFlags flag) {
|
||||
const HIP_MEMCPY3D pCopy = hip::getDrvMemcpy3DDesc(*pCopyParams_);
|
||||
@@ -1256,17 +1237,17 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
|
||||
return new hipGraphMemcpyNode1D(static_cast<hipGraphMemcpyNode1D const&>(*this));
|
||||
}
|
||||
|
||||
virtual hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
virtual hipError_t CreateCommand(hip::Stream* stream) {
|
||||
if (IsHtoHMemcpy(dst_, src_, kind_)) {
|
||||
return hipSuccess;
|
||||
}
|
||||
hipError_t status = hipGraphNode::CreateCommand(queue);
|
||||
hipError_t status = hipGraphNode::CreateCommand(stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
commands_.reserve(1);
|
||||
amd::Command* command = nullptr;
|
||||
status = ihipMemcpyCommand(command, dst_, src_, count_, kind_, *queue);
|
||||
status = ihipMemcpyCommand(command, dst_, src_, count_, kind_, *stream);
|
||||
commands_.emplace_back(command);
|
||||
return status;
|
||||
}
|
||||
@@ -1281,14 +1262,14 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
|
||||
if (isEnabled_) {
|
||||
//HtoH
|
||||
if (isH2H) {
|
||||
ihipHtoHMemcpy(dst_, src_, count_, *hip::getQueue(stream));
|
||||
ihipHtoHMemcpy(dst_, src_, count_, *hip::getStream(stream));
|
||||
return;
|
||||
}
|
||||
amd::Command* command = commands_[0];
|
||||
amd::HostQueue* cmdQueue = command->queue();
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
|
||||
if (cmdQueue == queue) {
|
||||
if (cmdQueue == hip_stream) {
|
||||
command->enqueue();
|
||||
command->release();
|
||||
return;
|
||||
@@ -1296,7 +1277,7 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
|
||||
|
||||
amd::Command::EventWaitList waitList;
|
||||
amd::Command* depdentMarker = nullptr;
|
||||
amd::Command* cmd = queue->getLastQueuedCommand(true);
|
||||
amd::Command* cmd = hip_stream->getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
waitList.push_back(cmd);
|
||||
amd::Command* depdentMarker = new amd::Marker(*cmdQueue, true, waitList);
|
||||
@@ -1313,7 +1294,7 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
|
||||
if (cmd != nullptr) {
|
||||
waitList.clear();
|
||||
waitList.push_back(cmd);
|
||||
amd::Command* depdentMarker = new amd::Marker(*queue, true, waitList);
|
||||
amd::Command* depdentMarker = new amd::Marker(*hip_stream, true, waitList);
|
||||
if (depdentMarker != nullptr) {
|
||||
depdentMarker->enqueue(); // Make sure future commands of queue synced with command
|
||||
depdentMarker->release();
|
||||
@@ -1322,8 +1303,8 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
|
||||
}
|
||||
} else {
|
||||
amd::Command::EventWaitList waitList;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, waitList);
|
||||
command->enqueue();
|
||||
command->release();
|
||||
}
|
||||
@@ -1346,8 +1327,6 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
|
||||
return SetParams(memcpy1DNode->dst_, memcpy1DNode->src_, memcpy1DNode->count_,
|
||||
memcpy1DNode->kind_);
|
||||
}
|
||||
// ToDo: use this when commands are cloned and command params are to be updated
|
||||
hipError_t SetCommandParams(void* dst, const void* src, size_t count, hipMemcpyKind kind);
|
||||
static hipError_t ValidateParams(void* dst, const void* src, size_t count, hipMemcpyKind kind);
|
||||
std::string GetLabel(hipGraphDebugDotFlags flag) {
|
||||
size_t sOffsetOrig = 0;
|
||||
@@ -1414,8 +1393,8 @@ class hipGraphMemcpyNodeFromSymbol : public hipGraphMemcpyNode1D {
|
||||
static_cast<hipGraphMemcpyNodeFromSymbol const&>(*this));
|
||||
}
|
||||
|
||||
hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(queue);
|
||||
hipError_t CreateCommand(hip::Stream* stream) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -1428,7 +1407,7 @@ class hipGraphMemcpyNodeFromSymbol : public hipGraphMemcpyNode1D {
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
status = ihipMemcpyCommand(command, dst_, device_ptr, count_, kind_, *queue);
|
||||
status = ihipMemcpyCommand(command, dst_, device_ptr, count_, kind_, *stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -1474,18 +1453,6 @@ class hipGraphMemcpyNodeFromSymbol : public hipGraphMemcpyNode1D {
|
||||
return SetParams(memcpyNode->dst_, memcpyNode->symbol_, memcpyNode->count_, memcpyNode->offset_,
|
||||
memcpyNode->kind_);
|
||||
}
|
||||
// ToDo: use this when commands are cloned and command params are to be updated
|
||||
hipError_t SetCommandParams(void* dst, const void* symbol, size_t count, size_t offset,
|
||||
hipMemcpyKind kind) {
|
||||
size_t sym_size = 0;
|
||||
hipDeviceptr_t device_ptr = nullptr;
|
||||
|
||||
hipError_t status = ihipMemcpySymbol_validate(symbol, count, offset, sym_size, device_ptr);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
return hipGraphMemcpyNode1D::SetCommandParams(dst, device_ptr, count, kind);
|
||||
}
|
||||
};
|
||||
class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D {
|
||||
const void* symbol_;
|
||||
@@ -1504,8 +1471,8 @@ class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D {
|
||||
return new hipGraphMemcpyNodeToSymbol(static_cast<hipGraphMemcpyNodeToSymbol const&>(*this));
|
||||
}
|
||||
|
||||
hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(queue);
|
||||
hipError_t CreateCommand(hip::Stream* stream) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -1518,7 +1485,7 @@ class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D {
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
status = ihipMemcpyCommand(command, device_ptr, src_, count_, kind_, *queue);
|
||||
status = ihipMemcpyCommand(command, device_ptr, src_, count_, kind_, *stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -1562,18 +1529,6 @@ class hipGraphMemcpyNodeToSymbol : public hipGraphMemcpyNode1D {
|
||||
return SetParams(memcpyNode->src_, memcpyNode->symbol_, memcpyNode->count_, memcpyNode->offset_,
|
||||
memcpyNode->kind_);
|
||||
}
|
||||
// ToDo: use this when commands are cloned and command params are to be updated
|
||||
hipError_t SetCommandParams(const void* symbol, const void* src, size_t count, size_t offset,
|
||||
hipMemcpyKind kind) {
|
||||
size_t sym_size = 0;
|
||||
hipDeviceptr_t device_ptr = nullptr;
|
||||
|
||||
hipError_t status = ihipMemcpySymbol_validate(symbol, count, offset, sym_size, device_ptr);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
return hipGraphMemcpyNode1D::SetCommandParams(device_ptr, src, count, kind);
|
||||
}
|
||||
};
|
||||
|
||||
class hipGraphMemsetNode : public hipGraphNode {
|
||||
@@ -1633,21 +1588,21 @@ class hipGraphMemsetNode : public hipGraphNode {
|
||||
}
|
||||
}
|
||||
|
||||
hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(queue);
|
||||
hipError_t CreateCommand(hip::Stream* stream) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
if (pMemsetParams_->height == 1) {
|
||||
size_t sizeBytes = pMemsetParams_->width * pMemsetParams_->elementSize;
|
||||
hipError_t status = ihipMemsetCommand(commands_, pMemsetParams_->dst, pMemsetParams_->value,
|
||||
pMemsetParams_->elementSize, sizeBytes, queue);
|
||||
pMemsetParams_->elementSize, sizeBytes, stream);
|
||||
} else {
|
||||
hipError_t status = ihipMemset3DCommand(
|
||||
commands_,
|
||||
{pMemsetParams_->dst, pMemsetParams_->pitch, pMemsetParams_->width * pMemsetParams_->elementSize,
|
||||
pMemsetParams_->height},
|
||||
pMemsetParams_->value, {pMemsetParams_->width * pMemsetParams_->elementSize, pMemsetParams_->height, 1}, queue, pMemsetParams_->elementSize);
|
||||
pMemsetParams_->value, {pMemsetParams_->width * pMemsetParams_->elementSize, pMemsetParams_->height, 1}, stream, pMemsetParams_->elementSize);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
@@ -1706,15 +1661,15 @@ class hipGraphEventRecordNode : public hipGraphNode {
|
||||
return new hipGraphEventRecordNode(static_cast<hipGraphEventRecordNode const&>(*this));
|
||||
}
|
||||
|
||||
hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(queue);
|
||||
hipError_t CreateCommand(hip::Stream* stream) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
hip::Event* e = reinterpret_cast<hip::Event*>(event_);
|
||||
commands_.reserve(1);
|
||||
amd::Command* command = nullptr;
|
||||
status = e->recordCommand(command, queue);
|
||||
status = e->recordCommand(command, stream);
|
||||
commands_.emplace_back(command);
|
||||
return status;
|
||||
}
|
||||
@@ -1744,16 +1699,6 @@ class hipGraphEventRecordNode : public hipGraphNode {
|
||||
static_cast<hipGraphEventRecordNode const*>(node);
|
||||
return SetParams(eventRecordNode->event_);
|
||||
}
|
||||
// ToDo: use this when commands are cloned and command params are to be updated
|
||||
hipError_t SetCommandParams(hipEvent_t event) {
|
||||
amd::HostQueue* queue;
|
||||
if (!commands_.empty()) {
|
||||
queue = commands_[0]->queue();
|
||||
commands_[0]->release();
|
||||
}
|
||||
commands_.clear();
|
||||
return CreateCommand(queue);
|
||||
}
|
||||
};
|
||||
|
||||
class hipGraphEventWaitNode : public hipGraphNode {
|
||||
@@ -1769,15 +1714,15 @@ class hipGraphEventWaitNode : public hipGraphNode {
|
||||
return new hipGraphEventWaitNode(static_cast<hipGraphEventWaitNode const&>(*this));
|
||||
}
|
||||
|
||||
hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(queue);
|
||||
hipError_t CreateCommand(hip::Stream* stream) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
hip::Event* e = reinterpret_cast<hip::Event*>(event_);
|
||||
commands_.reserve(1);
|
||||
amd::Command* command;
|
||||
status = e->streamWaitCommand(command, queue);
|
||||
status = e->streamWaitCommand(command, stream);
|
||||
commands_.emplace_back(command);
|
||||
return status;
|
||||
}
|
||||
@@ -1806,16 +1751,6 @@ class hipGraphEventWaitNode : public hipGraphNode {
|
||||
const hipGraphEventWaitNode* eventWaitNode = static_cast<hipGraphEventWaitNode const*>(node);
|
||||
return SetParams(eventWaitNode->event_);
|
||||
}
|
||||
// ToDo: use this when commands are cloned and command params are to be updated
|
||||
hipError_t SetCommandParams(hipEvent_t event) {
|
||||
amd::HostQueue* queue;
|
||||
if (!commands_.empty()) {
|
||||
queue = commands_[0]->queue();
|
||||
commands_[0]->release();
|
||||
}
|
||||
commands_.clear();
|
||||
return CreateCommand(queue);
|
||||
}
|
||||
};
|
||||
|
||||
class hipGraphHostNode : public hipGraphNode {
|
||||
@@ -1836,14 +1771,14 @@ class hipGraphHostNode : public hipGraphNode {
|
||||
return new hipGraphHostNode(static_cast<hipGraphHostNode const&>(*this));
|
||||
}
|
||||
|
||||
hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(queue);
|
||||
hipError_t CreateCommand(hip::Stream* stream) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
amd::Command::EventWaitList waitList;
|
||||
commands_.reserve(1);
|
||||
amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList);
|
||||
amd::Command* command = new amd::Marker(*stream, !kMarkerDisableFlush, waitList);
|
||||
commands_.emplace_back(command);
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -1885,8 +1820,6 @@ class hipGraphHostNode : public hipGraphNode {
|
||||
const hipGraphHostNode* hostNode = static_cast<hipGraphHostNode const*>(node);
|
||||
return SetParams(hostNode->pNodeParams_);
|
||||
}
|
||||
// ToDo: use this when commands are cloned and command params are to be updated
|
||||
hipError_t SetCommandParams(const hipHostNodeParams* params);
|
||||
};
|
||||
|
||||
class hipGraphEmptyNode : public hipGraphNode {
|
||||
@@ -1898,14 +1831,14 @@ class hipGraphEmptyNode : public hipGraphNode {
|
||||
return new hipGraphEmptyNode(static_cast<hipGraphEmptyNode const&>(*this));
|
||||
}
|
||||
|
||||
hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(queue);
|
||||
hipError_t CreateCommand(hip::Stream* stream) {
|
||||
hipError_t status = hipGraphNode::CreateCommand(stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
amd::Command::EventWaitList waitList;
|
||||
commands_.reserve(1);
|
||||
amd::Command* command = new amd::Marker(*queue, !kMarkerDisableFlush, waitList);
|
||||
amd::Command* command = new amd::Marker(*stream, !kMarkerDisableFlush, waitList);
|
||||
commands_.emplace_back(command);
|
||||
return hipSuccess;
|
||||
}
|
||||
@@ -1925,8 +1858,8 @@ class hipGraphMemAllocNode : public hipGraphNode {
|
||||
return new hipGraphMemAllocNode(static_cast<hipGraphMemAllocNode const&>(*this));
|
||||
}
|
||||
|
||||
virtual hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
auto error = hipGraphNode::CreateCommand(queue);
|
||||
virtual hipError_t CreateCommand(hip::Stream* stream) {
|
||||
auto error = hipGraphNode::CreateCommand(stream);
|
||||
auto ptr = Execute(stream_);
|
||||
return error;
|
||||
}
|
||||
@@ -1966,8 +1899,8 @@ class hipGraphMemFreeNode : public hipGraphNode {
|
||||
return new hipGraphMemFreeNode(static_cast<hipGraphMemFreeNode const&>(*this));
|
||||
}
|
||||
|
||||
virtual hipError_t CreateCommand(amd::HostQueue* queue) {
|
||||
auto error = hipGraphNode::CreateCommand(queue);
|
||||
virtual hipError_t CreateCommand(hip::Stream* stream) {
|
||||
auto error = hipGraphNode::CreateCommand(stream);
|
||||
Execute(stream_);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -94,7 +94,7 @@ hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device,
|
||||
HIP_RETURN(hipErrorInvalidDevice);
|
||||
}
|
||||
|
||||
amd::HostQueue* queue = nullptr;
|
||||
hip::Stream* hip_stream = nullptr;
|
||||
amd::Device* dev = nullptr;
|
||||
bool cpu_access = false;
|
||||
|
||||
@@ -106,19 +106,19 @@ hipError_t hipMemPrefetchAsync(const void* dev_ptr, size_t count, int device,
|
||||
// Pick the specified stream or Null one from the provided device
|
||||
if (device == hipCpuDeviceId) {
|
||||
cpu_access = true;
|
||||
queue = (stream == nullptr) ? hip::getCurrentDevice()->NullStream() : hip::getQueue(stream);
|
||||
hip_stream = (stream == nullptr) ? hip::getCurrentDevice()->NullStream() : hip::getStream(stream);
|
||||
} else {
|
||||
dev = g_devices[device]->devices()[0];
|
||||
queue = (stream == nullptr) ? g_devices[device]->NullStream() : hip::getQueue(stream);
|
||||
hip_stream = (stream == nullptr) ? g_devices[device]->NullStream() : hip::getStream(stream);
|
||||
}
|
||||
|
||||
if (queue == nullptr) {
|
||||
if (hip_stream == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
amd::Command::EventWaitList waitList;
|
||||
amd::SvmPrefetchAsyncCommand* command =
|
||||
new amd::SvmPrefetchAsyncCommand(*queue, waitList, dev_ptr, count, dev, cpu_access);
|
||||
new amd::SvmPrefetchAsyncCommand(*hip_stream, waitList, dev_ptr, count, dev, cpu_access);
|
||||
if (command == nullptr) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
@@ -225,12 +225,11 @@ public:
|
||||
namespace hip {
|
||||
class Device;
|
||||
class MemoryPool;
|
||||
class Stream {
|
||||
class Stream : public amd::HostQueue {
|
||||
public:
|
||||
enum Priority : int { High = -1, Normal = 0, Low = 1 };
|
||||
|
||||
private:
|
||||
amd::HostQueue* queue_;
|
||||
mutable amd::Monitor lock_;
|
||||
Device* device_;
|
||||
Priority priority_;
|
||||
@@ -260,18 +259,20 @@ namespace hip {
|
||||
/// Capture events
|
||||
std::unordered_set<hipEvent_t> captureEvents_;
|
||||
unsigned long long captureID_;
|
||||
|
||||
static inline CommandQueue::Priority convertToQueuePriority(Priority p){
|
||||
return p == Priority::High ? amd::CommandQueue::Priority::High : p == Priority::Low ?
|
||||
amd::CommandQueue::Priority::Low : amd::CommandQueue::Priority::Normal;
|
||||
}
|
||||
|
||||
public:
|
||||
Stream(Device* dev, Priority p = Priority::Normal, unsigned int f = 0, bool null_stream = false,
|
||||
const std::vector<uint32_t>& cuMask = {},
|
||||
hipStreamCaptureStatus captureStatus = hipStreamCaptureStatusNone);
|
||||
~Stream();
|
||||
|
||||
/// Creates the hip stream object, including AMD host queue
|
||||
bool Create();
|
||||
|
||||
/// Get device AMD host queue object. The method can allocate the queue
|
||||
amd::HostQueue* asHostQueue(bool skip_alloc = false);
|
||||
|
||||
void Finish() const;
|
||||
virtual bool terminate() override;
|
||||
/// Get device ID associated with the current stream;
|
||||
int DeviceId() const;
|
||||
/// Get HIP device associated with the stream
|
||||
@@ -378,6 +379,7 @@ namespace hip {
|
||||
parallelCaptureStreams_.erase(it);
|
||||
}
|
||||
}
|
||||
static bool existsActiveStreamForDevice(hip::Device* device);
|
||||
};
|
||||
|
||||
/// HIP Device class
|
||||
@@ -389,7 +391,7 @@ namespace hip {
|
||||
/// Store it here so we don't have to loop through the device list every time
|
||||
int deviceId_;
|
||||
/// ROCclr host queue for default streams
|
||||
Stream null_stream_;
|
||||
Stream* null_stream_ = nullptr;
|
||||
/// Store device flags
|
||||
unsigned int flags_;
|
||||
/// Maintain list of user enabled peers
|
||||
@@ -398,7 +400,6 @@ namespace hip {
|
||||
/// True if this device is active
|
||||
bool isActive_;
|
||||
|
||||
std::vector<amd::HostQueue*> queues_;
|
||||
|
||||
MemoryPool* default_mem_pool_;
|
||||
MemoryPool* current_mem_pool_;
|
||||
@@ -408,7 +409,6 @@ namespace hip {
|
||||
public:
|
||||
Device(amd::Context* ctx, int devId): context_(ctx),
|
||||
deviceId_(devId),
|
||||
null_stream_(this, Stream::Priority::Normal, 0, true),
|
||||
flags_(hipDeviceScheduleSpin),
|
||||
isActive_(false),
|
||||
default_mem_pool_(nullptr),
|
||||
@@ -445,22 +445,16 @@ namespace hip {
|
||||
void setFlags(unsigned int flags) { flags_ = flags; }
|
||||
void Reset();
|
||||
|
||||
amd::HostQueue* NullStream(bool skip_alloc = false);
|
||||
Stream* GetNullStream();
|
||||
hip::Stream* NullStream(bool skip_alloc = false);
|
||||
Stream* GetNullStream();
|
||||
|
||||
void SaveQueue(amd::HostQueue* queue) {
|
||||
amd::ScopedLock lock(lock_);
|
||||
queues_.push_back(queue);
|
||||
}
|
||||
|
||||
bool GetActiveStatus() {
|
||||
amd::ScopedLock lock(lock_);
|
||||
if (isActive_) return true;
|
||||
for (int i = 0; i < queues_.size(); i++) {
|
||||
if (queues_[i]->GetQueueStatus()) {
|
||||
isActive_ = true;
|
||||
return true;
|
||||
}
|
||||
if (Stream::existsActiveStreamForDevice(this)) {
|
||||
isActive_ = true;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@@ -524,11 +518,11 @@ namespace hip {
|
||||
/// Get ROCclr queue associated with hipStream
|
||||
/// Note: This follows the CUDA spec to sync with default streams
|
||||
/// and Blocking streams
|
||||
extern amd::HostQueue* getQueue(hipStream_t stream);
|
||||
extern hip::Stream* getStream(hipStream_t stream);
|
||||
/// Get default stream associated with the ROCclr context
|
||||
extern amd::HostQueue* getNullStream(amd::Context&);
|
||||
extern hip::Stream* getNullStream(amd::Context&);
|
||||
/// Get default stream of the thread
|
||||
extern amd::HostQueue* getNullStream();
|
||||
extern hip::Stream* getNullStream();
|
||||
/// Get device ID associated with the ROCclr context
|
||||
int getDeviceID(amd::Context& ctx);
|
||||
/// Check if stream is valid
|
||||
@@ -542,7 +536,7 @@ extern void WaitThenDecrementSignal(hipStream_t stream, hipError_t status, void*
|
||||
|
||||
/// Wait all active streams on the blocking queue. The method enqueues a wait command and
|
||||
/// doesn't stall the current thread
|
||||
extern void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream = false);
|
||||
extern void iHipWaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stream = false);
|
||||
|
||||
extern std::vector<hip::Device*> g_devices;
|
||||
extern hipError_t ihipDeviceGetCount(int* count);
|
||||
|
||||
+145
-145
@@ -78,9 +78,9 @@ hipError_t ihipFree(void *ptr) {
|
||||
auto dev = g_devices[device_id];
|
||||
// Skip stream allocation, since if it wasn't allocated until free, then the device wasn't used
|
||||
constexpr bool SkipStreamAlloc = true;
|
||||
amd::HostQueue* queue = dev->NullStream(SkipStreamAlloc);
|
||||
if (queue != nullptr) {
|
||||
queue->finish();
|
||||
hip::Stream* stream = dev->NullStream(SkipStreamAlloc);
|
||||
if (stream != nullptr) {
|
||||
stream->finish();
|
||||
}
|
||||
hip::Stream::syncNonBlockingStreams(device_id);
|
||||
// Find out if memory belongs to any memory pool
|
||||
@@ -195,15 +195,15 @@ hipError_t hipSignalExternalSemaphoresAsync(
|
||||
if (extSemArray == nullptr || paramsArray == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < numExtSems; i++) {
|
||||
if (extSemArray[i] != nullptr) {
|
||||
amd::ExternalSemaphoreCmd* command =
|
||||
new amd::ExternalSemaphoreCmd(*queue, extSemArray[i], paramsArray[i].params.fence.value,
|
||||
new amd::ExternalSemaphoreCmd(*hip_stream, extSemArray[i], paramsArray[i].params.fence.value,
|
||||
amd::ExternalSemaphoreCmd::COMMAND_SIGNAL_EXTSEMAPHORE);
|
||||
if (command == nullptr) {
|
||||
return hipErrorOutOfMemory;
|
||||
@@ -227,15 +227,15 @@ hipError_t hipWaitExternalSemaphoresAsync(const hipExternalSemaphore_t* extSemAr
|
||||
if (extSemArray == nullptr || paramsArray == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < numExtSems; i++) {
|
||||
if (extSemArray[i] != nullptr) {
|
||||
amd::ExternalSemaphoreCmd* command =
|
||||
new amd::ExternalSemaphoreCmd(*queue, extSemArray[i], paramsArray[i].params.fence.value,
|
||||
new amd::ExternalSemaphoreCmd(*hip_stream, extSemArray[i], paramsArray[i].params.fence.value,
|
||||
amd::ExternalSemaphoreCmd::COMMAND_WAIT_EXTSEMAPHORE);
|
||||
if (command == nullptr) {
|
||||
return hipErrorOutOfMemory;
|
||||
@@ -343,35 +343,35 @@ hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes,
|
||||
}
|
||||
|
||||
hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, size_t sizeBytes,
|
||||
hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync) {
|
||||
hipMemcpyKind kind, hip::Stream& stream, bool isAsync) {
|
||||
amd::Command::EventWaitList waitList;
|
||||
size_t sOffset = 0;
|
||||
amd::Memory* srcMemory = getMemoryObject(src, sOffset);
|
||||
size_t dOffset = 0;
|
||||
amd::Memory* dstMemory = getMemoryObject(dst, dOffset);
|
||||
amd::Device* queueDevice = &queue.device();
|
||||
amd::Device* queueDevice = &stream.device();
|
||||
amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA);
|
||||
if ((srcMemory == nullptr) && (dstMemory != nullptr)) {
|
||||
amd::HostQueue* pQueue = &queue;
|
||||
hip::Stream* pStream = &stream;
|
||||
if (queueDevice != dstMemory->getContext().devices()[0]) {
|
||||
pQueue = hip::getNullStream(dstMemory->getContext());
|
||||
amd::Command* cmd = queue.getLastQueuedCommand(true);
|
||||
pStream = hip::getNullStream(dstMemory->getContext());
|
||||
amd::Command* cmd = stream.getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
waitList.push_back(cmd);
|
||||
}
|
||||
}
|
||||
command = new amd::WriteMemoryCommand(*pQueue, CL_COMMAND_WRITE_BUFFER, waitList,
|
||||
command = new amd::WriteMemoryCommand(*pStream, CL_COMMAND_WRITE_BUFFER, waitList,
|
||||
*dstMemory->asBuffer(), dOffset, sizeBytes, src, 0, 0, copyMetadata);
|
||||
} else if ((srcMemory != nullptr) && (dstMemory == nullptr)) {
|
||||
amd::HostQueue* pQueue = &queue;
|
||||
hip::Stream* pStream = &stream;
|
||||
if (queueDevice != srcMemory->getContext().devices()[0]) {
|
||||
pQueue = hip::getNullStream(srcMemory->getContext());
|
||||
amd::Command* cmd = queue.getLastQueuedCommand(true);
|
||||
pStream = hip::getNullStream(srcMemory->getContext());
|
||||
amd::Command* cmd = stream.getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
waitList.push_back(cmd);
|
||||
}
|
||||
}
|
||||
command = new amd::ReadMemoryCommand(*pQueue, CL_COMMAND_READ_BUFFER, waitList,
|
||||
command = new amd::ReadMemoryCommand(*pStream, CL_COMMAND_READ_BUFFER, waitList,
|
||||
*srcMemory->asBuffer(), sOffset, sizeBytes, dst, 0, 0, copyMetadata);
|
||||
} else if ((srcMemory != nullptr) && (dstMemory != nullptr)) {
|
||||
// Check if the queue device doesn't match the device on any memory object.
|
||||
@@ -380,7 +380,7 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
|
||||
if ((srcMemory->getContext().devices()[0] != dstMemory->getContext().devices()[0]) &&
|
||||
((srcMemory->getContext().devices().size() == 1) &&
|
||||
(dstMemory->getContext().devices().size() == 1))) {
|
||||
command = new amd::CopyMemoryP2PCommand(queue, CL_COMMAND_COPY_BUFFER, waitList,
|
||||
command = new amd::CopyMemoryP2PCommand(stream, CL_COMMAND_COPY_BUFFER, waitList,
|
||||
*srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, sizeBytes);
|
||||
if (command == nullptr) {
|
||||
return hipErrorOutOfMemory;
|
||||
@@ -392,12 +392,12 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
} else {
|
||||
amd::HostQueue* pQueue = &queue;
|
||||
hip::Stream* pStream = &stream;
|
||||
if ((srcMemory->getContext().devices()[0] == dstMemory->getContext().devices()[0]) &&
|
||||
(queueDevice != srcMemory->getContext().devices()[0])) {
|
||||
copyMetadata.copyEnginePreference_ = amd::CopyMetadata::CopyEnginePreference::NONE;
|
||||
pQueue = hip::getNullStream(srcMemory->getContext());
|
||||
amd::Command* cmd = queue.getLastQueuedCommand(true);
|
||||
pStream = hip::getNullStream(srcMemory->getContext());
|
||||
amd::Command* cmd = stream.getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
waitList.push_back(cmd);
|
||||
}
|
||||
@@ -405,22 +405,22 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
|
||||
// Scenarios such as DtoH where dst is pinned memory
|
||||
if ((queueDevice != srcMemory->getContext().devices()[0]) &&
|
||||
(dstMemory->getContext().devices().size() != 1)) {
|
||||
pQueue = hip::getNullStream(srcMemory->getContext());
|
||||
amd::Command* cmd = queue.getLastQueuedCommand(true);
|
||||
pStream = hip::getNullStream(srcMemory->getContext());
|
||||
amd::Command* cmd = stream.getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
waitList.push_back(cmd);
|
||||
}
|
||||
// Scenarios such as HtoD where src is pinned memory
|
||||
} else if ((queueDevice != dstMemory->getContext().devices()[0]) &&
|
||||
(srcMemory->getContext().devices().size() != 1)) {
|
||||
pQueue = hip::getNullStream(dstMemory->getContext());
|
||||
amd::Command* cmd = queue.getLastQueuedCommand(true);
|
||||
pStream = hip::getNullStream(dstMemory->getContext());
|
||||
amd::Command* cmd = stream.getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
waitList.push_back(cmd);
|
||||
}
|
||||
}
|
||||
}
|
||||
command = new amd::CopyMemoryCommand(*pQueue, CL_COMMAND_COPY_BUFFER, waitList,
|
||||
command = new amd::CopyMemoryCommand(*pStream, CL_COMMAND_COPY_BUFFER, waitList,
|
||||
*srcMemory->asBuffer(), *dstMemory->asBuffer(), sOffset, dOffset, sizeBytes,
|
||||
copyMetadata);
|
||||
}
|
||||
@@ -445,13 +445,13 @@ bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind) {
|
||||
}
|
||||
return false;
|
||||
}
|
||||
void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, amd::HostQueue& queue) {
|
||||
queue.finish();
|
||||
void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, hip::Stream& stream) {
|
||||
stream.finish();
|
||||
memcpy(dst, src, sizeBytes);
|
||||
}
|
||||
// ================================================================================================
|
||||
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
amd::HostQueue& queue, bool isAsync = false) {
|
||||
hip::Stream& stream, bool isAsync = false) {
|
||||
hipError_t status;
|
||||
if (sizeBytes == 0) {
|
||||
// Skip if nothing needs writing.
|
||||
@@ -470,7 +470,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
|
||||
size_t dOffset = 0;
|
||||
amd::Memory* dstMemory = getMemoryObject(dst, dOffset);
|
||||
if (srcMemory == nullptr && dstMemory == nullptr) {
|
||||
ihipHtoHMemcpy(dst, src, sizeBytes, queue);
|
||||
ihipHtoHMemcpy(dst, src, sizeBytes, stream);
|
||||
return hipSuccess;
|
||||
} else if ((srcMemory == nullptr) && (dstMemory != nullptr)) {
|
||||
isAsync = false;
|
||||
@@ -483,7 +483,7 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
|
||||
isP2P = true;
|
||||
}
|
||||
amd::Command* command = nullptr;
|
||||
status = ihipMemcpyCommand(command, dst, src, sizeBytes, kind, queue, isAsync);
|
||||
status = ihipMemcpyCommand(command, dst, src, sizeBytes, kind, stream, isAsync);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -491,22 +491,22 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
|
||||
if (!isAsync) {
|
||||
command->awaitCompletion();
|
||||
} else if (isP2P) {
|
||||
amd::HostQueue* pQueue = hip::getNullStream(dstMemory->getContext());
|
||||
hip::Stream* pStream = hip::getNullStream(dstMemory->getContext());
|
||||
amd::Command::EventWaitList waitList;
|
||||
waitList.push_back(command);
|
||||
amd::Command* depdentMarker = new amd::Marker(*pQueue, false, waitList);
|
||||
amd::Command* depdentMarker = new amd::Marker(*pStream, false, waitList);
|
||||
if (depdentMarker != nullptr) {
|
||||
depdentMarker->enqueue();
|
||||
depdentMarker->release();
|
||||
}
|
||||
} else {
|
||||
amd::HostQueue* newQueue = command->queue();
|
||||
if (newQueue != &queue) {
|
||||
if (newQueue != &stream) {
|
||||
amd::Command::EventWaitList waitList;
|
||||
amd::Command* cmd = newQueue->getLastQueuedCommand(true);
|
||||
if (cmd != nullptr) {
|
||||
waitList.push_back(cmd);
|
||||
amd::Command* depdentMarker = new amd::Marker(queue, true, waitList);
|
||||
amd::Command* depdentMarker = new amd::Marker(stream, true, waitList);
|
||||
if (depdentMarker != nullptr) {
|
||||
depdentMarker->enqueue();
|
||||
depdentMarker->release();
|
||||
@@ -611,18 +611,18 @@ hipError_t hipFree(void* ptr) {
|
||||
hipError_t hipMemcpy_common(void* dst, const void* src, size_t sizeBytes,
|
||||
hipMemcpyKind kind, hipStream_t stream = nullptr) {
|
||||
CHECK_STREAM_CAPTURING();
|
||||
amd::HostQueue* queue = nullptr;
|
||||
hip::Stream* hip_stream = nullptr;
|
||||
|
||||
if (stream != nullptr) {
|
||||
queue = hip::getQueue(stream);
|
||||
hip_stream = hip::getStream(stream);
|
||||
} else {
|
||||
queue = hip::getNullStream();
|
||||
hip_stream = hip::getNullStream();
|
||||
}
|
||||
|
||||
if (queue == nullptr) {
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
return ihipMemcpy(dst, src, sizeBytes, kind, *queue);
|
||||
return ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind) {
|
||||
@@ -643,12 +643,12 @@ hipError_t hipMemcpyWithStream(void* dst, const void* src, size_t sizeBytes,
|
||||
HIP_RETURN(hipErrorContextIsDestroyed);
|
||||
}
|
||||
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
HIP_RETURN_DURATION(ihipMemcpy(dst, src, sizeBytes, kind, *queue, false));
|
||||
HIP_RETURN_DURATION(ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream, false));
|
||||
}
|
||||
|
||||
hipError_t hipMemPtrGetInfo(void *ptr, size_t *size) {
|
||||
@@ -697,9 +697,9 @@ hipError_t ihipArrayDestroy(hipArray* array) {
|
||||
}
|
||||
|
||||
for (auto& dev : g_devices) {
|
||||
amd::HostQueue* queue = dev->NullStream(true);
|
||||
if (queue != nullptr) {
|
||||
queue->finish();
|
||||
hip::Stream* stream = dev->NullStream(true);
|
||||
if (stream != nullptr) {
|
||||
stream->finish();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1205,9 +1205,9 @@ hipError_t ihipHostUnregister(void* hostPtr) {
|
||||
// Wait on the device, associated with the current memory object during allocation
|
||||
auto device_id = mem->getUserData().deviceId;
|
||||
|
||||
amd::HostQueue* queue = g_devices[device_id]->NullStream(true);
|
||||
if (queue != nullptr) {
|
||||
queue->finish();
|
||||
hip::Stream* stream = g_devices[device_id]->NullStream(true);
|
||||
if (stream != nullptr) {
|
||||
stream->finish();
|
||||
}
|
||||
|
||||
amd::MemObjMap::RemoveMemObj(hostPtr);
|
||||
@@ -1392,11 +1392,11 @@ hipError_t hipMemcpyHtoD(hipDeviceptr_t dstDevice,
|
||||
size_t ByteCount) {
|
||||
HIP_INIT_API(hipMemcpyHtoD, dstDevice, srcHost, ByteCount);
|
||||
CHECK_STREAM_CAPTURING();
|
||||
amd::HostQueue* queue = hip::getQueue(nullptr);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* stream = hip::getStream(nullptr);
|
||||
if (stream == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcHost, ByteCount, hipMemcpyHostToDevice, *queue));
|
||||
HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcHost, ByteCount, hipMemcpyHostToDevice, *stream));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyDtoH(void* dstHost,
|
||||
@@ -1404,11 +1404,11 @@ hipError_t hipMemcpyDtoH(void* dstHost,
|
||||
size_t ByteCount) {
|
||||
HIP_INIT_API(hipMemcpyDtoH, dstHost, srcDevice, ByteCount);
|
||||
CHECK_STREAM_CAPTURING();
|
||||
amd::HostQueue* queue = hip::getQueue(nullptr);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* stream = hip::getStream(nullptr);
|
||||
if (stream == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN_DURATION(ihipMemcpy(dstHost, srcDevice, ByteCount, hipMemcpyDeviceToHost, *queue));
|
||||
HIP_RETURN_DURATION(ihipMemcpy(dstHost, srcDevice, ByteCount, hipMemcpyDeviceToHost, *stream));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyDtoD(hipDeviceptr_t dstDevice,
|
||||
@@ -1416,22 +1416,22 @@ hipError_t hipMemcpyDtoD(hipDeviceptr_t dstDevice,
|
||||
size_t ByteCount) {
|
||||
HIP_INIT_API(hipMemcpyDtoD, dstDevice, srcDevice, ByteCount);
|
||||
CHECK_STREAM_CAPTURING();
|
||||
amd::HostQueue* queue = hip::getQueue(nullptr);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* stream = hip::getStream(nullptr);
|
||||
if (stream == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *queue));
|
||||
HIP_RETURN_DURATION(ihipMemcpy(dstDevice, srcDevice, ByteCount, hipMemcpyDeviceToDevice, *stream));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyAsync_common(void* dst, const void* src, size_t sizeBytes,
|
||||
hipMemcpyKind kind, hipStream_t stream) {
|
||||
STREAM_CAPTURE(hipMemcpyAsync, stream, dst, src, sizeBytes, kind);
|
||||
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
return ihipMemcpy(dst, src, sizeBytes, kind, *queue, true);
|
||||
return ihipMemcpy(dst, src, sizeBytes, kind, *hip_stream, true);
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyAsync(void* dst, const void* src, size_t sizeBytes,
|
||||
@@ -1452,12 +1452,12 @@ hipError_t hipMemcpyHtoDAsync(hipDeviceptr_t dstDevice, void* srcHost, size_t By
|
||||
HIP_INIT_API(hipMemcpyHtoDAsync, dstDevice, srcHost, ByteCount, stream);
|
||||
hipMemcpyKind kind = hipMemcpyHostToDevice;
|
||||
STREAM_CAPTURE(hipMemcpyHtoDAsync, stream, dstDevice, srcHost, ByteCount, kind);
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN_DURATION(
|
||||
ihipMemcpy(dstDevice, srcHost, ByteCount, kind, *queue, true));
|
||||
ihipMemcpy(dstDevice, srcHost, ByteCount, kind, *hip_stream, true));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice, size_t ByteCount,
|
||||
@@ -1465,12 +1465,12 @@ hipError_t hipMemcpyDtoDAsync(hipDeviceptr_t dstDevice, hipDeviceptr_t srcDevice
|
||||
HIP_INIT_API(hipMemcpyDtoDAsync, dstDevice, srcDevice, ByteCount, stream);
|
||||
hipMemcpyKind kind = hipMemcpyDeviceToDevice;
|
||||
STREAM_CAPTURE(hipMemcpyDtoDAsync, stream, dstDevice, srcDevice, ByteCount, kind);
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN_DURATION(
|
||||
ihipMemcpy(dstDevice, srcDevice, ByteCount, kind, *queue, true));
|
||||
ihipMemcpy(dstDevice, srcDevice, ByteCount, kind, *hip_stream, true));
|
||||
}
|
||||
|
||||
hipError_t hipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t ByteCount,
|
||||
@@ -1478,12 +1478,12 @@ hipError_t hipMemcpyDtoHAsync(void* dstHost, hipDeviceptr_t srcDevice, size_t By
|
||||
HIP_INIT_API(hipMemcpyDtoHAsync, dstHost, srcDevice, ByteCount, stream);
|
||||
hipMemcpyKind kind = hipMemcpyDeviceToHost;
|
||||
STREAM_CAPTURE(hipMemcpyDtoHAsync, stream, dstHost, srcDevice, ByteCount, kind);
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
HIP_RETURN(hipErrorInvalidValue);
|
||||
}
|
||||
HIP_RETURN_DURATION(
|
||||
ihipMemcpy(dstHost, srcDevice, ByteCount, kind, *queue, true));
|
||||
ihipMemcpy(dstHost, srcDevice, ByteCount, kind, *hip_stream, true));
|
||||
}
|
||||
|
||||
hipError_t ihipMemcpyAtoDValidate(hipArray* srcArray, void* dstDevice, amd::Coord3D& srcOrigin,
|
||||
@@ -1532,7 +1532,7 @@ hipError_t ihipMemcpyAtoDValidate(hipArray* srcArray, void* dstDevice, amd::Coor
|
||||
hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray* srcArray, void* dstDevice,
|
||||
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
|
||||
amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch,
|
||||
amd::HostQueue* queue) {
|
||||
hip::Stream* stream) {
|
||||
amd::BufferRect srcRect;
|
||||
amd::BufferRect dstRect;
|
||||
amd::Memory* dstMemory;
|
||||
@@ -1544,7 +1544,7 @@ hipError_t ihipMemcpyAtoDCommand(amd::Command*& command, hipArray* srcArray, voi
|
||||
return status;
|
||||
}
|
||||
|
||||
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_IMAGE_TO_BUFFER,
|
||||
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_IMAGE_TO_BUFFER,
|
||||
amd::Command::EventWaitList{}, *srcImage, *dstMemory,
|
||||
srcOrigin, dstOrigin, copyRegion, srcRect, dstRect);
|
||||
|
||||
@@ -1606,7 +1606,7 @@ hipError_t ihipMemcpyDtoAValidate(void* srcDevice, hipArray* dstArray, amd::Coor
|
||||
hipError_t ihipMemcpyDtoACommand(amd::Command*& command, void* srcDevice, hipArray* dstArray,
|
||||
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
|
||||
amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch,
|
||||
amd::HostQueue* queue) {
|
||||
hip::Stream* stream) {
|
||||
amd::Image* dstImage;
|
||||
amd::Memory* srcMemory;
|
||||
amd::BufferRect dstRect;
|
||||
@@ -1617,7 +1617,7 @@ hipError_t ihipMemcpyDtoACommand(amd::Command*& command, void* srcDevice, hipArr
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_BUFFER_TO_IMAGE,
|
||||
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_BUFFER_TO_IMAGE,
|
||||
amd::Command::EventWaitList{}, *srcMemory, *dstImage,
|
||||
srcOrigin, dstOrigin, copyRegion, srcRect, dstRect);
|
||||
|
||||
@@ -1679,7 +1679,7 @@ hipError_t ihipMemcpyDtoDValidate(void* srcDevice, void* dstDevice, amd::Coord3D
|
||||
hipError_t ihipMemcpyDtoDCommand(amd::Command*& command, void* srcDevice, void* dstDevice,
|
||||
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
|
||||
amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch, amd::HostQueue* queue) {
|
||||
size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream) {
|
||||
amd::Memory* srcMemory;
|
||||
amd::Memory* dstMemory;
|
||||
amd::BufferRect srcRect;
|
||||
@@ -1694,7 +1694,7 @@ hipError_t ihipMemcpyDtoDCommand(amd::Command*& command, void* srcDevice, void*
|
||||
amd::Coord3D srcStart(srcRect.start_, 0, 0);
|
||||
amd::Coord3D dstStart(dstRect.start_, 0, 0);
|
||||
amd::CopyMemoryCommand* copyCommand = new amd::CopyMemoryCommand(
|
||||
*queue, CL_COMMAND_COPY_BUFFER_RECT, amd::Command::EventWaitList{}, *srcMemory, *dstMemory,
|
||||
*stream, CL_COMMAND_COPY_BUFFER_RECT, amd::Command::EventWaitList{}, *srcMemory, *dstMemory,
|
||||
srcStart, dstStart, copyRegion, srcRect, dstRect);
|
||||
|
||||
if (copyCommand == nullptr) {
|
||||
@@ -1744,7 +1744,7 @@ hipError_t ihipMemcpyDtoHValidate(void* srcDevice, void* dstHost, amd::Coord3D&
|
||||
hipError_t ihipMemcpyDtoHCommand(amd::Command*& command, void* srcDevice, void* dstHost,
|
||||
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
|
||||
amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch, amd::HostQueue* queue,
|
||||
size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream,
|
||||
bool isAsync = false) {
|
||||
amd::Memory* srcMemory;
|
||||
amd::BufferRect srcRect;
|
||||
@@ -1758,7 +1758,7 @@ hipError_t ihipMemcpyDtoHCommand(amd::Command*& command, void* srcDevice, void*
|
||||
amd::Coord3D srcStart(srcRect.start_, 0, 0);
|
||||
amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA);
|
||||
amd::ReadMemoryCommand* readCommand =
|
||||
new amd::ReadMemoryCommand(*queue, CL_COMMAND_READ_BUFFER_RECT, amd::Command::EventWaitList{},
|
||||
new amd::ReadMemoryCommand(*stream, CL_COMMAND_READ_BUFFER_RECT, amd::Command::EventWaitList{},
|
||||
*srcMemory, srcStart, copyRegion, dstHost, srcRect, dstRect,
|
||||
copyMetadata);
|
||||
|
||||
@@ -1809,7 +1809,7 @@ hipError_t ihipMemcpyHtoDValidate(const void* srcHost, void* dstDevice, amd::Coo
|
||||
hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, void* dstDevice,
|
||||
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
|
||||
amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch,
|
||||
size_t dstRowPitch, size_t dstSlicePitch, amd::HostQueue* queue,
|
||||
size_t dstRowPitch, size_t dstSlicePitch, hip::Stream* stream,
|
||||
bool isAsync = false) {
|
||||
amd::Memory* dstMemory;
|
||||
amd::BufferRect srcRect;
|
||||
@@ -1824,7 +1824,7 @@ hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, vo
|
||||
amd::Coord3D dstStart(dstRect.start_, 0, 0);
|
||||
amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA);
|
||||
amd::WriteMemoryCommand* writeCommand = new amd::WriteMemoryCommand(
|
||||
*queue, CL_COMMAND_WRITE_BUFFER_RECT, amd::Command::EventWaitList{}, *dstMemory, dstStart,
|
||||
*stream, CL_COMMAND_WRITE_BUFFER_RECT, amd::Command::EventWaitList{}, *dstMemory, dstStart,
|
||||
copyRegion, srcHost, dstRect, srcRect, copyMetadata);
|
||||
|
||||
if (writeCommand == nullptr) {
|
||||
@@ -1842,7 +1842,7 @@ hipError_t ihipMemcpyHtoDCommand(amd::Command*& command, const void* srcHost, vo
|
||||
hipError_t ihipMemcpyHtoH(const void* srcHost, void* dstHost, amd::Coord3D srcOrigin,
|
||||
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch,
|
||||
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
|
||||
amd::HostQueue* queue) {
|
||||
hip::Stream* stream) {
|
||||
if ((srcHost == nullptr) || (dstHost == nullptr)) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
@@ -1859,8 +1859,8 @@ hipError_t ihipMemcpyHtoH(const void* srcHost, void* dstHost, amd::Coord3D srcOr
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
if (queue) {
|
||||
queue->finish();
|
||||
if (stream) {
|
||||
stream->finish();
|
||||
}
|
||||
|
||||
for (size_t slice = 0; slice < copyRegion[2]; slice++) {
|
||||
@@ -1909,7 +1909,7 @@ hipError_t ihipMemcpyAtoAValidate(hipArray* srcArray, hipArray* dstArray, amd::C
|
||||
|
||||
hipError_t ihipMemcpyAtoACommand(amd::Command*& command, hipArray* srcArray, hipArray* dstArray,
|
||||
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
|
||||
amd::Coord3D copyRegion, amd::HostQueue* queue) {
|
||||
amd::Coord3D copyRegion, hip::Stream* stream) {
|
||||
amd::Image* srcImage;
|
||||
amd::Image* dstImage;
|
||||
|
||||
@@ -1919,7 +1919,7 @@ hipError_t ihipMemcpyAtoACommand(amd::Command*& command, hipArray* srcArray, hip
|
||||
return status;
|
||||
}
|
||||
|
||||
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*queue, CL_COMMAND_COPY_IMAGE,
|
||||
amd::CopyMemoryCommand* cpyMemCmd = new amd::CopyMemoryCommand(*stream, CL_COMMAND_COPY_IMAGE,
|
||||
amd::Command::EventWaitList{}, *srcImage, *dstImage,
|
||||
srcOrigin, dstOrigin, copyRegion);
|
||||
|
||||
@@ -1968,7 +1968,7 @@ hipError_t ihipMemcpyHtoAValidate(const void* srcHost, hipArray* dstArray,
|
||||
hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hipArray* dstArray,
|
||||
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
|
||||
amd::Coord3D copyRegion, size_t srcRowPitch, size_t srcSlicePitch,
|
||||
amd::HostQueue* queue, bool isAsync = false) {
|
||||
hip::Stream* stream, bool isAsync = false) {
|
||||
amd::Image* dstImage;
|
||||
amd::BufferRect srcRect;
|
||||
|
||||
@@ -1980,7 +1980,7 @@ hipError_t ihipMemcpyHtoACommand(amd::Command*& command, const void* srcHost, hi
|
||||
|
||||
amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA);
|
||||
amd::WriteMemoryCommand* writeMemCmd = new amd::WriteMemoryCommand(
|
||||
*queue, CL_COMMAND_WRITE_IMAGE, amd::Command::EventWaitList{}, *dstImage, dstOrigin,
|
||||
*stream, CL_COMMAND_WRITE_IMAGE, amd::Command::EventWaitList{}, *dstImage, dstOrigin,
|
||||
copyRegion, static_cast<const char*>(srcHost) + srcRect.start_, srcRowPitch, srcSlicePitch,
|
||||
copyMetadata);
|
||||
|
||||
@@ -2029,7 +2029,7 @@ hipError_t ihipMemcpyAtoHValidate(hipArray* srcArray, void* dstHost, amd::Coord3
|
||||
hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray* srcArray, void* dstHost,
|
||||
amd::Coord3D srcOrigin, amd::Coord3D dstOrigin,
|
||||
amd::Coord3D copyRegion, size_t dstRowPitch, size_t dstSlicePitch,
|
||||
amd::HostQueue* queue, bool isAsync = false) {
|
||||
hip::Stream* stream, bool isAsync = false) {
|
||||
amd::Image* srcImage;
|
||||
amd::BufferRect dstRect;
|
||||
amd::CopyMetadata copyMetadata(isAsync, amd::CopyMetadata::CopyEnginePreference::SDMA);
|
||||
@@ -2041,7 +2041,7 @@ hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray* srcArray, voi
|
||||
}
|
||||
|
||||
amd::ReadMemoryCommand* readMemCmd = new amd::ReadMemoryCommand(
|
||||
*queue, CL_COMMAND_READ_IMAGE, amd::Command::EventWaitList{}, *srcImage, srcOrigin,
|
||||
*stream, CL_COMMAND_READ_IMAGE, amd::Command::EventWaitList{}, *srcImage, srcOrigin,
|
||||
copyRegion, static_cast<char*>(dstHost) + dstRect.start_, dstRowPitch, dstSlicePitch,
|
||||
copyMetadata);
|
||||
|
||||
@@ -2058,7 +2058,7 @@ hipError_t ihipMemcpyAtoHCommand(amd::Command*& command, hipArray* srcArray, voi
|
||||
}
|
||||
|
||||
hipError_t ihipGetMemcpyParam3DCommand(amd::Command*& command, const HIP_MEMCPY3D* pCopy,
|
||||
amd::HostQueue* queue) {
|
||||
hip::Stream* stream) {
|
||||
// If {src/dst}MemoryType is hipMemoryTypeUnified, {src/dst}Device and {src/dst}Pitch specify the
|
||||
// (unified virtual address space) base address of the source data and the bytes per row to apply.
|
||||
// {src/dst}Array is ignored.
|
||||
@@ -2106,41 +2106,41 @@ hipError_t ihipGetMemcpyParam3DCommand(amd::Command*& command, const HIP_MEMCPY3
|
||||
// Host to Device.
|
||||
return ihipMemcpyHtoDCommand(command, pCopy->srcHost, pCopy->dstDevice, srcOrigin, dstOrigin,
|
||||
copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight,
|
||||
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, queue);
|
||||
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream);
|
||||
} else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeHost)) {
|
||||
// Device to Host.
|
||||
return ihipMemcpyDtoHCommand(command, pCopy->srcDevice, pCopy->dstHost, srcOrigin, dstOrigin,
|
||||
copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight,
|
||||
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, queue);
|
||||
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream);
|
||||
} else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeDevice)) {
|
||||
// Device to Device.
|
||||
return ihipMemcpyDtoDCommand(command, pCopy->srcDevice, pCopy->dstDevice, srcOrigin, dstOrigin,
|
||||
copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight,
|
||||
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, queue);
|
||||
pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight, stream);
|
||||
} else if ((srcMemoryType == hipMemoryTypeHost) && (dstMemoryType == hipMemoryTypeArray)) {
|
||||
// Host to Image.
|
||||
return ihipMemcpyHtoACommand(command, pCopy->srcHost, pCopy->dstArray, srcOrigin, dstOrigin,
|
||||
copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight,
|
||||
queue);
|
||||
stream);
|
||||
} else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeHost)) {
|
||||
// Image to Host.
|
||||
return ihipMemcpyAtoHCommand(command, pCopy->srcArray, pCopy->dstHost, srcOrigin, dstOrigin,
|
||||
copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight,
|
||||
queue);
|
||||
stream);
|
||||
} else if ((srcMemoryType == hipMemoryTypeDevice) && (dstMemoryType == hipMemoryTypeArray)) {
|
||||
// Device to Image.
|
||||
return ihipMemcpyDtoACommand(command, pCopy->srcDevice, pCopy->dstArray, srcOrigin, dstOrigin,
|
||||
copyRegion, pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight,
|
||||
queue);
|
||||
stream);
|
||||
} else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeDevice)) {
|
||||
// Image to Device.
|
||||
return ihipMemcpyAtoDCommand(command, pCopy->srcArray, pCopy->dstDevice, srcOrigin, dstOrigin,
|
||||
copyRegion, pCopy->dstPitch, pCopy->dstPitch * pCopy->dstHeight,
|
||||
queue);
|
||||
stream);
|
||||
} else if ((srcMemoryType == hipMemoryTypeArray) && (dstMemoryType == hipMemoryTypeArray)) {
|
||||
// Image to Image.
|
||||
return ihipMemcpyAtoACommand(command, pCopy->srcArray, pCopy->dstArray, srcOrigin, dstOrigin,
|
||||
copyRegion, queue);
|
||||
copyRegion, stream);
|
||||
} else {
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
@@ -2212,14 +2212,14 @@ hipError_t ihipMemcpyParam3D(const HIP_MEMCPY3D* pCopy, hipStream_t stream, bool
|
||||
// Host to Host.
|
||||
return ihipMemcpyHtoH(pCopy->srcHost, pCopy->dstHost, srcOrigin, dstOrigin, copyRegion,
|
||||
pCopy->srcPitch, pCopy->srcPitch * pCopy->srcHeight, pCopy->dstPitch,
|
||||
pCopy->dstPitch * pCopy->dstHeight, hip::getQueue(stream));
|
||||
pCopy->dstPitch * pCopy->dstHeight, hip::getStream(stream));
|
||||
} else {
|
||||
amd::Command* command;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
status = ihipGetMemcpyParam3DCommand(command, pCopy, queue);
|
||||
status = ihipGetMemcpyParam3DCommand(command, pCopy, hip_stream);
|
||||
if (status != hipSuccess) return status;
|
||||
|
||||
// Transfers from device memory to pageable host memory and transfers from any host memory to any host memory
|
||||
@@ -2507,13 +2507,13 @@ hipError_t ihipMemcpyAtoD(hipArray* srcArray, void* dstDevice, amd::Coord3D srcO
|
||||
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch,
|
||||
size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) {
|
||||
amd::Command* command;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
hipError_t status =
|
||||
ihipMemcpyAtoDCommand(command, srcArray, dstDevice, srcOrigin, dstOrigin, copyRegion,
|
||||
dstRowPitch, dstSlicePitch, queue);
|
||||
dstRowPitch, dstSlicePitch, hip_stream);
|
||||
if (status != hipSuccess) return status;
|
||||
return ihipMemcpyCmdEnqueue(command, isAsync);
|
||||
}
|
||||
@@ -2521,13 +2521,13 @@ hipError_t ihipMemcpyDtoA(void* srcDevice, hipArray* dstArray, amd::Coord3D srcO
|
||||
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch,
|
||||
size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) {
|
||||
amd::Command* command;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
hipError_t status =
|
||||
ihipMemcpyDtoACommand(command, srcDevice, dstArray, srcOrigin, dstOrigin, copyRegion,
|
||||
srcRowPitch, srcSlicePitch, queue);
|
||||
srcRowPitch, srcSlicePitch, hip_stream);
|
||||
if (status != hipSuccess) return status;
|
||||
return ihipMemcpyCmdEnqueue(command, isAsync);
|
||||
}
|
||||
@@ -2536,13 +2536,13 @@ hipError_t ihipMemcpyDtoD(void* srcDevice, void* dstDevice, amd::Coord3D srcOrig
|
||||
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
|
||||
hipStream_t stream, bool isAsync = false) {
|
||||
amd::Command* command;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
hipError_t status = ihipMemcpyDtoDCommand(command, srcDevice, dstDevice, srcOrigin, dstOrigin,
|
||||
copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch,
|
||||
dstSlicePitch, queue);
|
||||
dstSlicePitch, hip_stream);
|
||||
if (status != hipSuccess) return status;
|
||||
return ihipMemcpyCmdEnqueue(command, isAsync);
|
||||
}
|
||||
@@ -2551,13 +2551,13 @@ hipError_t ihipMemcpyDtoH(void* srcDevice, void* dstHost, amd::Coord3D srcOrigin
|
||||
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
|
||||
hipStream_t stream, bool isAsync = false) {
|
||||
amd::Command* command;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
hipError_t status = ihipMemcpyDtoHCommand(command, srcDevice, dstHost, srcOrigin, dstOrigin,
|
||||
copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch,
|
||||
dstSlicePitch, queue, isAsync);
|
||||
dstSlicePitch, hip_stream, isAsync);
|
||||
if (status != hipSuccess) return status;
|
||||
return ihipMemcpyCmdEnqueue(command, isAsync);
|
||||
}
|
||||
@@ -2566,13 +2566,13 @@ hipError_t ihipMemcpyHtoD(const void* srcHost, void* dstDevice, amd::Coord3D src
|
||||
size_t srcSlicePitch, size_t dstRowPitch, size_t dstSlicePitch,
|
||||
hipStream_t stream, bool isAsync = false) {
|
||||
amd::Command* command;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
hipError_t status = ihipMemcpyHtoDCommand(command, srcHost, dstDevice, srcOrigin, dstOrigin,
|
||||
copyRegion, srcRowPitch, srcSlicePitch, dstRowPitch,
|
||||
dstSlicePitch, queue, isAsync);
|
||||
dstSlicePitch, hip_stream, isAsync);
|
||||
if (status != hipSuccess) return status;
|
||||
return ihipMemcpyCmdEnqueue(command, isAsync);
|
||||
}
|
||||
@@ -2580,12 +2580,12 @@ hipError_t ihipMemcpyAtoA(hipArray* srcArray, hipArray* dstArray, amd::Coord3D s
|
||||
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, hipStream_t stream,
|
||||
bool isAsync = false) {
|
||||
amd::Command* command;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
hipError_t status = ihipMemcpyAtoACommand(command, srcArray, dstArray, srcOrigin, dstOrigin,
|
||||
copyRegion, queue);
|
||||
copyRegion, hip_stream);
|
||||
if (status != hipSuccess) return status;
|
||||
return ihipMemcpyCmdEnqueue(command, isAsync);
|
||||
}
|
||||
@@ -2593,13 +2593,13 @@ hipError_t ihipMemcpyHtoA(const void* srcHost, hipArray* dstArray, amd::Coord3D
|
||||
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t srcRowPitch,
|
||||
size_t srcSlicePitch, hipStream_t stream, bool isAsync = false) {
|
||||
amd::Command* command;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
hipError_t status =
|
||||
ihipMemcpyHtoACommand(command, srcHost, dstArray, srcOrigin, dstOrigin, copyRegion,
|
||||
srcRowPitch, srcSlicePitch, queue, isAsync);
|
||||
srcRowPitch, srcSlicePitch, hip_stream, isAsync);
|
||||
if (status != hipSuccess) return status;
|
||||
return ihipMemcpyCmdEnqueue(command, isAsync);
|
||||
}
|
||||
@@ -2607,13 +2607,13 @@ hipError_t ihipMemcpyAtoH(hipArray* srcArray, void* dstHost, amd::Coord3D srcOri
|
||||
amd::Coord3D dstOrigin, amd::Coord3D copyRegion, size_t dstRowPitch,
|
||||
size_t dstSlicePitch, hipStream_t stream, bool isAsync = false) {
|
||||
amd::Command* command;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
if (queue == nullptr) {
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
if (hip_stream == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
hipError_t status =
|
||||
ihipMemcpyAtoHCommand(command, srcArray, dstHost, srcOrigin, dstOrigin, copyRegion,
|
||||
dstRowPitch, dstSlicePitch, queue, isAsync);
|
||||
dstRowPitch, dstSlicePitch, hip_stream, isAsync);
|
||||
if (status != hipSuccess) return status;
|
||||
return ihipMemcpyCmdEnqueue(command, isAsync);
|
||||
}
|
||||
@@ -2673,9 +2673,9 @@ hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p) {
|
||||
}
|
||||
|
||||
hipError_t ihipMemcpy3DCommand(amd::Command*& command, const hipMemcpy3DParms* p,
|
||||
amd::HostQueue* queue) {
|
||||
hip::Stream* stream) {
|
||||
const HIP_MEMCPY3D desc = hip::getDrvMemcpy3DDesc(*p);
|
||||
return ihipGetMemcpyParam3DCommand(command, &desc, queue);
|
||||
return ihipGetMemcpyParam3DCommand(command, &desc, stream);
|
||||
}
|
||||
|
||||
hipError_t ihipMemcpy3D(const hipMemcpy3DParms* p, hipStream_t stream, bool isAsync = false) {
|
||||
@@ -2733,8 +2733,8 @@ hipError_t hipDrvMemcpy3DAsync(const HIP_MEMCPY3D* pCopy, hipStream_t stream) {
|
||||
|
||||
hipError_t packFillMemoryCommand(amd::Command*& command, amd::Memory* memory, size_t offset,
|
||||
int64_t value, size_t valueSize, size_t sizeBytes,
|
||||
amd::HostQueue* queue) {
|
||||
if ((memory == nullptr) || (queue == nullptr)) {
|
||||
hip::Stream* stream) {
|
||||
if ((memory == nullptr) || (stream == nullptr)) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
@@ -2744,7 +2744,7 @@ hipError_t packFillMemoryCommand(amd::Command*& command, amd::Memory* memory, si
|
||||
// surface=[pitch, width, height]
|
||||
amd::Coord3D surface(sizeBytes, sizeBytes, 1);
|
||||
amd::FillMemoryCommand* fillMemCommand =
|
||||
new amd::FillMemoryCommand(*queue, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(),
|
||||
new amd::FillMemoryCommand(*stream, CL_COMMAND_FILL_BUFFER, waitList, *memory->asBuffer(),
|
||||
&value, valueSize, fillOffset, fillSize, surface);
|
||||
if (fillMemCommand == nullptr) {
|
||||
return hipErrorOutOfMemory;
|
||||
@@ -2810,7 +2810,7 @@ hipError_t ihipGraphMemsetParams_validate(const hipMemsetParams* pNodeParams) {
|
||||
}
|
||||
|
||||
hipError_t ihipMemsetCommand(std::vector<amd::Command*>& commands, void* dst, int64_t value,
|
||||
size_t valueSize, size_t sizeBytes, amd::HostQueue* queue) {
|
||||
size_t valueSize, size_t sizeBytes, hip::Stream* stream) {
|
||||
hipError_t hip_error = hipSuccess;
|
||||
auto aligned_dst = amd::alignUp(reinterpret_cast<address>(dst), sizeof(uint64_t));
|
||||
size_t offset = 0;
|
||||
@@ -2820,7 +2820,7 @@ hipError_t ihipMemsetCommand(std::vector<amd::Command*>& commands, void* dst, in
|
||||
amd::Command* command;
|
||||
|
||||
hip_error = packFillMemoryCommand(command, memory, offset, value, valueSize, sizeBytes,
|
||||
queue);
|
||||
stream);
|
||||
commands.push_back(command);
|
||||
|
||||
return hip_error;
|
||||
@@ -2854,8 +2854,8 @@ hipError_t ihipMemset(void* dst, int64_t value, size_t valueSize, size_t sizeByt
|
||||
}
|
||||
}
|
||||
std::vector<amd::Command*> commands;
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
hip_error = ihipMemsetCommand(commands, dst, value, valueSize, sizeBytes, queue);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
hip_error = ihipMemsetCommand(commands, dst, value, valueSize, sizeBytes, hip_stream);
|
||||
if (hip_error != hipSuccess) {
|
||||
break;
|
||||
}
|
||||
@@ -2972,13 +2972,13 @@ hipError_t ihipMemset3D_validate(hipPitchedPtr pitchedDevPtr, int value, hipExte
|
||||
}
|
||||
|
||||
hipError_t ihipMemset3DCommand(std::vector<amd::Command*> &commands, hipPitchedPtr pitchedDevPtr,
|
||||
int value, hipExtent extent, amd::HostQueue* queue, size_t elementSize = 1) {
|
||||
int value, hipExtent extent, hip::Stream* stream, size_t elementSize = 1) {
|
||||
size_t offset = 0;
|
||||
auto sizeBytes = extent.width * extent.height * extent.depth;
|
||||
amd::Memory* memory = getMemoryObject(pitchedDevPtr.ptr, offset);
|
||||
if (pitchedDevPtr.pitch == extent.width) {
|
||||
return ihipMemsetCommand(commands, pitchedDevPtr.ptr, value, elementSize,
|
||||
static_cast<size_t>(sizeBytes), queue);
|
||||
static_cast<size_t>(sizeBytes), stream);
|
||||
}
|
||||
// Workaround for cases when pitch > row until fill kernel will be updated to support pitch.
|
||||
// Fall back to filling one row at a time.
|
||||
@@ -2994,7 +2994,7 @@ hipError_t ihipMemset3DCommand(std::vector<amd::Command*> &commands, hipPitchedP
|
||||
}
|
||||
amd::FillMemoryCommand* command;
|
||||
command = new amd::FillMemoryCommand(
|
||||
*queue, CL_COMMAND_FILL_BUFFER, amd::Command::EventWaitList{}, *memory->asBuffer(),
|
||||
*stream, CL_COMMAND_FILL_BUFFER, amd::Command::EventWaitList{}, *memory->asBuffer(),
|
||||
&value, elementSize, origin, region, surface);
|
||||
commands.push_back(command);
|
||||
return hipSuccess;
|
||||
@@ -3025,9 +3025,9 @@ hipError_t ihipMemset3D(hipPitchedPtr pitchedDevPtr, int value, hipExtent extent
|
||||
isAsync = true;
|
||||
}
|
||||
}
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
std::vector<amd::Command*> commands;
|
||||
status = ihipMemset3DCommand(commands, pitchedDevPtr, value, extent, queue);
|
||||
status = ihipMemset3DCommand(commands, pitchedDevPtr, value, extent, hip_stream);
|
||||
if (status != hipSuccess) {
|
||||
return status;
|
||||
}
|
||||
@@ -3946,9 +3946,9 @@ hipError_t ihipMipmappedArrayDestroy(hipMipmappedArray_t mipmapped_array_ptr) {
|
||||
}
|
||||
|
||||
for (auto& dev : g_devices) {
|
||||
amd::HostQueue* queue = dev->NullStream(true);
|
||||
if (queue != nullptr) {
|
||||
queue->finish();
|
||||
hip::Stream* stream = dev->NullStream(true);
|
||||
if (stream != nullptr) {
|
||||
stream->finish();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -305,7 +305,7 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
|
||||
uint32_t globalWorkSizeX, uint32_t globalWorkSizeY,
|
||||
uint32_t globalWorkSizeZ, uint32_t blockDimX, uint32_t blockDimY,
|
||||
uint32_t blockDimZ, uint32_t sharedMemBytes,
|
||||
amd::HostQueue* queue, void** kernelParams, void** extra,
|
||||
hip::Stream* stream, void** kernelParams, void** extra,
|
||||
hipEvent_t startEvent = nullptr, hipEvent_t stopEvent = nullptr,
|
||||
uint32_t flags = 0, uint32_t params = 0, uint32_t gridId = 0,
|
||||
uint32_t numGrids = 0, uint64_t prevGridSum = 0,
|
||||
@@ -328,7 +328,7 @@ hipError_t ihipLaunchKernelCommand(amd::Command*& command, hipFunction_t f,
|
||||
}
|
||||
|
||||
amd::NDRangeKernelCommand* kernelCommand = new amd::NDRangeKernelCommand(
|
||||
*queue, waitList, *kernel, ndrange, sharedMemBytes, params, gridId, numGrids, prevGridSum,
|
||||
*stream, waitList, *kernel, ndrange, sharedMemBytes, params, gridId, numGrids, prevGridSum,
|
||||
allGridSum, firstDevice, profileNDRange);
|
||||
if (!kernelCommand) {
|
||||
return hipErrorOutOfMemory;
|
||||
@@ -371,9 +371,9 @@ hipError_t ihipModuleLaunchKernel(hipFunction_t f, uint32_t globalWorkSizeX,
|
||||
return status;
|
||||
}
|
||||
amd::Command* command = nullptr;
|
||||
amd::HostQueue* queue = hip::getQueue(hStream);
|
||||
hip::Stream* hip_stream = hip::getStream(hStream);
|
||||
status = ihipLaunchKernelCommand(command, f, globalWorkSizeX, globalWorkSizeY, globalWorkSizeZ,
|
||||
blockDimX, blockDimY, blockDimZ, sharedMemBytes, queue,
|
||||
blockDimX, blockDimY, blockDimZ, sharedMemBytes, hip_stream,
|
||||
kernelParams, extra, startEvent, stopEvent, flags, params,
|
||||
gridId, numGrids, prevGridSum, allGridSum, firstDevice);
|
||||
if (status != hipSuccess) {
|
||||
@@ -544,8 +544,8 @@ hipError_t ihipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams*
|
||||
}
|
||||
if (launch.hStream != nullptr) {
|
||||
// Validate devices to make sure it dosn't have duplicates
|
||||
amd::HostQueue* queue = reinterpret_cast<hip::Stream*>(launch.hStream)->asHostQueue();
|
||||
auto device = &queue->vdev()->device();
|
||||
hip::Stream* hip_stream = reinterpret_cast<hip::Stream*>(launch.hStream);
|
||||
auto device = &hip_stream->vdev()->device();
|
||||
for (int j = 0; j < numDevices; ++j) {
|
||||
if (mgpu_list[j] == device) {
|
||||
return hipErrorInvalidDevice;
|
||||
@@ -562,23 +562,23 @@ hipError_t ihipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams*
|
||||
// Sync the execution streams on all devices
|
||||
if ((flags & hipCooperativeLaunchMultiDeviceNoPreSync) == 0) {
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
amd::HostQueue* queue =
|
||||
reinterpret_cast<hip::Stream*>(launchParamsList[i].hStream)->asHostQueue();
|
||||
queue->finish();
|
||||
hip::Stream* hip_stream =
|
||||
reinterpret_cast<hip::Stream*>(launchParamsList[i].hStream);
|
||||
hip_stream->finish();
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
const hipFunctionLaunchParams& launch = launchParamsList[i];
|
||||
amd::HostQueue* queue = reinterpret_cast<hip::Stream*>(launch.hStream)->asHostQueue();
|
||||
hip::Stream* hip_stream = reinterpret_cast<hip::Stream*>(launch.hStream);
|
||||
|
||||
if (i == 0) {
|
||||
// The order of devices in the launch may not match the order in the global array
|
||||
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
||||
// Find the matching device
|
||||
if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) {
|
||||
if (&hip_stream->vdev()->device() == g_devices[dev]->devices()[0]) {
|
||||
// Save ROCclr index of the first device in the launch
|
||||
firstDevice = queue->vdev()->device().index();
|
||||
firstDevice = hip_stream->vdev()->device().index();
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -608,9 +608,9 @@ hipError_t ihipModuleLaunchCooperativeKernelMultiDevice(hipFunctionLaunchParams*
|
||||
// Sync the execution streams on all devices
|
||||
if ((flags & hipCooperativeLaunchMultiDeviceNoPostSync) == 0) {
|
||||
for (int i = 0; i < numDevices; ++i) {
|
||||
amd::HostQueue* queue =
|
||||
reinterpret_cast<hip::Stream*>(launchParamsList[i].hStream)->asHostQueue();
|
||||
queue->finish();
|
||||
hip::Stream* hip_stream =
|
||||
reinterpret_cast<hip::Stream*>(launchParamsList[i].hStream);
|
||||
hip_stream->finish();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -739,12 +739,12 @@ hipError_t ihipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsL
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
amd::HostQueue* queue = hip::getQueue(launch.stream);
|
||||
hip::Stream* hip_stream = hip::getStream(launch.stream);
|
||||
hipFunction_t func = nullptr;
|
||||
// The order of devices in the launch may not match the order in the global array
|
||||
for (size_t dev = 0; dev < g_devices.size(); ++dev) {
|
||||
// Find the matching device and request the kernel function
|
||||
if (&queue->vdev()->device() == g_devices[dev]->devices()[0]) {
|
||||
if (&hip_stream->vdev()->device() == g_devices[dev]->devices()[0]) {
|
||||
IHIP_RETURN_ONFAIL(PlatformState::instance().getStatFunc(&func, launch.func, dev));
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ PlatformState* PlatformState::platform_; // Initiaized as nullptr by default
|
||||
// forward declaration of methods required for __hipRegisrterManagedVar
|
||||
hipError_t ihipMallocManaged(void** ptr, size_t size, unsigned int align = 0);
|
||||
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
amd::HostQueue& queue, bool isAsync = false);
|
||||
hip::Stream& stream, bool isAsync = false);
|
||||
|
||||
struct __CudaFatBinaryWrapper {
|
||||
unsigned int magic;
|
||||
@@ -146,9 +146,9 @@ extern "C" void __hipRegisterManagedVar(
|
||||
HIP_INIT_VOID();
|
||||
hipError_t status = ihipMallocManaged(pointer, size, align);
|
||||
if (status == hipSuccess) {
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
if (queue != nullptr) {
|
||||
status = ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *queue);
|
||||
hip::Stream* stream = hip::getNullStream();
|
||||
if (stream != nullptr) {
|
||||
status = ihipMemcpy(*pointer, init_value, size, hipMemcpyHostToDevice, *stream);
|
||||
guarantee((status == hipSuccess), "Error during memcpy to managed memory!");
|
||||
} else {
|
||||
ClPrint(amd::LOG_ERROR, amd::LOG_API, "Host Queue is NULL");
|
||||
|
||||
@@ -31,7 +31,8 @@ namespace hip {
|
||||
// ================================================================================================
|
||||
Stream::Stream(hip::Device* dev, Priority p, unsigned int f, bool null_stream,
|
||||
const std::vector<uint32_t>& cuMask, hipStreamCaptureStatus captureStatus)
|
||||
: queue_(nullptr),
|
||||
: amd::HostQueue(*dev->asContext(), *dev->devices()[0], 0, amd::CommandQueue::RealTimeDisabled,
|
||||
convertToQueuePriority(p), cuMask),
|
||||
lock_("Stream Callback lock"),
|
||||
device_(dev),
|
||||
priority_(p),
|
||||
@@ -40,18 +41,11 @@ Stream::Stream(hip::Device* dev, Priority p, unsigned int f, bool null_stream,
|
||||
cuMask_(cuMask),
|
||||
captureStatus_(captureStatus),
|
||||
originStream_(false),
|
||||
captureID_(0) {}
|
||||
|
||||
// ================================================================================================
|
||||
Stream::~Stream() {
|
||||
if (queue_ != nullptr) {
|
||||
amd::ScopedLock lock(streamSetLock);
|
||||
streamSet.erase(this);
|
||||
|
||||
queue_->release();
|
||||
queue_ = nullptr;
|
||||
}
|
||||
}
|
||||
captureID_(0)
|
||||
{
|
||||
amd::ScopedLock lock(streamSetLock);
|
||||
streamSet.insert(this);
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
hipError_t Stream::EndCapture() {
|
||||
@@ -77,38 +71,16 @@ hipError_t Stream::EndCapture() {
|
||||
|
||||
// ================================================================================================
|
||||
bool Stream::Create() {
|
||||
amd::CommandQueue::Priority p;
|
||||
switch (priority_) {
|
||||
case Priority::High:
|
||||
p = amd::CommandQueue::Priority::High;
|
||||
break;
|
||||
case Priority::Low:
|
||||
p = amd::CommandQueue::Priority::Low;
|
||||
break;
|
||||
case Priority::Normal:
|
||||
default:
|
||||
p = amd::CommandQueue::Priority::Normal;
|
||||
break;
|
||||
}
|
||||
amd::HostQueue* queue = new amd::HostQueue(*device_->asContext(), *device_->devices()[0],
|
||||
0, amd::CommandQueue::RealTimeDisabled,
|
||||
p, cuMask_);
|
||||
return create();
|
||||
}
|
||||
|
||||
// Create a host queue
|
||||
bool result = (queue != nullptr) ? queue->create() : false;
|
||||
// Insert just created stream into the list of the blocking queues
|
||||
if (result) {
|
||||
// ================================================================================================
|
||||
bool Stream::terminate() {
|
||||
{
|
||||
amd::ScopedLock lock(streamSetLock);
|
||||
streamSet.insert(this);
|
||||
queue_ = queue;
|
||||
device_->SaveQueue(queue);
|
||||
} else if (queue != nullptr) {
|
||||
// Queue creation has failed, and virtual device associated with the queue may not be created.
|
||||
// Just need to delete the queue instance.
|
||||
delete queue;
|
||||
streamSet.erase(this);
|
||||
}
|
||||
|
||||
return result;
|
||||
return HostQueue::terminate();
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
@@ -130,29 +102,6 @@ bool isValid(hipStream_t& stream) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
amd::HostQueue* Stream::asHostQueue(bool skip_alloc) {
|
||||
if (queue_ != nullptr) {
|
||||
return queue_;
|
||||
}
|
||||
// Access to the stream object is lock protected, because possible allocation
|
||||
amd::ScopedLock l(Lock());
|
||||
if (queue_ == nullptr) {
|
||||
// Create the host queue for the first time
|
||||
if (!skip_alloc) {
|
||||
Create();
|
||||
}
|
||||
}
|
||||
return queue_;
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
void Stream::Finish() const {
|
||||
if (queue_ != nullptr) {
|
||||
queue_->finish();
|
||||
}
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
int Stream::DeviceId() const {
|
||||
return device_->deviceId();
|
||||
@@ -176,7 +125,7 @@ void Stream::syncNonBlockingStreams(int deviceId) {
|
||||
for (auto& it : streamSet) {
|
||||
if (it->Flags() & hipStreamNonBlocking) {
|
||||
if (it->DeviceId() == deviceId) {
|
||||
it->asHostQueue()->finish();
|
||||
it->finish();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -203,7 +152,7 @@ void Stream::destroyAllStreams(int deviceId) {
|
||||
}
|
||||
}
|
||||
for (auto& it : toBeDeleted) {
|
||||
delete it;
|
||||
it->release();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -211,36 +160,48 @@ bool Stream::StreamCaptureOngoing(void) {
|
||||
return (g_allCapturingStreams.empty() == true) ? false : true;
|
||||
}
|
||||
|
||||
bool Stream::existsActiveStreamForDevice(hip::Device* device) {
|
||||
|
||||
amd::ScopedLock lock(streamSetLock);
|
||||
|
||||
for (const auto& active_stream : streamSet) {
|
||||
if ((active_stream->GetDevice() == device) &&
|
||||
active_stream->GetQueueStatus()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
};// hip namespace
|
||||
|
||||
// ================================================================================================
|
||||
void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream) {
|
||||
void iHipWaitActiveStreams(hip::Stream* blocking_stream, bool wait_null_stream) {
|
||||
amd::Command::EventWaitList eventWaitList(0);
|
||||
bool submitMarker = 0;
|
||||
{
|
||||
amd::ScopedLock lock(streamSetLock);
|
||||
|
||||
for (const auto& stream : streamSet) {
|
||||
amd::HostQueue* active_queue = stream->asHostQueue();
|
||||
for (const auto& active_stream : streamSet) {
|
||||
// If it's the current device
|
||||
if ((&active_queue->device() == &blocking_queue->device()) &&
|
||||
if ((&active_stream->device() == &blocking_stream->device()) &&
|
||||
// Make sure it's a default stream
|
||||
((stream->Flags() & hipStreamNonBlocking) == 0) &&
|
||||
((active_stream->Flags() & hipStreamNonBlocking) == 0) &&
|
||||
// and it's not the current stream
|
||||
(active_queue != blocking_queue) &&
|
||||
(active_stream != blocking_stream) &&
|
||||
// check for a wait on the null stream
|
||||
(stream->Null() == wait_null_stream)) {
|
||||
(active_stream->Null() == wait_null_stream)) {
|
||||
// Get the last valid command
|
||||
amd::Command* command = active_queue->getLastQueuedCommand(true);
|
||||
amd::Command* command = active_stream->getLastQueuedCommand(true);
|
||||
if (command != nullptr) {
|
||||
amd::Event& event = command->event();
|
||||
// Check HW status of the ROCcrl event.
|
||||
// Note: not all ROCclr modes support HW status
|
||||
bool ready = active_queue->device().IsHwEventReady(event);
|
||||
bool ready = active_stream->device().IsHwEventReady(event);
|
||||
if (!ready) {
|
||||
ready = (command->status() == CL_COMPLETE);
|
||||
}
|
||||
submitMarker |= active_queue->vdev()->isFenceDirty();
|
||||
submitMarker |= active_stream->vdev()->isFenceDirty();
|
||||
// Check the current active status
|
||||
if (!ready) {
|
||||
command->notifyCmdQueue();
|
||||
@@ -259,7 +220,7 @@ void iHipWaitActiveStreams(amd::HostQueue* blocking_queue, bool wait_null_stream
|
||||
|
||||
// Check if we have to wait anything
|
||||
if (eventWaitList.size() > 0 || submitMarker) {
|
||||
amd::Command* command = new amd::Marker(*blocking_queue, kMarkerDisableFlush, eventWaitList);
|
||||
amd::Command* command = new amd::Marker(*blocking_stream, kMarkerDisableFlush, eventWaitList);
|
||||
if (command != nullptr) {
|
||||
command->enqueue();
|
||||
command->release();
|
||||
@@ -288,8 +249,11 @@ static hipError_t ihipStreamCreate(hipStream_t* stream,
|
||||
}
|
||||
hip::Stream* hStream = new hip::Stream(hip::getCurrentDevice(), priority, flags, false, cuMask);
|
||||
|
||||
if (hStream == nullptr || !hStream->Create()) {
|
||||
delete hStream;
|
||||
if (hStream == nullptr) {
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
else if (!hStream->Create()) {
|
||||
hStream->release();
|
||||
return hipErrorOutOfMemory;
|
||||
}
|
||||
|
||||
@@ -310,7 +274,7 @@ stream_per_thread::stream_per_thread() {
|
||||
stream_per_thread::~stream_per_thread() {
|
||||
for (auto &stream:m_streams) {
|
||||
if (stream != nullptr && hip::isValid(stream)) {
|
||||
delete reinterpret_cast<hip::Stream*>(stream);
|
||||
reinterpret_cast<hip::Stream*>(stream)->release();
|
||||
stream = nullptr;
|
||||
}
|
||||
}
|
||||
@@ -449,7 +413,7 @@ hipError_t hipStreamSynchronize_common(hipStream_t stream) {
|
||||
}
|
||||
}
|
||||
// Wait for the current host queue
|
||||
hip::getQueue(stream)->finish();
|
||||
hip::getStream(stream)->finish();
|
||||
return hipSuccess;
|
||||
}
|
||||
|
||||
@@ -498,7 +462,7 @@ hipError_t hipStreamDestroy(hipStream_t stream) {
|
||||
if (l_it != hip::tls.capture_streams_.end()) {
|
||||
hip::tls.capture_streams_.erase(l_it);
|
||||
}
|
||||
delete s;
|
||||
s->release();
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
@@ -564,9 +528,9 @@ hipError_t hipStreamQuery_common(hipStream_t stream) {
|
||||
HIP_RETURN(hipErrorStreamCaptureUnsupported);
|
||||
}
|
||||
}
|
||||
amd::HostQueue* hostQueue = hip::getQueue(stream);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
|
||||
amd::Command* command = hostQueue->getLastQueuedCommand(true);
|
||||
amd::Command* command = hip_stream->getLastQueuedCommand(true);
|
||||
if (command == nullptr) {
|
||||
// Nothing was submitted to the queue
|
||||
return hipSuccess;
|
||||
@@ -604,13 +568,13 @@ hipError_t streamCallback_common(hipStream_t stream, StreamCallback* cbo, void*
|
||||
return hipErrorContextIsDestroyed;
|
||||
}
|
||||
|
||||
amd::HostQueue* hostQueue = hip::getQueue(stream);
|
||||
amd::Command* last_command = hostQueue->getLastQueuedCommand(true);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
amd::Command* last_command = hip_stream->getLastQueuedCommand(true);
|
||||
amd::Command::EventWaitList eventWaitList;
|
||||
if (last_command != nullptr) {
|
||||
eventWaitList.push_back(last_command);
|
||||
}
|
||||
amd::Command* command = new amd::Marker(*hostQueue, !kMarkerDisableFlush, eventWaitList);
|
||||
amd::Command* command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, eventWaitList);
|
||||
if (command == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
@@ -630,7 +594,7 @@ hipError_t streamCallback_common(hipStream_t stream, StreamCallback* cbo, void*
|
||||
// Add the new barrier to stall the stream, until the callback is done
|
||||
eventWaitList.clear();
|
||||
eventWaitList.push_back(command);
|
||||
amd::Command* block_command = new amd::Marker(*hostQueue, !kMarkerDisableFlush, eventWaitList);
|
||||
amd::Command* block_command = new amd::Marker(*hip_stream, !kMarkerDisableFlush, eventWaitList);
|
||||
if (block_command == nullptr) {
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
@@ -69,11 +69,11 @@ hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void
|
||||
return hipErrorInvalidValue;
|
||||
}
|
||||
|
||||
amd::HostQueue* queue = hip::getQueue(stream);
|
||||
hip::Stream* hip_stream = hip::getStream(stream);
|
||||
amd::Command::EventWaitList waitList;
|
||||
|
||||
amd::StreamOperationCommand* command =
|
||||
new amd::StreamOperationCommand(*queue, cmdType, waitList, *memory->asBuffer(),
|
||||
new amd::StreamOperationCommand(*hip_stream, cmdType, waitList, *memory->asBuffer(),
|
||||
value, mask, outFlags, offset, sizeBytes);
|
||||
|
||||
if (command == nullptr) {
|
||||
|
||||
@@ -26,7 +26,7 @@
|
||||
#include "platform/sampler.hpp"
|
||||
|
||||
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
||||
amd::HostQueue& queue, bool isAsync = false);
|
||||
hip::Stream& stream, bool isAsync = false);
|
||||
|
||||
hipError_t ihipFree(void* ptr);
|
||||
|
||||
@@ -575,8 +575,8 @@ hipError_t hipBindTexture2D(size_t* offset,
|
||||
HIP_RETURN(err);
|
||||
}
|
||||
// Copy to device.
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue));
|
||||
hip::Stream* stream = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream));
|
||||
}
|
||||
|
||||
hipError_t ihipBindTextureToArray(const textureReference* texref,
|
||||
@@ -624,8 +624,8 @@ hipError_t hipBindTextureToArray(const textureReference* texref,
|
||||
HIP_RETURN(err);
|
||||
}
|
||||
// Copy to device.
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue));
|
||||
hip::Stream* stream = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream));
|
||||
}
|
||||
|
||||
hipError_t ihipBindTextureToMipmappedArray(const textureReference* texref,
|
||||
@@ -674,8 +674,8 @@ hipError_t hipBindTextureToMipmappedArray(const textureReference* texref,
|
||||
HIP_RETURN(err);
|
||||
}
|
||||
// Copy to device.
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue));
|
||||
hip::Stream* stream = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream));
|
||||
}
|
||||
|
||||
hipError_t hipUnbindTexture(const textureReference* texref) {
|
||||
@@ -701,8 +701,8 @@ hipError_t hipBindTexture(size_t* offset,
|
||||
HIP_RETURN(err);
|
||||
}
|
||||
// Copy to device.
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *queue));
|
||||
hip::Stream* stream = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texref, refDevSize, hipMemcpyHostToDevice, *stream));
|
||||
}
|
||||
|
||||
hipError_t hipGetChannelDesc(hipChannelFormatDesc* desc,
|
||||
@@ -966,8 +966,8 @@ hipError_t hipTexRefSetArray(textureReference* texRef,
|
||||
HIP_RETURN(err);
|
||||
}
|
||||
// Copy to device.
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue));
|
||||
hip::Stream* stream = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream));
|
||||
}
|
||||
|
||||
hipError_t hipTexRefGetAddress(hipDeviceptr_t* dptr,
|
||||
@@ -1049,8 +1049,8 @@ hipError_t hipTexRefSetAddress(size_t* ByteOffset,
|
||||
HIP_RETURN(err);
|
||||
}
|
||||
// Copy to device.
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue));
|
||||
hip::Stream* stream = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream));
|
||||
}
|
||||
|
||||
hipError_t hipTexRefSetAddress2D(textureReference* texRef,
|
||||
@@ -1091,8 +1091,8 @@ hipError_t hipTexRefSetAddress2D(textureReference* texRef,
|
||||
HIP_RETURN(err);
|
||||
}
|
||||
// Copy to device.
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue));
|
||||
hip::Stream* stream = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream));
|
||||
}
|
||||
|
||||
hipChannelFormatDesc hipCreateChannelDesc(int x, int y, int z, int w, hipChannelFormatKind f) {
|
||||
@@ -1454,8 +1454,8 @@ hipError_t hipTexRefSetMipmappedArray(textureReference* texRef,
|
||||
HIP_RETURN(err);
|
||||
}
|
||||
// Copy to device.
|
||||
amd::HostQueue* queue = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *queue));
|
||||
hip::Stream* stream = hip::getNullStream();
|
||||
HIP_RETURN(ihipMemcpy(refDevPtr, texRef, refDevSize, hipMemcpyHostToDevice, *stream));
|
||||
}
|
||||
|
||||
hipError_t hipTexObjectCreate(hipTextureObject_t* pTexObject,
|
||||
|
||||
Ссылка в новой задаче
Block a user