From 2c010dec17914bf9b795d0abcebc9fb399a8db2d Mon Sep 17 00:00:00 2001 From: sdashmiz Date: Mon, 25 Apr 2022 13:42:17 -0400 Subject: [PATCH] SWDEV-325711: Add userobject functions for graph - add user object APIs for creating, retaining and releasing user objects Signed-off-by: sdashmiz Change-Id: I0bf2999c77e44269565b27c31c7c1461f8a160a2 --- hipamd/include/hip/amd_detail/hip_prof_str.h | 130 +++++++++++++++++- .../nvidia_detail/nvidia_hip_runtime_api.h | 24 ++++ hipamd/src/amdhip.def | 6 + hipamd/src/hip_graph.cpp | 68 ++++++++- hipamd/src/hip_graph_internal.cpp | 4 +- hipamd/src/hip_graph_internal.hpp | 86 +++++++++++- hipamd/src/hip_hcc.def.in | 7 +- hipamd/src/hip_hcc.map.in | 5 + 8 files changed, 323 insertions(+), 7 deletions(-) diff --git a/hipamd/include/hip/amd_detail/hip_prof_str.h b/hipamd/include/hip/amd_detail/hip_prof_str.h index 5be425374a..f592a64a81 100644 --- a/hipamd/include/hip/amd_detail/hip_prof_str.h +++ b/hipamd/include/hip/amd_detail/hip_prof_str.h @@ -6,7 +6,6 @@ #ifndef _HIP_PROF_STR_H #define _HIP_PROF_STR_H #define HIP_PROF_VER 1 - // HIP API callbacks ID enumeration enum hip_api_id_t { HIP_API_ID_NONE = 0, @@ -354,7 +353,12 @@ enum hip_api_id_t { HIP_API_ID_hipDriverGetVersion = 341, HIP_API_ID_hipGraphUpload = 342, HIP_API_ID_hipRuntimeGetVersion = 343, - HIP_API_ID_LAST = 343, + HIP_API_ID_hipUserObjectCreate = 344, + HIP_API_ID_hipUserObjectRelease = 345, + HIP_API_ID_hipUserObjectRetain = 346, + HIP_API_ID_hipGraphRetainUserObject = 347, + HIP_API_ID_hipGraphReleaseUserObject = 348, + HIP_API_ID_LAST = 348, HIP_API_ID_hipArray3DGetDescriptor = HIP_API_ID_NONE, HIP_API_ID_hipArrayGetDescriptor = HIP_API_ID_NONE, @@ -760,6 +764,11 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipTexRefSetMipmappedArray: return "hipTexRefSetMipmappedArray"; case HIP_API_ID_hipThreadExchangeStreamCaptureMode: return "hipThreadExchangeStreamCaptureMode"; case HIP_API_ID_hipWaitExternalSemaphoresAsync: return
"hipWaitExternalSemaphoresAsync"; + case HIP_API_ID_hipUserObjectCreate: return "hipUserObjectCreate"; + case HIP_API_ID_hipUserObjectRelease: return "hipUserObjectRelease"; + case HIP_API_ID_hipUserObjectRetain: return "hipUserObjectRetain"; + case HIP_API_ID_hipGraphRetainUserObject: return "hipGraphRetainUserObject"; + case HIP_API_ID_hipGraphReleaseUserObject: return "hipGraphReleaseUserObject"; }; return "unknown"; }; @@ -1106,6 +1115,11 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipTexRefSetMipmappedArray", name) == 0) return HIP_API_ID_hipTexRefSetMipmappedArray; if (strcmp("hipThreadExchangeStreamCaptureMode", name) == 0) return HIP_API_ID_hipThreadExchangeStreamCaptureMode; if (strcmp("hipWaitExternalSemaphoresAsync", name) == 0) return HIP_API_ID_hipWaitExternalSemaphoresAsync; + if (strcmp("hipUserObjectCreate", name) == 0) return HIP_API_ID_hipUserObjectCreate; + if (strcmp("hipUserObjectRelease", name) == 0) return HIP_API_ID_hipUserObjectRelease; + if (strcmp("hipUserObjectRetain", name) == 0) return HIP_API_ID_hipUserObjectRetain; + if (strcmp("hipGraphRetainUserObject", name) == 0) return HIP_API_ID_hipGraphRetainUserObject; + if (strcmp("hipGraphReleaseUserObject", name) == 0) return HIP_API_ID_hipGraphReleaseUserObject; return HIP_API_ID_NONE; } @@ -3100,6 +3114,34 @@ typedef struct hip_api_data_s { unsigned int numExtSems; hipStream_t stream; } hipWaitExternalSemaphoresAsync; + struct { + hipUserObject_t* object_out; + hipUserObject_t object_out__val; + void* ptr; + hipHostFn_t destroy; + unsigned int initialRefcount; + unsigned int flags; + } hipUserObjectCreate; + struct { + hipUserObject_t object; + unsigned int count; + } hipUserObjectRelease; + struct { + hipUserObject_t object; + unsigned int count; + } hipUserObjectRetain; + + struct { + hipGraph_t graph; + hipUserObject_t object; + unsigned int count; + unsigned int flags; + } hipGraphRetainUserObject; + struct { + hipGraph_t graph; + hipUserObject_t object; 
+ unsigned int count; + } hipGraphReleaseUserObject; } args; } hip_api_data_t; @@ -5134,6 +5176,37 @@ typedef struct hip_api_data_s { cb_data.args.hipWaitExternalSemaphoresAsync.numExtSems = (unsigned int)numExtSems; \ cb_data.args.hipWaitExternalSemaphoresAsync.stream = (hipStream_t)stream; \ }; +// hipUserObjectCreate[('hipUserObject_t*', 'object_out'), ('void*', 'ptr'), ('hipHostFn_t', 'destroy'), ('unsigned int', 'initialRefcount'), ('unsigned int', 'flags')] +#define INIT_hipUserObjectCreate_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipUserObjectCreate.object_out = (hipUserObject_t*)object_out; \ + cb_data.args.hipUserObjectCreate.ptr = (void*)ptr; \ + cb_data.args.hipUserObjectCreate.destroy = (hipHostFn_t)destroy; \ + cb_data.args.hipUserObjectCreate.initialRefcount = (unsigned int)initialRefcount; \ + cb_data.args.hipUserObjectCreate.flags = (unsigned int)flags; \ +}; +// hipUserObjectRelease[('hipUserObject_t', 'object'), ('unsigned int', 'count')] +#define INIT_hipUserObjectRelease_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipUserObjectRelease.object = (hipUserObject_t)object; \ + cb_data.args.hipUserObjectRelease.count = (unsigned int)count; \ +}; +// hipUserObjectRetain[('hipUserObject_t', 'object'), ('unsigned int', 'count')] +#define INIT_hipUserObjectRetain_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipUserObjectRetain.object = (hipUserObject_t)object; \ + cb_data.args.hipUserObjectRetain.count = (unsigned int)count; \ +}; +// hipGraphRetainUserObject[('hipGraph_t', 'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count'), ('unsigned int', 'flags')] +#define INIT_hipGraphRetainUserObject_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphRetainUserObject.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphRetainUserObject.object = (hipUserObject_t)object; \ + cb_data.args.hipGraphRetainUserObject.count = (unsigned int)count; \ + cb_data.args.hipGraphRetainUserObject.flags = (unsigned int)flags; \ +}; +// hipGraphReleaseUserObject[('hipGraph_t', 
'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count')] +#define INIT_hipGraphReleaseUserObject_CB_ARGS_DATA(cb_data) { \ + cb_data.args.hipGraphReleaseUserObject.graph = (hipGraph_t)graph; \ + cb_data.args.hipGraphReleaseUserObject.object = (hipUserObject_t)object; \ + cb_data.args.hipGraphReleaseUserObject.count = (unsigned int)count; \ +}; #define INIT_CB_ARGS_DATA(cb_id, cb_data) INIT_##cb_id##_CB_ARGS_DATA(cb_data) // Macros for non-public API primitives @@ -6585,6 +6658,22 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { if (data->args.hipWaitExternalSemaphoresAsync.extSemArray) data->args.hipWaitExternalSemaphoresAsync.extSemArray__val = *(data->args.hipWaitExternalSemaphoresAsync.extSemArray); if (data->args.hipWaitExternalSemaphoresAsync.paramsArray) data->args.hipWaitExternalSemaphoresAsync.paramsArray__val = *(data->args.hipWaitExternalSemaphoresAsync.paramsArray); break; +// hipUserObjectCreate[('hipUserObject_t*', 'object_out'), ('void*', 'ptr')] + case HIP_API_ID_hipUserObjectCreate: + if (data->args.hipUserObjectCreate.object_out) data->args.hipUserObjectCreate.object_out__val = *(data->args.hipUserObjectCreate.object_out); + break; +// hipUserObjectRelease[('hipUserObject_t', 'object')] + case HIP_API_ID_hipUserObjectRelease: + break; +// hipUserObjectRetain[('hipUserObject_t', 'object')] + case HIP_API_ID_hipUserObjectRetain: + break; +// hipGraphRetainUserObject[('hipGraph_t', 'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count'), ('unsigned int', 'flags')] + case HIP_API_ID_hipGraphRetainUserObject: + break; +// hipGraphReleaseUserObject[('hipGraph_t', 'graph'), ('hipUserObject_t', 'object'), ('unsigned int', 'count')] + case HIP_API_ID_hipGraphReleaseUserObject: + break; default: break; }; } @@ -9275,6 +9364,43 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << ", stream=" << data->args.hipWaitExternalSemaphoresAsync.stream; oss << ")"; break; + case 
HIP_API_ID_hipUserObjectCreate: + oss << "hipUserObjectCreate("; + if (data->args.hipUserObjectCreate.object_out == NULL) oss << "object_out=NULL"; + else oss << "object_out=" << data->args.hipUserObjectCreate.object_out__val; + oss << ", ptr=" << data->args.hipUserObjectCreate.ptr; + oss << ", destroy=" << data->args.hipUserObjectCreate.destroy; + oss << ", initialRefcount=" << data->args.hipUserObjectCreate.initialRefcount; + oss << ", flags=" << data->args.hipUserObjectCreate.flags; + oss << ")"; + break; + case HIP_API_ID_hipUserObjectRelease: + oss << "hipUserObjectRelease("; + oss << "object=" << data->args.hipUserObjectRelease.object; + oss << ", count=" << data->args.hipUserObjectRelease.count; + oss << ")"; + break; + case HIP_API_ID_hipUserObjectRetain: + oss << "hipUserObjectRetain("; + oss << "object=" << data->args.hipUserObjectRetain.object; + oss << ", count=" << data->args.hipUserObjectRetain.count; + oss << ")"; + break; + case HIP_API_ID_hipGraphRetainUserObject: + oss << "hipGraphRetainUserObject("; + oss << "graph=" << data->args.hipGraphRetainUserObject.graph; + oss << ", object=" << data->args.hipGraphRetainUserObject.object; + oss << ", count=" << data->args.hipGraphRetainUserObject.count; + oss << ", flags=" << data->args.hipGraphRetainUserObject.flags; + oss << ")"; + break; + case HIP_API_ID_hipGraphReleaseUserObject: + oss << "hipGraphReleaseUserObject("; + oss << "graph=" << data->args.hipGraphReleaseUserObject.graph; + oss << ", object=" << data->args.hipGraphReleaseUserObject.object; + oss << ", count=" << data->args.hipGraphReleaseUserObject.count; + oss << ")"; + break; default: oss << "unknown"; }; return strdup(oss.str().c_str()); diff --git a/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h b/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h index 6fdf4dc2d6..b6c0a93323 100644 --- a/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h +++ b/hipamd/include/hip/nvidia_detail/nvidia_hip_runtime_api.h @@ -1107,6 
+1107,7 @@ inline static enum cudaChannelFormatKind hipChannelFormatKindToCudaChannelFormat typedef cudaGraph_t hipGraph_t; typedef cudaGraphNode_t hipGraphNode_t; typedef cudaGraphExec_t hipGraphExec_t; +typedef cudaUserObject_t hipUserObject_t; typedef enum cudaGraphNodeType hipGraphNodeType; #define hipGraphNodeTypeKernel cudaGraphNodeTypeKernel @@ -3232,6 +3233,29 @@ inline static hipError_t hipDeviceSetGraphMemAttribute(int device, hipGraphMemAt inline static hipError_t hipDeviceGraphMemTrim(int device) { return hipCUDAErrorTohipError(cudaDeviceGraphMemTrim(device)); } +// Thin wrapper: forwards every argument to cudaUserObjectCreate and translates the CUDA status to a HIP status. +inline static hipError_t hipUserObjectCreate(hipUserObject_t* object_out, void* ptr, hipHostFn_t destroy, + unsigned int initialRefcount, unsigned int flags) { + return hipCUDAErrorTohipError(cudaUserObjectCreate(object_out, ptr, destroy, initialRefcount, flags)); +} + +// Thin wrapper: forwards to cudaUserObjectRelease and translates the CUDA status to a HIP status. +inline static hipError_t hipUserObjectRelease(hipUserObject_t object, unsigned int count __dparm(1)) { + return hipCUDAErrorTohipError(cudaUserObjectRelease(object, count)); +} + +// Thin wrapper: forwards to cudaUserObjectRetain and translates the CUDA status to a HIP status. +inline static hipError_t hipUserObjectRetain(hipUserObject_t object, unsigned int count __dparm(1)) { + return hipCUDAErrorTohipError(cudaUserObjectRetain(object, count)); +} +// Thin wrapper: forwards to cudaGraphRetainUserObject and translates the CUDA status to a HIP status. +inline static hipError_t hipGraphRetainUserObject(hipGraph_t graph, hipUserObject_t object, unsigned int count __dparm(1), unsigned int flags __dparm(0)) { + return hipCUDAErrorTohipError(cudaGraphRetainUserObject(graph, object, count, flags)); +} +// Thin wrapper: forwards to cudaGraphReleaseUserObject and translates the CUDA status to a HIP status. +inline static hipError_t hipGraphReleaseUserObject(hipGraph_t graph, hipUserObject_t object, unsigned int count __dparm(1)) { + return hipCUDAErrorTohipError(cudaGraphReleaseUserObject(graph, object, count)); +} #endif inline static hipError_t hipGraphHostNodeSetParams(hipGraphNode_t node, diff --git a/hipamd/src/amdhip.def b/hipamd/src/amdhip.def index 4b81564a12..a6890da096 100644 --- a/hipamd/src/amdhip.def +++ b/hipamd/src/amdhip.def @@ -427,3 +427,9 @@ hipMemcpy2DFromArrayAsync_spt hipMemcpy2DToArrayAsync_spt
hipDrvGetErrorName hipDrvGetErrorString +hipUserObjectCreate +hipUserObjectRelease +hipUserObjectRetain +hipGraphRetainUserObject +hipGraphReleaseUserObject + diff --git a/hipamd/src/hip_graph.cpp b/hipamd/src/hip_graph.cpp index dfdabce860..007779ccb6 100644 --- a/hipamd/src/hip_graph.cpp +++ b/hipamd/src/hip_graph.cpp @@ -23,6 +23,7 @@ #include "hip_conversions.hpp" #include "hip_platform.hpp" #include "hip_event.hpp" +#include "top.hpp" std::vector g_captureStreams; amd::Monitor g_captureStreamsLock{"StreamCaptureGlobalList"}; @@ -1017,10 +1018,12 @@ hipError_t ihipGraphInstantiate(hipGraphExec_t* pGraphExec, hipGraph_t graph) { } std::vector> parallelLists; std::unordered_map> nodeWaitLists; + std::unordered_set graphExeUserObj; clonedGraph->GetRunList(parallelLists, nodeWaitLists); std::vector levelOrder; clonedGraph->LevelOrder(levelOrder); - *pGraphExec = new hipGraphExec(levelOrder, parallelLists, nodeWaitLists, clonedNodes); + clonedGraph->GetUserObjs(graphExeUserObj); + *pGraphExec = new hipGraphExec(levelOrder, parallelLists, nodeWaitLists, clonedNodes, graphExeUserObj); if (*pGraphExec != nullptr) { return (*pGraphExec)->Init(); } else { @@ -1965,4 +1968,65 @@ hipError_t hipDeviceGraphMemTrim(int device) { } // not implemented yet return HIP_RETURN(hipSuccess); -} \ No newline at end of file +} +// Creates a user object wrapping `ptr` and `destroy` and returns it through `object_out` holding `initialRefcount` references. +hipError_t hipUserObjectCreate(hipUserObject_t* object_out, void* ptr, hipHostFn_t destroy, unsigned int initialRefcount, unsigned int flags) { + HIP_INIT_API(hipUserObjectCreate, object_out, ptr, destroy, initialRefcount, flags); + if (object_out == nullptr || flags != hipUserObjectNoDestructorSync || initialRefcount == 0) { // zero cannot be honoured: construction itself already holds one reference + HIP_RETURN(hipErrorInvalidValue); + } + + *object_out = new hipUserObject(destroy, ptr, flags); + //! Creating object adds one reference.
+ if (initialRefcount > 1) { + (*object_out)->increaseRefCount(static_cast(initialRefcount - 1)); + } + HIP_RETURN(hipSuccess); +} +// Drops `count` references from `object`; null or unregistered objects and counts above the current refcount are rejected. +hipError_t hipUserObjectRelease(hipUserObject_t object, unsigned int count) { + HIP_INIT_API(hipUserObjectRelease, object, count); + if (object == nullptr || !hipUserObject::isUserObjvalid(object) || (object->referenceCount() < count)) { + HIP_RETURN(hipErrorInvalidValue); + } + object->decreaseRefCount(count); + HIP_RETURN(hipSuccess); +} +// Adds `count` references to `object`; null or unregistered objects are rejected. +hipError_t hipUserObjectRetain(hipUserObject_t object, unsigned int count) { + HIP_INIT_API(hipUserObjectRetain, object, count); + if (object == nullptr || !hipUserObject::isUserObjvalid(object)) { + HIP_RETURN(hipErrorInvalidValue); + } + object->increaseRefCount(count); + HIP_RETURN(hipSuccess); +} +// Registers `object` with the graph set; unless `flags` is hipGraphUserObjectMove, `count` new references are taken first. +hipError_t hipGraphRetainUserObject(hipGraph_t graph, hipUserObject_t object, unsigned int count, unsigned int flags) { + HIP_INIT_API(hipGraphRetainUserObject, graph, object, count, flags); + hipError_t status = hipSuccess; + if (graph == nullptr || object == nullptr || !hipUserObject::isUserObjvalid(object)) { + HIP_RETURN(hipErrorInvalidValue); + } + if (flags != hipGraphUserObjectMove) { + status = hipUserObjectRetain(object, count); + if (status != hipSuccess) { + HIP_RETURN(status); + } + } + graph->addUserObjGraph(object); + HIP_RETURN(status); +} +// Drops `count` references to `object`; the object must currently be registered in the graph user-object set. +hipError_t hipGraphReleaseUserObject(hipGraph_t graph, hipUserObject_t object, unsigned int count) { + HIP_INIT_API(hipGraphReleaseUserObject, graph, object, count); + if (graph == nullptr || object == nullptr || !ihipGraph::isUserObjGraphValid(object)) { + HIP_RETURN(hipErrorInvalidValue); + } + //!
The release below drops every remaining reference, so unregister the object from the graph set first + if (object->referenceCount() == count) { + graph->RemoveUserObjGraph(object); + } + hipError_t status = hipUserObjectRelease(object, count); + HIP_RETURN(status); +} diff --git a/hipamd/src/hip_graph_internal.cpp b/hipamd/src/hip_graph_internal.cpp index c2efb6480f..93beca5327 100644 --- a/hipamd/src/hip_graph_internal.cpp +++ b/hipamd/src/hip_graph_internal.cpp @@ -53,6 +53,9 @@ std::unordered_set ihipGraph::graphSet_; amd::Monitor ihipGraph::graphSetLock_{"Guards global graph set"}; std::unordered_set hipGraphExec::graphExecSet_; amd::Monitor hipGraphExec::graphExecSetLock_{"Guards global exec graph set"}; +std::unordered_set hipUserObject::ObjectSet_; +amd::Monitor hipUserObject::UserObjectLock_{"Guards global user object"}; +std::unordered_set ihipGraph::graphUserObj_; hipError_t hipGraphMemcpyNode1D::ValidateParams(void* dst, const void* src, size_t count, hipMemcpyKind kind) { @@ -635,7 +638,6 @@ void ihipGraph::LevelOrder(std::vector& levelOrder) { const ihipGraph* ihipGraph::getOriginalGraph() const { return pOriginalGraph_; } - void ihipGraph::setOriginalGraph(const ihipGraph* pOriginalGraph) { pOriginalGraph_ = pOriginalGraph; } diff --git a/hipamd/src/hip_graph_internal.hpp b/hipamd/src/hip_graph_internal.hpp index 886a7d3a84..43ddfdbb72 100644 --- a/hipamd/src/hip_graph_internal.hpp +++ b/hipamd/src/hip_graph_internal.hpp @@ -43,6 +43,55 @@ hipError_t FillCommands(std::vector>& parallelLists, void UpdateQueue(std::vector>& parallelLists, amd::HostQueue*& queue, hipGraphExec* ptr); +struct hipUserObject : public amd::ReferenceCountedObject { // reference-counted holder for a user pointer and its destroy callback + typedef void(*UserCallbackDestructor)(void* data); + static std::unordered_set ObjectSet_; // every live hipUserObject; guarded by UserObjectLock_ + static amd::Monitor UserObjectLock_; + public: + hipUserObject(UserCallbackDestructor callback, void* data, unsigned int flags) : ReferenceCountedObject(), + callback_(callback), data_(data), flags_(flags) { + amd::ScopedLock lock(UserObjectLock_); + ObjectSet_.insert(this); + } + // Runs the user callback (when one was supplied) on the stored data, then removes this object from the live set. + virtual
~hipUserObject() { + amd::ScopedLock lock(UserObjectLock_); + if (callback_ != nullptr) { + callback_(data_); // NOTE(review): invoked while UserObjectLock_ is held; confirm the callback never re-enters the user object APIs + } + ObjectSet_.erase(this); + } + // Takes `refCount` additional references, one retain() call per reference. + void increaseRefCount(const unsigned int refCount) { + for (uint32_t i = 0; i < refCount; i++) { + retain(); + } + } + // Drops `refCount` references, one release() call per reference; callers must not pass more than referenceCount(). + void decreaseRefCount(const unsigned int refCount) { + assert((refCount <= referenceCount()) && "count is bigger than refcount"); + for (uint32_t i = 0; i < refCount; i++) { + release(); + } + } + // Returns true when `pUserObj` is present in the set of live user objects. + static bool isUserObjvalid(hipUserObject* pUserObj) { + amd::ScopedLock lock(UserObjectLock_); + if (ObjectSet_.find(pUserObj) == ObjectSet_.end()) { + return false; + } + return true; + } + + private: + UserCallbackDestructor callback_; + void* data_; + unsigned int flags_; + //! Disable default operator= + hipUserObject& operator=(const hipUserObject&) = delete; + //! Disable copy constructor + hipUserObject(const hipUserObject& obj) = delete; +}; struct hipGraphNode { protected: amd::HostQueue* queue_; @@ -237,6 +286,7 @@ struct ihipGraph { const ihipGraph* pOriginalGraph_ = nullptr; static std::unordered_set graphSet_; static amd::Monitor graphSetLock_; + static std::unordered_set graphUserObj_; // NOTE(review): static, so one set is shared by every ihipGraph while the destructor releases every entry; confirm per-graph ownership is not required public: ihipGraph() { @@ -250,6 +300,9 @@ struct ihipGraph { } amd::ScopedLock lock(graphSetLock_); graphSet_.erase(this); + for (auto userobj : graphUserObj_) { + userobj->release(); + } }; // check graphs validity @@ -272,6 +325,24 @@ struct ihipGraph { std::vector> GetEdges() const; // returns the original graph ptr if cloned const ihipGraph* getOriginalGraph() const; + // Add user obj resource to graph + void addUserObjGraph(hipUserObject* pUserObj) { + amd::ScopedLock lock(graphSetLock_); + graphUserObj_.insert(pUserObj); + } + // Check user obj resource from graph is valid + static bool isUserObjGraphValid(hipUserObject* pUserObj) { + amd::ScopedLock lock(graphSetLock_); + if (graphUserObj_.find(pUserObj) == graphUserObj_.end()) { + return false; + } + return true; + } + // Delete user obj resource from
graph + void RemoveUserObjGraph(hipUserObject* pUserObj) { + amd::ScopedLock lock(graphSetLock_); + graphUserObj_.erase(pUserObj); + } // saves the original graph ptr if cloned void setOriginalGraph(const ihipGraph* pOriginalGraph); @@ -281,6 +352,14 @@ struct ihipGraph { void GetRunList(std::vector>& parallelLists, std::unordered_map>& dependencies); void LevelOrder(std::vector& levelOrder); + void GetUserObjs( std::unordered_set& graphExeUserObjs) { + amd::ScopedLock lock(graphSetLock_); // held for the whole walk: add and remove mutate graphUserObj_ under this same lock + for (auto userObj : graphUserObj_) + { + userObj->retain(); + graphExeUserObjs.insert(userObj); + } + } ihipGraph* clone(std::unordered_map& clonedNodes) const; ihipGraph* clone() const; }; @@ -295,17 +374,19 @@ struct hipGraphExec { std::unordered_map clonedNodes_; amd::Command* lastEnqueuedCommand_; static std::unordered_set graphExecSet_; + std::unordered_set graphExeUserObj_; static amd::Monitor graphExecSetLock_; public: hipGraphExec(std::vector& levelOrder, std::vector>& lists, std::unordered_map>& nodeWaitLists, - std::unordered_map& clonedNodes) + std::unordered_map& clonedNodes, std::unordered_set& userObjs) : parallelLists_(lists), levelOrder_(levelOrder), nodeWaitLists_(nodeWaitLists), clonedNodes_(clonedNodes), lastEnqueuedCommand_(nullptr), + graphExeUserObj_(userObjs), currentQueueIndex_(0) { amd::ScopedLock lock(graphExecSetLock_); graphExecSet_.insert(this); @@ -319,6 +400,9 @@ struct hipGraphExec { } for (auto it = clonedNodes_.begin(); it != clonedNodes_.end(); it++) delete it->second; amd::ScopedLock lock(graphExecSetLock_); + for (auto userobj : graphExeUserObj_) { + userobj->release(); + } graphExecSet_.erase(this); } diff --git a/hipamd/src/hip_hcc.def.in b/hipamd/src/hip_hcc.def.in index 9ceda6bd6c..73be353b75 100644 --- a/hipamd/src/hip_hcc.def.in +++ b/hipamd/src/hip_hcc.def.in @@ -440,4 +440,9 @@ hipMemcpyFromArray_spt hipMemcpy2DToArray_spt hipMemcpy2DFromArrayAsync_spt hipDrvGetErrorName -hipDrvGetErrorString \ No newline at end of file
+hipDrvGetErrorString +hipUserObjectCreate +hipUserObjectRelease +hipUserObjectRetain +hipGraphRetainUserObject +hipGraphReleaseUserObject diff --git a/hipamd/src/hip_hcc.map.in b/hipamd/src/hip_hcc.map.in index caae48fd90..4500c3829a 100644 --- a/hipamd/src/hip_hcc.map.in +++ b/hipamd/src/hip_hcc.map.in @@ -441,6 +441,11 @@ global: hipMemRetainAllocationHandle; hipMemSetAccess; hipMemUnmap; + hipUserObjectCreate; + hipUserObjectRelease; + hipUserObjectRetain; + hipGraphRetainUserObject; + hipGraphReleaseUserObject; local: *; } hip_5.0;