From 0f49c4a97ff22694fc15ed57401f832a020a2bcd Mon Sep 17 00:00:00 2001 From: "Manocha, Rahul" Date: Fri, 15 Aug 2025 12:51:35 -0700 Subject: [PATCH] SWDEV-545952 - API definitions for hipStreamSet/GetAttribute (#831) Co-authored-by: Rahul Manocha --- CHANGELOG.md | 2 + .../include/hip/amd_detail/hip_api_trace.hpp | 7 ++ hipamd/include/hip/amd_detail/hip_prof_str.h | 50 ++++++++++++- hipamd/src/amdhip.def | 2 + hipamd/src/hip_api_trace.cpp | 14 ++-- hipamd/src/hip_event.cpp | 8 ++- hipamd/src/hip_event.hpp | 4 +- hipamd/src/hip_hcc.map.in | 2 + hipamd/src/hip_stream.cpp | 72 +++++++++++++++++++ hipamd/src/hip_table_interface.cpp | 10 ++- rocclr/device/device.hpp | 14 +++- rocclr/device/rocm/rocdevice.cpp | 8 ++- rocclr/device/rocm/rocdevice.hpp | 6 +- rocclr/device/rocm/rocvirtual.hpp | 6 +- rocclr/platform/commandqueue.cpp | 6 +- rocclr/platform/commandqueue.hpp | 9 +++ 16 files changed, 200 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb0e7b19da..63ef0c856b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs - `hipMemsetD2D16Async` Used for setting 2D memory range with specified 16-bit values asynchronously - `hipMemsetD2D32` Used for setting 2D memory range with specified 32-bit values - `hipMemsetD2D32Async` Used for setting 2D memory range with specified 32-bit values asynchronously + - `hipStreamSetAttribute` sets attributes such as synchronization policy for a given stream + - `hipStreamGetAttribute` returns attributes such as priority for a given stream ## HIP 7.0 for ROCm 7.0 diff --git a/hipamd/include/hip/amd_detail/hip_api_trace.hpp b/hipamd/include/hip/amd_detail/hip_api_trace.hpp index de4273250a..59f27677c5 100644 --- a/hipamd/include/hip/amd_detail/hip_api_trace.hpp +++ b/hipamd/include/hip/amd_detail/hip_api_trace.hpp @@ -1059,6 +1059,11 @@ typedef hipError_t (*t_hipMemsetD2D32)(hipDeviceptr_t dst, size_t dstPitch, unsi size_t 
width, size_t height); typedef hipError_t (*t_hipMemsetD2D32Async)(hipDeviceptr_t dst, size_t dstPitch, unsigned int value, size_t width, size_t height, hipStream_t stream); +typedef hipError_t (*t_hipStreamSetAttribute)(hipStream_t stream, hipStreamAttrID attr, + const hipStreamAttrValue *value); +typedef hipError_t (*t_hipStreamGetAttribute)(hipStream_t stream, hipStreamAttrID attr, + hipStreamAttrValue *value_out); + // HIP Compiler dispatch table struct HipCompilerDispatchTable { // HIP_COMPILER_API_TABLE_STEP_VERSION == 0 @@ -1608,6 +1613,8 @@ struct HipDispatchTable { t_hipMemsetD2D16Async hipMemsetD2D16Async_fn; t_hipMemsetD2D32 hipMemsetD2D32_fn; t_hipMemsetD2D32Async hipMemsetD2D32Async_fn; + t_hipStreamGetAttribute hipStreamGetAttribute_fn; + t_hipStreamSetAttribute hipStreamSetAttribute_fn; // HIP_RUNTIME_API_TABLE_STEP_VERSION = 14 // removed HIP_MEMSET_NODE_PARAMS replaced by hipMemsetParams diff --git a/hipamd/include/hip/amd_detail/hip_prof_str.h b/hipamd/include/hip/amd_detail/hip_prof_str.h index 4179556414..6a10b8342f 100644 --- a/hipamd/include/hip/amd_detail/hip_prof_str.h +++ b/hipamd/include/hip/amd_detail/hip_prof_str.h @@ -445,7 +445,9 @@ enum hip_api_id_t { HIP_API_ID_hipMemsetD2D32Async = 425, HIP_API_ID_hipMemsetD2D8 = 426, HIP_API_ID_hipMemsetD2D8Async = 427, - HIP_API_ID_LAST = 427, + HIP_API_ID_hipStreamGetAttribute = 428, + HIP_API_ID_hipStreamSetAttribute = 429, + HIP_API_ID_LAST = 429, HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice), HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties), @@ -860,6 +862,7 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipStreamCreateWithPriority: return "hipStreamCreateWithPriority"; case HIP_API_ID_hipStreamDestroy: return "hipStreamDestroy"; case HIP_API_ID_hipStreamEndCapture: return "hipStreamEndCapture"; + case HIP_API_ID_hipStreamGetAttribute: return "hipStreamGetAttribute"; case 
HIP_API_ID_hipStreamGetCaptureInfo: return "hipStreamGetCaptureInfo"; case HIP_API_ID_hipStreamGetCaptureInfo_v2: return "hipStreamGetCaptureInfo_v2"; case HIP_API_ID_hipStreamGetDevice: return "hipStreamGetDevice"; @@ -867,6 +870,7 @@ static inline const char* hip_api_name(const uint32_t id) { case HIP_API_ID_hipStreamGetPriority: return "hipStreamGetPriority"; case HIP_API_ID_hipStreamIsCapturing: return "hipStreamIsCapturing"; case HIP_API_ID_hipStreamQuery: return "hipStreamQuery"; + case HIP_API_ID_hipStreamSetAttribute: return "hipStreamSetAttribute"; case HIP_API_ID_hipStreamSynchronize: return "hipStreamSynchronize"; case HIP_API_ID_hipStreamUpdateCaptureDependencies: return "hipStreamUpdateCaptureDependencies"; case HIP_API_ID_hipStreamWaitEvent: return "hipStreamWaitEvent"; @@ -1282,6 +1286,7 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipStreamCreateWithPriority", name) == 0) return HIP_API_ID_hipStreamCreateWithPriority; if (strcmp("hipStreamDestroy", name) == 0) return HIP_API_ID_hipStreamDestroy; if (strcmp("hipStreamEndCapture", name) == 0) return HIP_API_ID_hipStreamEndCapture; + if (strcmp("hipStreamGetAttribute", name) == 0) return HIP_API_ID_hipStreamGetAttribute; if (strcmp("hipStreamGetCaptureInfo", name) == 0) return HIP_API_ID_hipStreamGetCaptureInfo; if (strcmp("hipStreamGetCaptureInfo_v2", name) == 0) return HIP_API_ID_hipStreamGetCaptureInfo_v2; if (strcmp("hipStreamGetDevice", name) == 0) return HIP_API_ID_hipStreamGetDevice; @@ -1289,6 +1294,7 @@ static inline uint32_t hipApiIdByName(const char* name) { if (strcmp("hipStreamGetPriority", name) == 0) return HIP_API_ID_hipStreamGetPriority; if (strcmp("hipStreamIsCapturing", name) == 0) return HIP_API_ID_hipStreamIsCapturing; if (strcmp("hipStreamQuery", name) == 0) return HIP_API_ID_hipStreamQuery; + if (strcmp("hipStreamSetAttribute", name) == 0) return HIP_API_ID_hipStreamSetAttribute; if (strcmp("hipStreamSynchronize", name) == 0) return 
HIP_API_ID_hipStreamSynchronize; if (strcmp("hipStreamUpdateCaptureDependencies", name) == 0) return HIP_API_ID_hipStreamUpdateCaptureDependencies; if (strcmp("hipStreamWaitEvent", name) == 0) return HIP_API_ID_hipStreamWaitEvent; @@ -3591,6 +3597,12 @@ typedef struct hip_api_data_s { hipGraph_t* pGraph; hipGraph_t pGraph__val; } hipStreamEndCapture; + struct { + hipStream_t stream; + hipLaunchAttributeID attr; + const hipLaunchAttributeValue* value_out; + hipLaunchAttributeValue value_out__val; + } hipStreamGetAttribute; struct { hipStream_t stream; hipStreamCaptureStatus* pCaptureStatus; @@ -3634,6 +3646,12 @@ typedef struct hip_api_data_s { struct { hipStream_t stream; } hipStreamQuery; + struct { + hipStream_t stream; + hipLaunchAttributeID attr; + const hipLaunchAttributeValue* value; + hipLaunchAttributeValue value__val; + } hipStreamSetAttribute; struct { hipStream_t stream; } hipStreamSynchronize; @@ -6138,6 +6156,9 @@ typedef struct hip_api_data_s { cb_data.args.hipStreamEndCapture.stream = (hipStream_t)stream; \ cb_data.args.hipStreamEndCapture.pGraph = (hipGraph_t*)pGraph; \ }; +// hipStreamGetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value_out')] +#define INIT_hipStreamGetAttribute_CB_ARGS_DATA(cb_data) { \ +}; // hipStreamGetCaptureInfo[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus'), ('unsigned long long*', 'pId')] #define INIT_hipStreamGetCaptureInfo_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipStreamGetCaptureInfo.stream = (hipStream_t)stream; \ @@ -6177,6 +6198,9 @@ typedef struct hip_api_data_s { #define INIT_hipStreamQuery_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipStreamQuery.stream = (hipStream_t)stream; \ }; +// hipStreamSetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value')] +#define INIT_hipStreamSetAttribute_CB_ARGS_DATA(cb_data) { \ +}; // hipStreamSynchronize[('hipStream_t', 'stream')] 
#define INIT_hipStreamSynchronize_CB_ARGS_DATA(cb_data) { \ cb_data.args.hipStreamSynchronize.stream = (hipStream_t)stream; \ @@ -7889,6 +7913,10 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { case HIP_API_ID_hipStreamEndCapture: if (data->args.hipStreamEndCapture.pGraph) data->args.hipStreamEndCapture.pGraph__val = *(data->args.hipStreamEndCapture.pGraph); break; +// hipStreamGetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value_out')] + case HIP_API_ID_hipStreamGetAttribute: + if (data->args.hipStreamGetAttribute.value_out) data->args.hipStreamGetAttribute.value_out__val = *(data->args.hipStreamGetAttribute.value_out); + break; // hipStreamGetCaptureInfo[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus'), ('unsigned long long*', 'pId')] case HIP_API_ID_hipStreamGetCaptureInfo: if (data->args.hipStreamGetCaptureInfo.pCaptureStatus) data->args.hipStreamGetCaptureInfo.pCaptureStatus__val = *(data->args.hipStreamGetCaptureInfo.pCaptureStatus); @@ -7921,6 +7949,10 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) { // hipStreamQuery[('hipStream_t', 'stream')] case HIP_API_ID_hipStreamQuery: break; +// hipStreamSetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value')] + case HIP_API_ID_hipStreamSetAttribute: + if (data->args.hipStreamSetAttribute.value) data->args.hipStreamSetAttribute.value__val = *(data->args.hipStreamSetAttribute.value); + break; // hipStreamSynchronize[('hipStream_t', 'stream')] case HIP_API_ID_hipStreamSynchronize: break; @@ -11094,6 +11126,14 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da else { oss << ", pGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamEndCapture.pGraph__val); } oss << ")"; break; + case HIP_API_ID_hipStreamGetAttribute: + oss << "hipStreamGetAttribute("; + oss << 
"stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.stream); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.attr); + if (data->args.hipStreamGetAttribute.value_out == NULL) oss << ", value_out=NULL"; + else { oss << ", value_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.value_out__val); } + oss << ")"; + break; case HIP_API_ID_hipStreamGetCaptureInfo: oss << "hipStreamGetCaptureInfo("; oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo.stream); @@ -11151,6 +11191,14 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamQuery.stream); oss << ")"; break; + case HIP_API_ID_hipStreamSetAttribute: + oss << "hipStreamSetAttribute("; + oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.stream); + oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.attr); + if (data->args.hipStreamSetAttribute.value == NULL) oss << ", value=NULL"; + else { oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.value__val); } + oss << ")"; + break; case HIP_API_ID_hipStreamSynchronize: oss << "hipStreamSynchronize("; oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSynchronize.stream); diff --git a/hipamd/src/amdhip.def b/hipamd/src/amdhip.def index a93edcca5d..1f0372e078 100644 --- a/hipamd/src/amdhip.def +++ b/hipamd/src/amdhip.def @@ -499,3 +499,5 @@ hipMemsetD2D16 hipMemsetD2D16Async hipMemsetD2D32 hipMemsetD2D32Async +hipStreamGetAttribute +hipStreamSetAttribute diff --git a/hipamd/src/hip_api_trace.cpp b/hipamd/src/hip_api_trace.cpp index 1e4f4ca22d..72cbaba390 100644 --- a/hipamd/src/hip_api_trace.cpp +++ 
b/hipamd/src/hip_api_trace.cpp @@ -828,7 +828,7 @@ hipError_t hipEventRecordWithFlags(hipEvent_t event, hipStream_t stream, unsigne hipError_t hipLaunchKernelExC(const hipLaunchConfig_t* config, const void* fPtr, void** args); hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f, void** params, void** extra); -hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size, +hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size, hipMemRangeHandleType handleType, unsigned long long flags); hipError_t hipMemsetD2D8(hipDeviceptr_t dst, size_t dstPitch, unsigned char value, size_t width, @@ -843,7 +843,10 @@ hipError_t hipMemsetD2D32(hipDeviceptr_t dst, size_t dstPitch, unsigned int valu size_t height); hipError_t hipMemsetD2D32Async(hipDeviceptr_t dst, size_t dstPitch, unsigned int value, size_t width, size_t height, hipStream_t stream); - +hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr, + hipStreamAttrValue *value); +hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr, + const hipStreamAttrValue *value); } // namespace hip namespace hip { @@ -1366,6 +1369,8 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) { ptrDispatchTable->hipMemsetD2D16Async_fn = hip::hipMemsetD2D16Async; ptrDispatchTable->hipMemsetD2D32_fn = hip::hipMemsetD2D32; ptrDispatchTable->hipMemsetD2D32Async_fn = hip::hipMemsetD2D32Async; + ptrDispatchTable->hipStreamGetAttribute_fn = hip::hipStreamGetAttribute; + ptrDispatchTable->hipStreamSetAttribute_fn = hip::hipStreamSetAttribute; } #if HIP_ROCPROFILER_REGISTER > 0 @@ -2017,14 +2022,15 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D16_fn, 480); HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D16Async_fn, 481); HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D32_fn, 482); HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D32Async_fn, 483); - +HIP_ENFORCE_ABI(HipDispatchTable, hipStreamGetAttribute_fn, 484); 
+HIP_ENFORCE_ABI(HipDispatchTable, hipStreamSetAttribute_fn, 485); // if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below // will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.: // // HIP_ENFORCE_ABI(, , 8) // // HIP_ENFORCE_ABI_VERSIONING(
, 9) <- 8 + 1 = 9 -HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 484) +HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 486) static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 14, "If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function " diff --git a/hipamd/src/hip_event.cpp b/hipamd/src/hip_event.cpp index 786fac7edc..f18b2febc5 100644 --- a/hipamd/src/hip_event.cpp +++ b/hipamd/src/hip_event.cpp @@ -76,7 +76,9 @@ hipError_t Event::synchronize() { auto hip_device = g_devices[deviceId()]; // Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status static constexpr bool kWaitCompletion = true; - if (!hip_device->devices()[0]->IsHwEventReady(*event_, kWaitCompletion, flags_)) { + amd::SyncPolicy policy = (flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking : + amd::SyncPolicy::Auto; + if (!hip_device->devices()[0]->IsHwEventReady(*event_, kWaitCompletion, policy)) { event_->awaitCompletion(); } return hipSuccess; @@ -88,7 +90,9 @@ bool Event::awaitEventCompletion() { } bool EventDD::awaitEventCompletion() { - return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, true, flags_); + amd::SyncPolicy policy = (flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking : + amd::SyncPolicy::Auto; + return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, true, policy); } hipError_t Event::elapsedTime(Event& eStop, float& ms) { diff --git a/hipamd/src/hip_event.hpp b/hipamd/src/hip_event.hpp index 33d80dfbf2..eed64a8701 100644 --- a/hipamd/src/hip_event.hpp +++ b/hipamd/src/hip_event.hpp @@ -98,7 +98,9 @@ class Event { std::vector nodesPrevToRecorded_; protected: bool CheckHwEvent() { - return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, false, flags_); + amd::SyncPolicy policy = (flags_ == hipEventBlockingSync) ? 
amd::SyncPolicy::Blocking : + amd::SyncPolicy::Auto; + return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, false, policy); } public: diff --git a/hipamd/src/hip_hcc.map.in b/hipamd/src/hip_hcc.map.in index f4ad7dba11..d5f1ab6610 100644 --- a/hipamd/src/hip_hcc.map.in +++ b/hipamd/src/hip_hcc.map.in @@ -615,6 +615,8 @@ global: hipMemsetD2D16Async; hipMemsetD2D32; hipMemsetD2D32Async; + hipStreamGetAttribute; + hipStreamSetAttribute; local: *; } hip_6.5; \ No newline at end of file diff --git a/hipamd/src/hip_stream.cpp b/hipamd/src/hip_stream.cpp index 4c0b778b78..9d4bfddafe 100644 --- a/hipamd/src/hip_stream.cpp +++ b/hipamd/src/hip_stream.cpp @@ -820,4 +820,76 @@ hipError_t hipStreamGetDevice(hipStream_t stream, hipDevice_t* device) { HIP_RETURN(hipSuccess); } +// ================================================================================================ +hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr, + const hipStreamAttrValue *value) { + HIP_INIT_API(hipStreamSetAttribute, stream, attr, value); + hipError_t status = hipSuccess; + if (value == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (!hip::isValid(stream)) { + HIP_RETURN(hipErrorInvalidResourceHandle); + } + + getStreamPerThread(stream); + + // if stream is capturing, don't allow changing stream attributes + if (hip::Stream::StreamCaptureOngoing(stream) == true) { + HIP_RETURN(hipErrorStreamCaptureUnsupported); + } + + hip::Stream* s = reinterpret_cast<hip::Stream*>(stream); + + switch (attr) { + case hipStreamAttributeSynchronizationPolicy: { + hipSynchronizationPolicy syncPolicy = value->syncPolicy; + // validate sync policy + if (syncPolicy < hipSyncPolicyAuto || syncPolicy > hipSyncPolicyBlockingSync) { + HIP_RETURN(hipErrorInvalidValue); + } + s->SetSyncPolicy(static_cast<amd::SyncPolicy>(syncPolicy)); + break; + } + default: { + HIP_RETURN(hipErrorInvalidValue); + } + } + + HIP_RETURN(hipSuccess); +} + +hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID
attr, + hipStreamAttrValue *value_out) { + HIP_INIT_API(hipStreamGetAttribute, stream, attr, value_out); + + if (value_out == nullptr) { + HIP_RETURN(hipErrorInvalidValue); + } + + if (!hip::isValid(stream)) { + HIP_RETURN(hipErrorInvalidResourceHandle); + } + + getStreamPerThread(stream); + + hip::Stream* s = reinterpret_cast<hip::Stream*>(stream); + + switch(attr) { + case hipStreamAttributeSynchronizationPolicy: { + value_out->syncPolicy = static_cast<hipSynchronizationPolicy>(s->GetSyncPolicy()); + break; + } + case hipStreamAttributePriority: { + value_out->priority = s->GetPriority(); + break; + } + default: { + HIP_RETURN(hipErrorInvalidValue); + } + } + + HIP_RETURN(hipSuccess); +} } // hip namespace diff --git a/hipamd/src/hip_table_interface.cpp b/hipamd/src/hip_table_interface.cpp index dd4b1996c6..614f0a6eed 100644 --- a/hipamd/src/hip_table_interface.cpp +++ b/hipamd/src/hip_table_interface.cpp @@ -1883,7 +1883,7 @@ hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f return hip::GetHipDispatchTable()->hipDrvLaunchKernelEx_fn(config, f, kernel, extra); } -hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size, +hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size, hipMemRangeHandleType handleType, unsigned long long flags) { return hip::GetHipDispatchTable()->hipMemGetHandleForAddressRange_fn(handle, dptr, size, @@ -1915,4 +1915,12 @@ hipError_t hipMemsetD2D32Async(hipDeviceptr_t dst, size_t dstPitch, unsigned int size_t height, hipStream_t stream) { return hip::GetHipDispatchTable()->hipMemsetD2D32Async_fn(dst, dstPitch, value, width, height, stream); +} +hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr, + const hipStreamAttrValue *value) { + return hip::GetHipDispatchTable()->hipStreamSetAttribute_fn(stream, attr, value); +} +hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr, + hipStreamAttrValue *value) { + return
hip::GetHipDispatchTable()->hipStreamGetAttribute_fn(stream, attr, value); } \ No newline at end of file diff --git a/rocclr/device/device.hpp b/rocclr/device/device.hpp index 6ad0dc7c9f..fc8f952c09 100644 --- a/rocclr/device/device.hpp +++ b/rocclr/device/device.hpp @@ -1383,6 +1383,14 @@ class VirtualDevice : public amd::HeapObject { namespace amd { /*! IHIP IPC MEMORY Structure */ #define AMD_IPC_MEM_HANDLE_SIZE 32 + +typedef enum SyncPolicy { + Auto = 1, + Spin = 2, + Yield = 3, + Blocking = 4 +} SyncPolicy; + //! MemoryObject map lookup class class MemObjMap : public AllStatic { public: @@ -2050,9 +2058,9 @@ class Device : public RuntimeObject { // Returns the status of HW event, associated with amd::Event virtual bool IsHwEventReady( - const amd::Event& event, //!< AMD event for HW status validation - bool wait = false, //!< If true then forces the event completion - uint32_t hip_event_flags = 0 //!< flags associated with the event. 0 = hipEventDefault + const amd::Event& event, //!< AMD event for HW status validation + bool wait = false, //!< If true then forces the event completion + amd::SyncPolicy policy = amd::SyncPolicy::Auto ) const { return false; }; diff --git a/rocclr/device/rocm/rocdevice.cpp b/rocclr/device/rocm/rocdevice.cpp index 3274cb22f3..7621d30412 100644 --- a/rocclr/device/rocm/rocdevice.cpp +++ b/rocclr/device/rocm/rocdevice.cpp @@ -2840,7 +2840,7 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI } // ================================================================================================ -bool Device::IsHwEventReady(const amd::Event& event, bool wait, uint32_t hip_event_flags) const { +bool Device::IsHwEventReady(const amd::Event& event, bool wait, amd::SyncPolicy policy) const { void* hw_event = (event.NotifyEvent() != nullptr) ? 
event.NotifyEvent()->HwEvent() : event.HwEvent(); if (hw_event == nullptr) { @@ -2851,8 +2851,10 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait, uint32_t hip_eve // when set the CPU gives up host thread for other work // when not set the CPU enters a busy-wait on the event to occur constexpr int kHipEventBlockingSync = 0x1; - bool active_wait = !(hip_event_flags & kHipEventBlockingSync) && ActiveWait(); - return WaitForSignal(reinterpret_cast(hw_event)->signal_, active_wait); + bool active_wait = !((policy == amd::SyncPolicy::Blocking) & kHipEventBlockingSync) && + ActiveWait(); + bool yield = (policy == amd::SyncPolicy::Yield); + return WaitForSignal(reinterpret_cast(hw_event)->signal_, active_wait, yield); } auto signal = reinterpret_cast(hw_event)->signal_; diff --git a/rocclr/device/rocm/rocdevice.hpp b/rocclr/device/rocm/rocdevice.hpp index 0c7ce24805..1b06d0bc3a 100644 --- a/rocclr/device/rocm/rocdevice.hpp +++ b/rocclr/device/rocm/rocdevice.hpp @@ -291,7 +291,8 @@ class NullDevice : public amd::Device { } bool IsHwEventReady(const amd::Event& event, bool wait = false, - uint32_t hip_event_flags = 0) const override { + amd::SyncPolicy policy = amd::SyncPolicy::Auto) + const override { return false; } @@ -447,7 +448,8 @@ class Device : public NullDevice { cl_set_device_clock_mode_output_amd* pSetClockModeOutput); virtual bool IsHwEventReady(const amd::Event& event, bool wait = false, - uint32_t hip_event_flags = 0) const; + amd::SyncPolicy policy = amd::SyncPolicy::Auto) + const; virtual void getHwEventTime(const amd::Event& event, uint64_t* start, uint64_t* end) const; virtual void ReleaseGlobalSignal(void* signal) const; virtual bool CreateUserEvent(amd::UserEvent* event) const; diff --git a/rocclr/device/rocm/rocvirtual.hpp b/rocclr/device/rocm/rocvirtual.hpp index 1c54f653c3..5d985f224d 100644 --- a/rocclr/device/rocm/rocvirtual.hpp +++ b/rocclr/device/rocm/rocvirtual.hpp @@ -32,6 +32,7 @@ #include "hsa/hsa_ven_amd_aqlprofile.h" #include 
"rocsched.hpp" #include "device/device.hpp" +#include "os/os.hpp" #include namespace amd::roc { @@ -49,7 +50,7 @@ constexpr static uint64_t kUnlimitedWait = std::numeric_limits::max(); constexpr static uint64_t kTimeout4Secs = 4 * M; -inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false) { +inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false, bool yield = false) { hsa_wait_state_t wait_state = HSA_WAIT_STATE_BLOCKED; if (active_wait) { @@ -81,6 +82,9 @@ inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false) { "(0x%lx) for %d ns", signal.handle, kTimeout4Secs); return true; } + if (yield && wait_state == HSA_WAIT_STATE_ACTIVE) { + amd::Os::yield(); + } } } diff --git a/rocclr/platform/commandqueue.cpp b/rocclr/platform/commandqueue.cpp index 5fa3372213..26e69fe5fa 100644 --- a/rocclr/platform/commandqueue.cpp +++ b/rocclr/platform/commandqueue.cpp @@ -41,7 +41,8 @@ HostQueue::HostQueue(Context& context, Device& device, cl_command_queue_properti lastEnqueueCommand_(nullptr), head_(nullptr), tail_(nullptr), - isActive_(false) { + isActive_(false), + sync_policy_(amd::SyncPolicy::Auto) { if (GPU_FORCE_QUEUE_PROFILING) { properties().set(CL_QUEUE_PROFILING_ENABLE); } @@ -198,9 +199,10 @@ void HostQueue::finish(bool cpu_wait) { } command->enqueue(); } + // Check HW status of the ROCcrl event. 
Note: not all ROCclr modes support HW status static constexpr bool kWaitCompletion = true; - if (cpu_wait || !device().IsHwEventReady(command->event(), kWaitCompletion)) { + if (cpu_wait || !device().IsHwEventReady(command->event(), kWaitCompletion, GetSyncPolicy())) { ClPrint(LOG_DEBUG, LOG_CMD, "No HW event or batch size is less than %zu, " "await command completion", diff --git a/rocclr/platform/commandqueue.hpp b/rocclr/platform/commandqueue.hpp index f46c59a7f3..5ce5728a20 100644 --- a/rocclr/platform/commandqueue.hpp +++ b/rocclr/platform/commandqueue.hpp @@ -307,6 +307,13 @@ class HostQueue : public CommandQueue { return thread_.vdev()->getQueueID(); } + //! Returns Synchronization Policy for the current stream + amd::SyncPolicy GetSyncPolicy() const { return sync_policy_; } + //! Set Synchronization Policy used by Queue + void SetSyncPolicy(amd::SyncPolicy value) { + sync_policy_ = value; + } + private: Command* head_; //!< Head of the batch list Command* tail_; //!< Tail of the batch list @@ -315,6 +322,8 @@ private: //! True if this command queue is active bool isActive_; bool forceDestroy_ = false; //!< Destroy the queue in the current state + + amd::SyncPolicy sync_policy_; //!< Used for controlling stream synchronization }; class DeviceQueue : public CommandQueue {