SWDEV-545952 - API definitions for hipStreamSet/GetAttribute (#831)

Co-authored-by: Rahul Manocha <rmanocha@amd.com>
Этот коммит содержится в:
Manocha, Rahul
2025-08-15 12:51:35 -07:00
коммит произвёл GitHub
родитель a5be0f5346
Коммит 0f49c4a97f
16 изменённых файлов: 200 добавлений и 20 удалений
+2
Просмотреть файл
@@ -14,6 +14,8 @@ Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs
- `hipMemsetD2D16Async` Used for setting 2D memory range with specified 16-bit values asynchronously
- `hipMemsetD2D32` Used for setting 2D memory range with specified 32-bit values
- `hipMemsetD2D32Async` Used for setting 2D memory range with specified 32-bit values asynchronously
- `hipStreamSetAttribute` Used for setting stream attributes, such as the synchronization policy, on a given stream
- `hipStreamGetAttribute` Used for querying stream attributes, such as the synchronization policy or priority, of a given stream
## HIP 7.0 for ROCm 7.0
+7
Просмотреть файл
@@ -1059,6 +1059,11 @@ typedef hipError_t (*t_hipMemsetD2D32)(hipDeviceptr_t dst, size_t dstPitch, unsi
size_t width, size_t height);
typedef hipError_t (*t_hipMemsetD2D32Async)(hipDeviceptr_t dst, size_t dstPitch, unsigned int value,
size_t width, size_t height, hipStream_t stream);
typedef hipError_t (*t_hipStreamSetAttribute)(hipStream_t stream, hipStreamAttrID attr,
const hipStreamAttrValue *value);
typedef hipError_t (*t_hipStreamGetAttribute)(hipStream_t stream, hipStreamAttrID attr,
hipStreamAttrValue *value_out);
// HIP Compiler dispatch table
struct HipCompilerDispatchTable {
// HIP_COMPILER_API_TABLE_STEP_VERSION == 0
@@ -1608,6 +1613,8 @@ struct HipDispatchTable {
t_hipMemsetD2D16Async hipMemsetD2D16Async_fn;
t_hipMemsetD2D32 hipMemsetD2D32_fn;
t_hipMemsetD2D32Async hipMemsetD2D32Async_fn;
t_hipStreamGetAttribute hipStreamGetAttribute_fn;
t_hipStreamSetAttribute hipStreamSetAttribute_fn;
// HIP_RUNTIME_API_TABLE_STEP_VERSION = 14
// removed HIP_MEMSET_NODE_PARAMS replaced by hipMemsetParams
+49 -1
Просмотреть файл
@@ -445,7 +445,9 @@ enum hip_api_id_t {
HIP_API_ID_hipMemsetD2D32Async = 425,
HIP_API_ID_hipMemsetD2D8 = 426,
HIP_API_ID_hipMemsetD2D8Async = 427,
HIP_API_ID_LAST = 427,
HIP_API_ID_hipStreamGetAttribute = 428,
HIP_API_ID_hipStreamSetAttribute = 429,
HIP_API_ID_LAST = 429,
HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
@@ -860,6 +862,7 @@ static inline const char* hip_api_name(const uint32_t id) {
case HIP_API_ID_hipStreamCreateWithPriority: return "hipStreamCreateWithPriority";
case HIP_API_ID_hipStreamDestroy: return "hipStreamDestroy";
case HIP_API_ID_hipStreamEndCapture: return "hipStreamEndCapture";
case HIP_API_ID_hipStreamGetAttribute: return "hipStreamGetAttribute";
case HIP_API_ID_hipStreamGetCaptureInfo: return "hipStreamGetCaptureInfo";
case HIP_API_ID_hipStreamGetCaptureInfo_v2: return "hipStreamGetCaptureInfo_v2";
case HIP_API_ID_hipStreamGetDevice: return "hipStreamGetDevice";
@@ -867,6 +870,7 @@ static inline const char* hip_api_name(const uint32_t id) {
case HIP_API_ID_hipStreamGetPriority: return "hipStreamGetPriority";
case HIP_API_ID_hipStreamIsCapturing: return "hipStreamIsCapturing";
case HIP_API_ID_hipStreamQuery: return "hipStreamQuery";
case HIP_API_ID_hipStreamSetAttribute: return "hipStreamSetAttribute";
case HIP_API_ID_hipStreamSynchronize: return "hipStreamSynchronize";
case HIP_API_ID_hipStreamUpdateCaptureDependencies: return "hipStreamUpdateCaptureDependencies";
case HIP_API_ID_hipStreamWaitEvent: return "hipStreamWaitEvent";
@@ -1282,6 +1286,7 @@ static inline uint32_t hipApiIdByName(const char* name) {
if (strcmp("hipStreamCreateWithPriority", name) == 0) return HIP_API_ID_hipStreamCreateWithPriority;
if (strcmp("hipStreamDestroy", name) == 0) return HIP_API_ID_hipStreamDestroy;
if (strcmp("hipStreamEndCapture", name) == 0) return HIP_API_ID_hipStreamEndCapture;
if (strcmp("hipStreamGetAttribute", name) == 0) return HIP_API_ID_hipStreamGetAttribute;
if (strcmp("hipStreamGetCaptureInfo", name) == 0) return HIP_API_ID_hipStreamGetCaptureInfo;
if (strcmp("hipStreamGetCaptureInfo_v2", name) == 0) return HIP_API_ID_hipStreamGetCaptureInfo_v2;
if (strcmp("hipStreamGetDevice", name) == 0) return HIP_API_ID_hipStreamGetDevice;
@@ -1289,6 +1294,7 @@ static inline uint32_t hipApiIdByName(const char* name) {
if (strcmp("hipStreamGetPriority", name) == 0) return HIP_API_ID_hipStreamGetPriority;
if (strcmp("hipStreamIsCapturing", name) == 0) return HIP_API_ID_hipStreamIsCapturing;
if (strcmp("hipStreamQuery", name) == 0) return HIP_API_ID_hipStreamQuery;
if (strcmp("hipStreamSetAttribute", name) == 0) return HIP_API_ID_hipStreamSetAttribute;
if (strcmp("hipStreamSynchronize", name) == 0) return HIP_API_ID_hipStreamSynchronize;
if (strcmp("hipStreamUpdateCaptureDependencies", name) == 0) return HIP_API_ID_hipStreamUpdateCaptureDependencies;
if (strcmp("hipStreamWaitEvent", name) == 0) return HIP_API_ID_hipStreamWaitEvent;
@@ -3591,6 +3597,12 @@ typedef struct hip_api_data_s {
hipGraph_t* pGraph;
hipGraph_t pGraph__val;
} hipStreamEndCapture;
// Tracing record for the arguments of hipStreamGetAttribute(stream, attr, value_out).
struct {
hipStream_t stream;
hipLaunchAttributeID attr;
// Caller's result pointer. It is recorded here as pointer-to-const; the record only reads it.
const hipLaunchAttributeValue* value_out;
// Copy of *value_out, taken by hipApiArgsInit() when value_out is non-null.
hipLaunchAttributeValue value_out__val;
} hipStreamGetAttribute;
struct {
hipStream_t stream;
hipStreamCaptureStatus* pCaptureStatus;
@@ -3634,6 +3646,12 @@ typedef struct hip_api_data_s {
struct {
hipStream_t stream;
} hipStreamQuery;
// Tracing record for the arguments of hipStreamSetAttribute(stream, attr, value).
struct {
hipStream_t stream;
hipLaunchAttributeID attr;
// Caller's input pointer to the attribute value being applied.
const hipLaunchAttributeValue* value;
// Copy of *value, taken by hipApiArgsInit() when value is non-null.
hipLaunchAttributeValue value__val;
} hipStreamSetAttribute;
struct {
hipStream_t stream;
} hipStreamSynchronize;
@@ -6138,6 +6156,9 @@ typedef struct hip_api_data_s {
cb_data.args.hipStreamEndCapture.stream = (hipStream_t)stream; \
cb_data.args.hipStreamEndCapture.pGraph = (hipGraph_t*)pGraph; \
};
// hipStreamGetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value_out')]
// Copies the API arguments into the tracing record. The identifiers `stream`, `attr` and
// `value_out` must be in scope at the expansion site (they are the parameter names of the
// hipStreamGetAttribute implementation). Previously the body was empty, so the record was
// never populated and traces showed no arguments for this API.
#define INIT_hipStreamGetAttribute_CB_ARGS_DATA(cb_data) { \
  cb_data.args.hipStreamGetAttribute.stream = (hipStream_t)stream; \
  cb_data.args.hipStreamGetAttribute.attr = (hipLaunchAttributeID)attr; \
  cb_data.args.hipStreamGetAttribute.value_out = (const hipLaunchAttributeValue*)value_out; \
};
// hipStreamGetCaptureInfo[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus'), ('unsigned long long*', 'pId')]
#define INIT_hipStreamGetCaptureInfo_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipStreamGetCaptureInfo.stream = (hipStream_t)stream; \
@@ -6177,6 +6198,9 @@ typedef struct hip_api_data_s {
#define INIT_hipStreamQuery_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipStreamQuery.stream = (hipStream_t)stream; \
};
// hipStreamSetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value')]
// Copies the API arguments into the tracing record. The identifiers `stream`, `attr` and
// `value` must be in scope at the expansion site (they are the parameter names of the
// hipStreamSetAttribute implementation). Previously the body was empty, so the record was
// never populated and traces showed no arguments for this API.
#define INIT_hipStreamSetAttribute_CB_ARGS_DATA(cb_data) { \
  cb_data.args.hipStreamSetAttribute.stream = (hipStream_t)stream; \
  cb_data.args.hipStreamSetAttribute.attr = (hipLaunchAttributeID)attr; \
  cb_data.args.hipStreamSetAttribute.value = (const hipLaunchAttributeValue*)value; \
};
// hipStreamSynchronize[('hipStream_t', 'stream')]
#define INIT_hipStreamSynchronize_CB_ARGS_DATA(cb_data) { \
cb_data.args.hipStreamSynchronize.stream = (hipStream_t)stream; \
@@ -7889,6 +7913,10 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
case HIP_API_ID_hipStreamEndCapture:
if (data->args.hipStreamEndCapture.pGraph) data->args.hipStreamEndCapture.pGraph__val = *(data->args.hipStreamEndCapture.pGraph);
break;
// hipStreamGetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value_out')]
case HIP_API_ID_hipStreamGetAttribute:
if (data->args.hipStreamGetAttribute.value_out) data->args.hipStreamGetAttribute.value_out__val = *(data->args.hipStreamGetAttribute.value_out);
break;
// hipStreamGetCaptureInfo[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus'), ('unsigned long long*', 'pId')]
case HIP_API_ID_hipStreamGetCaptureInfo:
if (data->args.hipStreamGetCaptureInfo.pCaptureStatus) data->args.hipStreamGetCaptureInfo.pCaptureStatus__val = *(data->args.hipStreamGetCaptureInfo.pCaptureStatus);
@@ -7921,6 +7949,10 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
// hipStreamQuery[('hipStream_t', 'stream')]
case HIP_API_ID_hipStreamQuery:
break;
// hipStreamSetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value')]
case HIP_API_ID_hipStreamSetAttribute:
if (data->args.hipStreamSetAttribute.value) data->args.hipStreamSetAttribute.value__val = *(data->args.hipStreamSetAttribute.value);
break;
// hipStreamSynchronize[('hipStream_t', 'stream')]
case HIP_API_ID_hipStreamSynchronize:
break;
@@ -11094,6 +11126,14 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
else { oss << ", pGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamEndCapture.pGraph__val); }
oss << ")";
break;
case HIP_API_ID_hipStreamGetAttribute:
oss << "hipStreamGetAttribute(";
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.stream);
oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.attr);
if (data->args.hipStreamGetAttribute.value_out == NULL) oss << ", value_out=NULL";
else { oss << ", value_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.value_out__val); }
oss << ")";
break;
case HIP_API_ID_hipStreamGetCaptureInfo:
oss << "hipStreamGetCaptureInfo(";
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo.stream);
@@ -11151,6 +11191,14 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamQuery.stream);
oss << ")";
break;
case HIP_API_ID_hipStreamSetAttribute:
oss << "hipStreamSetAttribute(";
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.stream);
oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.attr);
if (data->args.hipStreamSetAttribute.value == NULL) oss << ", value=NULL";
else { oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.value__val); }
oss << ")";
break;
case HIP_API_ID_hipStreamSynchronize:
oss << "hipStreamSynchronize(";
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSynchronize.stream);
+2
Просмотреть файл
@@ -499,3 +499,5 @@ hipMemsetD2D16
hipMemsetD2D16Async
hipMemsetD2D32
hipMemsetD2D32Async
hipStreamGetAttribute
hipStreamSetAttribute
+10 -4
Просмотреть файл
@@ -828,7 +828,7 @@ hipError_t hipEventRecordWithFlags(hipEvent_t event, hipStream_t stream, unsigne
hipError_t hipLaunchKernelExC(const hipLaunchConfig_t* config, const void* fPtr, void** args);
hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f, void** params,
void** extra);
hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size,
hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size,
hipMemRangeHandleType handleType,
unsigned long long flags);
hipError_t hipMemsetD2D8(hipDeviceptr_t dst, size_t dstPitch, unsigned char value, size_t width,
@@ -843,7 +843,10 @@ hipError_t hipMemsetD2D32(hipDeviceptr_t dst, size_t dstPitch, unsigned int valu
size_t height);
hipError_t hipMemsetD2D32Async(hipDeviceptr_t dst, size_t dstPitch, unsigned int value,
size_t width, size_t height, hipStream_t stream);
// Forward declarations of the hip:: implementations that UpdateDispatchTable() installs
// into HipDispatchTable.
// Reads the stream attribute selected by `attr` into `*value`.
hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr,
hipStreamAttrValue *value);
// Applies the stream attribute selected by `attr`, taking the new setting from `*value`.
hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr,
const hipStreamAttrValue *value);
} // namespace hip
namespace hip {
@@ -1366,6 +1369,8 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
ptrDispatchTable->hipMemsetD2D16Async_fn = hip::hipMemsetD2D16Async;
ptrDispatchTable->hipMemsetD2D32_fn = hip::hipMemsetD2D32;
ptrDispatchTable->hipMemsetD2D32Async_fn = hip::hipMemsetD2D32Async;
ptrDispatchTable->hipStreamGetAttribute_fn = hip::hipStreamGetAttribute;
ptrDispatchTable->hipStreamSetAttribute_fn = hip::hipStreamSetAttribute;
}
#if HIP_ROCPROFILER_REGISTER > 0
@@ -2017,14 +2022,15 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D16_fn, 480);
HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D16Async_fn, 481);
HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D32_fn, 482);
HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D32Async_fn, 483);
HIP_ENFORCE_ABI(HipDispatchTable, hipStreamGetAttribute_fn, 484);
HIP_ENFORCE_ABI(HipDispatchTable, hipStreamSetAttribute_fn, 485);
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
//
// HIP_ENFORCE_ABI(<table>, <functor>, 8)
//
// HIP_ENFORCE_ABI_VERSIONING(<table>, 9) <- 8 + 1 = 9
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 484)
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 486)
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 14,
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
+6 -2
Просмотреть файл
@@ -76,7 +76,9 @@ hipError_t Event::synchronize() {
auto hip_device = g_devices[deviceId()];
// Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status
static constexpr bool kWaitCompletion = true;
if (!hip_device->devices()[0]->IsHwEventReady(*event_, kWaitCompletion, flags_)) {
amd::SyncPolicy policy = (flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking :
amd::SyncPolicy::Auto;
if (!hip_device->devices()[0]->IsHwEventReady(*event_, kWaitCompletion, policy)) {
event_->awaitCompletion();
}
return hipSuccess;
@@ -88,7 +90,9 @@ bool Event::awaitEventCompletion() {
}
bool EventDD::awaitEventCompletion() {
return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, true, flags_);
amd::SyncPolicy policy = (flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking :
amd::SyncPolicy::Auto;
return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, true, policy);
}
hipError_t Event::elapsedTime(Event& eStop, float& ms) {
+3 -1
Просмотреть файл
@@ -98,7 +98,9 @@ class Event {
std::vector<hip::GraphNode*> nodesPrevToRecorded_;
protected:
bool CheckHwEvent() {
return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, false, flags_);
amd::SyncPolicy policy = (flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking :
amd::SyncPolicy::Auto;
return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, false, policy);
}
public:
+2
Просмотреть файл
@@ -615,6 +615,8 @@ global:
hipMemsetD2D16Async;
hipMemsetD2D32;
hipMemsetD2D32Async;
hipStreamGetAttribute;
hipStreamSetAttribute;
local:
*;
} hip_6.5;
+72
Просмотреть файл
@@ -820,4 +820,76 @@ hipError_t hipStreamGetDevice(hipStream_t stream, hipDevice_t* device) {
HIP_RETURN(hipSuccess);
}
// ================================================================================================
/**
 * Sets an attribute on a stream.
 *
 * Only hipStreamAttributeSynchronizationPolicy is settable here; any other attribute id is
 * rejected.
 *
 * @param stream  Stream to modify.
 * @param attr    Attribute to set.
 * @param value   New attribute value; must not be null.
 * @return hipSuccess on success;
 *         hipErrorInvalidValue if `value` is null, `attr` is not supported, or the requested
 *         synchronization policy is out of range;
 *         hipErrorInvalidResourceHandle if the stream is not valid or cannot be resolved to a
 *         stream object;
 *         hipErrorStreamCaptureUnsupported if the stream is currently capturing.
 */
hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr,
                                 const hipStreamAttrValue* value) {
  HIP_INIT_API(hipStreamSetAttribute, stream, attr, value);

  if (value == nullptr) {
    HIP_RETURN(hipErrorInvalidValue);
  }

  if (!hip::isValid(stream)) {
    HIP_RETURN(hipErrorInvalidResourceHandle);
  }

  getStreamPerThread(stream);

  // if stream is capturing, don't allow changing stream attributes
  if (hip::Stream::StreamCaptureOngoing(stream) == true) {
    HIP_RETURN(hipErrorStreamCaptureUnsupported);
  }

  hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
  // NOTE(review): presumably isValid() accepts the null stream without mapping it to a stream
  // object - confirm. Reject it here rather than dereference a null pointer below; resolving
  // it to the device's null stream would be the fuller fix.
  if (s == nullptr) {
    HIP_RETURN(hipErrorInvalidResourceHandle);
  }

  switch (attr) {
    case hipStreamAttributeSynchronizationPolicy: {
      const hipSynchronizationPolicy syncPolicy = value->syncPolicy;
      // validate sync policy
      if (syncPolicy < hipSyncPolicyAuto || syncPolicy > hipSyncPolicyBlockingSync) {
        HIP_RETURN(hipErrorInvalidValue);
      }
      // The HIP policy value is cast straight to amd::SyncPolicy, so the two enums must keep
      // identical numeric values.
      s->SetSyncPolicy(static_cast<amd::SyncPolicy>(syncPolicy));
      break;
    }
    default: {
      HIP_RETURN(hipErrorInvalidValue);
    }
  }

  HIP_RETURN(hipSuccess);
}
/**
 * Reads an attribute of a stream.
 *
 * Supported attributes: hipStreamAttributeSynchronizationPolicy and
 * hipStreamAttributePriority.
 *
 * @param stream     Stream to query.
 * @param attr       Attribute to read.
 * @param value_out  Receives the attribute value; must not be null.
 * @return hipSuccess on success;
 *         hipErrorInvalidValue if `value_out` is null or `attr` is not supported;
 *         hipErrorInvalidResourceHandle if the stream is not valid or cannot be resolved to a
 *         stream object.
 */
hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr,
                                 hipStreamAttrValue* value_out) {
  HIP_INIT_API(hipStreamGetAttribute, stream, attr, value_out);

  if (value_out == nullptr) {
    // Leave through HIP_RETURN like every other exit after HIP_INIT_API (was a bare return).
    HIP_RETURN(hipErrorInvalidValue);
  }

  if (!hip::isValid(stream)) {
    HIP_RETURN(hipErrorInvalidResourceHandle);
  }

  getStreamPerThread(stream);

  hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);
  // NOTE(review): presumably isValid() accepts the null stream without mapping it to a stream
  // object - confirm. Reject it here rather than dereference a null pointer below; resolving
  // it to the device's null stream would be the fuller fix.
  if (s == nullptr) {
    HIP_RETURN(hipErrorInvalidResourceHandle);
  }

  switch (attr) {
    case hipStreamAttributeSynchronizationPolicy: {
      // amd::SyncPolicy is cast straight back to the HIP enum, so the two enums must keep
      // identical numeric values.
      value_out->syncPolicy = static_cast<hipSynchronizationPolicy>(s->GetSyncPolicy());
      break;
    }
    case hipStreamAttributePriority: {
      value_out->priority = s->GetPriority();
      break;
    }
    default: {
      HIP_RETURN(hipErrorInvalidValue);
    }
  }

  HIP_RETURN(hipSuccess);
}
} // hip namespace
+9 -1
Просмотреть файл
@@ -1883,7 +1883,7 @@ hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f
return hip::GetHipDispatchTable()->hipDrvLaunchKernelEx_fn(config, f, kernel, extra);
}
hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size,
hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size,
hipMemRangeHandleType handleType,
unsigned long long flags) {
return hip::GetHipDispatchTable()->hipMemGetHandleForAddressRange_fn(handle, dptr, size,
@@ -1915,4 +1915,12 @@ hipError_t hipMemsetD2D32Async(hipDeviceptr_t dst, size_t dstPitch, unsigned int
size_t height, hipStream_t stream) {
return hip::GetHipDispatchTable()->hipMemsetD2D32Async_fn(dst, dstPitch, value, width, height,
stream);
}
// Public entry point: forwards the call unchanged to the hipStreamSetAttribute slot of the
// HIP dispatch table and returns its result.
hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr,
                                 const hipStreamAttrValue* value) {
  const auto dispatch = hip::GetHipDispatchTable();
  return dispatch->hipStreamSetAttribute_fn(stream, attr, value);
}
// Public entry point: forwards the call unchanged to the hipStreamGetAttribute slot of the
// HIP dispatch table and returns its result.
hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr,
                                 hipStreamAttrValue* value) {
  const auto dispatch = hip::GetHipDispatchTable();
  return dispatch->hipStreamGetAttribute_fn(stream, attr, value);
}
+11 -3
Просмотреть файл
@@ -1383,6 +1383,14 @@ class VirtualDevice : public amd::HeapObject {
namespace amd {
/*! IHIP IPC MEMORY Structure */
#define AMD_IPC_MEM_HANDLE_SIZE 32
//! Host-side policy for waiting on device work.
//!
//! The numeric values are part of the contract: the HIP stream attribute API converts to and
//! from hipSynchronizationPolicy with a plain static_cast, so the values must not be renumbered.
enum SyncPolicy {
  Auto = 1,     //!< Default policy
  Spin = 2,     //!< NOTE(review): presumably a busy-wait on the host - confirm against the waiter
  Yield = 3,    //!< Host thread yields while actively waiting
  Blocking = 4  //!< Host thread does not actively wait
};
//! MemoryObject map lookup class
class MemObjMap : public AllStatic {
public:
@@ -2050,9 +2058,9 @@ class Device : public RuntimeObject {
// Returns the status of HW event, associated with amd::Event
virtual bool IsHwEventReady(
const amd::Event& event, //!< AMD event for HW status validation
bool wait = false, //!< If true then forces the event completion
uint32_t hip_event_flags = 0 //!< flags associated with the event. 0 = hipEventDefault
const amd::Event& event, //!< AMD event for HW status validation
bool wait = false, //!< If true then forces the event completion
amd::SyncPolicy policy = amd::SyncPolicy::Auto
) const {
return false;
};
+5 -3
Просмотреть файл
@@ -2840,7 +2840,7 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI
}
// ================================================================================================
bool Device::IsHwEventReady(const amd::Event& event, bool wait, uint32_t hip_event_flags) const {
bool Device::IsHwEventReady(const amd::Event& event, bool wait, amd::SyncPolicy policy) const {
void* hw_event =
(event.NotifyEvent() != nullptr) ? event.NotifyEvent()->HwEvent() : event.HwEvent();
if (hw_event == nullptr) {
@@ -2851,8 +2851,10 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait, uint32_t hip_eve
// when set the CPU gives up host thread for other work
// when not set the CPU enters a busy-wait on the event to occur
constexpr int kHipEventBlockingSync = 0x1;
bool active_wait = !(hip_event_flags & kHipEventBlockingSync) && ActiveWait();
return WaitForSignal(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_, active_wait);
bool active_wait = !((policy == amd::SyncPolicy::Blocking) & kHipEventBlockingSync) &&
ActiveWait();
bool yield = (policy == amd::SyncPolicy::Yield);
return WaitForSignal(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_, active_wait, yield);
}
auto signal = reinterpret_cast<ProfilingSignal*>(hw_event)->signal_;
+4 -2
Просмотреть файл
@@ -291,7 +291,8 @@ class NullDevice : public amd::Device {
}
bool IsHwEventReady(const amd::Event& event, bool wait = false,
uint32_t hip_event_flags = 0) const override {
amd::SyncPolicy policy = amd::SyncPolicy::Auto)
const override {
return false;
}
@@ -447,7 +448,8 @@ class Device : public NullDevice {
cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
virtual bool IsHwEventReady(const amd::Event& event, bool wait = false,
uint32_t hip_event_flags = 0) const;
amd::SyncPolicy policy = amd::SyncPolicy::Auto)
const;
virtual void getHwEventTime(const amd::Event& event, uint64_t* start, uint64_t* end) const;
virtual void ReleaseGlobalSignal(void* signal) const;
virtual bool CreateUserEvent(amd::UserEvent* event) const;
+5 -1
Просмотреть файл
@@ -32,6 +32,7 @@
#include "hsa/hsa_ven_amd_aqlprofile.h"
#include "rocsched.hpp"
#include "device/device.hpp"
#include "os/os.hpp"
#include <stack>
namespace amd::roc {
@@ -49,7 +50,7 @@ constexpr static uint64_t kUnlimitedWait = std::numeric_limits<uint64_t>::max();
constexpr static uint64_t kTimeout4Secs = 4 * M;
inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false) {
inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false, bool yield = false) {
hsa_wait_state_t wait_state = HSA_WAIT_STATE_BLOCKED;
if (active_wait) {
@@ -81,6 +82,9 @@ inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false) {
"(0x%lx) for %d ns", signal.handle, kTimeout4Secs);
return true;
}
if (yield && wait_state == HSA_WAIT_STATE_ACTIVE) {
amd::Os::yield();
}
}
}
+4 -2
Просмотреть файл
@@ -41,7 +41,8 @@ HostQueue::HostQueue(Context& context, Device& device, cl_command_queue_properti
lastEnqueueCommand_(nullptr),
head_(nullptr),
tail_(nullptr),
isActive_(false) {
isActive_(false),
sync_policy_(amd::SyncPolicy::Auto) {
if (GPU_FORCE_QUEUE_PROFILING) {
properties().set(CL_QUEUE_PROFILING_ENABLE);
}
@@ -198,9 +199,10 @@ void HostQueue::finish(bool cpu_wait) {
}
command->enqueue();
}
// Check HW status of the ROCcrl event. Note: not all ROCclr modes support HW status
static constexpr bool kWaitCompletion = true;
if (cpu_wait || !device().IsHwEventReady(command->event(), kWaitCompletion)) {
if (cpu_wait || !device().IsHwEventReady(command->event(), kWaitCompletion, GetSyncPolicy())) {
ClPrint(LOG_DEBUG, LOG_CMD,
"No HW event or batch size is less than %zu, "
"await command completion",
+9
Просмотреть файл
@@ -307,6 +307,13 @@ class HostQueue : public CommandQueue {
return thread_.vdev()->getQueueID();
}
//! Synchronization policy currently assigned to this queue
amd::SyncPolicy GetSyncPolicy() const {
  return sync_policy_;
}
//! Replaces the synchronization policy assigned to this queue
void SetSyncPolicy(amd::SyncPolicy value) { sync_policy_ = value; }
private:
Command* head_; //!< Head of the batch list
Command* tail_; //!< Tail of the batch list
@@ -315,6 +322,8 @@ private:
//! True if this command queue is active
bool isActive_;
bool forceDestroy_ = false; //!< Destroy the queue in the current state
amd::SyncPolicy sync_policy_; //!< Used for controlling stream synchronization
};
class DeviceQueue : public CommandQueue {