SWDEV-545952 - API definitions for hipStreamSet/GetAttribute (#831)
Co-authored-by: Rahul Manocha <rmanocha@amd.com>
Этот коммит содержится в:
коммит произвёл
GitHub
родитель
a5be0f5346
Коммит
0f49c4a97f
@@ -14,6 +14,8 @@ Full documentation for HIP is available at [rocm.docs.amd.com](https://rocm.docs
|
||||
- `hipMemsetD2D16Async` Used for setting 2D memory range with specified 16-bit values asynchronously
|
||||
- `hipMemsetD2D32` Used for setting 2D memory range with specified 32-bit values
|
||||
- `hipMemsetD2D32Async` Used for setting 2D memory range with specified 32-bit values asynchronously
|
||||
- `hipStreamSetAttribute` Sets attributes, such as the synchronization policy, for a given stream
|
||||
- `hipStreamGetAttribute` Returns attributes, such as the priority, for a given stream
|
||||
|
||||
## HIP 7.0 for ROCm 7.0
|
||||
|
||||
|
||||
@@ -1059,6 +1059,11 @@ typedef hipError_t (*t_hipMemsetD2D32)(hipDeviceptr_t dst, size_t dstPitch, unsi
|
||||
size_t width, size_t height);
|
||||
typedef hipError_t (*t_hipMemsetD2D32Async)(hipDeviceptr_t dst, size_t dstPitch, unsigned int value,
|
||||
size_t width, size_t height, hipStream_t stream);
|
||||
typedef hipError_t (*t_hipStreamSetAttribute)(hipStream_t stream, hipStreamAttrID attr,
|
||||
const hipStreamAttrValue *value);
|
||||
typedef hipError_t (*t_hipStreamGetAttribute)(hipStream_t stream, hipStreamAttrID attr,
|
||||
hipStreamAttrValue *value_out);
|
||||
|
||||
// HIP Compiler dispatch table
|
||||
struct HipCompilerDispatchTable {
|
||||
// HIP_COMPILER_API_TABLE_STEP_VERSION == 0
|
||||
@@ -1608,6 +1613,8 @@ struct HipDispatchTable {
|
||||
t_hipMemsetD2D16Async hipMemsetD2D16Async_fn;
|
||||
t_hipMemsetD2D32 hipMemsetD2D32_fn;
|
||||
t_hipMemsetD2D32Async hipMemsetD2D32Async_fn;
|
||||
t_hipStreamGetAttribute hipStreamGetAttribute_fn;
|
||||
t_hipStreamSetAttribute hipStreamSetAttribute_fn;
|
||||
|
||||
// HIP_RUNTIME_API_TABLE_STEP_VERSION = 14
|
||||
// removed HIP_MEMSET_NODE_PARAMS replaced by hipMemsetParams
|
||||
|
||||
@@ -445,7 +445,9 @@ enum hip_api_id_t {
|
||||
HIP_API_ID_hipMemsetD2D32Async = 425,
|
||||
HIP_API_ID_hipMemsetD2D8 = 426,
|
||||
HIP_API_ID_hipMemsetD2D8Async = 427,
|
||||
HIP_API_ID_LAST = 427,
|
||||
HIP_API_ID_hipStreamGetAttribute = 428,
|
||||
HIP_API_ID_hipStreamSetAttribute = 429,
|
||||
HIP_API_ID_LAST = 429,
|
||||
|
||||
HIP_API_ID_hipChooseDevice = HIP_API_ID_CONCAT(HIP_API_ID_,hipChooseDevice),
|
||||
HIP_API_ID_hipGetDeviceProperties = HIP_API_ID_CONCAT(HIP_API_ID_,hipGetDeviceProperties),
|
||||
@@ -860,6 +862,7 @@ static inline const char* hip_api_name(const uint32_t id) {
|
||||
case HIP_API_ID_hipStreamCreateWithPriority: return "hipStreamCreateWithPriority";
|
||||
case HIP_API_ID_hipStreamDestroy: return "hipStreamDestroy";
|
||||
case HIP_API_ID_hipStreamEndCapture: return "hipStreamEndCapture";
|
||||
case HIP_API_ID_hipStreamGetAttribute: return "hipStreamGetAttribute";
|
||||
case HIP_API_ID_hipStreamGetCaptureInfo: return "hipStreamGetCaptureInfo";
|
||||
case HIP_API_ID_hipStreamGetCaptureInfo_v2: return "hipStreamGetCaptureInfo_v2";
|
||||
case HIP_API_ID_hipStreamGetDevice: return "hipStreamGetDevice";
|
||||
@@ -867,6 +870,7 @@ static inline const char* hip_api_name(const uint32_t id) {
|
||||
case HIP_API_ID_hipStreamGetPriority: return "hipStreamGetPriority";
|
||||
case HIP_API_ID_hipStreamIsCapturing: return "hipStreamIsCapturing";
|
||||
case HIP_API_ID_hipStreamQuery: return "hipStreamQuery";
|
||||
case HIP_API_ID_hipStreamSetAttribute: return "hipStreamSetAttribute";
|
||||
case HIP_API_ID_hipStreamSynchronize: return "hipStreamSynchronize";
|
||||
case HIP_API_ID_hipStreamUpdateCaptureDependencies: return "hipStreamUpdateCaptureDependencies";
|
||||
case HIP_API_ID_hipStreamWaitEvent: return "hipStreamWaitEvent";
|
||||
@@ -1282,6 +1286,7 @@ static inline uint32_t hipApiIdByName(const char* name) {
|
||||
if (strcmp("hipStreamCreateWithPriority", name) == 0) return HIP_API_ID_hipStreamCreateWithPriority;
|
||||
if (strcmp("hipStreamDestroy", name) == 0) return HIP_API_ID_hipStreamDestroy;
|
||||
if (strcmp("hipStreamEndCapture", name) == 0) return HIP_API_ID_hipStreamEndCapture;
|
||||
if (strcmp("hipStreamGetAttribute", name) == 0) return HIP_API_ID_hipStreamGetAttribute;
|
||||
if (strcmp("hipStreamGetCaptureInfo", name) == 0) return HIP_API_ID_hipStreamGetCaptureInfo;
|
||||
if (strcmp("hipStreamGetCaptureInfo_v2", name) == 0) return HIP_API_ID_hipStreamGetCaptureInfo_v2;
|
||||
if (strcmp("hipStreamGetDevice", name) == 0) return HIP_API_ID_hipStreamGetDevice;
|
||||
@@ -1289,6 +1294,7 @@ static inline uint32_t hipApiIdByName(const char* name) {
|
||||
if (strcmp("hipStreamGetPriority", name) == 0) return HIP_API_ID_hipStreamGetPriority;
|
||||
if (strcmp("hipStreamIsCapturing", name) == 0) return HIP_API_ID_hipStreamIsCapturing;
|
||||
if (strcmp("hipStreamQuery", name) == 0) return HIP_API_ID_hipStreamQuery;
|
||||
if (strcmp("hipStreamSetAttribute", name) == 0) return HIP_API_ID_hipStreamSetAttribute;
|
||||
if (strcmp("hipStreamSynchronize", name) == 0) return HIP_API_ID_hipStreamSynchronize;
|
||||
if (strcmp("hipStreamUpdateCaptureDependencies", name) == 0) return HIP_API_ID_hipStreamUpdateCaptureDependencies;
|
||||
if (strcmp("hipStreamWaitEvent", name) == 0) return HIP_API_ID_hipStreamWaitEvent;
|
||||
@@ -3591,6 +3597,12 @@ typedef struct hip_api_data_s {
|
||||
hipGraph_t* pGraph;
|
||||
hipGraph_t pGraph__val;
|
||||
} hipStreamEndCapture;
|
||||
struct {
|
||||
hipStream_t stream;
|
||||
hipLaunchAttributeID attr;
|
||||
const hipLaunchAttributeValue* value_out;
|
||||
hipLaunchAttributeValue value_out__val;
|
||||
} hipStreamGetAttribute;
|
||||
struct {
|
||||
hipStream_t stream;
|
||||
hipStreamCaptureStatus* pCaptureStatus;
|
||||
@@ -3634,6 +3646,12 @@ typedef struct hip_api_data_s {
|
||||
struct {
|
||||
hipStream_t stream;
|
||||
} hipStreamQuery;
|
||||
struct {
|
||||
hipStream_t stream;
|
||||
hipLaunchAttributeID attr;
|
||||
const hipLaunchAttributeValue* value;
|
||||
hipLaunchAttributeValue value__val;
|
||||
} hipStreamSetAttribute;
|
||||
struct {
|
||||
hipStream_t stream;
|
||||
} hipStreamSynchronize;
|
||||
@@ -6138,6 +6156,9 @@ typedef struct hip_api_data_s {
|
||||
cb_data.args.hipStreamEndCapture.stream = (hipStream_t)stream; \
|
||||
cb_data.args.hipStreamEndCapture.pGraph = (hipGraph_t*)pGraph; \
|
||||
};
|
||||
// hipStreamGetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value_out')]
|
||||
#define INIT_hipStreamGetAttribute_CB_ARGS_DATA(cb_data) { \
|
||||
};
|
||||
// hipStreamGetCaptureInfo[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus'), ('unsigned long long*', 'pId')]
|
||||
#define INIT_hipStreamGetCaptureInfo_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipStreamGetCaptureInfo.stream = (hipStream_t)stream; \
|
||||
@@ -6177,6 +6198,9 @@ typedef struct hip_api_data_s {
|
||||
#define INIT_hipStreamQuery_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipStreamQuery.stream = (hipStream_t)stream; \
|
||||
};
|
||||
// hipStreamSetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value')]
|
||||
#define INIT_hipStreamSetAttribute_CB_ARGS_DATA(cb_data) { \
|
||||
};
|
||||
// hipStreamSynchronize[('hipStream_t', 'stream')]
|
||||
#define INIT_hipStreamSynchronize_CB_ARGS_DATA(cb_data) { \
|
||||
cb_data.args.hipStreamSynchronize.stream = (hipStream_t)stream; \
|
||||
@@ -7889,6 +7913,10 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
|
||||
case HIP_API_ID_hipStreamEndCapture:
|
||||
if (data->args.hipStreamEndCapture.pGraph) data->args.hipStreamEndCapture.pGraph__val = *(data->args.hipStreamEndCapture.pGraph);
|
||||
break;
|
||||
// hipStreamGetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value_out')]
|
||||
case HIP_API_ID_hipStreamGetAttribute:
|
||||
if (data->args.hipStreamGetAttribute.value_out) data->args.hipStreamGetAttribute.value_out__val = *(data->args.hipStreamGetAttribute.value_out);
|
||||
break;
|
||||
// hipStreamGetCaptureInfo[('hipStream_t', 'stream'), ('hipStreamCaptureStatus*', 'pCaptureStatus'), ('unsigned long long*', 'pId')]
|
||||
case HIP_API_ID_hipStreamGetCaptureInfo:
|
||||
if (data->args.hipStreamGetCaptureInfo.pCaptureStatus) data->args.hipStreamGetCaptureInfo.pCaptureStatus__val = *(data->args.hipStreamGetCaptureInfo.pCaptureStatus);
|
||||
@@ -7921,6 +7949,10 @@ static inline void hipApiArgsInit(hip_api_id_t id, hip_api_data_t* data) {
|
||||
// hipStreamQuery[('hipStream_t', 'stream')]
|
||||
case HIP_API_ID_hipStreamQuery:
|
||||
break;
|
||||
// hipStreamSetAttribute[('hipStream_t', 'stream'), ('hipLaunchAttributeID', 'attr'), ('const hipLaunchAttributeValue*', 'value')]
|
||||
case HIP_API_ID_hipStreamSetAttribute:
|
||||
if (data->args.hipStreamSetAttribute.value) data->args.hipStreamSetAttribute.value__val = *(data->args.hipStreamSetAttribute.value);
|
||||
break;
|
||||
// hipStreamSynchronize[('hipStream_t', 'stream')]
|
||||
case HIP_API_ID_hipStreamSynchronize:
|
||||
break;
|
||||
@@ -11094,6 +11126,14 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
|
||||
else { oss << ", pGraph="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamEndCapture.pGraph__val); }
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipStreamGetAttribute:
|
||||
oss << "hipStreamGetAttribute(";
|
||||
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.stream);
|
||||
oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.attr);
|
||||
if (data->args.hipStreamGetAttribute.value_out == NULL) oss << ", value_out=NULL";
|
||||
else { oss << ", value_out="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetAttribute.value_out__val); }
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipStreamGetCaptureInfo:
|
||||
oss << "hipStreamGetCaptureInfo(";
|
||||
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamGetCaptureInfo.stream);
|
||||
@@ -11151,6 +11191,14 @@ static inline const char* hipApiString(hip_api_id_t id, const hip_api_data_t* da
|
||||
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamQuery.stream);
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipStreamSetAttribute:
|
||||
oss << "hipStreamSetAttribute(";
|
||||
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.stream);
|
||||
oss << ", attr="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.attr);
|
||||
if (data->args.hipStreamSetAttribute.value == NULL) oss << ", value=NULL";
|
||||
else { oss << ", value="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSetAttribute.value__val); }
|
||||
oss << ")";
|
||||
break;
|
||||
case HIP_API_ID_hipStreamSynchronize:
|
||||
oss << "hipStreamSynchronize(";
|
||||
oss << "stream="; roctracer::hip_support::detail::operator<<(oss, data->args.hipStreamSynchronize.stream);
|
||||
|
||||
@@ -499,3 +499,5 @@ hipMemsetD2D16
|
||||
hipMemsetD2D16Async
|
||||
hipMemsetD2D32
|
||||
hipMemsetD2D32Async
|
||||
hipStreamGetAttribute
|
||||
hipStreamSetAttribute
|
||||
|
||||
@@ -828,7 +828,7 @@ hipError_t hipEventRecordWithFlags(hipEvent_t event, hipStream_t stream, unsigne
|
||||
hipError_t hipLaunchKernelExC(const hipLaunchConfig_t* config, const void* fPtr, void** args);
|
||||
hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f, void** params,
|
||||
void** extra);
|
||||
hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size,
|
||||
hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size,
|
||||
hipMemRangeHandleType handleType,
|
||||
unsigned long long flags);
|
||||
hipError_t hipMemsetD2D8(hipDeviceptr_t dst, size_t dstPitch, unsigned char value, size_t width,
|
||||
@@ -843,7 +843,10 @@ hipError_t hipMemsetD2D32(hipDeviceptr_t dst, size_t dstPitch, unsigned int valu
|
||||
size_t height);
|
||||
hipError_t hipMemsetD2D32Async(hipDeviceptr_t dst, size_t dstPitch, unsigned int value,
|
||||
size_t width, size_t height, hipStream_t stream);
|
||||
|
||||
hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr,
|
||||
hipStreamAttrValue *value);
|
||||
hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr,
|
||||
const hipStreamAttrValue *value);
|
||||
} // namespace hip
|
||||
|
||||
namespace hip {
|
||||
@@ -1366,6 +1369,8 @@ void UpdateDispatchTable(HipDispatchTable* ptrDispatchTable) {
|
||||
ptrDispatchTable->hipMemsetD2D16Async_fn = hip::hipMemsetD2D16Async;
|
||||
ptrDispatchTable->hipMemsetD2D32_fn = hip::hipMemsetD2D32;
|
||||
ptrDispatchTable->hipMemsetD2D32Async_fn = hip::hipMemsetD2D32Async;
|
||||
ptrDispatchTable->hipStreamGetAttribute_fn = hip::hipStreamGetAttribute;
|
||||
ptrDispatchTable->hipStreamSetAttribute_fn = hip::hipStreamSetAttribute;
|
||||
}
|
||||
|
||||
#if HIP_ROCPROFILER_REGISTER > 0
|
||||
@@ -2017,14 +2022,15 @@ HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D16_fn, 480);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D16Async_fn, 481);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D32_fn, 482);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipMemsetD2D32Async_fn, 483);
|
||||
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipStreamGetAttribute_fn, 484);
|
||||
HIP_ENFORCE_ABI(HipDispatchTable, hipStreamSetAttribute_fn, 485);
|
||||
// if HIP_ENFORCE_ABI entries are added for each new function pointer in the table, the number below
|
||||
// will be +1 of the number in the last HIP_ENFORCE_ABI line. E.g.:
|
||||
//
|
||||
// HIP_ENFORCE_ABI(<table>, <functor>, 8)
|
||||
//
|
||||
// HIP_ENFORCE_ABI_VERSIONING(<table>, 9) <- 8 + 1 = 9
|
||||
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 484)
|
||||
HIP_ENFORCE_ABI_VERSIONING(HipDispatchTable, 486)
|
||||
|
||||
static_assert(HIP_RUNTIME_API_TABLE_MAJOR_VERSION == 0 && HIP_RUNTIME_API_TABLE_STEP_VERSION == 14,
|
||||
"If you get this error, add new HIP_ENFORCE_ABI(...) code for the new function "
|
||||
|
||||
@@ -76,7 +76,9 @@ hipError_t Event::synchronize() {
|
||||
auto hip_device = g_devices[deviceId()];
|
||||
// Check HW status of the ROCclr event. Note: not all ROCclr modes support HW status
|
||||
static constexpr bool kWaitCompletion = true;
|
||||
if (!hip_device->devices()[0]->IsHwEventReady(*event_, kWaitCompletion, flags_)) {
|
||||
amd::SyncPolicy policy = (flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking :
|
||||
amd::SyncPolicy::Auto;
|
||||
if (!hip_device->devices()[0]->IsHwEventReady(*event_, kWaitCompletion, policy)) {
|
||||
event_->awaitCompletion();
|
||||
}
|
||||
return hipSuccess;
|
||||
@@ -88,7 +90,9 @@ bool Event::awaitEventCompletion() {
|
||||
}
|
||||
|
||||
bool EventDD::awaitEventCompletion() {
|
||||
return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, true, flags_);
|
||||
amd::SyncPolicy policy = (flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking :
|
||||
amd::SyncPolicy::Auto;
|
||||
return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, true, policy);
|
||||
}
|
||||
|
||||
hipError_t Event::elapsedTime(Event& eStop, float& ms) {
|
||||
|
||||
@@ -98,7 +98,9 @@ class Event {
|
||||
std::vector<hip::GraphNode*> nodesPrevToRecorded_;
|
||||
protected:
|
||||
bool CheckHwEvent() {
|
||||
return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, false, flags_);
|
||||
amd::SyncPolicy policy = (flags_ == hipEventBlockingSync) ? amd::SyncPolicy::Blocking :
|
||||
amd::SyncPolicy::Auto;
|
||||
return g_devices[deviceId()]->devices()[0]->IsHwEventReady(*event_, false, policy);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
@@ -615,6 +615,8 @@ global:
|
||||
hipMemsetD2D16Async;
|
||||
hipMemsetD2D32;
|
||||
hipMemsetD2D32Async;
|
||||
hipStreamGetAttribute;
|
||||
hipStreamSetAttribute;
|
||||
local:
|
||||
*;
|
||||
} hip_6.5;
|
||||
@@ -820,4 +820,76 @@ hipError_t hipStreamGetDevice(hipStream_t stream, hipDevice_t* device) {
|
||||
|
||||
HIP_RETURN(hipSuccess);
|
||||
}
|
||||
// ================================================================================================
|
||||
// Sets a stream attribute.
//
// Only hipStreamAttributeSynchronizationPolicy is handled; any other attribute id is
// rejected.
//
// Returns:
//   hipErrorInvalidValue              - value is null, attr is not handled, or the
//                                       requested sync policy is outside
//                                       [hipSyncPolicyAuto, hipSyncPolicyBlockingSync]
//   hipErrorInvalidResourceHandle     - hip::isValid() rejects the stream
//   hipErrorStreamCaptureUnsupported  - the stream is currently capturing
//   hipSuccess                        - the policy was stored on the stream
hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr,
                                 const hipStreamAttrValue *value) {
  HIP_INIT_API(hipStreamSetAttribute, stream, attr, value);

  if (value == nullptr) {
    HIP_RETURN(hipErrorInvalidValue);
  }

  if (!hip::isValid(stream)) {
    HIP_RETURN(hipErrorInvalidResourceHandle);
  }

  getStreamPerThread(stream);

  // if stream is capturing, don't allow changing stream attributes
  if (hip::Stream::StreamCaptureOngoing(stream) == true) {
    HIP_RETURN(hipErrorStreamCaptureUnsupported);
  }

  // NOTE(review): if hip::isValid() accepts a null (default) stream handle, `s` is null
  // here and the SetSyncPolicy call below dereferences it - confirm against isValid().
  hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);

  switch (attr) {
    case hipStreamAttributeSynchronizationPolicy: {
      hipSynchronizationPolicy syncPolicy = value->syncPolicy;
      // validate sync policy
      if (syncPolicy < hipSyncPolicyAuto || syncPolicy > hipSyncPolicyBlockingSync) {
        HIP_RETURN(hipErrorInvalidValue);
      }
      // The cast assumes hipSynchronizationPolicy and amd::SyncPolicy share numeric values.
      s->SetSyncPolicy(static_cast<amd::SyncPolicy>(syncPolicy));
      break;
    }
    default: {
      HIP_RETURN(hipErrorInvalidValue);
    }
  }

  HIP_RETURN(hipSuccess);
}
|
||||
|
||||
// Reads a stream attribute into *value_out.
//
// Handled attributes:
//   hipStreamAttributeSynchronizationPolicy - writes value_out->syncPolicy
//   hipStreamAttributePriority              - writes value_out->priority
//
// Returns:
//   hipErrorInvalidValue           - value_out is null or attr is not handled
//   hipErrorInvalidResourceHandle  - hip::isValid() rejects the stream
//   hipSuccess                     - the requested field of *value_out was written
hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr,
                                 hipStreamAttrValue *value_out) {
  HIP_INIT_API(hipStreamGetAttribute, stream, attr, value_out);

  if (value_out == nullptr) {
    // Exit through HIP_RETURN like every other path of this function, so the error is
    // reported the same way as the remaining failure cases after HIP_INIT_API.
    HIP_RETURN(hipErrorInvalidValue);
  }

  if (!hip::isValid(stream)) {
    HIP_RETURN(hipErrorInvalidResourceHandle);
  }

  getStreamPerThread(stream);

  // NOTE(review): if hip::isValid() accepts a null (default) stream handle, `s` is null
  // here and the getters below dereference it - confirm against isValid().
  hip::Stream* s = reinterpret_cast<hip::Stream*>(stream);

  switch (attr) {
    case hipStreamAttributeSynchronizationPolicy: {
      // The cast assumes amd::SyncPolicy and hipSynchronizationPolicy share numeric values.
      value_out->syncPolicy = static_cast<hipSynchronizationPolicy>(s->GetSyncPolicy());
      break;
    }
    case hipStreamAttributePriority: {
      value_out->priority = s->GetPriority();
      break;
    }
    default: {
      HIP_RETURN(hipErrorInvalidValue);
    }
  }

  HIP_RETURN(hipSuccess);
}
|
||||
} // hip namespace
|
||||
|
||||
@@ -1883,7 +1883,7 @@ hipError_t hipDrvLaunchKernelEx(const HIP_LAUNCH_CONFIG* config, hipFunction_t f
|
||||
return hip::GetHipDispatchTable()->hipDrvLaunchKernelEx_fn(config, f, kernel, extra);
|
||||
}
|
||||
|
||||
hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size,
|
||||
hipError_t hipMemGetHandleForAddressRange(void* handle, hipDeviceptr_t dptr, size_t size,
|
||||
hipMemRangeHandleType handleType,
|
||||
unsigned long long flags) {
|
||||
return hip::GetHipDispatchTable()->hipMemGetHandleForAddressRange_fn(handle, dptr, size,
|
||||
@@ -1915,4 +1915,12 @@ hipError_t hipMemsetD2D32Async(hipDeviceptr_t dst, size_t dstPitch, unsigned int
|
||||
size_t height, hipStream_t stream) {
|
||||
return hip::GetHipDispatchTable()->hipMemsetD2D32Async_fn(dst, dstPitch, value, width, height,
|
||||
stream);
|
||||
}
|
||||
// Exported entry point: looks up the hipStreamSetAttribute slot in the HIP dispatch
// table and forwards all arguments to it unchanged, returning its result.
hipError_t hipStreamSetAttribute(hipStream_t stream, hipStreamAttrID attr,
                                 const hipStreamAttrValue *value) {
  const auto setAttributeFn = hip::GetHipDispatchTable()->hipStreamSetAttribute_fn;
  return setAttributeFn(stream, attr, value);
}
|
||||
// Exported entry point: looks up the hipStreamGetAttribute slot in the HIP dispatch
// table and forwards all arguments to it unchanged, returning its result.
hipError_t hipStreamGetAttribute(hipStream_t stream, hipStreamAttrID attr,
                                 hipStreamAttrValue *value) {
  const auto getAttributeFn = hip::GetHipDispatchTable()->hipStreamGetAttribute_fn;
  return getAttributeFn(stream, attr, value);
}
|
||||
@@ -1383,6 +1383,14 @@ class VirtualDevice : public amd::HeapObject {
|
||||
namespace amd {
|
||||
/*! IHIP IPC MEMORY Structure */
|
||||
#define AMD_IPC_MEM_HANDLE_SIZE 32
|
||||
|
||||
//! Synchronization policy selector. The numbering starts at 1 and must stay stable:
//! the HIP stream attribute API converts to and from this enum with static_cast.
enum SyncPolicy {
  Auto = 1,
  Spin = 2,
  Yield = 3,
  Blocking = 4
};
|
||||
|
||||
//! MemoryObject map lookup class
|
||||
class MemObjMap : public AllStatic {
|
||||
public:
|
||||
@@ -2050,9 +2058,9 @@ class Device : public RuntimeObject {
|
||||
|
||||
// Returns the status of HW event, associated with amd::Event
|
||||
virtual bool IsHwEventReady(
|
||||
const amd::Event& event, //!< AMD event for HW status validation
|
||||
bool wait = false, //!< If true then forces the event completion
|
||||
uint32_t hip_event_flags = 0 //!< flags associated with the event. 0 = hipEventDefault
|
||||
const amd::Event& event, //!< AMD event for HW status validation
|
||||
bool wait = false, //!< If true then forces the event completion
|
||||
amd::SyncPolicy policy = amd::SyncPolicy::Auto
|
||||
) const {
|
||||
return false;
|
||||
};
|
||||
|
||||
@@ -2840,7 +2840,7 @@ bool Device::SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeI
|
||||
}
|
||||
|
||||
// ================================================================================================
|
||||
bool Device::IsHwEventReady(const amd::Event& event, bool wait, uint32_t hip_event_flags) const {
|
||||
bool Device::IsHwEventReady(const amd::Event& event, bool wait, amd::SyncPolicy policy) const {
|
||||
void* hw_event =
|
||||
(event.NotifyEvent() != nullptr) ? event.NotifyEvent()->HwEvent() : event.HwEvent();
|
||||
if (hw_event == nullptr) {
|
||||
@@ -2851,8 +2851,10 @@ bool Device::IsHwEventReady(const amd::Event& event, bool wait, uint32_t hip_eve
|
||||
// when set the CPU gives up host thread for other work
|
||||
// when not set the CPU enters a busy-wait on the event to occur
|
||||
constexpr int kHipEventBlockingSync = 0x1;
|
||||
bool active_wait = !(hip_event_flags & kHipEventBlockingSync) && ActiveWait();
|
||||
return WaitForSignal(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_, active_wait);
|
||||
bool active_wait = !((policy == amd::SyncPolicy::Blocking) & kHipEventBlockingSync) &&
|
||||
ActiveWait();
|
||||
bool yield = (policy == amd::SyncPolicy::Yield);
|
||||
return WaitForSignal(reinterpret_cast<ProfilingSignal*>(hw_event)->signal_, active_wait, yield);
|
||||
}
|
||||
|
||||
auto signal = reinterpret_cast<ProfilingSignal*>(hw_event)->signal_;
|
||||
|
||||
@@ -291,7 +291,8 @@ class NullDevice : public amd::Device {
|
||||
}
|
||||
|
||||
//! NullDevice override: takes the same arguments as the base declaration, ignores
//! them, and always returns false.
bool IsHwEventReady(const amd::Event& event, bool wait = false,
                    amd::SyncPolicy policy = amd::SyncPolicy::Auto) const override {
  return false;
}
|
||||
|
||||
@@ -447,7 +448,8 @@ class Device : public NullDevice {
|
||||
cl_set_device_clock_mode_output_amd* pSetClockModeOutput);
|
||||
|
||||
//! Returns the status of the HW event associated with the given amd::Event.
//! \param event  AMD event for HW status validation
//! \param wait   If true then forces the event completion
//! \param policy Synchronization policy to apply; defaults to amd::SyncPolicy::Auto
virtual bool IsHwEventReady(const amd::Event& event, bool wait = false,
                            amd::SyncPolicy policy = amd::SyncPolicy::Auto) const;
|
||||
virtual void getHwEventTime(const amd::Event& event, uint64_t* start, uint64_t* end) const;
|
||||
virtual void ReleaseGlobalSignal(void* signal) const;
|
||||
virtual bool CreateUserEvent(amd::UserEvent* event) const;
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#include "hsa/hsa_ven_amd_aqlprofile.h"
|
||||
#include "rocsched.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "os/os.hpp"
|
||||
#include <stack>
|
||||
|
||||
namespace amd::roc {
|
||||
@@ -49,7 +50,7 @@ constexpr static uint64_t kUnlimitedWait = std::numeric_limits<uint64_t>::max();
|
||||
|
||||
constexpr static uint64_t kTimeout4Secs = 4 * M;
|
||||
|
||||
inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false) {
|
||||
inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false, bool yield = false) {
|
||||
|
||||
hsa_wait_state_t wait_state = HSA_WAIT_STATE_BLOCKED;
|
||||
if (active_wait) {
|
||||
@@ -81,6 +82,9 @@ inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false) {
|
||||
"(0x%lx) for %d ns", signal.handle, kTimeout4Secs);
|
||||
return true;
|
||||
}
|
||||
if (yield && wait_state == HSA_WAIT_STATE_ACTIVE) {
|
||||
amd::Os::yield();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -41,7 +41,8 @@ HostQueue::HostQueue(Context& context, Device& device, cl_command_queue_properti
|
||||
lastEnqueueCommand_(nullptr),
|
||||
head_(nullptr),
|
||||
tail_(nullptr),
|
||||
isActive_(false) {
|
||||
isActive_(false),
|
||||
sync_policy_(amd::SyncPolicy::Auto) {
|
||||
if (GPU_FORCE_QUEUE_PROFILING) {
|
||||
properties().set(CL_QUEUE_PROFILING_ENABLE);
|
||||
}
|
||||
@@ -198,9 +199,10 @@ void HostQueue::finish(bool cpu_wait) {
|
||||
}
|
||||
command->enqueue();
|
||||
}
|
||||
|
||||
// Check HW status of the ROCclr event. Note: not all ROCclr modes support HW status
|
||||
static constexpr bool kWaitCompletion = true;
|
||||
if (cpu_wait || !device().IsHwEventReady(command->event(), kWaitCompletion)) {
|
||||
if (cpu_wait || !device().IsHwEventReady(command->event(), kWaitCompletion, GetSyncPolicy())) {
|
||||
ClPrint(LOG_DEBUG, LOG_CMD,
|
||||
"No HW event or batch size is less than %zu, "
|
||||
"await command completion",
|
||||
|
||||
@@ -307,6 +307,13 @@ class HostQueue : public CommandQueue {
|
||||
return thread_.vdev()->getQueueID();
|
||||
}
|
||||
|
||||
//! Returns Synchronization Policy for the current stream
|
||||
amd::SyncPolicy GetSyncPolicy() const { return sync_policy_; }
|
||||
//! Set Synchronization Policy used by Queue
|
||||
void SetSyncPolicy(amd::SyncPolicy value) {
|
||||
sync_policy_ = value;
|
||||
}
|
||||
|
||||
private:
|
||||
Command* head_; //!< Head of the batch list
|
||||
Command* tail_; //!< Tail of the batch list
|
||||
@@ -315,6 +322,8 @@ private:
|
||||
//! True if this command queue is active
|
||||
bool isActive_;
|
||||
bool forceDestroy_ = false; //!< Destroy the queue in the current state
|
||||
|
||||
amd::SyncPolicy sync_policy_; //!< Used for controlling stream synchronization
|
||||
};
|
||||
|
||||
class DeviceQueue : public CommandQueue {
|
||||
|
||||
Ссылка в новой задаче
Block a user