rocr: Add WaitMultiple to core Signal

Replaces WaitAny with WaitMultiple to more closely align with the
underlying driver API for waiting on multiple events.

WaitMultiple adds a single parameter, wait_on_all, to the WaitAny
interface providing a single function for waiting on multiple
events when we only need AND and OR semantics for the signal
checking logic.

Change-Id: I68a4a45d48151d9d69aef02fd8f7263b9e6c0e75
Этот коммит содержится в:
Tony Gutierrez
2024-10-16 12:14:05 -07:00
коммит произвёл David Yat Sin
родитель c51aa0d155
Коммит 8a38f121ea
12 изменённых файлов: 153 добавлений и 61 удалений
+1 -1
Просмотреть файл
@@ -81,7 +81,7 @@ include(utils)
## Get version strings
get_version("1.14.0")
get_version("1.15.0")
if (${ROCM_PATCH_VERSION})
set(VERSION_PATCH ${ROCM_PATCH_VERSION})
endif()
+9
Просмотреть файл
@@ -909,6 +909,15 @@ hsa_status_t HSA_API
return amdExtTable->hsa_amd_async_function_fn(callback, arg);
}
// Mirrors Amd Extension Apis: forwards hsa_amd_signal_wait_all, with its
// arguments unchanged, to the entry stored in the AMD extension table.
uint32_t HSA_API hsa_amd_signal_wait_all(uint32_t signal_count, hsa_signal_t* signals,
                                         hsa_signal_condition_t* conds, hsa_signal_value_t* values,
                                         uint64_t timeout_hint, hsa_wait_state_t wait_hint,
                                         hsa_signal_value_t* satisfying_values) {
  // Look up the table entry first, then dispatch through it.
  const auto wait_all_fn = amdExtTable->hsa_amd_signal_wait_all_fn;
  const uint32_t wait_result = wait_all_fn(signal_count, signals, conds, values, timeout_hint,
                                           wait_hint, satisfying_values);
  return wait_result;
}
// Mirrors Amd Extension Apis
uint32_t HSA_API
hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* signals,
+6
Просмотреть файл
@@ -100,6 +100,12 @@ hsa_status_t hsa_amd_signal_create(hsa_signal_value_t initial_value, uint32_t nu
const hsa_agent_t* consumers, uint64_t attributes,
hsa_signal_t* signal);
// Mirrors Amd Extension Apis
// Declaration of hsa_amd_signal_wait_all: waits on signal_count
// signal/condition/value triples. Per the public header documentation it
// returns 0 once all signals met their conditions and -1 on a timeout; the
// per-signal satisfying values are written to satisfying_values unless that
// pointer is null.
uint32_t hsa_amd_signal_wait_all(uint32_t signal_count, hsa_signal_t* signals,
hsa_signal_condition_t* conds, hsa_signal_value_t* values,
uint64_t timeout_hint, hsa_wait_state_t wait_hint,
hsa_signal_value_t* satisfying_values);
// Mirrors Amd Extension Apis
uint32_t
hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* signals,
+25 -9
Просмотреть файл
@@ -3,7 +3,7 @@
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
@@ -351,14 +351,30 @@ class Signal {
/// Returns NULL for DefaultEvent Type.
virtual HsaEvent* EopEvent() = 0;
/// @brief Waits until any signal in the list satisfies its condition or
/// timeout is reached.
/// Returns the index of a satisfied signal. Returns -1 on timeout and
/// errors.
static uint32_t WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
uint64_t timeout_hint, hsa_wait_state_t wait_hint,
hsa_signal_value_t* satisfying_value);
/// @brief Waits until multiple signals in the list satisfy their conditions
/// or a timeout is reached.
/// @param signal_count Number of hsa_signals in the list.
/// @param hsa_signals Pointer to array of HSA signals.
/// @param conds Pointer to array of signal conditions.
/// @param values Pointer to array of signal values.
/// @param timeout Timeout hint value.
/// @param wait_hint Hint about wait state.
/// @param satisfying_values Vector of satisfying values. If \p wait_on_all
/// is false (then we are waiting on any signal in the list) this will contain
/// only the first satisfying value.
/// NOTE(review): the implementation stores each observed value at
/// satisfying_values[<index of the signal>], so the vector appears to need at
/// least \p signal_count elements even when \p wait_on_all is false. Confirm
/// that callers passing a one-element vector cannot be indexed out of bounds
/// when a signal other than index 0 is the one that satisfies its condition.
/// @param wait_on_all Wait on all signals in the list to satisfy their
/// conditions if true, else wait on any signal in the list to satisfy its
/// condition.
/// @return Return the index of the first signal in the list that satisfies
/// its condition, or -1 (converted to uint32_t) on a timeout or when a signal
/// in the list is found to be invalid while polling. Note that if
/// \p wait_on_all is true, then all signals in the list satisfy their
/// conditions, thus the index will always be 0.
static uint32_t WaitMultiple(uint32_t signal_count, const hsa_signal_t* hsa_signals,
const hsa_signal_condition_t* conds,
const hsa_signal_value_t* values, uint64_t timeout,
hsa_wait_state_t wait_hint,
std::vector<hsa_signal_value_t>& satisfying_values,
bool wait_on_all);
/// @brief Dedicated function to wait on signals that are not of type HSA_EVENTTYPE_SIGNAL
/// these events can only be received by calling the underlying driver (i.e via the hsaKmtWaitOnMultipleEvents_Ext
+2 -1
Просмотреть файл
@@ -87,7 +87,7 @@ void HsaApiTable::Init() {
// they can add preprocessor macros on the new functions
constexpr size_t expected_core_api_table_size = 1016;
constexpr size_t expected_amd_ext_table_size = 584;
constexpr size_t expected_amd_ext_table_size = 592;
constexpr size_t expected_image_ext_table_size = 128;
constexpr size_t expected_finalizer_ext_table_size = 64;
constexpr size_t expected_tools_table_size = 64;
@@ -474,6 +474,7 @@ void HsaApiTable::UpdateAmdExts() {
amd_ext_api.hsa_amd_agent_set_async_scratch_limit_fn = AMD::hsa_amd_agent_set_async_scratch_limit;
amd_ext_api.hsa_amd_queue_get_info_fn = AMD::hsa_amd_queue_get_info;
amd_ext_api.hsa_amd_enable_logging_fn = AMD::hsa_amd_enable_logging;
amd_ext_api.hsa_amd_signal_wait_all_fn = AMD::hsa_amd_signal_wait_all;
}
void HsaApiTable::UpdateTools() {
+43 -7
Просмотреть файл
@@ -40,13 +40,14 @@
//
////////////////////////////////////////////////////////////////////////////////
#include <new>
#include <typeinfo>
#include <algorithm>
#include <exception>
#include <set>
#include <utility>
#include <memory>
#include <map>
#include <memory>
#include <new>
#include <set>
#include <typeinfo>
#include <utility>
#include <vector>
#include "core/inc/agent.h"
@@ -570,6 +571,35 @@ hsa_status_t hsa_amd_signal_value_pointer(hsa_signal_t hsa_signal,
CATCH;
}
// Waits until every signal/condition pair in the list is satisfied or the
// timeout expires.
//
// signal_count      Number of entries in hsa_signals, conds and values.
// hsa_signals       Signals to wait on.
// conds             Condition to test for each signal.
// values            Comparison value for each signal's condition.
// timeout_hint      Timeout hint forwarded to core::Signal::WaitMultiple.
// wait_hint         Wait-state hint forwarded to core::Signal::WaitMultiple.
// satisfying_values Optional output array with room for signal_count values;
//                   ignored when null.
//
// Returns the result of core::Signal::WaitMultiple in wait-on-all mode: 0 when
// all conditions were met, uint32_t(-1) on timeout or error. Also returns
// uint32_t(-1) when the runtime is not open.
uint32_t hsa_amd_signal_wait_all(uint32_t signal_count, hsa_signal_t* hsa_signals,
                                 hsa_signal_condition_t* conds, hsa_signal_value_t* values,
                                 uint64_t timeout_hint, hsa_wait_state_t wait_hint,
                                 hsa_signal_value_t* satisfying_values) {
  TRY;
  if (!core::Runtime::runtime_singleton_->IsOpen()) {
    assert(false && "hsa_amd_signal_wait_all called while not initialized.");
    // 0 is this API's "all conditions met" result, so in builds where the
    // assert is compiled out the failure must be reported as -1 instead.
    return uint32_t(-1);
  }
  // Do not check for signal invalidation.  Invalidation may occur during async
  // signal handler loop and is not an error.
  // The index is unsigned to match signal_count and avoid a signed/unsigned
  // comparison.
  for (uint32_t i = 0; i < signal_count; ++i)
    assert(hsa_signals[i].handle != 0 && core::SharedSignal::Convert(hsa_signals[i])->IsValid() &&
           "Invalid signal.");

  // One slot per signal: WaitMultiple records each signal's value at the
  // signal's own index.
  std::vector<hsa_signal_value_t> satisfying_values_vec(signal_count);

  const uint32_t first_satisfying_signal_idx =
      core::Signal::WaitMultiple(signal_count, hsa_signals, conds, values, timeout_hint, wait_hint,
                                 satisfying_values_vec, true);

  if (satisfying_values != nullptr) {
    std::copy(satisfying_values_vec.begin(), satisfying_values_vec.end(), satisfying_values);
  }

  return first_satisfying_signal_idx;
  CATCHRET(uint32_t);
}
uint32_t hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* hsa_signals,
hsa_signal_condition_t* conds, hsa_signal_value_t* values,
uint64_t timeout_hint, hsa_wait_state_t wait_hint,
@@ -585,8 +615,14 @@ uint32_t hsa_amd_signal_wait_any(uint32_t signal_count, hsa_signal_t* hsa_signal
assert(hsa_signals[i].handle != 0 && core::SharedSignal::Convert(hsa_signals[i])->IsValid() &&
"Invalid signal.");
return core::Signal::WaitAny(signal_count, hsa_signals, conds, values,
timeout_hint, wait_hint, satisfying_value);
std::vector<hsa_signal_value_t> satisfying_value_vec(1);
uint32_t satisfying_signal_idx =
core::Signal::WaitMultiple(signal_count, hsa_signals, conds, values, timeout_hint, wait_hint,
satisfying_value_vec, false);
if (satisfying_value) *satisfying_value = satisfying_value_vec.at(0);
return satisfying_signal_idx;
CATCHRET(uint32_t);
}
+13 -20
Просмотреть файл
@@ -1533,7 +1533,7 @@ hsa_status_t Runtime::IPCDetach(void* ptr) {
}
void Runtime::AsyncEventsLoop(void* _eventsInfo) {
struct AsyncEventsInfo* eventsInfo = reinterpret_cast<struct AsyncEventsInfo*>(_eventsInfo);
AsyncEventsInfo* eventsInfo = reinterpret_cast<AsyncEventsInfo*>(_eventsInfo);
auto& async_events_control_ = eventsInfo->control;
auto& async_events_ = eventsInfo->events;
@@ -1602,26 +1602,19 @@ void Runtime::AsyncEventsLoop(void* _eventsInfo) {
while (!async_events_control_.exit) {
// Wait for a signal
hsa_signal_value_t value = 0;
std::vector<hsa_signal_value_t> value(1);
value[0] = 0;
uint32_t index = 0;
uint32_t wait_any = true;
if (eventsInfo->monitor_exceptions) {
index = Signal::WaitAnyExceptions(
uint32_t(async_events_.Size()),
&async_events_.signal_[0],
&async_events_.cond_[0],
&async_events_.value_[0],
&value);
index =
Signal::WaitAnyExceptions(uint32_t(async_events_.Size()), &async_events_.signal_[0],
&async_events_.cond_[0], &async_events_.value_[0], &value[0]);
} else {
if (core::Runtime::runtime_singleton_->flag().wait_any()) {
index = Signal::WaitAny(
uint32_t(async_events_.Size()),
&async_events_.signal_[0],
&async_events_.cond_[0],
&async_events_.value_[0],
uint64_t(-1),
HSA_WAIT_STATE_BLOCKED,
&value);
index = Signal::WaitMultiple(uint32_t(async_events_.Size()), &async_events_.signal_[0],
&async_events_.cond_[0], &async_events_.value_[0], uint64_t(-1),
HSA_WAIT_STATE_BLOCKED, value, false);
} else {
// Skip wake-up signal logic
index = 1;
@@ -1636,7 +1629,7 @@ void Runtime::AsyncEventsLoop(void* _eventsInfo) {
hsa_signal_handle(async_events_control_.wake)->StoreRelaxed(0);
} else if (index != -1) {
if (wait_any) {
processEvent(index, value, wait_any);
processEvent(index, value[0], wait_any);
} else {
index = 0;
}
@@ -1664,12 +1657,12 @@ void Runtime::AsyncEventsLoop(void* _eventsInfo) {
// Check remaining signals before sleeping.
for (size_t i = index; i < async_events_.Size(); i++) {
hsa_signal_handle sig(async_events_.signal_[i]);
value = atomic::Load(&sig->signal_.value, std::memory_order_relaxed);
if (checkCondition(async_events_.cond_[i], value, async_events_.value_[i])) {
value[0] = atomic::Load(&sig->signal_.value, std::memory_order_relaxed);
if (checkCondition(async_events_.cond_[i], value[0], async_events_.value_[i])) {
if (i == 0) {
hsa_signal_handle(async_events_control_.wake)->StoreRelaxed(0);
} else {
if (!processEvent(i, value, wait_any)) {
if (!processEvent(i, value[0], wait_any)) {
i--;
}
}
+32 -18
Просмотреть файл
@@ -2,24 +2,24 @@
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
//
//
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
//
// AMD Research and AMD HSA Software Development
//
//
// Advanced Micro Devices, Inc.
//
//
// www.amd.com
//
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
@@ -29,7 +29,7 @@
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
@@ -46,6 +46,9 @@
#include "core/inc/signal.h"
#include <algorithm>
#include <numeric>
#include <vector>
#include "core/util/timer.h"
#include "core/inc/runtime.h"
@@ -177,10 +180,11 @@ Signal::~Signal() {
}
}
uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
uint64_t timeout, hsa_wait_state_t wait_hint,
hsa_signal_value_t* satisfying_value) {
uint32_t Signal::WaitMultiple(uint32_t signal_count, const hsa_signal_t* hsa_signals,
const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
uint64_t timeout, hsa_wait_state_t wait_hint,
std::vector<hsa_signal_value_t>& satisfying_values,
bool wait_on_all) {
hsa_signal_handle* signals =
reinterpret_cast<hsa_signal_handle*>(const_cast<hsa_signal_t*>(hsa_signals));
@@ -251,10 +255,14 @@ uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
timer::duration_from_seconds<timer::fast_clock::duration>(
double(timeout) / double(hsa_freq));
bool condition_met = false;
std::vector<uint32_t> unmet_condition_ids(signal_count);
std::iota(unmet_condition_ids.begin(), unmet_condition_ids.end(), 0);
while (true) {
// Cannot mwaitx - polling multiple signals
for (uint32_t i = 0; i < signal_count; i++) {
for (auto it = unmet_condition_ids.begin(); it != unmet_condition_ids.end();) {
auto i = *it;
bool condition_met = false;
if (!signals[i]->IsValid())
return uint32_t(-1);
@@ -282,8 +290,14 @@ uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
return uint32_t(-1);
}
if (condition_met) {
if (satisfying_value != NULL) *satisfying_value = value;
return i;
it = unmet_condition_ids.erase(it);
satisfying_values[i] = value;
if (!wait_on_all)
return i;
else if (unmet_condition_ids.empty())
return 0;
} else {
++it;
}
}
@@ -306,7 +320,7 @@ uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
uint64_t ct=timer::duration_cast<std::chrono::milliseconds>(
time_remaining).count();
wait_ms = (ct>0xFFFFFFFEu) ? 0xFFFFFFFEu : ct;
hsaKmtWaitOnMultipleEvents_Ext(evts, unique_evts, false, wait_ms, event_age);
hsaKmtWaitOnMultipleEvents_Ext(evts, unique_evts, wait_on_all, wait_ms, event_age);
}
}
+1
Просмотреть файл
@@ -257,6 +257,7 @@ global:
hsa_ven_amd_pcs_flush;
hsa_amd_queue_get_info;
hsa_amd_enable_logging;
hsa_amd_signal_wait_all;
local:
*;
};
+2 -1
Просмотреть файл
@@ -3,7 +3,7 @@
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2020, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2014-2024, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
@@ -268,6 +268,7 @@ struct AmdExtTable {
decltype(hsa_amd_queue_get_info)* hsa_amd_queue_get_info_fn;
decltype(hsa_amd_vmem_address_reserve_align)* hsa_amd_vmem_address_reserve_align_fn;
decltype(hsa_amd_enable_logging)* hsa_amd_enable_logging_fn;
decltype(hsa_amd_signal_wait_all)* hsa_amd_signal_wait_all_fn;
};
// Table to export HSA Core Runtime Apis
+1 -1
Просмотреть файл
@@ -58,7 +58,7 @@
// Step Ids of the Api tables exported by Hsa Core Runtime
#define HSA_API_TABLE_STEP_VERSION 0x01
#define HSA_CORE_API_TABLE_STEP_VERSION 0x00
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x04
#define HSA_AMD_EXT_API_TABLE_STEP_VERSION 0x05
#define HSA_FINALIZER_API_TABLE_STEP_VERSION 0x00
#define HSA_IMAGE_API_TABLE_STEP_VERSION 0x01
// Rocprofiler just checks HSA_MAGE_EXT_API_TABLE_STEP_VERSION
+18 -3
Просмотреть файл
@@ -57,9 +57,10 @@
* - 1.4 - Virtual Memory API
* - 1.5 - hsa_amd_agent_info: HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES
* - 1.6 - Virtual Memory API: hsa_amd_vmem_address_reserve_align
* - 1.7 - hsa_amd_signal_wait_all
*/
#define HSA_AMD_INTERFACE_VERSION_MAJOR 1
#define HSA_AMD_INTERFACE_VERSION_MINOR 6
#define HSA_AMD_INTERFACE_VERSION_MINOR 7
#ifdef __cplusplus
extern "C" {
@@ -1179,6 +1180,20 @@ hsa_status_t HSA_API
hsa_signal_value_t value,
hsa_amd_signal_handler handler, void* arg);
/**
 * @brief Wait for all signal-condition pairs to be satisfied.
 *
 * @details Allows waiting for all of several signal and condition pairs to be
 * satisfied. The function returns 0 if all signals met their conditions and -1
 * (converted to the unsigned return type) on a timeout. The satisfying value of
 * each signal is returned in @p satisfying_values unless @p satisfying_values
 * is nullptr. This function provides only relaxed memory semantics.
 *
 * @param[in] signal_count Number of entries in @p signals, @p conds and
 * @p values.
 *
 * @param[in] signals Array of signals to wait on.
 *
 * @param[in] conds Array of conditions, one per signal.
 *
 * @param[in] values Array of comparison values, one per signal.
 *
 * @param[in] timeout_hint Timeout hint value.
 *
 * @param[in] wait_hint Hint about the wait state.
 *
 * @param[out] satisfying_values Optional array receiving one value per signal;
 * when not nullptr it must have room for @p signal_count elements.
 */
uint32_t HSA_API hsa_amd_signal_wait_all(uint32_t signal_count, hsa_signal_t* signals,
hsa_signal_condition_t* conds, hsa_signal_value_t* values,
uint64_t timeout_hint, hsa_wait_state_t wait_hint,
hsa_signal_value_t* satisfying_values);
/**
* @brief Wait for any signal-condition pair to be satisfied.
*
@@ -1429,7 +1444,7 @@ typedef enum {
* following its memory access model. The actual placement may vary or migrate
* due to the system's NUMA policy and state, which is beyond the scope of
* HSA APIs.
*/
*/
typedef struct hsa_amd_memory_pool_s {
/**
* Opaque handle.
@@ -2972,7 +2987,7 @@ typedef enum hsa_amd_svm_attribute_s {
HSA_AMD_SVM_ATTRIB_ACCESS_QUERY = 0x203,
} hsa_amd_svm_attribute_t;
// List type for hsa_amd_svm_attributes_set/get.
// List type for hsa_amd_svm_attributes_set/get.
typedef struct hsa_amd_svm_attribute_pair_s {
// hsa_amd_svm_attribute_t value.
uint64_t attribute;