SWDEV-299893 - Set preferred node affinity
Set affinity to the closest node of the current GPU. This reduces
the latency to fetch kernel args, since the device queries the CPU cache
of the core that did the dispatch. This behavior is controlled with the
AMD_CPU_AFFINITY env var (disabled by default).
Change-Id: I65afba62cb818ea25a311b88d1c0dd5c51330292
[ROCm/clr commit: b192beea52]
Tento commit je obsažen v:
@@ -1730,6 +1730,8 @@ class Device : public RuntimeObject {
|
||||
) const {
|
||||
return false;
|
||||
};
|
||||
|
||||
//! Returns the preferred NUMA node for this device; this default implementation always returns 0
virtual const uint32_t getPreferredNumaNode() const { return 0; }
|
||||
//! Hook for releasing a global signal; this default implementation does nothing
virtual void ReleaseGlobalSignal(void* signal) const {}
|
||||
|
||||
//! Returns TRUE if the device is available for computations
|
||||
|
||||
@@ -170,7 +170,8 @@ Device::Device(hsa_agent_t bkendDevice)
|
||||
, queuePool_(QueuePriority::Total)
|
||||
, coopHostcallBuffer_(nullptr)
|
||||
, queueWithCUMaskPool_(QueuePriority::Total)
|
||||
, numOfVgpus_(0) {
|
||||
, numOfVgpus_(0)
|
||||
, preferred_numa_node_(0) {
|
||||
group_segment_.handle = 0;
|
||||
system_segment_.handle = 0;
|
||||
system_coarse_segment_.handle = 0;
|
||||
@@ -194,7 +195,7 @@ void Device::setupCpuAgent() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
preferred_numa_node_ = index;
|
||||
cpu_agent_ = cpu_agents_[index].agent;
|
||||
system_segment_ = cpu_agents_[index].fine_grain_pool;
|
||||
system_coarse_segment_ = cpu_agents_[index].coarse_grain_pool;
|
||||
|
||||
@@ -534,6 +534,8 @@ class Device : public NullDevice {
|
||||
|
||||
virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset);
|
||||
|
||||
//! Returns the value cached in preferred_numa_node_
const uint32_t getPreferredNumaNode() const { return preferred_numa_node_; }
|
||||
|
||||
private:
|
||||
bool create();
|
||||
|
||||
@@ -555,6 +557,7 @@ class Device : public NullDevice {
|
||||
static std::vector<AgentInfo> cpu_agents_;
|
||||
|
||||
hsa_agent_t cpu_agent_;
|
||||
uint32_t preferred_numa_node_;
|
||||
std::vector<hsa_agent_t> p2p_agents_; //!< List of P2P agents available for this device
|
||||
std::vector<Device*> enabled_p2p_devices_; //!< List of user enabled P2P devices for this device
|
||||
mutable std::mutex lock_allow_access_; //!< To serialize allow_access calls
|
||||
|
||||
@@ -222,6 +222,9 @@ class Os : AllStatic {
|
||||
//! Platform-specific optimized memcpy()
|
||||
static void* fastMemcpy(void* dest, const void* src, size_t n);
|
||||
|
||||
//! NUMA related settings
|
||||
static void setPreferredNumaNode(uint32_t node);
|
||||
|
||||
// File/Path helper routines:
|
||||
//
|
||||
|
||||
|
||||
@@ -48,6 +48,10 @@
|
||||
#define DT_GNU_HASH 0x6ffffef5
|
||||
#endif // DT_GNU_HASH
|
||||
|
||||
#ifdef ROCCLR_SUPPORT_NUMA_POLICY
|
||||
#include <numa.h>
|
||||
#endif // ROCCLR_SUPPORT_NUMA_POLICY
|
||||
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
@@ -60,7 +64,6 @@
|
||||
#include <algorithm>
|
||||
#include <mutex>
|
||||
|
||||
|
||||
namespace amd {
|
||||
|
||||
static struct sigaction oldSigAction;
|
||||
@@ -121,7 +124,6 @@ static void divisionErrorHandler(int sig, siginfo_t* info, void* ptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
std::cerr << "Unhandled signal in divisionErrorHandler()" << std::endl;
|
||||
::abort();
|
||||
}
|
||||
@@ -306,6 +308,20 @@ void Os::currentStackInfo(address* base, size_t* size) {
|
||||
|
||||
//! Sets the current thread's name by passing \a name to prctl(PR_SET_NAME)
void Os::setCurrentThreadName(const char* name) { ::prctl(PR_SET_NAME, name); }
|
||||
|
||||
//! Restricts CPU affinity to the CPUs that belong to NUMA node \a node.
//!
//! The call is a no-op unless AMD_CPU_AFFINITY is set, and it compiles to an
//! empty function when ROCCLR_SUPPORT_NUMA_POLICY is not defined, because
//! <numa.h> is only included under that macro.
//!
//! \param node NUMA node whose CPUs become the new affinity mask
void Os::setPreferredNumaNode(uint32_t node) {
#ifdef ROCCLR_SUPPORT_NUMA_POLICY
  // Every other libnuma call is undefined if numa_available() reports failure
  if (!AMD_CPU_AFFINITY || (numa_available() < 0)) {
    return;
  }

  // numa_allocate_cpumask() returns a mask sized for all possible CPUs, which
  // is the size numa_node_to_cpus() requires
  bitmask* bm = numa_allocate_cpumask();

  // Only apply the mask when the node -> CPU lookup succeeded; otherwise the
  // mask content is not meaningful
  if (numa_node_to_cpus(node, bm) == 0) {
    // pid 0 is passed through to numa_sched_setaffinity() unchanged
    if (numa_sched_setaffinity(0, bm) < 0) {
      assert(0 && "failed to set affinity");
    }
  }

  numa_free_cpumask(bm);
#endif  // ROCCLR_SUPPORT_NUMA_POLICY
}
|
||||
|
||||
void* Thread::entry(Thread* thread) {
|
||||
sigset_t set;
|
||||
|
||||
@@ -250,6 +250,8 @@ static void SetThreadName(DWORD threadId, const char* name) {
|
||||
|
||||
//! Sets the current thread's name by forwarding \a name and GetCurrentThreadId() to SetThreadName()
void Os::setCurrentThreadName(const char* name) { SetThreadName(GetCurrentThreadId(), name); }
|
||||
|
||||
//! NUMA affinity is not implemented in this (Windows) backend; \a node is ignored
void Os::setPreferredNumaNode(uint32_t node) {}
|
||||
|
||||
static LONG WINAPI divExceptionFilter(struct _EXCEPTION_POINTERS* ep) {
|
||||
DWORD code = ep->ExceptionRecord->ExceptionCode;
|
||||
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele