SWDEV-299893 - Set preferred node affinity
Set affinity to the closest node of the current GPU. This reduces the latency of fetching kernel args, since the device queries the CPU cache of the core that did the dispatch. This behavior is controlled by the AMD_CPU_AFFINITY env var (disabled by default). Change-Id: I65afba62cb818ea25a311b88d1c0dd5c51330292
Цей коміт міститься в:
@@ -1730,6 +1730,8 @@ class Device : public RuntimeObject {
|
||||
) const {
|
||||
return false;
|
||||
};
|
||||
|
||||
//! Returns the NUMA node preferred by this device.
//! This default implementation always reports node 0.
virtual const uint32_t getPreferredNumaNode() const {
  return 0;
}
|
||||
//! Releases the given global signal.
//! This default implementation does nothing with \p signal.
virtual void ReleaseGlobalSignal(void* signal) const {
}
|
||||
|
||||
//! Returns TRUE if the device is available for computations
|
||||
|
||||
@@ -170,7 +170,8 @@ Device::Device(hsa_agent_t bkendDevice)
|
||||
, queuePool_(QueuePriority::Total)
|
||||
, coopHostcallBuffer_(nullptr)
|
||||
, queueWithCUMaskPool_(QueuePriority::Total)
|
||||
, numOfVgpus_(0) {
|
||||
, numOfVgpus_(0)
|
||||
, preferred_numa_node_(0) {
|
||||
group_segment_.handle = 0;
|
||||
system_segment_.handle = 0;
|
||||
system_coarse_segment_.handle = 0;
|
||||
@@ -194,7 +195,7 @@ void Device::setupCpuAgent() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
preferred_numa_node_ = index;
|
||||
cpu_agent_ = cpu_agents_[index].agent;
|
||||
system_segment_ = cpu_agents_[index].fine_grain_pool;
|
||||
system_coarse_segment_ = cpu_agents_[index].coarse_grain_pool;
|
||||
|
||||
@@ -534,6 +534,8 @@ class Device : public NullDevice {
|
||||
|
||||
virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset);
|
||||
|
||||
//! Returns the value stored in preferred_numa_node_ for this device.
const uint32_t getPreferredNumaNode() const {
  return preferred_numa_node_;
}
|
||||
|
||||
private:
|
||||
bool create();
|
||||
|
||||
@@ -555,6 +557,7 @@ class Device : public NullDevice {
|
||||
static std::vector<AgentInfo> cpu_agents_;
|
||||
|
||||
hsa_agent_t cpu_agent_;
|
||||
uint32_t preferred_numa_node_;
|
||||
std::vector<hsa_agent_t> p2p_agents_; //!< List of P2P agents available for this device
|
||||
std::vector<Device*> enabled_p2p_devices_; //!< List of user enabled P2P devices for this device
|
||||
mutable std::mutex lock_allow_access_; //!< To serialize allow_access calls
|
||||
|
||||
@@ -222,6 +222,9 @@ class Os : AllStatic {
|
||||
//! Platform-specific optimized memcpy()
|
||||
static void* fastMemcpy(void* dest, const void* src, size_t n);
|
||||
|
||||
//! NUMA related settings
|
||||
static void setPreferredNumaNode(uint32_t node);
|
||||
|
||||
// File/Path helper routines:
|
||||
//
|
||||
|
||||
|
||||
@@ -48,6 +48,10 @@
|
||||
#define DT_GNU_HASH 0x6ffffef5
|
||||
#endif // DT_GNU_HASH
|
||||
|
||||
#ifdef ROCCLR_SUPPORT_NUMA_POLICY
|
||||
#include <numa.h>
|
||||
#endif // ROCCLR_SUPPORT_NUMA_POLICY
|
||||
|
||||
#include <atomic>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
@@ -60,7 +64,6 @@
|
||||
#include <algorithm>
|
||||
#include <mutex>
|
||||
|
||||
|
||||
namespace amd {
|
||||
|
||||
static struct sigaction oldSigAction;
|
||||
@@ -121,7 +124,6 @@ static void divisionErrorHandler(int sig, siginfo_t* info, void* ptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
std::cerr << "Unhandled signal in divisionErrorHandler()" << std::endl;
|
||||
::abort();
|
||||
}
|
||||
@@ -306,6 +308,20 @@ void Os::currentStackInfo(address* base, size_t* size) {
|
||||
|
||||
//! Names the calling thread by passing \p name to prctl(PR_SET_NAME).
void Os::setCurrentThreadName(const char* name) {
  ::prctl(PR_SET_NAME, name);
}
|
||||
|
||||
//! Restricts the calling thread to the CPUs that belong to NUMA node \p node.
//!
//! The call is a no-op unless all of the following hold:
//!  - the build defines ROCCLR_SUPPORT_NUMA_POLICY (the same macro that
//!    guards the <numa.h> include, so libnuma symbols are only referenced
//!    when they are actually declared),
//!  - AMD_CPU_AFFINITY is enabled,
//!  - libnuma reports that NUMA is available on this system.
//!
//! @param node  NUMA node whose CPU set becomes the thread's affinity mask.
void Os::setPreferredNumaNode(uint32_t node) {
#ifdef ROCCLR_SUPPORT_NUMA_POLICY
  // Every other libnuma call is undefined when numa_available() reports -1.
  if (!AMD_CPU_AFFINITY || (numa_available() < 0)) {
    return;
  }

  // numa_allocate_cpumask() sizes the mask for all possible CPUs, which is
  // what numa_node_to_cpus() expects; a mask sized by the configured CPU
  // count can be too small and make that call fail with ERANGE.
  bitmask* bm = numa_allocate_cpumask();

  // Only apply the mask when it was filled in successfully; pid 0 addresses
  // the calling thread.
  if ((numa_node_to_cpus(static_cast<int>(node), bm) < 0) ||
      (numa_sched_setaffinity(0, bm) < 0)) {
    assert(0 && "failed to set affinity");
  }

  numa_free_cpumask(bm);
#endif  // ROCCLR_SUPPORT_NUMA_POLICY
}
|
||||
|
||||
void* Thread::entry(Thread* thread) {
|
||||
sigset_t set;
|
||||
|
||||
@@ -250,6 +250,8 @@ static void SetThreadName(DWORD threadId, const char* name) {
|
||||
|
||||
void Os::setCurrentThreadName(const char* name) { SetThreadName(GetCurrentThreadId(), name); }
|
||||
|
||||
//! NUMA affinity request; this implementation is a no-op and ignores \p node.
void Os::setPreferredNumaNode(uint32_t node) {}
|
||||
|
||||
static LONG WINAPI divExceptionFilter(struct _EXCEPTION_POINTERS* ep) {
|
||||
DWORD code = ep->ExceptionRecord->ExceptionCode;
|
||||
|
||||
|
||||
Посилання в новій задачі
Заблокувати користувача