SWDEV-322225 - Use numa_allocate_bitmask
- Fix a crash with AMD_CPU_AFFINITY=1 as numa_bitmask_alloc isnt the
right api to allocate bitmask
- Do not set affinity for ROCr thread. It worsens performance rather
than any improvement.
- Fix regression from my previous change for event handler.
Change-Id: I3ea75adc2a6333f29752283eddd5b555e9b58cc5
[ROCm/clr commit: 802c2c8a9f]
Этот коммит содержится в:
@@ -158,8 +158,7 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) {
|
||||
|
||||
amd::Thread* thread = amd::Thread::current();
|
||||
if (!(thread != nullptr ||
|
||||
((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current() &&
|
||||
amd::Os::setThreadAffinityToMainThread()))) {
|
||||
((thread = new amd::HostThread()) != nullptr && thread == amd::Thread::current()))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -413,40 +412,41 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
|
||||
ts->retain();
|
||||
prof_signal->ts_ = ts;
|
||||
ts->AddProfilingSignal(prof_signal);
|
||||
uint32_t init_value = kInitSignalValueOne;
|
||||
bool enqueHandler= false;
|
||||
enqueHandler = AMD_DIRECT_DISPATCH &&
|
||||
(ts->command().Callback() != nullptr ||
|
||||
ts->command().GetBatchHead() != nullptr ) &&
|
||||
(!ts->command().CpuWaitRequested());
|
||||
// If direct dispatch is enabled and the batch head isn't null, then it's a marker and
|
||||
// requires the batch update upon HSA signal completion
|
||||
if (enqueHandler) {
|
||||
if (AMD_DIRECT_DISPATCH) {
|
||||
bool enqueHandler= false;
|
||||
uint32_t init_value = kInitSignalValueOne;
|
||||
// If API callback is enabled, then use a blocking signal for AQL queue.
|
||||
// HSA signal will be acquired in SW and released after HSA signal callback
|
||||
if (ts->command().Callback() != nullptr) {
|
||||
ts->SetCallbackSignal(prof_signal->signal_);
|
||||
// Blocks AQL queue from further processing
|
||||
hsa_signal_add_relaxed(prof_signal->signal_, 1);
|
||||
init_value += 1;
|
||||
}
|
||||
hsa_status_t result = hsa_amd_signal_async_handler(prof_signal->signal_,
|
||||
HSA_SIGNAL_CONDITION_LT, init_value, &HsaAmdSignalHandler, ts);
|
||||
if (HSA_STATUS_SUCCESS != result) {
|
||||
LogError("hsa_amd_signal_async_handler() failed to set the handler!");
|
||||
} else {
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Set Handler: handle(0x%lx), timestamp(%p)",
|
||||
prof_signal->signal_.handle, prof_signal);
|
||||
}
|
||||
enqueHandler = (ts->command().Callback() != nullptr ||
|
||||
ts->command().GetBatchHead() != nullptr ) &&
|
||||
!ts->command().CpuWaitRequested();
|
||||
// If direct dispatch is enabled and the batch head isn't null, then it's a marker and
|
||||
// requires the batch update upon HSA signal completion
|
||||
if (enqueHandler) {
|
||||
uint32_t init_value = kInitSignalValueOne;
|
||||
// If API callback is enabled, then use a blocking signal for AQL queue.
|
||||
// HSA signal will be acquired in SW and released after HSA signal callback
|
||||
if (ts->command().Callback() != nullptr) {
|
||||
ts->SetCallbackSignal(prof_signal->signal_);
|
||||
// Blocks AQL queue from further processing
|
||||
hsa_signal_add_relaxed(prof_signal->signal_, 1);
|
||||
init_value += 1;
|
||||
}
|
||||
hsa_status_t result = hsa_amd_signal_async_handler(prof_signal->signal_,
|
||||
HSA_SIGNAL_CONDITION_LT, init_value, &HsaAmdSignalHandler, ts);
|
||||
if (HSA_STATUS_SUCCESS != result) {
|
||||
LogError("hsa_amd_signal_async_handler() failed to set the handler!");
|
||||
} else {
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Set Handler: handle(0x%lx), timestamp(%p)",
|
||||
prof_signal->signal_.handle, prof_signal);
|
||||
}
|
||||
|
||||
// Update the current command/marker with HW event
|
||||
prof_signal->retain();
|
||||
ts->command().SetHwEvent(prof_signal);
|
||||
} else if (ts->command().profilingInfo().marker_ts_ ) {
|
||||
// Update the current command/marker with HW event
|
||||
prof_signal->retain();
|
||||
ts->command().SetHwEvent(prof_signal);
|
||||
// Update the current command/marker with HW event
|
||||
prof_signal->retain();
|
||||
ts->command().SetHwEvent(prof_signal);
|
||||
} else if (ts->command().profilingInfo().marker_ts_ ) {
|
||||
// Update the current command/marker with HW event
|
||||
prof_signal->retain();
|
||||
ts->command().SetHwEvent(prof_signal);
|
||||
}
|
||||
}
|
||||
if (!sdma_profiling_) {
|
||||
hsa_amd_profiling_async_copy_enable(true);
|
||||
|
||||
@@ -310,11 +310,8 @@ void Os::setCurrentThreadName(const char* name) { ::prctl(PR_SET_NAME, name); }
|
||||
|
||||
void Os::setPreferredNumaNode(uint32_t node) {
|
||||
#ifdef ROCCLR_SUPPORT_NUMA_POLICY
|
||||
if (AMD_CPU_AFFINITY) {
|
||||
// Set preferred node affinity mask
|
||||
int num_cpus = numa_num_configured_cpus();
|
||||
bitmask* bm = numa_bitmask_alloc(num_cpus);
|
||||
|
||||
if (AMD_CPU_AFFINITY && (numa_available() >= 0)) {
|
||||
bitmask* bm = numa_allocate_cpumask();
|
||||
numa_node_to_cpus(node, bm);
|
||||
if (numa_sched_setaffinity(0, bm) < 0) {
|
||||
assert(0 && "failed to set affinity");
|
||||
|
||||
Ссылка в новой задаче
Block a user