SWDEV-362046 - Report HIP_OPS activities using the ROCr driver_node_id instead of the device's index
The ROCclr assigns zero-based IDs to GPUs in the order they are
discovered. Until now, that zero-based ID has been used to identify
the GPU on which a HIP_OPS activity took place.
When multiple ranks are used, each rank's first logical device always
has GPU ID 0, regardless of which physical device is selected with
CUDA_VISIBLE_DEVICES. Because of this, when merging trace files from
multiple ranks, GPU IDs from different processes may overlap.
The long-term solution is to use the KFD's gpu_id, which is stable
across APIs and processes. Unfortunately, the gpu_id is not yet exposed
by the ROCr, so for now use the driver's node id.
Change-Id: Ib78854527d600d175bb76e2df0747c33f898c615
[ROCm/clr commit: 9a82118c85]
This commit is contained in:
committed by
Laurent Morichetti
parent
a8c33d177a
commit
860e33dbed
@@ -613,6 +613,8 @@ struct Info : public amd::EmbeddedObject {
|
||||
|
||||
bool virtualMemoryManagement_; //!< Virtual memory management support
|
||||
size_t virtualMemAllocGranularity_; //!< virtual memory allocation size/addr granularity
|
||||
|
||||
uint32_t driverNodeId_;
|
||||
};
|
||||
|
||||
//! Device settings
|
||||
|
||||
@@ -1179,6 +1179,13 @@ bool Device::populateOCLDeviceConstants() {
|
||||
}
|
||||
assert(info_.globalMemChannels_ > 0);
|
||||
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
hsa_agent_get_info(bkendDevice_,
|
||||
static_cast<hsa_agent_info_t>(HSA_AMD_AGENT_INFO_DRIVER_NODE_ID),
|
||||
&info_.driverNodeId_)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
setupCpuAgent();
|
||||
|
||||
checkAtomicSupport();
|
||||
|
||||
@@ -73,8 +73,8 @@ void ReportActivity(const amd::Command& command) {
|
||||
command.profilingInfo().start_, // begin timestamp, ns
|
||||
command.profilingInfo().end_, // end timestamp, ns
|
||||
{{
|
||||
static_cast<int>(queue->device().index()), // device id
|
||||
queue->vdev()->index() // queue id
|
||||
static_cast<int>(queue->device().info().driverNodeId_), // device id
|
||||
queue->vdev()->index() // queue id
|
||||
}},
|
||||
{} // copied data size for memcpy, or kernel name for dispatch
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user