2
0

SWDEV-548034 - Separate sdma signal from compute in checkGpuTime (#1201)

Este cometimento está contido em:
Pengda Xie
2025-10-09 14:55:25 -07:00
cometido por GitHub
ascendente 3d10a43c02
cometimento d195d925e9
+22 -15
Ver ficheiro
@@ -127,6 +127,9 @@ void Timestamp::checkGpuTime() {
if (HwProfiling()) {
uint64_t start = std::numeric_limits<uint64_t>::max();
uint64_t end = 0;
uint64_t sdmaStart = std::numeric_limits<uint64_t>::max();
uint64_t sdmaEnd = 0;
for (auto it : signals_) {
amd::ScopedLock lock(it->LockSignalOps());
@@ -140,41 +143,45 @@ void Timestamp::checkGpuTime() {
if (command().GetBatchHead() == nullptr || command().profilingInfo().marker_ts_ ||
command().type() == CL_COMMAND_TASK) {
hsa_amd_profiling_dispatch_time_t time = {};
hsa_amd_profiling_async_copy_time_t timeSdma = {};
amd_signal_t* amdSignal = reinterpret_cast<amd_signal_t*>(it->signal_.handle);
if (it->engine_ == HwQueueEngine::Compute) {
Hsa::profiling_get_dispatch_time(gpu()->gpu_device(), it->signal_, &time);
if (it->engine_ == HwQueueEngine::SdmaInter || it->engine_ == HwQueueEngine::SdmaRead ||
it->engine_ == HwQueueEngine::SdmaWrite || it->engine_ == HwQueueEngine::SdmaIntra) {
Hsa::profiling_get_async_copy_time(it->signal_, &timeSdma);
sdmaStart = std::min(timeSdma.start, sdmaStart);
sdmaEnd = std::max(timeSdma.end, sdmaEnd);
// set dispatch time to be used in logging.
time.start = timeSdma.start;
time.end = timeSdma.end;
} else {
hsa_amd_profiling_async_copy_time_t time_sdma = {};
Hsa::profiling_get_async_copy_time(it->signal_, &time_sdma);
time.start = time_sdma.start;
time.end = time_sdma.end;
Hsa::profiling_get_dispatch_time(gpu()->gpu_device(), it->signal_, &time);
start = std::min(time.start, start);
end = std::max(time.end, end);
}
start = std::min(time.start, start);
end = std::max(time.end, end);
if ((command().type() == CL_COMMAND_TASK) && (it->flags_.isPacketDispatch_ == true)) {
static_cast<amd::AccumulateCommand&>(command()).addTimestamps(time.start, time.end);
}
ClPrint(amd::LOG_INFO, amd::LOG_TS,
"Signal = (0x%lx), Translated start/end = %ld / %ld, "
"Elapsed = %ld ns, ticks start/end = %ld / %ld, Ticks elapsed = %ld",
"Signal = (0x%lx), Translated start/end = %ld / %ld, Elapsed = %ld ns, "
"ticks start/end = %ld / %ld, Ticks elapsed = %ld, Engine = %u",
it->signal_.handle, time.start, time.end, time.end - time.start,
amdSignal->start_ts, amdSignal->end_ts, amdSignal->end_ts - amdSignal->start_ts);
amdSignal->start_ts, amdSignal->end_ts, amdSignal->end_ts - amdSignal->start_ts,
it->engine_);
}
it->flags_.done_ = true;
}
signals_.clear();
if (end != 0) {
if (end != 0 || sdmaEnd != 0) {
// Check if it's the first execution and update start time
if (!accum_ena_) {
start_ = start * ticksToTime_;
start_ = ((sdmaEnd != 0) ? sdmaStart : start) * ticksToTime_;
accum_ena_ = true;
}
// Progress the end time always
end_ = end * ticksToTime_;
end_ = ((sdmaEnd != 0) ? sdmaEnd : end) * ticksToTime_;
}
}
}