diff --git a/projects/clr/rocclr/device/device.cpp b/projects/clr/rocclr/device/device.cpp index 6cddf83921..fcab448501 100644 --- a/projects/clr/rocclr/device/device.cpp +++ b/projects/clr/rocclr/device/device.cpp @@ -695,6 +695,8 @@ bool Device::init() { // Ignore the failure and assume KFD is not installed. // abort(); DevLogError("KFD is not installed \n"); + // Disable direct dispatch if ROC initialization wasn't successful + AMD_DIRECT_DISPATCH = flagIsDefault(AMD_DIRECT_DISPATCH) ? false : AMD_DIRECT_DISPATCH; } if (!amd::IS_HIP) { ret |= roc::NullDevice::init(); @@ -703,6 +705,10 @@ bool Device::init() { #endif // WITH_HSA_DEVICE #if defined(WITH_PAL_DEVICE) if (GPU_ENABLE_PAL != 0) { + if (GPU_ENABLE_PAL == 1) { + // PAL path can't support direct dispatch, unless it's forced + AMD_DIRECT_DISPATCH = flagIsDefault(AMD_DIRECT_DISPATCH) ? false : AMD_DIRECT_DISPATCH; + } ret |= PalDeviceLoad(); } #endif // WITH_PAL_DEVICE diff --git a/projects/clr/rocclr/device/rocm/rocvirtual.cpp b/projects/clr/rocclr/device/rocm/rocvirtual.cpp index 2ee6a11793..8429355360 100644 --- a/projects/clr/rocclr/device/rocm/rocvirtual.cpp +++ b/projects/clr/rocclr/device/rocm/rocvirtual.cpp @@ -1079,8 +1079,8 @@ bool VirtualGPU::dispatchGenericAqlPacket(AqlPacket* packet, uint16_t header, ui ClPrint(amd::LOG_DEBUG, amd::LOG_AQL, "SWq=0x%zx, HWq=0x%zx, id=%d, Dispatch Header = " "0x%x (type=%d, barrier=%d, acquire=%d, release=%d), " - "setup=%d, grid=[%zu, %zu, %zu], workgroup=[%zu, %zu, %zu], private_seg_size=%zu, " - "group_seg_size=%zu, kernel_obj=0x%zx, kernarg_address=0x%zx, completion_signal=0x%zx, " + "setup=%d, grid=[%u, %u, %u], workgroup=[%u, %u, %u], private_seg_size=%u, " + "group_seg_size=%u, kernel_obj=0x%zx, kernarg_address=0x%zx, completion_signal=0x%zx, " "correlation_id=%zu, rptr=%u, wptr=%u", gpu_queue_, gpu_queue_->base_address, gpu_queue_->id, header, extractAqlBits(header, HSA_PACKET_HEADER_TYPE, HSA_PACKET_HEADER_WIDTH_TYPE), @@ -1294,8 +1294,8 @@ bool VirtualGPU::dispatchGenericAqlPacketBatch(const std::vector& pa ClPrint(amd::LOG_DETAIL_DEBUG, amd::LOG_AQL, "SWq=0x%zx, HWq=0x%zx, id=%d, Dispatch Header = " "0x%x (type=%d, barrier=%d, acquire=%d, release=%d), " - "setup=%d, grid=[%zu, %zu, %zu], workgroup=[%zu, %zu, %zu], " - "private_seg_size=%zu, group_seg_size=%zu, kernel_obj=0x%zx, " + "setup=%d, grid=[%u, %u, %u], workgroup=[%u, %u, %u], " + "private_seg_size=%u, group_seg_size=%u, kernel_obj=0x%zx, " "kernarg_address=0x%zx, completion_signal=0x%zx, correlation_id=%zu, " "rptr=%u, wptr=%u", gpu_queue_, gpu_queue_->base_address, gpu_queue_->id, header, packetType,