diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_pm4.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_pm4.h index b188682ac2..008e5efa5a 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_pm4.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_pm4.h @@ -72,6 +72,12 @@ # define PM4_ACQUIRE_MEM_COHER_CNTL_SH_ICACHE_ACTION_ENA (1 << 29) #define PM4_ACQUIRE_MEM_DW2_COHER_SIZE(x) (((x) & 0xFFFFFFFF) << 0) #define PM4_ACQUIRE_MEM_DW3_COHER_SIZE_HI(x) (((x) & 0xFF) << 0) +#define PM4_ACQUIRE_MEM_DW7_GCR_CNTL(x) (((x) & 0x7FFFF) << 0) +# define PM4_ACQUIRE_MEM_GCR_CNTL_GLI_INV(x) (((x) & 0x3) << 0) +# define PM4_ACQUIRE_MEM_GCR_CNTL_GLK_INV (1 << 7) +# define PM4_ACQUIRE_MEM_GCR_CNTL_GLV_INV (1 << 8) +# define PM4_ACQUIRE_MEM_GCR_CNTL_GL1_INV (1 << 9) +# define PM4_ACQUIRE_MEM_GCR_CNTL_GL2_INV (1 << 14) #define PM4_RELEASE_MEM_DW1_EVENT_INDEX(x) (((x) & 0xF) << 8) # define PM4_RELEASE_MEM_EVENT_INDEX_AQL 0x7 diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 3a8074662f..b36ec23d71 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -1203,29 +1203,40 @@ void GpuAgent::InvalidateCodeCaches() { // Microcode is handling code cache invalidation. return; } - } else if (isa_->GetMajorVersion() > 9) { + } else if (isa_->GetMajorVersion() > 10) { assert(false && "Code cache invalidation not implemented for this agent"); } // Invalidate caches which may hold lines of code object allocation. - constexpr uint32_t cache_inv_size_dw = 7; - uint32_t cache_inv[cache_inv_size_dw]; + uint32_t cache_inv[8] = {0}; + uint32_t cache_inv_size_dw; + + if (isa_->GetMajorVersion() < 10) { + cache_inv[1] = PM4_ACQUIRE_MEM_DW1_COHER_CNTL( + PM4_ACQUIRE_MEM_COHER_CNTL_SH_ICACHE_ACTION_ENA | + PM4_ACQUIRE_MEM_COHER_CNTL_SH_KCACHE_ACTION_ENA | + PM4_ACQUIRE_MEM_COHER_CNTL_TC_ACTION_ENA | + PM4_ACQUIRE_MEM_COHER_CNTL_TC_WB_ACTION_ENA); + + cache_inv_size_dw = 7; + } else { + cache_inv[7] = PM4_ACQUIRE_MEM_DW7_GCR_CNTL( + PM4_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1) | + PM4_ACQUIRE_MEM_GCR_CNTL_GLK_INV | + PM4_ACQUIRE_MEM_GCR_CNTL_GLV_INV | + PM4_ACQUIRE_MEM_GCR_CNTL_GL1_INV | + PM4_ACQUIRE_MEM_GCR_CNTL_GL2_INV); + + cache_inv_size_dw = 8; + } cache_inv[0] = PM4_HDR(PM4_HDR_IT_OPCODE_ACQUIRE_MEM, cache_inv_size_dw, - isa_->GetMajorVersion()); - cache_inv[1] = PM4_ACQUIRE_MEM_DW1_COHER_CNTL( - PM4_ACQUIRE_MEM_COHER_CNTL_SH_ICACHE_ACTION_ENA | - PM4_ACQUIRE_MEM_COHER_CNTL_SH_KCACHE_ACTION_ENA | - PM4_ACQUIRE_MEM_COHER_CNTL_TC_ACTION_ENA | - PM4_ACQUIRE_MEM_COHER_CNTL_TC_WB_ACTION_ENA); + isa_->GetMajorVersion()); cache_inv[2] = PM4_ACQUIRE_MEM_DW2_COHER_SIZE(0xFFFFFFFF); cache_inv[3] = PM4_ACQUIRE_MEM_DW3_COHER_SIZE_HI(0xFF); - cache_inv[4] = 0; - cache_inv[5] = 0; - cache_inv[6] = 0; // Submit the command to the utility queue and wait for it to complete. - queues_[QueueUtility]->ExecutePM4(cache_inv, sizeof(cache_inv)); + queues_[QueueUtility]->ExecutePM4(cache_inv, cache_inv_size_dw * sizeof(uint32_t)); } lazy_ptr& GpuAgent::GetXgmiBlit(const core::Agent& dst_agent) {