diff --git a/libhsakmt/include/hsakmt/hsakmttypes.h b/libhsakmt/include/hsakmt/hsakmttypes.h index 41dae8d99e..fd1661f06e 100644 --- a/libhsakmt/include/hsakmt/hsakmttypes.h +++ b/libhsakmt/include/hsakmt/hsakmttypes.h @@ -586,7 +586,8 @@ typedef struct _HsaMemFlags unsigned int GTTAccess: 1; // default = 0; If 1: The caller indicates this memory will be mapped to GART for MES // KFD will allocate GTT memory with the Preferred_node set as gpu_id for GART mapping unsigned int Contiguous: 1; // Allocate contiguous VRAM - unsigned int Reserved: 9; + unsigned int ExecuteBlit: 1; // default = 0; If 1: The caller indicates that the memory is for a blit kernel object. + unsigned int Reserved: 8; } ui32; HSAuint32 Value; diff --git a/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp b/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp index 2c3d518430..935007b6a3 100644 --- a/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp +++ b/runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp @@ -241,6 +241,9 @@ KfdDriver::AllocateMemory(const core::MemoryRegion &mem_region, ? 1 : kmt_alloc_flags.ui32.Uncached); + kmt_alloc_flags.ui32.ExecuteBlit = + !!(alloc_flags & core::MemoryRegion::AllocateExecutableBlitKernelObject); + if (m_region.IsLocalMemory()) { // Allocate physically contiguous memory. AllocateKfdMemory function call // will fail if this flag is not supported in KFD. diff --git a/runtime/hsa-runtime/core/inc/memory_region.h b/runtime/hsa-runtime/core/inc/memory_region.h index db0210bfda..5da2b62fbd 100644 --- a/runtime/hsa-runtime/core/inc/memory_region.h +++ b/runtime/hsa-runtime/core/inc/memory_region.h @@ -106,6 +106,9 @@ class MemoryRegion : public Checked<0x9C961F19EE175BB3> { AllocateGTTAccess = (1 << 9), AllocateContiguous = (1 << 10), // Physically contiguous memory AllocateUncached = (1 << 11), // Uncached memory + // Ignored by Thunk; only used by the emulator/dxg to track code-object allocations + // during AQL-to-PM4 conversion. KfdDriver::AllocateMemory forwards it as the ExecuteBlit bit. 
+ AllocateExecutableBlitKernelObject = (1 << 12), }; typedef uint32_t AllocateFlags; diff --git a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index f994fb7fb0..6cb147a306 100644 --- a/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -378,7 +378,8 @@ void GpuAgent::AssembleShader(const char* func_name, AssembleTarget assemble_tar (assemble_target == AssembleTarget::AQL ? sizeof(amd_kernel_code_t) : 0); code_buf_size = AlignUp(header_size + asic_shader->size, 0x1000); - code_buf = system_allocator()(code_buf_size, 0x1000, core::MemoryRegion::AllocateExecutable); + code_buf = system_allocator()(code_buf_size, 0x1000, + core::MemoryRegion::AllocateExecutable | core::MemoryRegion::AllocateExecutableBlitKernelObject); assert(code_buf != NULL && "Code buffer allocation failed"); memset(code_buf, 0, code_buf_size);