diff --git a/runtime/hsa-runtime/core/inc/amd_loader_context.hpp b/runtime/hsa-runtime/core/inc/amd_loader_context.hpp index c109476f35..38666070a1 100644 --- a/runtime/hsa-runtime/core/inc/amd_loader_context.hpp +++ b/runtime/hsa-runtime/core/inc/amd_loader_context.hpp @@ -48,9 +48,9 @@ namespace rocr { namespace amd { -class LoaderContext final: public amd::hsa::loader::Context { -public: - LoaderContext(): amd::hsa::loader::Context() {} +class LoaderContext final : public rocr::amd::hsa::loader::Context { + public: + LoaderContext() : rocr::amd::hsa::loader::Context() {} ~LoaderContext() {} diff --git a/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp b/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp index 2e7418c80c..72c71fa5fc 100644 --- a/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_loader_context.cpp @@ -65,24 +65,6 @@ namespace rocr { namespace { -bool IsLocalRegion(const core::MemoryRegion *region) -{ - const AMD::MemoryRegion *amd_region = (AMD::MemoryRegion*)region; - if (nullptr == amd_region || !amd_region->IsLocalMemory()) { - return false; - } - return true; -} - -bool IsDebuggerRegistered() -{ - return false; - // Leaving code commented as it will be used later on - //return ((core::Runtime::runtime_singleton_->flag().emulate_aql()) && - // (0 != - // core::Runtime::runtime_singleton_->flag().tools_lib_names().size())); -} - class SegmentMemory { public: virtual ~SegmentMemory() {} @@ -262,14 +244,21 @@ bool MappedMemory::Freeze() class RegionMemory final: public SegmentMemory { public: - static hsa_region_t AgentLocal(hsa_agent_t agent); - static hsa_region_t System(); + static const core::MemoryRegion* AgentLocal(hsa_agent_t agent, bool is_code); + static const core::MemoryRegion* System(bool is_code); - RegionMemory(hsa_region_t region): SegmentMemory(), region_(region), ptr_(nullptr), host_ptr_(nullptr), size_(0) {} - ~RegionMemory() {} + RegionMemory(const core::MemoryRegion* 
region, bool is_code) + : SegmentMemory(), + region_(region), + ptr_(nullptr), + host_ptr_(nullptr), + size_(0), + is_code_(is_code) {} + ~RegionMemory() {} - void* Address(size_t offset = 0) const override - { assert(this->Allocated()); return (char*)ptr_ + offset; } + void* Address(size_t offset = 0) const override { + assert(this->Allocated()); + return (char*)ptr_ + offset; } void* HostAddress(size_t offset = 0) const override { assert(this->Allocated()); return (char*)host_ptr_ + offset; } bool Allocated() const override @@ -284,44 +273,47 @@ private: RegionMemory(const RegionMemory&); RegionMemory& operator=(const RegionMemory&); - hsa_region_t region_; + const core::MemoryRegion* region_; void *ptr_; void *host_ptr_; size_t size_; + bool is_code_; }; -hsa_region_t RegionMemory::AgentLocal(hsa_agent_t agent) -{ - hsa_region_t invalid_region; invalid_region.handle = 0; +const core::MemoryRegion* RegionMemory::AgentLocal(hsa_agent_t agent, bool is_code) { AMD::GpuAgent *amd_agent = (AMD::GpuAgent*)core::Agent::Convert(agent); - if (nullptr == amd_agent) { - return invalid_region; - } - auto agent_local_region = std::find_if(amd_agent->regions().begin(), amd_agent->regions().end(), IsLocalRegion); - return agent_local_region == amd_agent->regions().end() ? - invalid_region : core::MemoryRegion::Convert(*agent_local_region); + assert(amd_agent->device_type() == core::Agent::kAmdGpuDevice && "Invalid agent type."); + auto agent_local_region = + std::find_if(amd_agent->regions().begin(), amd_agent->regions().end(), + [&](const core::MemoryRegion* region) { + const AMD::MemoryRegion* amd_region = (const AMD::MemoryRegion*)region; + return amd_region->IsLocalMemory() && !amd_region->fine_grain(); + }); + return agent_local_region == amd_agent->regions().end() ? 
nullptr : *agent_local_region; } -hsa_region_t RegionMemory::System() { - const core::MemoryRegion* default_system_region = - core::Runtime::runtime_singleton_->system_regions_fine()[0]; - - assert(default_system_region != NULL); - - return core::MemoryRegion::Convert(default_system_region); +const core::MemoryRegion* RegionMemory::System(bool is_code) { + if (is_code) + return core::Runtime::runtime_singleton_->system_regions_coarse()[0]; + else + return core::Runtime::runtime_singleton_->system_regions_fine()[0]; } -bool RegionMemory::Allocate(size_t size, size_t align, bool zero) -{ +bool RegionMemory::Allocate(size_t size, size_t align, bool zero) { assert(!this->Allocated()); assert(0 < size); assert(0 < align && 0 == (align & (align - 1))); - if (HSA_STATUS_SUCCESS != HSA::hsa_memory_allocate(region_, size, &ptr_)) { + core::MemoryRegion::AllocateFlags flags = core::MemoryRegion::AllocateNoFlags; + if (is_code_) flags = core::MemoryRegion::AllocateExecutable; + if (HSA_STATUS_SUCCESS != + core::Runtime::runtime_singleton_->AllocateMemory(region_, size, flags, &ptr_)) { ptr_ = nullptr; return false; } assert(0 == ((uintptr_t)ptr_) % align); - if (HSA_STATUS_SUCCESS != HSA::hsa_memory_allocate(RegionMemory::System(), size, &host_ptr_)) { + if (HSA_STATUS_SUCCESS != + core::Runtime::runtime_singleton_->AllocateMemory( + RegionMemory::System(false), size, core::MemoryRegion::AllocateNoFlags, &host_ptr_)) { HSA::hsa_memory_free(ptr_); ptr_ = nullptr; host_ptr_ = nullptr; @@ -334,8 +326,7 @@ bool RegionMemory::Allocate(size_t size, size_t align, bool zero) return true; } -bool RegionMemory::Copy(size_t offset, const void *src, size_t size) -{ +bool RegionMemory::Copy(size_t offset, const void* src, size_t size) { assert(this->Allocated() && nullptr != host_ptr_); assert(nullptr != src); assert(0 < size); @@ -358,8 +349,7 @@ void RegionMemory::Free() bool RegionMemory::Freeze() { assert(this->Allocated() && nullptr != host_ptr_); - core::Agent* agent = 
reinterpret_cast( - core::MemoryRegion::Convert(region_))->owner(); + core::Agent* agent = region_->owner(); if (agent != NULL && agent->device_type() == core::Agent::kAmdGpuDevice) { if (HSA_STATUS_SUCCESS != agent->DmaCopy(ptr_, host_ptr_, size_)) { return false; @@ -368,11 +358,14 @@ bool RegionMemory::Freeze() { memcpy(ptr_, host_ptr_, size_); } + // Invalidate the owning GPU agent's code caches, which may hold lines of the new allocation. + if (is_code_ && agent != NULL && agent->device_type() == core::Agent::kAmdGpuDevice) + ((AMD::GpuAgent*)agent)->InvalidateCodeCaches(); + return true; } } // namespace anonymous - namespace amd { hsa_isa_t LoaderContext::IsaFromName(const char *name) { @@ -427,21 +420,22 @@ void* LoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, assert(0 < size); assert(0 < align && 0 == (align & (align - 1))); + hsa_profile_t agent_profile; + if (HSA_STATUS_SUCCESS != + HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile)) { + return nullptr; + } + SegmentMemory *mem = nullptr; switch (segment) { case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT: case AMDGPU_HSA_SEGMENT_READONLY_AGENT: { - hsa_profile_t agent_profile; - if (HSA_STATUS_SUCCESS != HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile)) { - return nullptr; - } - switch (agent_profile) { case HSA_PROFILE_BASE: - mem = new (std::nothrow) RegionMemory(RegionMemory::AgentLocal(agent)); + mem = new (std::nothrow) RegionMemory(RegionMemory::AgentLocal(agent, false), false); break; case HSA_PROFILE_FULL: - mem = new (std::nothrow) RegionMemory(RegionMemory::System()); + mem = new (std::nothrow) RegionMemory(RegionMemory::System(false), false); break; default: assert(false); @@ -449,20 +443,13 @@ void* LoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, break; } case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM: { - mem = new (std::nothrow) RegionMemory(RegionMemory::System()); + mem = new (std::nothrow) RegionMemory(RegionMemory::System(false), false); break; } case 
AMDGPU_HSA_SEGMENT_CODE_AGENT: { - hsa_profile_t agent_profile; - if (HSA_STATUS_SUCCESS != HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile)) { - return nullptr; - } - switch (agent_profile) { case HSA_PROFILE_BASE: - mem = new (std::nothrow) RegionMemory(IsDebuggerRegistered() ? - RegionMemory::System() : - RegionMemory::AgentLocal(agent)); + mem = new (std::nothrow) RegionMemory(RegionMemory::AgentLocal(agent, true), true); break; case HSA_PROFILE_FULL: mem = new (std::nothrow) MappedMemory(((AMD::GpuAgentInt*)core::Agent::Convert(agent))->is_kv_device()); @@ -470,10 +457,6 @@ void* LoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment, default: assert(false); } - - // Invalidate agent caches which may hold lines of the new allocation. - ((AMD::GpuAgentInt*)core::Agent::Convert(agent))->InvalidateCodeCaches(); - break; } default: