Correct loader memory interfaces.
The loader must use internal interfaces to access page allocation flags. Code pages should also be allocated from cached memory. Also relocate the i-cache flush so that it runs after the code page copy. Change-Id: I86d36243b6eebb1d46b991b372a5236baaf941ab
This commit is contained in:
@@ -48,9 +48,9 @@
|
||||
namespace rocr {
|
||||
namespace amd {
|
||||
|
||||
class LoaderContext final: public amd::hsa::loader::Context {
|
||||
public:
|
||||
LoaderContext(): amd::hsa::loader::Context() {}
|
||||
class LoaderContext final : public rocr::amd::hsa::loader::Context {
|
||||
public:
|
||||
LoaderContext() : rocr::amd::hsa::loader::Context() {}
|
||||
|
||||
~LoaderContext() {}
|
||||
|
||||
|
||||
@@ -65,24 +65,6 @@
|
||||
namespace rocr {
|
||||
namespace {
|
||||
|
||||
bool IsLocalRegion(const core::MemoryRegion *region)
|
||||
{
|
||||
const AMD::MemoryRegion *amd_region = (AMD::MemoryRegion*)region;
|
||||
if (nullptr == amd_region || !amd_region->IsLocalMemory()) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool IsDebuggerRegistered()
|
||||
{
|
||||
return false;
|
||||
// Leaving code commented as it will be used later on
|
||||
//return ((core::Runtime::runtime_singleton_->flag().emulate_aql()) &&
|
||||
// (0 !=
|
||||
// core::Runtime::runtime_singleton_->flag().tools_lib_names().size()));
|
||||
}
|
||||
|
||||
class SegmentMemory {
|
||||
public:
|
||||
virtual ~SegmentMemory() {}
|
||||
@@ -262,14 +244,21 @@ bool MappedMemory::Freeze()
|
||||
|
||||
class RegionMemory final: public SegmentMemory {
|
||||
public:
|
||||
static hsa_region_t AgentLocal(hsa_agent_t agent);
|
||||
static hsa_region_t System();
|
||||
static const core::MemoryRegion* AgentLocal(hsa_agent_t agent, bool is_code);
|
||||
static const core::MemoryRegion* System(bool is_code);
|
||||
|
||||
RegionMemory(hsa_region_t region): SegmentMemory(), region_(region), ptr_(nullptr), host_ptr_(nullptr), size_(0) {}
|
||||
~RegionMemory() {}
|
||||
RegionMemory(const core::MemoryRegion* region, bool is_code)
|
||||
: SegmentMemory(),
|
||||
region_(region),
|
||||
ptr_(nullptr),
|
||||
host_ptr_(nullptr),
|
||||
size_(0),
|
||||
is_code_(is_code) {}
|
||||
~RegionMemory() {}
|
||||
|
||||
void* Address(size_t offset = 0) const override
|
||||
{ assert(this->Allocated()); return (char*)ptr_ + offset; }
|
||||
void* Address(size_t offset = 0) const override {
|
||||
assert(this->Allocated());
|
||||
return (char*)ptr_ + offset; }
|
||||
void* HostAddress(size_t offset = 0) const override
|
||||
{ assert(this->Allocated()); return (char*)host_ptr_ + offset; }
|
||||
bool Allocated() const override
|
||||
@@ -284,44 +273,47 @@ private:
|
||||
RegionMemory(const RegionMemory&);
|
||||
RegionMemory& operator=(const RegionMemory&);
|
||||
|
||||
hsa_region_t region_;
|
||||
const core::MemoryRegion* region_;
|
||||
void *ptr_;
|
||||
void *host_ptr_;
|
||||
size_t size_;
|
||||
bool is_code_;
|
||||
};
|
||||
|
||||
hsa_region_t RegionMemory::AgentLocal(hsa_agent_t agent)
|
||||
{
|
||||
hsa_region_t invalid_region; invalid_region.handle = 0;
|
||||
const core::MemoryRegion* RegionMemory::AgentLocal(hsa_agent_t agent, bool is_code) {
|
||||
AMD::GpuAgent *amd_agent = (AMD::GpuAgent*)core::Agent::Convert(agent);
|
||||
if (nullptr == amd_agent) {
|
||||
return invalid_region;
|
||||
}
|
||||
auto agent_local_region = std::find_if(amd_agent->regions().begin(), amd_agent->regions().end(), IsLocalRegion);
|
||||
return agent_local_region == amd_agent->regions().end() ?
|
||||
invalid_region : core::MemoryRegion::Convert(*agent_local_region);
|
||||
assert(amd_agent->device_type() == core::Agent::kAmdGpuDevice && "Invalid agent type.");
|
||||
auto agent_local_region =
|
||||
std::find_if(amd_agent->regions().begin(), amd_agent->regions().end(),
|
||||
[&](const core::MemoryRegion* region) {
|
||||
const AMD::MemoryRegion* amd_region = (const AMD::MemoryRegion*)region;
|
||||
return amd_region->IsLocalMemory() & (!amd_region->fine_grain());
|
||||
});
|
||||
return agent_local_region == amd_agent->regions().end() ? nullptr : *agent_local_region;
|
||||
}
|
||||
|
||||
hsa_region_t RegionMemory::System() {
|
||||
const core::MemoryRegion* default_system_region =
|
||||
core::Runtime::runtime_singleton_->system_regions_fine()[0];
|
||||
|
||||
assert(default_system_region != NULL);
|
||||
|
||||
return core::MemoryRegion::Convert(default_system_region);
|
||||
const core::MemoryRegion* RegionMemory::System(bool is_code) {
|
||||
if (is_code)
|
||||
return core::Runtime::runtime_singleton_->system_regions_coarse()[0];
|
||||
else
|
||||
return core::Runtime::runtime_singleton_->system_regions_fine()[0];
|
||||
}
|
||||
|
||||
bool RegionMemory::Allocate(size_t size, size_t align, bool zero)
|
||||
{
|
||||
bool RegionMemory::Allocate(size_t size, size_t align, bool zero) {
|
||||
assert(!this->Allocated());
|
||||
assert(0 < size);
|
||||
assert(0 < align && 0 == (align & (align - 1)));
|
||||
if (HSA_STATUS_SUCCESS != HSA::hsa_memory_allocate(region_, size, &ptr_)) {
|
||||
core::MemoryRegion::AllocateFlags flags = core::MemoryRegion::AllocateNoFlags;
|
||||
if (is_code_) flags = core::MemoryRegion::AllocateExecutable;
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
core::Runtime::runtime_singleton_->AllocateMemory(region_, size, flags, &ptr_)) {
|
||||
ptr_ = nullptr;
|
||||
return false;
|
||||
}
|
||||
assert(0 == ((uintptr_t)ptr_) % align);
|
||||
if (HSA_STATUS_SUCCESS != HSA::hsa_memory_allocate(RegionMemory::System(), size, &host_ptr_)) {
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
core::Runtime::runtime_singleton_->AllocateMemory(
|
||||
RegionMemory::System(false), size, core::MemoryRegion::AllocateNoFlags, &host_ptr_)) {
|
||||
HSA::hsa_memory_free(ptr_);
|
||||
ptr_ = nullptr;
|
||||
host_ptr_ = nullptr;
|
||||
@@ -334,8 +326,7 @@ bool RegionMemory::Allocate(size_t size, size_t align, bool zero)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RegionMemory::Copy(size_t offset, const void *src, size_t size)
|
||||
{
|
||||
bool RegionMemory::Copy(size_t offset, const void* src, size_t size) {
|
||||
assert(this->Allocated() && nullptr != host_ptr_);
|
||||
assert(nullptr != src);
|
||||
assert(0 < size);
|
||||
@@ -358,8 +349,7 @@ void RegionMemory::Free()
|
||||
bool RegionMemory::Freeze() {
|
||||
assert(this->Allocated() && nullptr != host_ptr_);
|
||||
|
||||
core::Agent* agent = reinterpret_cast<AMD::MemoryRegion*>(
|
||||
core::MemoryRegion::Convert(region_))->owner();
|
||||
core::Agent* agent = region_->owner();
|
||||
if (agent != NULL && agent->device_type() == core::Agent::kAmdGpuDevice) {
|
||||
if (HSA_STATUS_SUCCESS != agent->DmaCopy(ptr_, host_ptr_, size_)) {
|
||||
return false;
|
||||
@@ -368,11 +358,14 @@ bool RegionMemory::Freeze() {
|
||||
memcpy(ptr_, host_ptr_, size_);
|
||||
}
|
||||
|
||||
// Invalidate agent caches which may hold lines of the new allocation.
|
||||
if (is_code_ && (region_->owner()->device_type() == core::Agent::kAmdGpuDevice))
|
||||
((AMD::GpuAgent*)region_->owner())->InvalidateCodeCaches();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace anonymous
|
||||
|
||||
namespace amd {
|
||||
|
||||
hsa_isa_t LoaderContext::IsaFromName(const char *name) {
|
||||
@@ -427,21 +420,22 @@ void* LoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment,
|
||||
assert(0 < size);
|
||||
assert(0 < align && 0 == (align & (align - 1)));
|
||||
|
||||
hsa_profile_t agent_profile;
|
||||
if (HSA_STATUS_SUCCESS !=
|
||||
HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
SegmentMemory *mem = nullptr;
|
||||
switch (segment) {
|
||||
case AMDGPU_HSA_SEGMENT_GLOBAL_AGENT:
|
||||
case AMDGPU_HSA_SEGMENT_READONLY_AGENT: {
|
||||
hsa_profile_t agent_profile;
|
||||
if (HSA_STATUS_SUCCESS != HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
switch (agent_profile) {
|
||||
case HSA_PROFILE_BASE:
|
||||
mem = new (std::nothrow) RegionMemory(RegionMemory::AgentLocal(agent));
|
||||
mem = new (std::nothrow) RegionMemory(RegionMemory::AgentLocal(agent, false), false);
|
||||
break;
|
||||
case HSA_PROFILE_FULL:
|
||||
mem = new (std::nothrow) RegionMemory(RegionMemory::System());
|
||||
mem = new (std::nothrow) RegionMemory(RegionMemory::System(false), false);
|
||||
break;
|
||||
default:
|
||||
assert(false);
|
||||
@@ -449,20 +443,13 @@ void* LoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment,
|
||||
break;
|
||||
}
|
||||
case AMDGPU_HSA_SEGMENT_GLOBAL_PROGRAM: {
|
||||
mem = new (std::nothrow) RegionMemory(RegionMemory::System());
|
||||
mem = new (std::nothrow) RegionMemory(RegionMemory::System(false), false);
|
||||
break;
|
||||
}
|
||||
case AMDGPU_HSA_SEGMENT_CODE_AGENT: {
|
||||
hsa_profile_t agent_profile;
|
||||
if (HSA_STATUS_SUCCESS != HSA::hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &agent_profile)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
switch (agent_profile) {
|
||||
case HSA_PROFILE_BASE:
|
||||
mem = new (std::nothrow) RegionMemory(IsDebuggerRegistered() ?
|
||||
RegionMemory::System() :
|
||||
RegionMemory::AgentLocal(agent));
|
||||
mem = new (std::nothrow) RegionMemory(RegionMemory::AgentLocal(agent, true), true);
|
||||
break;
|
||||
case HSA_PROFILE_FULL:
|
||||
mem = new (std::nothrow) MappedMemory(((AMD::GpuAgentInt*)core::Agent::Convert(agent))->is_kv_device());
|
||||
@@ -470,10 +457,6 @@ void* LoaderContext::SegmentAlloc(amdgpu_hsa_elf_segment_t segment,
|
||||
default:
|
||||
assert(false);
|
||||
}
|
||||
|
||||
// Invalidate agent caches which may hold lines of the new allocation.
|
||||
((AMD::GpuAgentInt*)core::Agent::Convert(agent))->InvalidateCodeCaches();
|
||||
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
||||
Reference in new issue
Block a user