Adding code object list in loader.
Change-Id: Iab3541287bd56276fd32615ee59fcd590de84ca0
[ROCm/ROCR-Runtime commit: 16a20cfb8c]
Tento commit je obsažen v:
@@ -2,24 +2,24 @@
|
||||
//
|
||||
// The University of Illinois/NCSA
|
||||
// Open Source License (NCSA)
|
||||
//
|
||||
//
|
||||
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
//
|
||||
// Developed by:
|
||||
//
|
||||
//
|
||||
// AMD Research and AMD HSA Software Development
|
||||
//
|
||||
//
|
||||
// Advanced Micro Devices, Inc.
|
||||
//
|
||||
//
|
||||
// www.amd.com
|
||||
//
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to
|
||||
// deal with the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
//
|
||||
// - Redistributions of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimers.
|
||||
// - Redistributions in binary form must reproduce the above copyright
|
||||
@@ -29,7 +29,7 @@
|
||||
// nor the names of its contributors may be used to endorse or promote
|
||||
// products derived from this Software without specific prior written
|
||||
// permission.
|
||||
//
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
@@ -394,6 +394,10 @@ public:
|
||||
const char *options,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode = HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) = 0;
|
||||
|
||||
|
||||
/// @brief Freezes @p executable
|
||||
virtual hsa_status_t FreezeExecutable(Executable *executable, const char *options) = 0;
|
||||
|
||||
/// @brief Destroys @p executable
|
||||
virtual void DestroyExecutable(Executable *executable) = 0;
|
||||
|
||||
|
||||
@@ -2329,7 +2329,7 @@ hsa_status_t hsa_executable_freeze(
|
||||
return HSA_STATUS_ERROR_INVALID_EXECUTABLE;
|
||||
}
|
||||
|
||||
return exec->Freeze(options);
|
||||
return GetLoader()->FreezeExecutable(exec, options);
|
||||
CATCH;
|
||||
}
|
||||
|
||||
|
||||
@@ -49,17 +49,27 @@
|
||||
#include <atomic>
#include <cstdlib>
#include <fstream>
#include <libelf.h>
#include <unistd.h>
#include "amd_hsa_elf.h"
#include "amd_hsa_kernel_code.h"
#include "amd_hsa_code.hpp"
#include "amd_hsa_code_util.hpp"
#include "amd_options.hpp"
#include "core/util/utils.h"

#include "AMDHSAKernelDescriptor.h"
|
||||
|
||||
using namespace amd::hsa;
|
||||
using namespace amd::hsa::common;
|
||||
|
||||
static void __attribute__((noinline, optimize(0))) _loader_debug_state() {};
|
||||
r_debug _amdgpu_r_debug __attribute__((visibility("default"))) = {1,
|
||||
nullptr,
|
||||
reinterpret_cast<uintptr_t>(&_loader_debug_state),
|
||||
r_debug::RT_CONSISTENT,
|
||||
0};
|
||||
static link_map* r_debug_tail = nullptr;
|
||||
|
||||
namespace amd {
|
||||
namespace hsa {
|
||||
namespace loader {
|
||||
@@ -140,6 +150,10 @@ Loader* Loader::Create(Context* context)
|
||||
|
||||
/// @brief Resets the debugger-visible code object list and deletes @p loader.
void Loader::Destroy(Loader *loader)
{
  // Only the link_map list is reset here; the executables and loaded code objects
  // themselves are not deleted.
  r_debug_tail = nullptr;
  _amdgpu_r_debug.r_state = r_debug::RT_CONSISTENT;
  _amdgpu_r_debug.r_map = nullptr;

  delete loader;
}
|
||||
|
||||
@@ -152,9 +166,66 @@ Executable* AmdHsaCodeLoader::CreateExecutable(
|
||||
return executables.back();
|
||||
}
|
||||
|
||||
void AmdHsaCodeLoader::DestroyExecutable(Executable *executable)
|
||||
{
|
||||
/// @brief Appends @p map to the end of the doubly linked list rooted at
/// _amdgpu_r_debug.r_map and makes it the new tail.
static void AddCodeObjectInfoIntoDebugMap(link_map* map) {
  link_map* const old_tail = r_debug_tail;

  if (old_tail == nullptr) {
    // Empty list: the new node is also the head.
    _amdgpu_r_debug.r_map = map;
  } else {
    old_tail->l_next = map;
  }

  map->l_prev = old_tail;
  map->l_next = nullptr;
  r_debug_tail = map;
}
|
||||
|
||||
/// @brief Unlinks @p map from the doubly linked list rooted at
/// _amdgpu_r_debug.r_map and releases its name string.
///
/// @param map Node to remove; its l_prev, l_next and l_name are cleared on return.
static void RemoveCodeObjectInfoFromDebugMap(link_map* map) {
  // Keep the published list head valid when the first node is removed.
  if (_amdgpu_r_debug.r_map == map) {
    _amdgpu_r_debug.r_map = map->l_next;
  }
  if (r_debug_tail == map) {
    r_debug_tail = map->l_prev;
  }

  if (map->l_prev) {
    map->l_prev->l_next = map->l_next;
  }
  if (map->l_next) {
    map->l_next->l_prev = map->l_prev;
  }
  map->l_prev = nullptr;
  map->l_next = nullptr;

  // l_name is allocated with strdup() when the executable is frozen, so it must be
  // released with free(); using delete on it is undefined behaviour.
  free(map->l_name);
  map->l_name = nullptr;
}
|
||||
|
||||
/// @brief Freezes @p executable and, on success, appends the debug info of each of
/// its loaded code objects to the list rooted at _amdgpu_r_debug.r_map.
///
/// @param executable Executable to freeze.
/// @param options Passed through unchanged to Executable::Freeze.
/// @return The status returned by Executable::Freeze when it fails, otherwise
/// HSA_STATUS_SUCCESS.
hsa_status_t AmdHsaCodeLoader::FreezeExecutable(Executable *executable, const char *options) {
  const hsa_status_t status = executable->Freeze(options);
  if (status != HSA_STATUS_SUCCESS) {
    return status;
  }

  // Assuming runtime atomic implements C++ std::memory_order
  WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);

  // Mark the list as being extended and call the debug hook before touching it.
  atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_ADD, std::memory_order_relaxed);
  atomic::Fence(std::memory_order_acq_rel);
  _loader_debug_state();
  atomic::Fence(std::memory_order_acq_rel);

  // Executable -> ExecutableImpl is a downcast inside one class hierarchy, which is
  // what static_cast is for; reinterpret_cast performs no pointer adjustment.
  ExecutableImpl* const impl = static_cast<ExecutableImpl*>(executable);
  for (auto &lco : impl->loaded_code_objects) {
    AddCodeObjectInfoIntoDebugMap(&(lco->r_debug_info));
  }

  // Mark the list as consistent again and call the debug hook a second time.
  atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_CONSISTENT, std::memory_order_release);
  _loader_debug_state();

  return HSA_STATUS_SUCCESS;
}
|
||||
|
||||
void AmdHsaCodeLoader::DestroyExecutable(Executable *executable) {
|
||||
// Assuming runtime atomic implements C++ std::memory_order
|
||||
WriterLockGuard<ReaderWriterLock> writer_lock(rw_lock_);
|
||||
atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_DELETE, std::memory_order_relaxed);
|
||||
atomic::Fence(std::memory_order_acq_rel);
|
||||
_loader_debug_state();
|
||||
atomic::Fence(std::memory_order_acq_rel);
|
||||
for (auto &lco : reinterpret_cast<ExecutableImpl*>(executable)->loaded_code_objects) {
|
||||
RemoveCodeObjectInfoFromDebugMap(&(lco->r_debug_info));
|
||||
}
|
||||
atomic::Store(&_amdgpu_r_debug.r_state, r_debug::RT_CONSISTENT, std::memory_order_release);
|
||||
_loader_debug_state();
|
||||
|
||||
executables[((ExecutableImpl*)executable)->id()] = nullptr;
|
||||
delete executable;
|
||||
@@ -1066,8 +1137,7 @@ hsa_status_t ExecutableImpl::LoadCodeObject(
|
||||
s2 = range.substr(mi + 1);
|
||||
std::istringstream is1(s1); is1 >> n1;
|
||||
std::istringstream is2(s2); is2 >> n2;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
std::istringstream is(range); is >> n1;
|
||||
n2 = n1;
|
||||
}
|
||||
@@ -1775,6 +1845,17 @@ hsa_status_t ExecutableImpl::Freeze(const char *options) {
|
||||
for (auto &ls : lco->LoadedSegments()) {
|
||||
ls->Freeze();
|
||||
}
|
||||
// Update code object debug info after it is frozen.
|
||||
std::stringstream ss;
|
||||
uint64_t elf_begin = lco->getElfData();
|
||||
uint64_t elf_size = lco->getElfSize();
|
||||
ss << "file:///proc/" << getpid() << "/mem#"
|
||||
<< "offset=" << std::hex << std::showbase << elf_begin << "&"
|
||||
<< "size=" << elf_size;
|
||||
lco->r_debug_info.l_addr = lco->getDelta();
|
||||
lco->r_debug_info.l_name = strdup(ss.str().c_str());
|
||||
lco->r_debug_info.l_prev = nullptr;
|
||||
lco->r_debug_info.l_next = nullptr;
|
||||
}
|
||||
|
||||
state_ = HSA_EXECUTABLE_STATE_FROZEN;
|
||||
|
||||
@@ -48,6 +48,7 @@
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <libelf.h>
|
||||
#include <link.h>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
@@ -283,6 +284,7 @@ public:
|
||||
};
|
||||
|
||||
class LoadedCodeObjectImpl : public LoadedCodeObject, public ExecutableObject {
|
||||
friend class AmdHsaCodeLoader;
|
||||
private:
|
||||
LoadedCodeObjectImpl(const LoadedCodeObjectImpl&);
|
||||
LoadedCodeObjectImpl& operator=(const LoadedCodeObjectImpl&);
|
||||
@@ -319,6 +321,8 @@ public:
|
||||
uint64_t getLoadBase() const override;
|
||||
uint64_t getLoadSize() const override;
|
||||
int64_t getDelta() const override;
|
||||
|
||||
link_map r_debug_info;
|
||||
};
|
||||
|
||||
class Segment : public LoadedSegment, public ExecutableObject {
|
||||
@@ -396,6 +400,7 @@ struct ASH {
|
||||
typedef std::unordered_map<AgentSymbol, SymbolImpl*, ASH, ASC> AgentSymbolMap;
|
||||
|
||||
class ExecutableImpl final: public Executable {
|
||||
friend class AmdHsaCodeLoader;
|
||||
public:
|
||||
const hsa_profile_t& profile() const {
|
||||
return profile_;
|
||||
@@ -563,6 +568,7 @@ public:
|
||||
const char *options,
|
||||
hsa_default_float_rounding_mode_t default_float_rounding_mode = HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT) override;
|
||||
|
||||
hsa_status_t FreezeExecutable(Executable *executable, const char *options) override;
|
||||
void DestroyExecutable(Executable *executable) override;
|
||||
|
||||
hsa_status_t IterateExecutables(
|
||||
|
||||
Odkázat v novém úkolu
Zablokovat Uživatele