SWDEV-285333 - Introduce Address sanitizer hostcall service

Change-Id: Id29aacd09d0a9934a027446c57c7095804e1a454


[ROCm/clr commit: c11c02f2c7]
Bu işleme şunda yer alıyor:
pvellien
2021-05-07 13:46:52 +00:00
işlemeyi yapan: Maneesh Gupta
ebeveyn c073813460
işleme e23597d4a4
13 değiştirilmiş dosya ile 609 ekleme ve 93 silme
+2 -1
Dosyayı Görüntüle
@@ -46,6 +46,7 @@ target_sources(rocclr PRIVATE
${ROCCLR_SRC_DIR}/device/rocm/rocschedcl.cpp
${ROCCLR_SRC_DIR}/device/rocm/rocsettings.cpp
${ROCCLR_SRC_DIR}/device/rocm/rocsignal.cpp
${ROCCLR_SRC_DIR}/device/rocm/rocvirtual.cpp)
${ROCCLR_SRC_DIR}/device/rocm/rocvirtual.cpp
${ROCCLR_SRC_DIR}/device/rocm/rocurilocator.cpp)
target_compile_definitions(rocclr PUBLIC WITH_HSA_DEVICE)
+6 -1
Dosyayı Görüntüle
@@ -70,7 +70,12 @@ enum ServiceID {
SERVICE_RESERVED = 0,
SERVICE_FUNCTION_CALL = 1,
SERVICE_PRINTF = 2,
SERVICE_DEVMEM = 3,
SERVICE_DEVMEM = 3
#if defined(__clang__)
#if __has_feature(address_sanitizer)
, SERVICE_SANITIZER = 4
#endif
#endif
};
struct Message;
+43 -88
Dosyayı Görüntüle
@@ -36,91 +36,11 @@
#include <string.h>
#include <set>
namespace { // anonymous
/** Values exchanged through the hostcall doorbell signal. */
enum SignalValue {
  SIGNAL_DONE = 0,  //!< the listener stops consuming packets once it observes this value
  SIGNAL_INIT = 1   //!< value the listener initially waits on
};
/** \brief Packet payload
*
* Holds the per-workitem arguments of a hostcall packet: 64 slots of
* 8 ulongs (uint64_t) each, one slot for each workitem in the wave.
* A slot with index \c i contains valid data only if bit \c i of
* PacketHeader::activemask_ is set.
*/
struct Payload {
/** slots[i][j] is the j-th 64-bit word supplied by workitem \c i. */
uint64_t slots[64][8];
};
/** \brief Packet header
*
* Bookkeeping for one hostcall packet. The type must remain
* standard-layout (checked by the static_assert that follows it).
*/
struct PacketHeader {
/** Tagged pointer to the next packet in an intrusive stack */
uint64_t next_;
/** Bitmask that represents payload slots with valid data (bit i <-> slot i) */
uint64_t activemask_;
/** Service ID requested by the wave */
uint32_t service_;
/** Control bits.
* \li 0: \c READY flag. Indicates packet awaiting a host response.
* The remaining bits are reserved (see ControlOffset / ControlWidth).
*/
std::atomic<uint32_t> control_;
};
static_assert(std::is_standard_layout<PacketHeader>::value,
"the hostcall packet must be useable from other languages");
/** Bit positions of the fields packed into the packet control word. */
enum ControlOffset {
  CONTROL_OFFSET_READY_FLAG = 0,  //!< READY flag lives in bit 0
  CONTROL_OFFSET_RESERVED0,       //!< reserved bits start right after it (bit 1)
};
/** Bit widths of the fields packed into the 32-bit packet control word. */
enum ControlWidth {
  CONTROL_WIDTH_READY_FLAG = 1,                              //!< single READY bit
  CONTROL_WIDTH_RESERVED0 = 32 - CONTROL_WIDTH_READY_FLAG,   //!< remaining 31 bits
};
/** \brief Shared buffer submitting hostcall requests.
*
* Holds hostcall packets requested by all kernels executing on the
* same device queue. Each hostcall buffer is associated with at most
* one device queue.
*
* Packets in the buffer are accessed using 64-bit tagged pointers to mitigate
* the ABA problem in lock-free stacks. The index_mask is used to extract the
* lower bits of the pointer, which form the index into the packet array. The
* remaining higher bits define a tag that is incremented on every pop from a
* stack.
*
* The class must remain standard-layout (checked by the static_assert that
* follows it), so the order and types of the data members matter.
*/
class HostcallBuffer {
/** Array of packet headers */
PacketHeader* headers_;
/** Array of packet payloads */
Payload* payloads_;
/** Signal used by kernels to indicate new work */
void* doorbell_;
/** Stack of free packets. Uses tagged pointers. */
uint64_t free_stack_;
/** Stack of ready packets. Uses tagged pointers */
std::atomic<uint64_t> ready_stack_;
/** Mask for accessing the packet index in the tagged pointer. */
uint64_t index_mask_;
/** Some services need a device */
const amd::Device* device_;
/** Returns the header selected by the index bits of tagged pointer \p ptr. */
PacketHeader* getHeader(uint64_t ptr) const;
/** Returns the payload for tagged pointer \p ptr (presumably indexed like getHeader; definition not shown here). */
Payload* getPayload(uint64_t ptr) const;
public:
/** Takes the whole ready stack and services each packet found on it. */
void processPackets(MessageHandler& messages);
/** Prepares the buffer for \p num_packets packets (definition not shown here). */
void initialize(uint32_t num_packets);
/** Stores the doorbell handle in doorbell_. */
void setDoorbell(void* doorbell) { doorbell_ = doorbell; };
/** Stores the device pointer in device_. */
void setDevice(const amd::Device* dptr) { device_ = dptr; }
};
static_assert(std::is_standard_layout<HostcallBuffer>::value,
"the hostcall buffer must be useable from other languages");
}; // namespace
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#include "device/devsanitizer.hpp"
#endif
#endif
PacketHeader* HostcallBuffer::getHeader(uint64_t ptr) const {
return headers_ + (ptr & index_mask_);
@@ -205,6 +125,7 @@ void HostcallBuffer::processPackets(MessageHandler& messages) {
// Grab the entire ready stack and set the top to 0. New requests from the
// device will continue pushing on the stack while we process the packets that
// we have grabbed.
uint64_t ready_stack = std::atomic_exchange_explicit(&ready_stack_, static_cast<uint64_t>(0), std::memory_order_acquire);
if (!ready_stack) {
return;
@@ -222,6 +143,16 @@ void HostcallBuffer::processPackets(MessageHandler& messages) {
auto service = header->service_;
auto payload = getPayload(iter);
auto activemask = header->activemask_;
#if defined(__clang__)
#if __has_feature(address_sanitizer)
if (service == SERVICE_SANITIZER) {
handleSanitizerService(payload, activemask, device_, uri_locator);
//activemask zeroed to avoid subsequent handling for each work-item.
activemask = 0;
}
#endif
#endif
while (activemask) {
auto wi = amd::leastBitSet(activemask);
activemask ^= static_cast<decltype(activemask)>(1) << wi;
@@ -290,7 +221,11 @@ class HostcallListener {
std::set<HostcallBuffer*> buffers_;
device::Signal* doorbell_;
MessageHandler messages_;
#if defined(__clang__)
#if __has_feature(address_sanitizer)
device::UriLocator* urilocator = nullptr;
#endif
#endif
class Thread : public amd::Thread {
public:
Thread() : amd::Thread("Hostcall Listener Thread", CQ_THREAD_STACK_SIZE) {}
@@ -338,7 +273,6 @@ amd::Monitor listenerLock("Hostcall listener lock");
void HostcallListener::consumePackets() {
uint64_t timeout = 1024 * 1024;
uint64_t signal_value = SIGNAL_INIT;
while (true) {
while (true) {
uint64_t new_value = doorbell_->Wait(signal_value, device::Signal::Condition::Ne, timeout);
@@ -349,7 +283,6 @@ void HostcallListener::consumePackets() {
}
if (signal_value == SIGNAL_DONE) {
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Hostcall listener received SIGNAL_DONE");
return;
}
@@ -375,12 +308,23 @@ void HostcallListener::terminate() {
amd::Os::yield();
}
#if defined(__clang__)
#if __has_feature(address_sanitizer)
if (urilocator)
delete urilocator;
#endif
#endif
delete doorbell_;
}
// Registers a hostcall buffer with this listener: the buffer is handed the
// listener's doorbell handle (and, in AddressSanitizer builds, its URI
// locator) and is then added to the set of tracked buffers. The buffer must
// not have been added already.
void HostcallListener::addBuffer(HostcallBuffer* buffer) {
  assert(buffers_.count(buffer) == 0 && "buffer already present");

#if defined(__clang__)
#if __has_feature(address_sanitizer)
  buffer->setUriLocator(urilocator);
#endif
#endif

  void* const doorbellHandle = doorbell_->getHandle();
  buffer->setDoorbell(doorbellHandle);

  buffers_.insert(buffer);
}
@@ -400,10 +344,21 @@ bool HostcallListener::initialize(const amd::Device &dev) {
return false;
}
#if defined(__clang__)
#if __has_feature(address_sanitizer)
urilocator = dev.createUriLocator();
#endif
#endif
// If the listener thread was not successfully initialized, clean
// everything up and bail out.
if (thread_.state() < Thread::INITIALIZED) {
delete doorbell_;
#if defined(__clang__)
#if __has_feature(address_sanitizer)
if (urilocator)
delete urilocator;
#endif
#endif
return false;
}
+97 -1
Dosyayı Görüntüle
@@ -22,9 +22,14 @@
#include "top.hpp"
#include "device/device.hpp"
#include "device/devhcmessages.hpp"
#include <cstddef>
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#include "device/devurilocator.hpp"
#endif
#endif
/** \file Support for invoking host services from the device.
*
* A hostcall is a fixed-size request generated by a kernel running
@@ -79,3 +84,94 @@ uint32_t getHostcallBufferAlignment(void);
bool enableHostcalls(const amd::Device& dev, void* buffer, uint32_t numPackets);
void disableHostcalls(void* buffer);
/** Values exchanged through the hostcall doorbell signal. */
enum SignalValue {
  SIGNAL_DONE = 0,  //!< the listener stops consuming packets once it observes this value
  SIGNAL_INIT = 1   //!< value the listener initially waits on
};
/** \brief Packet payload
*
* Holds the per-workitem arguments of a hostcall packet: 64 slots of
* 8 ulongs (uint64_t) each, one slot for each workitem in the wave.
* A slot with index \c i contains valid data only if bit \c i of
* PacketHeader::activemask_ is set.
*/
struct Payload {
/** slots[i][j] is the j-th 64-bit word supplied by workitem \c i. */
uint64_t slots[64][8];
};
/** \brief Packet header
*
* Bookkeeping for one hostcall packet. The type must remain
* standard-layout (checked by the static_assert that follows it).
*/
struct PacketHeader {
/** Tagged pointer to the next packet in an intrusive stack */
uint64_t next_;
/** Bitmask that represents payload slots with valid data (bit i <-> slot i) */
uint64_t activemask_;
/** Service ID requested by the wave */
uint32_t service_;
/** Control bits.
* \li 0: \c READY flag. Indicates packet awaiting a host response.
* The remaining bits are reserved (see ControlOffset / ControlWidth).
*/
std::atomic<uint32_t> control_;
};
static_assert(std::is_standard_layout<PacketHeader>::value,
"the hostcall packet must be useable from other languages");
/** Bit positions of the fields packed into the packet control word. */
enum ControlOffset {
  CONTROL_OFFSET_READY_FLAG = 0,  //!< READY flag lives in bit 0
  CONTROL_OFFSET_RESERVED0,       //!< reserved bits start right after it (bit 1)
};
/** Bit widths of the fields packed into the 32-bit packet control word. */
enum ControlWidth {
  CONTROL_WIDTH_READY_FLAG = 1,                              //!< single READY bit
  CONTROL_WIDTH_RESERVED0 = 32 - CONTROL_WIDTH_READY_FLAG,   //!< remaining 31 bits
};
/** \brief Shared buffer submitting hostcall requests.
*
* Holds hostcall packets requested by all kernels executing on the
* same device queue. Each hostcall buffer is associated with at most
* one device queue.
*
* Packets in the buffer are accessed using 64-bit tagged pointers to mitigate
* the ABA problem in lock-free stacks. The index_mask is used to extract the
* lower bits of the pointer, which form the index into the packet array. The
* remaining higher bits define a tag that is incremented on every pop from a
* stack.
*
* The class must remain standard-layout (checked by the static_assert that
* follows it), so the order and types of the data members matter. In
* AddressSanitizer builds one extra member (uri_locator) is appended.
*/
class HostcallBuffer {
/** Array of packet headers */
PacketHeader* headers_;
/** Array of packet payloads */
Payload* payloads_;
/** Signal used by kernels to indicate new work */
void* doorbell_;
/** Stack of free packets. Uses tagged pointers. */
uint64_t free_stack_;
/** Stack of ready packets. Uses tagged pointers */
std::atomic<uint64_t> ready_stack_;
/** Mask for accessing the packet index in the tagged pointer. */
uint64_t index_mask_;
/** Some services need a device */
const amd::Device* device_;
/** Returns the header selected by the index bits of tagged pointer \p ptr. */
PacketHeader* getHeader(uint64_t ptr) const;
/** Returns the payload for tagged pointer \p ptr (presumably indexed like getHeader; definition not shown here). */
Payload* getPayload(uint64_t ptr) const;
public:
/** Takes the whole ready stack and services each packet found on it. */
void processPackets(MessageHandler& messages);
/** Prepares the buffer for \p num_packets packets (definition not shown here). */
void initialize(uint32_t num_packets);
/** Stores the doorbell handle in doorbell_. */
void setDoorbell(void* doorbell) { doorbell_ = doorbell; };
/** Stores the device pointer in device_. */
void setDevice(const amd::Device* dptr) { device_ = dptr; };
#if defined(__clang__)
#if __has_feature(address_sanitizer)
private:
/** Locator passed to the sanitizer service; may be null. Not owned by the buffer. */
device::UriLocator* uri_locator;
public:
/** Stores the locator pointer in uri_locator. */
void setUriLocator(device::UriLocator* uri_l) { uri_locator = uri_l; };
#endif
#endif
};
static_assert(std::is_standard_layout<HostcallBuffer>::value,
"the hostcall buffer must be useable from other languages");
+11
Dosyayı Görüntüle
@@ -40,6 +40,12 @@
#include "hwdebug.hpp"
#include "devsignal.hpp"
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#include "devurilocator.hpp"
#endif
#endif
#include <cassert>
#include <cstdint>
#include <cstdio>
@@ -1838,6 +1844,11 @@ class Device : public RuntimeObject {
//! Default implementation: reports that no arena memory object exists for
//! \p ptr by returning nullptr; \p offset is left untouched.
virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset) { return nullptr; }
#if defined(__clang__)
#if __has_feature(address_sanitizer)
virtual device::UriLocator* createUriLocator() const = 0;
#endif
#endif
protected:
//! Enable the specified extension
+110
Dosyayı Görüntüle
@@ -0,0 +1,110 @@
/* Copyright (c) 2021-present Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#include "device/devhostcall.hpp"
#include "device/device.hpp"
#include "device/devurilocator.hpp"
#include "utils/debug.hpp"
#include "platform/memory.hpp"
#include <inttypes.h> //to exp
#include <string>
#include <vector>
#include <tuple>
#include <algorithm>
// AddressSanitizer runtime entry point used to report an invalid device memory
// access. It is defined in llvm-project/compiler-rt/lib/asan and only has an
// effect when compiler-rt is built for AMDGPU.
// Note: this API is part of the asan runtime interface and is only defined on Linux.
//
// Parameter roles as used by the caller in this file (the authoritative
// contract lives in compiler-rt -- confirm there):
//   callstack / n_callstack    - device program counters of the faulting access
//   addr / naddr               - one failing address per reporting lane
//   entity_ids / n_entities    - device id, work-group ids and per-lane ids
//   is_write / access_size     - kind and size of the access
//   is_abort                   - whether the report should abort
//   name                       - key identifying the reporter ("amdgpu")
//   vma_adjust                 - load-address adjustment of the code object
//   fd / file_extent_size / file_extent_start
//                              - file holding the code object and the extent within it
extern "C"
void __asan_report_nonself_error(uint64_t *callstack, uint32_t n_callstack, uint64_t* addr,
uint32_t naddr, uint64_t* entity_ids, uint32_t n_entities, bool is_write, uint32_t access_size,
bool is_abort, const char* name, int64_t vma_adjust, int fd,
uint64_t file_extent_size, uint64_t file_extent_start = 0);
namespace {
// Services one SERVICE_SANITIZER hostcall packet.
//
// Gathers, from every active lane of the packet, the failing address and the
// ids of the reporting entity, resolves the code object that contains the
// faulting program counter (when a URI locator is available) and forwards
// everything to the AddressSanitizer runtime.
//
// packt_payload - payload of the packet; must not be null.
// activemask    - bit i set <=> slot i of the payload holds valid data.
// gpu_device    - device the packet came from; its index() is reported.
// uri_locator   - optional; when null the report carries no file information.
//
// The encoding of the payload words is defined in
// device-libs/asanrtl/src/report.cl.
void handleSanitizerService(Payload* packt_payload, uint64_t activemask,
                            const amd::Device* gpu_device, device::UriLocator* uri_locator) {
  assert(packt_payload != nullptr && "packet payload is null?");

  // One failing address per active lane. Zero-initialised so that the runtime
  // never sees indeterminate values, even for an (unexpected) empty mask.
  uint64_t device_failing_addresses[64] = {};
  // Identification of the entities requesting the report:
  //   index 0       - device id
  //   index 1,2,3   - wg_idx, wg_idy, wg_idz respectively
  //   index 4 to 67 - id of each reporting lane (payload word 5)
  uint64_t entity_id[68] = {};
  uint64_t callstack[1] = {};

  // The mask is 64 bits wide; __builtin_popcountl would drop the upper 32
  // lanes on targets where `long` is 32 bits.
  const uint32_t n_activelanes = static_cast<uint32_t>(__builtin_popcountll(activemask));
  uint64_t access_info = 0;
  uint64_t access_size = 0;

  entity_id[0] = gpu_device->index();

  uint32_t n_addresses = 0;
  uint32_t n_entities = 1;
  bool seen_first_lane = false;
  while (activemask) {
    auto wi = amd::leastBitSet(activemask);
    activemask ^= static_cast<decltype(activemask)>(1) << wi;
    auto data_slot = packt_payload->slots[wi];

    device_failing_addresses[n_addresses++] = data_slot[0];
    if (!seen_first_lane) {
      // Values common to the whole wave are taken from the first active lane.
      callstack[0] = data_slot[1];
      entity_id[n_entities++] = data_slot[2];
      entity_id[n_entities++] = data_slot[3];
      entity_id[n_entities++] = data_slot[4];
      access_info = data_slot[6];
      access_size = data_slot[7];
      seen_first_lane = true;
    }
    entity_id[n_entities++] = data_slot[5];
  }

  // Any bit set in the upper half of access_info requests a non-aborting
  // report; bit 0 marks the access as a write.
  const bool is_abort = (access_info & 0xFFFFFFFF00000000) == 0;
  const bool is_write = (access_info & 1) != 0;

  uint64_t size = 0, offset = 0;
  int64_t loadAddrAdjust = 0;
  auto uri_fd = amd::Os::FDescInit();
  if (uri_locator) {
    device::UriLocator::UriInfo fileuri_info = uri_locator->lookUpUri(callstack[0]);
    std::tie(offset, size) = uri_locator->decodeUriAndGetFd(fileuri_info, &uri_fd);
    loadAddrAdjust = fileuri_info.loadAddressDiff;
  }

#if defined(__linux__)
  __asan_report_nonself_error(callstack, 1, device_failing_addresses, n_activelanes,
                              entity_id, n_activelanes + 4, is_write,
                              static_cast<uint32_t>(access_size), is_abort,
                              /*thread key*/"amdgpu", loadAddrAdjust, uri_fd, size, offset);
#endif
}
} //end anonymous namespace
+43
Dosyayı Görüntüle
@@ -0,0 +1,43 @@
/* Copyright (c) 2019-present Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#include "os/os.hpp"
#include <string>
#include <utility>
namespace device{
// Abstract interface implemented by the HSA/PAL URI locators.
class UriLocator {
 public:
  // Result of looking up a device program counter.
  struct UriInfo {
    std::string uriPath;      // URI of the code object; empty when nothing was found
    int64_t loadAddressDiff;  // load-address adjustment reported for that code object
  };

  virtual ~UriLocator() = default;

  // Returns the URI information for the code object containing device_pc.
  virtual UriInfo lookUpUri(uint64_t device_pc) = 0;

  // Decodes `uri`, stores a file descriptor for it in *uri_fd and returns an
  // {offset, size} pair for the referenced file extent.
  virtual std::pair<uint64_t, uint64_t> decodeUriAndGetFd(UriInfo& uri,
                                                          amd::Os::FileDesc* uri_fd) = 0;
};
} //namespace device
#endif
#endif
+20
Dosyayı Görüntüle
@@ -42,6 +42,12 @@
#include "hsailctx.hpp"
#include "vaminterface.h"
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#include "device/devurilocator.hpp"
#endif
#endif
/*! \addtogroup GPU
* @{
*/
@@ -140,6 +146,13 @@ class NullDevice : public amd::Device {
//! Default implementation: ignores both arguments and reports success.
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput,
                          cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
  return true;
}
#if defined(__clang__)
#if __has_feature(address_sanitizer)
//! No URI locator is provided by this device: always returns nullptr.
virtual device::UriLocator* createUriLocator() const { return nullptr; }
#endif
#endif
protected:
//! Answer the question: "Should HSAIL Program be created?",
//! based on the given options.
@@ -548,6 +561,13 @@ class Device : public NullDevice, public CALGSLDevice {
//! Initial the Hardware Debug Manager
int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage);
#if defined(__clang__)
#if __has_feature(address_sanitizer)
//! No URI locator is provided by this device: always returns nullptr.
virtual device::UriLocator* createUriLocator() const { return nullptr; }
#endif
#endif
private:
//! Disable copy constructor
Device(const Device&);
+19 -2
Dosyayı Görüntüle
@@ -43,6 +43,11 @@
#include <atomic>
#include <unordered_set>
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#include "device/devurilocator.hpp"
#endif
#endif
/*! \addtogroup PAL
* @{
*/
@@ -145,7 +150,13 @@ class NullDevice : public amd::Device {
cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
return true;
}
#if defined(__clang__)
#if __has_feature(address_sanitizer)
//! No URI locator is provided by this device: always returns nullptr.
virtual device::UriLocator* createUriLocator() const { return nullptr; }
#endif
#endif
protected:
static Util::GenericAllocator allocator_; //!< Generic memory allocator in PAL
@@ -593,7 +604,13 @@ class Device : public NullDevice {
virtual bool importExtSemaphore(void** extSemaphore, const amd::Os::FileDesc& handle);
virtual void DestroyExtSemaphore(void* extSemaphore);
#if defined(__clang__)
#if __has_feature(address_sanitizer)
//! No URI locator is provided by the PAL device: always returns nullptr.
//! The name must be spelled exactly createUriLocator (capital 'L') so that
//! this function overrides the virtual declared in amd::Device; the previous
//! spelling "createUrilocator" declared an unrelated, never-called function.
virtual device::UriLocator* createUriLocator() const {
  return nullptr;
}
#endif
#endif
private:
static void PAL_STDCALL PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex,
Pal::Developer::CallbackType type, void* pCbData);
+13
Dosyayı Görüntüle
@@ -45,6 +45,12 @@
#endif
#include "platform/sampler.hpp"
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#include "device/rocm/rocurilocator.hpp"
#endif
#endif
#include <algorithm>
#include <cstring>
#include <fstream>
@@ -3026,5 +3032,12 @@ void Device::ReleaseGlobalSignal(void* signal) const {
}
}
#if defined(__clang__)
#if __has_feature(address_sanitizer)
// Creates the ROCm URI locator. The object is heap-allocated and returned as
// a raw pointer; the caller is responsible for deleting it.
device::UriLocator* Device::createUriLocator() const {
  device::UriLocator* locator = new roc::UriLocator();
  return locator;
}
#endif
#endif
} // namespace roc
#endif // WITHOUT_HSA_BACKEND
+14
Dosyayı Görüntüle
@@ -254,6 +254,15 @@ class NullDevice : public amd::Device {
//! Default implementation: ignores its arguments and always answers false.
virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const {
  return false;
}
//! Default implementation: does nothing with \p signal.
virtual void ReleaseGlobalSignal(void* signal) const {
}
#if defined(__clang__)
#if __has_feature(address_sanitizer)
//! Not expected to be called on this device: flags the call through
//! ShouldNotReachHere() and then yields nullptr.
virtual device::UriLocator* createUriLocator() const {
  ShouldNotReachHere();
  device::UriLocator* const none = nullptr;
  return none;
}
#endif
#endif
protected:
//! Initialize compiler instance and handle
static bool initCompiler(bool isOffline);
@@ -599,6 +608,11 @@ class Device : public NullDevice {
//! enum for keeping the total and available queue priorities
enum QueuePriority : uint {
  Low = 0,     //!< lowest priority
  Normal = 1,
  High = 2,    //!< highest priority
  Total = 3    //!< number of priority levels above
};
#if defined(__clang__)
#if __has_feature(address_sanitizer)
virtual device::UriLocator* createUriLocator() const;
#endif
#endif
}; // class roc::Device
} // namespace roc
+183
Dosyayı Görüntüle
@@ -0,0 +1,183 @@
/* Copyright (c) 2021-present Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#include "rocurilocator.hpp"
#include <sstream>
namespace roc {
// Fills rangeTab_ with one entry per loaded code object of every frozen
// executable known to the HSA loader. Each entry records the inclusive load
// address range, the load delta and the code object's URI.
//
// Returns HSA_STATUS_SUCCESS when the whole iteration succeeded, otherwise the
// first failing status (HSA_STATUS_ERROR when a required loader entry point is
// missing). On failure rangeTab_ may be partially filled; the caller is
// expected to discard it.
//
// fn_table_ must already have been populated by the caller.
hsa_status_t UriLocator::createUriRangeTable() {
  // Both callbacks receive a two-element uint64_t array:
  //   [0] address of the loader function table, [1] address of the range table.
  auto execCb = [] (hsa_executable_t exec, void *data) -> hsa_status_t {
    int execState = 0;
    hsa_status_t status =
        hsa_executable_get_info(exec, HSA_EXECUTABLE_INFO_STATE, &execState);
    if (status != HSA_STATUS_SUCCESS)
      return status;
    // Executables that are not frozen are skipped (status is SUCCESS here, so
    // the iteration simply continues with the next executable).
    if (execState != HSA_EXECUTABLE_STATE_FROZEN)
      return status;

    auto loadedCodeObjectCb = [] (hsa_executable_t exec,
        hsa_loaded_code_object_t lcobj, void *data) -> hsa_status_t {
      hsa_status_t result;
      uint64_t loadBAddr = 0, loadSize = 0;
      uint32_t uriLen = 0;
      int64_t delta = 0;
      uint64_t *argsCb = static_cast<uint64_t *>(data);
      hsa_ven_amd_loader_1_03_pfn_t *fnTab =
          reinterpret_cast<hsa_ven_amd_loader_1_03_pfn_t*> (argsCb[0]);
      std::vector<UriRange> *rangeTab =
          reinterpret_cast<std::vector<UriRange>*> (argsCb[1]);

      if (!fnTab->hsa_ven_amd_loader_loaded_code_object_get_info)
        return HSA_STATUS_ERROR;

      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj,
          HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, (void*) &loadBAddr);
      if (result != HSA_STATUS_SUCCESS)
        return result;

      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj,
          HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, (void*) &loadSize);
      if (result != HSA_STATUS_SUCCESS)
        return result;

      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj,
          HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, (void*) &uriLen);
      if (result != HSA_STATUS_SUCCESS)
        return result;

      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj,
          HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, (void*) &delta);
      if (result != HSA_STATUS_SUCCESS)
        return result;

      // The URI is fetched straight into a std::string so that the buffer is
      // released on every return path (the former new[]/delete[] pair leaked
      // when the query below failed). As before, the stored string is
      // uriLen + 1 characters long and ends with a '\0' terminator.
      std::string uri(static_cast<size_t>(uriLen) + 1, '\0');
      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj,
          HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, (void*) &uri[0]);
      if (result != HSA_STATUS_SUCCESS)
        return result;

      rangeTab->push_back(UriRange{loadBAddr, loadBAddr+loadSize-1,
                                   delta, std::move(uri)});
      return HSA_STATUS_SUCCESS;
    };

    uint64_t *args = static_cast<uint64_t *>(data);
    hsa_ven_amd_loader_1_03_pfn_t *fnExtTab =
        reinterpret_cast<hsa_ven_amd_loader_1_03_pfn_t*> (args[0]);
    return fnExtTab->hsa_ven_amd_loader_executable_iterate_loaded_code_objects(exec,
        loadedCodeObjectCb, data);
  };

  if (!fn_table_.hsa_ven_amd_loader_iterate_executables)
    return HSA_STATUS_ERROR;

  uint64_t callbackArgs[2] = {(uint64_t)& fn_table_, (uint64_t) &rangeTab_};
  return fn_table_.hsa_ven_amd_loader_iterate_executables(execCb, (void*) callbackArgs);
}
// Encoding of uniform-resource-identifier(URI) is detailed in
// https://llvm.org/docs/AMDGPUUsage.html#loaded-code-object-path-uniform-resource-identifier-uri
//
// Decodes a code-object URI of the form
//     file://<percent-encoded path>[#offset=<number>&size=<number>]
// and opens the referenced file.
//
// uri    - in: the URI to decode; out: uriPath is replaced by the decoded
//          file path (or cleared when the URI is not a "file:" URI).
// uri_fd - receives the file handle obtained through amd::Os::GetFileHandle.
//
// Returns {offset, size} of the code object inside the file. When the URI has
// no (or a malformed) range specifier, offset is 0 and size is the size
// reported for the file, or 0 if the file could not be opened. An empty or
// non-"file:" URI yields {0, 0} without touching *uri_fd.
std::pair<uint64_t, uint64_t> UriLocator::decodeUriAndGetFd(UriInfo& uri,
                                        amd::Os::FileDesc* uri_fd) {
  uint64_t offset = 0, size = 0;
  const std::string& text = uri.uriPath;
  if (text.size() == 0)
    return {0,0};

  auto pos = text.find("//");
  if (pos == std::string::npos || text.substr(0, pos) != "file:") {
    uri.uriPath="";
    return {0,0};
  }
  pos += 2;  // first character of the encoded path

  // The path ends at the range specifier, or at the end of the URI.
  auto pathEnd = text.size();
  const auto rspos = text.find('#');
  if (rspos != std::string::npos) {
    pathEnd = rspos;

    // Parse "#offset=<number>&size=<number>". The specifier is only honoured
    // when it is well formed; otherwise offset/size keep their defaults
    // instead of being derived from wrapped-around string indices.
    const std::string offprefix = "offset=", sizeprefix = "size=";
    const auto startsWithDigit = [](const std::string& s) {
      return !s.empty() && s[0] >= '0' && s[0] <= '9';
    };
    const auto offbeg = rspos + 1;
    const auto sbeg = text.find('&', rspos);
    if (sbeg != std::string::npos &&
        text.compare(offbeg, offprefix.size(), offprefix) == 0 &&
        text.compare(sbeg + 1, sizeprefix.size(), sizeprefix) == 0) {
      const auto offvalbeg = offbeg + offprefix.size();
      const auto sizevalbeg = sbeg + 1 + sizeprefix.size();
      const std::string offstr = text.substr(offvalbeg, sbeg - offvalbeg);
      const std::string sizestr = text.substr(sizevalbeg);
      if (startsWithDigit(offstr) && startsWithDigit(sizestr)) {
        // Base 0: decimal, 0x-prefixed hexadecimal or 0-prefixed octal.
        offset = std::stoull(offstr, nullptr, 0);
        size = std::stoull(sizestr, nullptr, 0);
      }
    }
  }

  // Value of one hexadecimal digit (either case), or -1 if `c` is not one.
  const auto hexVal = [](char c) -> int {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    return -1;
  };

  // Decode the file path: "%XY" becomes the byte 0xXY, everything else is
  // copied verbatim. An escape is only decoded when both hex digits are
  // present inside the path, so a truncated escape can no longer read past
  // the end of the string.
  std::string path;
  path.reserve(pathEnd - pos);
  for (auto i = pos; i < pathEnd;) {
    const char cur = text[i];
    if (cur == '%' && i + 2 < pathEnd) {
      const int hi = hexVal(text[i + 1]);
      const int lo = hexVal(text[i + 2]);
      if (hi >= 0 && lo >= 0) {
        path.push_back(static_cast<char>((hi << 4) | lo));
        i += 3;
        continue;
      }
    }
    path.push_back(cur);
    ++i;
  }
  uri.uriPath.swap(path);

  // fd_size stays 0 when the file cannot be opened, so a failed lookup never
  // leaks an indeterminate size to the caller.
  size_t fd_size = 0;
  (void) amd::Os::GetFileHandle(uri.uriPath.c_str(), uri_fd, &fd_size);
  // As per URI locator syntax, range_specifier is optional
  // if range_specifier is absent return total size of the file
  // and set offset to begin at 0.
  if (size == 0)
    size = fd_size;
  return {offset, size};
}
// Returns the URI and load delta of the code object whose load range contains
// device_pc, or an empty UriInfo ("" / 0) when no range matches or the range
// table could not be built. The table is built on the first call; a failed
// attempt is retried on the next call.
UriLocator::UriInfo UriLocator::lookUpUri(uint64_t device_pc) {
  UriInfo notFound{"", 0};

  if (!init_) {
    hsa_status_t status = hsa_system_get_major_extension_table(
        HSA_EXTENSION_AMD_LOADER, 1, sizeof(fn_table_), &fn_table_);
    if (status != HSA_STATUS_SUCCESS) {
      return notFound;
    }

    status = createUriRangeTable();
    if (status != HSA_STATUS_SUCCESS) {
      // Drop whatever was collected before the failure.
      rangeTab_.clear();
      return notFound;
    }
    init_ = true;
  }

  for (auto& range : rangeTab_) {
    const bool outside = device_pc < range.startAddr_ || range.endAddr_ < device_pc;
    if (!outside) {
      return UriInfo{range.Uri_.c_str(), range.elfDelta_};
    }
  }
  return notFound;
}
} //namespace roc
#endif
#endif
+48
Dosyayı Görüntüle
@@ -0,0 +1,48 @@
/* Copyright (c) 2019-present Advanced Micro Devices, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE. */
#pragma once
#if defined(__clang__)
#if __has_feature(address_sanitizer)
#include "device/devurilocator.hpp"
#include "hsa_ven_amd_loader.h"
#include <vector>
namespace roc {
class UriLocator : public device::UriLocator {
bool init_ = false;
struct UriRange {
uint64_t startAddr_, endAddr_;
int64_t elfDelta_;
std::string Uri_;
};
std::vector<UriRange> rangeTab_;
hsa_ven_amd_loader_1_03_pfn_t fn_table_;
hsa_status_t createUriRangeTable();
public:
virtual ~UriLocator() {}
virtual UriInfo lookUpUri(uint64_t device_pc) override;
virtual std::pair<uint64_t, uint64_t> decodeUriAndGetFd(UriInfo& uri_path,
amd::Os::FileDesc* uri_fd) override;
};
}
#endif
#endif