SWDEV-285333 - Introduce Address sanitizer hostcall service
Change-Id: Id29aacd09d0a9934a027446c57c7095804e1a454
[ROCm/clr commit: c11c02f2c7]
Bu işleme şunda yer alıyor:
işlemeyi yapan:
Maneesh Gupta
ebeveyn
c073813460
işleme
e23597d4a4
@@ -46,6 +46,7 @@ target_sources(rocclr PRIVATE
|
||||
${ROCCLR_SRC_DIR}/device/rocm/rocschedcl.cpp
|
||||
${ROCCLR_SRC_DIR}/device/rocm/rocsettings.cpp
|
||||
${ROCCLR_SRC_DIR}/device/rocm/rocsignal.cpp
|
||||
${ROCCLR_SRC_DIR}/device/rocm/rocvirtual.cpp)
|
||||
${ROCCLR_SRC_DIR}/device/rocm/rocvirtual.cpp
|
||||
${ROCCLR_SRC_DIR}/device/rocm/rocurilocator.cpp)
|
||||
|
||||
target_compile_definitions(rocclr PUBLIC WITH_HSA_DEVICE)
|
||||
|
||||
@@ -70,7 +70,12 @@ enum ServiceID {
|
||||
SERVICE_RESERVED = 0,
|
||||
SERVICE_FUNCTION_CALL = 1,
|
||||
SERVICE_PRINTF = 2,
|
||||
SERVICE_DEVMEM = 3,
|
||||
SERVICE_DEVMEM = 3
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
, SERVICE_SANITIZER = 4
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
struct Message;
|
||||
|
||||
@@ -36,91 +36,11 @@
|
||||
#include <string.h>
|
||||
#include <set>
|
||||
|
||||
namespace { // anonymous
|
||||
|
||||
/** Values carried by the hostcall doorbell signal.
 *
 *  The listener starts out waiting with SIGNAL_INIT as the last seen value
 *  and stops consuming packets once it observes SIGNAL_DONE.
 */
enum SignalValue {
  SIGNAL_DONE = 0,
  SIGNAL_INIT = 1
};
|
||||
|
||||
/** \brief Data area of a hostcall packet.
 *
 *  One slot per workitem of a wave: 64 slots, each made of 8 ulongs.
 *  Slot \c i holds meaningful data only when bit \c i of
 *  PacketHeader::activemask is set.
 */
struct Payload {
  uint64_t slots[64][8];
};
|
||||
|
||||
/** Header describing one hostcall packet. */
struct PacketHeader {
  /** Link to the following packet of the intrusive stack (tagged pointer). */
  uint64_t next_;
  /** One bit per payload slot; a set bit marks a slot holding valid data. */
  uint64_t activemask_;
  /** Identifier of the service the wave is asking for. */
  uint32_t service_;
  /** Control word.
   *  \li bit 0: \c READY flag, set while the packet waits for a host response.
   */
  std::atomic<uint32_t> control_;
};

// The header is shared with non-C++ consumers, so it has to stay standard layout.
static_assert(std::is_standard_layout<PacketHeader>::value,
              "the hostcall packet must be useable from other languages");
|
||||
|
||||
/** Bit positions of the fields packed into PacketHeader::control_. */
enum ControlOffset {
  CONTROL_OFFSET_READY_FLAG = 0,
  CONTROL_OFFSET_RESERVED0 = 1
};
|
||||
|
||||
/** Bit widths of the fields packed into PacketHeader::control_. */
enum ControlWidth {
  CONTROL_WIDTH_READY_FLAG = 1,
  CONTROL_WIDTH_RESERVED0 = 31
};
|
||||
|
||||
/** \brief Shared buffer submitting hostcall requests.
|
||||
*
|
||||
* Holds hostcall packets requested by all kernels executing on the
|
||||
* same device queue. Each hostcall buffer is associated with at most
|
||||
* one device queue.
|
||||
*
|
||||
* Packets in the buffer are accessed using 64-bit tagged pointers to mitigate
|
||||
* the ABA problem in lock-free stacks. The index_mask is used to extract the
|
||||
* lower bits of the pointer, which form the index into the packet array. The
|
||||
* remaining higher bits define a tag that is incremented on every pop from a
|
||||
* stack.
|
||||
*/
|
||||
class HostcallBuffer {
|
||||
/** Array of packet headers */
|
||||
PacketHeader* headers_;
|
||||
/** Array of packet payloads */
|
||||
Payload* payloads_;
|
||||
/** Signal used by kernels to indicate new work */
|
||||
void* doorbell_;
|
||||
/** Stack of free packets. Uses tagged pointers. */
|
||||
uint64_t free_stack_;
|
||||
/** Stack of ready packets. Uses tagged pointers */
|
||||
std::atomic<uint64_t> ready_stack_;
|
||||
/** Mask for accessing the packet index in the tagged pointer. */
|
||||
uint64_t index_mask_;
|
||||
/** Some services need a device */
|
||||
const amd::Device* device_;
|
||||
|
||||
PacketHeader* getHeader(uint64_t ptr) const;
|
||||
Payload* getPayload(uint64_t ptr) const;
|
||||
|
||||
public:
|
||||
void processPackets(MessageHandler& messages);
|
||||
void initialize(uint32_t num_packets);
|
||||
void setDoorbell(void* doorbell) { doorbell_ = doorbell; };
|
||||
void setDevice(const amd::Device* dptr) { device_ = dptr; }
|
||||
};
|
||||
|
||||
static_assert(std::is_standard_layout<HostcallBuffer>::value,
|
||||
"the hostcall buffer must be useable from other languages");
|
||||
|
||||
}; // namespace
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
#include "device/devsanitizer.hpp"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
PacketHeader* HostcallBuffer::getHeader(uint64_t ptr) const {
|
||||
return headers_ + (ptr & index_mask_);
|
||||
@@ -205,6 +125,7 @@ void HostcallBuffer::processPackets(MessageHandler& messages) {
|
||||
// Grab the entire ready stack and set the top to 0. New requests from the
|
||||
// device will continue pushing on the stack while we process the packets that
|
||||
// we have grabbed.
|
||||
|
||||
uint64_t ready_stack = std::atomic_exchange_explicit(&ready_stack_, static_cast<uint64_t>(0), std::memory_order_acquire);
|
||||
if (!ready_stack) {
|
||||
return;
|
||||
@@ -222,6 +143,16 @@ void HostcallBuffer::processPackets(MessageHandler& messages) {
|
||||
auto service = header->service_;
|
||||
auto payload = getPayload(iter);
|
||||
auto activemask = header->activemask_;
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
if (service == SERVICE_SANITIZER) {
|
||||
handleSanitizerService(payload, activemask, device_, uri_locator);
|
||||
//activemask zeroed to avoid subsequent handling for each work-item.
|
||||
activemask = 0;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
while (activemask) {
|
||||
auto wi = amd::leastBitSet(activemask);
|
||||
activemask ^= static_cast<decltype(activemask)>(1) << wi;
|
||||
@@ -290,7 +221,11 @@ class HostcallListener {
|
||||
std::set<HostcallBuffer*> buffers_;
|
||||
device::Signal* doorbell_;
|
||||
MessageHandler messages_;
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
device::UriLocator* urilocator = nullptr;
|
||||
#endif
|
||||
#endif
|
||||
class Thread : public amd::Thread {
|
||||
public:
|
||||
Thread() : amd::Thread("Hostcall Listener Thread", CQ_THREAD_STACK_SIZE) {}
|
||||
@@ -338,7 +273,6 @@ amd::Monitor listenerLock("Hostcall listener lock");
|
||||
void HostcallListener::consumePackets() {
|
||||
uint64_t timeout = 1024 * 1024;
|
||||
uint64_t signal_value = SIGNAL_INIT;
|
||||
|
||||
while (true) {
|
||||
while (true) {
|
||||
uint64_t new_value = doorbell_->Wait(signal_value, device::Signal::Condition::Ne, timeout);
|
||||
@@ -349,7 +283,6 @@ void HostcallListener::consumePackets() {
|
||||
}
|
||||
|
||||
if (signal_value == SIGNAL_DONE) {
|
||||
ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Hostcall listener received SIGNAL_DONE");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -375,12 +308,23 @@ void HostcallListener::terminate() {
|
||||
amd::Os::yield();
|
||||
}
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
if (urilocator)
|
||||
delete urilocator;
|
||||
#endif
|
||||
#endif
|
||||
delete doorbell_;
|
||||
}
|
||||
|
||||
/** Start tracking \p buffer: hand it this listener's doorbell handle (and,
 *  in address-sanitizer builds, the URI locator) and add it to the set of
 *  known buffers. The buffer must not have been added before.
 */
void HostcallListener::addBuffer(HostcallBuffer* buffer) {
  assert(buffers_.count(buffer) == 0 && "buffer already present");

  // Wire the buffer to the listener-owned resources before publishing it.
  buffer->setDoorbell(doorbell_->getHandle());
#if defined(__clang__)
#if __has_feature(address_sanitizer)
  buffer->setUriLocator(urilocator);
#endif
#endif

  buffers_.insert(buffer);
}
|
||||
|
||||
@@ -400,10 +344,21 @@ bool HostcallListener::initialize(const amd::Device &dev) {
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
urilocator = dev.createUriLocator();
|
||||
#endif
|
||||
#endif
|
||||
// If the listener thread was not successfully initialized, clean
|
||||
// everything up and bail out.
|
||||
if (thread_.state() < Thread::INITIALIZED) {
|
||||
delete doorbell_;
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
if (urilocator)
|
||||
delete urilocator;
|
||||
#endif
|
||||
#endif
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -22,9 +22,14 @@
|
||||
|
||||
#include "top.hpp"
|
||||
#include "device/device.hpp"
|
||||
|
||||
#include "device/devhcmessages.hpp"
|
||||
#include <cstddef>
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
#include "device/devurilocator.hpp"
|
||||
#endif
|
||||
#endif
|
||||
/** \file Support for invoking host services from the device.
|
||||
*
|
||||
* A hostcall is a fixed-size request generated by a kernel running
|
||||
@@ -79,3 +84,94 @@ uint32_t getHostcallBufferAlignment(void);
|
||||
|
||||
bool enableHostcalls(const amd::Device& dev, void* buffer, uint32_t numPackets);
|
||||
void disableHostcalls(void* buffer);
|
||||
|
||||
/** Values the hostcall doorbell signal can take: SIGNAL_INIT is the initial
 *  value, SIGNAL_DONE is the terminating value.
 */
enum SignalValue {
  SIGNAL_DONE = 0,
  SIGNAL_INIT = 1
};
|
||||
|
||||
/** \brief Payload of a hostcall packet.
 *
 *  Holds 64 slots, one per workitem of the wave, each 8 ulongs wide.
 *  The data in slot \c i is valid only if bit \c i of
 *  PacketHeader::activemask is set.
 */
struct Payload {
  uint64_t slots[64][8];
};
|
||||
|
||||
/** Per-packet header of a hostcall request. */
struct PacketHeader {
  /** Tagged pointer chaining to the next packet of an intrusive stack. */
  uint64_t next_;
  /** Bitmask telling which payload slots carry valid data. */
  uint64_t activemask_;
  /** ID of the service requested by the wave. */
  uint32_t service_;
  /** Control bits.
   *  \li 0: \c READY flag; the packet is awaiting a response from the host.
   */
  std::atomic<uint32_t> control_;
};

// Other languages read this structure too, hence the layout requirement.
static_assert(std::is_standard_layout<PacketHeader>::value,
              "the hostcall packet must be useable from other languages");
|
||||
|
||||
/** Offsets (in bits) of the fields inside the packet control word. */
enum ControlOffset {
  CONTROL_OFFSET_READY_FLAG = 0,
  CONTROL_OFFSET_RESERVED0 = 1
};
|
||||
|
||||
/** Widths (in bits) of the fields inside the packet control word. */
enum ControlWidth {
  CONTROL_WIDTH_READY_FLAG = 1,
  CONTROL_WIDTH_RESERVED0 = 31
};
|
||||
|
||||
/** \brief Shared buffer submitting hostcall requests.
|
||||
*
|
||||
* Holds hostcall packets requested by all kernels executing on the
|
||||
* same device queue. Each hostcall buffer is associated with at most
|
||||
* one device queue.
|
||||
*
|
||||
* Packets in the buffer are accessed using 64-bit tagged pointers to mitigate
|
||||
* the ABA problem in lock-free stacks. The index_mask is used to extract the
|
||||
* lower bits of the pointer, which form the index into the packet array. The
|
||||
* remaining higher bits define a tag that is incremented on every pop from a
|
||||
* stack.
|
||||
*/
|
||||
class HostcallBuffer {
|
||||
/** Array of packet headers */
|
||||
PacketHeader* headers_;
|
||||
/** Array of packet payloads */
|
||||
Payload* payloads_;
|
||||
/** Signal used by kernels to indicate new work */
|
||||
void* doorbell_;
|
||||
/** Stack of free packets. Uses tagged pointers. */
|
||||
uint64_t free_stack_;
|
||||
/** Stack of ready packets. Uses tagged pointers */
|
||||
std::atomic<uint64_t> ready_stack_;
|
||||
/** Mask for accessing the packet index in the tagged pointer. */
|
||||
uint64_t index_mask_;
|
||||
/** Some services need a device**/
|
||||
const amd::Device* device_;
|
||||
|
||||
PacketHeader* getHeader(uint64_t ptr) const;
|
||||
Payload* getPayload(uint64_t ptr) const;
|
||||
|
||||
public:
|
||||
void processPackets(MessageHandler& messages);
|
||||
void initialize(uint32_t num_packets);
|
||||
void setDoorbell(void* doorbell) { doorbell_ = doorbell; };
|
||||
void setDevice(const amd::Device* dptr) { device_ = dptr; };
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
private:
|
||||
device::UriLocator* uri_locator;
|
||||
public:
|
||||
void setUriLocator(device::UriLocator* uri_l) { uri_locator = uri_l; };
|
||||
#endif
|
||||
#endif
|
||||
};
|
||||
|
||||
static_assert(std::is_standard_layout<HostcallBuffer>::value,
|
||||
"the hostcall buffer must be useable from other languages");
|
||||
|
||||
@@ -40,6 +40,12 @@
|
||||
#include "hwdebug.hpp"
|
||||
#include "devsignal.hpp"
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
#include "devurilocator.hpp"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
@@ -1838,6 +1844,11 @@ class Device : public RuntimeObject {
|
||||
virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset) {
|
||||
return nullptr;
|
||||
}
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
virtual device::UriLocator* createUriLocator() const = 0;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
protected:
|
||||
//! Enable the specified extension
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
/* Copyright (c) 2021-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
#include "device/devhostcall.hpp"
|
||||
#include "device/device.hpp"
|
||||
#include "device/devurilocator.hpp"
|
||||
#include "utils/debug.hpp"
|
||||
#include "platform/memory.hpp"
|
||||
|
||||
#include <inttypes.h> //to exp
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
#include <algorithm>
|
||||
|
||||
//Address sanitizer runtime entry-function to report the invalid device memory access
|
||||
//this will be defined in llvm-project/compiler-rt/lib/asan, and will have effect only
|
||||
//when compiler-rt is built for AMDGPU.
|
||||
//Note: This API is runtime interface of asan library and only defined for linux os.
|
||||
extern "C"
|
||||
void __asan_report_nonself_error(uint64_t *callstack, uint32_t n_callstack, uint64_t* addr,
|
||||
uint32_t naddr, uint64_t* entity_ids, uint32_t n_entities, bool is_write, uint32_t access_size,
|
||||
bool is_abort, const char* name, int64_t vma_adjust, int fd,
|
||||
uint64_t file_extent_size, uint64_t file_extent_start = 0);
|
||||
|
||||
namespace {
|
||||
/**
 * \brief Host-side handler of the address-sanitizer hostcall service.
 *
 * Gathers, from every active lane of the packet, the failing device address
 * and the identifiers of the reporting entities, then forwards them to the
 * ASan runtime through __asan_report_nonself_error (Linux only).
 *
 * \param packt_payload  Payload of the hostcall packet; must not be null.
 * \param activemask     Bitmask of the lanes whose payload slot is valid.
 * \param gpu_device     Device the packet came from; its index is reported
 *                       as the first entity id.
 * \param uri_locator    Optional locator used to map the reported device PC
 *                       to a code-object file; may be null.
 *
 * The payload slot encoding is defined in device-libs/asanrtl/src/report.cl.
 */
void handleSanitizerService(Payload* packt_payload, uint64_t activemask,
                            const amd::Device* gpu_device, device::UriLocator* uri_locator) {
  assert(packt_payload != nullptr && "packet payload is null?");

  // One failing address per active lane.
  uint64_t device_failing_addresses[64] = {};
  // Identification of the entities requesting a report:
  //   index 0       - device id
  //   index 1,2,3   - wg_idx, wg_idy, wg_idz respectively
  //   index 4 to 67 - reporting wave ids in a wave-front
  uint64_t entity_id[68] = {};
  // Zero-initialised so that an empty activemask never exposes stack garbage.
  uint64_t callstack[1] = {};

  // The mask is 64 bits wide, so use the 'long long' builtin: 'long' is only
  // 32 bits on some ABIs and would drop the upper lanes.
  const uint32_t n_activelanes = static_cast<uint32_t>(__builtin_popcountll(activemask));
  uint64_t access_info = 0;
  uint64_t access_size = 0;

  entity_id[0] = gpu_device->index();

  uint32_t addr_count = 0;
  uint32_t entity_count = 1;
  bool first_seen = false;
  while (activemask) {
    const auto wi = amd::leastBitSet(activemask);
    activemask ^= static_cast<decltype(activemask)>(1) << wi;
    const uint64_t* data_slot = packt_payload->slots[wi];

    device_failing_addresses[addr_count++] = data_slot[0];
    if (!first_seen) {
      // Data common to the whole wave is taken from the first active lane.
      callstack[0] = data_slot[1];
      entity_id[entity_count++] = data_slot[2];
      entity_id[entity_count++] = data_slot[3];
      entity_id[entity_count++] = data_slot[4];
      access_info = data_slot[6];
      access_size = data_slot[7];
      first_seen = true;
    }
    // Per-lane identifier.
    entity_id[entity_count++] = data_slot[5];
  }

  // Upper 32 bits non-zero: do not abort. Bit 0 set: the access was a write.
  const bool is_abort = (access_info & 0xFFFFFFFF00000000) == 0;
  const bool is_write = (access_info & 1) != 0;

  uint64_t size = 0, offset = 0;
  int64_t loadAddrAdjust = 0;
  auto uri_fd = amd::Os::FDescInit();
  if (uri_locator) {
    device::UriLocator::UriInfo fileuri_info = uri_locator->lookUpUri(callstack[0]);
    std::tie(offset, size) = uri_locator->decodeUriAndGetFd(fileuri_info, &uri_fd);
    loadAddrAdjust = fileuri_info.loadAddressDiff;
  }

#if defined(__linux__)
  __asan_report_nonself_error(callstack, 1, device_failing_addresses, n_activelanes,
                              entity_id, n_activelanes + 4, is_write,
                              static_cast<uint32_t>(access_size), is_abort,
                              /*thread key*/"amdgpu", loadAddrAdjust, uri_fd, size, offset);
#endif
}
|
||||
} //end anonymous namespace
|
||||
@@ -0,0 +1,43 @@
|
||||
/* Copyright (c) 2019-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
#include "os/os.hpp"
|
||||
#include <string>
|
||||
#include <utility>
|
||||
namespace device{
|
||||
// Interface for HSA/PAL Uri Locators
|
||||
class UriLocator {
|
||||
public:
|
||||
struct UriInfo {
|
||||
std::string uriPath;
|
||||
int64_t loadAddressDiff;
|
||||
};
|
||||
|
||||
virtual ~UriLocator() {}
|
||||
virtual UriInfo lookUpUri(uint64_t device_pc) = 0;
|
||||
virtual std::pair<uint64_t, uint64_t> decodeUriAndGetFd(UriInfo& uri,
|
||||
amd::Os::FileDesc* uri_fd) = 0;
|
||||
};
|
||||
} //namespace device
|
||||
#endif
|
||||
#endif
|
||||
@@ -42,6 +42,12 @@
|
||||
#include "hsailctx.hpp"
|
||||
#include "vaminterface.h"
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
#include "device/devurilocator.hpp"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*! \addtogroup GPU
|
||||
* @{
|
||||
*/
|
||||
@@ -140,6 +146,13 @@ class NullDevice : public amd::Device {
|
||||
|
||||
virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; }
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
//! URI locator factory for address-sanitizer builds; this device has none.
virtual device::UriLocator* createUriLocator() const { return nullptr; }
|
||||
#endif
|
||||
#endif
|
||||
protected:
|
||||
//! Answer the question: "Should HSAIL Program be created?",
|
||||
//! based on the given options.
|
||||
@@ -548,6 +561,13 @@ class Device : public NullDevice, public CALGSLDevice {
|
||||
//! Initial the Hardware Debug Manager
|
||||
int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage);
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
//! URI locator factory for address-sanitizer builds; this device has none.
virtual device::UriLocator* createUriLocator() const { return nullptr; }
|
||||
#endif
|
||||
#endif
|
||||
private:
|
||||
//! Disable copy constructor
|
||||
Device(const Device&);
|
||||
|
||||
@@ -43,6 +43,11 @@
|
||||
#include <atomic>
|
||||
#include <unordered_set>
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
#include "device/devurilocator.hpp"
|
||||
#endif
|
||||
#endif
|
||||
/*! \addtogroup PAL
|
||||
* @{
|
||||
*/
|
||||
@@ -145,7 +150,13 @@ class NullDevice : public amd::Device {
|
||||
cl_set_device_clock_mode_output_amd* pSetClockModeOutput) {
|
||||
return true;
|
||||
}
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
//! URI locator factory for address-sanitizer builds; this device has none.
virtual device::UriLocator* createUriLocator() const { return nullptr; }
|
||||
#endif
|
||||
#endif
|
||||
protected:
|
||||
static Util::GenericAllocator allocator_; //!< Generic memory allocator in PAL
|
||||
|
||||
@@ -593,7 +604,13 @@ class Device : public NullDevice {
|
||||
|
||||
virtual bool importExtSemaphore(void** extSemaphore, const amd::Os::FileDesc& handle);
|
||||
virtual void DestroyExtSemaphore(void* extSemaphore);
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
//! URI locator factory for address-sanitizer builds; this device has none.
//! Spelled createUriLocator (capital 'L') so that it actually overrides
//! amd::Device::createUriLocator instead of declaring an unrelated virtual.
virtual device::UriLocator* createUriLocator() const {
  return nullptr;
}
|
||||
#endif
|
||||
#endif
|
||||
private:
|
||||
static void PAL_STDCALL PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex,
|
||||
Pal::Developer::CallbackType type, void* pCbData);
|
||||
|
||||
@@ -45,6 +45,12 @@
|
||||
#endif
|
||||
#include "platform/sampler.hpp"
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
#include "device/rocm/rocurilocator.hpp"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
@@ -3026,5 +3032,12 @@ void Device::ReleaseGlobalSignal(void* signal) const {
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
// Allocates the ROCm URI locator; the caller takes ownership of the result.
device::UriLocator* Device::createUriLocator() const {
  device::UriLocator* locator = new roc::UriLocator();
  return locator;
}
|
||||
#endif
|
||||
#endif
|
||||
} // namespace roc
|
||||
#endif // WITHOUT_HSA_BACKEND
|
||||
|
||||
@@ -254,6 +254,15 @@ class NullDevice : public amd::Device {
|
||||
virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const { return false; }
|
||||
virtual void ReleaseGlobalSignal(void* signal) const {}
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
//! Not expected to be called on this device: flags the call through
//! ShouldNotReachHere() and yields no locator.
virtual device::UriLocator* createUriLocator() const {
  ShouldNotReachHere();
  return nullptr;
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
protected:
|
||||
//! Initialize compiler instance and handle
|
||||
static bool initCompiler(bool isOffline);
|
||||
@@ -599,6 +608,11 @@ class Device : public NullDevice {
|
||||
//! enum for keeping the total and available queue priorities
|
||||
enum QueuePriority : uint { Low = 0, Normal = 1, High = 2, Total = 3};
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
virtual device::UriLocator* createUriLocator() const;
|
||||
#endif
|
||||
#endif
|
||||
}; // class roc::Device
|
||||
} // namespace roc
|
||||
|
||||
|
||||
@@ -0,0 +1,183 @@
|
||||
/* Copyright (c) 2021-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
#include "rocurilocator.hpp"
|
||||
#include <sstream>
|
||||
|
||||
namespace roc {
|
||||
// Populates rangeTab_ with one entry per loaded code object of every frozen
// executable: [load base, load base + load size - 1], the load delta and the
// URI the code object was loaded from.
//
// Returns HSA_STATUS_ERROR when a required loader entry point is missing,
// otherwise the status reported by the loader iteration. Entries appended
// before a failure are left in rangeTab_; the caller decides whether to clear.
hsa_status_t UriLocator::createUriRangeTable() {
  // Both callbacks must stay capture-less so they convert to the plain
  // function pointers the loader API expects; state travels through 'data',
  // a two-element array holding { &fn_table_, &rangeTab_ } as integers.
  auto execCb = [](hsa_executable_t exec, void* data) -> hsa_status_t {
    int execState = 0;
    hsa_status_t status = hsa_executable_get_info(exec, HSA_EXECUTABLE_INFO_STATE, &execState);
    if (status != HSA_STATUS_SUCCESS) {
      return status;
    }
    // Executables that are not frozen are skipped (status is success here).
    if (execState != HSA_EXECUTABLE_STATE_FROZEN) {
      return status;
    }

    auto loadedCodeObjectCb = [](hsa_executable_t exec, hsa_loaded_code_object_t lcobj,
                                 void* data) -> hsa_status_t {
      uint64_t* argsCb = static_cast<uint64_t*>(data);
      hsa_ven_amd_loader_1_03_pfn_t* fnTab =
          reinterpret_cast<hsa_ven_amd_loader_1_03_pfn_t*>(argsCb[0]);
      std::vector<UriRange>* rangeTab = reinterpret_cast<std::vector<UriRange>*>(argsCb[1]);

      if (!fnTab->hsa_ven_amd_loader_loaded_code_object_get_info) {
        return HSA_STATUS_ERROR;
      }

      uint64_t loadBAddr = 0, loadSize = 0;
      uint32_t uriLen = 0;
      int64_t delta = 0;
      hsa_status_t result;

      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(
          lcobj, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, &loadBAddr);
      if (result != HSA_STATUS_SUCCESS) {
        return result;
      }

      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(
          lcobj, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, &loadSize);
      if (result != HSA_STATUS_SUCCESS) {
        return result;
      }

      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(
          lcobj, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, &uriLen);
      if (result != HSA_STATUS_SUCCESS) {
        return result;
      }

      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(
          lcobj, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, &delta);
      if (result != HSA_STATUS_SUCCESS) {
        return result;
      }

      // Self-releasing buffer: the previous new[]/delete[] pair leaked the
      // allocation whenever the URI query below failed. One extra byte keeps
      // the text NUL terminated.
      std::vector<char> uri(static_cast<size_t>(uriLen) + 1, '\0');
      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(
          lcobj, HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, uri.data());
      if (result != HSA_STATUS_SUCCESS) {
        return result;
      }

      // The stored string keeps the terminator, exactly as before.
      rangeTab->push_back(UriRange{loadBAddr, loadBAddr + loadSize - 1, delta,
                                   std::string(uri.data(), uri.size())});
      return HSA_STATUS_SUCCESS;
    };

    uint64_t* args = static_cast<uint64_t*>(data);
    hsa_ven_amd_loader_1_03_pfn_t* fnExtTab =
        reinterpret_cast<hsa_ven_amd_loader_1_03_pfn_t*>(args[0]);
    return fnExtTab->hsa_ven_amd_loader_executable_iterate_loaded_code_objects(
        exec, loadedCodeObjectCb, data);
  };

  if (!fn_table_.hsa_ven_amd_loader_iterate_executables) {
    return HSA_STATUS_ERROR;
  }

  uint64_t callbackArgs[2] = {reinterpret_cast<uint64_t>(&fn_table_),
                              reinterpret_cast<uint64_t>(&rangeTab_)};
  return fn_table_.hsa_ven_amd_loader_iterate_executables(execCb, callbackArgs);
}
|
||||
|
||||
// Encoding of uniform-resource-identifier(URI) is detailed in
|
||||
// https://llvm.org/docs/AMDGPUUsage.html#loaded-code-object-path-uniform-resource-identifier-uri
|
||||
std::pair<uint64_t, uint64_t> UriLocator::decodeUriAndGetFd(UriInfo& uri,
|
||||
amd::Os::FileDesc* uri_fd) {
|
||||
std::ostringstream ss;
|
||||
char cur;
|
||||
uint64_t offset = 0, size = 0;
|
||||
if (uri.uriPath.size() == 0)
|
||||
return {0,0};
|
||||
auto pos = uri.uriPath.find("//");
|
||||
if (pos == std::string::npos || uri.uriPath.substr(0, pos) != "file:") {
|
||||
uri.uriPath="";
|
||||
return {0,0};
|
||||
}
|
||||
auto rspos = uri.uriPath.find('#');
|
||||
if (rspos != std::string::npos) {
|
||||
//parse range specifier
|
||||
std::string offprefix = "offset=", sizeprefix = "size=";
|
||||
auto sbeg = uri.uriPath.find('&',rspos);
|
||||
auto offbeg = rspos + offprefix.size()+1;
|
||||
std::string offstr = uri.uriPath.substr(offbeg, sbeg - offbeg);
|
||||
auto sizebeg = sbeg + sizeprefix.size()+1;
|
||||
std::string sizestr = uri.uriPath.substr(sizebeg, uri.uriPath.size()-sizebeg);
|
||||
offset = std::stoull(offstr, nullptr, 0);
|
||||
size = std::stoull(sizestr, nullptr, 0);
|
||||
rspos -= 1;
|
||||
}
|
||||
else {
|
||||
rspos = uri.uriPath.size()-1;
|
||||
}
|
||||
pos += 2;
|
||||
//decode filepath
|
||||
for (auto i=pos; i<= rspos;) {
|
||||
cur = uri.uriPath[i];
|
||||
if (isalnum(cur) || cur == '/' || cur == '-' ||
|
||||
cur == '_' || cur == '.' || cur == '~') {
|
||||
ss << cur;
|
||||
i++;
|
||||
}
|
||||
else {
|
||||
//characters prefix with '%' char
|
||||
char tbits = uri.uriPath[i+1], lbits = uri.uriPath[i+2];
|
||||
uint8_t t = (tbits < 58) ? ( tbits - 48) : ((tbits - 65) + 10);
|
||||
uint8_t l = (lbits < 58) ? ( lbits - 48) : ((lbits - 65) + 10);
|
||||
ss << (char)(((0b00000000 | t)<<4) | l);
|
||||
i += 3;
|
||||
}
|
||||
}
|
||||
uri.uriPath = ss.str();
|
||||
size_t fd_size;
|
||||
(void) amd::Os::GetFileHandle(uri.uriPath.c_str(), uri_fd, &fd_size);
|
||||
// As per URI locator syntax, range_specifier is optional
|
||||
// if range_specifier is absent return total size of the file
|
||||
// and set offset to begin at 0.
|
||||
if (size == 0)
|
||||
size = fd_size;
|
||||
return {offset, size};
|
||||
}
|
||||
|
||||
// Returns the URI and load delta of the code object whose address range
// contains device_pc, or an empty UriInfo ({"", 0}) when none matches or the
// range table cannot be built. The table is built on the first successful
// call and reused afterwards.
UriLocator::UriInfo UriLocator::lookUpUri(uint64_t device_pc) {
  UriInfo notFound{"", 0};

  if (!init_) {
    // Fetch the AMD loader extension table, then enumerate loaded code objects.
    if (hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1, sizeof(fn_table_),
                                             &fn_table_) != HSA_STATUS_SUCCESS) {
      return notFound;
    }
    if (createUriRangeTable() != HSA_STATUS_SUCCESS) {
      // Drop any partially collected entries; a later call retries.
      rangeTab_.clear();
      return notFound;
    }
    init_ = true;
  }

  for (auto& range : rangeTab_) {
    const bool containsPc = range.startAddr_ <= device_pc && device_pc <= range.endAddr_;
    if (containsPc) {
      return UriInfo{range.Uri_.c_str(), range.elfDelta_};
    }
  }

  return notFound;
}
|
||||
} //namespace roc
|
||||
#endif
|
||||
#endif
|
||||
@@ -0,0 +1,48 @@
|
||||
/* Copyright (c) 2019-present Advanced Micro Devices, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE. */
|
||||
|
||||
#pragma once
|
||||
#if defined(__clang__)
|
||||
#if __has_feature(address_sanitizer)
|
||||
#include "device/devurilocator.hpp"
|
||||
#include "hsa_ven_amd_loader.h"
|
||||
|
||||
#include <vector>
|
||||
namespace roc {
|
||||
class UriLocator : public device::UriLocator {
|
||||
bool init_ = false;
|
||||
struct UriRange {
|
||||
uint64_t startAddr_, endAddr_;
|
||||
int64_t elfDelta_;
|
||||
std::string Uri_;
|
||||
};
|
||||
std::vector<UriRange> rangeTab_;
|
||||
hsa_ven_amd_loader_1_03_pfn_t fn_table_;
|
||||
|
||||
hsa_status_t createUriRangeTable();
|
||||
public:
|
||||
virtual ~UriLocator() {}
|
||||
virtual UriInfo lookUpUri(uint64_t device_pc) override;
|
||||
virtual std::pair<uint64_t, uint64_t> decodeUriAndGetFd(UriInfo& uri_path,
|
||||
amd::Os::FileDesc* uri_fd) override;
|
||||
};
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
Yeni konuda referans
Bir kullanıcı engelle