diff --git a/projects/clr/rocclr/cmake/ROCclrHSA.cmake b/projects/clr/rocclr/cmake/ROCclrHSA.cmake index 87cb55e4ce..bb0ddc0c12 100644 --- a/projects/clr/rocclr/cmake/ROCclrHSA.cmake +++ b/projects/clr/rocclr/cmake/ROCclrHSA.cmake @@ -46,6 +46,7 @@ target_sources(rocclr PRIVATE ${ROCCLR_SRC_DIR}/device/rocm/rocschedcl.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocsettings.cpp ${ROCCLR_SRC_DIR}/device/rocm/rocsignal.cpp - ${ROCCLR_SRC_DIR}/device/rocm/rocvirtual.cpp) + ${ROCCLR_SRC_DIR}/device/rocm/rocvirtual.cpp + ${ROCCLR_SRC_DIR}/device/rocm/rocurilocator.cpp) target_compile_definitions(rocclr PUBLIC WITH_HSA_DEVICE) diff --git a/projects/clr/rocclr/device/devhcmessages.hpp b/projects/clr/rocclr/device/devhcmessages.hpp index 7f156decd9..4d0f877a9f 100644 --- a/projects/clr/rocclr/device/devhcmessages.hpp +++ b/projects/clr/rocclr/device/devhcmessages.hpp @@ -70,7 +70,12 @@ enum ServiceID { SERVICE_RESERVED = 0, SERVICE_FUNCTION_CALL = 1, SERVICE_PRINTF = 2, - SERVICE_DEVMEM = 3, + SERVICE_DEVMEM = 3 + #if defined(__clang__) + #if __has_feature(address_sanitizer) + , SERVICE_SANITIZER = 4 + #endif + #endif }; struct Message; diff --git a/projects/clr/rocclr/device/devhostcall.cpp b/projects/clr/rocclr/device/devhostcall.cpp index a1bb1b6f74..5b7517b22d 100644 --- a/projects/clr/rocclr/device/devhostcall.cpp +++ b/projects/clr/rocclr/device/devhostcall.cpp @@ -36,91 +36,11 @@ #include #include -namespace { // anonymous - -enum SignalValue { SIGNAL_DONE = 0, SIGNAL_INIT = 1 }; - -/** \brief Packet payload - * - * Contains 64 slots of 8 ulongs each, one for each workitem in the - * wave. A slot with index \c i contains valid data if the - * corresponding bit in PacketHeader::activemask is set. 
- */ -struct Payload { - uint64_t slots[64][8]; -}; - -/** Packet header */ -struct PacketHeader { - /** Tagged pointer to the next packet in an intrusive stack */ - uint64_t next_; - /** Bitmask that represents payload slots with valid data */ - uint64_t activemask_; - /** Service ID requested by the wave */ - uint32_t service_; - /** Control bits. - * \li 0: \c READY flag. Indicates packet awaiting a host response. - */ - std::atomic control_; -}; - -static_assert(std::is_standard_layout::value, - "the hostcall packet must be useable from other languages"); - -/** Field offsets in the packet control field */ -enum ControlOffset { - CONTROL_OFFSET_READY_FLAG = 0, - CONTROL_OFFSET_RESERVED0 = 1, -}; - -/** Field widths in the packet control field */ -enum ControlWidth { - CONTROL_WIDTH_READY_FLAG = 1, - CONTROL_WIDTH_RESERVED0 = 31, -}; - -/** \brief Shared buffer submitting hostcall requests. - * - * Holds hostcall packets requested by all kernels executing on the - * same device queue. Each hostcall buffer is associated with at most - * one device queue. - * - * Packets in the buffer are accessed using 64-bit tagged pointers to mitigate - * the ABA problem in lock-free stacks. The index_mask is used to extract the - * lower bits of the pointer, which form the index into the packet array. The - * remaining higher bits define a tag that is incremented on every pop from a - * stack. - */ -class HostcallBuffer { - /** Array of packet headers */ - PacketHeader* headers_; - /** Array of packet payloads */ - Payload* payloads_; - /** Signal used by kernels to indicate new work */ - void* doorbell_; - /** Stack of free packets. Uses tagged pointers. */ - uint64_t free_stack_; - /** Stack of ready packets. Uses tagged pointers */ - std::atomic ready_stack_; - /** Mask for accessing the packet index in the tagged pointer. 
*/ - uint64_t index_mask_; - /** Some services need a device */ - const amd::Device* device_; - - PacketHeader* getHeader(uint64_t ptr) const; - Payload* getPayload(uint64_t ptr) const; - - public: - void processPackets(MessageHandler& messages); - void initialize(uint32_t num_packets); - void setDoorbell(void* doorbell) { doorbell_ = doorbell; }; - void setDevice(const amd::Device* dptr) { device_ = dptr; } -}; - -static_assert(std::is_standard_layout::value, - "the hostcall buffer must be useable from other languages"); - -}; // namespace +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devsanitizer.hpp" +#endif +#endif PacketHeader* HostcallBuffer::getHeader(uint64_t ptr) const { return headers_ + (ptr & index_mask_); @@ -205,6 +125,7 @@ void HostcallBuffer::processPackets(MessageHandler& messages) { // Grab the entire ready stack and set the top to 0. New requests from the // device will continue pushing on the stack while we process the packets that // we have grabbed. + uint64_t ready_stack = std::atomic_exchange_explicit(&ready_stack_, static_cast(0), std::memory_order_acquire); if (!ready_stack) { return; @@ -222,6 +143,16 @@ void HostcallBuffer::processPackets(MessageHandler& messages) { auto service = header->service_; auto payload = getPayload(iter); auto activemask = header->activemask_; + +#if defined(__clang__) +#if __has_feature(address_sanitizer) + if (service == SERVICE_SANITIZER) { + handleSanitizerService(payload, activemask, device_, uri_locator); + //activemask zeroed to avoid subsequent handling for each work-item. 
+ activemask = 0; + } +#endif +#endif while (activemask) { auto wi = amd::leastBitSet(activemask); activemask ^= static_cast(1) << wi; @@ -290,7 +221,11 @@ class HostcallListener { std::set buffers_; device::Signal* doorbell_; MessageHandler messages_; - +#if defined(__clang__) +#if __has_feature(address_sanitizer) + device::UriLocator* urilocator = nullptr; +#endif +#endif class Thread : public amd::Thread { public: Thread() : amd::Thread("Hostcall Listener Thread", CQ_THREAD_STACK_SIZE) {} @@ -338,7 +273,6 @@ amd::Monitor listenerLock("Hostcall listener lock"); void HostcallListener::consumePackets() { uint64_t timeout = 1024 * 1024; uint64_t signal_value = SIGNAL_INIT; - while (true) { while (true) { uint64_t new_value = doorbell_->Wait(signal_value, device::Signal::Condition::Ne, timeout); @@ -349,7 +283,6 @@ void HostcallListener::consumePackets() { } if (signal_value == SIGNAL_DONE) { - ClPrint(amd::LOG_INFO, amd::LOG_INIT, "Hostcall listener received SIGNAL_DONE"); return; } @@ -375,12 +308,23 @@ void HostcallListener::terminate() { amd::Os::yield(); } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + if (urilocator) + delete urilocator; +#endif +#endif delete doorbell_; } void HostcallListener::addBuffer(HostcallBuffer* buffer) { assert(buffers_.count(buffer) == 0 && "buffer already present"); buffer->setDoorbell(doorbell_->getHandle()); +#if defined(__clang__) +#if __has_feature(address_sanitizer) + buffer->setUriLocator(urilocator); +#endif +#endif buffers_.insert(buffer); } @@ -400,10 +344,21 @@ bool HostcallListener::initialize(const amd::Device &dev) { return false; } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + urilocator = dev.createUriLocator(); +#endif +#endif // If the listener thread was not successfully initialized, clean // everything up and bail out. 
if (thread_.state() < Thread::INITIALIZED) { delete doorbell_; +#if defined(__clang__) +#if __has_feature(address_sanitizer) + if (urilocator) + delete urilocator; +#endif +#endif return false; } diff --git a/projects/clr/rocclr/device/devhostcall.hpp b/projects/clr/rocclr/device/devhostcall.hpp index 1a3f275a9d..ae2047c33e 100644 --- a/projects/clr/rocclr/device/devhostcall.hpp +++ b/projects/clr/rocclr/device/devhostcall.hpp @@ -22,9 +22,14 @@ #include "top.hpp" #include "device/device.hpp" - +#include "device/devhcmessages.hpp" #include +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devurilocator.hpp" +#endif +#endif /** \file Support for invoking host services from the device. * * A hostcall is a fixed-size request generated by a kernel running @@ -79,3 +84,94 @@ uint32_t getHostcallBufferAlignment(void); bool enableHostcalls(const amd::Device& dev, void* buffer, uint32_t numPackets); void disableHostcalls(void* buffer); + +enum SignalValue { SIGNAL_DONE = 0, SIGNAL_INIT = 1 }; + +/** \brief Packet payload + * + * Contains 64 slots of 8 ulongs each, one for each workitem in the + * wave. A slot with index \c i contains valid data if the + * corresponding bit in PacketHeader::activemask is set. + */ +struct Payload { + uint64_t slots[64][8]; +}; + +/** Packet header */ +struct PacketHeader { + /** Tagged pointer to the next packet in an intrusive stack */ + uint64_t next_; + /** Bitmask that represents payload slots with valid data */ + uint64_t activemask_; + /** Service ID requested by the wave */ + uint32_t service_; + /** Control bits. + * \li 0: \c READY flag. Indicates packet awaiting a host response. 
+ */ + std::atomic control_; +}; + +static_assert(std::is_standard_layout::value, + "the hostcall packet must be useable from other languages"); + +/** Field offsets in the packet control field */ +enum ControlOffset { + CONTROL_OFFSET_READY_FLAG = 0, + CONTROL_OFFSET_RESERVED0 = 1, +}; + +/** Field widths in the packet control field */ +enum ControlWidth { + CONTROL_WIDTH_READY_FLAG = 1, + CONTROL_WIDTH_RESERVED0 = 31, +}; + +/** \brief Shared buffer submitting hostcall requests. + * + * Holds hostcall packets requested by all kernels executing on the + * same device queue. Each hostcall buffer is associated with at most + * one device queue. + * + * Packets in the buffer are accessed using 64-bit tagged pointers to mitigate + * the ABA problem in lock-free stacks. The index_mask is used to extract the + * lower bits of the pointer, which form the index into the packet array. The + * remaining higher bits define a tag that is incremented on every pop from a + * stack. + */ +class HostcallBuffer { + /** Array of packet headers */ + PacketHeader* headers_; + /** Array of packet payloads */ + Payload* payloads_; + /** Signal used by kernels to indicate new work */ + void* doorbell_; + /** Stack of free packets. Uses tagged pointers. */ + uint64_t free_stack_; + /** Stack of ready packets. Uses tagged pointers */ + std::atomic ready_stack_; + /** Mask for accessing the packet index in the tagged pointer. 
*/ + uint64_t index_mask_; + /** Some services need a device**/ + const amd::Device* device_; + + PacketHeader* getHeader(uint64_t ptr) const; + Payload* getPayload(uint64_t ptr) const; + + public: + void processPackets(MessageHandler& messages); + void initialize(uint32_t num_packets); + void setDoorbell(void* doorbell) { doorbell_ = doorbell; }; + void setDevice(const amd::Device* dptr) { device_ = dptr; }; + + #if defined(__clang__) + #if __has_feature(address_sanitizer) + private: + device::UriLocator* uri_locator; + public: + void setUriLocator(device::UriLocator* uri_l) { uri_locator = uri_l; }; + #endif + #endif +}; + +static_assert(std::is_standard_layout::value, + "the hostcall buffer must be useable from other languages"); diff --git a/projects/clr/rocclr/device/device.hpp b/projects/clr/rocclr/device/device.hpp index 7c0e13db1f..69eb60a2dd 100644 --- a/projects/clr/rocclr/device/device.hpp +++ b/projects/clr/rocclr/device/device.hpp @@ -40,6 +40,12 @@ #include "hwdebug.hpp" #include "devsignal.hpp" +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "devurilocator.hpp" +#endif +#endif + #include #include #include @@ -1838,6 +1844,11 @@ class Device : public RuntimeObject { virtual amd::Memory* GetArenaMemObj(const void* ptr, size_t& offset) { return nullptr; } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const = 0; +#endif +#endif protected: //! Enable the specified extension diff --git a/projects/clr/rocclr/device/devsanitizer.hpp b/projects/clr/rocclr/device/devsanitizer.hpp new file mode 100644 index 0000000000..5483b40ba1 --- /dev/null +++ b/projects/clr/rocclr/device/devsanitizer.hpp @@ -0,0 +1,110 @@ +/* Copyright (c) 2021-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. */ + +#pragma once +#include "device/devhostcall.hpp" +#include "device/device.hpp" +#include "device/devurilocator.hpp" +#include "utils/debug.hpp" +#include "platform/memory.hpp" + +#include //to exp +#include +#include +#include +#include + +//Address sanitizer runtime entry-function to report the invalid device memory access +//this will be defined in llvm-project/compiler-rt/lib/asan, and will have effect only +//when compiler-rt is build for AMDGPU. +//Note: This API is runtime interface of asan library and only defined for linux os. 
+extern "C"
+void __asan_report_nonself_error(uint64_t *callstack, uint32_t n_callstack, uint64_t* addr,
+  uint32_t naddr, uint64_t* entity_ids, uint32_t n_entities, bool is_write, uint32_t access_size,
+  bool is_abort, const char* name, int64_t vma_adjust, int fd,
+  uint64_t file_extent_size, uint64_t file_extent_start = 0);
+
+/** \brief Forward one sanitizer hostcall packet to the host ASan runtime.
+ *
+ * Collects the failing address of every active lane plus the identifiers of
+ * the reporting entities, resolves the code object that contains the faulting
+ * program counter through \p uri_locator (when one is available) and hands
+ * everything to __asan_report_nonself_error().
+ *
+ * The payload encoding is defined in device-libs/asanrtl/src/report.cl. As
+ * consumed here, each active lane's slot holds:
+ * \li [0] failing address
+ * \li [1] program counter used as the single callstack entry
+ * \li [2..4] workgroup ids x, y, z
+ * \li [5] id of the reporting lane
+ * \li [6] access info (bit 0: write access; any of the upper 32 bits set:
+ *         do not abort)
+ * \li [7] access size
+ * Slots 1-4, 6 and 7 are only read from the first active lane.
+ *
+ * \param packt_payload Payload of the hostcall packet; must not be null.
+ * \param activemask    Bitmask of lanes whose payload slot is valid.
+ * \param gpu_device    Device that produced the packet; must not be null.
+ * \param uri_locator   Optional locator for the code object URI; may be null.
+ */
+static inline void handleSanitizerService(Payload* packt_payload, uint64_t activemask,
+  const amd::Device* gpu_device, device::UriLocator* uri_locator) {
+  assert(packt_payload != nullptr && "packet payload is null?");
+  assert(gpu_device != nullptr && "sanitizer packet without a device?");
+
+  // Without an active lane there is no slot to read the callstack and the
+  // workgroup ids from, so there is nothing meaningful to report.
+  if (activemask == 0) {
+    return;
+  }
+
+  // An address results in invalid access in each active lane
+  uint64_t device_failing_addresses[64];
+  // An array of identifications of entities requesting a report.
+  // index 0 - contains device id
+  // index 1,2,3 - contains wg_idx, wg_idy, wg_idz respectively.
+  // index 4 to 67 - contains reporting wave ids in a wave-front.
+  uint64_t entity_id[68];
+  uint64_t callstack[1];
+  uint64_t access_info = 0;
+  uint64_t access_size = 0;
+
+  // The mask is 64 bits wide: use the 'long long' builtin so no lanes are
+  // dropped on targets where 'long' is only 32 bits.
+  const uint32_t n_activelanes = static_cast<uint32_t>(__builtin_popcountll(activemask));
+  entity_id[0] = gpu_device->index();
+
+  uint32_t lane = 0;
+  while (activemask) {
+    const auto wi = amd::leastBitSet(activemask);
+    activemask ^= static_cast<uint64_t>(1) << wi;
+    const uint64_t* data_slot = packt_payload->slots[wi];
+    if (lane == 0) {
+      // Values shared by the whole wave are taken from the first active lane.
+      callstack[0] = data_slot[1];
+      entity_id[1] = data_slot[2];
+      entity_id[2] = data_slot[3];
+      entity_id[3] = data_slot[4];
+      access_info = data_slot[6];
+      access_size = data_slot[7];
+    }
+    device_failing_addresses[lane] = data_slot[0];
+    entity_id[4 + lane] = data_slot[5];
+    ++lane;
+  }
+
+  const bool is_write = (access_info & 1) != 0;
+  const bool is_abort = (access_info & 0xFFFFFFFF00000000) == 0;
+
+  uint64_t size = 0, offset = 0;
+  int64_t loadAddrAdjust = 0;
+  auto uri_fd = amd::Os::FDescInit();
+  if (uri_locator) {
+    device::UriLocator::UriInfo fileuri_info = uri_locator->lookUpUri(callstack[0]);
+    std::tie(offset, size) = uri_locator->decodeUriAndGetFd(fileuri_info, &uri_fd);
+    loadAddrAdjust = fileuri_info.loadAddressDiff;
+  }
+
+#if defined(__linux__)
+  __asan_report_nonself_error(callstack, 1, device_failing_addresses, n_activelanes,
+    entity_id, n_activelanes+4, is_write, static_cast<uint32_t>(access_size), is_abort,
+    /*thread key*/"amdgpu", loadAddrAdjust, uri_fd, size, offset);
+#endif
+}
diff --git a/projects/clr/rocclr/device/devurilocator.hpp b/projects/clr/rocclr/device/devurilocator.hpp
new file mode 100644
index 0000000000..dcb31b5de9
--- /dev/null
+++ b/projects/clr/rocclr/device/devurilocator.hpp
@@ -0,0 +1,43 @@
+/* Copyright (c) 2019-present Advanced Micro Devices, Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
*/
+
+#pragma once
+#if defined(__clang__)
+#if __has_feature(address_sanitizer)
+#include "os/os.hpp"
+#include <cstdint>
+#include <string>
+#include <utility>
+namespace device{
+// Interface for HSA/PAL Uri Locators
+//
+// A locator maps a device program counter to the URI of the code object that
+// contains it, and turns that URI into an open file plus the byte range of
+// the code object inside the file.
+class UriLocator {
+ public:
+  //! Result of a program-counter lookup.
+  struct UriInfo {
+    std::string uriPath;      //!< URI of the code object; empty when the lookup failed
+    int64_t loadAddressDiff;  //!< Load-address adjustment reported for the code object
+  };
+
+  virtual ~UriLocator() = default;
+
+  //! Returns the URI information of the code object containing \p device_pc.
+  virtual UriInfo lookUpUri(uint64_t device_pc) = 0;
+
+  //! Decodes \p uri (the implementation may rewrite uri.uriPath), stores a
+  //! descriptor for the referenced file in \p uri_fd and returns the code
+  //! object's {offset, size} within that file.
+  virtual std::pair<uint64_t, uint64_t> decodeUriAndGetFd(UriInfo& uri,
+      amd::Os::FileDesc* uri_fd) = 0;
+};
+} //namespace device
+#endif
+#endif
diff --git a/projects/clr/rocclr/device/gpu/gpudevice.hpp b/projects/clr/rocclr/device/gpu/gpudevice.hpp index 35a83c7688..1ee64f52d5 100644 --- a/projects/clr/rocclr/device/gpu/gpudevice.hpp +++ b/projects/clr/rocclr/device/gpu/gpudevice.hpp @@ -42,6 +42,12 @@ #include "hsailctx.hpp" #include "vaminterface.h" +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devurilocator.hpp" +#endif +#endif + /*! \addtogroup GPU * @{ */ @@ -140,6 +146,13 @@ class NullDevice : public amd::Device { virtual bool SetClockMode(const cl_set_device_clock_mode_input_amd setClockModeInput, cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + return nullptr; + } +#endif +#endif protected: //! Answer the question: "Should HSAIL Program be created?", //! based on the given options. @@ -548,6 +561,13 @@ class Device : public NullDevice, public CALGSLDevice { //! Initial the Hardware Debug Manager int32_t hwDebugManagerInit(amd::Context* context, uintptr_t messageStorage); +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + return nullptr; + } +#endif +#endif private: //! 
Disable copy constructor Device(const Device&); diff --git a/projects/clr/rocclr/device/pal/paldevice.hpp b/projects/clr/rocclr/device/pal/paldevice.hpp index 052577e722..6f19ebebe9 100644 --- a/projects/clr/rocclr/device/pal/paldevice.hpp +++ b/projects/clr/rocclr/device/pal/paldevice.hpp @@ -43,6 +43,11 @@ #include #include +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/devurilocator.hpp" +#endif +#endif /*! \addtogroup PAL * @{ */ @@ -145,7 +150,13 @@ class NullDevice : public amd::Device { cl_set_device_clock_mode_output_amd* pSetClockModeOutput) { return true; } - +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + return nullptr; + } +#endif +#endif protected: static Util::GenericAllocator allocator_; //!< Generic memory allocator in PAL @@ -593,7 +604,13 @@ class Device : public NullDevice { virtual bool importExtSemaphore(void** extSemaphore, const amd::Os::FileDesc& handle); virtual void DestroyExtSemaphore(void* extSemaphore); - +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { // must match amd::Device::createUriLocator exactly to override it; returns nullptr (no locator) + return nullptr; + } +#endif +#endif private: static void PAL_STDCALL PalDeveloperCallback(void* pPrivateData, const Pal::uint32 deviceIndex, Pal::Developer::CallbackType type, void* pCbData); diff --git a/projects/clr/rocclr/device/rocm/rocdevice.cpp b/projects/clr/rocclr/device/rocm/rocdevice.cpp index c6e94732a6..0bb98ec598 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.cpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.cpp @@ -45,6 +45,12 @@ #endif #include "platform/sampler.hpp" +#if defined(__clang__) +#if __has_feature(address_sanitizer) +#include "device/rocm/rocurilocator.hpp" +#endif +#endif + #include #include #include @@ -3026,5 +3032,12 @@ void Device::ReleaseGlobalSignal(void* signal) const { } } +#if defined(__clang__) +#if __has_feature(address_sanitizer) +device::UriLocator* 
Device::createUriLocator() const { + return new roc::UriLocator(); +} +#endif +#endif } // namespace roc #endif // WITHOUT_HSA_BACKEND diff --git a/projects/clr/rocclr/device/rocm/rocdevice.hpp b/projects/clr/rocclr/device/rocm/rocdevice.hpp index 0fb6f0b8e0..b408a9d71c 100644 --- a/projects/clr/rocclr/device/rocm/rocdevice.hpp +++ b/projects/clr/rocclr/device/rocm/rocdevice.hpp @@ -254,6 +254,15 @@ class NullDevice : public amd::Device { virtual bool IsHwEventReady(const amd::Event& event, bool wait = false) const { return false; } virtual void ReleaseGlobalSignal(void* signal) const {} +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const { + ShouldNotReachHere(); + return nullptr; + } +#endif +#endif + protected: //! Initialize compiler instance and handle static bool initCompiler(bool isOffline); @@ -599,6 +608,11 @@ class Device : public NullDevice { //! enum for keeping the total and available queue priorities enum QueuePriority : uint { Low = 0, Normal = 1, High = 2, Total = 3}; +#if defined(__clang__) +#if __has_feature(address_sanitizer) + virtual device::UriLocator* createUriLocator() const; +#endif +#endif }; // class roc::Device } // namespace roc diff --git a/projects/clr/rocclr/device/rocm/rocurilocator.cpp b/projects/clr/rocclr/device/rocm/rocurilocator.cpp new file mode 100644 index 0000000000..acc01e75c3 --- /dev/null +++ b/projects/clr/rocclr/device/rocm/rocurilocator.cpp @@ -0,0 +1,183 @@ +/* Copyright (c) 2021-present Advanced Micro Devices, Inc. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
*/
+
+#if defined(__clang__)
+#if __has_feature(address_sanitizer)
+#include "rocurilocator.hpp"
+#include <cstdlib>
+#include <sstream>
+
+namespace {
+// Value of a single hexadecimal digit (either case), or -1 if \p c is not one.
+int hexDigitValue(char c) {
+  if (c >= '0' && c <= '9') return c - '0';
+  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+  return -1;
+}
+
+// Parses the unsigned number following \p key inside a URI range specifier
+// such as "offset=0x1000&size=4096". The base is auto-detected, as with the
+// original std::stoull(..., 0) call. Returns 0 when the key is missing or is
+// not followed by a number; never throws.
+uint64_t parseRangeField(const std::string& spec, const std::string& key) {
+  const auto key_pos = spec.find(key);
+  if (key_pos == std::string::npos)
+    return 0;
+  const char* digits = spec.c_str() + key_pos + key.size();
+  char* parsed_end = nullptr;
+  const unsigned long long value = std::strtoull(digits, &parsed_end, 0);
+  return (parsed_end == digits) ? 0 : static_cast<uint64_t>(value);
+}
+} // anonymous namespace
+
+namespace roc {
+// Fills rangeTab_ with one entry (address range, load delta, URI) for every
+// code object loaded by a frozen executable. Returns the first error reported
+// by the loader extension, or HSA_STATUS_ERROR when a required entry point is
+// missing from fn_table_.
+hsa_status_t UriLocator::createUriRangeTable() {
+  // Both callbacks receive the same two-element array:
+  //   [0] address of the loader function table, [1] address of the range table.
+  auto execCb = [] (hsa_executable_t exec,
+    void *data) -> hsa_status_t {
+    int execState = 0;
+    hsa_status_t status;
+    status = hsa_executable_get_info(exec, HSA_EXECUTABLE_INFO_STATE, &execState);
+    if (status != HSA_STATUS_SUCCESS)
+      return status;
+    // Executables that are not frozen are skipped without reporting an error.
+    if (execState != HSA_EXECUTABLE_STATE_FROZEN)
+      return status;
+
+    auto loadedCodeObjectCb = [] (hsa_executable_t exec,
+      hsa_loaded_code_object_t lcobj, void *data) -> hsa_status_t {
+      hsa_status_t result;
+      uint64_t loadBAddr = 0, loadSize = 0;
+      uint32_t uriLen = 0;
+      int64_t delta = 0;
+      uint64_t *argsCb = static_cast<uint64_t*>(data);
+      hsa_ven_amd_loader_1_03_pfn_t *fnTab =
+        reinterpret_cast<hsa_ven_amd_loader_1_03_pfn_t*> (argsCb[0]);
+      std::vector<UriRange> *rangeTab =
+        reinterpret_cast<std::vector<UriRange>*> (argsCb[1]);
+
+      if (!fnTab->hsa_ven_amd_loader_loaded_code_object_get_info)
+        return HSA_STATUS_ERROR;
+
+      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj,
+        HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_BASE, (void*) &loadBAddr);
+      if (result != HSA_STATUS_SUCCESS)
+        return result;
+
+      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj,
+        HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_SIZE, (void*) &loadSize);
+      if (result != HSA_STATUS_SUCCESS)
+        return result;
+
+      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj,
+        HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI_LENGTH, (void*) &uriLen);
+      if (result != HSA_STATUS_SUCCESS)
+        return result;
+
+      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj,
+        HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_LOAD_DELTA, (void*) &delta);
+      if (result != HSA_STATUS_SUCCESS)
+        return result;
+
+      // Self-releasing buffer: the error return below no longer leaks it. One
+      // spare byte is kept, as before, in case the loader writes a terminator.
+      std::string uri(uriLen + 1, '\0');
+      result = fnTab->hsa_ven_amd_loader_loaded_code_object_get_info(lcobj,
+        HSA_VEN_AMD_LOADER_LOADED_CODE_OBJECT_INFO_URI, (void*) &uri[0]);
+      if (result != HSA_STATUS_SUCCESS)
+        return result;
+      // Store exactly uriLen characters; the terminator is not part of the URI.
+      uri.resize(uriLen);
+      rangeTab->push_back(UriRange{loadBAddr, loadBAddr+loadSize-1,
+        delta, uri});
+      return HSA_STATUS_SUCCESS;
+    };
+
+    uint64_t *args = static_cast<uint64_t*>(data);
+    hsa_ven_amd_loader_1_03_pfn_t *fnExtTab =
+      reinterpret_cast<hsa_ven_amd_loader_1_03_pfn_t*> (args[0]);
+    if (!fnExtTab->hsa_ven_amd_loader_executable_iterate_loaded_code_objects)
+      return HSA_STATUS_ERROR;
+    return fnExtTab->hsa_ven_amd_loader_executable_iterate_loaded_code_objects(exec,
+      loadedCodeObjectCb, data);
+  };
+
+  if (!fn_table_.hsa_ven_amd_loader_iterate_executables)
+    return HSA_STATUS_ERROR;
+
+  uint64_t callbackArgs[2] = {(uint64_t)& fn_table_, (uint64_t) &rangeTab_};
+  return fn_table_.hsa_ven_amd_loader_iterate_executables(execCb, (void*) callbackArgs);
+}
+
+// Encoding of uniform-resource-identifier(URI) is detailed in
+// https://llvm.org/docs/AMDGPUUsage.html#loaded-code-object-path-uniform-resource-identifier-uri
+//
+// Accepts only "file://" URIs. On success uri.uriPath is replaced by the
+// percent-decoded file path, the file is opened into *uri_fd and the
+// {offset, size} of the code object within the file is returned. For an empty
+// URI {0,0} is returned and uri is left untouched; for any other scheme
+// uri.uriPath is cleared and {0,0} is returned.
+std::pair<uint64_t, uint64_t> UriLocator::decodeUriAndGetFd(UriInfo& uri,
+    amd::Os::FileDesc* uri_fd) {
+  uint64_t offset = 0, size = 0;
+  const std::string raw = uri.uriPath;
+  if (raw.empty())
+    return {0,0};
+  const auto scheme_end = raw.find("//");
+  if (scheme_end == std::string::npos || raw.compare(0, scheme_end, "file:") != 0) {
+    uri.uriPath="";
+    return {0,0};
+  }
+  const size_t path_begin = scheme_end + 2;
+  size_t path_end = raw.size();
+  const auto spec_pos = raw.find('#', path_begin);
+  if (spec_pos != std::string::npos) {
+    //parse range specifier; a missing or malformed field yields 0
+    const std::string spec = raw.substr(spec_pos + 1);
+    offset = parseRangeField(spec, "offset=");
+    size = parseRangeField(spec, "size=");
+    path_end = spec_pos;
+  }
+  //decode filepath
+  std::string path;
+  path.reserve(path_end - path_begin);
+  for (size_t i = path_begin; i < path_end;) {
+    const char cur = raw[i];
+    // "%XY" escapes one byte; both hex digits must lie inside the path.
+    if (cur == '%' && i + 2 < path_end) {
+      const int hi = hexDigitValue(raw[i+1]);
+      const int lo = hexDigitValue(raw[i+2]);
+      if (hi >= 0 && lo >= 0) {
+        path.push_back(static_cast<char>((hi << 4) | lo));
+        i += 3;
+        continue;
+      }
+    }
+    // Unreserved characters, and anything that is not a valid escape, are
+    // copied through unchanged.
+    path.push_back(cur);
+    i++;
+  }
+  uri.uriPath = path;
+  // Stays 0 if the file cannot be opened.
+  size_t fd_size = 0;
+  (void) amd::Os::GetFileHandle(uri.uriPath.c_str(), uri_fd, &fd_size);
+  // As per URI locator syntax, range_specifier is optional
+  // if range_specifier is absent return total size of the file
+  // and set offset to begin at 0.
+  if (size == 0)
+    size = fd_size;
+  return {offset, size};
+}
+
+// Returns the URI and load delta of the code object whose loaded address range
+// contains device_pc, or an empty UriInfo when none matches or the loader
+// extension cannot be queried. The range table is built on the first
+// successful call and reused afterwards.
+UriLocator::UriInfo UriLocator::lookUpUri(uint64_t device_pc) {
+  UriInfo errorstate{"", 0};
+
+  if (!init_) {
+    hsa_status_t result;
+    result = hsa_system_get_major_extension_table(HSA_EXTENSION_AMD_LOADER, 1,
+      sizeof(fn_table_), &fn_table_);
+    if (result != HSA_STATUS_SUCCESS)
+      return errorstate;
+    result = createUriRangeTable();
+    if (result != HSA_STATUS_SUCCESS) {
+      // Drop partial results so the next lookup starts from scratch.
+      rangeTab_.clear();
+      return errorstate;
+    }
+    init_ = true;
+  }
+
+  for (const auto& seg : rangeTab_)
+    if (seg.startAddr_ <= device_pc && device_pc <= seg.endAddr_)
+      return UriInfo{seg.Uri_, seg.elfDelta_};
+
+  return errorstate;
+}
+} //namespace roc
+#endif
+#endif
diff --git a/projects/clr/rocclr/device/rocm/rocurilocator.hpp b/projects/clr/rocclr/device/rocm/rocurilocator.hpp
new file mode 100644
index 0000000000..8331b479c4
--- /dev/null
+++ b/projects/clr/rocclr/device/rocm/rocurilocator.hpp
@@ -0,0 +1,48 @@
+/* Copyright (c) 2019-present Advanced Micro Devices, Inc.
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE. */
+
+#pragma once
+#if defined(__clang__)
+#if __has_feature(address_sanitizer)
+#include "device/devurilocator.hpp"
+#include "hsa_ven_amd_loader.h"
+
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+namespace roc {
+//! HSA implementation of device::UriLocator, backed by the AMD loader
+//! extension table.
+class UriLocator : public device::UriLocator {
+  //! True once the extension table and rangeTab_ have been set up successfully
+  bool init_ = false;
+  //! Loaded address range of one code object together with its URI
+  struct UriRange {
+    uint64_t startAddr_, endAddr_;  //!< First and last address of the range (inclusive)
+    int64_t elfDelta_;              //!< Load delta reported by the loader
+    std::string Uri_;               //!< URI the code object was loaded from
+  };
+  std::vector<UriRange> rangeTab_;
+  //! Loader extension entry points; value-initialised so that no
+  //! indeterminate function pointer can be read before it is filled in
+  hsa_ven_amd_loader_1_03_pfn_t fn_table_{};
+
+  //! Populates rangeTab_ from the code objects of all frozen executables
+  hsa_status_t createUriRangeTable();
+ public:
+  ~UriLocator() override = default;
+  UriInfo lookUpUri(uint64_t device_pc) override;
+  std::pair<uint64_t, uint64_t> decodeUriAndGetFd(UriInfo& uri_path,
+      amd::Os::FileDesc* uri_fd) override;
+};
+}
+#endif
+#endif