From b93ffafdc79e11c1d0a04a85abf73ab0652fc482 Mon Sep 17 00:00:00 2001 From: Sean Keely Date: Wed, 22 Nov 2017 14:35:59 -0600 Subject: [PATCH] Pull from github (tstellar): Prefer using memfd_create() for the ring buffer. We were using /dev/shm, but this won't work on systems that either don't have /dev/shm or have mounted it with noexec, because for everything other than gfx700 we map the ring buffer with PROT_EXEC. memfd_create() is Linux-specific and was added in Linux 3.17, so we will fall back to using /dev/shm on systems where memfd_create() is not available. Change-Id: I58fb533eebc362f6d29dc3e316a80801014d50e8 --- runtime/hsa-runtime/CMakeLists.txt | 7 ++ runtime/hsa-runtime/core/inc/amd_aql_queue.h | 4 + .../core/runtime/amd_aql_queue.cpp | 105 +++++++++++------- 3 files changed, 76 insertions(+), 40 deletions(-) diff --git a/runtime/hsa-runtime/CMakeLists.txt b/runtime/hsa-runtime/CMakeLists.txt index 4240299fe5..37a9b09f5f 100644 --- a/runtime/hsa-runtime/CMakeLists.txt +++ b/runtime/hsa-runtime/CMakeLists.txt @@ -58,12 +58,19 @@ include ( hsa_common ) ## Find LibElf find_package(LibElf REQUIRED) +## Check for memfd_create syscall +include(CheckSymbolExists) +CHECK_SYMBOL_EXISTS ( "__NR_memfd_create" "sys/syscall.h" HAVE_MEMFD_CREATE ) + ## Compiler preproc definitions. add_definitions ( -D__linux__ ) add_definitions ( -DHSA_EXPORT=1 ) add_definitions ( -DHSA_EXPORT_FINALIZER=1 ) add_definitions ( -DHSA_EXPORT_IMAGES=1 ) add_definitions ( -D HSA_DEPRECATED= ) +if ( HAVE_MEMFD_CREATE ) + add_definitions ( -DHAVE_MEMFD_CREATE ) +endif() ## Get the package version. The defaults to 1.0.0. 
get_version ( "1.0.0" ) diff --git a/runtime/hsa-runtime/core/inc/amd_aql_queue.h b/runtime/hsa-runtime/core/inc/amd_aql_queue.h index 4612ebbca9..104a0272cb 100644 --- a/runtime/hsa-runtime/core/inc/amd_aql_queue.h +++ b/runtime/hsa-runtime/core/inc/amd_aql_queue.h @@ -349,6 +349,10 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Sig void AllocRegisteredRingBuffer(uint32_t queue_size_pkts); void FreeRegisteredRingBuffer(); + /// @brief Abstracts the file handle use for double mapping queues. + void CloseRingBufferFD(const char* ring_buf_shm_path, int fd) const; + int CreateRingBufferFD(const char* ring_buf_shm_path, uint32_t ring_buf_phys_size_bytes) const; + static bool DynamicScratchHandler(hsa_signal_value_t error_code, void* arg); /// @brief Define the Scratch Buffer Descriptor and related parameters diff --git a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp index 0ce462c237..ab8fee67a1 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aql_queue.cpp @@ -486,37 +486,30 @@ void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) { int ring_buf_shm_fd = -1; void* reserve_va = NULL; - do { - // Create a shared memory object to back the ring buffer. - ring_buf_shm_fd = shm_open(ring_buf_shm_path, O_CREAT | O_RDWR | O_EXCL, - S_IRUSR | S_IWUSR); - if (ring_buf_shm_fd == -1) { - break; - } - if (posix_fallocate(ring_buf_shm_fd, 0, ring_buf_phys_size_bytes) != 0) - break; + ring_buf_shm_fd = CreateRingBufferFD(ring_buf_shm_path, ring_buf_phys_size_bytes); - // Reserve a VA range twice the size of the physical backing store. - reserve_va = mmap(NULL, ring_buf_alloc_bytes_, PROT_NONE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - assert(reserve_va != MAP_FAILED && "mmap failed"); + if (ring_buf_shm_fd == -1) { + return; + } - // Remap the lower and upper halves of the VA range. 
- // Map both halves to the shared memory backing store. - // If the GPU device is KV, do not set PROT_EXEC flag. - void* ring_buf_lower_half = NULL; - void* ring_buf_upper_half = NULL; - if (is_kv_queue_) { - ring_buf_lower_half = - mmap(reserve_va, ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0); - assert(ring_buf_lower_half != MAP_FAILED && "mmap failed"); + // Reserve a VA range twice the size of the physical backing store. + reserve_va = mmap(NULL, ring_buf_alloc_bytes_, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + assert(reserve_va != MAP_FAILED && "mmap failed"); - ring_buf_upper_half = - mmap((void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes), - ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0); - assert(ring_buf_upper_half != MAP_FAILED && "mmap failed"); + // Remap the lower and upper halves of the VA range. + // Map both halves to the shared memory backing store. + // If the GPU device is KV, do not set PROT_EXEC flag. + void* ring_buf_lower_half = NULL; + void* ring_buf_upper_half = NULL; + if (is_kv_queue_) { + ring_buf_lower_half = mmap(reserve_va, ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0); + assert(ring_buf_lower_half != MAP_FAILED && "mmap failed"); + + ring_buf_upper_half = + mmap((void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes), ring_buf_phys_size_bytes, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0); + assert(ring_buf_upper_half != MAP_FAILED && "mmap failed"); } else { ring_buf_lower_half = mmap(reserve_va, ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE | PROT_EXEC, @@ -530,21 +523,12 @@ void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) { assert(ring_buf_upper_half != MAP_FAILED && "mmap failed"); } - // Release explicit reference to shared memory object. 
- shm_unlink(ring_buf_shm_path); - close(ring_buf_shm_fd); - // Successfully created mapping. ring_buf_ = ring_buf_lower_half; - return; - } while (false); - // Resource cleanup on failure. - if (reserve_va) munmap(reserve_va, ring_buf_alloc_bytes_); - if (ring_buf_shm_fd != -1) { - shm_unlink(ring_buf_shm_path); - close(ring_buf_shm_fd); - } + // Release explicit reference to shared memory object. + CloseRingBufferFD(ring_buf_shm_path, ring_buf_shm_fd); + return; #endif #ifdef _WIN32 HANDLE ring_buf_mapping = INVALID_HANDLE_VALUE; @@ -651,6 +635,47 @@ void AqlQueue::FreeRegisteredRingBuffer() { ring_buf_alloc_bytes_ = 0; } +void AqlQueue::CloseRingBufferFD(const char* ring_buf_shm_path, int fd) const { +#ifdef __linux__ +#if !defined(HAVE_MEMFD_CREATE) + shm_unlink(ring_buf_shm_path); +#endif + close(fd); +#else + assert(false && "Function only needed on Linux."); +#endif +} + +int AqlQueue::CreateRingBufferFD(const char* ring_buf_shm_path, + uint32_t ring_buf_phys_size_bytes) const { +#ifdef __linux__ + int fd; +#ifdef HAVE_MEMFD_CREATE + fd = syscall(__NR_memfd_create, ring_buf_shm_path, 0); + + if (fd == -1) return -1; + + if (ftruncate(fd, ring_buf_phys_size_bytes) == -1) { + CloseRingBufferFD(ring_buf_shm_path, fd); + return -1; + } +#else + fd = shm_open(ring_buf_shm_path, O_CREAT | O_RDWR | O_EXCL, S_IRUSR | S_IWUSR); + + if (fd == -1) return -1; + + if (posix_fallocate(fd, 0, ring_buf_phys_size_bytes) != 0) { + CloseRingBufferFD(ring_buf_shm_path, fd); + return -1; + } +#endif + return fd; +#else + assert(false && "Function only needed on Linux."); + return -1; +#endif +} + hsa_status_t AqlQueue::Inactivate() { bool active = active_.exchange(false, std::memory_order_relaxed); if (active) {