Pull from github (tstellar):
Prefer using memfd_create() for the ring buffer. We were using /dev/shm, but this won't work on systems that either don't have /dev/shm or have mounted it with noexec, because for everything other than gfx700 we map the ring buffer with PROT_EXEC. memfd_create() is Linux-specific and was added in Linux 3.17, so we will fall back to using /dev/shm on systems where memfd_create() is not available. Change-Id: I58fb533eebc362f6d29dc3e316a80801014d50e8
Dieser Commit ist enthalten in:
@@ -58,12 +58,19 @@ include ( hsa_common )
|
||||
## Find LibElf
|
||||
find_package(LibElf REQUIRED)
|
||||
|
||||
## Check for memfd_create syscall
|
||||
include(CheckSymbolExists)
|
||||
CHECK_SYMBOL_EXISTS ( "__NR_memfd_create" "sys/syscall.h" HAVE_MEMFD_CREATE )
|
||||
|
||||
## Compiler preproc definitions.
|
||||
add_definitions ( -D__linux__ )
|
||||
add_definitions ( -DHSA_EXPORT=1 )
|
||||
add_definitions ( -DHSA_EXPORT_FINALIZER=1 )
|
||||
add_definitions ( -DHSA_EXPORT_IMAGES=1 )
|
||||
add_definitions ( -D HSA_DEPRECATED= )
|
||||
if ( HAVE_MEMFD_CREATE )
|
||||
add_definitions ( -DHAVE_MEMFD_CREATE )
|
||||
endif()
|
||||
|
||||
## Get the package version. This defaults to 1.0.0.
|
||||
get_version ( "1.0.0" )
|
||||
|
||||
@@ -349,6 +349,10 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Sig
|
||||
void AllocRegisteredRingBuffer(uint32_t queue_size_pkts);
|
||||
void FreeRegisteredRingBuffer();
|
||||
|
||||
/// @brief Abstracts the file handle used for double mapping queues.
|
||||
void CloseRingBufferFD(const char* ring_buf_shm_path, int fd) const;
|
||||
int CreateRingBufferFD(const char* ring_buf_shm_path, uint32_t ring_buf_phys_size_bytes) const;
|
||||
|
||||
static bool DynamicScratchHandler(hsa_signal_value_t error_code, void* arg);
|
||||
|
||||
/// @brief Define the Scratch Buffer Descriptor and related parameters
|
||||
|
||||
@@ -486,37 +486,30 @@ void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) {
|
||||
int ring_buf_shm_fd = -1;
|
||||
void* reserve_va = NULL;
|
||||
|
||||
do {
|
||||
// Create a shared memory object to back the ring buffer.
|
||||
ring_buf_shm_fd = shm_open(ring_buf_shm_path, O_CREAT | O_RDWR | O_EXCL,
|
||||
S_IRUSR | S_IWUSR);
|
||||
if (ring_buf_shm_fd == -1) {
|
||||
break;
|
||||
}
|
||||
if (posix_fallocate(ring_buf_shm_fd, 0, ring_buf_phys_size_bytes) != 0)
|
||||
break;
|
||||
ring_buf_shm_fd = CreateRingBufferFD(ring_buf_shm_path, ring_buf_phys_size_bytes);
|
||||
|
||||
// Reserve a VA range twice the size of the physical backing store.
|
||||
reserve_va = mmap(NULL, ring_buf_alloc_bytes_, PROT_NONE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
assert(reserve_va != MAP_FAILED && "mmap failed");
|
||||
if (ring_buf_shm_fd == -1) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Remap the lower and upper halves of the VA range.
|
||||
// Map both halves to the shared memory backing store.
|
||||
// If the GPU device is KV, do not set PROT_EXEC flag.
|
||||
void* ring_buf_lower_half = NULL;
|
||||
void* ring_buf_upper_half = NULL;
|
||||
if (is_kv_queue_) {
|
||||
ring_buf_lower_half =
|
||||
mmap(reserve_va, ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
|
||||
assert(ring_buf_lower_half != MAP_FAILED && "mmap failed");
|
||||
// Reserve a VA range twice the size of the physical backing store.
|
||||
reserve_va = mmap(NULL, ring_buf_alloc_bytes_, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
assert(reserve_va != MAP_FAILED && "mmap failed");
|
||||
|
||||
ring_buf_upper_half =
|
||||
mmap((void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes),
|
||||
ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
|
||||
assert(ring_buf_upper_half != MAP_FAILED && "mmap failed");
|
||||
// Remap the lower and upper halves of the VA range.
|
||||
// Map both halves to the shared memory backing store.
|
||||
// If the GPU device is KV, do not set PROT_EXEC flag.
|
||||
void* ring_buf_lower_half = NULL;
|
||||
void* ring_buf_upper_half = NULL;
|
||||
if (is_kv_queue_) {
|
||||
ring_buf_lower_half = mmap(reserve_va, ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
|
||||
assert(ring_buf_lower_half != MAP_FAILED && "mmap failed");
|
||||
|
||||
ring_buf_upper_half =
|
||||
mmap((void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes), ring_buf_phys_size_bytes,
|
||||
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
|
||||
assert(ring_buf_upper_half != MAP_FAILED && "mmap failed");
|
||||
} else {
|
||||
ring_buf_lower_half = mmap(reserve_va, ring_buf_phys_size_bytes,
|
||||
PROT_READ | PROT_WRITE | PROT_EXEC,
|
||||
@@ -530,21 +523,12 @@ void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) {
|
||||
assert(ring_buf_upper_half != MAP_FAILED && "mmap failed");
|
||||
}
|
||||
|
||||
// Release explicit reference to shared memory object.
|
||||
shm_unlink(ring_buf_shm_path);
|
||||
close(ring_buf_shm_fd);
|
||||
|
||||
// Successfully created mapping.
|
||||
ring_buf_ = ring_buf_lower_half;
|
||||
return;
|
||||
} while (false);
|
||||
|
||||
// Resource cleanup on failure.
|
||||
if (reserve_va) munmap(reserve_va, ring_buf_alloc_bytes_);
|
||||
if (ring_buf_shm_fd != -1) {
|
||||
shm_unlink(ring_buf_shm_path);
|
||||
close(ring_buf_shm_fd);
|
||||
}
|
||||
// Release explicit reference to shared memory object.
|
||||
CloseRingBufferFD(ring_buf_shm_path, ring_buf_shm_fd);
|
||||
return;
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
HANDLE ring_buf_mapping = INVALID_HANDLE_VALUE;
|
||||
@@ -651,6 +635,47 @@ void AqlQueue::FreeRegisteredRingBuffer() {
|
||||
ring_buf_alloc_bytes_ = 0;
|
||||
}
|
||||
|
||||
/// @brief Releases the descriptor that backs the double-mapped ring buffer.
///
/// When the build did not define HAVE_MEMFD_CREATE, the shared memory name
/// @p ring_buf_shm_path is unlinked before the descriptor is closed. On
/// non-Linux builds the function only asserts.
///
/// @param ring_buf_shm_path Name of the shared memory object; only consulted
///                          when HAVE_MEMFD_CREATE is not defined.
/// @param fd                Descriptor to close.
void AqlQueue::CloseRingBufferFD(const char* ring_buf_shm_path, int fd) const {
#if !defined(__linux__)
  assert(false && "Function only needed on Linux.");
#else
#ifndef HAVE_MEMFD_CREATE
  // Without memfd_create() the descriptor refers to a named shared memory
  // object, so the name has to be removed as well.
  shm_unlink(ring_buf_shm_path);
#endif
  close(fd);
#endif
}
|
||||
|
||||
int AqlQueue::CreateRingBufferFD(const char* ring_buf_shm_path,
|
||||
uint32_t ring_buf_phys_size_bytes) const {
|
||||
#ifdef __linux__
|
||||
int fd;
|
||||
#ifdef HAVE_MEMFD_CREATE
|
||||
fd = syscall(__NR_memfd_create, ring_buf_shm_path, 0);
|
||||
|
||||
if (fd == -1) return -1;
|
||||
|
||||
if (ftruncate(fd, ring_buf_phys_size_bytes) == -1) {
|
||||
CloseRingBufferFD(ring_buf_shm_path, fd);
|
||||
return -1;
|
||||
}
|
||||
#else
|
||||
fd = shm_open(ring_buf_shm_path, O_CREAT | O_RDWR | O_EXCL, S_IRUSR | S_IWUSR);
|
||||
|
||||
if (fd == -1) return -1;
|
||||
|
||||
if (posix_fallocate(fd, 0, ring_buf_phys_size_bytes) != 0) {
|
||||
CloseRingBufferFD(ring_buf_shm_path, fd);
|
||||
return -1;
|
||||
}
|
||||
#endif
|
||||
return fd;
|
||||
#else
|
||||
assert(false && "Function only needed on Linux.");
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
|
||||
hsa_status_t AqlQueue::Inactivate() {
|
||||
bool active = active_.exchange(false, std::memory_order_relaxed);
|
||||
if (active) {
|
||||
|
||||
In neuem Issue referenzieren
Einen Benutzer sperren