Pull from github (tstellar):

Prefer using memfd_create() for the ring buffer.

We were using /dev/shm, but this won't work on systems that
either don't have /dev/shm or have mounted it with noexec, because
for everything other than gfx700 we map the ring buffer with PROT_EXEC.

memfd_create() is Linux specific and was added in Linux 3.17, so we
will fallback to using /dev/shm on systems where memfd_create() is
not available.

Change-Id: I58fb533eebc362f6d29dc3e316a80801014d50e8
Dieser Commit ist enthalten in:
Sean Keely
2017-11-22 14:35:59 -06:00
Ursprung 4b603e803d
Commit b93ffafdc7
3 geänderte Dateien mit 76 neuen und 40 gelöschten Zeilen
+7
Datei anzeigen
@@ -58,12 +58,19 @@ include ( hsa_common )
## Find LibElf
find_package(LibElf REQUIRED)
## Check for memfd_create syscall
include(CheckSymbolExists)
CHECK_SYMBOL_EXISTS ( "__NR_memfd_create" "sys/syscall.h" HAVE_MEMFD_CREATE )
## Compiler preproc definitions.
add_definitions ( -D__linux__ )
add_definitions ( -DHSA_EXPORT=1 )
add_definitions ( -DHSA_EXPORT_FINALIZER=1 )
add_definitions ( -DHSA_EXPORT_IMAGES=1 )
add_definitions ( -D HSA_DEPRECATED= )
if ( HAVE_MEMFD_CREATE )
add_definitions ( -DHAVE_MEMFD_CREATE )
endif()
## Get the package version. The defaults to 1.0.0.
get_version ( "1.0.0" )
@@ -349,6 +349,10 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Sig
void AllocRegisteredRingBuffer(uint32_t queue_size_pkts);
void FreeRegisteredRingBuffer();
/// @brief Abstracts the file handle use for double mapping queues.
void CloseRingBufferFD(const char* ring_buf_shm_path, int fd) const;
int CreateRingBufferFD(const char* ring_buf_shm_path, uint32_t ring_buf_phys_size_bytes) const;
static bool DynamicScratchHandler(hsa_signal_value_t error_code, void* arg);
/// @brief Define the Scratch Buffer Descriptor and related parameters
@@ -486,37 +486,30 @@ void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) {
int ring_buf_shm_fd = -1;
void* reserve_va = NULL;
do {
// Create a shared memory object to back the ring buffer.
ring_buf_shm_fd = shm_open(ring_buf_shm_path, O_CREAT | O_RDWR | O_EXCL,
S_IRUSR | S_IWUSR);
if (ring_buf_shm_fd == -1) {
break;
}
if (posix_fallocate(ring_buf_shm_fd, 0, ring_buf_phys_size_bytes) != 0)
break;
ring_buf_shm_fd = CreateRingBufferFD(ring_buf_shm_path, ring_buf_phys_size_bytes);
// Reserve a VA range twice the size of the physical backing store.
reserve_va = mmap(NULL, ring_buf_alloc_bytes_, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
assert(reserve_va != MAP_FAILED && "mmap failed");
if (ring_buf_shm_fd == -1) {
return;
}
// Remap the lower and upper halves of the VA range.
// Map both halves to the shared memory backing store.
// If the GPU device is KV, do not set PROT_EXEC flag.
void* ring_buf_lower_half = NULL;
void* ring_buf_upper_half = NULL;
if (is_kv_queue_) {
ring_buf_lower_half =
mmap(reserve_va, ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
assert(ring_buf_lower_half != MAP_FAILED && "mmap failed");
// Reserve a VA range twice the size of the physical backing store.
reserve_va = mmap(NULL, ring_buf_alloc_bytes_, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
assert(reserve_va != MAP_FAILED && "mmap failed");
ring_buf_upper_half =
mmap((void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes),
ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
assert(ring_buf_upper_half != MAP_FAILED && "mmap failed");
// Remap the lower and upper halves of the VA range.
// Map both halves to the shared memory backing store.
// If the GPU device is KV, do not set PROT_EXEC flag.
void* ring_buf_lower_half = NULL;
void* ring_buf_upper_half = NULL;
if (is_kv_queue_) {
ring_buf_lower_half = mmap(reserve_va, ring_buf_phys_size_bytes, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
assert(ring_buf_lower_half != MAP_FAILED && "mmap failed");
ring_buf_upper_half =
mmap((void*)(uintptr_t(reserve_va) + ring_buf_phys_size_bytes), ring_buf_phys_size_bytes,
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, ring_buf_shm_fd, 0);
assert(ring_buf_upper_half != MAP_FAILED && "mmap failed");
} else {
ring_buf_lower_half = mmap(reserve_va, ring_buf_phys_size_bytes,
PROT_READ | PROT_WRITE | PROT_EXEC,
@@ -530,21 +523,12 @@ void AqlQueue::AllocRegisteredRingBuffer(uint32_t queue_size_pkts) {
assert(ring_buf_upper_half != MAP_FAILED && "mmap failed");
}
// Release explicit reference to shared memory object.
shm_unlink(ring_buf_shm_path);
close(ring_buf_shm_fd);
// Successfully created mapping.
ring_buf_ = ring_buf_lower_half;
return;
} while (false);
// Resource cleanup on failure.
if (reserve_va) munmap(reserve_va, ring_buf_alloc_bytes_);
if (ring_buf_shm_fd != -1) {
shm_unlink(ring_buf_shm_path);
close(ring_buf_shm_fd);
}
// Release explicit reference to shared memory object.
CloseRingBufferFD(ring_buf_shm_path, ring_buf_shm_fd);
return;
#endif
#ifdef _WIN32
HANDLE ring_buf_mapping = INVALID_HANDLE_VALUE;
@@ -651,6 +635,47 @@ void AqlQueue::FreeRegisteredRingBuffer() {
ring_buf_alloc_bytes_ = 0;
}
void AqlQueue::CloseRingBufferFD(const char* ring_buf_shm_path, int fd) const {
#ifdef __linux__
#if !defined(HAVE_MEMFD_CREATE)
shm_unlink(ring_buf_shm_path);
#endif
close(fd);
#else
assert(false && "Function only needed on Linux.");
#endif
}
int AqlQueue::CreateRingBufferFD(const char* ring_buf_shm_path,
uint32_t ring_buf_phys_size_bytes) const {
#ifdef __linux__
int fd;
#ifdef HAVE_MEMFD_CREATE
fd = syscall(__NR_memfd_create, ring_buf_shm_path, 0);
if (fd == -1) return -1;
if (ftruncate(fd, ring_buf_phys_size_bytes) == -1) {
CloseRingBufferFD(ring_buf_shm_path, fd);
return -1;
}
#else
fd = shm_open(ring_buf_shm_path, O_CREAT | O_RDWR | O_EXCL, S_IRUSR | S_IWUSR);
if (fd == -1) return -1;
if (posix_fallocate(fd, 0, ring_buf_phys_size_bytes) != 0) {
CloseRingBufferFD(ring_buf_shm_path, fd);
return -1;
}
#endif
return fd;
#else
assert(false && "Function only needed on Linux.");
return -1;
#endif
}
hsa_status_t AqlQueue::Inactivate() {
bool active = active_.exchange(false, std::memory_order_relaxed);
if (active) {