From 57b93e02a48d969bc5e67b0b5bb5fd3161b2eaaa Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Mon, 29 Apr 2024 15:02:54 +0000 Subject: [PATCH] Use pthread_attr_setaffinity_np when available If pthread_attr_setaffinity_np function exists use it instead of pthread_setaffinity_np as pthread_setaffinity_np seems to fail to set the affinity settings on some systems. Change-Id: Icd8b17039699ac10d9cd5c4dbb6ac44630673949 --- runtime/hsa-runtime/CMakeLists.txt | 5 + .../hsa-runtime/core/util/lnx/os_linux.cpp | 92 +++++++++++-------- 2 files changed, 58 insertions(+), 39 deletions(-) diff --git a/runtime/hsa-runtime/CMakeLists.txt b/runtime/hsa-runtime/CMakeLists.txt index cb35761c17..e8b2cef650 100644 --- a/runtime/hsa-runtime/CMakeLists.txt +++ b/runtime/hsa-runtime/CMakeLists.txt @@ -114,6 +114,11 @@ CHECK_SYMBOL_EXISTS ( "__NR_memfd_create" "sys/syscall.h" HAVE_MEMFD_CREATE ) if ( HAVE_MEMFD_CREATE ) target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE HAVE_MEMFD_CREATE ) endif() +include(CheckCXXSymbolExists) +CHECK_CXX_SYMBOL_EXISTS( "pthread_attr_setaffinity_np" "pthread.h" HAVE_PTHREAD_ATTR_SETAFFINITY_NP ) +if ( HAVE_PTHREAD_ATTR_SETAFFINITY_NP ) + target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE HAVE_PTHREAD_ATTR_SETAFFINITY_NP ) +endif() ## Set include directories for ROCr runtime target_include_directories( ${CORE_RUNTIME_TARGET} diff --git a/runtime/hsa-runtime/core/util/lnx/os_linux.cpp b/runtime/hsa-runtime/core/util/lnx/os_linux.cpp index fb70b81171..04f1dc3647 100644 --- a/runtime/hsa-runtime/core/util/lnx/os_linux.cpp +++ b/runtime/hsa-runtime/core/util/lnx/os_linux.cpp @@ -89,56 +89,69 @@ class os_thread { : thread(0), lock(nullptr), state(RUNNING) { int err; std::unique_ptr args(new ThreadArgs); + MAKE_SCOPE_GUARD([&]() { args.release(); }); lock = CreateMutex(); if (lock == nullptr) return; - args->entry_args = threadArgument; - args->entry_function = function; - pthread_attr_t attrib; err = pthread_attr_init(&attrib); if (err != 0) { fprintf(stderr, "pthread_attr_init failed: %s\n", strerror(err)); return; } + MAKE_SCOPE_GUARD([&]() { + err = pthread_attr_destroy(&attrib); + if (err) + fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(err)); + }); - if (stackSize != 0) { - stackSize = Max(uint(PTHREAD_STACK_MIN), stackSize); - stackSize = AlignUp(stackSize, 4096); - err = pthread_attr_setstacksize(&attrib, stackSize); - if (err != 0) { - fprintf(stderr, "pthread_attr_setstacksize failed: %s\n", strerror(err)); - err = pthread_attr_destroy(&attrib); - if (err != 0) { - fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(err)); - } - return; - } - } - - int create_err = pthread_create(&thread, &attrib, ThreadTrampoline, args.get()); - - // Probably a stack size error since system limits can be different from PTHREAD_STACK_MIN - // Attempt to grow the stack within reason. - if ((create_err == EINVAL) && stackSize != 0) { - while (stackSize < 20 * 1024 * 1024) { - stackSize *= 2; + auto thread_create_stacksize = [&](pthread_attr_t& attrib) { + if (stackSize != 0) { + stackSize = Max(uint(PTHREAD_STACK_MIN), stackSize); + stackSize = AlignUp(stackSize, 4096); err = pthread_attr_setstacksize(&attrib, stackSize); if (err != 0) { fprintf(stderr, "pthread_attr_setstacksize failed: %s\n", strerror(err)); - err = pthread_attr_destroy(&attrib); - if (err != 0) { + if (pthread_attr_destroy(&attrib) != 0) fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(err)); + + return err; + } + } + + int err = pthread_create(&thread, &attrib, ThreadTrampoline, args.get()); + + // Probably a stack size error since system limits can be different from PTHREAD_STACK_MIN + // Attempt to grow the stack within reason. + if ((err == EINVAL) && stackSize != 0) { + while (stackSize < 20 * 1024 * 1024) { + stackSize *= 2; + err = pthread_attr_setstacksize(&attrib, stackSize); + if (err != 0) { + fprintf(stderr, "pthread_attr_setstacksize failed: %s\n", strerror(err)); + if (pthread_attr_destroy(&attrib)) + fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(err)); + + return err; } - return; + err = pthread_create(&thread, &attrib, ThreadTrampoline, args.get()); + if (err != EINVAL) return 0; + debug_print("pthread_create returned EINVAL, doubling stack size\n"); } - - create_err = pthread_create(&thread, &attrib, ThreadTrampoline, args.get()); - if (create_err != EINVAL) break; - debug_print("pthread_create returned EINVAL, doubling stack size\n"); } + return 0; + }; + + args->entry_args = threadArgument; + args->entry_function = function; + +#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP + if (thread_create_stacksize(attrib)) { + thread = 0; + return; } +#endif int cores = 0; cpu_set_t* cpuset = nullptr; @@ -154,22 +167,23 @@ class os_thread { for (int i = 0; i < cores; i++) { CPU_SET_S(i, CPU_ALLOC_SIZE(cores), cpuset); } +#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP + err = pthread_attr_setaffinity_np(&attrib, CPU_ALLOC_SIZE(cores), cpuset); +#else err = pthread_setaffinity_np(thread, CPU_ALLOC_SIZE(cores), cpuset); +#endif CPU_FREE(cpuset); if (err != 0) { fprintf(stderr, "pthread_setaffinity_np failed: %s\n", strerror(err)); return; } } - if (create_err == 0) - args.release(); - else - thread = 0; - err = pthread_attr_destroy(&attrib); - if (err != 0) { - fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(err)); - } +#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP + if (thread_create_stacksize(attrib)) + thread = 0; +#endif + return; } os_thread(os_thread&& rhs) {