From 1cee8656df62e13f83dc2dfda4f13ad5be2e8747 Mon Sep 17 00:00:00 2001 From: David Yat Sin Date: Thu, 16 May 2024 14:36:42 -0400 Subject: [PATCH] Revert "Use pthread_attr_setaffinity_np when available" This reverts commit ef95ccf81e59b8608861e8f2f256d981eee19df7. Reason for revert: Causing performance regressions on some systems Change-Id: I82951350cafbd57c495852d6f90023a3373f04f6 Signed-off-by: Chris Freehill --- runtime/hsa-runtime/CMakeLists.txt | 5 - .../hsa-runtime/core/util/lnx/os_linux.cpp | 92 ++++++++----------- 2 files changed, 39 insertions(+), 58 deletions(-) diff --git a/runtime/hsa-runtime/CMakeLists.txt b/runtime/hsa-runtime/CMakeLists.txt index 991f9b17c2..4a27a50971 100644 --- a/runtime/hsa-runtime/CMakeLists.txt +++ b/runtime/hsa-runtime/CMakeLists.txt @@ -105,11 +105,6 @@ CHECK_SYMBOL_EXISTS ( "__NR_memfd_create" "sys/syscall.h" HAVE_MEMFD_CREATE ) if ( HAVE_MEMFD_CREATE ) target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE HAVE_MEMFD_CREATE ) endif() -include(CheckCXXSymbolExists) -CHECK_CXX_SYMBOL_EXISTS( "pthread_attr_setaffinity_np" "pthread.h" HAVE_PTHREAD_ATTR_SETAFFINITY_NP ) -if ( HAVE_PTHREAD_ATTR_SETAFFINITY_NP ) - target_compile_definitions(${CORE_RUNTIME_TARGET} PRIVATE HAVE_PTHREAD_ATTR_SETAFFINITY_NP ) -endif() ## Set include directories for ROCr runtime target_include_directories( ${CORE_RUNTIME_TARGET} diff --git a/runtime/hsa-runtime/core/util/lnx/os_linux.cpp b/runtime/hsa-runtime/core/util/lnx/os_linux.cpp index 04f1dc3647..fb70b81171 100644 --- a/runtime/hsa-runtime/core/util/lnx/os_linux.cpp +++ b/runtime/hsa-runtime/core/util/lnx/os_linux.cpp @@ -89,69 +89,56 @@ class os_thread { : thread(0), lock(nullptr), state(RUNNING) { int err; std::unique_ptr args(new ThreadArgs); - MAKE_SCOPE_GUARD([&]() { args.release(); }); lock = CreateMutex(); if (lock == nullptr) return; + args->entry_args = threadArgument; + args->entry_function = function; + pthread_attr_t attrib; err = pthread_attr_init(&attrib); if (err != 0) { fprintf(stderr, "pthread_attr_init failed: %s\n", strerror(err)); return; } - MAKE_SCOPE_GUARD([&]() { - err = pthread_attr_destroy(&attrib); - if (err) - fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(err)); - }); - auto thread_create_stacksize = [&](pthread_attr_t& attrib) { - if (stackSize != 0) { - stackSize = Max(uint(PTHREAD_STACK_MIN), stackSize); - stackSize = AlignUp(stackSize, 4096); + if (stackSize != 0) { + stackSize = Max(uint(PTHREAD_STACK_MIN), stackSize); + stackSize = AlignUp(stackSize, 4096); + err = pthread_attr_setstacksize(&attrib, stackSize); + if (err != 0) { + fprintf(stderr, "pthread_attr_setstacksize failed: %s\n", strerror(err)); + err = pthread_attr_destroy(&attrib); + if (err != 0) { + fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(err)); + } + return; + } + } + + int create_err = pthread_create(&thread, &attrib, ThreadTrampoline, args.get()); + + // Probably a stack size error since system limits can be different from PTHREAD_STACK_MIN + // Attempt to grow the stack within reason. + if ((create_err == EINVAL) && stackSize != 0) { + while (stackSize < 20 * 1024 * 1024) { + stackSize *= 2; err = pthread_attr_setstacksize(&attrib, stackSize); if (err != 0) { fprintf(stderr, "pthread_attr_setstacksize failed: %s\n", strerror(err)); - if (pthread_attr_destroy(&attrib) != 0) - fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(err)); - - return err; - } - } - - int err = pthread_create(&thread, &attrib, ThreadTrampoline, args.get()); - - // Probably a stack size error since system limits can be different from PTHREAD_STACK_MIN - // Attempt to grow the stack within reason. - if ((err == EINVAL) && stackSize != 0) { - while (stackSize < 20 * 1024 * 1024) { - stackSize *= 2; - err = pthread_attr_setstacksize(&attrib, stackSize); + err = pthread_attr_destroy(&attrib); if (err != 0) { - fprintf(stderr, "pthread_attr_setstacksize failed: %s\n", strerror(err)); - if (pthread_attr_destroy(&attrib)) - fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(err)); - - return err; + fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(err)); } - err = pthread_create(&thread, &attrib, ThreadTrampoline, args.get()); - if (err != EINVAL) return 0; - debug_print("pthread_create returned EINVAL, doubling stack size\n"); + return; } + + create_err = pthread_create(&thread, &attrib, ThreadTrampoline, args.get()); + if (create_err != EINVAL) break; + debug_print("pthread_create returned EINVAL, doubling stack size\n"); } - return 0; - }; - - args->entry_args = threadArgument; - args->entry_function = function; - -#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP - if (thread_create_stacksize(attrib)) { - thread = 0; - return; } -#endif int cores = 0; cpu_set_t* cpuset = nullptr; @@ -167,23 +154,22 @@ class os_thread { for (int i = 0; i < cores; i++) { CPU_SET_S(i, CPU_ALLOC_SIZE(cores), cpuset); } -#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP - err = pthread_attr_setaffinity_np(&attrib, CPU_ALLOC_SIZE(cores), cpuset); -#else err = pthread_setaffinity_np(thread, CPU_ALLOC_SIZE(cores), cpuset); -#endif CPU_FREE(cpuset); if (err != 0) { fprintf(stderr, "pthread_setaffinity_np failed: %s\n", strerror(err)); return; } } - -#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP - if (thread_create_stacksize(attrib)) + if (create_err == 0) + args.release(); + else thread = 0; -#endif - return; + + err = pthread_attr_destroy(&attrib); + if (err != 0) { + fprintf(stderr, "pthread_attr_destroy failed: %s\n", strerror(err)); + } } os_thread(os_thread&& rhs) {