From 3fd285e26ba194fd7d944f72ffcff415898d9017 Mon Sep 17 00:00:00 2001 From: foreman Date: Fri, 25 Oct 2019 17:08:39 -0500 Subject: [PATCH] P4 to Git Change 2019864 by axie@axie_opencl_pal on 2019/10/25 18:03:56 SWDEV-204995 - Linux Pro: Houdini18 Application hang is seen with pyro sample on OpenCL selection. The new Houdini application has around 286.6 KB of TLS. On Linux, the TLS resides in the thread's stack. TLS is allocated and initialized during pthread_create. If the command queue thread stack size is only 256 KB, pthread_create fails with return value EINVAL. The above information was verified by this test: I printed out the address of a __thread variable. Then I printed out the address of a local variable. I confirmed that both variables are in the same memory segment according to /proc/id/maps. This memory segment is the same size as CQ_THREAD_STACK_SIZE and changes with this environment variable. The __thread variable is 286.6 KB away from the bottom of the stack but still inside the stack. I added a printf to verify that the function guessTlsSize guesses tlsSize correctly, and pthread_create then succeeded on the first invocation with the TLS size adjustment. Tests: 1. Test houdini - PASS 2. http://ocltc.amd.com:8111/viewModification.html?modId=128021&personal=true&tab=vcsModificationBuilds ReviewBoard: http://ocltc.amd.com/reviews/r/18175 Affected files ... ... 
//depot/stg/opencl/drivers/opencl/runtime/os/os_posix.cpp#47 edit [ROCm/clr commit: be8023429af328568955e39ffe4b807ca16fd150] --- projects/clr/rocclr/runtime/os/os_posix.cpp | 35 ++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/projects/clr/rocclr/runtime/os/os_posix.cpp b/projects/clr/rocclr/runtime/os/os_posix.cpp index 9b7e33e317..3617d36d05 100644 --- a/projects/clr/rocclr/runtime/os/os_posix.cpp +++ b/projects/clr/rocclr/runtime/os/os_posix.cpp @@ -394,6 +394,36 @@ bool Os::isThreadAlive(const Thread& thread) { return ::pthread_kill((pthread_t)thread.handle(), 0) == 0; } +static size_t tlsSize = 0; + +// Try to guess the size of TLS (plus some frames) +void* guessTlsSizeThread(void* param) { + address stackBase; + address currentFrame; + size_t stackSize; + Os::currentStackInfo(&stackBase, &stackSize); + currentFrame = reinterpret_cast<address>
(&stackSize); + tlsSize = stackBase - currentFrame; + // align up to page boundary + tlsSize = alignUp(tlsSize, amd::Os::pageSize()); + return NULL; +} + +static void guessTlsSize(void) { + int retval; + pthread_t handle; + pthread_attr_t threadAttr; + + ::pthread_attr_init(&threadAttr); + retval = ::pthread_create(&handle, &threadAttr, guessTlsSizeThread, NULL); + if (retval == 0) { + pthread_join(handle, NULL); + } else { + fatal("pthread_create() failed with default stack size"); + } + ::pthread_attr_destroy(&threadAttr); +} + const void* Os::createOsThread(amd::Thread* thread) { pthread_attr_t threadAttr; ::pthread_attr_init(&threadAttr); @@ -403,7 +433,10 @@ const void* Os::createOsThread(amd::Thread* thread) { if (0 != ::pthread_attr_getguardsize(&threadAttr, &guardsize)) { fatal("pthread_attr_getguardsize() failed"); } - ::pthread_attr_setstacksize(&threadAttr, thread->stackSize_ + guardsize); + + static std::once_flag initOnce; + std::call_once(initOnce, guessTlsSize); + ::pthread_attr_setstacksize(&threadAttr, thread->stackSize_ + guardsize + tlsSize); } // We never plan the use join, so free the resources now.