From 597bf599adc39e96614491fb649c37b4991b9d62 Mon Sep 17 00:00:00 2001 From: German Andryeyev Date: Mon, 2 Mar 2020 18:35:56 -0500 Subject: [PATCH] SWDEV-193956 - [hipclang-vdi-rocm][perf] ~45% to 50% of Performance drop on rocBLAS_int8 test Enable cudaSetDeviceFlags() api call. Use active wait by default for all devices. Change-Id: Ifc2ebe3dd9b0aa3fdbfbc9cb5c2cd8b3b726124f [ROCm/clr commit: b93d997fb78d6f2f4b3fa0ebff7687825c89e27a] --- projects/clr/hipamd/vdi/hip_context.cpp | 3 ++ .../clr/hipamd/vdi/hip_device_runtime.cpp | 28 +++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/projects/clr/hipamd/vdi/hip_context.cpp b/projects/clr/hipamd/vdi/hip_context.cpp index f28ca48239..6151f68c6a 100644 --- a/projects/clr/hipamd/vdi/hip_context.cpp +++ b/projects/clr/hipamd/vdi/hip_context.cpp @@ -50,6 +50,9 @@ void init() { amd::Context* context = new amd::Context(device, amd::Context::Info()); if (!context) return; + // Enable active wait on the device by default + devices[i]->SetActiveWait(true); + if (context && CL_SUCCESS != context->create(nullptr)) { context->release(); } else { diff --git a/projects/clr/hipamd/vdi/hip_device_runtime.cpp b/projects/clr/hipamd/vdi/hip_device_runtime.cpp index be2093a8c8..acf73fa656 100644 --- a/projects/clr/hipamd/vdi/hip_device_runtime.cpp +++ b/projects/clr/hipamd/vdi/hip_device_runtime.cpp @@ -491,16 +491,34 @@ hipError_t hipSetDevice ( int device ) { hipError_t hipSetDeviceFlags ( unsigned int flags ) { HIP_INIT_API(hipSetDeviceFlags, flags); - /* FIXME */ - /* Not all of Ctx may be implemented */ - - unsigned supportedFlags = + constexpr uint32_t supportedFlags = hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax; - if (flags & (~supportedFlags)) { + if (flags & ~supportedFlags) { HIP_RETURN(hipErrorInvalidValue); } + amd::Device* device = hip::getCurrentDevice()->devices()[0]; + switch (flags & hipDeviceScheduleMask) { + case hipDeviceScheduleAuto: + // Current behavior is 
different from the spec, due to MT usage in runtime + if (hip::host_device->devices().size() >= std::thread::hardware_concurrency()) { + device->SetActiveWait(false); + break; + } + // Fall through for active wait... + case hipDeviceScheduleSpin: + case hipDeviceScheduleYield: + // Both options fall into yield because of MT usage in the runtime + device->SetActiveWait(true); + break; + case hipDeviceScheduleBlockingSync: + device->SetActiveWait(false); + break; + default: + break; + } + HIP_RETURN(hipSuccess); }