From 0ca3262f0a59ee8b5a2d5940369784b520f627a5 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sat, 27 May 2017 16:01:23 -0500 Subject: [PATCH] Add event controls for release fences. Env var : HIP_EVENT_SYS_RELEASE Event allocation flags : hipEventReleaseToDevice, hipEventReleaseToSystem (remove hipEventDisableSystemRelease) Update test for new functionality. [ROCm/clr commit: 942ec0eff8cded9995ee8335a27534243e681606] --- .../include/hip/hcc_detail/hip_runtime_api.h | 3 ++- .../include/hip/nvcc_detail/hip_runtime_api.h | 3 ++- projects/clr/hipamd/src/hip_event.cpp | 11 ++++++-- projects/clr/hipamd/src/hip_hcc.cpp | 24 ++++++++++++----- .../src/runtimeApi/memory/hipHostMalloc.cpp | 27 ++++++++++++------- 5 files changed, 48 insertions(+), 20 deletions(-) diff --git a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h index a8db84c4f2..6059e1e92d 100644 --- a/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/hcc_detail/hip_runtime_api.h @@ -106,7 +106,8 @@ enum hipLimit_t #define hipEventBlockingSync 0x1 ///< Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency. #define hipEventDisableTiming 0x2 ///< Disable event's capability to record timing information. May improve performance. #define hipEventInterprocess 0x4 ///< Event can support IPC. @warning - not supported in HIP. -#define hipEventDisableSystemRelease 0x80000000 /// < Disable the system-scope release that event normally performs when it records. This flag is useful to obtain more precise timings of commands between events. The flag is a no-op on CUDA platforms. +#define hipEventReleaseToDevice 0x40000000 /// < Use a device-scope release when recording this event. This flag is useful to obtain more precise timings of commands between events. The flag is a no-op on CUDA platforms. +#define hipEventReleaseToSystem 0x80000000 /// < Use a system-scope release that when recording this event. This flag is useful to make non-coherent host memory visible to the host. The flag is a no-op on CUDA platforms. //! Flags that can be used with hipHostMalloc diff --git a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h index cbc7ed9f9c..b09c9323c7 100644 --- a/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h +++ b/projects/clr/hipamd/include/hip/nvcc_detail/hip_runtime_api.h @@ -58,7 +58,8 @@ hipMemcpyHostToHost #define hipEventBlockingSync cudaEventBlockingSync #define hipEventDisableTiming cudaEventDisableTiming #define hipEventInterprocess cudaEventInterprocess -#define hipEventDisableSystemRelease cudaEventDefault /* no-op on CUDA platform */ +#define hipEventReleaseToDevice 0 /* no-op on CUDA platform */ +#define hipEventReleaseToSystem 0 /* no-op on CUDA platform */ #define hipHostMallocDefault cudaHostAllocDefault diff --git a/projects/clr/hipamd/src/hip_event.cpp b/projects/clr/hipamd/src/hip_event.cpp index 71f6d8ed5b..2c31769718 100644 --- a/projects/clr/hipamd/src/hip_event.cpp +++ b/projects/clr/hipamd/src/hip_event.cpp @@ -95,8 +95,15 @@ hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags) unsigned supportedFlags = hipEventDefault | hipEventBlockingSync | hipEventDisableTiming - | hipEventDisableSystemRelease; - if ((flags & ~supportedFlags) == 0) { + | hipEventReleaseToDevice + | hipEventReleaseToSystem + ; + const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem); + + const bool illegalFlags = (flags & ~supportedFlags) || // can't set any unsupported flags. + (flags & releaseFlags) == releaseFlags; // can't set both + + if (!illegalFlags) { ihipEvent_t *eh = new ihipEvent_t(flags); *event = eh; diff --git a/projects/clr/hipamd/src/hip_hcc.cpp b/projects/clr/hipamd/src/hip_hcc.cpp index 5e13904521..4400e4596e 100644 --- a/projects/clr/hipamd/src/hip_hcc.cpp +++ b/projects/clr/hipamd/src/hip_hcc.cpp @@ -93,8 +93,11 @@ int HIP_SYNC_HOST_ALLOC = 1; // Sync on host between int HIP_SYNC_NULL_STREAM = 1; +// HIP needs to change some behavior based on HCC_OPT_FLUSH : int HCC_OPT_FLUSH = 0; +int HIP_EVENT_SYS_RELEASE=0; + @@ -330,12 +333,18 @@ void ihipStream_t::locked_recordEvent(hipEvent_t event) // Lock the stream to prevent simultaneous access LockedAccessor_StreamCrit_t crit(_criticalData); -#if USE_NO_SCOPE - //printf ("create_marker, flags = %x\n", event->_flags); - event->_marker = crit->_av.create_marker((event->_flags & hipEventDisableSystemRelease) ? hc::no_scope : hc::system_scope); -#else - event->_marker = crit->_av.create_marker((event->_flags & hipEventDisableSystemRelease) ? hc::accelerator_scope : hc::system_scope); -#endif + auto scopeFlag = hc::accelerator_scope; + // The env var HIP_EVENT_SYS_RELEASE sets the default, + // The explicit flags override the env var (if specified) + if (event->_flags & hipEventReleaseToSystem) { + scopeFlag = hc::system_scope; + } else if (event->_flags & hipEventReleaseToDevice) { + scopeFlag = hc::accelerator_scope; + } else { + scopeFlag = HIP_EVENT_SYS_RELEASE ? hc::system_scope : hc::accelerator_scope; + } + + event->_marker = crit->_av.create_marker(scopeFlag); }; //============================================================================= @@ -1221,7 +1230,8 @@ void HipReadEnv() READ_ENV_I(release, HIP_COHERENT_HOST_ALLOC, 0, "If set, all host memory will be allocated as fine-grained system memory. This allows threadfence_system to work but prevents host memory from being cached on GPU which may have performance impact."); - READ_ENV_I(release, HCC_OPT_FLUSH, 0, "Note this flag also impacts HCC. When set, use agent-scope flush rather than system-scope flush when possible."); + READ_ENV_I(release, HCC_OPT_FLUSH, 0, "When set, use agent-scope fence operations rather than system-scope fence operationsflush when possible. This flag controls both HIP and HCC behavior."); + READ_ENV_I(release, HIP_EVENT_SYS_RELEASE, 0, "If set, event are created with hipEventReleaseToSystem by default. If 0, events are created with hipEventReleaseToDevice by default. The defaults can be overridden by specifying hipEventReleaseToSystem or hipEventReleaseToDevice flag when creating the event."); // Some flags have both compile-time and runtime flags - generate a warning if user enables the runtime flag but the compile-time flag is disabled. if (HIP_DB && !COMPILE_HIP_DB) { diff --git a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostMalloc.cpp b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostMalloc.cpp index 0e88570e17..54073e4901 100644 --- a/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostMalloc.cpp +++ b/projects/clr/hipamd/tests/src/runtimeApi/memory/hipHostMalloc.cpp @@ -49,9 +49,12 @@ __global__ void Set(int *Ad, int val){ std::vector syncMsg = {"event", "stream", "device"}; -void CheckHostPointer(int numElements, int *ptr, int syncMethod, std::string msg) +void CheckHostPointer(int numElements, int *ptr, unsigned eventFlags, int syncMethod, std::string msg) { std::cerr << "test: CheckHostPointer " << msg + << " eventFlags = " << std::hex << eventFlags + << ((eventFlags & hipEventReleaseToDevice) ? " hipEventReleaseToDevice" : "") + << ((eventFlags & hipEventReleaseToSystem) ? " hipEventReleaseToSystem" : "") << " ptr=" << ptr << " syncMethod=" << syncMsg[syncMethod] << "\n"; @@ -60,7 +63,7 @@ void CheckHostPointer(int numElements, int *ptr, int syncMethod, std::string msg // Init: HIPCHECK(hipStreamCreate(&s)); - HIPCHECK(hipEventCreateWithFlags(&e, hipEventDisableSystemRelease)); + HIPCHECK(hipEventCreateWithFlags(&e, eventFlags)) dim3 dimBlock(64,1,1); dim3 dimGrid(numElements/dimBlock.x,1,1); @@ -161,18 +164,24 @@ int main(){ int *A = nullptr; HIPCHECK(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocNonCoherent)); const char *ptrType = "non-coherent"; // TODO - //CheckHostPointer(numElements, A, SYNC_DEVICE, ptrType); - //CheckHostPointer(numElements, A, SYNC_STREAM, ptrType); - CheckHostPointer(numElements, A, SYNC_EVENT, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_DEVICE, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_STREAM, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_EVENT, ptrType); + + // agent-scope releases don't provide host visibility, don't use them here: } - if (0) { // TODO, remove me + if (1) { int *A = nullptr; HIPCHECK(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocCoherent)); const char *ptrType = "coherent"; - CheckHostPointer(numElements, A, SYNC_DEVICE, ptrType); - CheckHostPointer(numElements, A, SYNC_STREAM, ptrType); - CheckHostPointer(numElements, A, SYNC_EVENT, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToDevice, SYNC_DEVICE, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToDevice, SYNC_STREAM, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToDevice, SYNC_EVENT, ptrType); + + CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_DEVICE, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_STREAM, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_EVENT, ptrType); }