Add event controls for release fences.

Env var : HIP_EVENT_SYS_RELEASE
Event allocation flags : hipEventReleaseToDevice, hipEventReleaseToSystem
   (remove hipEventDisableSystemRelease)

Update test for new functionality.


[ROCm/clr commit: 942ec0eff8]
This commit is contained in:
Ben Sander
2017-05-27 16:01:23 -05:00
parent d6e8f5bbdc
commit 0ca3262f0a
5 changed files with 48 additions and 20 deletions
@@ -106,7 +106,8 @@ enum hipLimit_t
#define hipEventBlockingSync 0x1 ///< Waiting will yield CPU. Power-friendly and usage-friendly but may increase latency.
#define hipEventDisableTiming 0x2 ///< Disable event's capability to record timing information. May improve performance.
#define hipEventInterprocess 0x4 ///< Event can support IPC. @warning - not supported in HIP.
#define hipEventDisableSystemRelease 0x80000000 /// < Disable the system-scope release that event normally performs when it records. This flag is useful to obtain more precise timings of commands between events. The flag is a no-op on CUDA platforms.
#define hipEventReleaseToDevice 0x40000000 /// < Use a device-scope release when recording this event. This flag is useful to obtain more precise timings of commands between events. The flag is a no-op on CUDA platforms.
#define hipEventReleaseToSystem 0x80000000 /// < Use a system-scope release that when recording this event. This flag is useful to make non-coherent host memory visible to the host. The flag is a no-op on CUDA platforms.
//! Flags that can be used with hipHostMalloc
@@ -58,7 +58,8 @@ hipMemcpyHostToHost
#define hipEventBlockingSync cudaEventBlockingSync
#define hipEventDisableTiming cudaEventDisableTiming
#define hipEventInterprocess cudaEventInterprocess
#define hipEventDisableSystemRelease cudaEventDefault /* no-op on CUDA platform */
#define hipEventReleaseToDevice 0 /* no-op on CUDA platform */
#define hipEventReleaseToSystem 0 /* no-op on CUDA platform */
#define hipHostMallocDefault cudaHostAllocDefault
+9 -2
View File
@@ -95,8 +95,15 @@ hipError_t ihipEventCreate(hipEvent_t* event, unsigned flags)
unsigned supportedFlags = hipEventDefault
| hipEventBlockingSync
| hipEventDisableTiming
| hipEventDisableSystemRelease;
if ((flags & ~supportedFlags) == 0) {
| hipEventReleaseToDevice
| hipEventReleaseToSystem
;
const unsigned releaseFlags = (hipEventReleaseToDevice | hipEventReleaseToSystem);
const bool illegalFlags = (flags & ~supportedFlags) || // can't set any unsupported flags.
(flags & releaseFlags) == releaseFlags; // can't set both
if (!illegalFlags) {
ihipEvent_t *eh = new ihipEvent_t(flags);
*event = eh;
+17 -7
View File
@@ -93,8 +93,11 @@ int HIP_SYNC_HOST_ALLOC = 1;
// Sync on host between
int HIP_SYNC_NULL_STREAM = 1;
// HIP needs to change some behavior based on HCC_OPT_FLUSH :
int HCC_OPT_FLUSH = 0;
int HIP_EVENT_SYS_RELEASE=0;
@@ -330,12 +333,18 @@ void ihipStream_t::locked_recordEvent(hipEvent_t event)
// Lock the stream to prevent simultaneous access
LockedAccessor_StreamCrit_t crit(_criticalData);
#if USE_NO_SCOPE
//printf ("create_marker, flags = %x\n", event->_flags);
event->_marker = crit->_av.create_marker((event->_flags & hipEventDisableSystemRelease) ? hc::no_scope : hc::system_scope);
#else
event->_marker = crit->_av.create_marker((event->_flags & hipEventDisableSystemRelease) ? hc::accelerator_scope : hc::system_scope);
#endif
auto scopeFlag = hc::accelerator_scope;
// The env var HIP_EVENT_SYS_RELEASE sets the default,
// The explicit flags override the env var (if specified)
if (event->_flags & hipEventReleaseToSystem) {
scopeFlag = hc::system_scope;
} else if (event->_flags & hipEventReleaseToDevice) {
scopeFlag = hc::accelerator_scope;
} else {
scopeFlag = HIP_EVENT_SYS_RELEASE ? hc::system_scope : hc::accelerator_scope;
}
event->_marker = crit->_av.create_marker(scopeFlag);
};
//=============================================================================
@@ -1221,7 +1230,8 @@ void HipReadEnv()
READ_ENV_I(release, HIP_COHERENT_HOST_ALLOC, 0, "If set, all host memory will be allocated as fine-grained system memory. This allows threadfence_system to work but prevents host memory from being cached on GPU which may have performance impact.");
READ_ENV_I(release, HCC_OPT_FLUSH, 0, "Note this flag also impacts HCC. When set, use agent-scope flush rather than system-scope flush when possible.");
READ_ENV_I(release, HCC_OPT_FLUSH, 0, "When set, use agent-scope fence operations rather than system-scope fence operationsflush when possible. This flag controls both HIP and HCC behavior.");
READ_ENV_I(release, HIP_EVENT_SYS_RELEASE, 0, "If set, event are created with hipEventReleaseToSystem by default. If 0, events are created with hipEventReleaseToDevice by default. The defaults can be overridden by specifying hipEventReleaseToSystem or hipEventReleaseToDevice flag when creating the event.");
// Some flags have both compile-time and runtime flags - generate a warning if user enables the runtime flag but the compile-time flag is disabled.
if (HIP_DB && !COMPILE_HIP_DB) {
@@ -49,9 +49,12 @@ __global__ void Set(int *Ad, int val){
std::vector<std::string> syncMsg = {"event", "stream", "device"};
void CheckHostPointer(int numElements, int *ptr, int syncMethod, std::string msg)
void CheckHostPointer(int numElements, int *ptr, unsigned eventFlags, int syncMethod, std::string msg)
{
std::cerr << "test: CheckHostPointer " << msg
<< " eventFlags = " << std::hex << eventFlags
<< ((eventFlags & hipEventReleaseToDevice) ? " hipEventReleaseToDevice" : "")
<< ((eventFlags & hipEventReleaseToSystem) ? " hipEventReleaseToSystem" : "")
<< " ptr=" << ptr
<< " syncMethod=" << syncMsg[syncMethod] << "\n";
@@ -60,7 +63,7 @@ void CheckHostPointer(int numElements, int *ptr, int syncMethod, std::string msg
// Init:
HIPCHECK(hipStreamCreate(&s));
HIPCHECK(hipEventCreateWithFlags(&e, hipEventDisableSystemRelease));
HIPCHECK(hipEventCreateWithFlags(&e, eventFlags))
dim3 dimBlock(64,1,1);
dim3 dimGrid(numElements/dimBlock.x,1,1);
@@ -161,18 +164,24 @@ int main(){
int *A = nullptr;
HIPCHECK(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocNonCoherent));
const char *ptrType = "non-coherent"; // TODO
//CheckHostPointer(numElements, A, SYNC_DEVICE, ptrType);
//CheckHostPointer(numElements, A, SYNC_STREAM, ptrType);
CheckHostPointer(numElements, A, SYNC_EVENT, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_DEVICE, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_STREAM, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_EVENT, ptrType);
// agent-scope releases don't provide host visibility, don't use them here:
}
if (0) { // TODO, remove me
if (1) {
int *A = nullptr;
HIPCHECK(hipHostMalloc((void**)&A, sizeBytes, hipHostMallocCoherent));
const char *ptrType = "coherent";
CheckHostPointer(numElements, A, SYNC_DEVICE, ptrType);
CheckHostPointer(numElements, A, SYNC_STREAM, ptrType);
CheckHostPointer(numElements, A, SYNC_EVENT, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToDevice, SYNC_DEVICE, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToDevice, SYNC_STREAM, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToDevice, SYNC_EVENT, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_DEVICE, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_STREAM, ptrType);
CheckHostPointer(numElements, A, hipEventReleaseToSystem, SYNC_EVENT, ptrType);
}