Add HIP_WAIT_MODE environment variable (0 = application default, 1 = force blocking wait, 2 = force active wait).

Also relax the conditions under which hipSetDeviceFlags returns hipErrorInvalidValue.

Change-Id: I7f113338be6fe498eaf1ab40fd0fd6b23849bb5e
This commit is contained in:
Ben Sander
2016-10-18 22:25:26 -05:00
parent 4acb7dd947
commit d4b23da91f
2 changed files with 12 additions and 5 deletions
+3 -2
View File
@@ -285,14 +285,15 @@ hipError_t hipSetDeviceFlags( unsigned int flags)
e = hipSuccess;
break;
default:
e = hipErrorInvalidValue;
e = hipSuccess; // TODO - should this be error? Map to Auto?
//e = hipErrorInvalidValue;
break;
}
}
unsigned supportedFlags = hipDeviceScheduleMask | hipDeviceMapHost | hipDeviceLmemResizeToMax;
if (flags & ~supportedFlags) {
if (flags & (~supportedFlags)) {
e = hipErrorInvalidValue;
}
} else {
+9 -3
View File
@@ -66,7 +66,7 @@ int HIP_ATP_MARKER= 0;
int HIP_DB= 0;
int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU identifiers */
int HIP_NUM_KERNELS_INFLIGHT = 128;
int HIP_FORCE_BLOCKING_SYNC = 0;
int HIP_WAIT_MODE = 0;
#define HIP_USE_PRODUCT_NAME 0
//#define DISABLE_COPY_EXT 1
@@ -285,8 +285,14 @@ void ihipStream_t::wait(LockedAccessor_StreamCrit_t &crit, bool assertQueueEmpty
} else {
assert(0); // bad wait mode.
}
if (HIP_WAIT_MODE == 1) {
waitMode = hc::hcWaitModeBlocked;
} else if (HIP_WAIT_MODE == 2) {
waitMode = hc::hcWaitModeActive;
}
crit->_av.wait(HIP_FORCE_BLOCKING_SYNC ? hc::hcWaitModeBlocked : waitMode);
crit->_av.wait(waitMode);
}
crit->_kernelCnt = 0;
@@ -1120,7 +1126,7 @@ void ihipInit()
READ_ENV_I(release, HIP_VISIBLE_DEVICES, CUDA_VISIBLE_DEVICES, "Only devices whose index is present in the secquence are visible to HIP applications and they are enumerated in the order of secquence" );
READ_ENV_I(release, HIP_FORCE_BLOCKING_SYNC, 0, "Force blocking synchronization for stream waits. This may increase latency but is friendlier to other processes. If 0, used .");
READ_ENV_I(release, HIP_WAIT_MODE, 0, "Force synchronization mode. 1= force yield, 2=force spin, 0=defaults specified in application");
READ_ENV_I(release, HIP_NUM_KERNELS_INFLIGHT, 128, "Max number of inflight kernels per stream before active synchronization is forced.");