SWDEV-276827 - Stream Operations: match API with CUDA

Change-Id: Ia79a950561e97df3b2229f76f5512dede2babf98
这个提交包含在:
Ravi C Akkenapally
2021-03-17 12:33:59 -07:00
父节点 4ba39503bf
当前提交 475d928be8
修改 3 个文件,包含 166 行新增30 行删除
+17 -11
查看文件
@@ -1241,11 +1241,12 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback
* @brief Enqueues a wait command to the stream.
*
* @param [in] stream - Stream identifier
* @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag.
* @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag
* @param [in] value - Value to be used in compare operation
* @param [in] mask - Mask to be applied on value at memory before it is compared with value
* @param [in] flags - Defines the compare operation, supported values are hipStreamWaitValueGte,
* hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor.
* hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor
* @param [in] mask - Mask to be applied on value at memory before it is compared with value,
* default value is set to enable every bit
*
* @returns #hipSuccess, #hipErrorInvalidValue
*
@@ -1266,17 +1267,19 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback
* hipStreamWriteValue32, hipDeviceGetAttribute
*/
hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, uint32_t mask, unsigned int flags);
hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, unsigned int flags,
uint32_t mask __dparm(0xFFFFFFFF));
/**
* @brief Enqueues a wait command to the stream.
*
* @param [in] stream - Stream identifier
* @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag.
* @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag
* @param [in] value - Value to be used in compare operation
* @param [in] mask - Mask to be applied on value at memory before it is compared with value.
* @param [in] flags - Defines the compare operation, supported values are hipStreamWaitValueGte,
* hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor.
* @param [in] mask - Mask to be applied on value at memory before it is compared with value,
* default value is set to enable every bit
*
* @returns #hipSuccess, #hipErrorInvalidValue
*
@@ -1297,14 +1300,16 @@ hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, ui
* hipStreamWriteValue32, hipDeviceGetAttribute
*/
hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, uint64_t mask, unsigned int flags);
hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, unsigned int flags,
uint64_t mask __dparm(0xFFFFFFFFFFFFFFFF));
/**
* @brief Enqueues a write command to the stream.
*
* @param [in] stream - Stream identifier
* @param [in] ptr - Pointer to a GPU accessible memory object.
* @param [in] ptr - Pointer to a GPU accessible memory object
* @param [in] value - Value to be written
* @param [in] flags - reserved, ignored for now, will be used in future releases
*
* @returns #hipSuccess, #hipErrorInvalidValue
*
@@ -1314,14 +1319,15 @@ hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, ui
* @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32,
* hipStreamWaitValue64
*/
hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value);
hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value, unsigned int flags);
/**
* @brief Enqueues a write command to the stream.
*
* @param [in] stream - Stream identifier
* @param [in] ptr - Pointer to a GPU accessible memory object.
* @param [in] ptr - Pointer to a GPU accessible memory object
* @param [in] value - Value to be written
* @param [in] flags - reserved, ignored for now, will be used in future releases
*
* @returns #hipSuccess, #hipErrorInvalidValue
*
@@ -1331,7 +1337,7 @@ hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value);
* @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32,
* hipStreamWaitValue64
*/
hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, int64_t value);
hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, int64_t value, unsigned int flags);
// end doxygen Stream Memory Operations
+8 -8
查看文件
@@ -76,8 +76,8 @@ hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void
return hipSuccess;
}
hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, uint32_t mask,
unsigned int flags) {
hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, unsigned int flags,
uint32_t mask) {
HIP_INIT_API(hipStreamWaitValue32, stream, ptr, value, mask, flags);
// NOTE: ptr corresponds to HSA signal memory, which is 64 bits.
// 32 bit value and mask are converted to 64-bit values.
@@ -91,8 +91,8 @@ hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, ui
0)); // sizeBytes un-used for wait, set it to 0
}
hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, uint64_t mask,
unsigned int flags) {
hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, unsigned int flags,
uint64_t mask) {
HIP_INIT_API(hipStreamWaitValue64, stream, ptr, value, mask, flags);
HIP_RETURN_DURATION(ihipStreamOperation(
stream,
@@ -104,8 +104,8 @@ hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, ui
0)); // sizeBytes un-used for wait, set it to 0
}
hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value) {
HIP_INIT_API(hipStreamWriteValue32, stream, ptr, value);
hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value, unsigned int flags) {
HIP_INIT_API(hipStreamWriteValue32, stream, ptr, value, flags);
HIP_RETURN_DURATION(ihipStreamOperation(
stream,
ROCCLR_COMMAND_STREAM_WRITE_VALUE,
@@ -116,8 +116,8 @@ hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value) {
4));
}
hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, int64_t value) {
HIP_INIT_API(hipStreamWriteValue64, stream, ptr, value);
hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, int64_t value, unsigned int flags) {
HIP_INIT_API(hipStreamWriteValue64, stream, ptr, value, flags);
HIP_RETURN_DURATION(ihipStreamOperation(
stream,
ROCCLR_COMMAND_STREAM_WRITE_VALUE,
@@ -51,6 +51,7 @@ THE SOFTWARE.
// Random predefined 32- and 64-bit values
constexpr int32_t value32 = 0x70F0F0FF;
constexpr int64_t value64 = 0x7FFF0000FFFF0000;
constexpr unsigned int writeFlag = 0;
constexpr float SLEEP_MS = 100;
void testWrite() {
@@ -77,8 +78,8 @@ void testWrite() {
hipHostRegister(host_ptr32, sizeof(int32_t), 0);
// Test writing to a registered host pointer
HIPCHECK(hipStreamWriteValue64(stream, host_ptr64, value64));
HIPCHECK(hipStreamWriteValue32(stream, host_ptr32, value32));
HIPCHECK(hipStreamWriteValue64(stream, host_ptr64, value64, writeFlag));
HIPCHECK(hipStreamWriteValue32(stream, host_ptr32, value32, writeFlag));
hipStreamSynchronize(stream);
HIPASSERT(*host_ptr64 == value64);
@@ -92,15 +93,15 @@ void testWrite() {
*host_ptr64 = 0x0;
*host_ptr32 = 0x0;
HIPCHECK(hipStreamWriteValue64(stream, device_ptr64, value64));
HIPCHECK(hipStreamWriteValue32(stream, device_ptr32, value32));
HIPCHECK(hipStreamWriteValue64(stream, device_ptr64, value64, writeFlag));
HIPCHECK(hipStreamWriteValue32(stream, device_ptr32, value32, writeFlag));
hipStreamSynchronize(stream);
HIPASSERT(*host_ptr64 == value64);
HIPASSERT(*host_ptr32 == value32);
// Test Writing to Signal Memory
HIPCHECK(hipStreamWriteValue64(stream, signalPtr, value64));
HIPCHECK(hipStreamWriteValue64(stream, signalPtr, value64, writeFlag));
hipStreamSynchronize(stream);
HIPASSERT(*signalPtr == value64);
@@ -142,7 +143,6 @@ void testWait() {
int64_t signalValuePass;
};
TEST_WAIT testCases[] = {
{
// mask will ignore few MSB bits
@@ -200,11 +200,82 @@ void testWait() {
}
};
// One hipStreamWaitValue32 test case that is issued without an explicit mask
// argument, so the API's default mask applies. Field order matters: the test
// table fills these in positionally with brace initializers.
struct TEST_WAIT32_NO_MASK {
// Compare operation, forwarded as the `flags` argument of the wait call.
int compareOp;
// Value the wait compares the signal against.
int32_t waitValue;
// Signal value stored before the wait in the blocking run (condition not met).
int32_t signalValueFail;
// Signal value stored in the non-blocking run, and used to release a blocked wait.
int32_t signalValuePass;
};
// These cases are issued without a mask argument, so the default mask
// 0xFFFFFFFF will be used.
// Columns: compareOp, waitValue, signalValueFail, signalValuePass.
TEST_WAIT32_NO_MASK testCasesNoMask32[] = {
  {hipStreamWaitValueGte, 0x7FFF0001, 0x7FFF0000, 0x7FFF0010},
  {hipStreamWaitValueEq, 0x7FFFFFFF, 0x7FFF0000, 0x7FFFFFFF},
  {hipStreamWaitValueAnd, 0x70F0F0F0, 0x0F0F0F0F, 0x1F0F0F0F},
  // The signal values below do not fit in a positive int32_t, hence the casts.
  {hipStreamWaitValueNor, 0x7AAAAAAA, static_cast<int32_t>(0x85555555),
   static_cast<int32_t>(0x9AAAAAAA)},
};
// One hipStreamWaitValue64 test case that is issued without an explicit mask
// argument, so the API's default mask applies. Field order matters: the test
// table fills these in positionally with brace initializers.
struct TEST_WAIT64_NO_MASK {
// Compare operation, forwarded as the `flags` argument of the wait call.
int compareOp;
// Value the wait compares the signal against.
int64_t waitValue;
// Signal value stored before the wait in the blocking run (condition not met).
int64_t signalValueFail;
// Signal value stored in the non-blocking run, and used to release a blocked wait.
int64_t signalValuePass;
};
// These cases are issued without a mask argument, so the default mask
// 0xFFFFFFFFFFFFFFFF will be used.
// Columns: compareOp, waitValue, signalValueFail, signalValuePass.
TEST_WAIT64_NO_MASK testCasesNoMask64[] = {
  {hipStreamWaitValueGte, 0x7FFFFFFFFFFF0001, 0x7FFFFFFFFFFF0000, 0x7FFFFFFFFFFF0001},
  {hipStreamWaitValueEq, 0x7FFFFFFFFFFFFFFF, 0x7FFFFFFF0FFF0000, 0x7FFFFFFFFFFFFFFF},
  {hipStreamWaitValueAnd, 0x70F0F0F0F0F0F0F0, 0x0F0F0F0F0F0F0F0F, 0x1F0F0F0F0F0F0F0F},
  // The signal values below do not fit in a positive int64_t, hence the casts.
  {hipStreamWaitValueNor, 0x4724724747247247, static_cast<int64_t>(0xbddbddbdbddbddbd),
   static_cast<int64_t>(0xbddbddbdbddbddb3)},
};
if (!streamWaitValueSupported()) {
std::cout << " hipStreamWaitValue: not supported on this device , skipping ... \n";
return;
}
std::cout << " hipStreamWaitValue: testing ... \n";
std::cout << " hipStreamWaitValue32: testing ... \n";
std::cout << " hipStreamWaitValue64: testing ... \n";
hipStream_t stream;
hipStreamCreate(&stream);
@@ -215,6 +286,7 @@ void testWait() {
hipHostRegister(dataPtr32, sizeof(int32_t), 0);
// We run all test cases twice
// Run-1: streamWait is blocking (wait condition is false)
// Run-2: streamWait is non-blocking (wait condition is true)
for (int run = 0; run < 2; run++) {
@@ -224,8 +296,8 @@ void testWait() {
*signalPtr = isBlocking ? tc.signalValueFail : tc.signalValuePass;
*dataPtr64 = DATA_INIT;
HIPCHECK(hipStreamWaitValue64(stream, signalPtr, tc.waitValue, tc.mask, tc.compareOp));
HIPCHECK(hipStreamWriteValue64(stream, dataPtr64, DATA_UPDATE));
HIPCHECK(hipStreamWaitValue64(stream, signalPtr, tc.waitValue, tc.compareOp, tc.mask));
HIPCHECK(hipStreamWriteValue64(stream, dataPtr64, DATA_UPDATE, writeFlag));
if (isBlocking) {
// Trigger an implicit flush and verify the stream has pending work.
@@ -243,8 +315,8 @@ void testWait() {
*dataPtr32 = DATA_INIT;
HIPCHECK(hipStreamWaitValue32(stream, signalPtr, static_cast<int32_t>(tc.waitValue),
static_cast<uint32_t>(tc.mask), tc.compareOp));
HIPCHECK(hipStreamWriteValue32(stream, dataPtr32, DATA_UPDATE));
tc.compareOp, static_cast<uint32_t>(tc.mask)));
HIPCHECK(hipStreamWriteValue32(stream, dataPtr32, DATA_UPDATE, writeFlag));
if (isBlocking) {
// For DEBUG only
@@ -262,6 +334,64 @@ void testWait() {
}
}
std::cout << " hipStreamWaitValue32 with default mask: testing ... \n";
// Run-1: streamWait is blocking (wait condition is false)
// Run-2: streamWait is non-blocking (wait condition is true)
for (int run = 0; run < 2; run++) {
bool isBlocking = run == 0;
for (const auto& tc : testCasesNoMask32) {
*signalPtr = isBlocking ? tc.signalValueFail : tc.signalValuePass;
*dataPtr32 = DATA_INIT;
HIPCHECK(hipStreamWaitValue32(stream, signalPtr, tc.waitValue, tc.compareOp));
HIPCHECK(hipStreamWriteValue32(stream, dataPtr32, DATA_UPDATE, writeFlag));
if (isBlocking) {
// For DEBUG only
// usleep(500);
// HIPASSERT(*dataPtr32 == DATA_INIT);
// Trigger an implicit flush and verify the stream has pending work.
HIPASSERT(hipStreamQuery(stream) == hipErrorNotReady);
// update signal to unblock the wait.
*signalPtr = tc.signalValuePass;
}
hipStreamSynchronize(stream);
HIPASSERT(*dataPtr32 == DATA_UPDATE);
}
}
std::cout << " hipStreamWaitValue64 with default mask: testing ... \n";
// Run-1: streamWait is blocking (wait condition is false)
// Run-2: streamWait is non-blocking (wait condition is true)
for (int run = 0; run < 2; run++) {
bool isBlocking = run == 0;
for (const auto& tc : testCasesNoMask64) {
*signalPtr = isBlocking ? tc.signalValueFail : tc.signalValuePass;
*dataPtr64 = DATA_INIT;
HIPCHECK(hipStreamWaitValue64(stream, signalPtr, tc.waitValue, tc.compareOp));
HIPCHECK(hipStreamWriteValue64(stream, dataPtr64, DATA_UPDATE, writeFlag));
if (isBlocking) {
// For DEBUG only
// usleep(500);
// HIPASSERT(*dataPtr64 == DATA_INIT);
// Trigger an implicit flush and verify the stream has pending work.
HIPASSERT(hipStreamQuery(stream) == hipErrorNotReady);
// update signal to unblock the wait.
*signalPtr = tc.signalValuePass;
}
hipStreamSynchronize(stream);
HIPASSERT(*dataPtr64 == DATA_UPDATE);
}
}
// Cleanup
HIPCHECK(hipFree(signalPtr));
hipHostUnregister(dataPtr64);