From 475d928be8fd885cd486842e75c61c93f93a74bf Mon Sep 17 00:00:00 2001 From: Ravi C Akkenapally Date: Wed, 17 Mar 2021 12:33:59 -0700 Subject: [PATCH] SWDEV-276827 - Stream Operations: match API with CUDA Change-Id: Ia79a950561e97df3b2229f76f5512dede2babf98 --- include/hip/amd_detail/hip_runtime_api.h | 28 ++-- rocclr/hip_stream_ops.cpp | 16 +- .../streamOperations/hipstream_operations.cpp | 152 ++++++++++++++++-- 3 files changed, 166 insertions(+), 30 deletions(-) diff --git a/include/hip/amd_detail/hip_runtime_api.h b/include/hip/amd_detail/hip_runtime_api.h index 8ce3bdac5f..9f04967164 100644 --- a/include/hip/amd_detail/hip_runtime_api.h +++ b/include/hip/amd_detail/hip_runtime_api.h @@ -1241,11 +1241,12 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback * @brief Enqueues a wait command to the stream. * * @param [in] stream - Stream identifier - * @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag. + * @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag * @param [in] value - Value to be used in compare operation - * @param [in] mask - Mask to be applied on value at memory before it is compared with value * @param [in] flags - Defines the compare operation, supported values are hipStreamWaitValueGte - * hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor. 
+ * hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor + * @param [in] mask - Mask to be applied on value at memory before it is compared with value, + * default value is set to enable every bit * * @returns #hipSuccess, #hipErrorInvalidValue * @@ -1266,17 +1267,19 @@ hipError_t hipStreamAddCallback(hipStream_t stream, hipStreamCallback_t callback * hipStreamWriteValue32, hipDeviceGetAttribute */ -hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, uint32_t mask, unsigned int flags); +hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, unsigned int flags, + uint32_t mask __dparm(0xFFFFFFFF)); /** * @brief Enqueues a wait command to the stream. * * @param [in] stream - Stream identifier - * @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag. + * @param [in] ptr - Pointer to memory object allocated using 'hipMallocSignalMemory' flag * @param [in] value - Value to be used in compare operation - * @param [in] mask - Mask to be applied on value at memory before it is compared with value. * @param [in] flags - Defines the compare operation, supported values are hipStreamWaitValueGte * hipStreamWaitValueEq, hipStreamWaitValueAnd and hipStreamWaitValueNor. + * @param [in] mask - Mask to be applied on value at memory before it is compared with value + * default value is set to enable every bit * * @returns #hipSuccess, #hipErrorInvalidValue * @@ -1297,14 +1300,16 @@ hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, ui * hipStreamWriteValue32, hipDeviceGetAttribute */ -hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, uint64_t mask, unsigned int flags); +hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, unsigned int flags, + uint64_t mask __dparm(0xFFFFFFFFFFFFFFFF)); /** * @brief Enqueues a write command to the stream. 
* * @param [in] stream - Stream identifier - * @param [in] ptr - Pointer to a GPU accessible memory object. + * @param [in] ptr - Pointer to a GPU accessible memory object * @param [in] value - Value to be written + * @param [in] flags - reserved, ignored for now, will be used in future releases * * @returns #hipSuccess, #hipErrorInvalidValue * @@ -1314,14 +1319,15 @@ hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, ui * @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32, * hipStreamWaitValue64 */ -hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value); +hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value, unsigned int flags); /** * @brief Enqueues a write command to the stream. * * @param [in] stream - Stream identifier - * @param [in] ptr - Pointer to a GPU accessible memory object. + * @param [in] ptr - Pointer to a GPU accessible memory object * @param [in] value - Value to be written + * @param [in] flags - reserved, ignored for now, will be used in future releases * * @returns #hipSuccess, #hipErrorInvalidValue * @@ -1331,7 +1337,7 @@ hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value); * @see hipExtMallocWithFlags, hipFree, hipStreamWriteValue32, hipStreamWaitValue32, * hipStreamWaitValue64 */ -hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, int64_t value); +hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, int64_t value, unsigned int flags); // end doxygen Stream Memory Operations diff --git a/rocclr/hip_stream_ops.cpp b/rocclr/hip_stream_ops.cpp index 213d8fe638..06018e639c 100644 --- a/rocclr/hip_stream_ops.cpp +++ b/rocclr/hip_stream_ops.cpp @@ -76,8 +76,8 @@ hipError_t ihipStreamOperation(hipStream_t stream, cl_command_type cmdType, void return hipSuccess; } -hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, uint32_t mask, - unsigned int flags) { +hipError_t 
hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, unsigned int flags, + uint32_t mask) { HIP_INIT_API(hipStreamWaitValue32, stream, ptr, value, mask, flags); // NOTE: ptr corresponds to a HSA Signal memeory which is 64 bits. // 32 bit value and mask are converted to 64-bit values. @@ -91,8 +91,8 @@ hipError_t hipStreamWaitValue32(hipStream_t stream, void* ptr, int32_t value, ui 0)); // sizeBytes un-used for wait, set it to 0 } -hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, uint64_t mask, - unsigned int flags) { +hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, unsigned int flags, + uint64_t mask) { HIP_INIT_API(hipStreamWaitValue64, stream, ptr, value, mask, flags); HIP_RETURN_DURATION(ihipStreamOperation( stream, @@ -104,8 +104,8 @@ hipError_t hipStreamWaitValue64(hipStream_t stream, void* ptr, int64_t value, ui 0)); // sizeBytes un-used for wait, set it to 0 } -hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value) { - HIP_INIT_API(hipStreamWriteValue32, stream, ptr, value); +hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value, unsigned int flags) { + HIP_INIT_API(hipStreamWriteValue32, stream, ptr, value, flags); HIP_RETURN_DURATION(ihipStreamOperation( stream, ROCCLR_COMMAND_STREAM_WRITE_VALUE, @@ -116,8 +116,8 @@ hipError_t hipStreamWriteValue32(hipStream_t stream, void* ptr, int32_t value) { 4)); } -hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, int64_t value) { - HIP_INIT_API(hipStreamWriteValue64, stream, ptr, value); +hipError_t hipStreamWriteValue64(hipStream_t stream, void* ptr, int64_t value, unsigned int flags) { + HIP_INIT_API(hipStreamWriteValue64, stream, ptr, value, flags); HIP_RETURN_DURATION(ihipStreamOperation( stream, ROCCLR_COMMAND_STREAM_WRITE_VALUE, diff --git a/tests/src/runtimeApi/streamOperations/hipstream_operations.cpp b/tests/src/runtimeApi/streamOperations/hipstream_operations.cpp index 
733b65098a..b3e4206f18 100644 --- a/tests/src/runtimeApi/streamOperations/hipstream_operations.cpp +++ b/tests/src/runtimeApi/streamOperations/hipstream_operations.cpp @@ -51,6 +51,7 @@ THE SOFTWARE. // Random predefiend 32 and 64 bit values constexpr int32_t value32 = 0x70F0F0FF; constexpr int64_t value64 = 0x7FFF0000FFFF0000; +constexpr unsigned int writeFlag = 0; constexpr float SLEEP_MS = 100; void testWrite() { @@ -77,8 +78,8 @@ void testWrite() { hipHostRegister(host_ptr32, sizeof(int32_t), 0); // Test writting registered host pointer - HIPCHECK(hipStreamWriteValue64(stream, host_ptr64, value64)); - HIPCHECK(hipStreamWriteValue32(stream, host_ptr32, value32)); + HIPCHECK(hipStreamWriteValue64(stream, host_ptr64, value64, writeFlag)); + HIPCHECK(hipStreamWriteValue32(stream, host_ptr32, value32, writeFlag)); hipStreamSynchronize(stream); HIPASSERT(*host_ptr64 == value64); @@ -92,15 +93,15 @@ void testWrite() { *host_ptr64 = 0x0; *host_ptr32 = 0x0; - HIPCHECK(hipStreamWriteValue64(stream, device_ptr64, value64)); - HIPCHECK(hipStreamWriteValue32(stream, device_ptr32, value32)); + HIPCHECK(hipStreamWriteValue64(stream, device_ptr64, value64, writeFlag)); + HIPCHECK(hipStreamWriteValue32(stream, device_ptr32, value32, writeFlag)); hipStreamSynchronize(stream); HIPASSERT(*host_ptr64 == value64); HIPASSERT(*host_ptr32 == value32); // Test Writing to Signal Memory - HIPCHECK(hipStreamWriteValue64(stream, signalPtr, value64)); + HIPCHECK(hipStreamWriteValue64(stream, signalPtr, value64, writeFlag)); hipStreamSynchronize(stream); HIPASSERT(*signalPtr == value64); @@ -142,7 +143,6 @@ void testWait() { int64_t signalValuePass; }; - TEST_WAIT testCases[] = { { // mask will ignore few MSB bits @@ -200,11 +200,82 @@ void testWait() { } }; + struct TEST_WAIT32_NO_MASK { + int compareOp; + int32_t waitValue; + int32_t signalValueFail; + int32_t signalValuePass; + }; + + // default mask 0xFFFFFFFF will be used. 
+ TEST_WAIT32_NO_MASK testCasesNoMask32[] = { + { + hipStreamWaitValueGte, + 0x7FFF0001, + 0x7FFF0000, + 0x7FFF0010 + }, + { + hipStreamWaitValueEq, + 0x7FFFFFFF, + 0x7FFF0000, + 0x7FFFFFFF + }, + { + hipStreamWaitValueAnd, + 0x70F0F0F0, + 0x0F0F0F0F, + 0X1F0F0F0F + }, + { + hipStreamWaitValueNor, + 0x7AAAAAAA, + static_cast<int32_t>(0x85555555), + static_cast<int32_t>(0x9AAAAAAA) + } + }; + + struct TEST_WAIT64_NO_MASK { + int compareOp; + int64_t waitValue; + int64_t signalValueFail; + int64_t signalValuePass; + }; + + // default mask 0xFFFFFFFFFFFFFFFF will be used. + TEST_WAIT64_NO_MASK testCasesNoMask64[] = { + { + hipStreamWaitValueGte, + 0x7FFFFFFFFFFF0001, + 0x7FFFFFFFFFFF0000, + 0x7FFFFFFFFFFF0001 + }, + { + hipStreamWaitValueEq, + 0x7FFFFFFFFFFFFFFF, + 0x7FFFFFFF0FFF0000, + 0x7FFFFFFFFFFFFFFF + }, + { + hipStreamWaitValueAnd, + 0x70F0F0F0F0F0F0F0, + 0x0F0F0F0F0F0F0F0F, + 0X1F0F0F0F0F0F0F0F + }, + { + hipStreamWaitValueNor, + 0x4724724747247247, + static_cast<int64_t>(0xbddbddbdbddbddbd), + static_cast<int64_t>(0xbddbddbdbddbddb3) + } + }; + if (!streamWaitValueSupported()) { std::cout << " hipStreamWaitValue: not supported on this device , skipping ... \n"; return; } - std::cout << " hipStreamWaitValue: testing ... \n"; + std::cout << " hipStreamWaitValue32: testing ... \n"; + std::cout << " hipStreamWaitValue64: testing ... \n"; hipStream_t stream; hipStreamCreate(&stream); @@ -215,6 +286,7 @@ void testWait() { hipHostRegister(dataPtr32, sizeof(int32_t), 0); // We run all test cases twice + // Run-1: streamWait is blocking (wait conditions is false) // Run-2: streamWait is non-blocking (wait condition is true) for (int run = 0; run < 2; run++) { @@ -224,8 +296,8 @@ void testWait() { *signalPtr = isBlocking ? 
tc.signalValueFail : tc.signalValuePass; *dataPtr64 = DATA_INIT; - HIPCHECK(hipStreamWaitValue64(stream, signalPtr, tc.waitValue, tc.mask, tc.compareOp)); - HIPCHECK(hipStreamWriteValue64(stream, dataPtr64, DATA_UPDATE)); + HIPCHECK(hipStreamWaitValue64(stream, signalPtr, tc.waitValue, tc.compareOp, tc.mask)); + HIPCHECK(hipStreamWriteValue64(stream, dataPtr64, DATA_UPDATE, writeFlag)); if (isBlocking) { // Trigger an implict flush and verify stream has pending work. @@ -243,8 +315,8 @@ void testWait() { *dataPtr32 = DATA_INIT; HIPCHECK(hipStreamWaitValue32(stream, signalPtr, static_cast<int32_t>(tc.waitValue), - static_cast<uint32_t>(tc.mask), tc.compareOp)); - HIPCHECK(hipStreamWriteValue32(stream, dataPtr32, DATA_UPDATE)); + tc.compareOp, static_cast<uint32_t>(tc.mask))); + HIPCHECK(hipStreamWriteValue32(stream, dataPtr32, DATA_UPDATE, writeFlag)); if (isBlocking) { // For DEBUG only @@ -262,6 +334,64 @@ void testWait() { } } + std::cout << " hipStreamWaitValue32 with default mask: testing ... \n"; + // Run-1: streamWait is blocking (wait conditions is false) + // Run-2: streamWait is non-blocking (wait condition is true) + for (int run = 0; run < 2; run++) { + bool isBlocking = run == 0; + + for (const auto& tc : testCasesNoMask32) { + *signalPtr = isBlocking ? tc.signalValueFail : tc.signalValuePass; + *dataPtr32 = DATA_INIT; + + HIPCHECK(hipStreamWaitValue32(stream, signalPtr, tc.waitValue, tc.compareOp)); + HIPCHECK(hipStreamWriteValue32(stream, dataPtr32, DATA_UPDATE, writeFlag)); + + if (isBlocking) { + // For DEBUG only + // usleep(500); + // HIPASSERT(*dataPtr32 == DATA_INIT); + + // Trigger an implict flush and verify stream has pending work. + HIPASSERT(hipStreamQuery(stream) == hipErrorNotReady); + + // update signal to unblock the wait. + *signalPtr = tc.signalValuePass; + } + hipStreamSynchronize(stream); + HIPASSERT(*dataPtr32 == DATA_UPDATE); + } + } + + std::cout << " hipStreamWaitValue64 with default mask: testing ... 
\n"; + // Run-1: streamWait is blocking (wait conditions is false) + // Run-2: streamWait is non-blocking (wait condition is true) + for (int run = 0; run < 2; run++) { + bool isBlocking = run == 0; + + for (const auto& tc : testCasesNoMask64) { + *signalPtr = isBlocking ? tc.signalValueFail : tc.signalValuePass; + *dataPtr64 = DATA_INIT; + + HIPCHECK(hipStreamWaitValue64(stream, signalPtr, tc.waitValue, tc.compareOp)); + HIPCHECK(hipStreamWriteValue64(stream, dataPtr64, DATA_UPDATE, writeFlag)); + + if (isBlocking) { + // For DEBUG only + // usleep(500); + // HIPASSERT(*dataPtr64 == DATA_INIT); + + // Trigger an implict flush and verify stream has pending work. + HIPASSERT(hipStreamQuery(stream) == hipErrorNotReady); + + // update signal to unblock the wait. + *signalPtr = tc.signalValuePass; + } + hipStreamSynchronize(stream); + HIPASSERT(*dataPtr64 == DATA_UPDATE); + } + } + // Cleanup HIPCHECK(hipFree(signalPtr)); hipHostUnregister(dataPtr64);