From 0ce3886fc82ccc62b52283bbc1e244e290e8f0df Mon Sep 17 00:00:00 2001 From: ROCm CI Service Account <66695075+rocm-ci@users.noreply.github.com> Date: Mon, 28 Nov 2022 20:16:16 +0530 Subject: [PATCH] SWDEV-346657 - add more hipStreamAcb test (#3064) * SWDEV-346657 - add more hipStreamAcb test Change-Id: If94628ec9c285e9689ce471931b3c40b7d3ece84 --- catch/unit/stream/CMakeLists.txt | 2 + .../stream/hipStreamACb_StrmSyncTiming.cc | 138 ++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100644 catch/unit/stream/hipStreamACb_StrmSyncTiming.cc diff --git a/catch/unit/stream/CMakeLists.txt b/catch/unit/stream/CMakeLists.txt index 0fbb211593..548bb2e52c 100644 --- a/catch/unit/stream/CMakeLists.txt +++ b/catch/unit/stream/CMakeLists.txt @@ -18,6 +18,7 @@ set(TEST_SRC hipStreamQuery.cc hipStreamWaitEvent.cc hipDeviceGetStreamPriorityRange.cc + hipStreamACb_StrmSyncTiming.cc ) else() set(TEST_SRC @@ -38,6 +39,7 @@ set(TEST_SRC hipStreamSynchronize.cc hipStreamQuery.cc hipDeviceGetStreamPriorityRange.cc + hipStreamACb_StrmSyncTiming.cc ) # set_source_files_properties(hipStreamAttachMemAsync.cc PROPERTIES COMPILE_FLAGS -std=c++17) diff --git a/catch/unit/stream/hipStreamACb_StrmSyncTiming.cc b/catch/unit/stream/hipStreamACb_StrmSyncTiming.cc new file mode 100644 index 0000000000..a607604963 --- /dev/null +++ b/catch/unit/stream/hipStreamACb_StrmSyncTiming.cc @@ -0,0 +1,138 @@ +/* +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/** +Testcase Scenario : +Validate behaviour of HIP when multiple hipStreaAddCallback() are called over +multiple Threads. +*/ + +#include +#include +#include +#include +#include + +#ifdef __HIP_PLATFORM_AMD__ +#define HIPRT_CB +#endif + +#define SECONDS_TO_WAIT 2 +#define TO_MICROSECONDS 1000000 + +hipStream_t mystream; +size_t N_elmts = 4096; +bool cbDone = false; +std::atomic Data_mismatch{0}; + +__global__ void vector_square(float* C_d, float* A_d, size_t N_elmts) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N_elmts; i += stride) { + C_d[i] = A_d[i] * A_d[i]; + } + + // Delay the thread 1 + if (offset == 1) { + unsigned long long int wait_t = 3200000000, start = clock64(), cur; + do { + cur = clock64() - start; + } while (cur < wait_t); + } +} + +float *A_h, *C_h; + +static void HIPRT_CB Callback1(hipStream_t stream, hipError_t status, void* userData) { + (void)stream; + (void)status; + (void)userData; + // Validate the data + for (size_t i = 0; i < N_elmts; i++) { + if (C_h[i] != A_h[i] * A_h[i]) { + Data_mismatch++; + } + } + + // Delay the callback completion + std::this_thread::sleep_for(std::chrono::seconds(SECONDS_TO_WAIT)); + cbDone = true; +} + +/** + Test multiple hipStreamAddCallback() called over + multiple Threads. + */ +TEST_CASE("Unit_hipStreamAddCallback_StrmSyncTiming") { + float *A_d, *C_d; + size_t Nbytes = N_elmts * sizeof(float); + + A_h = (float*)malloc(Nbytes); + HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + C_h = (float*)malloc(Nbytes); + HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + + // Fill with Phi + i + for (size_t i = 0; i < N_elmts; i++) { + A_h[i] = 1.618f + i; + } + + HIPCHECK(hipMalloc(&A_d, Nbytes)); + HIPCHECK(hipMalloc(&C_d, Nbytes)); + + HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); + + HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); + + const unsigned threadsPerBlock = 256; + const unsigned blocks = (N_elmts + 255) / threadsPerBlock; + + hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(threadsPerBlock), 0, mystream, C_d, A_d, + N_elmts); + HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); + HIPCHECK(hipStreamAddCallback(mystream, Callback1, NULL, 0)); + + // Wait untill Callback() function changes the cbDone value to true + while (!cbDone) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + // Since the callback is supposed to be called only after an implicit stream + // synchronization, and the runtime cannot continue until the callback is done + // hipStreamSynchronize call should not take much time. + auto start = std::chrono::high_resolution_clock::now(); + HIPCHECK(hipStreamSynchronize(mystream)); + auto stop = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(stop - start); + + HIPCHECK(hipStreamDestroy(mystream)); + HIPCHECK(hipFree(A_d)); + HIPCHECK(hipFree(C_d)); + free(A_h); + free(C_h); + + REQUIRE(Data_mismatch.load() == 0); + // HIP runtime cannot proceed further in the queue until callback completes + // Stream synchronize should not have much task to do after callback + // It should just be an extra empty marker wait + // Therefore the hipStreamSynchronize() in the + // main thread should hardly take any time to complete. + REQUIRE(duration.count() < 100); +}