diff --git a/tests/src/runtimeApi/stream/hipStreamACb_AltEnqueue.cpp b/tests/src/runtimeApi/stream/hipStreamACb_AltEnqueue.cpp new file mode 100644 index 0000000000..07acc4a591 --- /dev/null +++ b/tests/src/runtimeApi/stream/hipStreamACb_AltEnqueue.cpp @@ -0,0 +1,187 @@ + /* +Copyright (c) 2020-present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 + * TEST: %t + * HIT_END + */ + + + +// Testcase Description: This test case is used to verify if the callback +// function called through hipStreamAddCallback() api completes the execution +// in order as hipStreamAddCallback() api queued in their respective streams + + + +#include +#include +#include "hip/hip_runtime.h" +#include "test_common.h" + + +#ifdef __HIP_PLATFORM_HCC__ +#define HIPRT_CB +#endif + + +hipStream_t mystream1, mystream2; +size_t Num = 4096; +std::vector Stream1_Order, Stream2_Order; + + +__global__ void vector_square(float* C_d, float* A_d, size_t Num) { + size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = gputhread; i < Num; i += stride) { + C_d[i] = A_d[i] * A_d[i]; + } + + // Delay thread 1 only in the GPU + if (gputhread == 1) { + unsigned long long int wait_t = 3200000000, start = clock64(), cur; + do { + cur = clock64() - start; + } while (cur < wait_t); + } +} + +float *A_h, *C_h, *A_h1, *C_h1; + +static void HIPRT_CB Callback_Stream1(hipStream_t stream, hipError_t status, + void* userData) { + for (size_t i = 0; i < Num; i++) { + if (C_h[i] != A_h[i] * A_h[i]) { + std::cout << "Data mismatch in stream1 at: " << i << std::endl; + } + } + + // Storing the int passed into this callback into Stream1_Order + // this will help verify the order in which this Callback function + // is called. + Stream1_Order.push_back(*(reinterpret_cast(userData))); + delete reinterpret_cast(userData); +} + +static void HIPRT_CB Callback_Stream2(hipStream_t stream, hipError_t status, + void* userData) { + for (size_t i = 0; i < Num; i++) { + if (C_h1[i] != A_h1[i] * A_h1[i]) { + std::cout << "Data mismatch in stream2 at: " << i << std::endl; + } + } + // Storing the int passed into this callback into Stream2_Order + // this will help verify the order in which this Callback function + // is called. + Stream2_Order.push_back(*(reinterpret_cast(userData))); + delete reinterpret_cast(userData); +} + +int main(int argc, char* argv[]) { + float *A_d, *C_d; + size_t Nbytes = Num * sizeof(float); + + A_h = reinterpret_cast(malloc(Nbytes)); + HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + C_h = reinterpret_cast(malloc(Nbytes)); + HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + A_h1 = reinterpret_cast(malloc(Nbytes)); + HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + C_h1 = reinterpret_cast(malloc(Nbytes)); + HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + + // Fill with Phi + i + for (size_t i = 0; i < Num; i++) { + A_h[i] = 1.618f + i; + } + for (size_t i = 0; i < Num; i++) { + A_h1[i] = 1.618f + i; + } + + HIPCHECK(hipMalloc(&A_d, Nbytes)); + HIPCHECK(hipMalloc(&C_d, Nbytes)); + + HIPCHECK(hipStreamCreateWithFlags(&mystream1, hipStreamNonBlocking)); + HIPCHECK(hipStreamCreateWithFlags(&mystream2, hipStreamNonBlocking)); + + HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream1)); + + const unsigned threadsPerBlock = 256; + const unsigned blocks = (Num + 255)/threadsPerBlock; + int *ptr = NULL; + int *ptr1 = NULL; + // Queing jobs in both mystream1/2 followed by hipStreamAddCallback + for (int i = 1; i < 5; ++i) { + hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(threadsPerBlock), + 0, mystream1, C_d, A_d, Num); + HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, + mystream1)); + ptr = new int; + *ptr = i; + HIPCHECK(hipStreamAddCallback(mystream1, Callback_Stream1, + reinterpret_cast(ptr), 0)); + + hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(threadsPerBlock), + 0, mystream2, C_d, A_d, Num); + HIPCHECK(hipMemcpyAsync(C_h1, C_d, Nbytes, + hipMemcpyDeviceToHost, mystream2)); + ptr1 = new int; + *ptr1 = i; + HIPCHECK(hipStreamAddCallback(mystream2, Callback_Stream2, + reinterpret_cast(ptr1), 0)); + } + + HIPCHECK(hipStreamSynchronize(mystream1)); + HIPCHECK(hipStreamSynchronize(mystream2)); + + HIPCHECK(hipStreamDestroy(mystream1)); + HIPCHECK(hipStreamDestroy(mystream2)); + + HIPCHECK(hipFree(A_d)); + HIPCHECK(hipFree(C_d)); + free(A_h); + free(C_h); + free(A_h1); + free(C_h1); + + // Checking if Stream1_Order has ints in sequencial order or not + int i = 1; + for (auto itr=Stream1_Order.begin(); itr != Stream1_Order.end(); ++itr) { + if (*itr != i) { + printf("hipStreamAddCallBack() did not execute in sequence"); + printf(" in first stream\n"); + failed("Unexpected behavior!"); + } + ++i; + } + + // Checking if Stream2_Order has ints in sequencial order or not + i = 1; + for (auto itr=Stream2_Order.begin(); itr != Stream2_Order.end(); ++itr) { + if (*itr != i) { + printf("hipStreamAddCallBack() did not execute in sequence"); + printf(" in second stream\n"); + failed("Unexpected behavior!"); + } + ++i; + } + passed(); +} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_MStrm_Mgpu.cpp b/tests/src/runtimeApi/stream/hipStreamACb_MStrm_Mgpu.cpp new file mode 100644 index 0000000000..3a25d3331c --- /dev/null +++ b/tests/src/runtimeApi/stream/hipStreamACb_MStrm_Mgpu.cpp @@ -0,0 +1,180 @@ +/* + Copyright (c) 2019-present Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ + +// Testcase Description: Streams are launched in individual GPUs with different +// kernel. Verify that all the kernels queued are executed before the callback. + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS -std=c++11 + * TEST: %t + * HIT_END + */ + +#include +#include +#include +#include +#include "hip/hip_runtime.h" +#include "test_common.h" + +#ifdef __HIP_PLATFORM_HCC__ +#define HIPRT_CB +#endif + + +size_t N_ELMTS = 4096; + +// Data structure for holding and validating data +struct gpu_data { + int *int_ptr = NULL; + int gpu; + int acknowledge; +}; + +enum { + SUCCESS = 0, + KERNEL_EXECUTION_MISMATCH, + KERNEL_COMPUTATION_MISMATCH +}; + +__global__ void Add_Data(int* A_d, size_t N_ELMTS) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + for (size_t i = offset; i < N_ELMTS; i += stride) { + // Increment the value of A_d[i] by 1 + A_d[i] = A_d[i] + 1; + } +} + +// below kernel is just to load the gpu with multiple jobs +__global__ void Square_plus_one(int* A_d, int* C_d, size_t N_ELMTS) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + for (size_t i = offset; i < N_ELMTS; i += stride) { + C_d[i] = A_d[i]*A_d[i] + 1; + } +} + +static void HIPRT_CB Stream_Callback(hipStream_t stream, hipError_t status, + void* userData) { + gpu_data *ptr = reinterpret_cast(userData); + + // int_ptr in the passed userData will contain the data copied from device to + // host. Expected data in this field is the gpu ordinal. + if (*((*ptr).int_ptr) != (*ptr).gpu + 1) { + (*ptr).acknowledge = 100; // Assign unexpected value to indicate fail + } else { + (*ptr).acknowledge = (*ptr).gpu; // Assign the gpu ordinal received + } +} + +void launch_gpu(int gpu_ordinal) { + HIPCHECK(hipSetDevice(gpu_ordinal)); + int *A_d, *A_h, *C_h, *C_d; + size_t Nbytes = N_ELMTS * sizeof(int), Data_mismatch = 0; + bool cb = false; + A_h = (int *)malloc(Nbytes); + HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + C_h = (int *)malloc(Nbytes); + HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + + // Fill with 0 + for (size_t i = 0; i < N_ELMTS; i++) { + A_h[i] = 0; + } + + // setting gpu value in the struct object + gpu_data *ptr = new gpu_data; + ptr->int_ptr = C_h; + ptr->gpu = gpu_ordinal; + ptr->acknowledge = 100; + + HIPCHECK(hipMalloc(&A_d, Nbytes)); + HIPCHECK(hipMalloc(&C_d, Nbytes)); + + hipStream_t mystream; + HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); + + HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); + + const unsigned threadsPerBlock = 256; + const unsigned blocks = (N_ELMTS + 255)/threadsPerBlock; + + // A_d is initialized to 0. Add_Data kernel does A_d = A_d + 1 + // The Add_data kernel is called 1 time for gpu0, 2 times for gpu1 etc. + // At the end of the loop, A_d should have the gpu_ordinal number + for (int i = 0; i < gpu_ordinal + 1; i++) { + hipLaunchKernelGGL(Add_Data, dim3(blocks), dim3(threadsPerBlock), 0, + mystream, A_d, N_ELMTS); + hipLaunchKernelGGL(Square_plus_one, 1, 1, 0, mystream, A_d, C_d, N_ELMTS); + } + HIPCHECK(hipMemcpyAsync(C_h, A_d, Nbytes, hipMemcpyDeviceToHost, mystream)); + + // Pass the ptr as user data which contains the gpu_ordinal, default value + // for ack and the data that is copied to host + HIPCHECK(hipStreamAddCallback(mystream, Stream_Callback, + reinterpret_cast(ptr), 0)); + HIPCHECK(hipStreamSynchronize(mystream)); + + HIPCHECK(hipFree(A_d)); + HIPCHECK(hipFree(C_d)); + HIPCHECK(hipStreamDestroy(mystream)); + + int result = SUCCESS; + if (C_h[0] != gpu_ordinal + 1) { + result = KERNEL_EXECUTION_MISMATCH; + } + + if (ptr->gpu != ptr->acknowledge) { + result = KERNEL_COMPUTATION_MISMATCH; + } + + free(A_h); + free(C_h); + free(ptr); + + if (result == KERNEL_EXECUTION_MISMATCH) { + failed("Number of kernels expected to be executed does not match"); + } else if (result == KERNEL_COMPUTATION_MISMATCH) { + failed("Mismatch found in the result of the computation!"); + } +} + + +int main() { + int gpu_cnt = 0; + + HIPCHECK(hipGetDeviceCount(&gpu_cnt)); + if (gpu_cnt < 2) { + printf("Minimum of 2 gpus are needed for this test, skipping the test\n"); + passed(); + } + + std::thread T[gpu_cnt]; + + // Launching threads for each GPU + for (int i = 0; i < gpu_cnt; i++) { + T[i] = std::thread(launch_gpu, i); + } + + for (int i=0; i < gpu_cnt; i++) { + T[i].join(); + } + passed(); +} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_MultiCalls.cpp b/tests/src/runtimeApi/stream/hipStreamACb_MultiCalls.cpp new file mode 100644 index 0000000000..a182c85010 --- /dev/null +++ b/tests/src/runtimeApi/stream/hipStreamACb_MultiCalls.cpp @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * */ + +// Testcase Description:: This test case is used to check if the runtime is ok +// when hipStreamAddCallback() is called back to back multiple calls + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 + * TEST: %t + * HIT_END + */ + + +#include +#include +#include +#include +#include "hip/hip_runtime.h" +#include "test_common.h" + +#ifdef __HIP_PLATFORM_HCC__ +#define HIPRT_CB +#endif + +#define NUM_CALLS 1000 + +hipStream_t mystream; +size_t Num = 4096; +std::atomicCb_count{0}, Data_mismatch{0}; +float *A_h, *C_h; + +__global__ void vector_square(float* C_d, float* A_d, size_t Num) { + size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = gputhread; i < Num; i += stride) { + C_d[i] = A_d[i] * A_d[i]; + } + + // Delay thread 1 only in the GPU + if (gputhread == 1) { + unsigned long long int wait_t = 3200000000, start = clock64(), cur; + do { + cur = clock64() - start; + } while (cur < wait_t); + } +} + +static void HIPRT_CB Stream_Callback(hipStream_t stream, hipError_t status, + void* userData) { + for (size_t i = 0; i < Num; i++) { + // Validate the data and update Data_mismatch + if (C_h[i] != A_h[i] * A_h[i]) { + Data_mismatch++; + } + } + + // Increment the Cb_count to indicate that the callback is processed. + ++Cb_count; +} + +int main(int argc, char* argv[]) { + float *A_d, *C_d; + size_t Nbytes = Num * sizeof(float); + + A_h = (float*)malloc(Nbytes); + HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + C_h = (float*)malloc(Nbytes); + HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + + // Fill with Phi + i + for (size_t i = 0; i < Num; i++) { + A_h[i] = 1.618f + i; + } + + HIPCHECK(hipMalloc(&A_d, Nbytes)); + HIPCHECK(hipMalloc(&C_d, Nbytes)); + + HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); + + HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); + + const unsigned threadsPerBlock = 256; + const unsigned blocks = (Num+255)/threadsPerBlock; + hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(threadsPerBlock), 0, + mystream, C_d, A_d, Num); + + HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); + + // Add multiple callbacks to the stream + for (int i = 0; i< NUM_CALLS; i++) { + HIPCHECK(hipStreamAddCallback(mystream, Stream_Callback, NULL, 0)); + } + + HIPCHECK(hipStreamSynchronize(mystream)); + HIPCHECK(hipStreamDestroy(mystream)); + + HIPCHECK(hipFree(A_d)); + HIPCHECK(hipFree(C_d)); + + free(A_h); + free(C_h); + + // Each callback would have validated the data and if any mismatch is found, + // Data_mismatch will not have proper data. Validate the same. + // Cb_count should match the number of callbacks added. + if (Data_mismatch.load() != 0) { + failed("Mismatch found in the result of the computation!"); + } else if (Cb_count.load() != NUM_CALLS) { + failed("All callbacks for stream did not get called!"); + } + + passed(); +} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_MultiThread.cpp b/tests/src/runtimeApi/stream/hipStreamACb_MultiThread.cpp new file mode 100644 index 0000000000..29f6ed5f40 --- /dev/null +++ b/tests/src/runtimeApi/stream/hipStreamACb_MultiThread.cpp @@ -0,0 +1,165 @@ +/* +Copyright (c) 2020-present Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +// Testcase Description: This test case is used to check the behaviour of HIP +// when multiple hipStreaAddCallback() are called over multiple Threads +// This test case is disabled currently. + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 EXCLUDE_HIP_PLATFORM all + * TEST: %t + * HIT_END + */ + + + +#include +#include +#include +#include +#include "hip/hip_runtime.h" +#include "test_common.h" + +#ifdef __HIP_PLATFORM_HCC__ +#define HIPRT_CB +#endif + +#define NUM_THREADS 2000 + +size_t Num = 4096; +std::atomicCb_count{0}, Data_mismatch{0}; +hipStream_t mystream; +float *A_h, *C_h; + +__global__ void vector_square(float* C_d, float* A_d, size_t Num) { + size_t gputhread = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = gputhread; i < Num; i += stride) { + C_d[i] = A_d[i] * A_d[i]; + } + + // Delay thread 1 only in the GPU + if (gputhread == 1) { + unsigned long long int wait_t = 3200000000, start = clock64(), cur; + do { + cur = clock64() - start; + } while (cur < wait_t); + } +} + + +static void HIPRT_CB Thread1_Callback(hipStream_t stream, hipError_t status, + void* userData) { + for (size_t i = 0; i < Num; i++) { + // Validate the data and update Data_mismatch + if (C_h[i] != A_h[i] * A_h[i]) { + Data_mismatch++; + } + } + + // Increment the Cb_count to indicate that the callback is processed. + ++Cb_count; +} + +static void HIPRT_CB Thread2_Callback(hipStream_t stream, hipError_t status, + void* userData) { + for (size_t i = 0; i < Num; i++) { + // Validate the data and update Data_mismatch + if (C_h[i] != A_h[i] * A_h[i]) { + Data_mismatch++; + } + } + + // Increment the Cb_count to indicate that the callback is processed. + ++Cb_count; +} + +void Thread1_func() { + HIPCHECK(hipStreamAddCallback(mystream, Thread1_Callback, NULL, 0)); +} + +void Thread2_func() { + HIPCHECK(hipStreamAddCallback(mystream, Thread2_Callback, NULL, 0)); +} + + +int main(int argc, char* argv[]) { + float *A_d, *C_d; + size_t Nbytes = Num * sizeof(float); + + A_h = (float*)malloc(Nbytes); + HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + C_h = (float*)malloc(Nbytes); + HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + + // Fill with Phi + i + for (size_t i = 0; i < Num; i++) { + A_h[i] = 1.618f + i; + } + + HIPCHECK(hipMalloc(&A_d, Nbytes)); + HIPCHECK(hipMalloc(&C_d, Nbytes)); + + HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); + + HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); + + const unsigned threadsPerBlock = 256; + const unsigned blocks = (Num+255)/threadsPerBlock; + + hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(threadsPerBlock), 0, + mystream, C_d, A_d, Num); + + HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); + + std::thread T[NUM_THREADS]; + for (int i = 0; i < NUM_THREADS; i++) { + // Use different callback for every even thread + // The callbacks will be added to same stream from different threads + if ((i%2) == 0) + T[i] = std::thread(Thread1_func); + else + T[i] = std::thread(Thread2_func); + } + + // Wait until all the threads finish their execution + for (int i = 0; i < NUM_THREADS; i++) { + T[i].join(); + } + + HIPCHECK(hipStreamSynchronize(mystream)); + HIPCHECK(hipStreamDestroy(mystream)); + + HIPCHECK(hipFree(A_d)); + HIPCHECK(hipFree(C_d)); + + free(A_h); + free(C_h); + + // Cb_count should match total number of callbacks added from both threads + // Data_mismatch will be updated if there is problem in data validation + if (Cb_count.load() != NUM_THREADS) { + failed("All callbacks for stream did not get called!"); + } else if (Data_mismatch.load() != 0) { + failed("Mismatch found in the result of the computation!"); + } + + passed(); +} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_StrmSyncTiming.cpp b/tests/src/runtimeApi/stream/hipStreamACb_StrmSyncTiming.cpp new file mode 100644 index 0000000000..8c2fe2603b --- /dev/null +++ b/tests/src/runtimeApi/stream/hipStreamACb_StrmSyncTiming.cpp @@ -0,0 +1,147 @@ +/* +* Copyright (c) 2020-present Advanced Micro Devices, Inc. All rights reserved. +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +* IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. +*/ + +// Testcase Description: This test case checks whether hipStreamSynchronize() +// is taking less time than the time taken by Callback() function launched +// by hipStreamAddCallback() api. + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 + * TEST: %t + * HIT_END + */ + +#include +#include +#include +#include +#include "hip/hip_runtime.h" +#include "test_common.h" + +#ifdef __HIP_PLATFORM_HCC__ +#define HIPRT_CB +#endif + +#define SECONDS_TO_WAIT 5 +#define TO_MICROSECONDS 1000000 + +hipStream_t mystream; +size_t N_elmts = 4096; +bool Init_callback = false; +std::atomic Data_mismatch{0}; + +__global__ void vector_square(float* C_d, float* A_d, size_t N_elmts) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < N_elmts; i += stride) { + C_d[i] = A_d[i] * A_d[i]; + } + + // Delay the thread 1 + if (offset == 1) { + unsigned long long int wait_t = 3200000000, start = clock64(), cur; + do { + cur = clock64() - start; + } while (cur < wait_t); + } +} + +float *A_h, *C_h; + +static void HIPRT_CB Callback1(hipStream_t stream, hipError_t status, + void* userData) { + // Mark that the callback is entered. This is checked in main thread. + Init_callback = true; + + // Validate the data + for (size_t i = 0; i < N_elmts; i++) { + if (C_h[i] != A_h[i] * A_h[i]) { + Data_mismatch++; + } + } + + // Delay the callback completion + sleep(SECONDS_TO_WAIT); +} + + +int main(int argc, char* argv[]) { + float *A_d, *C_d; + size_t Nbytes = N_elmts * sizeof(float); + float tElapsed = 1.0f; + + A_h = (float*)malloc(Nbytes); + HIPCHECK(A_h == 0 ? hipErrorOutOfMemory : hipSuccess); + C_h = (float*)malloc(Nbytes); + HIPCHECK(C_h == 0 ? hipErrorOutOfMemory : hipSuccess); + + // Fill with Phi + i + for (size_t i = 0; i < N_elmts; i++) { + A_h[i] = 1.618f + i; + } + + HIPCHECK(hipMalloc(&A_d, Nbytes)); + HIPCHECK(hipMalloc(&C_d, Nbytes)); + + HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); + + HIPCHECK(hipMemcpyAsync(A_d, A_h, Nbytes, hipMemcpyHostToDevice, mystream)); + + const unsigned threadsPerBlock = 256; + const unsigned blocks = (N_elmts + 255)/threadsPerBlock; + + hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(threadsPerBlock), 0, + mystream, C_d, A_d, N_elmts); + HIPCHECK(hipMemcpyAsync(C_h, C_d, Nbytes, hipMemcpyDeviceToHost, mystream)); + HIPCHECK(hipStreamAddCallback(mystream, Callback1, NULL, 0)); + + // Wait untill Callback() function changes the Init_callback value to true + while (!Init_callback) {} + + // Since the callback is supposed to be called only after an implicit stream + // synchronization, hipStreamSynchronize call shoud not take much time. + auto start = std::chrono::high_resolution_clock::now(); + HIPCHECK(hipStreamSynchronize(mystream)); + auto stop = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(stop - start); + + HIPCHECK(hipStreamDestroy(mystream)); + HIPCHECK(hipFree(A_d)); + HIPCHECK(hipFree(C_d)); + free(A_h); + free(C_h); + + if (Data_mismatch.load() != 0) { + failed("Output from kernel execution is not as expected"); + } + + // There is a delay of 5000000 microseconds in the Callback() function, the + // duration.count() value is expected to less than 5000000 microseconds + // because it is expected that stream synchronization completed the moment + // Callback function starts the execution and not untill Callback function + // completes the execution. Therefore the hipStreamSynchronize() in the + // main thread should hardly take any time to complete. + + if (duration.count() < SECONDS_TO_WAIT * TO_MICROSECONDS) { + passed(); + } else { + failed("hipStreamSynchronize is waiting untill Callback() completes."); + } +} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_ThrdBehaviour.cpp b/tests/src/runtimeApi/stream/hipStreamACb_ThrdBehaviour.cpp new file mode 100644 index 0000000000..2eef534ea4 --- /dev/null +++ b/tests/src/runtimeApi/stream/hipStreamACb_ThrdBehaviour.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2020-present Advanced Micro Devices, Inc. All rights reserved. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * */ + +// Testcase Description: This test case tests if Host thread continues with +// next command after hipStreamAddCallback() api or wait for callback() call to +// finish. Ideally Host thread should not wait for callback to finish. + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 + * TEST: %t + * HIT_END + */ + +#include +#include +#include "hip/hip_runtime.h" +#include "test_common.h" + +#ifdef __HIP_PLATFORM_HCC__ +#define HIPRT_CB +#endif + +bool Callback_Completed = false; + +void HIPRT_CB Callback1(hipStream_t stream, hipError_t status, void* userData) { + sleep(5); + Callback_Completed = true; +} + +int main(int argc, char* argv[]) { + hipStream_t mystream; + HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); + HIPCHECK(hipStreamAddCallback(mystream, Callback1, NULL, 0)); + sleep(1); + + // Callback_Completed is initialized to false. The same is set to true at + // the end of callback and callback sleeps for 5 seconds. + // So, in case Callback_Completed is true here, it means the main thread + // has waited till callback is complete and is a fail case. + if (Callback_Completed == false) { + HIPCHECK(hipStreamDestroy(mystream)); + passed(); + } else { + HIPCHECK(hipStreamDestroy(mystream)); + failed("Unexpected: Host thread is waiting for callback to finish"); + } +} diff --git a/tests/src/runtimeApi/stream/hipStreamACb_order.cpp b/tests/src/runtimeApi/stream/hipStreamACb_order.cpp new file mode 100644 index 0000000000..7b66441fa6 --- /dev/null +++ b/tests/src/runtimeApi/stream/hipStreamACb_order.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2020-present Advanced Micro Devices, Inc. All rights reserved. + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + * IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * */ + +/* HIT_START + * BUILD: %t %s ../../test_common.cpp NVCC_OPTIONS --std=c++11 + * TEST: %t + * HIT_END + */ + +// Checks the callback execution in the same order it was added +// Also, it checks if the number of callbacks executed are same as the number +// of callbacks added + +#include +#include +#include "hip/hip_runtime.h" +#include "test_common.h" +#ifdef __HIP_PLATFORM_HCC__ +#define HIPRT_CB +#endif + +#define NUM_CALLS 10 +hipStream_t mystream; +bool Callback_SequenceMismatch = false; +std::atomic Cb_ordinal{0}; + +void HIPRT_CB Stream_Callback(hipStream_t stream, hipError_t status, + void* userData) { + // Userdata has the order of the callback. It should match with + // the callback counter Cb_ordinal as the sequence of callback + // should match the sequence of callback addition + if (*(reinterpret_cast(userData)) == Cb_ordinal) { + // Increment the Cb_ordinal to prepare for next sequence + Cb_ordinal++; + } else { + Callback_SequenceMismatch = true; + } + + delete reinterpret_cast(userData); +} + +int main(int argc, char* argv[]) { + int *ptr; + HIPCHECK(hipStreamCreateWithFlags(&mystream, hipStreamNonBlocking)); + for (int i = 0; i< NUM_CALLS; i++) { + ptr = new int; + *ptr = i; + // Pass the userdata with the order of the callback addition + HIPCHECK(hipStreamAddCallback(mystream, Stream_Callback, + reinterpret_cast(ptr), 0)); + } + + HIPCHECK(hipStreamSynchronize(mystream)); + HIPCHECK(hipStreamDestroy(mystream)); + + if (!(Cb_ordinal == (NUM_CALLS))) { + failed("All callbacks for stream did not get called!"); + } + + if (Callback_SequenceMismatch == false) { + passed(); + } else { + failed("hipStreamAddCallback() calls did not execute in sequence!"); + } +}