e6a4e455e5
3 hipMemset*Async* direct tests will crash in static lib because
some hsa objects are not initialized when hip apis call them.
The fix will make the hip apis called in main() so that hsa objects
have been initialized at that stage.
Change-Id: I41eb29b8c2588acac0cd3ae6d96b14fb1abd235a
[ROCm/clr commit: 272f89a966]
195 خطوط
6.1 KiB
C++
195 خطوط
6.1 KiB
C++
/*
|
|
* Copyright (c) 2020-present Advanced Micro Devices, Inc. All rights reserved.
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
*/
|
|
|
|
/*
|
|
* Test for checking order of execution of device kernel and
|
|
* hipMemsetAsync apis on all gpus
|
|
*/
|
|
|
|
/* HIT_START
|
|
* BUILD: %t %s ../../test_common.cpp EXCLUDE_HIP_PLATFORM nvidia
|
|
* TEST: %t
|
|
* HIT_END
|
|
*/
|
|
|
|
#include "test_common.h"
|
|
#define ITER 10
|
|
#define N 1024 * 1024
|
|
|
|
unsigned blocks = 0;
|
|
|
|
template <typename T>
|
|
__global__ void vector_square(T* B_d, T* C_d, size_t M) {
|
|
for (int i=0 ; i < M ; i++) {
|
|
C_d[i] = B_d[i] * B_d[i];
|
|
}
|
|
}
|
|
|
|
template <typename T>
|
|
class MemSetTest {
|
|
public:
|
|
T *A_h, *B_d, *B_h, *C_d;
|
|
T memSetVal;
|
|
size_t Nbytes;
|
|
bool testResult = true;
|
|
int validateCount = 0;
|
|
hipStream_t stream;
|
|
|
|
void memAllocate(T memSetValue) {
|
|
memSetVal = memSetValue;
|
|
Nbytes = N * sizeof(T);
|
|
|
|
A_h = reinterpret_cast<T*>(malloc(Nbytes));
|
|
HIPASSERT(A_h != NULL);
|
|
HIPCHECK(hipMalloc(&B_d , Nbytes));
|
|
B_h = reinterpret_cast<T*>(malloc(Nbytes));
|
|
HIPASSERT(B_h != NULL);
|
|
HIPCHECK(hipMalloc(&C_d , Nbytes));
|
|
|
|
for (int i = 0 ; i < N ; i++) {
|
|
B_h[i] = i;
|
|
}
|
|
HIPCHECK(hipMemcpy(B_d , B_h , Nbytes , hipMemcpyHostToDevice));
|
|
HIPCHECK(hipStreamCreate(&stream));
|
|
}
|
|
|
|
void memDeallocate() {
|
|
HIPCHECK(hipFree(B_d)); HIPCHECK(hipFree(C_d));
|
|
free(B_h); free(A_h);
|
|
HIPCHECK(hipStreamDestroy(stream));
|
|
}
|
|
|
|
void validateExecutionOrder() {
|
|
for (int p = 0 ; p < N ; p++) {
|
|
if (A_h[p] == memSetVal) {
|
|
validateCount+= 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
bool resultAfterAllIterations() {
|
|
testResult = (validateCount == (ITER * N)) ? true : false;
|
|
memDeallocate();
|
|
return testResult;
|
|
}
|
|
};
|
|
|
|
bool testhipMemsetAsyncWithKernel() {
|
|
MemSetTest <char> obj;
|
|
obj.memAllocate(memsetval);
|
|
for (int k = 0 ; k < ITER ; k++) {
|
|
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0,
|
|
obj.stream, obj.B_d, obj.C_d, N);
|
|
HIPCHECK(hipMemsetAsync(obj.C_d , obj.memSetVal , N , obj.stream));
|
|
HIPCHECK(hipStreamSynchronize(obj.stream));
|
|
HIPCHECK(hipMemcpy(obj.A_h , obj.C_d , obj.Nbytes , hipMemcpyDeviceToHost));
|
|
|
|
obj.validateExecutionOrder();
|
|
}
|
|
return obj.resultAfterAllIterations();
|
|
}
|
|
|
|
bool testhipMemsetD32AsyncWithKernel() {
|
|
MemSetTest <int32_t> obj;
|
|
obj.memAllocate(memsetD32val);
|
|
for (int k = 0 ; k < ITER ; k++) {
|
|
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0,
|
|
obj.stream, obj.B_d, obj.C_d, N);
|
|
HIPCHECK(hipMemsetD32Async(obj.C_d , obj.memSetVal , N , obj.stream));
|
|
HIPCHECK(hipStreamSynchronize(obj.stream));
|
|
HIPCHECK(hipMemcpy(obj.A_h, obj.C_d, obj.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
obj.validateExecutionOrder();
|
|
}
|
|
return obj.resultAfterAllIterations();
|
|
}
|
|
|
|
bool testhipMemsetD16AsyncWithKernel() {
|
|
MemSetTest <int16_t> obj;
|
|
obj.memAllocate(memsetD16val);
|
|
for (int k = 0 ; k < ITER ; k++) {
|
|
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0,
|
|
obj.stream, obj.B_d, obj.C_d, N);
|
|
HIPCHECK(hipMemsetD16Async(obj.C_d , obj.memSetVal , N , obj.stream));
|
|
HIPCHECK(hipStreamSynchronize(obj.stream));
|
|
HIPCHECK(hipMemcpy(obj.A_h , obj.C_d, obj.Nbytes , hipMemcpyDeviceToHost));
|
|
|
|
obj.validateExecutionOrder();
|
|
}
|
|
return obj.resultAfterAllIterations();
|
|
}
|
|
|
|
bool testhipMemsetD8AsyncWithKernel() {
|
|
MemSetTest <char> obj;
|
|
obj.memAllocate(memsetD8val);
|
|
for (int k = 0; k < ITER; k++) {
|
|
hipLaunchKernelGGL(vector_square, dim3(blocks), dim3(threadsPerBlock), 0,
|
|
obj.stream, obj.B_d, obj.C_d, N);
|
|
HIPCHECK(hipMemsetD8Async(obj.C_d, obj.memSetVal, N, obj.stream));
|
|
HIPCHECK(hipStreamSynchronize(obj.stream));
|
|
HIPCHECK(hipMemcpy(obj.A_h, obj.C_d, obj.Nbytes, hipMemcpyDeviceToHost));
|
|
|
|
obj.validateExecutionOrder();
|
|
}
|
|
return obj.resultAfterAllIterations();
|
|
}
|
|
|
|
int main() {
|
|
bool testResult = true;
|
|
int numDevices = 0;
|
|
blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
|
|
printf("blocks: %u\n", blocks);
|
|
|
|
HIPCHECK(hipGetDeviceCount(&numDevices));
|
|
printf("total number of gpus in the system: %d\n", numDevices);
|
|
|
|
for (int i = 0; i < numDevices; i++) {
|
|
HIPCHECK(hipSetDevice(i));
|
|
printf("test running on gpu %d\n", i);
|
|
|
|
testResult &= testhipMemsetAsyncWithKernel();
|
|
if (!(testResult)) {
|
|
printf("Mismatch in order of execution of hipMemsetAsync and kernel\n");
|
|
}
|
|
|
|
testResult &= testhipMemsetD32AsyncWithKernel();
|
|
if (!(testResult)) {
|
|
printf("Mismatch in order of execution of hipMemsetD32Async and kernel\n");
|
|
}
|
|
|
|
testResult &= testhipMemsetD16AsyncWithKernel();
|
|
if (!(testResult)) {
|
|
printf("Mismatch in order of execution of hipMemsetD16Async and kernel\n");
|
|
}
|
|
|
|
testResult &= testhipMemsetD8AsyncWithKernel();
|
|
if (!(testResult)) {
|
|
printf("Mismatch in order of execution of hipMemsetD8Async and kernel\n");
|
|
}
|
|
}
|
|
|
|
if (testResult) {
|
|
printf("Execution order of Kernel and hipMemsetAsync apis on "
|
|
"all gpus is correct!\n");
|
|
passed();
|
|
} else {
|
|
failed("One or more hipMemsetAsync tests failed\n");
|
|
}
|
|
}
|