EXSWCPHIPT-102 - Adding hipEventRecord Tests (#2722)
This commit is contained in:
@@ -19,72 +19,102 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// Test hipEventRecord serialization behavior.
|
||||
// Through manual inspection of the reported timestamps, can determine if recording a NULL event
|
||||
// forces synchronization : set
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <kernels.hh>
|
||||
#include <hip_test_context.hh>
|
||||
|
||||
#include <hip_test_common.hh>
|
||||
|
||||
#include <kernels.hh>
|
||||
#include <hip_test_checkers.hh>
|
||||
#include <hip_test_context.hh>
|
||||
|
||||
TEST_CASE("Unit_hipEventRecord") {
|
||||
size_t N = 4 * 1024 * 1024;
|
||||
unsigned threadsPerBlock = 256;
|
||||
int iterations = 1;
|
||||
constexpr size_t N = 1024;
|
||||
constexpr int iterations = 1;
|
||||
|
||||
unsigned blocks = (N + threadsPerBlock - 1) / threadsPerBlock;
|
||||
if (blocks > 1024) blocks = 1024;
|
||||
if (blocks == 0) blocks = 1;
|
||||
constexpr int blocks = 1024;
|
||||
|
||||
printf("N=%zu (A+B+C= %6.1f MB total) blocks=%u threadsPerBlock=%u iterations=%d\n", N,
|
||||
((double)3 * N * sizeof(float)) / 1024 / 1024, blocks, threadsPerBlock, iterations);
|
||||
printf("iterations=%d\n", iterations);
|
||||
constexpr size_t Nbytes = N * sizeof(float);
|
||||
|
||||
size_t Nbytes = N * sizeof(float);
|
||||
float *A_h, *B_h, *C_h;
|
||||
float *A_d, *B_d, *C_d;
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
|
||||
|
||||
float *A_h, *B_h, *C_h;
|
||||
float *A_d, *B_d, *C_d;
|
||||
HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N);
|
||||
enum TestType {
|
||||
WithFlags_Default = hipEventDefault,
|
||||
WithFlags_Blocking = hipEventBlockingSync,
|
||||
WithFlags_DisableTiming = hipEventDisableTiming,
|
||||
#if HT_AMD
|
||||
WithFlags_ReleaseToDevice = hipEventReleaseToDevice,
|
||||
WithFlags_ReleaseToSystem = hipEventReleaseToSystem,
|
||||
#endif
|
||||
WithoutFlags
|
||||
};
|
||||
|
||||
hipEvent_t start, stop;
|
||||
#if HT_AMD
|
||||
auto flags = GENERATE(WithFlags_Default, WithFlags_Blocking, WithFlags_DisableTiming,
|
||||
WithFlags_ReleaseToDevice, WithFlags_ReleaseToSystem, WithoutFlags);
|
||||
#endif
|
||||
|
||||
// NULL stream check:
|
||||
#if HT_NVIDIA
|
||||
auto flags =
|
||||
GENERATE(WithFlags_Default, WithFlags_Blocking, WithFlags_DisableTiming, WithoutFlags);
|
||||
#endif
|
||||
|
||||
|
||||
hipEvent_t start{}, stop{};
|
||||
|
||||
if (flags == WithoutFlags) {
|
||||
HIP_CHECK(hipEventCreate(&start));
|
||||
HIP_CHECK(hipEventCreate(&stop));
|
||||
} else {
|
||||
HIP_CHECK(hipEventCreateWithFlags(&start, flags));
|
||||
HIP_CHECK(hipEventCreateWithFlags(&stop, flags));
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
|
||||
HIP_CHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
|
||||
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
//--- START TIMED REGION
|
||||
long long hostStart = HipTest::get_time();
|
||||
// Record the start event
|
||||
HIP_CHECK(hipEventRecord(start, NULL));
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
//--- START TIMED REGION
|
||||
long long hostStart = HipTest::get_time();
|
||||
// Record the start event
|
||||
HIP_CHECK(hipEventRecord(start, NULL));
|
||||
|
||||
HipTest::launchKernel<float>(HipTest::vectorADD<float>, blocks, threadsPerBlock, 0, 0,
|
||||
static_cast<const float*>(A_d), static_cast<const float*>(B_d), C_d, N);
|
||||
HipTest::launchKernel<float>(HipTest::vectorADD<float>, blocks, 1, 0, 0,
|
||||
static_cast<const float*>(A_d), static_cast<const float*>(B_d),
|
||||
C_d, N);
|
||||
|
||||
HIP_CHECK(hipEventRecord(stop, NULL));
|
||||
HIP_CHECK(hipEventSynchronize(stop));
|
||||
long long hostStop = HipTest::get_time();
|
||||
//--- STOP TIMED REGION
|
||||
HIP_CHECK(hipEventRecord(stop, NULL));
|
||||
HIP_CHECK(hipEventSynchronize(stop));
|
||||
long long hostStop = HipTest::get_time();
|
||||
//--- STOP TIMED REGION
|
||||
|
||||
float eventMs = 1.0f;
|
||||
HIP_CHECK(hipEventElapsedTime(&eventMs, start, stop));
|
||||
float hostMs = HipTest::elapsed_time(hostStart, hostStop);
|
||||
float hostMs = HipTest::elapsed_time(hostStart, hostStop);
|
||||
|
||||
printf("host_time (chrono) =%6.3fms\n", hostMs);
|
||||
printf("kernel_time (hipEventElapsedTime) =%6.3fms\n", eventMs);
|
||||
printf("\n");
|
||||
INFO("host_time (chrono) = " << hostMs);
|
||||
|
||||
// Make sure timer is timing something...
|
||||
REQUIRE(eventMs > 0.0f);
|
||||
// Make sure timer is timing something...
|
||||
if (flags != WithFlags_DisableTiming) {
|
||||
float eventMs = 1.0f;
|
||||
HIP_CHECK(hipEventElapsedTime(&eventMs, start, stop));
|
||||
INFO("kernel_time (hipEventElapsedTime) = " << eventMs);
|
||||
REQUIRE(eventMs > 0.0f);
|
||||
}
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
|
||||
|
||||
HIP_CHECK(hipEventDestroy(start));
|
||||
HIP_CHECK(hipEventDestroy(stop));
|
||||
HIP_CHECK(hipEventDestroy(start));
|
||||
HIP_CHECK(hipEventDestroy(stop));
|
||||
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N, true);
|
||||
HipTest::checkVectorADD(A_h, B_h, C_h, N, true);
|
||||
HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
|
||||
TestContext::get().cleanContext();
|
||||
}
|
||||
|
||||
TEST_CASE("Unit_hipEventRecord_Negative") {
|
||||
SECTION("Nullptr event") {
|
||||
HIP_CHECK_ERROR(hipEventRecord(nullptr, nullptr), hipErrorInvalidResourceHandle);
|
||||
}
|
||||
}
|
||||
مرجع در شماره جدید
Block a user