From 1f5de1a84af8e9b20b7e481c6dfd5f43be18f85a Mon Sep 17 00:00:00 2001 From: Anton Mitkov Date: Mon, 11 Jul 2022 10:12:18 +0100 Subject: [PATCH] EXSWCPHIPT-88 - [catch2] Testing for hipHostGetFlags API extention (#2691) [ROCm/hip commit: 5b10765c6b0324089ef77dde39880dd2068f9ff4] --- .../catch/unit/memory/hipHostGetFlags.cc | 215 ++++++++++++++---- 1 file changed, 172 insertions(+), 43 deletions(-) diff --git a/projects/hip/tests/catch/unit/memory/hipHostGetFlags.cc b/projects/hip/tests/catch/unit/memory/hipHostGetFlags.cc index f150aaa5a8..25fb5b2734 100644 --- a/projects/hip/tests/catch/unit/memory/hipHostGetFlags.cc +++ b/projects/hip/tests/catch/unit/memory/hipHostGetFlags.cc @@ -1,5 +1,5 @@ /* -Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -27,8 +27,34 @@ This testcase verifies the basic scenario of hipHostGetFlags API #include #include #include +#include +#include -static constexpr auto LEN{1024*1024}; +std::vector FlagPart1Vec{hipHostMallocDefault, + hipHostMallocDefault | hipHostMallocPortable, + hipHostMallocDefault | hipHostMallocMapped, + hipHostMallocDefault | hipHostMallocWriteCombined, + hipHostMallocPortable, + hipHostMallocPortable | hipHostMallocMapped, + hipHostMallocPortable | hipHostMallocWriteCombined, + hipHostMallocMapped, + hipHostMallocMapped | hipHostMallocWriteCombined, + hipHostMallocWriteCombined}; +#if HT_AMD +// For cases where flags from FlagPart1Vec are not used, +// hipHostMallocDefault is the default on AMD +// and hipHostMallocMapped on Nvidia +std::vector FlagPart2Vec{0x0, + hipHostMallocNumaUser, + hipHostMallocNumaUser | hipHostMallocCoherent, + hipHostMallocNumaUser | hipHostMallocNonCoherent, + hipHostMallocCoherent, + hipHostMallocNonCoherent}; +#else +std::vector FlagPart2Vec{0x0}; +#endif + +static constexpr auto LEN{1024 * 1024}; /* This testcase verifies hipHostGetFlags API basic scenario @@ -38,57 +64,160 @@ This testcase verifies hipHostGetFlags API basic scenario 3. Validates it with the initial flags used while allocating memory */ -TEMPLATE_TEST_CASE("Unit_hipHostGetFlags_Basic", "", int, - float, double) { - constexpr auto SIZE{LEN * sizeof(TestType)}; - TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}; - TestType *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr}; - unsigned int FlagA, FlagB, FlagC; - FlagA = hipHostMallocWriteCombined | hipHostMallocMapped; - FlagB = hipHostMallocWriteCombined | hipHostMallocMapped; - FlagC = hipHostMallocMapped; +/* Possible host flags + * hipHostMallocDefault 0x0 + * hipHostMallocPortable 0x1 + * hipHostMallocMapped 0x2 + * hipHostMallocWriteCombined 0x4 + * NOT on Nvidia + * hipHostMallocNumaUser 0x20000000 + * hipHostMallocCoherent 0x40000000 + * hipHostMallocNonCoherent 0x80000000 + */ + +inline void checkFlags(unsigned int expected, unsigned int obtained) { + // Account for cases where flags from FlagPart1Vec do not include hipHostMallocMapped, + // on Nvidia devices it is added by default +#if HT_NVIDIA + expected = expected | hipHostMallocMapped; +#endif + REQUIRE(expected == obtained); +} + +TEST_CASE("Unit_hipHostGetFlags_flagCombos") { + + constexpr auto SIZE{LEN * sizeof(int)}; + int* A_h{nullptr}; + + const unsigned int FlagPart1 = GENERATE(from_range(FlagPart1Vec.begin(), FlagPart1Vec.end())); + const unsigned int FlagPart2 = GENERATE(from_range(FlagPart2Vec.begin(), FlagPart2Vec.end())); + + unsigned int FlagComp = FlagPart1 | FlagPart2; hipDeviceProp_t prop; - int device; + int device{}; + HIP_CHECK(hipGetDevice(&device)); + HIP_CHECK(hipGetDeviceProperties(&prop, device)); + + // Skip test if device does not support the property canMapHostMemory + if (prop.canMapHostMemory != 1) { + HipTest::HIP_SKIP_TEST("Device Property canMapHostMemory is not set"); + return; + } else { + // Allocate using the generated flags combos + INFO("Flag passed when allocating: 0x" << std::hex << FlagComp << "\n"); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), SIZE, FlagComp)); + unsigned int flagA{}; + + // get the flags from allocations and check if they are the same as the one set + HIP_CHECK(hipHostGetFlags(&flagA, A_h)); + + checkFlags(FlagComp, flagA); + HIP_CHECK(hipHostFree(A_h)); + } +} + +// Test Allocation with flags and getting flags in another thread +TEST_CASE("Unit_hipHostGetFlags_DifferentThreads") { + constexpr auto SIZE{LEN * sizeof(int)}; + int* A_h{nullptr}; + + const unsigned int FlagPart1 = GENERATE(from_range(FlagPart1Vec.begin(), FlagPart1Vec.end())); + const unsigned int FlagPart2 = GENERATE(from_range(FlagPart2Vec.begin(), FlagPart2Vec.end())); + + + unsigned int FlagComp = FlagPart1 | FlagPart2; + + hipDeviceProp_t prop; + int device{}; HIP_CHECK(hipGetDevice(&device)); HIP_CHECK(hipGetDeviceProperties(&prop, device)); if (prop.canMapHostMemory != 1) { - SUCCEED("Device Property canMapHostMemory is not set"); + HipTest::HIP_SKIP_TEST("Device Property canMapHostMemory is not set"); + return; } else { - HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), SIZE, - hipHostMallocWriteCombined | hipHostMallocMapped)); - HIP_CHECK(hipHostMalloc(reinterpret_cast(&B_h), SIZE, - hipHostMallocWriteCombined | hipHostMallocMapped)); - HIP_CHECK(hipHostMalloc(reinterpret_cast(&C_h), SIZE, - hipHostMallocMapped)); - - unsigned int flagA, flagB, flagC; - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), A_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&B_d), B_h, 0)); - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&C_d), C_h, 0)); + // Make sure we allocate before trying to get the flags + std::thread malloc_thread( + [&]() { HIP_CHECK_THREAD(hipHostMalloc(reinterpret_cast(&A_h), SIZE, FlagComp)); }); + malloc_thread.join(); + HIP_CHECK_THREAD_FINALIZE(); + unsigned int flagA{}; HIP_CHECK(hipHostGetFlags(&flagA, A_h)); - HIP_CHECK(hipHostGetFlags(&flagB, B_h)); - HIP_CHECK(hipHostGetFlags(&flagC, C_h)); - HipTest::setDefaultData(LEN, A_h, B_h, C_h); + checkFlags(FlagComp, flagA); - dim3 dimGrid(LEN / 512, 1, 1); - dim3 dimBlock(512, 1, 1); - hipLaunchKernelGGL(HipTest::vectorADD, dimGrid, dimBlock, - 0, 0, static_cast(A_d), - static_cast(B_d), C_d, LEN); - - HIP_CHECK(hipMemcpy(C_h, C_d, SIZE, hipMemcpyDeviceToHost)); - // Note this really HostToHost not - // DeviceToHost, since memory is mapped... - HipTest::checkVectorADD(A_h, B_h, C_h, LEN); - - REQUIRE(flagA == FlagA); - REQUIRE(flagB == FlagB); - REQUIRE(flagC == FlagC); HIP_CHECK(hipHostFree(A_h)); - HIP_CHECK(hipHostFree(B_h)); - HIP_CHECK(hipHostFree(C_h)); + } +} + +// Test behaviour of hipHostGetFlags with invalid args +TEST_CASE("Unit_hipHostGetFlags_InvalidArgs") { + constexpr auto SIZE{LEN * sizeof(int)}; + int* A_h{nullptr}; + + hipDeviceProp_t prop; + int device{}; + HIP_CHECK(hipGetDevice(&device)); + HIP_CHECK(hipGetDeviceProperties(&prop, device)); + + // Skip test if device does not support the property canMapHostMemory + if (prop.canMapHostMemory != 1) { + HipTest::HIP_SKIP_TEST("Device Property canMapHostMemory is not set"); + return; + } else { + SECTION("Invalid flag ptr being passed to hipHostGetFlags") { + // Use default flag + unsigned int FlagComp = 0x0; + + // Allocate using the generated flags combos + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), SIZE, FlagComp)); + + // use a nullptr to return flags to + unsigned int* flagA = nullptr; + + // get the flags from allocations and check if they are the same as the one set + HIP_CHECK_ERROR(hipHostGetFlags(flagA, A_h), hipErrorInvalidValue); + + HIP_CHECK(hipHostFree(A_h)); + } + + SECTION("Device ptr allocated with hipMalloc passed to hipHostGetFlags") { + unsigned int FlagComp = 0x4; + + // Allocate memory on device + HIP_CHECK(hipMalloc(reinterpret_cast(&A_h), SIZE)); + + unsigned int flagA{}; + + // get the flags from allocations and check if they are the same as the one set + HIP_CHECK_ERROR(hipHostGetFlags(&flagA, A_h), hipErrorInvalidValue); + INFO("Flag passed when allocating: " << std::hex << FlagComp << " Returned flag: " << std::hex + << flagA << "\n"); + + HIP_CHECK(hipFree(A_h)); + } + + SECTION("Ptr from hipHostGetDevicePointer passed to hipHostGetFlags") { + unsigned int FlagComp = 0x4; + + int* A_d{nullptr}; + // Allocate memory on device + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), SIZE, FlagComp)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), A_h, 0)); + + unsigned int flagA; + + // get the flags from allocations and check if they are the same as the one set + HIP_CHECK(hipHostGetFlags(&flagA, A_d)); + INFO("Flag passed when allocating: " << std::hex << FlagComp << " Returned flag: " << std::hex + << flagA << "\n"); +#if HT_NVIDIA + // on Nvidia adjust for cudaHostAllocMapped being set by default + FlagComp = FlagComp | hipHostMallocMapped; +#endif + REQUIRE(flagA == FlagComp); + HIP_CHECK(hipHostFree(A_h)); + } } }