diff --git a/tests/catch/multiproc/CMakeLists.txt b/tests/catch/multiproc/CMakeLists.txt index eab78965b0..4d2c69b114 100644 --- a/tests/catch/multiproc/CMakeLists.txt +++ b/tests/catch/multiproc/CMakeLists.txt @@ -9,6 +9,7 @@ set(LINUX_TEST_SRC hipGetDeviceCountMproc.cc hipGetDevicePropertiesMproc.cc hipSetGetDeviceMproc.cc + hipIpcMemAccessTest.cc ) if(UNIX) diff --git a/tests/catch/multiproc/hipIpcMemAccessTest.cc b/tests/catch/multiproc/hipIpcMemAccessTest.cc new file mode 100644 index 0000000000..4e143111c4 --- /dev/null +++ b/tests/catch/multiproc/hipIpcMemAccessTest.cc @@ -0,0 +1,156 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testcase verifies the hipIpcMemAccess APIs by creating memory handle +in parent process and access it in child process. +*/ + +#include +#include + +#ifdef __linux__ +#include +#include +#include +#include +#include +#include + +typedef struct mem_handle { + int device; + hipIpcMemHandle_t memHandle; + bool IfTestPassed; +} hip_ipc_t; + + +// This testcase verifies the hipIpcMemAccess APIs as follows +// The following program spawns a child process and does the following +// Parent iterate through each device, create memory -- create hipIpcMemhandle +// stores the mem handle in mmaped memory, release the child using sem_post() +// and wait for child to release itself(parent process) +// child process: +// Child process get the ipc mem handle using hipIpcOpenMemHandle +// Iterate through all the available gpus and do Device to Device copies +// and check for data consistencies and close the hipIpcCloseMemHandle +// release the parent and wait for parent to release itself(child) + +TEST_CASE("Unit_hipIpcMemAccess_Semaphores") { + hip_ipc_t *shrd_mem = NULL; + pid_t pid; + size_t N = 1024; + size_t Nbytes = N * sizeof(int); + int *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr}; + int *A_h{nullptr}, *C_h{nullptr}; + sem_t *sem_ob1{nullptr}, *sem_ob2{nullptr}; + int Num_devices = 0, CanAccessPeer = 0; + + std::string cmd_line = "rm -rf /dev/shm/sem.my-sem-object*"; + int res = system(cmd_line.c_str()); + REQUIRE(res != -1); + sem_ob1 = sem_open("/my-sem-object1", O_CREAT|O_EXCL, 0660, 0); + sem_ob2 = sem_open("/my-sem-object2", O_CREAT|O_EXCL, 0660, 0); + REQUIRE(sem_ob1 != SEM_FAILED); + REQUIRE(sem_ob2 != SEM_FAILED); + + shrd_mem = reinterpret_cast(mmap(NULL, sizeof(hip_ipc_t), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + 0, 0)); + REQUIRE(shrd_mem != NULL); + shrd_mem->IfTestPassed = true; + HipTest::initArrays(nullptr, nullptr, nullptr, + &A_h, nullptr, &C_h, N, false); + pid = fork(); + if (pid != 0) { + // Parent process + HIP_CHECK(hipGetDeviceCount(&Num_devices)); + for (int i = 0; i < Num_devices; ++i) { + if (shrd_mem->IfTestPassed == true) { + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipIpcGetMemHandle(reinterpret_cast + (&shrd_mem->memHandle), + A_d)); + HIP_CHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); + shrd_mem->device = i; + if ((sem_post(sem_ob1)) == -1) { + // Need to use inline function to release resources. + shrd_mem->IfTestPassed = false; + WARN("sem_post() call failed in parent process."); + } + if ((sem_wait(sem_ob2)) == -1) { + shrd_mem->IfTestPassed = false; + WARN("sem_wait() call failed in parent process."); + } + HIP_CHECK(hipFree(A_d)); + } + } + } else { + // Child process + HIP_CHECK(hipGetDeviceCount(&Num_devices)); + for (int j = 0; j < Num_devices; ++j) { + HIP_CHECK(hipSetDevice(j)); + if ((sem_wait(sem_ob1)) == -1) { + shrd_mem->IfTestPassed = false; + WARN("sem_wait() call failed in child process."); + if ((sem_post(sem_ob2)) == -1) { + shrd_mem->IfTestPassed = false; + WARN("sem_post() call on sem_ob2 failed"); + exit(1); + } + } + for (int i = 0; i < Num_devices; ++i) { + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipMalloc(&C_d, Nbytes)); + HIP_CHECK(hipIpcOpenMemHandle(reinterpret_cast(&B_d), + shrd_mem->memHandle, + hipIpcMemLazyEnablePeerAccess)); + HIP_CHECK(hipDeviceCanAccessPeer(&CanAccessPeer, i, shrd_mem->device)); + if (CanAccessPeer == 1) { + HIP_CHECK(hipMemcpy(C_d, B_d, Nbytes, hipMemcpyDeviceToDevice)); + HIP_CHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); + HipTest::checkTest(A_h, C_h, N); + memset(reinterpret_cast(C_h), 0, Nbytes); + // Checking if the data obtained from Ipc shared memory is consistent + HIP_CHECK(hipMemcpy(C_h, B_d, Nbytes, hipMemcpyDeviceToHost)); + HipTest::checkTest(A_h, C_h, N); + } + HIP_CHECK(hipIpcCloseMemHandle(reinterpret_cast(B_d))); + HIP_CHECK(hipFree(C_d)); + } + if ((sem_post(sem_ob2)) == -1) { + shrd_mem->IfTestPassed = false; + WARN("sem_post() call on sem_ob2 failed"); + exit(1); + } + } + exit(0); + } + if ((sem_unlink("/my-sem-object1")) == -1) { + WARN("sem_unlink() call on /my-sem-object1 failed"); + } + if ((sem_unlink("/my-sem-object2")) == -1) { + WARN("sem_unlink() call on /my-sem-object2 failed"); + } + int rFlag = 0; + waitpid(pid, &rFlag, 0); + REQUIRE(shrd_mem->IfTestPassed == true); +} +#endif diff --git a/tests/catch/unit/memory/CMakeLists.txt b/tests/catch/unit/memory/CMakeLists.txt index 83186db858..d9e688d167 100644 --- a/tests/catch/unit/memory/CMakeLists.txt +++ b/tests/catch/unit/memory/CMakeLists.txt @@ -1,4 +1,5 @@ # Common Tests - Test independent of all platforms +if(HIP_PLATFORM MATCHES "amd") set(TEST_SRC memset.cc malloc.cc @@ -24,8 +25,49 @@ set(TEST_SRC hipMemcpyAllApiNegative.cc hipMemcpyWithStreamMultiThread.cc hipMemcpy_MultiThread.cc + hipHostMalloc.cc + hipHostRegister.cc + hipMemPtrGetInfo.cc + hipPointerGetAttributes.cc + hipHostGetFlags.cc + hipMemoryAllocateCoherent.cc + hipMallocManaged_MultiScenario.cc + hipManagedKeyword.cc ) - +else() +set(TEST_SRC + memset.cc + malloc.cc + hipMemcpy2DToArray.cc + hipMemcpy2DToArrayAsync.cc + hipMemcpyPeer.cc + hipMemcpyPeerAsync.cc + hipMemcpy3D.cc + hipMemcpy3DAsync.cc + hipMemcpyParam2D.cc + hipMemcpyParam2DAsync.cc + hipMemcpy2D.cc + hipMemcpy2DAsync.cc + hipMemcpy2DFromArray.cc + hipMemcpy2DFromArrayAsync.cc + hipMemcpyAtoH.cc + hipMemcpyHtoA.cc + hipMemcpyDtoD.cc + hipMemcpyDtoDAsync.cc + hipMemcpyAsync.cc + hipMemcpy.cc + hipMemcpyWithStream.cc + hipMemcpyAllApiNegative.cc + hipMemcpyWithStreamMultiThread.cc + hipMemcpy_MultiThread.cc + hipHostMalloc.cc + hipHostRegister.cc + hipHostGetFlags.cc + hipMemoryAllocateCoherent.cc + hipMallocManaged_MultiScenario.cc + hipManagedKeyword.cc +) +endif() # Create shared lib of all tests add_library(MemoryTest SHARED EXCLUDE_FROM_ALL ${TEST_SRC}) diff --git a/tests/catch/unit/memory/hipHostGetFlags.cc b/tests/catch/unit/memory/hipHostGetFlags.cc new file mode 100644 index 0000000000..f150aaa5a8 --- /dev/null +++ b/tests/catch/unit/memory/hipHostGetFlags.cc @@ -0,0 +1,94 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testcase verifies the basic scenario of hipHostGetFlags API +*/ + +#include +#include +#include + +static constexpr auto LEN{1024*1024}; + +/* +This testcase verifies hipHostGetFlags API basic scenario +1. Allocates the memory using different flags +2. Gets the flags of the respective variable using + hipHostGetFlags API +3. Validates it with the initial flags used while allocating + memory +*/ +TEMPLATE_TEST_CASE("Unit_hipHostGetFlags_Basic", "", int, + float, double) { + constexpr auto SIZE{LEN * sizeof(TestType)}; + TestType *A_h{nullptr}, *B_h{nullptr}, *C_h{nullptr}; + TestType *A_d{nullptr}, *B_d{nullptr}, *C_d{nullptr}; + unsigned int FlagA, FlagB, FlagC; + + FlagA = hipHostMallocWriteCombined | hipHostMallocMapped; + FlagB = hipHostMallocWriteCombined | hipHostMallocMapped; + FlagC = hipHostMallocMapped; + + hipDeviceProp_t prop; + int device; + HIP_CHECK(hipGetDevice(&device)); + HIP_CHECK(hipGetDeviceProperties(&prop, device)); + if (prop.canMapHostMemory != 1) { + SUCCEED("Device Property canMapHostMemory is not set"); + } else { + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), SIZE, + hipHostMallocWriteCombined | hipHostMallocMapped)); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&B_h), SIZE, + hipHostMallocWriteCombined | hipHostMallocMapped)); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&C_h), SIZE, + hipHostMallocMapped)); + + unsigned int flagA, flagB, flagC; + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), A_h, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&B_d), B_h, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&C_d), C_h, 0)); + HIP_CHECK(hipHostGetFlags(&flagA, A_h)); + HIP_CHECK(hipHostGetFlags(&flagB, B_h)); + HIP_CHECK(hipHostGetFlags(&flagC, C_h)); + + HipTest::setDefaultData(LEN, A_h, B_h, C_h); + + dim3 dimGrid(LEN / 512, 1, 1); + dim3 dimBlock(512, 1, 1); + hipLaunchKernelGGL(HipTest::vectorADD, dimGrid, dimBlock, + 0, 0, static_cast(A_d), + static_cast(B_d), C_d, LEN); + + HIP_CHECK(hipMemcpy(C_h, C_d, SIZE, hipMemcpyDeviceToHost)); + // Note this really HostToHost not + // DeviceToHost, since memory is mapped... + HipTest::checkVectorADD(A_h, B_h, C_h, LEN); + + REQUIRE(flagA == FlagA); + REQUIRE(flagB == FlagB); + REQUIRE(flagC == FlagC); + HIP_CHECK(hipHostFree(A_h)); + HIP_CHECK(hipHostFree(B_h)); + HIP_CHECK(hipHostFree(C_h)); + } +} diff --git a/tests/catch/unit/memory/hipHostMalloc.cc b/tests/catch/unit/memory/hipHostMalloc.cc new file mode 100644 index 0000000000..70459cf9d2 --- /dev/null +++ b/tests/catch/unit/memory/hipHostMalloc.cc @@ -0,0 +1,236 @@ +/* + Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ + +/* +This testfile verifies the following scenarios of hipHostMalloc API +1. Basic scenario of hipHostMalloc API +2. Negative Scenarios of hipHostMalloc API +3. Allocating memory using hipHostMalloc with Coherent flag +4. Allocating memory using hipHostMalloc with NonCoherent flag +5. Allocating memory using hipHostMalloc with default flag +*/ + +#include +#include +#include + +#define SYNC_EVENT 0 +#define SYNC_STREAM 1 +#define SYNC_DEVICE 2 + +std::vector syncMsg = {"event", "stream", "device"}; +static constexpr int numElements{1024 * 16}; +static constexpr size_t sizeBytes{numElements * sizeof(int)}; + +__global__ void Set(int* Ad, int val) { + int tx = threadIdx.x + blockIdx.x * blockDim.x; + Ad[tx] = val; +} + +void CheckHostPointer(int numElements, int* ptr, unsigned eventFlags, + int syncMethod, std::string msg) { + std::cerr << "test: CheckHostPointer " + << msg + << " eventFlags = " << std::hex << eventFlags + << ((eventFlags & hipEventReleaseToDevice) ? + " hipEventReleaseToDevice" : "") + << ((eventFlags & hipEventReleaseToSystem) ? + " hipEventReleaseToSystem" : "") + << " ptr=" << ptr << " syncMethod=" + << syncMsg[syncMethod] << "\n"; + + hipStream_t s; + hipEvent_t e; + + // Init: + HIP_CHECK(hipStreamCreate(&s)); + HIP_CHECK(hipEventCreateWithFlags(&e, eventFlags)) + dim3 dimBlock(64, 1, 1); + dim3 dimGrid(numElements / dimBlock.x, 1, 1); + + const int expected = 13; + + // Init array to know state: + hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, 0x0, ptr, -42); + HIP_CHECK(hipDeviceSynchronize()); + + hipLaunchKernelGGL(Set, dimGrid, dimBlock, 0, s, ptr, expected); + HIP_CHECK(hipEventRecord(e, s)); + + // Host waits for event : + switch (syncMethod) { + case SYNC_EVENT: + HIP_CHECK(hipEventSynchronize(e)); + break; + case SYNC_STREAM: + HIP_CHECK(hipStreamSynchronize(s)); + break; + case SYNC_DEVICE: + HIP_CHECK(hipDeviceSynchronize()); + break; + default: + assert(0); + } + + for (int i = 0; i < numElements; i++) { + if (ptr[i] != expected) { + printf("mismatch at %d: %d != %d\n", i, ptr[i], expected); + REQUIRE(ptr[i] == expected); + } + } + + HIP_CHECK(hipStreamDestroy(s)); + HIP_CHECK(hipEventDestroy(e)); +} +/* +This testcase performs the basic scenario of hipHostMalloc API +Allocates the memory using hipHostMalloc API +Launches the kernel and performs vector addition. +validates thes result. +*/ +TEST_CASE("Unit_hipHostMalloc_Basic") { + static constexpr auto LEN{1024 * 1024}; + static constexpr auto SIZE{LEN * sizeof(float)}; + + hipDeviceProp_t prop; + int device; + HIP_CHECK(hipGetDevice(&device)); + HIP_CHECK(hipGetDeviceProperties(&prop, device)); + if (prop.canMapHostMemory != 1) { + SUCCEED("Does support HostPinned Memory"); + } else { + float *A_h, *B_h, *C_h; + float *A_d, *B_d, *C_d; + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_h), SIZE, + hipHostMallocWriteCombined | hipHostMallocMapped)); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&B_h), SIZE, + hipHostMallocDefault)); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&C_h), SIZE, + hipHostMallocMapped)); + + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&A_d), A_h, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&C_d), C_h, 0)); + + HipTest::setDefaultData(LEN, A_h, B_h, C_h); + + HIP_CHECK(hipMalloc(reinterpret_cast(&B_d), SIZE)); + HIP_CHECK(hipMemcpy(B_d, B_h, SIZE, hipMemcpyHostToDevice)); + + dim3 dimGrid(LEN / 512, 1, 1); + dim3 dimBlock(512, 1, 1); + hipLaunchKernelGGL(HipTest::vectorADD, dimGrid, dimBlock, + 0, 0, static_cast(A_d), + static_cast(B_d), C_d, LEN); + HIP_CHECK(hipMemcpy(C_h, C_d, LEN*sizeof(float), + hipMemcpyDeviceToHost)); + HIP_CHECK(hipDeviceSynchronize()); + HipTest::checkVectorADD(A_h, B_h, C_h, numElements); + + HIP_CHECK(hipHostFree(A_h)); + HIP_CHECK(hipHostFree(B_h)); + HIP_CHECK(hipHostFree(C_h)); + } +} +/* +This testcase verifies the hipHostMalloc API by passing nullptr +to the pointer variable +*/ +TEST_CASE("Unit_hipHostMalloc_Negative") { +#if HT_AMD + { + // Stimulate error condition: + int* A = nullptr; + REQUIRE(hipHostMalloc(reinterpret_cast(&A), sizeBytes, + hipHostMallocCoherent | hipHostMallocNonCoherent) + != hipSuccess); + REQUIRE(A == nullptr); + } +#endif +} +/* +This testcase verifies the hipHostMalloc API by +1.Allocating memory using noncoherent flag +2. Launches the kernel and modifies the variable + using different synchronization + techniquies +3. validates the result. +*/ +TEST_CASE("Unit_hipHostMalloc_NonCoherent") { + int* A = nullptr; + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A), + sizeBytes, hipHostMallocNonCoherent)); + const char* ptrType = "non-coherent"; + CheckHostPointer(numElements, A, hipEventReleaseToSystem, + SYNC_DEVICE, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToSystem, + SYNC_STREAM, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToSystem, + SYNC_EVENT, ptrType); +} + +/* +This testcase verifies the hipHostMalloc API by +1.Allocating memory using coherent flag +2. Launches the kernel and modifies the variable + using different synchronization + techniquies +3. validates the result. +*/ +TEST_CASE("Unit_hipHostMalloc_Coherent") { + int* A = nullptr; + if (hipHostMalloc(reinterpret_cast(&A), sizeBytes, + hipHostMallocCoherent) == hipSuccess) { + const char* ptrType = "coherent"; + CheckHostPointer(numElements, A, hipEventReleaseToDevice, + SYNC_DEVICE, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToDevice, + SYNC_STREAM, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToDevice, + SYNC_EVENT, ptrType); + + CheckHostPointer(numElements, A, hipEventReleaseToSystem, + SYNC_DEVICE, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToSystem, + SYNC_STREAM, ptrType); + CheckHostPointer(numElements, A, hipEventReleaseToSystem, + SYNC_EVENT, ptrType); + } else { + SUCCEED("Coherence memory allocation failed. Is SVM atomic supported?"); + } +} + +/* +This testcase verifies the hipHostMalloc API by +1.Allocating memory using default flag +2. Launches the kernel and modifies the variable + using different synchronization + techniquies +3. validates the result. +*/ +TEST_CASE("Unit_hipHostMalloc_Default") { + int* A = nullptr; + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A), sizeBytes)); + const char* ptrType = "default"; + CheckHostPointer(numElements, A, 0, SYNC_DEVICE, ptrType); + CheckHostPointer(numElements, A, 0, SYNC_STREAM, ptrType); + CheckHostPointer(numElements, A, 0, SYNC_EVENT, ptrType); +} diff --git a/tests/catch/unit/memory/hipHostRegister.cc b/tests/catch/unit/memory/hipHostRegister.cc new file mode 100644 index 0000000000..8312cf7f50 --- /dev/null +++ b/tests/catch/unit/memory/hipHostRegister.cc @@ -0,0 +1,160 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testfile verifies the following scenarios of hipHostRegister API +1. Referencing the hipHostRegister variable from kernel and performing + memset on that variable.This is verified for different datatypes. +2. hipHostRegister and perform hipMemcpy on it. +*/ + +#include +#include +#include + +#define OFFSET 128 +static constexpr auto LEN{1024*1024}; + +template +__global__ void Inc(T* Ad) { + int tx = threadIdx.x + blockIdx.x * blockDim.x; + Ad[tx] = Ad[tx] + static_cast(1); +} + +template +void doMemCopy(size_t numElements, int offset, T* A, T* Bh, T* Bd, + bool internalRegister) { + constexpr auto memsetval = 13.0f; + A = A + offset; + numElements -= offset; + + size_t sizeBytes = numElements * sizeof(T); + + if (internalRegister) { + HIP_CHECK(hipHostRegister(A, sizeBytes, 0)); + } + + // Reset + for (size_t i = 0; i < numElements; i++) { + A[i] = static_cast(i); + Bh[i] = 0.0f; + } + + HIP_CHECK(hipMemset(Bd, memsetval, sizeBytes)); + + HIP_CHECK(hipMemcpy(Bd, A, sizeBytes, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(Bh, Bd, sizeBytes, hipMemcpyDeviceToHost)); + + // Make sure the copy worked + for (size_t i = 0; i < numElements; i++) { + REQUIRE(Bh[i] == A[i]); + } + + if (internalRegister) { + HIP_CHECK(hipHostUnregister(A)); + } +} +/* +This testcase verifies the hipHostRegister API by +1. Allocating the memory using malloc +2. hipHostRegister that variable +3. Getting the corresponding device pointer of the registered varible +4. Launching kernel and access the device pointer variable +5. performing hipMemset on the device pointer variable +*/ +TEMPLATE_TEST_CASE("Unit_hipHostRegister_ReferenceFromKernelandhipMemset", + "", int, + float, double) { + size_t sizeBytes{LEN * sizeof(TestType)}; + TestType *A, **Ad; + int num_devices; + HIP_CHECK(hipGetDeviceCount(&num_devices)); + Ad = new TestType*[num_devices]; + A = reinterpret_cast(malloc(sizeBytes)); + HIP_CHECK(hipHostRegister(A, sizeBytes, 0)); + + for (int i = 0; i < LEN; i++) { + A[i] = static_cast(1); + } + + for (int i = 0; i < num_devices; i++) { + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&Ad[i]), A, 0)); + } + + // Reference the registered device pointer Ad from inside the kernel: + for (int i = 0; i < num_devices; i++) { + HIP_CHECK(hipSetDevice(i)); + hipLaunchKernelGGL(Inc, dim3(LEN / 512), dim3(512), 0, 0, Ad[i]); + HIP_CHECK(hipDeviceSynchronize()); + } + REQUIRE(A[10] == 1 + static_cast(num_devices)); + // Reference the registered device pointer Ad in hipMemset: + for (int i = 0; i < num_devices; i++) { + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipMemset(Ad[i], 0, sizeBytes)); + } + REQUIRE(A[10] == 0); + + HIP_CHECK(hipHostUnregister(A)); + + free(A); + delete [] Ad; +} +/* +This testcase verifies hipHostRegister API by +performing memcpy on the hipHostRegistered variable. +*/ +TEMPLATE_TEST_CASE("Unit_hipHostRegister_Memcpy", "", + int, float, double) { + // 1 refers to hipHostRegister + // 0 refers to malloc + auto mem_type = GENERATE(0, 1); + HIP_CHECK(hipSetDevice(0)); + + + size_t sizeBytes = LEN * sizeof(TestType); + TestType* A = reinterpret_cast(malloc(sizeBytes)); + + // Copy to B, this should be optimal pinned malloc copy: + // Note we are using the host pointer here: + TestType *Bh, *Bd; + Bh = reinterpret_cast(malloc(sizeBytes)); + HIP_CHECK(hipMalloc(&Bd, sizeBytes)); + + REQUIRE(LEN > OFFSET); + if (mem_type) { + for (size_t i = 0; i < OFFSET; i++) { + doMemCopy(LEN, i, A, Bh, Bd, true /*internalRegister*/); + } + } else { + HIP_CHECK(hipHostRegister(A, sizeBytes, 0)); + for (size_t i = 0; i < OFFSET; i++) { + doMemCopy(LEN, i, A, Bh, Bd, false /*internalRegister*/); + } + HIP_CHECK(hipHostUnregister(A)); + } + + free(A); + free(Bh); + hipFree(Bd); +} diff --git a/tests/catch/unit/memory/hipMallocManaged_MultiScenario.cc b/tests/catch/unit/memory/hipMallocManaged_MultiScenario.cc new file mode 100644 index 0000000000..bf67d70765 --- /dev/null +++ b/tests/catch/unit/memory/hipMallocManaged_MultiScenario.cc @@ -0,0 +1,323 @@ +/* + Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ + +/* + This testcase verifies the hipMallocManaged API in the following scenarios + 1. MultiChunkSingleDevice Scenario + 2. MultiChunkMultiDevice Scenario + 3. Negative Scenarios + 4. OverSubscription scenario + 5. Device context change + 6. Multiple Pointers + */ + +#include +#include +#include +#include + +const size_t MAX_GPU{256}; +static size_t N{4*1024*1024}; +#define INIT_VAL 123 + + +/* + * Kernel function to perform addition operation. + */ +template +__global__ void +vector_sum(T *Ad1, T *Ad2, size_t NUM_ELMTS) { + size_t offset = (blockIdx.x * blockDim.x + threadIdx.x); + size_t stride = blockDim.x * gridDim.x; + + for (size_t i = offset; i < NUM_ELMTS; i += stride) { + Ad2[i] = Ad1[i] + Ad1[i]; + } +} + +// The following Test case tests the following scenario: +// A large chunk of hipMallocManaged() memory(Hmm) is created +// Equal parts of Hmm is accessed and +// kernel is launched on acessed chunk of hmm memory +// and checks if there are any inconsistencies or access issues +TEST_CASE("Unit_hipMallocManaged_MultiChunkSingleDevice") { +std::atomic DataMismatch{0}; + constexpr int Chunks = 4; + int Counter = 0; + int NUM_ELMS = (1024 * 1024); + float *Ad[Chunks], *Hmm = nullptr, *Ah = new float[NUM_ELMS]; + hipStream_t stream[Chunks]; + for (int i = 0; i < Chunks; ++i) { + HIP_CHECK(hipMalloc(&Ad[i], NUM_ELMS * sizeof(float))); + HIP_CHECK(hipMemset(Ad[i], 0, NUM_ELMS * sizeof(float))); + HIP_CHECK(hipStreamCreate(&stream[i])); + } + HIP_CHECK(hipMallocManaged(&Hmm, (Chunks * NUM_ELMS * sizeof(float)))); + for (int i = 0; i < Chunks; ++i) { + for (; Counter < ((i + 1) * NUM_ELMS); ++Counter) { + Hmm[Counter] = (INIT_VAL + i); + } + } + const unsigned threadsPerBlock = 256; + const unsigned blocks = (NUM_ELMS + 255)/256; + for (int k = 0; k < Chunks; ++k) { + vector_sum <<>> + (&Hmm[k * NUM_ELMS], Ad[k], NUM_ELMS); + } + HIP_CHECK(hipDeviceSynchronize()); + for (int m = 0; m < Chunks; ++m) { + HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float), + hipMemcpyDeviceToHost)); + for (int n = 0; n < NUM_ELMS; ++n) { + if (Ah[n] != ((INIT_VAL + m) * 2)) { + DataMismatch++; + } + } + } + REQUIRE(DataMismatch.load() == 0); + for (int i = 0; i < Chunks; ++i) { + HIP_CHECK(hipFree(Ad[i])); + HIP_CHECK(hipStreamDestroy(stream[i])); + } + HIP_CHECK(hipFree(Hmm)); + delete [] Ah; +} + +// The following Test case tests the following scenario: +// A large chunk of hipMallocManaged() memory(Hmm) is created +// Equal parts of Hmm is accessed on available gpus and +// kernel is launched on acessed chunk of hmm memory +// and checks if there are any inconsistencies or access issues +TEST_CASE("Unit_hipMallocManaged_MultiChunkMultiDevice") { + std::atomic DataMismatch{0}; + int Counter = 0; + int NumDevices = 0; + HIP_CHECK(hipGetDeviceCount(&NumDevices)); + unsigned int NUM_ELMS = (1024 * 1024); + float *Ad[MAX_GPU], *Hmm = NULL, *Ah = new float[NUM_ELMS]; + hipStream_t stream[MAX_GPU]; + for (int Oloop = 0; Oloop < NumDevices; ++Oloop) { + HIP_CHECK(hipSetDevice(Oloop)); + HIP_CHECK(hipMalloc(&Ad[Oloop], NUM_ELMS * sizeof(float))); + HIP_CHECK(hipMemset(Ad[Oloop], 0, NUM_ELMS * sizeof(float))); + HIP_CHECK(hipStreamCreate(&stream[Oloop])); + } + HIP_CHECK(hipMallocManaged(&Hmm, (NumDevices * NUM_ELMS * sizeof(float)))); + for (int i = 0; i < NumDevices; ++i) { + for (; Counter < static_cast((i + 1) * NUM_ELMS); ++Counter) { + Hmm[Counter] = INIT_VAL + i; + } + } + const unsigned threadsPerBlock = 256; + const unsigned blocks = (NUM_ELMS + 255)/256; + for (int Klaunch = 0; Klaunch < NumDevices; ++Klaunch) { + HIP_CHECK(hipSetDevice(Klaunch)); + vector_sum <<>> + (&Hmm[Klaunch * NUM_ELMS], Ad[Klaunch], NUM_ELMS); + } + HIP_CHECK(hipDeviceSynchronize()); + for (int m = 0; m < NumDevices; ++m) { + HIP_CHECK(hipMemcpy(Ah, Ad[m], NUM_ELMS * sizeof(float), + hipMemcpyDeviceToHost)); + for (size_t n = 0; n < NUM_ELMS; ++n) { + if (Ah[n] != ((INIT_VAL + m) * 2)) { + DataMismatch++; + } + } + memset(reinterpret_cast(Ah), 0, NUM_ELMS * sizeof(float)); + } + REQUIRE(DataMismatch.load() == 0); + for (int i = 0; i < NumDevices; ++i) { + HIP_CHECK(hipFree(Ad[i])); + HIP_CHECK(hipStreamDestroy(stream[i])); + } + HIP_CHECK(hipFree(Hmm)); + delete [] Ah; +} + +// The following tests oversubscription hipMallocManaged() api +// Currently disabled. +TEST_CASE("Unit_hipMallocManaged_OverSubscription") { + void *A = nullptr; + size_t total = 0, free = 0; + HIP_CHECK(hipMemGetInfo(&free, &total)); + // ToDo: In case of HMM, memory over-subscription is allowed. Hence, relook + // into how out of memory can be tested. + // Demanding more mem size than available +#if HT_AMD + REQUIRE(hipMallocManaged(&A, (free +1), hipMemAttachGlobal) != hipSuccess); +#endif +} + +// The following test does negative testing of hipMallocManaged() api +// by passing invalid values and check if the behavior is as expected +TEST_CASE("Unit_hipMallocManaged_Negative") { + void *A; + size_t total = 0, free = 0; + HIP_CHECK(hipMemGetInfo(&free, &total)); + + SECTION("Nullptr to devPtr") { + REQUIRE(hipMallocManaged(NULL, 1024, hipMemAttachGlobal) != hipSuccess); + } + + // cuda api doc says : If size is 0, cudaMallocManaged returns + // cudaErrorInvalidValue. However, it is observed that cuda 11.2 api returns + // success and contradicts with api doc. + + // With size(0), api expected to return error code (or) + // reset ptr while returning success (to accommodate cuda 11.2 api behavior). + SECTION("size 0 with flag hipMemAttachGlobal") { +#if HT_AMD + REQUIRE(hipMallocManaged(&A, 0, hipMemAttachGlobal) != hipSuccess); +#else + REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) == hipSuccess); +#endif + } + + SECTION("devptr is nullptr with flag hipMemAttachHost") { + REQUIRE(hipMallocManaged(NULL, 1024, hipMemAttachHost) != hipSuccess); + } + + // cuda api doc says : If size is 0, cudaMallocManaged returns + // cudaErrorInvalidValue. However, it is observed that cuda 11.2 api returns + // success and contradicts with api doc. + + // With size(0), api expected to return error code (or) + // reset ptr while returning success (to accommodate cuda 11.2 api behavior). + SECTION("size 0 with flag hipMemAttachHost") { +#if HT_AMD + REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) != hipSuccess); +#else + REQUIRE(hipMallocManaged(&A, 0, hipMemAttachHost) == hipSuccess); +#endif + } + SECTION("nullptr to devptr, size 0 and flag 0") { + REQUIRE(hipMallocManaged(NULL, 0, 0) != hipSuccess); + } + + SECTION("Numeric value to flag parameter") { + REQUIRE(hipMallocManaged(&A, 1024, 145) != hipSuccess); + } + + SECTION("Negative value to size") { + REQUIRE(hipMallocManaged(&A, -10, hipMemAttachGlobal)); + } +} + +// Allocate two pointers using hipMallocManaged(), initialize, +// then launch kernel using these pointers directly and +// later validate the content without using any Memcpy. +TEMPLATE_TEST_CASE("Unit_hipMallocManaged_TwoPointers", "", + int, float, double) { + int NumDevices = 0; + HIP_CHECK(hipGetDeviceCount(&NumDevices)); + TestType *Hmm1 = nullptr, *Hmm2 = nullptr; + + for (int i = 0; i < NumDevices; ++i) { + HIP_CHECK(hipSetDevice(i)); + std::atomic DataMismatch{0}; + HIP_CHECK(hipMallocManaged(&Hmm1, N * sizeof(TestType))); + HIP_CHECK(hipMallocManaged(&Hmm2, N * sizeof(TestType))); + for (size_t m = 0; m < N; ++m) { + Hmm1[m] = m; + Hmm2[m] = 0; + } + const unsigned threadsPerBlock = 256; + const unsigned blocks = (N + 255)/256; + // Kernel launch + vector_sum <<>> (Hmm1, Hmm2, N); + HIP_CHECK(hipDeviceSynchronize()); + for (size_t v = 0; v < N; ++v) { + if (Hmm2[v] != static_cast(v + v)) { + DataMismatch++; + } + } + REQUIRE(DataMismatch.load() == 0); + HIP_CHECK(hipFree(Hmm1)); + HIP_CHECK(hipFree(Hmm2)); + } +} + +// In the following test, a memory is created using hipMallocManaged() by +// setting a device and verified if it is accessible when the context is set +// to all other devices. This include verification and Device two Device +// transfers and kernel launch o discover if there any access issues. + +TEMPLATE_TEST_CASE("Unit_hipMallocManaged_DeviceContextChange", "", + unsigned char, int, float, double) { + std::atomic DataMismatch; + TestType *Ah1 = new TestType[N], *Ah2 = new TestType[N], *Ad = nullptr, + *Hmm = nullptr; + int NumDevices = 0; + HIP_CHECK(hipGetDeviceCount(&NumDevices)); + + for (size_t i =0; i < N; ++i) { + Ah1[i] = INIT_VAL; + Ah2[i] = 0; + } + for (int Oloop = 0; Oloop < NumDevices; ++Oloop) { + DataMismatch = 0; + HIP_CHECK(hipSetDevice(Oloop)); + HIP_CHECK(hipMallocManaged(&Hmm, N * sizeof(TestType))); + for (int Iloop = 0; Iloop < NumDevices; ++Iloop) { + HIP_CHECK(hipSetDevice(Iloop)); + HIP_CHECK(hipMalloc(&Ad, N * sizeof(TestType))); + // Copy data from host to hipMallocMananged memory and verify + HIP_CHECK(hipMemcpy(Hmm, Ah1, N * sizeof(TestType), + hipMemcpyHostToDevice)); + for (size_t v = 0; v < N; ++v) { + if (Hmm[v] != INIT_VAL) { + DataMismatch++; + } + } + REQUIRE(DataMismatch.load() == 0); + + // Executing D2D transfer with hipMallocManaged memory and verify + HIP_CHECK(hipMemcpy(Ad, Hmm, N * sizeof(TestType), + hipMemcpyDeviceToDevice)); + HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType), + hipMemcpyDeviceToHost)); + for (size_t k = 0; k < N; ++k) { + if (Ah2[k] != INIT_VAL) { + DataMismatch++; + } + } + REQUIRE(DataMismatch.load() == 0); + HIP_CHECK(hipMemset(Ad, 0, N * sizeof(TestType))); + const unsigned threadsPerBlock = 256; + const unsigned blocks = (N + 255)/256; + // Launching the kernel to check if there is any access issue with + // hipMallocManaged memory and local device's memory + vector_sum <<>> (Hmm, Ad, N); + hipDeviceSynchronize(); + HIP_CHECK(hipMemcpy(Ah2, Ad, N * sizeof(TestType), + hipMemcpyDeviceToHost)); + for (size_t m = 0; m < N; ++m) { + if (Ah2[m] != 246) { + DataMismatch++; + } + } + REQUIRE(DataMismatch.load() == 0); + HIP_CHECK(hipFree(Ad)); + } + HIP_CHECK(hipFree(Hmm)); + } + free(Ah1); + free(Ah2); +} diff --git a/tests/catch/unit/memory/hipManagedKeyword.cc b/tests/catch/unit/memory/hipManagedKeyword.cc new file mode 100644 index 0000000000..f5e013b7ba --- /dev/null +++ b/tests/catch/unit/memory/hipManagedKeyword.cc @@ -0,0 +1,75 @@ +/* + Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR + IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ + +/* + This testcase verifies the hipManagedKeyword basic scenario + */ + +#include +#include + +#define N 1048576 +__managed__ float A[N]; // Accessible by ALL CPU and GPU functions !!! +__managed__ float B[N]; +__managed__ int x = 0; + +__global__ void add(const float *A, float *B) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (int i = index; i < N; i += stride) + B[i] = A[i] + B[i]; +} + +__global__ void GPU_func() { + x++; +} + +TEST_CASE("Unit_hipManagedKeyword_SingleGpu") { + for (int i = 0; i < N; i++) { + A[i] = 1.0f; + B[i] = 2.0f; + } + + int blockSize = 256; + int numBlocks = (N + blockSize - 1) / blockSize; + dim3 dimGrid(numBlocks, 1, 1); + dim3 dimBlock(blockSize, 1, 1); + hipLaunchKernelGGL(add, dimGrid, dimBlock, 0, 0, static_cast(A), + static_cast(B)); + + hipDeviceSynchronize(); + + float maxError = 0.0f; + for (int i = 0; i < N; i++) + maxError = fmax(maxError, fabs(B[i]-3.0f)); + + REQUIRE(maxError == 0.0f); +} + +TEST_CASE("Unit_hipManagedKeyword_MultiGpu") { + int numDevices = 0; + hipGetDeviceCount(&numDevices); + + for (int i = 0; i < numDevices; i++) { + hipSetDevice(i); + GPU_func<<< 1, 1 >>>(); + hipDeviceSynchronize(); + } + REQUIRE(x == numDevices); +} diff --git a/tests/catch/unit/memory/hipMemPtrGetInfo.cc b/tests/catch/unit/memory/hipMemPtrGetInfo.cc new file mode 100644 index 0000000000..0e737efb9c --- /dev/null +++ b/tests/catch/unit/memory/hipMemPtrGetInfo.cc @@ -0,0 +1,54 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testfile verifies the basic scenario of hipMemPtrGetInfo API +*/ +#include +struct MemInfo{ + float a; + int b; + void* c; +}; + +/* +This testcase verifies the basic scenario of +hipMemPtrGetInfo API +1. Allocates specific size of memory for the variables +2. Gets the allocated size of that variable using hipMemPtrGetInfo API +3. Validates the initial size and allocated size +*/ +TEST_CASE("Unit_hipMemPtrGetInfo_Basic") { + int* iPtr; + float* fPtr; + MemInfo* sPtr; + size_t sSetSize = 1024, sGetSize; + HIP_CHECK(hipMalloc(&iPtr, sSetSize)); + HIP_CHECK(hipMalloc(&fPtr, sSetSize)); + HIP_CHECK(hipMalloc(&sPtr, sSetSize)); + HIP_CHECK(hipMemPtrGetInfo(iPtr, &sGetSize)); + REQUIRE(sGetSize == sSetSize); + HIP_CHECK(hipMemPtrGetInfo(fPtr, &sGetSize)); + REQUIRE(sGetSize == sSetSize); + HIP_CHECK(hipMemPtrGetInfo(sPtr, &sGetSize)); + REQUIRE(sGetSize == sSetSize); +} diff --git a/tests/catch/unit/memory/hipMemcpy.cc b/tests/catch/unit/memory/hipMemcpy.cc index 71737538a0..8063857812 100644 --- a/tests/catch/unit/memory/hipMemcpy.cc +++ b/tests/catch/unit/memory/hipMemcpy.cc @@ -512,23 +512,6 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpy_H2H-H2D-D2H-H2PinMem", "", int, HipTest::freeArrays(nullptr, nullptr, nullptr, A_Ph, B_Ph, nullptr, true); } -/* -This testcase verfies the boundary checks of hipMemcpy API for different sizes -*/ -TEST_CASE("Unit_hipMemcpy_BoundaryCheck") { - size_t maxElem = 32 * 1024 * 1024; - DeviceMemory memD(maxElem); - HostMemory memU(maxElem, 0 /*usePinnedHost*/); - HostMemory memP(maxElem, 0 /*usePinnedHost*/); - memcpytest2(&memD, &memU, 32 * 1024 * 1024, 0, 0, 0); - auto sizes = GENERATE(15 * 1024 * 1024, 16 * 1024 * 1024, - 16 * 1024 * 1024 + 16 * 1024, - 16 * 1024 * 1024 + 512 * 1024, - 17 * 1024 * 1024 + 1024, - 32 * 1024 * 1024); - memcpytest2(&memD, &memP, sizes, 0, 0, 0); -} - /* This testcase verifies the multi thread scenario */ @@ -543,16 +526,6 @@ TEST_CASE("Unit_hipMemcpy_MultiThreadWithSerialization") { multiThread_1(true, false); } -/* -This testcase verifies the device offsets -*/ -TEMPLATE_TEST_CASE("Unit_hipMemcpy_DeviceOffsets", "", float, double) { - HIP_CHECK(hipDeviceReset()); - size_t maxSize = 256 * 1024; - memcpytest2_offsets(maxSize, true, false); - memcpytest2_offsets(maxSize, false, true); -} - /* This testcase verifies hipMemcpy API with pinnedMemory and hostRegister along with kernel launches diff --git a/tests/catch/unit/memory/hipMemcpyParam2DAsync.cc b/tests/catch/unit/memory/hipMemcpyParam2DAsync.cc index 5683f5c15c..182d064f99 100644 --- a/tests/catch/unit/memory/hipMemcpyParam2DAsync.cc +++ b/tests/catch/unit/memory/hipMemcpyParam2DAsync.cc @@ -71,13 +71,14 @@ TEMPLATE_TEST_CASE("Unit_hipMemcpyParam2DAsync_multiDevice-StreamOnDiffDevice", &pitch_E, width, NUM_H)); // Initalizing A_d with C_h - HIP_CHECK(hipMemcpy2D(A_d, pitch_A, C_h, width, - NUM_W*sizeof(TestType), NUM_H, - hipMemcpyHostToDevice)); HIP_CHECK(hipSetDevice(1)); hipStream_t stream; hipStreamCreate(&stream); + HIP_CHECK(hipMemcpy2DAsync(A_d, pitch_A, C_h, width, + NUM_W*sizeof(TestType), NUM_H, + hipMemcpyHostToDevice, stream)); + HIP_CHECK(hipStreamSynchronize(stream)); // Device to Device hip_Memcpy2D desc = {}; #ifdef __HIP_PLATFORM_NVCC__ diff --git a/tests/catch/unit/memory/hipMemcpyWithStream.cc b/tests/catch/unit/memory/hipMemcpyWithStream.cc index 275d98ff19..f7e1be6001 100644 --- a/tests/catch/unit/memory/hipMemcpyWithStream.cc +++ b/tests/catch/unit/memory/hipMemcpyWithStream.cc @@ -33,7 +33,6 @@ This testfile verifies the following scenarios 7. hipMemcpyWithStream API with testkind TestkindDefault 8. hipMemcpyWithStream API with testkind TestkindDefaultForDtoD 9. hipMemcpyWithStream API DtoD on same device -10.Multi threaded scenario */ @@ -540,55 +539,6 @@ void TestkindHtoH(void) { HIP_CHECK(hipStreamDestroy(stream)); } -TEST_CASE("Unit_hipMemcpyWithStream_MultiThread") { - size_t thread_count = 10; - std::vector threads; - int deviceCount = 0; - HIP_CHECK(hipGetDeviceCount(&deviceCount)); - if (deviceCount < 2) { - SUCCEED("deviceCount < 2"); - } else { - for (int op = static_cast(ops::TestwithOnestream); - op < static_cast(ops::END_OF_LIST); ++op) { - for (uint32_t i = 0; i < thread_count; i++) { - threads.emplace_back(std::thread{[&] { - switch ( op ) { - case static_cast(ops::TestwithOnestream): - TestwithOnestream(); - break; - case static_cast(ops::TestwithTwoStream): - TestwithTwoStream(); - break; - case static_cast(ops::TestkindDtoH): - TestkindDtoH(); - break; - case static_cast(ops::TestkindHtoH): - TestkindHtoH(); - break; - case static_cast(ops::TestkindDtoD): - TestkindDtoD(); - break; - case static_cast(ops::TestOnMultiGPUwithOneStream): - TestOnMultiGPUwithOneStream(); - break; - case static_cast(ops::TestkindDefault): - TestkindDefault(); - break; -#ifndef __HIP_PLATFORM_NVCC__ - case static_cast(ops::TestkindDefaultForDtoD): - TestkindDefaultForDtoD(); - break; -#endif - case static_cast(ops::TestDtoDonSameDevice): - TestDtoDonSameDevice(); - break; - default:{} - } - }}); - } - } - } -} TEST_CASE("Unit_hipMemcpyWithStream_TestWithOneStream") { TestwithOnestream(); diff --git a/tests/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc b/tests/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc index 3f0c28239b..435a94ac2b 100644 --- a/tests/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc +++ b/tests/catch/unit/memory/hipMemcpyWithStreamMultiThread.cc @@ -610,8 +610,8 @@ void HipMemcpyWithStreamMultiThreadtests::TestkindHtoH(bool &val_res) { HIPCHECK(hipStreamDestroy(stream)); } -TEST_CASE("Unit_hipMemcpyWithStream_NewMultiThread") { - const auto Threadcount{100}; +TEST_CASE("Unit_hipMemcpyWithStream_MultiThread") { + const auto Threadcount{10}; bool ret_val[Threadcount]; std::thread th[Threadcount]; for (int op = static_cast(ops::TestwithOnestream); diff --git a/tests/catch/unit/memory/hipMemcpy_MultiThread.cc b/tests/catch/unit/memory/hipMemcpy_MultiThread.cc index 1e8f7e46a8..1dad378a6c 100644 --- a/tests/catch/unit/memory/hipMemcpy_MultiThread.cc +++ b/tests/catch/unit/memory/hipMemcpy_MultiThread.cc @@ -27,7 +27,7 @@ This testfile verifies the following scenarios of all hipMemcpy API 1. Multi thread */ static constexpr auto NUM_ELM{1024}; -static constexpr auto NUM_THREADS{10}; +static constexpr auto NUM_THREADS{5}; static auto Available_Gpus{0}; static constexpr auto MAX_GPU{256}; diff --git a/tests/catch/unit/memory/hipMemoryAllocateCoherent.cc b/tests/catch/unit/memory/hipMemoryAllocateCoherent.cc new file mode 100644 index 0000000000..0d14539191 --- /dev/null +++ b/tests/catch/unit/memory/hipMemoryAllocateCoherent.cc @@ -0,0 +1,64 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANNTY OF ANY KIND, EXPRESS OR +IMPLIED, INNCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANNY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER INN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR INN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +This testcase verifies the following scenario +1. Allocating the memory and modifying it coherently +*/ + +#include +#include +#include + +constexpr auto wait_sec = 5000; + +__global__ void Kernel(float* hostRes, int clkRate) { + int tid = threadIdx.x + blockIdx.x * blockDim.x; + hostRes[tid] = tid + 1; + __threadfence_system(); + // expecting that the data is getting flushed to host here! + uint64_t start = clock64()/clkRate, cur; + if (clkRate > 1) { + do { cur = clock64()/clkRate-start;}while (cur < wait_sec); + } else { + do { cur = clock64()/start;}while (cur < wait_sec); + } +} + +TEST_CASE("Unit_hipHostMalloc_CoherentAccess") { + int blocks = 2; + float* hostRes; + hipHostMalloc(&hostRes, blocks * sizeof(float), + hipHostMallocMapped); + hostRes[0] = 0; + hostRes[1] = 0; + int clkRate; + HIP_CHECK(hipDeviceGetAttribute(&clkRate, hipDeviceAttributeClockRate, 0)); + std::cout << clkRate << std::endl; + hipLaunchKernelGGL(HIP_KERNEL_NAME(Kernel), dim3(1), dim3(blocks), + 0, 0, hostRes, clkRate); + int eleCounter = 0; + while (eleCounter < blocks) { + // blocks until the value changes + while (hostRes[eleCounter] == 0) {printf("waiting for counter inc\n");} + eleCounter++; + } + hipHostFree(reinterpret_cast(hostRes)); +} + diff --git a/tests/catch/unit/memory/hipPointerGetAttributes.cc b/tests/catch/unit/memory/hipPointerGetAttributes.cc new file mode 100644 index 0000000000..b14ea9858e --- /dev/null +++ b/tests/catch/unit/memory/hipPointerGetAttributes.cc @@ -0,0 +1,338 @@ +/* +Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* +Following scenarios are verified for hipPointerGetAttributes API +1. Run through a couple simple cases to test lookups host pointer arithmetic +2. Allocates memory across all devices withing the specified size range +3. Allocates tiny memory across all devices +4. Multi-threaded test with many simul allocs. + +*/ +#include +#include +#include +#include + +size_t Nbytes = 0; +constexpr size_t N{1000000}; + + + +//================================================================================================= +// Utility Functions: +//================================================================================================= + +bool operator==(const hipPointerAttribute_t& lhs, + const hipPointerAttribute_t& rhs) { + return ((lhs.hostPointer == rhs.hostPointer) && + (lhs.devicePointer == rhs.devicePointer) && + (lhs.memoryType == rhs.memoryType) && (lhs.device == rhs.device) && + (lhs.allocationFlags == rhs.allocationFlags)); +} + + +bool operator!=(const hipPointerAttribute_t& lhs, + const hipPointerAttribute_t& rhs) { + return !(lhs == rhs); +} + + +const char* memoryTypeToString(hipMemoryType memoryType) { + switch (memoryType) { + case hipMemoryTypeHost: + return "[Host]"; + case hipMemoryTypeDevice: + return "[Device]"; + default: + return "[Unknown]"; + } +} + + +void resetAttribs(hipPointerAttribute_t* attribs) { + attribs->hostPointer = reinterpret_cast(-1); + attribs->devicePointer = reinterpret_cast(-1); + attribs->memoryType = hipMemoryTypeHost; + attribs->device = -2; + attribs->isManaged = -1; + attribs->allocationFlags = 0xffff; +} + + +void printAttribs(const hipPointerAttribute_t* attribs) { + printf( + "hostPointer:%p devicePointer:%p memType:%s deviceId:%d isManaged:%d " + "allocationFlags:%u\n", + attribs->hostPointer, attribs->devicePointer, + memoryTypeToString(attribs->memoryType), + attribs->device, attribs->isManaged, attribs->allocationFlags); +} + + +inline int zrand(int max) { return rand() % max; } + + + +// Store the hipPointer attrib and some extra info +// so can later compare the looked-up info against +// the reference expectation +struct SuperPointerAttribute { + void* _pointer; + size_t _sizeBytes; + hipPointerAttribute_t _attrib; +}; + + +// Support function to check result against a reference: +void checkPointer(const SuperPointerAttribute& ref, int major, + int minor, void* pointer) { + hipPointerAttribute_t attribs; + resetAttribs(&attribs); + + hipError_t e = hipPointerGetAttributes(&attribs, pointer); + if ((e != hipSuccess) || (attribs != ref._attrib)) { + HIP_CHECK(e); + REQUIRE(attribs != ref._attrib); + } else { + printf("#%4d.%d GOOD:%p getattr :: ", major, minor, pointer); + printAttribs(&attribs); + } +} + + +// Test that allocates memory across all devices withing the +// specified size range +// (minSize...maxSize). Then does lookups to make sure the +// info reported by the tracker matches +// expecations Then deallocates it all. +// Multiple threads can call this function and in fact +// we do this in the testMultiThreaded_1 test. +void clusterAllocs(int numAllocs, size_t minSize, size_t maxSize) { + Nbytes = N * sizeof(char); + printf("clusterAllocs numAllocs=%d size=%lu..%lu\n", + numAllocs, minSize, maxSize); + const int Max_Devices = 256; + std::vector reference(numAllocs); + + REQUIRE(minSize > 0); + REQUIRE(maxSize >= minSize); + + int numDevices; + HIP_CHECK(hipGetDeviceCount(&numDevices)); + + //--- + // Populate with device and host allocations. + size_t totalDeviceAllocated[Max_Devices]; + for (int i = 0; i < numDevices; i++) { + totalDeviceAllocated[i] = 0; + } + for (int i = 0; i < numAllocs; i++) { + unsigned rand_seed = time(NULL); + bool isDevice = rand_r(&rand_seed) & 0x1; + reference[i]._sizeBytes = zrand(maxSize - minSize) + minSize; + + reference[i]._attrib.device = zrand(numDevices); + HIP_CHECK(hipSetDevice(reference[i]._attrib.device)); + reference[i]._attrib.isManaged = 0; + + void* ptr; + if (isDevice) { + totalDeviceAllocated[reference[i]._attrib.device] += + reference[i]._sizeBytes; + HIP_CHECK(hipMalloc(reinterpret_cast(&ptr), + reference[i]._sizeBytes)); + reference[i]._attrib.memoryType = hipMemoryTypeDevice; + reference[i]._attrib.devicePointer = ptr; + reference[i]._attrib.hostPointer = NULL; + reference[i]._attrib.allocationFlags = 0; + } else { + HIP_CHECK(hipHostMalloc(reinterpret_cast(&ptr), + reference[i]._sizeBytes, + hipHostMallocDefault)); + reference[i]._attrib.memoryType = hipMemoryTypeHost; + reference[i]._attrib.devicePointer = ptr; + reference[i]._attrib.hostPointer = ptr; + reference[i]._attrib.allocationFlags = 0; + } + reference[i]._pointer = ptr; + } + + for (int i = 0; i < numDevices; i++) { + size_t free, total; + HIP_CHECK(hipSetDevice(i)); + HIP_CHECK(hipMemGetInfo(&free, &total)); + printf( + " device#%d: hipMemGetInfo: " + "free=%zu (%4.2fMB) totalDevice=%lu (%4.2fMB) total=%zu " + "(%4.2fMB)\n", + i, free, (free / 1024.0 / 1024.0), totalDeviceAllocated[i], + (totalDeviceAllocated[i]) / 1024.0 / 1024.0, total, + (total / 1024.0 / 1024.0)); + REQUIRE(free + totalDeviceAllocated[i] <= total); + } + + // Now look up each pointer we inserted and verify we can find it: + char * ptr; + for (int i = 0; i < numAllocs; i++) { + SuperPointerAttribute& ref = reference[i]; + ptr = static_cast(ref._pointer); + checkPointer(ref, i, 0, ref._pointer); + checkPointer(ref, i, 1, (ptr + + ref._sizeBytes / 2)); + if (ref._sizeBytes > 1) { + checkPointer(ref, i, 2, (ptr + + ref._sizeBytes - 1)); + } + + if (ref._attrib.memoryType == hipMemoryTypeDevice) { + hipFree(ref._pointer); + } else { + hipHostFree(ref._pointer); + } + } +} + +//======================================================================== +// Functions to run tests +//======================================================================= +//-- +// Run through a couple simple cases to test lookups host pointer arithmetic: +TEST_CASE("Unit_hipPointerGetAttributes_Basic") { + HIP_CHECK(hipSetDevice(0)); + Nbytes = N * sizeof(char); + printf("\n"); + printf("=============================================================\n"); + printf("Simple Tests\n"); + printf("=============================================================\n"); + + char* A_d; + char* A_Pinned_h; + char* A_OSAlloc_h; + hipError_t e; + + HIP_CHECK(hipMalloc(&A_d, Nbytes)); + HIP_CHECK(hipHostMalloc(reinterpret_cast(&A_Pinned_h), Nbytes, + hipHostMallocDefault)); + A_OSAlloc_h = reinterpret_cast(malloc(Nbytes)); + + size_t free, total; + HIP_CHECK(hipMemGetInfo(&free, &total)); + printf("hipMemGetInfo: free=%zu (%4.2f) Nbytes=%lu total=%zu (%4.2f)\n", free, + (free / 1024.0 / 1024.0), Nbytes, total, + (total / 1024.0 / 1024.0)); + REQUIRE(free + Nbytes <= total); + + + hipPointerAttribute_t attribs; + hipPointerAttribute_t attribs2; + + // Device memory + printf("\nDevice memory (hipMalloc)\n"); + HIP_CHECK(hipPointerGetAttributes(&attribs, A_d)); + + // Check pointer arithmetic cases: + resetAttribs(&attribs2); + HIP_CHECK(hipPointerGetAttributes(&attribs2, A_d + 100)); + char *ptr = reinterpret_cast(attribs.devicePointer); + REQUIRE(ptr + 100 == + reinterpret_cast(attribs2.devicePointer)); + + // Corner case at end of array: + resetAttribs(&attribs2); + HIP_CHECK(hipPointerGetAttributes(&attribs2, A_d + Nbytes - 1)); + REQUIRE((ptr + Nbytes - 1) == + reinterpret_cast(attribs2.devicePointer)); + + // Pointer just beyond array must be invalid or at least a different pointer + resetAttribs(&attribs2); + e = hipPointerGetAttributes(&attribs2, A_d + Nbytes + 1); + if (e != hipErrorInvalidValue) { + // We might have strayed into another pointer area. + REQUIRE(reinterpret_cast(ptr) != + reinterpret_cast(attribs2.devicePointer)); + } + + + resetAttribs(&attribs2); + e = hipPointerGetAttributes(&attribs2, A_d + Nbytes); + if (e != hipErrorInvalidValue) { + REQUIRE(attribs.devicePointer != attribs2.devicePointer); + } + hipFree(A_d); + e = hipPointerGetAttributes(&attribs, A_d); + REQUIRE(e == hipErrorInvalidValue); + + // Device-visible host memory + printf("\nDevice-visible host memory (hipHostMalloc)\n"); + HIP_CHECK(hipPointerGetAttributes(&attribs, A_Pinned_h)); + + resetAttribs(&attribs2); + HIP_CHECK(hipPointerGetAttributes(&attribs2, A_Pinned_h + Nbytes / 2)); + char *ptr1 = reinterpret_cast(attribs.hostPointer); + REQUIRE((ptr1 + Nbytes / 2) + == reinterpret_cast(attribs2.hostPointer)); + + + hipHostFree(A_Pinned_h); + e = hipPointerGetAttributes(&attribs, A_Pinned_h); + REQUIRE(e == hipErrorInvalidValue); + + // OS memory + printf("\nOS-allocated memory (malloc)\n"); + e = hipPointerGetAttributes(&attribs, A_OSAlloc_h); + REQUIRE(e == hipErrorInvalidValue); +} + +TEST_CASE("Unit_hipPointerGetAttributes_ClusterAlloc") { + srand(0x100); + printf("\n=============================================\n"); + clusterAllocs(100, 1024 * 1, 1024 * 1024); +} + +TEST_CASE("Unit_hipPointerGetAttributes_TinyClusterAlloc") { + srand(0x200); + printf("\n=============================================\n"); + clusterAllocs(1000, 1, 10); // Many tiny allocations; +} + +// Multi-threaded test with many simul allocs. +// IN : serialize will force the test to run in serial fashion. +TEST_CASE("Unit_hipPointerGetAttributes_MultiThread") { + srand(0x300); + auto serialize = 1; + printf("\n=============================================\n"); + printf("MultiThreaded_1\n"); + if (serialize) printf("[SERIALIZE]\n"); + printf("===============================================\n"); + std::thread t1(clusterAllocs, 1000, 101, 1000); + if (serialize) t1.join(); + + std::thread t2(clusterAllocs, 1000, 11, 100); + if (serialize) t2.join(); + + std::thread t3(clusterAllocs, 1000, 5, 10); + if (serialize) t3.join(); + + std::thread t4(clusterAllocs, 1000, 1, 4); + if (serialize) t4.join(); +}