/* Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ // Simple test for memset. // Also serves as a template for other tests. /* HIT_START * BUILD: %t %s test_common.cpp * RUN: %t EXCLUDE_HIP_PLATFORM all * RUN: %t --memcpyWithPeer EXCLUDE_HIP_PLATFORM all * RUN: %t --mirrorPeers EXCLUDE_HIP_PLATFORM all * HIT_END */ #include "hip/hip_runtime.h" #include "test_common.h" bool p_memcpyWithPeer = false; // use the peer device for the P2P copy bool p_mirrorPeers = false; // in addition to mapping current to peer space, map peer to current space. int p_peerDevice = -1; // explicly specify which peer to use, else use p_gpuDevice + 1. int g_currentDevice; int g_peerDevice; void help(char *argv[]) { printf ("usage: %s [OPTIONS]\n", argv[0]); printf (" --memcpyWithPeer : Perform memcpy with peer.\n"); printf (" --mirrorPeers : Mirror memory onto both default device and peerdevice. If 0, memory is mapped only on the default device.\n"); printf (" --peerDevice N : Set peer device.\n"); }; static hipError_t myHipMemcpy(void *dest, const void *src, size_t sizeBytes, hipMemcpyKind kind, hipStream_t stream, bool async) { if (async) { hipError_t e = hipMemcpyAsync(dest, src, sizeBytes, kind, stream); //HIPCHECK(hipStreamSynchronize(stream)); return (e); } else { return hipMemcpy(dest, src, sizeBytes, kind); }; } void parseMyArguments(int argc, char *argv[]) { int more_argc = HipTest::parseStandardArguments(argc, argv, false); // parse args for this test: for (int i = 1; i < more_argc; i++) { const char *arg = argv[i]; if (!strcmp(arg, "--help")) { help(argv); exit(-1); } else if (!strcmp(arg, "--memcpyWithPeer")) { p_memcpyWithPeer = true; } else if (!strcmp(arg, "--mirrorPeers")) { p_mirrorPeers = true; } else if (!strcmp(arg, "--peerDevice")) { if (++i >= argc || !HipTest::parseInt(argv[i], &p_peerDevice)) { failed("Bad peerDevice argument"); } } else { failed("Bad argument '%s'", arg); } }; }; void syncBothDevices() { int saveDevice; HIPCHECK(hipGetDevice(&saveDevice)); HIPCHECK(hipSetDevice(g_currentDevice)); HIPCHECK(hipDeviceSynchronize()); HIPCHECK(hipSetDevice(g_peerDevice)); HIPCHECK(hipDeviceSynchronize()); HIPCHECK(hipSetDevice(saveDevice)); } // Sets globals g_currentDevice, g_peerDevice void setupPeerTests() { int deviceCnt; HIPCHECK(hipGetDeviceCount(&deviceCnt)); g_currentDevice = p_gpuDevice; g_peerDevice = (p_peerDevice == -1) ? ((g_currentDevice + 1) % deviceCnt) : p_peerDevice; printf ("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, g_currentDevice, g_peerDevice, deviceCnt); // Must be on a multi-gpu system: assert (g_currentDevice != g_peerDevice); int canAccessPeer; HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_currentDevice, g_peerDevice)); printf ("dev#%d canAccessPeer:#%d=%d\n", g_currentDevice, g_peerDevice, canAccessPeer); assert(canAccessPeer); HIPCHECK (hipSetDevice(g_currentDevice)); HIPCHECK(hipDeviceReset()); HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK(hipDeviceReset()); } //--- // Test which enables peer2peer first, then allocates the memory. void enablePeerFirst(bool useAsyncCopy) { printf ("\n==testing: %s useAsyncCopy=%d\n", __func__, useAsyncCopy); setupPeerTests(); // Always enable g_currentDevice to see the allocations on peerDevice. HIPCHECK(hipSetDevice(g_currentDevice)); HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0)); if (p_mirrorPeers) { // Mirror peers allows the peer device to see the allocations on currentDevice. int canAccessPeer; HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice)); assert(canAccessPeer); HIPCHECK(hipSetDevice(g_peerDevice)); HIPCHECK(hipDeviceEnablePeerAccess(g_currentDevice, 0)); } size_t Nbytes = N*sizeof(char); char *A_d0, *A_d1; char *A_h; A_h = (char*)malloc(Nbytes); // allocate and initialize memory on device0 HIPCHECK (hipSetDevice(g_currentDevice)); HIPCHECK (hipMalloc(&A_d0, Nbytes) ); HIPCHECK (hipMemset(A_d0, memsetval, Nbytes) ); // allocate and initialize memory on peer device HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK (hipMalloc(&A_d1, Nbytes) ); HIPCHECK (hipMemset(A_d1, 0x13, Nbytes) ); // Device0 push to device1, using P2P: // NOTE : if p_mirrorPeers=0 and p_memcpyWithPeer=1, then peer device does not have mapping for A_d1 and we need to use a // a host staging copy for the P2P access. HIPCHECK (hipSetDevice(p_memcpyWithPeer ? g_peerDevice : g_currentDevice)); HIPCHECK (myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault, 0/*stream*/, useAsyncCopy)); // This is P2P copy. // Copy data back to host: // Have to wait for previous operation to finish, since we are switching to another one: HIPCHECK(hipDeviceSynchronize()); HIPCHECK (hipSetDevice(g_peerDevice)); HIPCHECK (myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0/*stream*/, useAsyncCopy)); HIPCHECK(hipDeviceSynchronize()); HIPCHECK (hipSetDevice(g_currentDevice)); // Check host data: for (int i=0; i