bdb3a4b393
Change-Id: I67194ccf77a0019368579ff7d95b7790fcf228f3
425 wiersze
14 KiB
C++
425 wiersze
14 KiB
C++
/*
|
|
Copyright (c) 2015-2017 Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in
|
|
all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
THE SOFTWARE.
|
|
*/
|
|
// Simple test for memset.
|
|
// Also serves as a template for other tests.
|
|
|
|
/* HIT_START
|
|
* BUILD: %t %s ../test_common.cpp
|
|
* TEST: %t EXCLUDE_HIP_PLATFORM hcc vdi
|
|
* TEST: %t --memcpyWithPeer EXCLUDE_HIP_PLATFORM hcc vdi
|
|
* TEST: %t --mirrorPeers EXCLUDE_HIP_PLATFORM hcc vdi
|
|
* HIT_END
|
|
*/
|
|
|
|
#include "hip/hip_runtime.h"
|
|
#include "test_common.h"
|
|
|
|
bool p_memcpyWithPeer = false; // use the peer device for the P2P copy
|
|
bool p_mirrorPeers =
|
|
false; // in addition to mapping current to peer space, map peer to current space.
|
|
int p_peerDevice = -1; // explicly specify which peer to use, else use p_gpuDevice + 1.
|
|
|
|
|
|
int g_currentDevice;
|
|
int g_peerDevice;
|
|
|
|
void help(char* argv[]) {
|
|
printf("usage: %s [OPTIONS]\n", argv[0]);
|
|
printf(" --memcpyWithPeer : Perform memcpy with peer.\n");
|
|
printf(
|
|
" --mirrorPeers : Mirror memory onto both default device and peerdevice. If 0, memory "
|
|
"is mapped only on the default device.\n");
|
|
printf(" --peerDevice N : Set peer device.\n");
|
|
};
|
|
|
|
|
|
static hipError_t myHipMemcpy(void* dest, const void* src, size_t sizeBytes, hipMemcpyKind kind,
|
|
hipStream_t stream, bool async) {
|
|
if (async) {
|
|
hipError_t e = hipMemcpyAsync(dest, src, sizeBytes, kind, stream);
|
|
// HIPCHECK(hipStreamSynchronize(stream));
|
|
return (e);
|
|
} else {
|
|
return hipMemcpy(dest, src, sizeBytes, kind);
|
|
};
|
|
}
|
|
|
|
|
|
void parseMyArguments(int argc, char* argv[]) {
|
|
int more_argc = HipTest::parseStandardArguments(argc, argv, false);
|
|
// parse args for this test:
|
|
for (int i = 1; i < more_argc; i++) {
|
|
const char* arg = argv[i];
|
|
|
|
if (!strcmp(arg, "--help")) {
|
|
help(argv);
|
|
exit(-1);
|
|
} else if (!strcmp(arg, "--memcpyWithPeer")) {
|
|
p_memcpyWithPeer = true;
|
|
} else if (!strcmp(arg, "--mirrorPeers")) {
|
|
p_mirrorPeers = true;
|
|
} else if (!strcmp(arg, "--peerDevice")) {
|
|
if (++i >= argc || !HipTest::parseInt(argv[i], &p_peerDevice)) {
|
|
failed("Bad peerDevice argument");
|
|
}
|
|
} else {
|
|
failed("Bad argument '%s'", arg);
|
|
}
|
|
};
|
|
};
|
|
|
|
void syncBothDevices() {
|
|
int saveDevice;
|
|
HIPCHECK(hipGetDevice(&saveDevice));
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
HIPCHECK(hipDeviceSynchronize());
|
|
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(hipDeviceSynchronize());
|
|
|
|
HIPCHECK(hipSetDevice(saveDevice));
|
|
}
|
|
|
|
|
|
// Sets globals g_currentDevice, g_peerDevice
|
|
void setupPeerTests() {
|
|
int deviceCnt;
|
|
|
|
HIPCHECK(hipGetDeviceCount(&deviceCnt));
|
|
|
|
g_currentDevice = p_gpuDevice;
|
|
g_peerDevice = (p_peerDevice == -1) ? ((g_currentDevice + 1) % deviceCnt) : p_peerDevice;
|
|
|
|
printf("N=%zu device=%d peerDevice=%d (%d devices total)\n", N, g_currentDevice, g_peerDevice,
|
|
deviceCnt);
|
|
|
|
// Must be on a multi-gpu system:
|
|
assert(g_currentDevice != g_peerDevice);
|
|
|
|
int canAccessPeer;
|
|
HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_currentDevice, g_peerDevice));
|
|
printf("dev#%d canAccessPeer:#%d=%d\n", g_currentDevice, g_peerDevice, canAccessPeer);
|
|
|
|
assert(canAccessPeer);
|
|
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
HIPCHECK(hipDeviceReset());
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(hipDeviceReset());
|
|
}
|
|
|
|
//---
|
|
// Test which enables peer2peer first, then allocates the memory.
|
|
void enablePeerFirst(bool useAsyncCopy) {
|
|
printf("\n==testing: %s useAsyncCopy=%d\n", __func__, useAsyncCopy);
|
|
|
|
setupPeerTests();
|
|
|
|
// Always enable g_currentDevice to see the allocations on peerDevice.
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0));
|
|
|
|
if (p_mirrorPeers) {
|
|
// Mirror peers allows the peer device to see the allocations on currentDevice.
|
|
int canAccessPeer;
|
|
HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice));
|
|
assert(canAccessPeer);
|
|
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(hipDeviceEnablePeerAccess(g_currentDevice, 0));
|
|
}
|
|
|
|
size_t Nbytes = N * sizeof(char);
|
|
|
|
char *A_d0, *A_d1;
|
|
char* A_h;
|
|
|
|
A_h = (char*)malloc(Nbytes);
|
|
|
|
// allocate and initialize memory on device0
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
HIPCHECK(hipMalloc(&A_d0, Nbytes));
|
|
HIPCHECK(hipMemset(A_d0, memsetval, Nbytes));
|
|
|
|
// allocate and initialize memory on peer device
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(hipMalloc(&A_d1, Nbytes));
|
|
HIPCHECK(hipMemset(A_d1, 0x13, Nbytes));
|
|
|
|
|
|
// Device0 push to device1, using P2P:
|
|
// NOTE : if p_mirrorPeers=0 and p_memcpyWithPeer=1, then peer device does not have mapping for
|
|
// A_d1 and we need to use a
|
|
// a host staging copy for the P2P access.
|
|
HIPCHECK(hipSetDevice(p_memcpyWithPeer ? g_peerDevice : g_currentDevice));
|
|
HIPCHECK(myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault, 0 /*stream*/,
|
|
useAsyncCopy)); // This is P2P copy.
|
|
|
|
// Copy data back to host:
|
|
// Have to wait for previous operation to finish, since we are switching to another one:
|
|
HIPCHECK(hipDeviceSynchronize());
|
|
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0 /*stream*/, useAsyncCopy));
|
|
HIPCHECK(hipDeviceSynchronize());
|
|
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
|
|
// Check host data:
|
|
for (int i = 0; i < N; i++) {
|
|
if (A_h[i] != memsetval) {
|
|
failed("mismatch at index:%d computed:0x%02x, golden memsetval:0x%02x\n", i,
|
|
(int)A_h[i], (int)memsetval);
|
|
}
|
|
}
|
|
|
|
printf("==done: %s useAsyncCopy:%d\n\n", __func__, useAsyncCopy);
|
|
}
|
|
|
|
|
|
//---
|
|
// Test which allocated memory first, then enables peer2peer.
|
|
// Enabling peer needs to scan all allocated memory and enable peer access.
|
|
void allocMemoryFirst(bool useAsyncCopy) {
|
|
printf("\n==testing: %s useAsyncCopy=%d\n", __func__, useAsyncCopy);
|
|
|
|
setupPeerTests();
|
|
|
|
size_t Nbytes = N * sizeof(char);
|
|
|
|
char *A_d0, *A_d1;
|
|
char* A_h;
|
|
|
|
A_h = (char*)malloc(Nbytes);
|
|
|
|
//---
|
|
// allocate and initialize memory on device0
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
HIPCHECK(hipMalloc(&A_d0, Nbytes));
|
|
HIPCHECK(hipMemset(A_d0, memsetval, Nbytes));
|
|
|
|
// allocate and initialize memory on peer device
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(hipMalloc(&A_d1, Nbytes));
|
|
HIPCHECK(hipMemset(A_d1, 0x13, Nbytes));
|
|
|
|
|
|
//---
|
|
// Enable peer access, for memory already allocated:
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0));
|
|
|
|
if (p_mirrorPeers) {
|
|
int canAccessPeer;
|
|
HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice));
|
|
assert(canAccessPeer);
|
|
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(hipDeviceEnablePeerAccess(g_currentDevice, 0));
|
|
}
|
|
|
|
|
|
//---
|
|
// Copies to test functionality:
|
|
// Device0 push to device1, using P2P:
|
|
HIPCHECK(hipSetDevice(p_memcpyWithPeer ? g_peerDevice : g_currentDevice));
|
|
HIPCHECK(myHipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault, 0 /*stream*/, useAsyncCopy));
|
|
|
|
syncBothDevices(); // TODO - remove me, should handle this in implementation.
|
|
|
|
// Copy data back to host:
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0 /*stream*/, useAsyncCopy));
|
|
|
|
syncBothDevices(); // TODO - remove me, should handle this in implementation.
|
|
|
|
|
|
//---
|
|
// Check host data:
|
|
for (int i = 0; i < N; i++) {
|
|
if (A_h[i] != memsetval) {
|
|
failed("mismatch at index:%d computed:0x%02x, golden memsetval:0x%02x\n", i,
|
|
(int)A_h[i], (int)memsetval);
|
|
}
|
|
}
|
|
printf("==done: %s useAsyncCopy=%d\n\n", __func__, useAsyncCopy);
|
|
}
|
|
|
|
|
|
//---
|
|
// Test which tests peer H2D copy - ie: copy-engine=1, dst=1, src=0 (Host)
|
|
// A_d0 is pinned host on dev0 (this)
|
|
// A_d1 is device memory on dev1 (peer)
|
|
//
|
|
void testPeerHostToDevice(bool useAsyncCopy) {
|
|
printf("\n==testing: %s useAsyncCopy=%d\n", __func__, useAsyncCopy);
|
|
|
|
setupPeerTests();
|
|
|
|
// Always enable g_currentDevice to see the allocations on peerDevice.
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0));
|
|
|
|
if (p_mirrorPeers) {
|
|
// Mirror peers allows the peer device to see the allocations on currentDevice.
|
|
int canAccessPeer;
|
|
HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice));
|
|
assert(canAccessPeer);
|
|
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(hipDeviceEnablePeerAccess(g_currentDevice, 0));
|
|
}
|
|
|
|
size_t Nbytes = N * sizeof(char);
|
|
|
|
char *A_host_d0, *A_d1;
|
|
char* A_h;
|
|
|
|
A_h = (char*)malloc(Nbytes);
|
|
|
|
// allocate and initialize memory on device0
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
HIPCHECK(hipHostMalloc(&A_host_d0, Nbytes));
|
|
HIPCHECK(hipMemset(A_host_d0, memsetval, Nbytes));
|
|
|
|
// allocate and initialize memory on peer device
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(hipMalloc(&A_d1, Nbytes));
|
|
HIPCHECK(hipMemset(A_d1, 0x13, Nbytes));
|
|
|
|
bool firstAsyncCopy = useAsyncCopy; /*TODO - should be useAsyncCopy*/
|
|
|
|
syncBothDevices();
|
|
|
|
|
|
// Device0 push to device1, using P2P:
|
|
// NOTE : if p_mirrorPeers=0 and p_memcpyWithPeer=1, then peer device does not have mapping for
|
|
// A_d1 and we need to use a
|
|
// a host staging copy for the P2P access.
|
|
if (p_memcpyWithPeer) {
|
|
// p_memcpyWithPeer=1 case is HostToDevice.
|
|
// if p_mirrorPeers = 1, this is accelerated copy over PCIe.
|
|
// if p_mirrorPeers = 0, this should fall back to host (because peer can't see A_host_d0)
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(myHipMemcpy(A_d1, A_host_d0, Nbytes, hipMemcpyHostToDevice, 0 /*stream*/,
|
|
firstAsyncCopy)); // This is P2P copy.
|
|
} else {
|
|
// p_memcpyWithPeer=0 case is HostToDevice.
|
|
// if p_mirrorPeers = 1, this is accelerated copy over PCIe.
|
|
// if p_mirrorPeers = 0, this should fall back to host (because device0 can't see A_d1)
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
HIPCHECK(myHipMemcpy(A_d1, A_host_d0, Nbytes, hipMemcpyHostToDevice, 0 /*stream*/,
|
|
firstAsyncCopy)); // This is P2P copy.
|
|
}
|
|
|
|
syncBothDevices();
|
|
|
|
// Copy data back to host:
|
|
HIPCHECK(hipSetDevice(g_peerDevice));
|
|
HIPCHECK(myHipMemcpy(A_h, A_d1, Nbytes, hipMemcpyDeviceToHost, 0 /*stream*/, useAsyncCopy));
|
|
HIPCHECK(hipDeviceSynchronize());
|
|
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
HIPCHECK(hipDeviceSynchronize());
|
|
|
|
// Check host data:
|
|
for (int i = 0; i < N; i++) {
|
|
if (A_h[i] != memsetval) {
|
|
failed("mismatch at index:%d computed:0x%02x, golden memsetval:0x%02x\n", i,
|
|
(int)A_h[i], (int)memsetval);
|
|
}
|
|
}
|
|
|
|
printf("==done: %s useAsyncCopy:%d\n\n", __func__, useAsyncCopy);
|
|
}
|
|
|
|
|
|
void simpleNegative() {
|
|
printf("\n==testing: %s\n", __func__);
|
|
|
|
setupPeerTests();
|
|
|
|
int deviceId;
|
|
HIPCHECK(hipGetDevice(&deviceId));
|
|
|
|
//---
|
|
//-- self is not a peer
|
|
int canAccessPeer;
|
|
hipError_t e = hipDeviceCanAccessPeer(&canAccessPeer, deviceId, deviceId);
|
|
HIPASSERT(e == hipSuccess); // no error returned, it doesn't hurt to ask.
|
|
HIPASSERT(canAccessPeer == 0); // but self is not a peer.
|
|
|
|
e = hipSuccess;
|
|
//---
|
|
// Enable same device twice in a row:
|
|
HIPCHECK(hipSetDevice(g_currentDevice));
|
|
HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0));
|
|
e = (hipDeviceEnablePeerAccess(g_peerDevice, 0));
|
|
HIPASSERT(e == hipErrorPeerAccessAlreadyEnabled);
|
|
|
|
//---
|
|
// try disabling twice in a row
|
|
HIPCHECK(hipDeviceDisablePeerAccess(g_peerDevice));
|
|
e = (hipDeviceDisablePeerAccess(g_peerDevice));
|
|
HIPASSERT(e == hipErrorPeerAccessNotEnabled);
|
|
|
|
|
|
// More tests here:
|
|
printf("==done: %s\n\n", __func__);
|
|
}
|
|
|
|
|
|
int main(int argc, char* argv[]) {
|
|
parseMyArguments(argc, argv);
|
|
int gpuCount;
|
|
HIPCHECK(hipGetDeviceCount(&gpuCount));
|
|
|
|
if (gpuCount < 2) {
|
|
printf("P2P application requires atleast 2 gpu devices\n");
|
|
} else {
|
|
if (p_tests & 0x100) {
|
|
testPeerHostToDevice(false /*useAsyncCopy*/);
|
|
}
|
|
testPeerHostToDevice(true /*useAsyncCopy*/);
|
|
|
|
if (p_tests & 0x1) {
|
|
enablePeerFirst(false /*useAsyncCopy*/);
|
|
}
|
|
|
|
if (p_tests & 0x2) {
|
|
allocMemoryFirst(false /*useAsyncCopy*/);
|
|
}
|
|
|
|
if (p_tests & 0x4) {
|
|
simpleNegative();
|
|
}
|
|
|
|
if (p_tests & 0x8) {
|
|
enablePeerFirst(true /*useAsyncCopy*/);
|
|
}
|
|
if (p_tests & 0x10) {
|
|
allocMemoryFirst(true /*useAsyncCopy*/);
|
|
}
|
|
}
|
|
passed();
|
|
}
|