Add debug for Peer APIs. Enable PeerMemcpy APIs by default.

Change-Id: I46e39a9e7b07686a78484c1f3b5495b08e052fbb


[ROCm/hip commit: 3f0a2b8dc1]
Этот коммит содержится в:
Ben Sander
2016-11-04 08:51:16 -05:00
родитель 411cf6c80a
Коммит 1e5540e07f
4 изменённых файлов: 78 добавлений и 52 удалений
+5 -1
Просмотреть файл
@@ -1278,7 +1278,11 @@ hipError_t hipDeviceEnablePeerAccess (int peerDeviceId, unsigned int flags);
hipError_t hipDeviceDisablePeerAccess (int peerDeviceId);
#ifdef PEER_NON_UNIFIED
#ifndef USE_PEER_NON_UNIFIED
#define USE_PEER_NON_UNIFIED 1
#endif
#if USE_PEER_NON_UNIFIED==1
/**
* @brief Copies memory from one device to memory on another device.
*
+29 -21
Просмотреть файл
@@ -73,6 +73,8 @@ int HIP_VISIBLE_DEVICES = 0; /* Contains a comma-separated sequence of GPU ident
int HIP_NUM_KERNELS_INFLIGHT = 128;
int HIP_WAIT_MODE = 0;
int HIP_FORCE_P2P_HOST = 0;
@@ -540,7 +542,7 @@ void ihipCtxCriticalBase_t<CtxMutex>::recomputePeerAgents()
template<>
bool ihipCtxCriticalBase_t<CtxMutex>::isPeer(const ihipCtx_t *peer)
bool ihipCtxCriticalBase_t<CtxMutex>::isPeerWatcher(const ihipCtx_t *peer)
{
auto match = std::find(_peers.begin(), _peers.end(), peer);
return (match != std::end(_peers));
@@ -548,12 +550,14 @@ bool ihipCtxCriticalBase_t<CtxMutex>::isPeer(const ihipCtx_t *peer)
template<>
bool ihipCtxCriticalBase_t<CtxMutex>::addPeer(ihipCtx_t *peer)
bool ihipCtxCriticalBase_t<CtxMutex>::addPeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peerWatcher)
{
auto match = std::find(_peers.begin(), _peers.end(), peer);
auto match = std::find(_peers.begin(), _peers.end(), peerWatcher);
if (match == std::end(_peers)) {
// Not already a peer, let's update the list:
_peers.push_back(peer);
tprintf(DB_COPY, "addPeerWatcher. Allocations on %s now visible to peerWatcher %s.\n",
thisCtx->toString().c_str(), peerWatcher->toString().c_str());
_peers.push_back(peerWatcher);
recomputePeerAgents();
return true;
}
@@ -564,12 +568,14 @@ bool ihipCtxCriticalBase_t<CtxMutex>::addPeer(ihipCtx_t *peer)
template<>
bool ihipCtxCriticalBase_t<CtxMutex>::removePeer(ihipCtx_t *peer)
bool ihipCtxCriticalBase_t<CtxMutex>::removePeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peerWatcher)
{
auto match = std::find(_peers.begin(), _peers.end(), peer);
auto match = std::find(_peers.begin(), _peers.end(), peerWatcher);
if (match != std::end(_peers)) {
// Found a valid peer, let's remove it.
_peers.remove(peer);
tprintf(DB_COPY, "removePeerWatcher. Allocations on %s no longer visible to former peerWatcher %s.\n",
thisCtx->toString().c_str(), peerWatcher->toString().c_str());
_peers.remove(peerWatcher);
recomputePeerAgents();
return true;
} else {
@@ -579,16 +585,17 @@ bool ihipCtxCriticalBase_t<CtxMutex>::removePeer(ihipCtx_t *peer)
template<>
void ihipCtxCriticalBase_t<CtxMutex>::resetPeers(ihipCtx_t *thisDevice)
void ihipCtxCriticalBase_t<CtxMutex>::resetPeerWatchers(ihipCtx_t *thisCtx)
{
tprintf(DB_COPY, "resetPeerWatchers for context=%s\n", thisCtx->toString().c_str());
_peers.clear();
_peerCnt = 0;
addPeer(thisDevice); // peer-list always contains self agent.
addPeerWatcher(thisCtx, thisCtx); // peer-list always contains self agent.
}
template<>
void ihipCtxCriticalBase_t<CtxMutex>::printPeers(FILE *f) const
void ihipCtxCriticalBase_t<CtxMutex>::printPeerWatchers(FILE *f) const
{
for (auto iter = _peers.begin(); iter!=_peers.end(); iter++) {
fprintf (f, "%s ", (*iter)->toString().c_str());
@@ -993,7 +1000,7 @@ void ihipCtx_t::locked_reset()
// Reset peer list to just me:
crit->resetPeers(this);
crit->resetPeerWatchers(this);
// Reset and release all memory stored in the tracker:
// Reset will remove peer mapping so don't need to do this explicitly.
@@ -1360,7 +1367,7 @@ void ihipInit()
READ_ENV_I(release, HIP_WAIT_MODE, 0, "Force synchronization mode. 1= force yield, 2=force spin, 0=defaults specified in application");
READ_ENV_I(release, HIP_FORCE_P2P_HOST, 0, "Force use of host/staging copy for peer-to-peer copies");
READ_ENV_I(release, HIP_NUM_KERNELS_INFLIGHT, 128, "Max number of inflight kernels per stream before active synchronization is forced.");
// Some flags have both compile-time and runtime flags - generate a warning if user enables the runtime flag but the compile-time flag is disabled.
@@ -1726,14 +1733,14 @@ void ihipSetTs(hipEvent_t e)
// So we check dstCtx's and srcCtx's peerList to see if they both include thisCtx.
bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx, ihipCtx_t *srcCtx)
{
tprintf (DB_COPY1, "Checking if direct copy can be used. thisCtx:%s; dstCtx:%s ; srcCtx:%s\n",
tprintf (DB_COPY, "Checking if direct copy can be used. thisCtx:%s; dstCtx:%s ; srcCtx:%s\n",
thisCtx->toString().c_str(), dstCtx->toString().c_str(), srcCtx->toString().c_str());
// Use blocks to control scope of critical sections.
{
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
tprintf(DB_SYNC, "dstCrit lock succeeded\n");
if (!ctxCrit->isPeer(thisCtx)) {
if (!ctxCrit->isPeerWatcher(thisCtx)) {
return false;
};
}
@@ -1741,7 +1748,7 @@ bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx,
{
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
tprintf(DB_SYNC, "srcCrit lock succeeded\n");
if (!ctxCrit->isPeer(thisCtx)) {
if (!ctxCrit->isPeerWatcher(thisCtx)) {
return false;
};
}
@@ -1832,13 +1839,13 @@ void ihipStream_t::locked_copySync(void* dst, const void* src, size_t sizeBytes,
if (hcCopyDir == hc::hcMemcpyDeviceToDevice) {
if (!canSeePeerMemory(ctx, ihipGetPrimaryCtx(dstPtrInfo._appId), ihipGetPrimaryCtx(srcPtrInfo._appId))) {
forceHostCopyEngine = true;
tprintf (DB_COPY1, "Forcing use of host copy engine.\n");
tprintf (DB_COPY, "Forcing use of host copy engine.\n");
} else {
tprintf (DB_COPY1, "Will use SDMA engine on streamDevice=%s.\n", ctx->toString().c_str());
tprintf (DB_COPY, "Will use SDMA engine on streamDevice=%s.\n", ctx->toString().c_str());
}
};
tprintf (DB_COPY1, "locked_copy dir=%s dst=%p src=%p sz=%zu\n", memcpyStr(kind), src, dst, sizeBytes);
tprintf (DB_COPY, "locked_copy dir=%s dst=%p src=%p sz=%zu\n", memcpyStr(kind), src, dst, sizeBytes);
{
LockedAccessor_StreamCrit_t crit (_criticalData);
@@ -1859,12 +1866,12 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
const ihipCtx_t *ctx = this->getCtx();
if ((ctx == nullptr) || (ctx->getDevice() == nullptr)) {
tprintf (DB_COPY1, "locked_copyAsync bad ctx or device\n");
tprintf (DB_COPY, "locked_copyAsync bad ctx or device\n");
throw ihipException(hipErrorInvalidDevice);
}
if (kind == hipMemcpyHostToHost) {
tprintf (DB_COPY1, "locked_copyAsync: H2H with memcpy");
tprintf (DB_COPY, "locked_copyAsync: H2H with memcpy");
// TODO - consider if we want to perhaps use the GPU SDMA engines anyway, to avoid the host-side sync here and keep everything flowing on the GPU.
/* As this is a CPU op, we need to wait until all
@@ -1890,7 +1897,7 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
copyEngineCanSeeSrcAndDest = canSeePeerMemory(ctx, ihipGetPrimaryCtx(dstPtrInfo._appId), ihipGetPrimaryCtx(srcPtrInfo._appId));
}
tprintf (DB_COPY1, "locked_copyAsync: async memcpy dstTracked=%d srcTracked=%d copyEngineCanSeeSrcAndDest=%d\n",
tprintf (DB_COPY, "locked_copyAsync: async memcpy dstTracked=%d srcTracked=%d copyEngineCanSeeSrcAndDest=%d\n",
dstTracked, srcTracked, copyEngineCanSeeSrcAndDest);
@@ -1915,6 +1922,7 @@ void ihipStream_t::locked_copyAsync(void* dst, const void* src, size_t sizeBytes
} else {
// TODO - call copy_ext directly here?
locked_copySync(dst, src, sizeBytes, kind);
//crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine);
}
}
}
+9 -10
Просмотреть файл
@@ -43,6 +43,7 @@ THE SOFTWARE.
//static const int debug = 0;
extern const int release;
// TODO - this blocks both kernels and memory ops. Perhaps should have separate env var for kernels?
extern int HIP_LAUNCH_BLOCKING;
extern int HIP_PRINT_ENV;
@@ -225,9 +226,8 @@ extern void recordApiTrace(std::string *fullStr, const std::string &apiStr);
#define DB_API 0 /* 0x01 - shortcut to enable HIP_TRACE_API on single switch */
#define DB_SYNC 1 /* 0x02 - trace synchronization pieces */
#define DB_MEM 2 /* 0x04 - trace memory allocation / deallocation */
#define DB_COPY1 3 /* 0x08 - trace memory copy commands. . */
#define DB_COPY 3 /* 0x08 - trace memory copy and peer commands. . */
#define DB_SIGNAL 4 /* 0x10 - trace signal pool commands */
#define DB_COPY2 5 /* 0x20 - trace memory copy commands. Detailed. */
#define DB_MAX_FLAG 5
// When adding a new debug flag, also add to the char name table below.
//
@@ -242,9 +242,8 @@ static const DbName dbName [] =
{KGRN, "api"}, // not used,
{KYEL, "sync"},
{KCYN, "mem"},
{KMAG, "copy1"},
{KMAG, "copy"},
{KRED, "signal"},
{KNRM, "copy2"},
};
@@ -596,11 +595,11 @@ public:
// Peer Accessor classes:
bool isPeer(const ihipCtx_t *peer); // returns True if peer has access to memory physically located on this device.
bool addPeer(ihipCtx_t *peer);
bool removePeer(ihipCtx_t *peer);
void resetPeers(ihipCtx_t *thisDevice);
void printPeers(FILE *f) const;
bool isPeerWatcher(const ihipCtx_t *peer); // returns True if peer has access to memory physically located on this device.
bool addPeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer);
bool removePeerWatcher(const ihipCtx_t *thisCtx, ihipCtx_t *peer);
void resetPeerWatchers(ihipCtx_t *thisDevice);
void printPeerWatchers(FILE *f) const;
uint32_t peerCnt() const { return _peerCnt; };
hsa_agent_t *peerAgents() const { return _peerAgents; };
@@ -750,7 +749,7 @@ inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e)
inline std::ostream& operator<<(std::ostream& os, const ihipCtx_t* c)
{
os << "ctx:" << static_cast<const void*> (c)
<< " dev:" << c->getDevice()->_deviceId;
<< ".dev:" << c->getDevice()->_deviceId;
return os;
}
+35 -20
Просмотреть файл
@@ -35,6 +35,33 @@ THE SOFTWARE.
// public APIs are thin wrappers which call into this internal implementations.
// TODO - actually not yet - currently the integer deviceId flavors just call the context APIs. need to fix.
/**
 * Internal (non-API-traced) implementation of the "can access peer" query.
 *
 * @param canAccessPeer [out] Written on every path: 0 when either context is
 *                      null or when both contexts are the same, otherwise the
 *                      value returned by get_is_peer() on the peer's accelerator.
 * @param thisCtx       Context asking for access.
 * @param peerCtx       Context whose memory would be accessed.
 * @return hipErrorInvalidDevice if either context is null, hipSuccess otherwise.
 *
 * NOTE(review): canAccessPeer itself is dereferenced without a null check;
 * callers are presumably expected to pass a valid pointer - confirm.
 */
hipError_t ihipDeviceCanAccessPeer (int* canAccessPeer, hipCtx_t thisCtx, hipCtx_t peerCtx)
{
    // A missing context on either side means there is nothing to query.
    if ((thisCtx == nullptr) || (peerCtx == nullptr)) {
        *canAccessPeer = 0;
        return hipErrorInvalidDevice;
    }

    if (thisCtx != peerCtx) {
        // Distinct contexts: ask the peer's accelerator about this context's accelerator.
        *canAccessPeer = peerCtx->getDevice()->_acc.get_is_peer(thisCtx->getDevice()->_acc);
        tprintf(DB_COPY, "deviceCanAccessPeer this=%s peer=%s canAccessPeer=%d\n",
                thisCtx->toString().c_str(), peerCtx->toString().c_str(), *canAccessPeer);
    } else {
        // A context is never reported as a peer of itself.
        *canAccessPeer = 0;
        tprintf(DB_COPY, "Can't be peer to self. (this=%s, peer=%s)\n",
                thisCtx->toString().c_str(), peerCtx->toString().c_str());
    }

    return hipSuccess;
}
/**
* HCC returns 0 in *canAccessPeer ; Need to update this function when RT supports P2P
*/
@@ -43,23 +70,7 @@ hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, hipCtx_t thisCtx, hipCtx_
{
HIP_INIT_API(canAccessPeer, thisCtx, peerCtx);
hipError_t err = hipSuccess;
if ((thisCtx != NULL) && (peerCtx != NULL)) {
if (thisCtx == peerCtx) {
*canAccessPeer = 0;
} else {
*canAccessPeer = peerCtx->getDevice()->_acc.get_is_peer(thisCtx->getDevice()->_acc);
}
} else {
*canAccessPeer = 0;
err = hipErrorInvalidDevice;
}
return ihipLogStatus(err);
return ihipLogStatus(ihipDeviceCanAccessPeer(canAccessPeer, thisCtx, peerCtx));
}
@@ -80,8 +91,10 @@ hipError_t ihipDisablePeerAccess (hipCtx_t peerCtx)
err = hipErrorInvalidDevice; // Can't disable peer access to self.
} else {
LockedAccessor_CtxCrit_t peerCrit(peerCtx->criticalData());
bool changed = peerCrit->removePeer(thisCtx);
bool changed = peerCrit->removePeerWatcher(peerCtx, thisCtx);
if (changed) {
tprintf(DB_COPY, "device %s disable access to memory allocated on peer:%s\n",
thisCtx->toString().c_str(), peerCtx->toString().c_str());
// Update the peers for all memory already saved in the tracker:
am_memtracker_update_peers(peerCtx->getDevice()->_acc, peerCrit->peerCnt(), peerCrit->peerAgents());
} else {
@@ -112,8 +125,10 @@ hipError_t ihipEnablePeerAccess (hipCtx_t peerCtx, unsigned int flags)
} else if ((thisCtx != NULL) && (peerCtx != NULL)) {
LockedAccessor_CtxCrit_t peerCrit(peerCtx->criticalData());
// Add thisCtx to peerCtx's access list so that new allocations on peer will be made visible to this device:
bool isNewPeer = peerCrit->addPeer(thisCtx);
bool isNewPeer = peerCrit->addPeerWatcher(peerCtx, thisCtx);
if (isNewPeer) {
tprintf(DB_COPY, "device=%s can now see all memory allocated on peer=%s\n",
thisCtx->toString().c_str(), peerCtx->toString().c_str());
am_memtracker_update_peers(peerCtx->getDevice()->_acc, peerCrit->peerCnt(), peerCrit->peerAgents());
} else {
err = hipErrorPeerAccessAlreadyEnabled;
@@ -158,7 +173,7 @@ hipError_t hipMemcpyPeerAsync (void* dst, hipCtx_t dstDevice, const void* src, h
// Device-ordinal flavor of hipDeviceCanAccessPeer: both device ids are mapped to
// contexts with ihipGetPrimaryCtx() and the query is forwarded with those contexts.
// NOTE(review): this excerpt is a rendered diff, so the two return statements below
// look like the before/after versions of a single line; as written, only the first
// one is reachable.
hipError_t hipDeviceCanAccessPeer (int* canAccessPeer, int deviceId, int peerDeviceId)
{
HIP_INIT_API(canAccessPeer, deviceId, peerDeviceId);
// Forwards to the public context-based overload.
return hipDeviceCanAccessPeer(canAccessPeer, ihipGetPrimaryCtx(deviceId), ihipGetPrimaryCtx(peerDeviceId));
// Forwards to the internal helper instead.
// NOTE(review): the context-based overload wraps this same helper call in ihipLogStatus();
// this line does not - confirm whether the status should be logged here too.
return ihipDeviceCanAccessPeer(canAccessPeer, ihipGetPrimaryCtx(deviceId), ihipGetPrimaryCtx(peerDeviceId));
}