- Bug fix for peer visibility.  Now contexts correctly detect when they can use SDMA for P2P vs staging buffers.
- Interface to new HCC copy_ext function.
- Improve context and peer print /debug options.
- Add comments and usage to hipPeerToPeer_simple test.
Этот коммит содержится в:
Ben Sander
2016-09-22 10:39:17 -05:00
родитель a397e60ba9
Коммит 944d4eaa18
4 изменённых файлов: 126 добавлений и 12 удалений
+14 -1
Просмотреть файл
@@ -416,6 +416,8 @@ private:
// The unsigned return is hipMemcpyKind
unsigned resolveMemcpyDirection(bool srcTracked, bool dstTracked, bool srcInDeviceMem, bool dstInDeviceMem);
bool canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx, ihipCtx_t *srcCtx);
private: // Data
// Critical Data - MUST be accessed through LockedAccessor_StreamCrit_t
@@ -516,10 +518,11 @@ public:
// Peer Accessor classes:
bool isPeer(const ihipCtx_t *peer); // returns Trus if peer has access to memory physically located on this device.
bool isPeer(const ihipCtx_t *peer); // returns True if peer has access to memory physically located on this device.
bool addPeer(ihipCtx_t *peer);
bool removePeer(ihipCtx_t *peer);
void resetPeers(ihipCtx_t *thisDevice);
void printPeers(FILE *f) const;
uint32_t peerCnt() const { return _peerCnt; };
hsa_agent_t *peerAgents() const { return _peerAgents; };
@@ -535,6 +538,7 @@ private:
//--- Peer Tracker:
// These reflect the currently Enabled set of peers for this GPU:
// Enabled peers have permissions to access the memory physically allocated on this device.
// Note the peers always contain the self agent for easy interfacing with HSA APIs.
std::list<ihipCtx_t*> _peers; // list of enabled peer devices.
uint32_t _peerCnt; // number of enabled peers
hsa_agent_t *_peerAgents; // efficient packed array of enabled agents (to use for allocations.)
@@ -578,6 +582,8 @@ public: // Functions:
// TODO - review uses of getWriteableDevice(), can these be converted to getDevice()
ihipDevice_t *getWriteableDevice() const { return _device; };
std::string toString() const;
public: // Data
// The NULL stream is used if no other stream is specified.
// Default stream has special synchronization properties with other streams.
@@ -663,5 +669,12 @@ inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e)
return os;
}
inline std::ostream& operator<<(std::ostream& os, const ihipCtx_t* c)
{
os << "ctx:" << static_cast<const void*> (c)
<< " dev:" << c->getDevice()->_deviceId;
return os;
}
#endif
+94 -9
Просмотреть файл
@@ -381,6 +381,18 @@ void ihipCtxCriticalBase_t<CtxMutex>::resetPeers(ihipCtx_t *thisDevice)
}
template<>
void ihipCtxCriticalBase_t<CtxMutex>::printPeers(FILE *f) const
{
for (auto iter = _peers.begin(); iter!=_peers.end(); iter++) {
fprintf (f, "%s ", (*iter)->toString().c_str());
};
}
template<>
void ihipCtxCriticalBase_t<CtxMutex>::addStream(ihipStream_t *stream)
{
@@ -784,6 +796,13 @@ void ihipCtx_t::locked_reset()
};
//---
std::string ihipCtx_t::toString() const
{
std::ostringstream ss;
ss << this;
return ss.str();
};
//----
@@ -1328,8 +1347,36 @@ void ihipSetTs(hipEvent_t e)
}
// Returns true if thisCtx can see the memory allocated on dstCtx and srcCtx.
// The peer-list for a context controls which contexts have access to the memory allocated on that context.
// So we check dstCtx's and srcCtx's peerList to see if the booth include thisCtx.
bool ihipStream_t::canSeePeerMemory(const ihipCtx_t *thisCtx, ihipCtx_t *dstCtx, ihipCtx_t *srcCtx)
{
tprintf (DB_COPY1, "Checking if direct copy can be used. thisCtx:%s; dstCtx:%s ; srcCtx:%s\n",
thisCtx->toString().c_str(), dstCtx->toString().c_str(), srcCtx->toString().c_str());
// Use blocks to control scope of critical sections.
{
LockedAccessor_CtxCrit_t ctxCrit(dstCtx->criticalData());
if (!ctxCrit->isPeer(thisCtx)) {
return false;
};
}
{
LockedAccessor_CtxCrit_t ctxCrit(srcCtx->criticalData());
if (!ctxCrit->isPeer(thisCtx)) {
return false;
};
}
return true;
};
// Resolve hipMemcpyDefault to a known type.
// TODO - review why is this so complicated, does this need srcTracked and dstTracked?
unsigned ihipStream_t::resolveMemcpyDirection(bool srcTracked, bool dstTracked, bool srcInDeviceMem, bool dstInDeviceMem)
{
hipMemcpyKind kind = hipMemcpyDefault;
@@ -1358,6 +1405,7 @@ unsigned ihipStream_t::resolveMemcpyDirection(bool srcTracked, bool dstTracked,
}
// TODO - remove kind parm from here or use it below?
void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const void* src, size_t sizeBytes, unsigned kind, bool resolveOn)
{
ihipCtx_t *ctx = this->getCtx();
@@ -1367,7 +1415,38 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
throw ihipException(hipErrorInvalidDevice);
}
crit->_av.copy(src, dst, sizeBytes);
hc::accelerator acc;
hc::AmPointerInfo dstPtrInfo(NULL, NULL, 0, acc, 0, 0);
hc::AmPointerInfo srcPtrInfo(NULL, NULL, 0, acc, 0, 0);
bool dstTracked = (hc::am_memtracker_getinfo(&dstPtrInfo, dst) == AM_SUCCESS);
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
if (kind == hipMemcpyDefault) {
kind = resolveMemcpyDirection(srcTracked, dstTracked, srcPtrInfo._isInDeviceMem, dstPtrInfo._isInDeviceMem);
}
hc::hcCommandKind hcCopyDir;
switch (kind) {
case hipMemcpyHostToHost: hcCopyDir = hc::hcMemcpyHostToHost; break;
case hipMemcpyHostToDevice: hcCopyDir = hc::hcMemcpyHostToDevice; break;
case hipMemcpyDeviceToHost: hcCopyDir = hc::hcMemcpyDeviceToHost; break;
case hipMemcpyDeviceToDevice: hcCopyDir = hc::hcMemcpyDeviceToDevice; break;
};
// If this is P2P accessi, we need to check to see if the copy agent (specified by the stream where the copy is enqueued)
// has peer access enabled to both the source and dest. If this is true, then the copy agent can see both pointers
// and we can perform the access with the copy engine from the current stream. If not true, then we will copy through the host. (forceHostCopyEngine=true).
bool forceHostCopyEngine = false;
if (hcCopyDir == hc::hcMemcpyDeviceToDevice) {
if (!canSeePeerMemory(ctx, ihipGetPrimaryCtx(dstPtrInfo._appId), ihipGetPrimaryCtx(srcPtrInfo._appId))) {
forceHostCopyEngine = true;
tprintf (DB_COPY1, "Forcing use of host copy engine.\n");
} else {
tprintf (DB_COPY1, "Will use SDMA engine on streamDevice=%s.\n", ctx->toString().c_str());
}
};
crit->_av.copy_ext(src, dst, sizeBytes, hcCopyDir, srcPtrInfo, dstPtrInfo, forceHostCopyEngine);
}
@@ -1410,16 +1489,23 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
bool srcTracked = (hc::am_memtracker_getinfo(&srcPtrInfo, src) == AM_SUCCESS);
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
// If both pointers are not tracked, we need to fall back to a sync copy.
if (!dstTracked || !srcTracked) {
trueAsync = false;
bool copyEngineCanSeeSrcAndDest = true;
if (kind == hipMemcpyDeviceToDevice) {
copyEngineCanSeeSrcAndDest = canSeePeerMemory(ctx, ihipGetPrimaryCtx(dstPtrInfo._appId), ihipGetPrimaryCtx(srcPtrInfo._appId));
}
// "tracked" really indicates if the pointer's virtual address is available in the GPU address space.
// If both pointers are not tracked, we need to fall back to a sync copy.
if (!dstTracked || !srcTracked || !copyEngineCanSeeSrcAndDest) {
trueAsync = false;
}
if (trueAsync == true) {
// Perform a syncrhonous copy:
// Perform a synchronous copy:
try {
crit->_av.copy_async(src, dst, sizeBytes);
} catch (Kalmar::runtime_exception) {
@@ -1432,10 +1518,8 @@ void ihipStream_t::copyAsync(void* dst, const void* src, size_t sizeBytes, unsig
this->wait(crit);
}
} else {
// Perform a syncrhonous copy:
// Perform a synchronous copy:
if (kind == hipMemcpyDefault) {
bool srcInDeviceMem = (srcTracked && srcPtrInfo._isInDeviceMem);
bool dstInDeviceMem = (dstTracked && dstPtrInfo._isInDeviceMem);
kind = resolveMemcpyDirection(srcTracked, dstTracked, srcPtrInfo._isInDeviceMem, dstPtrInfo._isInDeviceMem);
}
copySync(crit, dst, src, sizeBytes, kind);
@@ -1481,3 +1565,4 @@ hipError_t hipHccGetAcceleratorView(hipStream_t stream, hc::accelerator_view **a
}
//// TODO - add identifier numbers for streams and devices to help with debugging.
//TODO - add a contect sequence number for debug. Print operator<< ctx:0.1 (device.ctx)
+1 -1
Просмотреть файл
@@ -104,7 +104,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes)
if (sizeBytes == 0)
{
*ptr = NULL;
return ihipLogStatus(hip_status);
return ihipLogStatus(hipSuccess);
}
auto ctx = ihipGetTlsDefaultCtx();
+17 -1
Просмотреть файл
@@ -33,6 +33,15 @@ int p_peerDevice = -1; // explicly specify which peer to use, else use p_gpuDe
int g_currentDevice;
int g_peerDevice;
void help(char *argv[])
{
printf ("usage: %s [OPTIONS]\n", argv[0]);
printf (" --memcpyWithPeer : Perform memcpy with peer.\n");
printf (" --mirrorPeersi : Mirror memory onto both default device and peerdevice. If 0, memory is mapped only on the default device.\n");
printf (" --peerDevice N : Set peer device.\n");
};
void parseMyArguments(int argc, char *argv[])
{
int more_argc = HipTest::parseStandardArguments(argc, argv, false);
@@ -40,7 +49,10 @@ void parseMyArguments(int argc, char *argv[])
for (int i = 1; i < more_argc; i++) {
const char *arg = argv[i];
if (!strcmp(arg, "--memcpyWithPeer")) {
if (!strcmp(arg, "--help")) {
help(argv);
exit(-1);
} else if (!strcmp(arg, "--memcpyWithPeer")) {
p_memcpyWithPeer = true;
} else if (!strcmp(arg, "--mirrorPeers")) {
p_mirrorPeers = true;
@@ -90,10 +102,12 @@ void enablePeerFirst()
setupPeerTests();
// Always enable g_currentDevice to see the allocations on peerDevice.
HIPCHECK(hipSetDevice(g_currentDevice));
HIPCHECK(hipDeviceEnablePeerAccess(g_peerDevice, 0));
if (p_mirrorPeers) {
// Mirror peers allows the peer device to see the allocations on currentDevice.
int canAccessPeer;
HIPCHECK(hipDeviceCanAccessPeer(&canAccessPeer, g_peerDevice, g_currentDevice));
assert(canAccessPeer);
@@ -122,6 +136,8 @@ void enablePeerFirst()
// Device0 push to device1, using P2P:
// NOTE : if p_mirrorPeers=0 and p_memcpyWithPeer=1, then peer device does not have mapping for A_d1 and we need to use a
// a host staging copy for the P2P access.
HIPCHECK (hipSetDevice(p_memcpyWithPeer ? g_peerDevice : g_currentDevice));
HIPCHECK (hipMemcpy(A_d1, A_d0, Nbytes, hipMemcpyDefault)); // This is P2P copy.