diff --git a/hipamd/CMakeLists.txt b/hipamd/CMakeLists.txt index 8958eaa090..59d3507c20 100644 --- a/hipamd/CMakeLists.txt +++ b/hipamd/CMakeLists.txt @@ -142,6 +142,16 @@ if(NOT DEFINED COMPILE_HIP_ATP_MARKER) endif() add_to_config(_buildInfo COMPILE_HIP_ATP_MARKER) +# Check if we need to force finegrained system memory allocation +if(NOT DEFINED HIP_COHERENT_HOST_ALLOC) + if(NOT DEFINED ENV{HIP_COHERENT_HOST_ALLOC}) + set(HIP_COHERENT_HOST_ALLOC 0) + else() + set(HIP_COHERENT_HOST_ALLOC $ENV{HIP_COHERENT_HOST_ALLOC}) + endif() +endif() +add_to_config(_buildInfo HIP_COHERENT_HOST_ALLOC) + ############################# # Build steps ############################# diff --git a/hipamd/src/hip_hcc.h b/hipamd/src/hip_hcc.h index 4ebf002a58..f18d68473d 100644 --- a/hipamd/src/hip_hcc.h +++ b/hipamd/src/hip_hcc.h @@ -75,7 +75,7 @@ private: int _shortTid; // monotonically increasing API sequence number for this threa. - uint64_t _apiSeqNum; + uint64_t _apiSeqNum; }; struct ProfTrigger { @@ -155,6 +155,12 @@ extern const char *API_COLOR_END; #endif +// Compile code that force hipHostMalloc only allocates finegrained system memory. +#ifndef HIP_COHERENT_HOST_ALLOC +#define HIP_COHERENT_HOST_ALLOC 0 +#endif + + // Compile support for trace markers that are displayed on CodeXL GUI at start/stop of each function boundary. // TODO - currently we print the trace message at the beginning. if we waited, we could also include return codes, and any values returned @@ -169,8 +175,8 @@ extern const char *API_COLOR_END; // Swallow scoped markers: #define MARKER_BEGIN(markerName,group) #define MARKER_END() -#define RESUME_PROFILING -#define STOP_PROFILING +#define RESUME_PROFILING +#define STOP_PROFILING #endif @@ -246,7 +252,7 @@ static const DbName dbName [] = {KRED, "signal"}, }; - + #if COMPILE_HIP_DB #define tprintf(trace_level, ...) {\ @@ -467,7 +473,7 @@ public: void launchModuleKernel(hc::accelerator_view av, hsa_signal_t signal, uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ, uint32_t gridDimX, uint32_t gridDimY, uint32_t gridDimZ, - uint32_t groupSegmentSize, uint32_t sharedMemBytes, + uint32_t groupSegmentSize, uint32_t sharedMemBytes, void *kernarg, size_t kernSize, uint64_t kernel); @@ -490,7 +496,7 @@ private: // The unsigned return is hipMemcpyKind unsigned resolveMemcpyDirection(bool srcInDeviceMem, bool dstInDeviceMem); - void resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo, + void resolveHcMemcpyDirection(unsigned hipMemKind, const hc::AmPointerInfo *dstPtrInfo, const hc::AmPointerInfo *srcPtrInfo, hc::hcCommandKind *hcCopyDir, bool *forceHostCopyEngine); bool canSeePeerMemory(const ihipCtx_t *thisCtx, const hc::AmPointerInfo *dstInfo, const hc::AmPointerInfo *srcInfo); @@ -662,7 +668,7 @@ public: // Functions: // TODO - review uses of getWriteableDevice(), can these be converted to getDevice() ihipDevice_t *getWriteableDevice() const { return _device; }; - std::string toString() const; + std::string toString() const; public: // Data // The NULL stream is used if no other stream is specified. @@ -751,7 +757,7 @@ inline std::ostream& operator<<(std::ostream& os, const hipEvent_t& e) inline std::ostream& operator<<(std::ostream& os, const ihipCtx_t* c) { - os << "ctx:" << static_cast (c) + os << "ctx:" << static_cast (c) << ".dev:" << c->getDevice()->_deviceId; return os; } diff --git a/hipamd/src/hip_memory.cpp b/hipamd/src/hip_memory.cpp index e59e6c261d..ee05c6b00a 100644 --- a/hipamd/src/hip_memory.cpp +++ b/hipamd/src/hip_memory.cpp @@ -126,7 +126,7 @@ hipError_t hipMalloc(void** ptr, size_t sizeBytes) int peerCnt=0; { LockedAccessor_CtxCrit_t crit(ctx->criticalData()); - // the peerCnt always stores self so make sure the trace actually + // the peerCnt always stores self so make sure the trace actually peerCnt = crit->peerCnt(); if (peerCnt > 1) { hsa_amd_agents_allow_access(crit->peerCnt(), crit->peerAgents(), NULL, *ptr); @@ -154,8 +154,8 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) if(ctx){ // am_alloc requires writeable __acc, perhaps could be refactored? auto device = ctx->getWriteableDevice(); - // If HIP_COHERENT_HOST_ALLOC is defined, we always alloc coherent host system memroy - #ifdef HIP_COHERENT_HOST_ALLOC + // If HIP_COHERENT_HOST_ALLOC is defined, we always alloc coherent host system memroy +#if HIP_COHERENT_HOST_ALLOC *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); if(sizeBytes < 1 && (*ptr == NULL)){ hip_status = hipErrorMemoryAllocation; @@ -163,7 +163,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) hc::am_memtracker_update(*ptr, device->_deviceId, amHostCoherent); } tprintf(DB_MEM, " %s: pinned ptr=%p\n", __func__, *ptr); - #else +#else if((flags == hipHostMallocDefault)|| (flags == hipHostMallocPortable)){ *ptr = hc::am_alloc(sizeBytes, device->_acc, amHostPinned); if (sizeBytes < 1 && (*ptr == NULL)) { @@ -189,7 +189,7 @@ hipError_t hipHostMalloc(void** ptr, size_t sizeBytes, unsigned int flags) tprintf(DB_MEM, "allocated pinned host ptr=%p on dev=%d, allow access to %d peer(s)\n", *ptr, device->_deviceId, peerCnt); } } - #endif //HIP_COHERENT_HOST_ALLOC +#endif //HIP_COHERENT_HOST_ALLOC } return ihipLogStatus(hip_status); }