From f7987aaa33ef0ca44f78ff4d55a4dafb078299b2 Mon Sep 17 00:00:00 2001 From: Ben Sander Date: Sun, 11 Sep 2016 06:50:20 -0500 Subject: [PATCH] Move isLargeBar to UnpinnedCopyEngine constructor. Change-Id: I7a7d3a40b1d4e0c6ec856658a6a70e5e70d287ce [ROCm/hip commit: 442d74f027b6809c106ee3b4f5ae7905c46983ce] --- .../include/hcc_detail/unpinned_copy_engine.h | 12 ++++++++---- projects/hip/src/hip_hcc.cpp | 16 ++++++++++++---- projects/hip/src/unpinned_copy_engine.cpp | 15 +++++++++------ 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/projects/hip/include/hcc_detail/unpinned_copy_engine.h b/projects/hip/include/hcc_detail/unpinned_copy_engine.h index 653beb89ee..678d714981 100644 --- a/projects/hip/include/hcc_detail/unpinned_copy_engine.h +++ b/projects/hip/include/hcc_detail/unpinned_copy_engine.h @@ -21,7 +21,7 @@ THE SOFTWARE. #ifndef STAGING_BUFFER_H #define STAGING_BUFFER_H -#include "hsa.h" +#include "hsa/hsa.h" //------------------------------------------------------------------------------------------------- @@ -43,18 +43,19 @@ struct UnpinnedCopyEngine { static const int _max_buffers = 4; - UnpinnedCopyEngine(hsa_agent_t hsaAgent,hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers,int thresholdH2D_directStaging,int thresholdH2D_stagingPinInPlace,int thresholdD2H) ; + UnpinnedCopyEngine(hsa_agent_t hsaAgent,hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers, + bool isLargeBar, int thresholdH2D_directStaging, int thresholdH2D_stagingPinInPlace, int thresholdD2H) ; ~UnpinnedCopyEngine(); // Use hueristic to choose best copy algorithm - void CopyHostToDevice(CopyMode copyMode, int isLargeBar,void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor); + void CopyHostToDevice(CopyMode copyMode, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor); void CopyDeviceToHost(CopyMode copyMode, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor); // Specific H2D copy algorithm implementations: void CopyHostToDeviceStaging(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor); void CopyHostToDevicePinInPlace(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor); - void CopyHostToDeviceMemcpy(int isLargeBar, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor); + void CopyHostToDeviceMemcpy(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor); // Specific D2H copy algorithm implementations: @@ -72,6 +73,9 @@ private: size_t _bufferSize; // Size of the buffers. int _numBuffers; + // True if system supports large-bar and thus can benefit from CPU directly performing copy operation. + bool _isLargeBar; + char *_pinnedStagingBuffer[_max_buffers]; hsa_signal_t _completionSignal[_max_buffers]; hsa_signal_t _completionSignal2[_max_buffers]; // P2P needs another set of signals. diff --git a/projects/hip/src/hip_hcc.cpp b/projects/hip/src/hip_hcc.cpp index 78285cefa0..294f8d8e89 100644 --- a/projects/hip/src/hip_hcc.cpp +++ b/projects/hip/src/hip_hcc.cpp @@ -675,8 +675,16 @@ ihipDevice_t::ihipDevice_t(unsigned deviceId, unsigned deviceCnt, hc::accelerato initProperties(&_props); - _stagingBuffer[0] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS,HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING,HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE,HIP_D2H_MEM_TRANSFER_THRESHOLD); - _stagingBuffer[1] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS,HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING,HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE,HIP_D2H_MEM_TRANSFER_THRESHOLD); + _stagingBuffer[0] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS, + _isLargeBar, + HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING, + HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE, + HIP_D2H_MEM_TRANSFER_THRESHOLD); + _stagingBuffer[1] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS, + _isLargeBar, + HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING, + HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE, + HIP_D2H_MEM_TRANSFER_THRESHOLD); _primaryCtx = new ihipCtx_t(this, deviceCnt, hipDeviceMapHost); } @@ -925,7 +933,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop) FindDevicePool(); int access=checkAccess(g_cpu_agent, gpu_pool_); - if (0!= access){ + if (0 != access){ _isLargeBar= 1; } else { _isLargeBar=0; @@ -1745,7 +1753,7 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const } else if (HIP_PININPLACE) { copyMode = UnpinnedCopyEngine::UsePinInPlace; } - device->_stagingBuffer[0]->CopyHostToDevice(copyMode, device->_isLargeBar, dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL); + device->_stagingBuffer[0]->CopyHostToDevice(copyMode, dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL); // The copy waits for inputs and then completes before returning so can reset queue to empty: this->wait(crit, true); } diff --git a/projects/hip/src/unpinned_copy_engine.cpp b/projects/hip/src/unpinned_copy_engine.cpp index f446220e7a..abeb5910e4 100644 --- a/projects/hip/src/unpinned_copy_engine.cpp +++ b/projects/hip/src/unpinned_copy_engine.cpp @@ -19,7 +19,7 @@ THE SOFTWARE. #include -#include "hsa_ext_amd.h" +#include #include "hcc_detail/unpinned_copy_engine.h" @@ -62,11 +62,14 @@ hsa_status_t findGlobalPool(hsa_amd_memory_pool_t pool, void* data) { } //------------------------------------------------------------------------------------------------- -UnpinnedCopyEngine::UnpinnedCopyEngine(hsa_agent_t hsaAgent, hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers, int thresholdH2DDirectStaging,int thresholdH2DStagingPinInPlace,int thresholdD2H) : +UnpinnedCopyEngine::UnpinnedCopyEngine(hsa_agent_t hsaAgent, hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers, + bool isLargeBar, int thresholdH2DDirectStaging, + int thresholdH2DStagingPinInPlace, int thresholdD2H) : _hsaAgent(hsaAgent), _cpuAgent(cpuAgent), _bufferSize(bufferSize), _numBuffers(numBuffers > _max_buffers ? _max_buffers : numBuffers), + _isLargeBar(isLargeBar), _hipH2DTransferThresholdDirectOrStaging(thresholdH2DDirectStaging), _hipH2DTransferThresholdStagingOrPininplace(thresholdH2DStagingPinInPlace), _hipD2HTransferThreshold(thresholdD2H) @@ -160,9 +163,9 @@ void UnpinnedCopyEngine::CopyHostToDevicePinInPlace(void* dst, const void* src, // Copy using simple memcpy. Only works on large-bar systems. -void UnpinnedCopyEngine::CopyHostToDeviceMemcpy(int isLargeBar, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor) +void UnpinnedCopyEngine::CopyHostToDeviceMemcpy(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor) { - if (!isLargeBar) { + if (!_isLargeBar) { THROW_ERROR (hipErrorInvalidValue); } @@ -172,10 +175,10 @@ void UnpinnedCopyEngine::CopyHostToDeviceMemcpy(int isLargeBar, void* dst, const -void UnpinnedCopyEngine::CopyHostToDevice(UnpinnedCopyEngine::CopyMode copyMode, int isLargeBar,void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor) +void UnpinnedCopyEngine::CopyHostToDevice(UnpinnedCopyEngine::CopyMode copyMode, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor) { if (copyMode == ChooseBest) { - if (isLargeBar && (sizeBytes < _hipH2DTransferThresholdDirectOrStaging)) { + if (_isLargeBar && (sizeBytes < _hipH2DTransferThresholdDirectOrStaging)) { copyMode = UseMemcpy; } else if (sizeBytes > _hipH2DTransferThresholdStagingOrPininplace) { copyMode = UsePinInPlace;