Move isLargeBar to UnpinnedCopyEngine constructor.

Change-Id: I7a7d3a40b1d4e0c6ec856658a6a70e5e70d287ce


[ROCm/hip commit: 442d74f027]
Этот коммит содержится в:
Ben Sander
2016-09-11 06:50:20 -05:00
родитель 7935c8b929
Коммит f7987aaa33
3 изменённых файла: 29 добавлений и 14 удалений
+8 -4
Просмотреть файл
@@ -21,7 +21,7 @@ THE SOFTWARE.
#ifndef STAGING_BUFFER_H
#define STAGING_BUFFER_H
#include "hsa.h"
#include "hsa/hsa.h"
//-------------------------------------------------------------------------------------------------
@@ -43,18 +43,19 @@ struct UnpinnedCopyEngine {
static const int _max_buffers = 4;
UnpinnedCopyEngine(hsa_agent_t hsaAgent,hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers,int thresholdH2D_directStaging,int thresholdH2D_stagingPinInPlace,int thresholdD2H) ;
UnpinnedCopyEngine(hsa_agent_t hsaAgent,hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers,
bool isLargeBar, int thresholdH2D_directStaging, int thresholdH2D_stagingPinInPlace, int thresholdD2H) ;
~UnpinnedCopyEngine();
// Use heuristic to choose best copy algorithm
void CopyHostToDevice(CopyMode copyMode, int isLargeBar,void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
void CopyHostToDevice(CopyMode copyMode, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
void CopyDeviceToHost(CopyMode copyMode, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
// Specific H2D copy algorithm implementations:
void CopyHostToDeviceStaging(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
void CopyHostToDevicePinInPlace(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
void CopyHostToDeviceMemcpy(int isLargeBar, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
void CopyHostToDeviceMemcpy(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
// Specific D2H copy algorithm implementations:
@@ -72,6 +73,9 @@ private:
size_t _bufferSize; // Size of the buffers.
int _numBuffers;
// True if system supports large-bar and thus can benefit from CPU directly performing copy operation.
bool _isLargeBar;
char *_pinnedStagingBuffer[_max_buffers];
hsa_signal_t _completionSignal[_max_buffers];
hsa_signal_t _completionSignal2[_max_buffers]; // P2P needs another set of signals.
+12 -4
Просмотреть файл
@@ -675,8 +675,16 @@ ihipDevice_t::ihipDevice_t(unsigned deviceId, unsigned deviceCnt, hc::accelerato
initProperties(&_props);
_stagingBuffer[0] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS,HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING,HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE,HIP_D2H_MEM_TRANSFER_THRESHOLD);
_stagingBuffer[1] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS,HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING,HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE,HIP_D2H_MEM_TRANSFER_THRESHOLD);
_stagingBuffer[0] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS,
_isLargeBar,
HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING,
HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE,
HIP_D2H_MEM_TRANSFER_THRESHOLD);
_stagingBuffer[1] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS,
_isLargeBar,
HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING,
HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE,
HIP_D2H_MEM_TRANSFER_THRESHOLD);
_primaryCtx = new ihipCtx_t(this, deviceCnt, hipDeviceMapHost);
}
@@ -925,7 +933,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
FindDevicePool();
int access=checkAccess(g_cpu_agent, gpu_pool_);
if (0!= access){
if (0 != access){
_isLargeBar= 1;
} else {
_isLargeBar=0;
@@ -1745,7 +1753,7 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
} else if (HIP_PININPLACE) {
copyMode = UnpinnedCopyEngine::UsePinInPlace;
}
device->_stagingBuffer[0]->CopyHostToDevice(copyMode, device->_isLargeBar, dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
device->_stagingBuffer[0]->CopyHostToDevice(copyMode, dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
// The copy waits for inputs and then completes before returning so can reset queue to empty:
this->wait(crit, true);
}
+9 -6
Просмотреть файл
@@ -19,7 +19,7 @@ THE SOFTWARE.
#include <hc_am.hpp>
#include "hsa_ext_amd.h"
#include <hsa/hsa_ext_amd.h>
#include "hcc_detail/unpinned_copy_engine.h"
@@ -62,11 +62,14 @@ hsa_status_t findGlobalPool(hsa_amd_memory_pool_t pool, void* data) {
}
//-------------------------------------------------------------------------------------------------
UnpinnedCopyEngine::UnpinnedCopyEngine(hsa_agent_t hsaAgent, hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers, int thresholdH2DDirectStaging,int thresholdH2DStagingPinInPlace,int thresholdD2H) :
UnpinnedCopyEngine::UnpinnedCopyEngine(hsa_agent_t hsaAgent, hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers,
bool isLargeBar, int thresholdH2DDirectStaging,
int thresholdH2DStagingPinInPlace, int thresholdD2H) :
_hsaAgent(hsaAgent),
_cpuAgent(cpuAgent),
_bufferSize(bufferSize),
_numBuffers(numBuffers > _max_buffers ? _max_buffers : numBuffers),
_isLargeBar(isLargeBar),
_hipH2DTransferThresholdDirectOrStaging(thresholdH2DDirectStaging),
_hipH2DTransferThresholdStagingOrPininplace(thresholdH2DStagingPinInPlace),
_hipD2HTransferThreshold(thresholdD2H)
@@ -160,9 +163,9 @@ void UnpinnedCopyEngine::CopyHostToDevicePinInPlace(void* dst, const void* src,
// Copy using simple memcpy. Only works on large-bar systems.
void UnpinnedCopyEngine::CopyHostToDeviceMemcpy(int isLargeBar, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor)
void UnpinnedCopyEngine::CopyHostToDeviceMemcpy(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor)
{
if (!isLargeBar) {
if (!_isLargeBar) {
THROW_ERROR (hipErrorInvalidValue);
}
@@ -172,10 +175,10 @@ void UnpinnedCopyEngine::CopyHostToDeviceMemcpy(int isLargeBar, void* dst, const
void UnpinnedCopyEngine::CopyHostToDevice(UnpinnedCopyEngine::CopyMode copyMode, int isLargeBar,void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor)
void UnpinnedCopyEngine::CopyHostToDevice(UnpinnedCopyEngine::CopyMode copyMode, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor)
{
if (copyMode == ChooseBest) {
if (isLargeBar && (sizeBytes < _hipH2DTransferThresholdDirectOrStaging)) {
if (_isLargeBar && (sizeBytes < _hipH2DTransferThresholdDirectOrStaging)) {
copyMode = UseMemcpy;
} else if (sizeBytes > _hipH2DTransferThresholdStagingOrPininplace) {
copyMode = UsePinInPlace;