Move isLargeBar to UnpinnedCopyEngine constructor.
Change-Id: I7a7d3a40b1d4e0c6ec856658a6a70e5e70d287ce
[ROCm/hip commit: 442d74f027]
Этот коммит содержится в:
@@ -21,7 +21,7 @@ THE SOFTWARE.
|
||||
#ifndef STAGING_BUFFER_H
|
||||
#define STAGING_BUFFER_H
|
||||
|
||||
#include "hsa.h"
|
||||
#include "hsa/hsa.h"
|
||||
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
@@ -43,18 +43,19 @@ struct UnpinnedCopyEngine {
|
||||
|
||||
static const int _max_buffers = 4;
|
||||
|
||||
UnpinnedCopyEngine(hsa_agent_t hsaAgent,hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers,int thresholdH2D_directStaging,int thresholdH2D_stagingPinInPlace,int thresholdD2H) ;
|
||||
UnpinnedCopyEngine(hsa_agent_t hsaAgent,hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers,
|
||||
bool isLargeBar, int thresholdH2D_directStaging, int thresholdH2D_stagingPinInPlace, int thresholdD2H) ;
|
||||
~UnpinnedCopyEngine();
|
||||
|
||||
// Use a heuristic to choose the best copy algorithm
|
||||
void CopyHostToDevice(CopyMode copyMode, int isLargeBar,void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
|
||||
void CopyHostToDevice(CopyMode copyMode, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
|
||||
void CopyDeviceToHost(CopyMode copyMode, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
|
||||
|
||||
|
||||
// Specific H2D copy algorithm implementations:
|
||||
void CopyHostToDeviceStaging(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
|
||||
void CopyHostToDevicePinInPlace(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
|
||||
void CopyHostToDeviceMemcpy(int isLargeBar, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
|
||||
void CopyHostToDeviceMemcpy(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor);
|
||||
|
||||
|
||||
// Specific D2H copy algorithm implementations:
|
||||
@@ -72,6 +73,9 @@ private:
|
||||
size_t _bufferSize; // Size of the buffers.
|
||||
int _numBuffers;
|
||||
|
||||
// True if the system supports large-bar and can thus benefit from the CPU performing the copy operation directly.
|
||||
bool _isLargeBar;
|
||||
|
||||
char *_pinnedStagingBuffer[_max_buffers];
|
||||
hsa_signal_t _completionSignal[_max_buffers];
|
||||
hsa_signal_t _completionSignal2[_max_buffers]; // P2P needs another set of signals.
|
||||
|
||||
@@ -675,8 +675,16 @@ ihipDevice_t::ihipDevice_t(unsigned deviceId, unsigned deviceCnt, hc::accelerato
|
||||
|
||||
initProperties(&_props);
|
||||
|
||||
_stagingBuffer[0] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS,HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING,HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE,HIP_D2H_MEM_TRANSFER_THRESHOLD);
|
||||
_stagingBuffer[1] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS,HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING,HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE,HIP_D2H_MEM_TRANSFER_THRESHOLD);
|
||||
_stagingBuffer[0] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS,
|
||||
_isLargeBar,
|
||||
HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING,
|
||||
HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE,
|
||||
HIP_D2H_MEM_TRANSFER_THRESHOLD);
|
||||
_stagingBuffer[1] = new UnpinnedCopyEngine(_hsaAgent,g_cpu_agent, HIP_STAGING_SIZE*1024, HIP_STAGING_BUFFERS,
|
||||
_isLargeBar,
|
||||
HIP_H2D_MEM_TRANSFER_THRESHOLD_DIRECT_OR_STAGING,
|
||||
HIP_H2D_MEM_TRANSFER_THRESHOLD_STAGING_OR_PININPLACE,
|
||||
HIP_D2H_MEM_TRANSFER_THRESHOLD);
|
||||
|
||||
_primaryCtx = new ihipCtx_t(this, deviceCnt, hipDeviceMapHost);
|
||||
}
|
||||
@@ -925,7 +933,7 @@ hipError_t ihipDevice_t::initProperties(hipDeviceProp_t* prop)
|
||||
|
||||
FindDevicePool();
|
||||
int access=checkAccess(g_cpu_agent, gpu_pool_);
|
||||
if (0!= access){
|
||||
if (0 != access){
|
||||
_isLargeBar= 1;
|
||||
} else {
|
||||
_isLargeBar=0;
|
||||
@@ -1745,7 +1753,7 @@ void ihipStream_t::copySync(LockedAccessor_StreamCrit_t &crit, void* dst, const
|
||||
} else if (HIP_PININPLACE) {
|
||||
copyMode = UnpinnedCopyEngine::UsePinInPlace;
|
||||
}
|
||||
device->_stagingBuffer[0]->CopyHostToDevice(copyMode, device->_isLargeBar, dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
|
||||
device->_stagingBuffer[0]->CopyHostToDevice(copyMode, dst, src, sizeBytes, depSignalCnt ? &depSignal : NULL);
|
||||
// The copy waits for its inputs and then completes before returning, so we can reset the queue to empty:
|
||||
this->wait(crit, true);
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ THE SOFTWARE.
|
||||
|
||||
#include <hc_am.hpp>
|
||||
|
||||
#include "hsa_ext_amd.h"
|
||||
#include <hsa/hsa_ext_amd.h>
|
||||
|
||||
#include "hcc_detail/unpinned_copy_engine.h"
|
||||
|
||||
@@ -62,11 +62,14 @@ hsa_status_t findGlobalPool(hsa_amd_memory_pool_t pool, void* data) {
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------------------------------
|
||||
UnpinnedCopyEngine::UnpinnedCopyEngine(hsa_agent_t hsaAgent, hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers, int thresholdH2DDirectStaging,int thresholdH2DStagingPinInPlace,int thresholdD2H) :
|
||||
UnpinnedCopyEngine::UnpinnedCopyEngine(hsa_agent_t hsaAgent, hsa_agent_t cpuAgent, size_t bufferSize, int numBuffers,
|
||||
bool isLargeBar, int thresholdH2DDirectStaging,
|
||||
int thresholdH2DStagingPinInPlace, int thresholdD2H) :
|
||||
_hsaAgent(hsaAgent),
|
||||
_cpuAgent(cpuAgent),
|
||||
_bufferSize(bufferSize),
|
||||
_numBuffers(numBuffers > _max_buffers ? _max_buffers : numBuffers),
|
||||
_isLargeBar(isLargeBar),
|
||||
_hipH2DTransferThresholdDirectOrStaging(thresholdH2DDirectStaging),
|
||||
_hipH2DTransferThresholdStagingOrPininplace(thresholdH2DStagingPinInPlace),
|
||||
_hipD2HTransferThreshold(thresholdD2H)
|
||||
@@ -160,9 +163,9 @@ void UnpinnedCopyEngine::CopyHostToDevicePinInPlace(void* dst, const void* src,
|
||||
|
||||
|
||||
// Copy using simple memcpy. Only works on large-bar systems.
|
||||
void UnpinnedCopyEngine::CopyHostToDeviceMemcpy(int isLargeBar, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor)
|
||||
void UnpinnedCopyEngine::CopyHostToDeviceMemcpy(void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor)
|
||||
{
|
||||
if (!isLargeBar) {
|
||||
if (!_isLargeBar) {
|
||||
THROW_ERROR (hipErrorInvalidValue);
|
||||
}
|
||||
|
||||
@@ -172,10 +175,10 @@ void UnpinnedCopyEngine::CopyHostToDeviceMemcpy(int isLargeBar, void* dst, const
|
||||
|
||||
|
||||
|
||||
void UnpinnedCopyEngine::CopyHostToDevice(UnpinnedCopyEngine::CopyMode copyMode, int isLargeBar,void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor)
|
||||
void UnpinnedCopyEngine::CopyHostToDevice(UnpinnedCopyEngine::CopyMode copyMode, void* dst, const void* src, size_t sizeBytes, hsa_signal_t *waitFor)
|
||||
{
|
||||
if (copyMode == ChooseBest) {
|
||||
if (isLargeBar && (sizeBytes < _hipH2DTransferThresholdDirectOrStaging)) {
|
||||
if (_isLargeBar && (sizeBytes < _hipH2DTransferThresholdDirectOrStaging)) {
|
||||
copyMode = UseMemcpy;
|
||||
} else if (sizeBytes > _hipH2DTransferThresholdStagingOrPininplace) {
|
||||
copyMode = UsePinInPlace;
|
||||
|
||||
Ссылка в новой задаче
Block a user