Initial support for xgmi sdma queues
Change-Id: I1aee379c7b9eede5f4b913cf2f9af3abb32e5baa
[ROCm/ROCR-Runtime commit: 8864c188b4]
Этот коммит содержится в:
@@ -60,7 +60,7 @@ class BlitKernel : public core::Blit {
|
||||
/// @param agent Pointer to the agent that will execute the AQL packets.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Initialize(const core::Agent& agent) override;
|
||||
hsa_status_t Initialize(const core::Agent& agent);
|
||||
|
||||
/// @brief Marks the blit kernel object as invalid and uncouples its link with
|
||||
/// the underlying AQL kernel queue. Use of the blit object
|
||||
|
||||
@@ -64,6 +64,7 @@ class BlitSdmaBase : public core::Blit {
|
||||
static const size_t kMaxSingleCopySize;
|
||||
static const size_t kMaxSingleFillSize;
|
||||
virtual bool isSDMA() const override { return true; }
|
||||
virtual hsa_status_t Initialize(const core::Agent& agent, bool use_xgmi) = 0;
|
||||
virtual hsa_status_t SubmitCopyRectCommand(const hsa_pitched_ptr_t* dst,
|
||||
const hsa_dim3_t* dst_offset,
|
||||
const hsa_pitched_ptr_t* src,
|
||||
@@ -78,7 +79,7 @@ class BlitSdmaBase : public core::Blit {
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset>
|
||||
class BlitSdma : public BlitSdmaBase {
|
||||
public:
|
||||
explicit BlitSdma(bool copy_direction);
|
||||
BlitSdma();
|
||||
|
||||
virtual ~BlitSdma() override;
|
||||
|
||||
@@ -88,7 +89,7 @@ class BlitSdma : public BlitSdmaBase {
|
||||
/// @param agent Pointer to the agent that will execute the PM4 commands.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Initialize(const core::Agent& agent) override;
|
||||
virtual hsa_status_t Initialize(const core::Agent& agent, bool use_xgmi) override;
|
||||
|
||||
/// @brief Marks the queue object as invalid and uncouples its link with
|
||||
/// the underlying compute device's control block. Use of queue object
|
||||
@@ -249,10 +250,6 @@ class BlitSdma : public BlitSdmaBase {
|
||||
|
||||
static const uint32_t trap_command_size_;
|
||||
|
||||
// Flag to indicate if sDMA queue is used for H2D copy operations
|
||||
// true if used for H2D operations, false otherwise
|
||||
const bool sdma_h2d_;
|
||||
|
||||
// Max copy size of a single linear copy command packet.
|
||||
size_t max_single_linear_copy_size_;
|
||||
|
||||
|
||||
@@ -351,7 +351,7 @@ class GpuAgent : public GpuAgentInt {
|
||||
// @brief Create SDMA blit object.
|
||||
//
|
||||
// @retval NULL if SDMA blit creation and initialization failed.
|
||||
core::Blit* CreateBlitSdma(bool h2d);
|
||||
core::Blit* CreateBlitSdma(bool use_xgmi);
|
||||
|
||||
// @brief Create Kernel blit object using provided compute queue.
|
||||
//
|
||||
@@ -405,9 +405,13 @@ class GpuAgent : public GpuAgentInt {
|
||||
size_t scratch_per_thread_;
|
||||
|
||||
// @brief Blit interfaces for each data path.
|
||||
enum BlitEnum { BlitHostToDev, BlitDevToHost, BlitDevToDev, BlitCount };
|
||||
enum BlitEnum { BlitDevToDev, BlitHostToDev, BlitDevToHost, DefaultBlitCount };
|
||||
|
||||
lazy_ptr<core::Blit> blits_[BlitCount];
|
||||
// Blit objects managed by an instance of GpuAgent
|
||||
std::vector<lazy_ptr<core::Blit>> blits_;
|
||||
|
||||
// List of agents connected via xGMI
|
||||
std::vector<const core::Agent*> xgmi_peer_list_;
|
||||
|
||||
// @brief AQL queues for cache management and blit compute usage.
|
||||
enum QueueEnum {
|
||||
@@ -490,6 +494,16 @@ class GpuAgent : public GpuAgentInt {
|
||||
// @retval True if the memory pool for end timestamp object is initialized.
|
||||
bool InitEndTsPool();
|
||||
|
||||
// Bind the Blit object used to copy to a peer device that is connected via xGMI links
|
||||
lazy_ptr<core::Blit>& GetXgmiBlit(const core::Agent& peer_agent);
|
||||
|
||||
// Bind the Blit object that will drive the copy operation
|
||||
// across PCIe links (H2D or D2H) or is within same device D2D
|
||||
lazy_ptr<core::Blit>& GetPcieBlit(const core::Agent& dst_agent, const core::Agent& src_agent);
|
||||
|
||||
// Bind the Blit object that will drive the copy operation
|
||||
lazy_ptr<core::Blit>& GetBlitObject(const core::Agent& dst_agent, const core::Agent& src_agent);
|
||||
|
||||
// @brief Alternative aperture base address. Only on KV.
|
||||
uintptr_t ape1_base_;
|
||||
|
||||
|
||||
@@ -53,13 +53,6 @@ class Blit {
|
||||
explicit Blit() {}
|
||||
virtual ~Blit() {}
|
||||
|
||||
/// @brief Initialize a blit object.
|
||||
///
|
||||
/// @param agent Pointer to the agent that will execute the blit commands.
|
||||
///
|
||||
/// @return hsa_status_t
|
||||
virtual hsa_status_t Initialize(const core::Agent& agent) = 0;
|
||||
|
||||
/// @brief Marks the blit object as invalid and uncouples its link with
|
||||
/// the underlying compute device's control block. Use of blit object
|
||||
/// once it has been released is illegal and any behavior is indeterminate
|
||||
|
||||
@@ -100,13 +100,12 @@ template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset>
|
||||
const uint32_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset>::trap_command_size_ = sizeof(SDMA_PKT_TRAP);
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset>
|
||||
BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset>::BlitSdma(bool copy_direction)
|
||||
BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset>::BlitSdma()
|
||||
: agent_(NULL),
|
||||
queue_start_addr_(NULL),
|
||||
parity_(false),
|
||||
cached_reserve_index_(0),
|
||||
cached_commit_index_(0),
|
||||
sdma_h2d_(copy_direction),
|
||||
platform_atomic_support_(true),
|
||||
hdp_flush_support_(false) {
|
||||
std::memset(&queue_resource_, 0, sizeof(queue_resource_));
|
||||
@@ -117,7 +116,7 @@ BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset>::~BlitSdma() {}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset>
|
||||
hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset>::Initialize(
|
||||
const core::Agent& agent) {
|
||||
const core::Agent& agent, bool use_xgmi) {
|
||||
if (queue_start_addr_ != NULL) {
|
||||
// Already initialized.
|
||||
return HSA_STATUS_SUCCESS;
|
||||
@@ -159,8 +158,10 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset>::Initial
|
||||
|
||||
// Access kernel driver to initialize the queue control block
|
||||
// This call binds user mode queue object to underlying compute
|
||||
// device.
|
||||
const HSA_QUEUE_TYPE kQueueType_ = HSA_QUEUE_SDMA;
|
||||
// device. ROCr creates queues that are of two kinds: PCIe optimized
|
||||
// and xGMI optimized. Which queue to create is indicated via input
|
||||
// boolean flag
|
||||
const HSA_QUEUE_TYPE kQueueType_ = use_xgmi ? HSA_QUEUE_SDMA_XGMI : HSA_QUEUE_SDMA;
|
||||
if (HSAKMT_STATUS_SUCCESS != hsaKmtCreateQueue(agent_->node_id(), kQueueType_, 100,
|
||||
HSA_QUEUE_PRIORITY_MAXIMUM, queue_start_addr_,
|
||||
kQueueSize, NULL, &queue_resource_)) {
|
||||
@@ -319,9 +320,9 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset>::SubmitC
|
||||
command_addr += timestamp_command_size_;
|
||||
}
|
||||
|
||||
// Determine if a Hdp flush cmd is required at the top of cmd stream
|
||||
// Issue a Hdp flush cmd
|
||||
if (core::Runtime::runtime_singleton_->flag().enable_sdma_hdp_flush()) {
|
||||
if ((HwIndexMonotonic) && (hdp_flush_support_) && (sdma_h2d_ == false)) {
|
||||
if ((HwIndexMonotonic) && (hdp_flush_support_)) {
|
||||
BuildHdpFlushCommand(command_addr);
|
||||
command_addr += flush_command_size_;
|
||||
}
|
||||
@@ -331,14 +332,6 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset>::SubmitC
|
||||
memcpy(command_addr, cmd, cmd_size);
|
||||
command_addr += cmd_size;
|
||||
|
||||
// Determine if a Hdp flush cmd is required at the end of cmd stream
|
||||
if (core::Runtime::runtime_singleton_->flag().enable_sdma_hdp_flush()) {
|
||||
if ((HwIndexMonotonic) && (hdp_flush_support_) && (sdma_h2d_)) {
|
||||
BuildHdpFlushCommand(command_addr);
|
||||
command_addr += flush_command_size_;
|
||||
}
|
||||
}
|
||||
|
||||
if (profiling_enabled) {
|
||||
assert(IsMultipleOf(end_ts_addr, 32));
|
||||
BuildGetGlobalTimestampCommand(command_addr,
|
||||
|
||||
@@ -77,7 +77,6 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props)
|
||||
: GpuAgentInt(node),
|
||||
properties_(node_props),
|
||||
current_coherency_type_(HSA_AMD_COHERENCY_TYPE_COHERENT),
|
||||
blits_(),
|
||||
queues_(),
|
||||
local_region_(NULL),
|
||||
is_kv_device_(false),
|
||||
@@ -138,9 +137,9 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props)
|
||||
}
|
||||
|
||||
GpuAgent::~GpuAgent() {
|
||||
for (int i = 0; i < BlitCount; ++i) {
|
||||
if (blits_[i] != nullptr) {
|
||||
hsa_status_t status = blits_[i]->Destroy(*this);
|
||||
for (auto& blit : blits_) {
|
||||
if (blit.created()) {
|
||||
hsa_status_t status = blit->Destroy(*this);
|
||||
assert(status == HSA_STATUS_SUCCESS);
|
||||
}
|
||||
}
|
||||
@@ -537,16 +536,16 @@ core::Queue* GpuAgent::CreateInterceptibleQueue() {
|
||||
return queue;
|
||||
}
|
||||
|
||||
core::Blit* GpuAgent::CreateBlitSdma(bool h2d) {
|
||||
core::Blit* sdma;
|
||||
core::Blit* GpuAgent::CreateBlitSdma(bool use_xgmi) {
|
||||
amd::BlitSdmaBase* sdma;
|
||||
|
||||
if (isa_->GetMajorVersion() <= 8) {
|
||||
sdma = new BlitSdmaV2V3(h2d);
|
||||
sdma = new BlitSdmaV2V3();
|
||||
} else {
|
||||
sdma = new BlitSdmaV4(h2d);
|
||||
sdma = new BlitSdmaV4();
|
||||
}
|
||||
|
||||
if (sdma->Initialize(*this) != HSA_STATUS_SUCCESS) {
|
||||
if (sdma->Initialize(*this, use_xgmi) != HSA_STATUS_SUCCESS) {
|
||||
sdma->Destroy(*this);
|
||||
delete sdma;
|
||||
sdma = NULL;
|
||||
@@ -582,14 +581,14 @@ void GpuAgent::InitDma() {
|
||||
queues_[QueueUtility].reset(queue_lambda);
|
||||
|
||||
// Decide which engine to use for blits.
|
||||
auto blit_lambda = [this](bool h2d, lazy_ptr<core::Queue>& queue) {
|
||||
auto blit_lambda = [this](bool use_xgmi, lazy_ptr<core::Queue>& queue) {
|
||||
const std::string& sdma_override = core::Runtime::runtime_singleton_->flag().enable_sdma();
|
||||
|
||||
bool use_sdma = (isa_->GetMajorVersion() != 8);
|
||||
if (sdma_override.size() != 0) use_sdma = (sdma_override == "1");
|
||||
|
||||
if (use_sdma && (HSA_PROFILE_BASE == profile_)) {
|
||||
auto ret = CreateBlitSdma(h2d);
|
||||
auto ret = CreateBlitSdma(use_xgmi);
|
||||
if (ret != nullptr) return ret;
|
||||
}
|
||||
|
||||
@@ -599,20 +598,45 @@ void GpuAgent::InitDma() {
|
||||
return ret;
|
||||
};
|
||||
|
||||
blits_[BlitHostToDev].reset([blit_lambda, this]() { return blit_lambda(true, queues_[QueueBlitOnly]); });
|
||||
blits_[BlitDevToHost].reset([blit_lambda, this]() { return blit_lambda(false, queues_[QueueUtility]); });
|
||||
// Determine and instantiate the number of blit objects to
|
||||
// engage. The total number is sum of three plus number of
|
||||
// sdma-xgmi engines
|
||||
uint32_t blit_cnt_ = DefaultBlitCount + properties_.NumSdmaXgmiEngines;
|
||||
blits_.resize(blit_cnt_);
|
||||
|
||||
// Initialize blit objects used for D2D, H2D, D2H, and
|
||||
// P2P copy operations.
|
||||
// -- Blit at index BlitDevToDev(0) deals with copies within
|
||||
// local framebuffer and always engages a Blit Kernel
|
||||
// -- Blit at index BlitHostToDev(1) deals with copies from
|
||||
// Host to Device (H2D) and could engage either a Blit
|
||||
// Kernel or sDMA
|
||||
// -- Blit at index BlitDevToHost(2) deals with copies from
|
||||
// Device to Host (D2H) and Peer to Peer (P2P) over PCIe.
|
||||
// It could engage either a Blit Kernel or sDMA
|
||||
// -- Blit at index DefaultBlitCount(3) and beyond deal
|
||||
// exclusively with P2P over xGMI links
|
||||
blits_[BlitDevToDev].reset([this]() {
|
||||
auto ret = CreateBlitKernel((*queues_[QueueUtility]).get());
|
||||
if (ret == nullptr)
|
||||
throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, "Blit creation failed.");
|
||||
return ret;
|
||||
});
|
||||
blits_[BlitHostToDev].reset(
|
||||
[blit_lambda, this]() { return blit_lambda(false, queues_[QueueBlitOnly]); });
|
||||
blits_[BlitDevToHost].reset(
|
||||
[blit_lambda, this]() { return blit_lambda(false, queues_[QueueUtility]); });
|
||||
|
||||
// XGMI engines.
|
||||
for (uint32_t idx = DefaultBlitCount; idx < blit_cnt_; idx++) {
|
||||
blits_[idx].reset([blit_lambda, this]() { return blit_lambda(true, queues_[QueueUtility]); });
|
||||
}
|
||||
}
|
||||
|
||||
void GpuAgent::PreloadBlits() {
|
||||
blits_[BlitHostToDev].touch();
|
||||
blits_[BlitDevToHost].touch();
|
||||
blits_[BlitDevToDev].touch();
|
||||
for (auto& blit : blits_) {
|
||||
blit.touch();
|
||||
}
|
||||
}
|
||||
|
||||
hsa_status_t GpuAgent::PostToolsInit() {
|
||||
@@ -633,15 +657,8 @@ hsa_status_t GpuAgent::DmaCopy(void* dst, core::Agent& dst_agent,
|
||||
size_t size,
|
||||
std::vector<core::Signal*>& dep_signals,
|
||||
core::Signal& out_signal) {
|
||||
lazy_ptr<core::Blit>& blit =
|
||||
(src_agent.device_type() == core::Agent::kAmdCpuDevice &&
|
||||
dst_agent.device_type() == core::Agent::kAmdGpuDevice)
|
||||
? blits_[BlitHostToDev]
|
||||
: (src_agent.device_type() == core::Agent::kAmdGpuDevice &&
|
||||
dst_agent.device_type() == core::Agent::kAmdCpuDevice)
|
||||
? blits_[BlitDevToHost]
|
||||
: (src_agent.node_id() == dst_agent.node_id())
|
||||
? blits_[BlitDevToDev] : blits_[BlitDevToHost];
|
||||
// Bind the Blit object that will drive this copy operation
|
||||
lazy_ptr<core::Blit>& blit = GetBlitObject(dst_agent, src_agent);
|
||||
|
||||
if (profiling_enabled()) {
|
||||
// Track the agent so we could translate the resulting timestamp to system
|
||||
@@ -688,9 +705,9 @@ hsa_status_t GpuAgent::EnableDmaProfiling(bool enable) {
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
for (int i = 0; i < BlitCount; ++i) {
|
||||
if (blits_[i].created()) {
|
||||
const hsa_status_t stat = blits_[i]->EnableProfiling(enable);
|
||||
for (auto& blit : blits_) {
|
||||
if (blit.created()) {
|
||||
const hsa_status_t stat = blit->EnableProfiling(enable);
|
||||
if (stat != HSA_STATUS_SUCCESS) {
|
||||
return stat;
|
||||
}
|
||||
@@ -701,12 +718,10 @@ hsa_status_t GpuAgent::EnableDmaProfiling(bool enable) {
|
||||
}
|
||||
|
||||
hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
|
||||
|
||||
// agent, and vendor name size limit
|
||||
const size_t attribute_u = static_cast<size_t>(attribute);
|
||||
|
||||
|
||||
switch (attribute_u) {
|
||||
|
||||
// Build agent name by concatenating the Major, Minor and Stepping Ids
|
||||
// of devices compute capability with a prefix of "gfx"
|
||||
case HSA_AGENT_INFO_NAME: {
|
||||
@@ -878,7 +893,7 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const {
|
||||
case HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY:
|
||||
*((uint32_t*)value) = memory_max_frequency_;
|
||||
break;
|
||||
|
||||
|
||||
// The code copies HsaNodeProperties.MarketingName a Unicode string
|
||||
// which is encoded in UTF-16 as a 7-bit ASCII string
|
||||
case HSA_AMD_AGENT_INFO_PRODUCT_NAME: {
|
||||
@@ -1252,4 +1267,81 @@ void GpuAgent::InvalidateCodeCaches() {
|
||||
queues_[QueueUtility]->ExecutePM4(cache_inv, sizeof(cache_inv));
|
||||
}
|
||||
|
||||
// @brief Select the xGMI blit object used for copies to dst_agent.
//
// Peers are remembered in xgmi_peer_list_ in first-seen order. A peer's
// position in that list, modulo the number of xGMI SDMA engines, picks one of
// the blit slots that follow the DefaultBlitCount default entries of blits_.
// A peer that is not yet in the list is appended before its slot is returned.
//
// NOTE(review): xgmi_peer_list_ is modified here without any visible locking;
// confirm that callers serialize access.
//
// @param dst_agent Peer agent the copy is directed at.
//
// @return Reference to the lazily created blit bound to that peer's slot.
lazy_ptr<core::Blit>& GpuAgent::GetXgmiBlit(const core::Agent& dst_agent) {
  const uint32_t engine_count = properties_.NumSdmaXgmiEngines;
  assert((engine_count > 0) && ("Illegal condition, should not happen"));

  // Walk the known peers until the destination's handle is found.
  const uint64_t wanted_handle = dst_agent.public_handle().handle;
  uint32_t slot = 0;
  while ((slot < xgmi_peer_list_.size()) &&
         (xgmi_peer_list_[slot]->public_handle().handle != wanted_handle)) {
    ++slot;
  }

  // Not seen before: record it. Its position is the old list size, i.e. slot.
  if (slot == xgmi_peer_list_.size()) {
    xgmi_peer_list_.push_back(&dst_agent);
  }

  return blits_[(slot % engine_count) + DefaultBlitCount];
}
|
||||
|
||||
// @brief Select the blit object for a copy that does not use an xGMI engine.
//
// A copy from a CPU agent to a GPU agent uses the BlitHostToDev entry; every
// other source/destination combination uses the BlitDevToHost entry.
//
// @param dst_agent Agent that owns the destination of the copy.
// @param src_agent Agent that owns the source of the copy.
//
// @return Reference to the selected entry of blits_.
lazy_ptr<core::Blit>& GpuAgent::GetPcieBlit(const core::Agent& dst_agent,
                                            const core::Agent& src_agent) {
  const bool host_to_device = (src_agent.device_type() == core::Agent::kAmdCpuDevice) &&
                              (dst_agent.device_type() == core::Agent::kAmdGpuDevice);
  if (host_to_device) {
    return blits_[BlitHostToDev];
  }

  return blits_[BlitDevToHost];
}
|
||||
|
||||
// @brief Choose the blit object that will drive a copy between two agents.
//
// Selection order:
//   1. Source and destination are the same agent -> blits_[BlitDevToDev].
//   2. The agents do not share a non-zero hive id -> GetPcieBlit(). This
//      covers: neither agent in a hive (both ids 0), only one agent in a
//      hive, and both agents in different hives.
//   3. Same hive but this agent reports no xGMI SDMA engines (e.g. Vega 20)
//      -> GetPcieBlit().
//   4. Otherwise -> GetXgmiBlit() for the destination agent.
//
// @param dst_agent Agent that owns the destination of the copy.
// @param src_agent Agent that owns the source of the copy.
//
// @return Reference to the selected lazily created blit object.
lazy_ptr<core::Blit>& GpuAgent::GetBlitObject(const core::Agent& dst_agent,
                                              const core::Agent& src_agent) {
  // By the time we get here at least one endpoint is expected to be a GPU.
  assert(((src_agent.device_type() == core::Agent::kAmdGpuDevice) ||
          (dst_agent.device_type() == core::Agent::kAmdGpuDevice)) &&
         ("Both devices are CPU agents which is not expected"));

  // A copy that stays on one device uses the device-to-device blit.
  const bool same_device =
      (src_agent.public_handle().handle) == (dst_agent.public_handle().handle);
  if (same_device) {
    return blits_[BlitDevToDev];
  }

  // A hive id of zero means the device does not claim membership in a hive.
  const uint64_t src_hive_id = src_agent.HiveId();
  const uint64_t dst_hive_id = dst_agent.HiveId();
  const bool share_hive = (dst_hive_id == src_hive_id) && (dst_hive_id != 0);

  // xGMI links alone are not enough; dedicated xGMI SDMA engines are needed.
  const bool has_xgmi_engines = (properties_.NumSdmaXgmiEngines != 0);

  if (share_hive && has_xgmi_engines) {
    return GetXgmiBlit(dst_agent);
  }

  return GetPcieBlit(dst_agent, src_agent);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
@@ -255,10 +255,12 @@ hsa_status_t hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent_handle,
|
||||
core::Signal* out_signal_obj = core::Signal::Convert(completion_signal);
|
||||
IS_VALID(out_signal_obj);
|
||||
|
||||
bool rev_copy_dir = core::Runtime::runtime_singleton_->flag().rev_copy_dir();
|
||||
if (size > 0) {
|
||||
return core::Runtime::runtime_singleton_->CopyMemory(
|
||||
dst, *dst_agent, src, *src_agent, size, dep_signal_list,
|
||||
*out_signal_obj);
|
||||
dst, (rev_copy_dir ? *src_agent : *dst_agent),
|
||||
src, (rev_copy_dir ? *dst_agent : *src_agent),
|
||||
size, dep_signal_list, *out_signal_obj);
|
||||
}
|
||||
|
||||
return HSA_STATUS_SUCCESS;
|
||||
|
||||
@@ -464,8 +464,6 @@ hsa_status_t Runtime::CopyMemory(void* dst, core::Agent& dst_agent,
|
||||
(src_agent.device_type() == core::Agent::DeviceType::kAmdGpuDevice);
|
||||
if (dst_gpu || src_gpu) {
|
||||
core::Agent* copy_agent = (src_gpu) ? &src_agent : &dst_agent;
|
||||
if (flag_.rev_copy_dir() && dst_gpu && src_gpu)
|
||||
copy_agent = (copy_agent == &src_agent) ? &dst_agent : &src_agent;
|
||||
return copy_agent->DmaCopy(dst, dst_agent, src, src_agent, size, dep_signals,
|
||||
completion_signal);
|
||||
}
|
||||
|
||||
@@ -58,6 +58,19 @@ template <typename T> class lazy_ptr {
|
||||
|
||||
explicit lazy_ptr(std::function<T*()> Constructor) { Init(Constructor); }
|
||||
|
||||
/// @brief Move constructor. Takes over rhs's held object (obj) and its
/// constructor callback (func); rhs is left in a moved-from state.
lazy_ptr(lazy_ptr&& rhs) : obj(std::move(rhs.obj)), func(std::move(rhs.func)) {}
|
||||
|
||||
/// @brief Move assignment. Takes over rhs's held object (obj) and its
/// constructor callback (func); rhs is left in a moved-from state.
///
/// @param rhs Instance to move from.
///
/// @return Reference to this instance, so assignments can be chained.
lazy_ptr& operator=(lazy_ptr&& rhs) {
  obj = std::move(rhs.obj);
  func = std::move(rhs.func);
  // The function is declared to return lazy_ptr&; flowing off the end
  // without returning a value is undefined behavior.
  return *this;
}
|
||||
|
||||
lazy_ptr(lazy_ptr&) = delete;
|
||||
lazy_ptr& operator=(lazy_ptr&) = delete;
|
||||
|
||||
void reset(std::function<T*()> Constructor = nullptr) {
|
||||
obj.reset();
|
||||
func = Constructor;
|
||||
@@ -122,7 +135,6 @@ template <typename T> class lazy_ptr {
|
||||
}
|
||||
}
|
||||
|
||||
DISALLOW_COPY_AND_ASSIGN(lazy_ptr);
|
||||
};
|
||||
|
||||
#endif // HSA_RUNTIME_CORE_UTIL_LAZY_PTR_H_
|
||||
|
||||
Ссылка в новой задаче
Block a user