From 08e994db500367fcc0d0fc46b499468aab7e1a1e Mon Sep 17 00:00:00 2001 From: Ramesh Errabolu Date: Wed, 24 Jul 2019 19:28:24 -0500 Subject: [PATCH] Initial support for xgmi sdma queues Change-Id: I1aee379c7b9eede5f4b913cf2f9af3abb32e5baa [ROCm/ROCR-Runtime commit: 8864c188b4aed0b1a7d42cb338449b38ab188124] --- .../hsa-runtime/core/inc/amd_blit_kernel.h | 2 +- .../hsa-runtime/core/inc/amd_blit_sdma.h | 9 +- .../hsa-runtime/core/inc/amd_gpu_agent.h | 20 ++- .../runtime/hsa-runtime/core/inc/blit.h | 7 - .../core/runtime/amd_blit_sdma.cpp | 23 +-- .../core/runtime/amd_gpu_agent.cpp | 156 ++++++++++++++---- .../hsa-runtime/core/runtime/hsa_ext_amd.cpp | 6 +- .../hsa-runtime/core/runtime/runtime.cpp | 2 - .../runtime/hsa-runtime/core/util/lazy_ptr.h | 14 +- 9 files changed, 170 insertions(+), 69 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_kernel.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_kernel.h index b7e63d0320..a4c58bc9b4 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_kernel.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_kernel.h @@ -60,7 +60,7 @@ class BlitKernel : public core::Blit { /// @param agent Pointer to the agent that will execute the AQL packets. /// /// @return hsa_status_t - virtual hsa_status_t Initialize(const core::Agent& agent) override; + hsa_status_t Initialize(const core::Agent& agent); /// @brief Marks the blit kernel object as invalid and uncouples its link with /// the underlying AQL kernel queue. Use of the blit object diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_sdma.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_sdma.h index 181cd68764..756ff98270 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_sdma.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_sdma.h @@ -64,6 +64,7 @@ class BlitSdmaBase : public core::Blit { static const size_t kMaxSingleCopySize; static const size_t kMaxSingleFillSize; virtual bool isSDMA() const override { return true; } + virtual hsa_status_t Initialize(const core::Agent& agent, bool use_xgmi) = 0; virtual hsa_status_t SubmitCopyRectCommand(const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src, @@ -78,7 +79,7 @@ class BlitSdmaBase : public core::Blit { template class BlitSdma : public BlitSdmaBase { public: - explicit BlitSdma(bool copy_direction); + BlitSdma(); virtual ~BlitSdma() override; @@ -88,7 +89,7 @@ class BlitSdma : public BlitSdmaBase { /// @param agent Pointer to the agent that will execute the PM4 commands. /// /// @return hsa_status_t - virtual hsa_status_t Initialize(const core::Agent& agent) override; + virtual hsa_status_t Initialize(const core::Agent& agent, bool use_xgmi) override; /// @brief Marks the queue object as invalid and uncouples its link with /// the underlying compute device's control block. Use of queue object @@ -249,10 +250,6 @@ class BlitSdma : public BlitSdmaBase { static const uint32_t trap_command_size_; - // Flag to indicate if sDMA queue is used for H2D copy operations - // true if used for H2D operations, false otherwise - const bool sdma_h2d_; - // Max copy size of a single linear copy command packet. size_t max_single_linear_copy_size_; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h index 4f6a6dc2b7..6fd8be666d 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h @@ -351,7 +351,7 @@ class GpuAgent : public GpuAgentInt { // @brief Create SDMA blit object. // // @retval NULL if SDMA blit creation and initialization failed. - core::Blit* CreateBlitSdma(bool h2d); + core::Blit* CreateBlitSdma(bool use_xgmi); // @brief Create Kernel blit object using provided compute queue. // @@ -405,9 +405,13 @@ class GpuAgent : public GpuAgentInt { size_t scratch_per_thread_; // @brief Blit interfaces for each data path. - enum BlitEnum { BlitHostToDev, BlitDevToHost, BlitDevToDev, BlitCount }; + enum BlitEnum { BlitDevToDev, BlitHostToDev, BlitDevToHost, DefaultBlitCount }; - lazy_ptr blits_[BlitCount]; + // Blit objects managed by an instance of GpuAgent + std::vector> blits_; + + // List of agents connected via xGMI + std::vector xgmi_peer_list_; // @brief AQL queues for cache management and blit compute usage. enum QueueEnum { @@ -490,6 +494,16 @@ class GpuAgent : public GpuAgentInt { // @retval True if the memory pool for end timestamp object is initialized. bool InitEndTsPool(); + // Bind index of peer device that is connected via xGMI links + lazy_ptr& GetXgmiBlit(const core::Agent& peer_agent); + + // Bind the Blit object that will drive the copy operation + // across PCIe links (H2D or D2H) or is within same device D2D + lazy_ptr& GetPcieBlit(const core::Agent& dst_agent, const core::Agent& src_agent); + + // Bind the Blit object that will drive the copy operation + lazy_ptr& GetBlitObject(const core::Agent& dst_agent, const core::Agent& src_agent); + // @brief Alternative aperture base address. Only on KV. uintptr_t ape1_base_; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/blit.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/blit.h index 571893615c..cfcbba51dd 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/blit.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/blit.h @@ -53,13 +53,6 @@ class Blit { explicit Blit() {} virtual ~Blit() {} - /// @brief Initialize a blit object. - /// - /// @param agent Pointer to the agent that will execute the blit commands. - /// - /// @return hsa_status_t - virtual hsa_status_t Initialize(const core::Agent& agent) = 0; - /// @brief Marks the blit object as invalid and uncouples its link with /// the underlying compute device's control block. Use of blit object /// once it has been release is illegal and any behavior is indeterminate diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp index dc7b11068e..8bdd7b279f 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp @@ -100,13 +100,12 @@ template const uint32_t BlitSdma::trap_command_size_ = sizeof(SDMA_PKT_TRAP); template -BlitSdma::BlitSdma(bool copy_direction) +BlitSdma::BlitSdma() : agent_(NULL), queue_start_addr_(NULL), parity_(false), cached_reserve_index_(0), cached_commit_index_(0), - sdma_h2d_(copy_direction), platform_atomic_support_(true), hdp_flush_support_(false) { std::memset(&queue_resource_, 0, sizeof(queue_resource_)); @@ -117,7 +116,7 @@ BlitSdma::~BlitSdma() {} template hsa_status_t BlitSdma::Initialize( - const core::Agent& agent) { + const core::Agent& agent, bool use_xgmi) { if (queue_start_addr_ != NULL) { // Already initialized. return HSA_STATUS_SUCCESS; @@ -159,8 +158,10 @@ hsa_status_t BlitSdma::Initial // Access kernel driver to initialize the queue control block // This call binds user mode queue object to underlying compute - // device. - const HSA_QUEUE_TYPE kQueueType_ = HSA_QUEUE_SDMA; + // device. ROCr creates queues that are of two kinds: PCIe optimized + // and xGMI optimized. Which queue to create is indicated via input + // boolean flag + const HSA_QUEUE_TYPE kQueueType_ = use_xgmi ? HSA_QUEUE_SDMA_XGMI : HSA_QUEUE_SDMA; if (HSAKMT_STATUS_SUCCESS != hsaKmtCreateQueue(agent_->node_id(), kQueueType_, 100, HSA_QUEUE_PRIORITY_MAXIMUM, queue_start_addr_, kQueueSize, NULL, &queue_resource_)) { @@ -319,9 +320,9 @@ hsa_status_t BlitSdma::SubmitC command_addr += timestamp_command_size_; } - // Determine if a Hdp flush cmd is required at the top of cmd stream + // Issue a Hdp flush cmd if (core::Runtime::runtime_singleton_->flag().enable_sdma_hdp_flush()) { - if ((HwIndexMonotonic) && (hdp_flush_support_) && (sdma_h2d_ == false)) { + if ((HwIndexMonotonic) && (hdp_flush_support_)) { BuildHdpFlushCommand(command_addr); command_addr += flush_command_size_; } @@ -331,14 +332,6 @@ hsa_status_t BlitSdma::SubmitC memcpy(command_addr, cmd, cmd_size); command_addr += cmd_size; - // Determine if a Hdp flush cmd is required at the end of cmd stream - if (core::Runtime::runtime_singleton_->flag().enable_sdma_hdp_flush()) { - if ((HwIndexMonotonic) && (hdp_flush_support_) && (sdma_h2d_)) { - BuildHdpFlushCommand(command_addr); - command_addr += flush_command_size_; - } - } - if (profiling_enabled) { assert(IsMultipleOf(end_ts_addr, 32)); BuildGetGlobalTimestampCommand(command_addr, diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index 0704c88401..16c53e95a3 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -77,7 +77,6 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props) : GpuAgentInt(node), properties_(node_props), current_coherency_type_(HSA_AMD_COHERENCY_TYPE_COHERENT), - blits_(), queues_(), local_region_(NULL), is_kv_device_(false), @@ -138,9 +137,9 @@ GpuAgent::GpuAgent(HSAuint32 node, const HsaNodeProperties& node_props) } GpuAgent::~GpuAgent() { - for (int i = 0; i < BlitCount; ++i) { - if (blits_[i] != nullptr) { - hsa_status_t status = blits_[i]->Destroy(*this); + for (auto& blit : blits_) { + if (blit.created()) { + hsa_status_t status = blit->Destroy(*this); assert(status == HSA_STATUS_SUCCESS); } } @@ -537,16 +536,16 @@ core::Queue* GpuAgent::CreateInterceptibleQueue() { return queue; } -core::Blit* GpuAgent::CreateBlitSdma(bool h2d) { - core::Blit* sdma; +core::Blit* GpuAgent::CreateBlitSdma(bool use_xgmi) { + amd::BlitSdmaBase* sdma; if (isa_->GetMajorVersion() <= 8) { - sdma = new BlitSdmaV2V3(h2d); + sdma = new BlitSdmaV2V3(); } else { - sdma = new BlitSdmaV4(h2d); + sdma = new BlitSdmaV4(); } - if (sdma->Initialize(*this) != HSA_STATUS_SUCCESS) { + if (sdma->Initialize(*this, use_xgmi) != HSA_STATUS_SUCCESS) { sdma->Destroy(*this); delete sdma; sdma = NULL; @@ -582,14 +581,14 @@ void GpuAgent::InitDma() { queues_[QueueUtility].reset(queue_lambda); // Decide which engine to use for blits. - auto blit_lambda = [this](bool h2d, lazy_ptr& queue) { + auto blit_lambda = [this](bool use_xgmi, lazy_ptr& queue) { const std::string& sdma_override = core::Runtime::runtime_singleton_->flag().enable_sdma(); bool use_sdma = (isa_->GetMajorVersion() != 8); if (sdma_override.size() != 0) use_sdma = (sdma_override == "1"); if (use_sdma && (HSA_PROFILE_BASE == profile_)) { - auto ret = CreateBlitSdma(h2d); + auto ret = CreateBlitSdma(use_xgmi); if (ret != nullptr) return ret; } @@ -599,20 +598,45 @@ void GpuAgent::InitDma() { return ret; }; - blits_[BlitHostToDev].reset([blit_lambda, this]() { return blit_lambda(true, queues_[QueueBlitOnly]); }); - blits_[BlitDevToHost].reset([blit_lambda, this]() { return blit_lambda(false, queues_[QueueUtility]); }); + // Determine and instantiate the number of blit objects to + // engage. The total number is sum of three plus number of + // sdma-xgmi engines + uint32_t blit_cnt_ = DefaultBlitCount + properties_.NumSdmaXgmiEngines; + blits_.resize(blit_cnt_); + + // Initialize blit objects used for D2D, H2D, D2H, and + // P2P copy operations. + // -- Blit at index BlitDevToDev(0) deals with copies within + // local framebuffer and always engages a Blit Kernel + // -- Blit at index BlitHostToDev(1) deals with copies from + // Host to Device (H2D) and could engage either a Blit + // Kernel or sDMA + // -- Blit at index BlitDevToHost(2) deals with copies from + // Device to Host (D2H) and Peer to Peer (P2P) over PCIe. + // It could engage either a Blit Kernel or sDMA + // -- Blit at index DefaultBlitCount(3) and beyond deal + // exclusively P2P over xGMI links blits_[BlitDevToDev].reset([this]() { auto ret = CreateBlitKernel((*queues_[QueueUtility]).get()); if (ret == nullptr) throw AMD::hsa_exception(HSA_STATUS_ERROR_OUT_OF_RESOURCES, "Blit creation failed."); return ret; }); + blits_[BlitHostToDev].reset( + [blit_lambda, this]() { return blit_lambda(false, queues_[QueueBlitOnly]); }); + blits_[BlitDevToHost].reset( + [blit_lambda, this]() { return blit_lambda(false, queues_[QueueUtility]); }); + + // XGMI engines. + for (uint32_t idx = DefaultBlitCount; idx < blit_cnt_; idx++) { + blits_[idx].reset([blit_lambda, this]() { return blit_lambda(true, queues_[QueueUtility]); }); + } } void GpuAgent::PreloadBlits() { - blits_[BlitHostToDev].touch(); - blits_[BlitDevToHost].touch(); - blits_[BlitDevToDev].touch(); + for (auto& blit : blits_) { + blit.touch(); + } } hsa_status_t GpuAgent::PostToolsInit() { @@ -633,15 +657,8 @@ hsa_status_t GpuAgent::DmaCopy(void* dst, core::Agent& dst_agent, size_t size, std::vector& dep_signals, core::Signal& out_signal) { - lazy_ptr& blit = - (src_agent.device_type() == core::Agent::kAmdCpuDevice && - dst_agent.device_type() == core::Agent::kAmdGpuDevice) - ? blits_[BlitHostToDev] - : (src_agent.device_type() == core::Agent::kAmdGpuDevice && - dst_agent.device_type() == core::Agent::kAmdCpuDevice) - ? blits_[BlitDevToHost] - : (src_agent.node_id() == dst_agent.node_id()) - ? blits_[BlitDevToDev] : blits_[BlitDevToHost]; + // Bind the Blit object that will drive this copy operation + lazy_ptr& blit = GetBlitObject(dst_agent, src_agent); if (profiling_enabled()) { // Track the agent so we could translate the resulting timestamp to system @@ -688,9 +705,9 @@ hsa_status_t GpuAgent::EnableDmaProfiling(bool enable) { return HSA_STATUS_ERROR_OUT_OF_RESOURCES; } - for (int i = 0; i < BlitCount; ++i) { - if (blits_[i].created()) { - const hsa_status_t stat = blits_[i]->EnableProfiling(enable); + for (auto& blit : blits_) { + if (blit.created()) { + const hsa_status_t stat = blit->EnableProfiling(enable); if (stat != HSA_STATUS_SUCCESS) { return stat; } @@ -701,12 +718,10 @@ hsa_status_t GpuAgent::EnableDmaProfiling(bool enable) { } hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const { - // agent, and vendor name size limit const size_t attribute_u = static_cast(attribute); - + switch (attribute_u) { - // Build agent name by concatenating the Major, Minor and Stepping Ids // of devices compute capability with a prefix of "gfx" case HSA_AGENT_INFO_NAME: { @@ -878,7 +893,7 @@ hsa_status_t GpuAgent::GetInfo(hsa_agent_info_t attribute, void* value) const { case HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY: *((uint32_t*)value) = memory_max_frequency_; break; - + // The code copies HsaNodeProperties.MarketingName a Unicode string // which is encoded in UTF-16 as a 7-bit ASCII string case HSA_AMD_AGENT_INFO_PRODUCT_NAME: { @@ -1252,4 +1267,81 @@ void GpuAgent::InvalidateCodeCaches() { queues_[QueueUtility]->ExecutePM4(cache_inv, sizeof(cache_inv)); } +lazy_ptr& GpuAgent::GetXgmiBlit(const core::Agent& dst_agent) { + // Determine if destination is a member xgmi peers list + uint32_t xgmi_engine_cnt = properties_.NumSdmaXgmiEngines; + assert((xgmi_engine_cnt > 0) && ("Illegal condition, should not happen")); + + for (uint32_t idx = 0; idx < xgmi_peer_list_.size(); idx++) { + uint64_t dst_handle = dst_agent.public_handle().handle; + uint64_t peer_handle = xgmi_peer_list_[idx]->public_handle().handle; + if (peer_handle == dst_handle) { + return blits_[(idx % xgmi_engine_cnt) + DefaultBlitCount]; + } + } + + // Add agent to the xGMI neighbours list + xgmi_peer_list_.push_back(&dst_agent); + return blits_[((xgmi_peer_list_.size() - 1) % xgmi_engine_cnt) + DefaultBlitCount]; +} + +lazy_ptr& GpuAgent::GetPcieBlit(const core::Agent& dst_agent, + const core::Agent& src_agent) { + lazy_ptr& blit = + (src_agent.device_type() == core::Agent::kAmdCpuDevice && + dst_agent.device_type() == core::Agent::kAmdGpuDevice) + ? blits_[BlitHostToDev] + : (src_agent.device_type() == core::Agent::kAmdGpuDevice && + dst_agent.device_type() == core::Agent::kAmdCpuDevice) + ? blits_[BlitDevToHost] : blits_[BlitDevToHost]; + return blit; +} + +lazy_ptr& GpuAgent::GetBlitObject(const core::Agent& dst_agent, + const core::Agent& src_agent) { + // At this point it is guaranteed that one of + // the two devices is a GPU, potentially both + assert(((src_agent.device_type() == core::Agent::kAmdGpuDevice) || + (dst_agent.device_type() == core::Agent::kAmdGpuDevice)) && + ("Both devices are CPU agents which is not expected")); + + // Determine if Src and Dst devices are same + if ((src_agent.public_handle().handle) == (dst_agent.public_handle().handle)) { + return blits_[BlitDevToDev]; + } + + // Acquire Hive Id of Src and Dst devices + uint64_t src_hive_id = src_agent.HiveId(); + uint64_t dst_hive_id = dst_agent.HiveId(); + + // Bind to a PCIe facing Blit object if the two + // devices have different Hive Ids. This can occur + // for following scenarios: + // + // Neither device claims membership in a Hive + // srcId = 0 <-> dstId = 0; + // + // Src device claims membership in a Hive + // srcId = 0x1926 <-> dstId = 0; + // + // Dst device claims membership in a Hive + // srcId = 0 <-> dstId = 0x1123; + // + // Both device claims membership in a Hive + // and the Hives are different + // srcId = 0x1926 <-> dstId = 0x1123; + // + if ((dst_hive_id != src_hive_id) || (dst_hive_id == 0)) { + return GetPcieBlit(dst_agent, src_agent); + } + + // Accommodates platforms where devices have xGMI + // links but without sdmaXgmiEngines e.g. Vega 20 + if (properties_.NumSdmaXgmiEngines == 0) { + return GetPcieBlit(dst_agent, src_agent); + } + + return GetXgmiBlit(dst_agent); +} + } // namespace diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp index c8cb00cba9..5fd678784c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/hsa_ext_amd.cpp @@ -255,10 +255,12 @@ hsa_status_t hsa_amd_memory_async_copy(void* dst, hsa_agent_t dst_agent_handle, core::Signal* out_signal_obj = core::Signal::Convert(completion_signal); IS_VALID(out_signal_obj); + bool rev_copy_dir = core::Runtime::runtime_singleton_->flag().rev_copy_dir(); if (size > 0) { return core::Runtime::runtime_singleton_->CopyMemory( - dst, *dst_agent, src, *src_agent, size, dep_signal_list, - *out_signal_obj); + dst, (rev_copy_dir ? *src_agent : *dst_agent), + src, (rev_copy_dir ? *dst_agent : *src_agent), + size, dep_signal_list, *out_signal_obj); } return HSA_STATUS_SUCCESS; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp index f7ff1e6f24..488c440830 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/runtime.cpp @@ -464,8 +464,6 @@ hsa_status_t Runtime::CopyMemory(void* dst, core::Agent& dst_agent, (src_agent.device_type() == core::Agent::DeviceType::kAmdGpuDevice); if (dst_gpu || src_gpu) { core::Agent* copy_agent = (src_gpu) ? &src_agent : &dst_agent; - if (flag_.rev_copy_dir() && dst_gpu && src_gpu) - copy_agent = (copy_agent == &src_agent) ? &dst_agent : &src_agent; return copy_agent->DmaCopy(dst, dst_agent, src, src_agent, size, dep_signals, completion_signal); } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/util/lazy_ptr.h b/projects/rocr-runtime/runtime/hsa-runtime/core/util/lazy_ptr.h index 3e00b74db3..2b74b12748 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/util/lazy_ptr.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/util/lazy_ptr.h @@ -58,6 +58,19 @@ template class lazy_ptr { explicit lazy_ptr(std::function Constructor) { Init(Constructor); } + lazy_ptr(lazy_ptr&& rhs) { + obj = std::move(rhs.obj); + func = std::move(rhs.func); + } + + lazy_ptr& operator=(lazy_ptr&& rhs) { + obj = std::move(rhs.obj); + func = std::move(rhs.func); + } + + lazy_ptr(lazy_ptr&) = delete; + lazy_ptr& operator=(lazy_ptr&) = delete; + void reset(std::function Constructor = nullptr) { obj.reset(); func = Constructor; @@ -122,7 +135,6 @@ template class lazy_ptr { } } - DISALLOW_COPY_AND_ASSIGN(lazy_ptr); }; #endif // HSA_RUNTIME_CORE_UTIL_LAZY_PTR_H_