diff --git a/projects/rocr-runtime/rocrtst/suites/functional/agent_props.cc b/projects/rocr-runtime/rocrtst/suites/functional/agent_props.cc index 5bd4ab85b7..3342d048f9 100644 --- a/projects/rocr-runtime/rocrtst/suites/functional/agent_props.cc +++ b/projects/rocr-runtime/rocrtst/suites/functional/agent_props.cc @@ -144,6 +144,9 @@ void AgentPropTest::QueryAgentProp(hsa_agent_t agent, case HSA_DEVICE_TYPE_DSP: ss << "DSP) : "; break; + case HSA_DEVICE_TYPE_AIE: + ss << "AIE) : "; + break; } // Print the agent property diff --git a/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.cc b/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.cc index 063f2b0198..1605846b05 100755 --- a/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.cc +++ b/projects/rocr-runtime/rocrtst/suites/functional/memory_alignment.cc @@ -183,6 +183,9 @@ static void PrintAgentNameAndType(hsa_agent_t agent) { case HSA_DEVICE_TYPE_DSP: std::cout << "DSP)"; break; + case HSA_DEVICE_TYPE_AIE: + std::cout << "AIE)"; + break; } std::cout << std::endl; return; diff --git a/projects/rocr-runtime/rocrtst/suites/functional/memory_allocation.cc b/projects/rocr-runtime/rocrtst/suites/functional/memory_allocation.cc index a2a69f58d0..a5d835805b 100644 --- a/projects/rocr-runtime/rocrtst/suites/functional/memory_allocation.cc +++ b/projects/rocr-runtime/rocrtst/suites/functional/memory_allocation.cc @@ -397,6 +397,9 @@ static void PrintAgentNameAndType(hsa_agent_t agent) { case HSA_DEVICE_TYPE_DSP: std::cout << "DSP)"; break; + case HSA_DEVICE_TYPE_AIE: + std::cout << "AIE)"; + break; } std::cout << std::endl; return; @@ -530,13 +533,13 @@ void MemoryAllocationTest::MemoryAllocateContiguousTest(hsa_agent_t agent, accessible_gpus.push_back(gpuIter); } - void* importedPtr; + void* importedPtr = nullptr; size_t importedSz; ASSERT_SUCCESS(hsa_amd_interop_map_buffer(accessible_gpus.size(), accessible_gpus.data(), dmabuf, 0, &importedSz, &importedPtr, 0, NULL)); - 
ASSERT_NE((uint64_t)importedPtr, 0); + ASSERT_NE(importedPtr, nullptr); ASSERT_EQ(importedSz, alloc_size); close(dmabuf); diff --git a/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc b/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc index 198ae7bb9f..7f6f784919 100644 --- a/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc +++ b/projects/rocr-runtime/rocrtst/suites/functional/memory_basic.cc @@ -189,6 +189,10 @@ void MemoryTest::MaxSingleAllocationTest(hsa_agent_t ag, case HSA_DEVICE_TYPE_DSP: std::cout << "DSP)"; break; + case HSA_DEVICE_TYPE_AIE: + std::cout << "AIE)"; + break; + } std::cout << std::endl; } @@ -321,6 +325,9 @@ void MemoryTest::MemAvailableTest(hsa_agent_t ag, hsa_amd_memory_pool_t pool) { case HSA_DEVICE_TYPE_DSP: std::cout << "DSP)"; break; + case HSA_DEVICE_TYPE_AIE: + std::cout << "AIE)"; + break; } std::cout << std::endl; } diff --git a/projects/rocr-runtime/rocrtst/suites/functional/virtual_memory.cc b/projects/rocr-runtime/rocrtst/suites/functional/virtual_memory.cc index 3ef1a359ad..71aed9e8e1 100644 --- a/projects/rocr-runtime/rocrtst/suites/functional/virtual_memory.cc +++ b/projects/rocr-runtime/rocrtst/suites/functional/virtual_memory.cc @@ -736,7 +736,9 @@ void VirtMemoryTestBasic::GPUAccessToCPUMemoryTest(hsa_agent_t cpuAgent, hsa_age ASSERT_SUCCESS(hsa_amd_vmem_unmap(dev_data, sizeof(*dev_data))); ASSERT_SUCCESS(hsa_amd_vmem_handle_release(mem_handle)); - if (dev_data) ASSERT_SUCCESS(hsa_amd_vmem_address_free(dev_data, sizeof(*dev_data))); + if (dev_data) { + ASSERT_SUCCESS(hsa_amd_vmem_address_free(dev_data, sizeof(*dev_data))); + } if (host_data) hsa_memory_free(host_data); if (kernArgsVirt) { @@ -1070,7 +1072,7 @@ void VirtMemoryTestBasic::NonContiguousChunks(hsa_agent_t cpuAgent, hsa_agent_t size_t& granule_size = pool_i.alloc_granule; size_t alloc_size = granule_size * 512; - const uint64_t NUM_BUFFERS = 6; + const unsigned NUM_BUFFERS = 6; void* addr; void* 
addr_chunks[NUM_BUFFERS]; diff --git a/projects/rocr-runtime/rocrtst/suites/negative/memory_allocate_negative_tests.cc b/projects/rocr-runtime/rocrtst/suites/negative/memory_allocate_negative_tests.cc index 7fe61908f8..8d6b9607d2 100644 --- a/projects/rocr-runtime/rocrtst/suites/negative/memory_allocate_negative_tests.cc +++ b/projects/rocr-runtime/rocrtst/suites/negative/memory_allocate_negative_tests.cc @@ -170,6 +170,9 @@ static void PrintAgentNameAndType(hsa_agent_t agent) { case HSA_DEVICE_TYPE_DSP: std::cout << "DSP)"; break; + case HSA_DEVICE_TYPE_AIE: + std::cout << "AIE)"; + break; } std::cout << std::endl; return; diff --git a/projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.cc b/projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.cc index b0d940cdb4..2031f39634 100755 --- a/projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.cc +++ b/projects/rocr-runtime/rocrtst/suites/stress/memory_concurrent_tests.cc @@ -242,6 +242,9 @@ static void PrintAgentNameAndType(hsa_agent_t agent) { case HSA_DEVICE_TYPE_DSP: std::cout << "DSP)"; break; + case HSA_DEVICE_TYPE_AIE: + std::cout << "AIE)"; + break; } std::cout << std::endl; return; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aie_agent.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aie_agent.h index 0925a206b7..6bab6c68ab 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aie_agent.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aie_agent.h @@ -95,13 +95,13 @@ public: // AIE agent methods. /// @brief Get the number of columns on this AIE agent. - int GetNumCols() const { return num_cols_; } - void SetNumCols(int num_cols) { num_cols_ = num_cols; } + uint32_t GetNumCols() const { return num_cols_; } + void SetNumCols(uint32_t num_cols) { num_cols_ = num_cols; } /// @brief Get the number of core tile rows on this AIE agent. 
- int GetNumCoreRows() const { return num_core_rows_; } - void SetNumCoreRows(int num_core_rows) { num_core_rows_ = num_core_rows; } + uint32_t GetNumCoreRows() const { return num_core_rows_; } + void SetNumCoreRows(uint32_t num_core_rows) { num_core_rows_ = num_core_rows; } /// @brief Get the number of core tiles on this AIE agent. - int GetNumCores() const { return num_cols_ * num_core_rows_; } + uint32_t GetNumCores() const { return num_cols_ * num_core_rows_; } private: /// @brief Query the driver to get the region list owned by this agent. @@ -123,10 +123,10 @@ private: const uint32_t max_queues_ = 1; /// @brief Number of columns in the AIE array. - int num_cols_ = 0; + uint32_t num_cols_ = 0; /// @brief Number of rows of core tiles in the AIE array. Not all rows in a /// column are cores. Some can be memory or shim tiles. - int num_core_rows_ = 0; + uint32_t num_core_rows_ = 0; }; } // namespace AMD diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h index 2c449c18d2..79f328ccb8 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h @@ -116,7 +116,7 @@ public: hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE, hsa_signal_t *signal = NULL) override; - uint32_t queue_id_ = INVALID_QUEUEID; + HSA_QUEUEID queue_id_ = INVALID_QUEUEID; /// @brief ID of AIE device on which this queue has been mapped. uint32_t node_id_ = std::numeric_limits::max(); /// @brief Queue size in bytes. 
diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aql_queue.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aql_queue.h index c446c0d872..9486eaad20 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aql_queue.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_aql_queue.h @@ -346,8 +346,6 @@ class AqlQueue : public core::Queue, private core::LocalSignal, public core::Doo static KernelMutex queue_lock_; return queue_lock_; } - // Async scratch single limit - may be modified after init - size_t async_scratch_single_limit_; static __forceinline int& rtti_id() { static int rtti_id_ = 0; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h index a145980e43..170d594c5e 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_gpu_agent.h @@ -507,14 +507,14 @@ class GpuAgent : public GpuAgentInt { hsa_status_t EnableDmaProfiling(bool enable) override; hsa_status_t PcSamplingIterateConfig(hsa_ven_amd_pcs_iterate_configuration_callback_t cb, - void* cb_data); - hsa_status_t PcSamplingCreate(pcs::PcsRuntime::PcSamplingSession& session); + void* cb_data) override; + hsa_status_t PcSamplingCreate(pcs::PcsRuntime::PcSamplingSession& session) override; hsa_status_t PcSamplingCreateFromId(HsaPcSamplingTraceId pcsId, - pcs::PcsRuntime::PcSamplingSession& session); - hsa_status_t PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session); - hsa_status_t PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session); - hsa_status_t PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session); - hsa_status_t PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session); + pcs::PcsRuntime::PcSamplingSession& session) override; + hsa_status_t PcSamplingDestroy(pcs::PcsRuntime::PcSamplingSession& session) override; + hsa_status_t 
PcSamplingStart(pcs::PcsRuntime::PcSamplingSession& session) override; + hsa_status_t PcSamplingStop(pcs::PcsRuntime::PcSamplingSession& session) override; + hsa_status_t PcSamplingFlush(pcs::PcsRuntime::PcSamplingSession& session) override; hsa_status_t PcSamplingFlushHostTrapDeviceBuffers(pcs::PcsRuntime::PcSamplingSession& session); static void PcSamplingThreadRun(void* agent); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_amd_tool_int.hpp b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_amd_tool_int.hpp index 566c01204f..80b717ab98 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_amd_tool_int.hpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/hsa_amd_tool_int.hpp @@ -4,7 +4,8 @@ #include "inc/hsa_amd_tool.h" #include "runtime.h" -namespace rocr::AMD::tool { +// namespace rocr::AMD::tool { // C++17 +namespace rocr { namespace AMD { namespace tool { using scratch_alloc_flag = hsa_amd_event_scratch_alloc_flag_t; @@ -135,7 +136,9 @@ __forceinline void notify_event_scratch_async_reclaim_end(const hsa_queue_t* que hsa_amd_tool_event_t{.scratch_async_reclaim_end = &event}); } - -} // namespace rocr::AMD::tool +// } // namespace rocr::AMD::tool +} // namespace tool +} // namespace AMD +} // namespace rocr #endif \ No newline at end of file diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp index 723054b318..8a31c724a9 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp @@ -559,7 +559,7 @@ BlitSdma::SubmitCopyRe const bool isGFX12Plus = (agent_->isa()->GetMajorVersion() >= 12); // Common and GFX12 packet must match in size to use same code for vector/append. 
- static_assert(sizeof(SDMA_PKT_COPY_LINEAR_RECT) == sizeof(SDMA_PKT_COPY_LINEAR_RECT_GFX12)); + static_assert(sizeof(SDMA_PKT_COPY_LINEAR_RECT) == sizeof(SDMA_PKT_COPY_LINEAR_RECT_GFX12), ""); const uint max_pitch = 1 << (isGFX12Plus ? SDMA_PKT_COPY_LINEAR_RECT_GFX12::pitch_bits : SDMA_PKT_COPY_LINEAR_RECT::pitch_bits);