From 2e60df69e452abb7705fab7707f8cdce3c67c5df Mon Sep 17 00:00:00 2001 From: Besar Wicaksono Date: Fri, 10 Jun 2016 13:22:43 -0500 Subject: [PATCH] Add interrupt signal support to SDMA Change-Id: Ie2b192f3351a0c3bf1eb36ba9704825b18e6059b [ROCm/ROCR-Runtime commit: aee8ab6ef06839a33af4fe6b32f972c3e9066356] --- .../runtime/hsa-runtime/core/inc/agent.h | 6 +- .../hsa-runtime/core/inc/amd_blit_sdma.h | 4 ++ .../hsa-runtime/core/inc/interrupt_signal.h | 8 --- .../core/runtime/amd_blit_sdma.cpp | 55 ++++++++++++++++++- .../core/runtime/amd_gpu_agent.cpp | 7 --- .../core/runtime/interrupt_signal.cpp | 6 +- 6 files changed, 62 insertions(+), 24 deletions(-) diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h index 44f9c7af8c..41867eb3c3 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/agent.h @@ -107,7 +107,9 @@ class Agent : public Checked<0xF6BC25EB17E6F917> { // // @param [in] type CPU or GPU or other. explicit Agent(uint32_t node_id, DeviceType type) - : node_id_(node_id), device_type_(uint32_t(type)) { + : node_id_(node_id), + device_type_(uint32_t(type)), + profiling_enabled_(false) { public_handle_ = Convert(this); } @@ -115,7 +117,7 @@ class Agent : public Checked<0xF6BC25EB17E6F917> { // // @param [in] type CPU or GPU or other. explicit Agent(uint32_t node_id, uint32_t type) - : node_id_(node_id), device_type_(type) { + : node_id_(node_id), device_type_(type), profiling_enabled_(false) { public_handle_ = Convert(this); } diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_sdma.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_sdma.h index 670bbd4b6a..6212c3dcc2 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_sdma.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/amd_blit_sdma.h @@ -171,6 +171,8 @@ class BlitSdma : public core::Blit { void BuildGetGlobalTimestampCommand(char* cmd_addr, void* write_address); + void BuildTrapCommand(char* cmd_addr); + // Agent object owning the SDMA engine. GpuAgent* agent_; @@ -216,6 +218,8 @@ class BlitSdma : public core::Blit { uint32_t timestamp_command_size_; + uint32_t trap_command_size_; + // Max copy size of a single linear copy command packet. size_t max_single_linear_copy_size_; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/interrupt_signal.h b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/interrupt_signal.h index bef9564bef..adbbb5070b 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/inc/interrupt_signal.h +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/inc/interrupt_signal.h @@ -165,10 +165,6 @@ class InterruptSignal : public Signal { /// @brief See base class Signal. __forceinline HsaEvent* EopEvent() { return event_; } - // TODO: work around for SDMA async copy. Bypass waiting on EOP - // event because SDMA copy does not handle interrupt yet. - __forceinline void DisableWaitEvent() { wait_on_event_ = false; } - /// @brief prevent throwing exceptions void* operator new(size_t size) { return malloc(size); } @@ -186,10 +182,6 @@ class InterruptSignal : public Signal { /// closes or not. bool free_event_; - // TODO: work around for SDMA async copy. Bypass waiting on EOP - // event because SDMA copy does not handle interrupt yet. - bool wait_on_event_; - /// Used to obtain a globally unique value (address) for rtti. static int rtti_id_; diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp index ece6a91bd3..b0b28b2c7c 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_blit_sdma.cpp @@ -60,6 +60,7 @@ namespace amd { const unsigned int SDMA_OP_COPY = 1; const unsigned int SDMA_OP_FENCE = 5; +const unsigned int SDMA_OP_TRAP = 6; const unsigned int SDMA_OP_POLL_REGMEM = 8; const unsigned int SDMA_OP_ATOMIC = 10; const unsigned int SDMA_OP_CONST_FILL = 11; @@ -341,6 +342,25 @@ typedef struct SDMA_PKT_TIMESTAMP_TAG { } SDMA_PKT_TIMESTAMP; +typedef struct SDMA_PKT_TRAP_TAG { + union { + struct { + unsigned int op : 8; + unsigned int sub_op : 8; + unsigned int reserved_0 : 16; + }; + unsigned int DW_0_DATA; + } HEADER_UNION; + + union { + struct { + unsigned int int_ctx : 28; + unsigned int reserved_1 : 4; + }; + unsigned int DW_1_DATA; + } INT_CONTEXT_UNION; +} SDMA_PKT_TRAP; + inline uint32_t ptrlow32(const void* p) { return static_cast(reinterpret_cast(p)); } @@ -390,6 +410,7 @@ hsa_status_t BlitSdma::Initialize(const core::Agent& agent) { poll_command_size_ = sizeof(SDMA_PKT_POLL_REGMEM); atomic_command_size_ = sizeof(SDMA_PKT_ATOMIC); timestamp_command_size_ = sizeof(SDMA_PKT_TIMESTAMP); + trap_command_size_ = sizeof(SDMA_PKT_TRAP); const uint32_t sync_command_size = fence_command_size_; const uint32_t max_num_copy_command = @@ -607,9 +628,16 @@ hsa_status_t BlitSdma::SubmitLinearCopyCommand( ? 2 * fence_command_size_ : fence_command_size_; + // If the signal is an interrupt signal, we also need to make SDMA engine to + // send interrupt packet to IH. + const size_t interrupt_command_size = + (out_signal.signal_.event_mailbox_ptr != 0) + ? (fence_command_size_ + trap_command_size_) + : 0; + const uint32_t total_command_size = total_poll_command_size + total_copy_command_size + sync_command_size + - total_timestamp_command_size; + total_timestamp_command_size + interrupt_command_size; char* command_addr = AcquireWriteAddress(total_command_size); char* const command_addr_temp = command_addr; @@ -652,9 +680,11 @@ hsa_status_t BlitSdma::SubmitLinearCopyCommand( command_addr += linear_copy_command_size_; } - // After transfer is completed, decrement the signal. + // After transfer is completed, decrement the signal value. if (platform_atomic_support_) { BuildAtomicDecrementCommand(command_addr, out_signal.ValueLocation()); + command_addr += atomic_command_size_; + } else { uint32_t* signal_value_location = reinterpret_cast(out_signal.ValueLocation()); @@ -666,6 +696,18 @@ hsa_status_t BlitSdma::SubmitLinearCopyCommand( BuildFenceCommand(command_addr, signal_value_location, static_cast(completion_signal_value)); + + command_addr += fence_command_size_; + } + + // Update mailbox event and send interrupt to IH. + if (out_signal.signal_.event_mailbox_ptr != 0) { + BuildFenceCommand(command_addr, reinterpret_cast( + out_signal.signal_.event_mailbox_ptr), + static_cast(out_signal.signal_.event_id)); + command_addr += fence_command_size_; + + BuildTrapCommand(command_addr); } ReleaseWriteAddress(command_addr_temp, total_command_size); @@ -982,4 +1024,13 @@ void BlitSdma::BuildGetGlobalTimestampCommand(char* cmd_addr, packet_addr->ADDR_LO_UNION.addr_31_0 = ptrlow32(write_address); packet_addr->ADDR_HI_UNION.addr_63_32 = ptrhigh32(write_address); } + +void BlitSdma::BuildTrapCommand(char* cmd_addr) { + SDMA_PKT_TRAP* packet_addr = + reinterpret_cast(cmd_addr); + + memset(packet_addr, 0, sizeof(SDMA_PKT_TRAP)); + + packet_addr->HEADER_UNION.op = SDMA_OP_TRAP; +} } // namespace amd diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp index d278349744..14d789ec1a 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/amd_gpu_agent.cpp @@ -592,13 +592,6 @@ hsa_status_t GpuAgent::DmaCopy(void* dst, core::Agent& dst_agent, return HSA_STATUS_ERROR_OUT_OF_RESOURCES; } - // TODO: temporarily disable wait on thunk event if the out_signal - // is an interrupt signal object. Remove this when SDMA handle interrupt - // packet properly. - if (out_signal.EopEvent() != NULL) { - static_cast(out_signal).DisableWaitEvent(); - } - hsa_status_t stat = blit->SubmitLinearCopyCommand(dst, src, size, dep_signals, out_signal); diff --git a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/interrupt_signal.cpp b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/interrupt_signal.cpp index 67c95867dd..eb07bcc533 100644 --- a/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/interrupt_signal.cpp +++ b/projects/rocr-runtime/runtime/hsa-runtime/core/runtime/interrupt_signal.cpp @@ -87,8 +87,6 @@ InterruptSignal::InterruptSignal(hsa_signal_value_t initial_value, signal_.event_mailbox_ptr = 0; } signal_.kind = AMD_SIGNAL_KIND_USER; - - wait_on_event_ = true; } InterruptSignal::~InterruptSignal() { @@ -110,13 +108,11 @@ hsa_signal_value_t InterruptSignal::LoadAcquire() { } void InterruptSignal::StoreRelaxed(hsa_signal_value_t value) { - wait_on_event_ = true; atomic::Store(&signal_.value, int64_t(value), std::memory_order_relaxed); SetEvent(); } void InterruptSignal::StoreRelease(hsa_signal_value_t value) { - wait_on_event_ = true; atomic::Store(&signal_.value, int64_t(value), std::memory_order_release); SetEvent(); } @@ -181,7 +177,7 @@ hsa_signal_value_t InterruptSignal::WaitRelaxed( value = atomic::Load(&signal_.value, std::memory_order_relaxed); return hsa_signal_value_t(value); } - if (wait_on_event_ && wait_hint != HSA_WAIT_STATE_ACTIVE) { + if (wait_hint != HSA_WAIT_STATE_ACTIVE) { uint32_t wait_ms; auto time_remaining = fast_timeout - (time - start_time); if ((timeout == -1) ||