Add interrupt signal support to SDMA
Change-Id: Ie2b192f3351a0c3bf1eb36ba9704825b18e6059b
[ROCm/ROCR-Runtime commit: aee8ab6ef0]
Este commit está contenido en:
cometido por
Gerrit Code Review
padre
84c47979ab
commit
2e60df69e4
@@ -107,7 +107,9 @@ class Agent : public Checked<0xF6BC25EB17E6F917> {
|
||||
//
|
||||
// @param [in] type CPU or GPU or other.
|
||||
explicit Agent(uint32_t node_id, DeviceType type)
|
||||
: node_id_(node_id), device_type_(uint32_t(type)) {
|
||||
: node_id_(node_id),
|
||||
device_type_(uint32_t(type)),
|
||||
profiling_enabled_(false) {
|
||||
public_handle_ = Convert(this);
|
||||
}
|
||||
|
||||
@@ -115,7 +117,7 @@ class Agent : public Checked<0xF6BC25EB17E6F917> {
|
||||
//
|
||||
// @param [in] type CPU or GPU or other.
|
||||
explicit Agent(uint32_t node_id, uint32_t type)
|
||||
: node_id_(node_id), device_type_(type) {
|
||||
: node_id_(node_id), device_type_(type), profiling_enabled_(false) {
|
||||
public_handle_ = Convert(this);
|
||||
}
|
||||
|
||||
|
||||
@@ -171,6 +171,8 @@ class BlitSdma : public core::Blit {
|
||||
|
||||
void BuildGetGlobalTimestampCommand(char* cmd_addr, void* write_address);
|
||||
|
||||
void BuildTrapCommand(char* cmd_addr);
|
||||
|
||||
// Agent object owning the SDMA engine.
|
||||
GpuAgent* agent_;
|
||||
|
||||
@@ -216,6 +218,8 @@ class BlitSdma : public core::Blit {
|
||||
|
||||
uint32_t timestamp_command_size_;
|
||||
|
||||
uint32_t trap_command_size_;
|
||||
|
||||
// Max copy size of a single linear copy command packet.
|
||||
size_t max_single_linear_copy_size_;
|
||||
|
||||
|
||||
@@ -165,10 +165,6 @@ class InterruptSignal : public Signal {
|
||||
/// @brief See base class Signal.
|
||||
__forceinline HsaEvent* EopEvent() { return event_; }
|
||||
|
||||
// TODO: work around for SDMA async copy. Bypass waiting on EOP
|
||||
// event because SDMA copy does not handle interrupt yet.
|
||||
__forceinline void DisableWaitEvent() { wait_on_event_ = false; }
|
||||
|
||||
/// @brief prevent throwing exceptions
|
||||
void* operator new(size_t size) { return malloc(size); }
|
||||
|
||||
@@ -186,10 +182,6 @@ class InterruptSignal : public Signal {
|
||||
/// closes or not.
|
||||
bool free_event_;
|
||||
|
||||
// TODO: work around for SDMA async copy. Bypass waiting on EOP
|
||||
// event because SDMA copy does not handle interrupt yet.
|
||||
bool wait_on_event_;
|
||||
|
||||
/// Used to obtain a globally unique value (address) for rtti.
|
||||
static int rtti_id_;
|
||||
|
||||
|
||||
@@ -60,6 +60,7 @@ namespace amd {
|
||||
|
||||
const unsigned int SDMA_OP_COPY = 1;
|
||||
const unsigned int SDMA_OP_FENCE = 5;
|
||||
const unsigned int SDMA_OP_TRAP = 6;
|
||||
const unsigned int SDMA_OP_POLL_REGMEM = 8;
|
||||
const unsigned int SDMA_OP_ATOMIC = 10;
|
||||
const unsigned int SDMA_OP_CONST_FILL = 11;
|
||||
@@ -341,6 +342,25 @@ typedef struct SDMA_PKT_TIMESTAMP_TAG {
|
||||
|
||||
} SDMA_PKT_TIMESTAMP;
|
||||
|
||||
// Layout of the SDMA trap packet: two unsigned-int words, each exposed both as
// named bit-fields and as a raw word.
//   HEADER_UNION      - op (8 bits), sub_op (8 bits), reserved_0 (16 bits);
//                       DW_0_DATA overlays the same storage.
//   INT_CONTEXT_UNION - int_ctx (28 bits), reserved_1 (4 bits);
//                       DW_1_DATA overlays the same storage.
// NOTE(review): the exact bit positions depend on the compiler's bit-field
// ordering; confirm against the SDMA packet specification.
typedef struct SDMA_PKT_TRAP_TAG {
|
||||
union {
|
||||
struct {
|
||||
unsigned int op : 8;
|
||||
unsigned int sub_op : 8;
|
||||
unsigned int reserved_0 : 16;
|
||||
};
|
||||
unsigned int DW_0_DATA;
|
||||
} HEADER_UNION;
|
||||
|
||||
union {
|
||||
struct {
|
||||
unsigned int int_ctx : 28;
|
||||
unsigned int reserved_1 : 4;
|
||||
};
|
||||
unsigned int DW_1_DATA;
|
||||
} INT_CONTEXT_UNION;
|
||||
} SDMA_PKT_TRAP;
|
||||
|
||||
// Returns the pointer value of p narrowed to 32 bits: the address is converted
// to uintptr_t and then truncated to uint32_t, keeping the low-order bits.
inline uint32_t ptrlow32(const void* p) {
|
||||
return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(p));
|
||||
}
|
||||
@@ -390,6 +410,7 @@ hsa_status_t BlitSdma::Initialize(const core::Agent& agent) {
|
||||
poll_command_size_ = sizeof(SDMA_PKT_POLL_REGMEM);
|
||||
atomic_command_size_ = sizeof(SDMA_PKT_ATOMIC);
|
||||
timestamp_command_size_ = sizeof(SDMA_PKT_TIMESTAMP);
|
||||
trap_command_size_ = sizeof(SDMA_PKT_TRAP);
|
||||
|
||||
const uint32_t sync_command_size = fence_command_size_;
|
||||
const uint32_t max_num_copy_command =
|
||||
@@ -607,9 +628,16 @@ hsa_status_t BlitSdma::SubmitLinearCopyCommand(
|
||||
? 2 * fence_command_size_
|
||||
: fence_command_size_;
|
||||
|
||||
// If the signal is an interrupt signal, we also need to make the SDMA engine
|
||||
// send an interrupt packet to the IH.
|
||||
const size_t interrupt_command_size =
|
||||
(out_signal.signal_.event_mailbox_ptr != 0)
|
||||
? (fence_command_size_ + trap_command_size_)
|
||||
: 0;
|
||||
|
||||
const uint32_t total_command_size =
|
||||
total_poll_command_size + total_copy_command_size + sync_command_size +
|
||||
total_timestamp_command_size;
|
||||
total_timestamp_command_size + interrupt_command_size;
|
||||
|
||||
char* command_addr = AcquireWriteAddress(total_command_size);
|
||||
char* const command_addr_temp = command_addr;
|
||||
@@ -652,9 +680,11 @@ hsa_status_t BlitSdma::SubmitLinearCopyCommand(
|
||||
command_addr += linear_copy_command_size_;
|
||||
}
|
||||
|
||||
// After transfer is completed, decrement the signal.
|
||||
// After transfer is completed, decrement the signal value.
|
||||
if (platform_atomic_support_) {
|
||||
BuildAtomicDecrementCommand(command_addr, out_signal.ValueLocation());
|
||||
command_addr += atomic_command_size_;
|
||||
|
||||
} else {
|
||||
uint32_t* signal_value_location =
|
||||
reinterpret_cast<uint32_t*>(out_signal.ValueLocation());
|
||||
@@ -666,6 +696,18 @@ hsa_status_t BlitSdma::SubmitLinearCopyCommand(
|
||||
|
||||
BuildFenceCommand(command_addr, signal_value_location,
|
||||
static_cast<uint32_t>(completion_signal_value));
|
||||
|
||||
command_addr += fence_command_size_;
|
||||
}
|
||||
|
||||
// Update mailbox event and send interrupt to IH.
|
||||
if (out_signal.signal_.event_mailbox_ptr != 0) {
|
||||
BuildFenceCommand(command_addr, reinterpret_cast<uint32_t*>(
|
||||
out_signal.signal_.event_mailbox_ptr),
|
||||
static_cast<uint32_t>(out_signal.signal_.event_id));
|
||||
command_addr += fence_command_size_;
|
||||
|
||||
BuildTrapCommand(command_addr);
|
||||
}
|
||||
|
||||
ReleaseWriteAddress(command_addr_temp, total_command_size);
|
||||
@@ -982,4 +1024,13 @@ void BlitSdma::BuildGetGlobalTimestampCommand(char* cmd_addr,
|
||||
packet_addr->ADDR_LO_UNION.addr_31_0 = ptrlow32(write_address);
|
||||
packet_addr->ADDR_HI_UNION.addr_63_32 = ptrhigh32(write_address);
|
||||
}
|
||||
|
||||
// Writes an SDMA trap packet at cmd_addr.
//
// @param [in] cmd_addr Destination for the packet; sizeof(SDMA_PKT_TRAP) bytes
// are written starting at this address.
//
// The packet is zero-filled first, so every field other than the header opcode
// (including sub_op and int_ctx) is left as 0.
void BlitSdma::BuildTrapCommand(char* cmd_addr) {
|
||||
SDMA_PKT_TRAP* packet_addr =
|
||||
reinterpret_cast<SDMA_PKT_TRAP*>(cmd_addr);
|
||||
|
||||
memset(packet_addr, 0, sizeof(SDMA_PKT_TRAP));
|
||||
|
||||
// Only the opcode is set; it marks this packet as a trap command.
packet_addr->HEADER_UNION.op = SDMA_OP_TRAP;
|
||||
}
|
||||
} // namespace amd
|
||||
|
||||
@@ -592,13 +592,6 @@ hsa_status_t GpuAgent::DmaCopy(void* dst, core::Agent& dst_agent,
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
// TODO: temporarily disable wait on thunk event if the out_signal
|
||||
// is an interrupt signal object. Remove this when SDMA handle interrupt
|
||||
// packet properly.
|
||||
if (out_signal.EopEvent() != NULL) {
|
||||
static_cast<core::InterruptSignal&>(out_signal).DisableWaitEvent();
|
||||
}
|
||||
|
||||
hsa_status_t stat =
|
||||
blit->SubmitLinearCopyCommand(dst, src, size, dep_signals, out_signal);
|
||||
|
||||
|
||||
@@ -87,8 +87,6 @@ InterruptSignal::InterruptSignal(hsa_signal_value_t initial_value,
|
||||
signal_.event_mailbox_ptr = 0;
|
||||
}
|
||||
signal_.kind = AMD_SIGNAL_KIND_USER;
|
||||
|
||||
wait_on_event_ = true;
|
||||
}
|
||||
|
||||
InterruptSignal::~InterruptSignal() {
|
||||
@@ -110,13 +108,11 @@ hsa_signal_value_t InterruptSignal::LoadAcquire() {
|
||||
}
|
||||
|
||||
void InterruptSignal::StoreRelaxed(hsa_signal_value_t value) {
|
||||
wait_on_event_ = true;
|
||||
atomic::Store(&signal_.value, int64_t(value), std::memory_order_relaxed);
|
||||
SetEvent();
|
||||
}
|
||||
|
||||
void InterruptSignal::StoreRelease(hsa_signal_value_t value) {
|
||||
wait_on_event_ = true;
|
||||
atomic::Store(&signal_.value, int64_t(value), std::memory_order_release);
|
||||
SetEvent();
|
||||
}
|
||||
@@ -181,7 +177,7 @@ hsa_signal_value_t InterruptSignal::WaitRelaxed(
|
||||
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
|
||||
return hsa_signal_value_t(value);
|
||||
}
|
||||
if (wait_on_event_ && wait_hint != HSA_WAIT_STATE_ACTIVE) {
|
||||
if (wait_hint != HSA_WAIT_STATE_ACTIVE) {
|
||||
uint32_t wait_ms;
|
||||
auto time_remaining = fast_timeout - (time - start_time);
|
||||
if ((timeout == -1) ||
|
||||
|
||||
Referencia en una nueva incidencia
Block a user