Add interrupt signal support to SDMA

Change-Id: Ie2b192f3351a0c3bf1eb36ba9704825b18e6059b


[ROCm/ROCR-Runtime commit: aee8ab6ef0]
Este commit está contenido en:
Besar Wicaksono
2016-06-10 13:22:43 -05:00
cometido por Gerrit Code Review
padre 84c47979ab
commit 2e60df69e4
Se han modificado 6 ficheros con 62 adiciones y 24 borrados
@@ -107,7 +107,9 @@ class Agent : public Checked<0xF6BC25EB17E6F917> {
//
// @param [in] type CPU or GPU or other.
explicit Agent(uint32_t node_id, DeviceType type)
: node_id_(node_id), device_type_(uint32_t(type)) {
: node_id_(node_id),
device_type_(uint32_t(type)),
profiling_enabled_(false) {
public_handle_ = Convert(this);
}
@@ -115,7 +117,7 @@ class Agent : public Checked<0xF6BC25EB17E6F917> {
//
// @param [in] type CPU or GPU or other.
explicit Agent(uint32_t node_id, uint32_t type)
: node_id_(node_id), device_type_(type) {
: node_id_(node_id), device_type_(type), profiling_enabled_(false) {
public_handle_ = Convert(this);
}
@@ -171,6 +171,8 @@ class BlitSdma : public core::Blit {
void BuildGetGlobalTimestampCommand(char* cmd_addr, void* write_address);
void BuildTrapCommand(char* cmd_addr);
// Agent object owning the SDMA engine.
GpuAgent* agent_;
@@ -216,6 +218,8 @@ class BlitSdma : public core::Blit {
uint32_t timestamp_command_size_;
uint32_t trap_command_size_;
// Max copy size of a single linear copy command packet.
size_t max_single_linear_copy_size_;
@@ -165,10 +165,6 @@ class InterruptSignal : public Signal {
/// @brief See base class Signal. Returns the event pointer stored in event_.
__forceinline HsaEvent* EopEvent() { return event_; }
// TODO: workaround for SDMA async copy. Bypasses waiting on the EOP event
// because SDMA copy does not handle interrupts yet. Clears wait_on_event_.
__forceinline void DisableWaitEvent() { wait_on_event_ = false; }
/// @brief Prevents throwing exceptions on allocation: storage comes straight
/// from malloc, and whatever malloc returns is handed back to the caller.
void* operator new(size_t size) { return malloc(size); }
@@ -186,10 +182,6 @@ class InterruptSignal : public Signal {
/// closes or not.
bool free_event_;
// TODO: work around for SDMA async copy. Bypass waiting on EOP
// event because SDMA copy does not handle interrupt yet.
bool wait_on_event_;
/// Used to obtain a globally unique value (address) for rtti.
static int rtti_id_;
@@ -60,6 +60,7 @@ namespace amd {
const unsigned int SDMA_OP_COPY = 1;
const unsigned int SDMA_OP_FENCE = 5;
// Opcode written to the header `op` field of a trap packet (see
// BlitSdma::BuildTrapCommand).
const unsigned int SDMA_OP_TRAP = 6;
const unsigned int SDMA_OP_POLL_REGMEM = 8;
const unsigned int SDMA_OP_ATOMIC = 10;
const unsigned int SDMA_OP_CONST_FILL = 11;
@@ -341,6 +342,25 @@ typedef struct SDMA_PKT_TIMESTAMP_TAG {
} SDMA_PKT_TIMESTAMP;
// Layout of the SDMA trap packet. The packet consists of two dwords; each one
// is a union of a bit-field view and a raw `unsigned int` view (DW_n_DATA).
// BlitSdma::SubmitLinearCopyCommand appends this packet after the fence that
// updates the signal's event mailbox, in order to send an interrupt.
typedef struct SDMA_PKT_TRAP_TAG {
  // Dword 0: packet header.
  union {
    struct {
      // Packet opcode; BlitSdma::BuildTrapCommand sets it to SDMA_OP_TRAP.
      unsigned int op : 8;
      // Left zero by BlitSdma::BuildTrapCommand.
      unsigned int sub_op : 8;
      unsigned int reserved_0 : 16;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;
  // Dword 1: interrupt context.
  union {
    struct {
      // NOTE(review): presumably a context value delivered along with the
      // interrupt -- confirm against the SDMA packet specification.
      // BlitSdma::BuildTrapCommand leaves it zero.
      unsigned int int_ctx : 28;
      unsigned int reserved_1 : 4;
    };
    unsigned int DW_1_DATA;
  } INT_CONTEXT_UNION;
} SDMA_PKT_TRAP;
// Returns the least-significant 32 bits of the address stored in `p`.
//
// @param [in] p Pointer whose address is split; it is never dereferenced.
//
// @retval Bits 31..0 of the numeric value of `p`.
inline uint32_t ptrlow32(const void* p) {
  const uintptr_t address = reinterpret_cast<uintptr_t>(p);
  // Keep only the low dword; the narrowing cast below is then lossless.
  return static_cast<uint32_t>(address & 0xFFFFFFFFu);
}
@@ -390,6 +410,7 @@ hsa_status_t BlitSdma::Initialize(const core::Agent& agent) {
poll_command_size_ = sizeof(SDMA_PKT_POLL_REGMEM);
atomic_command_size_ = sizeof(SDMA_PKT_ATOMIC);
timestamp_command_size_ = sizeof(SDMA_PKT_TIMESTAMP);
trap_command_size_ = sizeof(SDMA_PKT_TRAP);
const uint32_t sync_command_size = fence_command_size_;
const uint32_t max_num_copy_command =
@@ -607,9 +628,16 @@ hsa_status_t BlitSdma::SubmitLinearCopyCommand(
? 2 * fence_command_size_
: fence_command_size_;
// If the signal is an interrupt signal, the SDMA engine must also send an
// interrupt packet to the IH.
const size_t interrupt_command_size =
(out_signal.signal_.event_mailbox_ptr != 0)
? (fence_command_size_ + trap_command_size_)
: 0;
const uint32_t total_command_size =
total_poll_command_size + total_copy_command_size + sync_command_size +
total_timestamp_command_size;
total_timestamp_command_size + interrupt_command_size;
char* command_addr = AcquireWriteAddress(total_command_size);
char* const command_addr_temp = command_addr;
@@ -652,9 +680,11 @@ hsa_status_t BlitSdma::SubmitLinearCopyCommand(
command_addr += linear_copy_command_size_;
}
// After transfer is completed, decrement the signal.
// After transfer is completed, decrement the signal value.
if (platform_atomic_support_) {
BuildAtomicDecrementCommand(command_addr, out_signal.ValueLocation());
command_addr += atomic_command_size_;
} else {
uint32_t* signal_value_location =
reinterpret_cast<uint32_t*>(out_signal.ValueLocation());
@@ -666,6 +696,18 @@ hsa_status_t BlitSdma::SubmitLinearCopyCommand(
BuildFenceCommand(command_addr, signal_value_location,
static_cast<uint32_t>(completion_signal_value));
command_addr += fence_command_size_;
}
// Update mailbox event and send interrupt to IH.
if (out_signal.signal_.event_mailbox_ptr != 0) {
BuildFenceCommand(command_addr, reinterpret_cast<uint32_t*>(
out_signal.signal_.event_mailbox_ptr),
static_cast<uint32_t>(out_signal.signal_.event_id));
command_addr += fence_command_size_;
BuildTrapCommand(command_addr);
}
ReleaseWriteAddress(command_addr_temp, total_command_size);
@@ -982,4 +1024,13 @@ void BlitSdma::BuildGetGlobalTimestampCommand(char* cmd_addr,
packet_addr->ADDR_LO_UNION.addr_31_0 = ptrlow32(write_address);
packet_addr->ADDR_HI_UNION.addr_63_32 = ptrhigh32(write_address);
}
void BlitSdma::BuildTrapCommand(char* cmd_addr) {
SDMA_PKT_TRAP* packet_addr =
reinterpret_cast<SDMA_PKT_TRAP*>(cmd_addr);
memset(packet_addr, 0, sizeof(SDMA_PKT_TRAP));
packet_addr->HEADER_UNION.op = SDMA_OP_TRAP;
}
} // namespace amd
@@ -592,13 +592,6 @@ hsa_status_t GpuAgent::DmaCopy(void* dst, core::Agent& dst_agent,
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
}
// TODO: temporarily disable wait on thunk event if the out_signal
// is an interrupt signal object. Remove this when SDMA handle interrupt
// packet properly.
if (out_signal.EopEvent() != NULL) {
static_cast<core::InterruptSignal&>(out_signal).DisableWaitEvent();
}
hsa_status_t stat =
blit->SubmitLinearCopyCommand(dst, src, size, dep_signals, out_signal);
@@ -87,8 +87,6 @@ InterruptSignal::InterruptSignal(hsa_signal_value_t initial_value,
signal_.event_mailbox_ptr = 0;
}
signal_.kind = AMD_SIGNAL_KIND_USER;
wait_on_event_ = true;
}
InterruptSignal::~InterruptSignal() {
@@ -110,13 +108,11 @@ hsa_signal_value_t InterruptSignal::LoadAcquire() {
}
// Stores `value` into signal_.value with relaxed memory ordering, then calls
// SetEvent().
//
// @param [in] value New signal value.
void InterruptSignal::StoreRelaxed(hsa_signal_value_t value) {
// Re-enable waiting on the event; WaitRelaxed tests this flag before it
// takes the event-wait path.
wait_on_event_ = true;
atomic::Store(&signal_.value, int64_t(value), std::memory_order_relaxed);
SetEvent();
}
// Stores `value` into signal_.value with release memory ordering, then calls
// SetEvent().
//
// @param [in] value New signal value.
void InterruptSignal::StoreRelease(hsa_signal_value_t value) {
// Re-enable waiting on the event; WaitRelaxed tests this flag before it
// takes the event-wait path.
wait_on_event_ = true;
atomic::Store(&signal_.value, int64_t(value), std::memory_order_release);
SetEvent();
}
@@ -181,7 +177,7 @@ hsa_signal_value_t InterruptSignal::WaitRelaxed(
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
return hsa_signal_value_t(value);
}
if (wait_on_event_ && wait_hint != HSA_WAIT_STATE_ACTIVE) {
if (wait_hint != HSA_WAIT_STATE_ACTIVE) {
uint32_t wait_ms;
auto time_remaining = fast_timeout - (time - start_time);
if ((timeout == -1) ||