wsl/hsakmt: add sdma queue implementation
Signed-off-by: Flora Cui <flora.cui@amd.com> Reviewed-by: Shane Xiao <shane.xiao@amd.com> Part-of: <http://10.67.69.192/wsl/libhsakmt/-/merge_requests/17>
Este commit está contenido en:
+85
-11
@@ -44,6 +44,9 @@
|
||||
|
||||
#include <cinttypes>
|
||||
#include <condition_variable>
|
||||
#include <iostream>
|
||||
#include <queue>
|
||||
#include <utility>
|
||||
#include "inc/wddm/types.h"
|
||||
#include "inc/wddm/device.h"
|
||||
#include "inc/wddm/gpu_memory.h"
|
||||
@@ -61,6 +64,7 @@ class WDDMDevice;
|
||||
class WDDMQueue {
|
||||
public:
|
||||
WDDMQueue(WDDMDevice *device,
|
||||
uint64_t cmdbuf_addr,
|
||||
uint32_t cmdbuf_size,
|
||||
uint32_t engine,
|
||||
bool use_hws = true) :
|
||||
@@ -70,7 +74,7 @@ public:
|
||||
syncobj(NULL),
|
||||
sync_addr(NULL),
|
||||
cmdbuf(0),
|
||||
cmdbuf_addr(0),
|
||||
cmdbuf_addr(cmdbuf_addr),
|
||||
cmdbuf_size(cmdbuf_size),
|
||||
queue_engine(engine),
|
||||
use_hws(use_hws),
|
||||
@@ -80,8 +84,9 @@ public:
|
||||
|
||||
virtual ~WDDMQueue() { }
|
||||
|
||||
virtual hsa_status_t Init(void) = 0;
|
||||
virtual hsa_status_t Fini(void) = 0;
|
||||
// Default initialization hook: performs no work and reports success.
virtual hsa_status_t Init() {
  return HSA_STATUS_SUCCESS;
}
|
||||
// Default teardown hook: performs no work and reports success.
virtual hsa_status_t Fini() {
  return HSA_STATUS_SUCCESS;
}
|
||||
// Default doorbell hook: intentionally a no-op.
virtual void RingDoorbell() {
}
|
||||
|
||||
hsa_status_t SwsInit(void);
|
||||
hsa_status_t SwsFini(void);
|
||||
@@ -250,17 +255,12 @@ private:
|
||||
class SDMAQueue : public WDDMQueue {
|
||||
public:
|
||||
SDMAQueue(WDDMDevice *device,
|
||||
void *ring,
|
||||
uint64_t cmdbuf_size,
|
||||
uint32_t engine,
|
||||
bool use_hws = true) :
|
||||
WDDMQueue(device, cmdbuf_size, engine, use_hws),
|
||||
rptr_next(0),
|
||||
ib_size(0),
|
||||
ib_start_addr(0) {
|
||||
bool use_hws = true);
|
||||
|
||||
}
|
||||
|
||||
virtual ~SDMAQueue() { }
|
||||
virtual ~SDMAQueue();
|
||||
|
||||
hsa_status_t Init(void);
|
||||
hsa_status_t Fini(void);
|
||||
@@ -272,10 +272,84 @@ public:
|
||||
device->CpuWait(&syncobj, &rptr_next, 1, false);
|
||||
}
|
||||
|
||||
// Returns the address of this queue's wptr_next_ member.
uint64_t *GetRingWptr() {
  uint64_t *wptr_slot = &wptr_next_;
  return wptr_slot;
}
|
||||
uint64_t * GetRingRptr(void) { return WDDMQueue::GetSyncAddr(); }
|
||||
uint64_t * GetDoorbellPtr() { return &doorbell_; }
|
||||
void RingDoorbell();
|
||||
|
||||
private:
|
||||
uint64_t wptr_next_;
|
||||
uint64_t wptr_pre_;
|
||||
uint64_t rptr_next;
|
||||
uint64_t doorbell_;
|
||||
std::queue<std::pair<uint64_t, uint64_t>> wptr_queue_;
|
||||
uint64_t ib_size;
|
||||
uint64_t ib_start_addr;
|
||||
|
||||
std::thread thread_;
|
||||
bool thread_stop_;
|
||||
std::mutex thread_cond_lock_;
|
||||
std::condition_variable thread_cond_;
|
||||
static void SdmaThread(SDMAQueue *queue);
|
||||
|
||||
// In-memory layout of an SDMA POLL_REGMEM packet as it is read back from the
// command ring. The packet consists of six consecutive 32-bit dwords; every
// dword is a union that offers a bit-field view next to the raw DW_n_DATA
// value.
struct SDMA_PKT_POLL_REGMEM {
  // Dword 0: packet header.
  union {
    struct {
      unsigned int op : 8;
      unsigned int sub_op : 8;
      unsigned int reserved_0 : 10;
      unsigned int hdp_flush : 1;
      unsigned int reserved_1 : 1;
      unsigned int func : 3;
      unsigned int mem_poll : 1;
    };
    unsigned int DW_0_DATA;
  } HEADER_UNION;

  // Dword 1: bits 31..0 of the polled address.
  union {
    struct {
      unsigned int addr_31_0 : 32;
    };
    unsigned int DW_1_DATA;
  } ADDR_LO_UNION;

  // Dword 2: bits 63..32 of the polled address.
  union {
    struct {
      unsigned int addr_63_32 : 32;
    };
    unsigned int DW_2_DATA;
  } ADDR_HI_UNION;

  // Dword 3: 32-bit reference value.
  union {
    struct {
      unsigned int value : 32;
    };
    unsigned int DW_3_DATA;
  } VALUE_UNION;

  // Dword 4: 32-bit mask.
  union {
    struct {
      unsigned int mask : 32;
    };
    unsigned int DW_4_DATA;
  } MASK_UNION;

  // Dword 5: polling interval and retry count.
  union {
    struct {
      unsigned int interval : 16;
      unsigned int retry_count : 12;
      unsigned int reserved_0 : 4;
    };
    unsigned int DW_5_DATA;
  } DW5_UNION;
};
// The struct is overlaid on raw ring memory and stepped through with pointer
// arithmetic, so any padding or size change would silently misparse packets.
static_assert(sizeof(SDMA_PKT_POLL_REGMEM) == 6 * sizeof(unsigned int),
              "SDMA_PKT_POLL_REGMEM must be exactly six dwords");
|
||||
// Opcode value that IsPollPacket() expects in the `op` header field.
// Declared static constexpr so it is a true class constant and does not take
// up storage in every queue object.
static constexpr unsigned int SDMA_OP_POLL_REGMEM = 8;
|
||||
// Reports whether `pkt` carries the header this queue treats as a memory
// poll: op == SDMA_OP_POLL_REGMEM, mem_poll == 1 and func == 3.
// `pkt` must point at readable packet memory; it is dereferenced unchecked.
bool IsPollPacket(SDMA_PKT_POLL_REGMEM* pkt) {
  const auto &header = pkt->HEADER_UNION;
  if (header.op != SDMA_OP_POLL_REGMEM)
    return false;
  if (header.mem_poll != 1)
    return false;
  return header.func == 3;
}
|
||||
// Reduces `idx` to an offset inside the ring by masking it with
// cmdbuf_size - 1.
// NOTE(review): the mask only acts as a modulo if cmdbuf_size is a power of
// two - confirm against the callers that size the ring.
uint32_t WrapIntoRocrRing(uint64_t idx) {
  const auto ring_mask = cmdbuf_size - 1;
  return idx & ring_mask;
}
|
||||
};
|
||||
|
||||
} // namespace thunk
|
||||
|
||||
+17
-5
@@ -63,15 +63,27 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtCreateQueue(
|
||||
bool use_hws = device_->IsHwsEnabled(queue_engine);
|
||||
auto queue_ = new wsl::thunk::ComputeQueue(
|
||||
device_, QueueAddress, pkg_num,
|
||||
reinterpret_cast<std::atomic<uint64_t> *>(QueueResource->Queue_write_ptr_aql),
|
||||
reinterpret_cast<std::atomic<uint64_t> *>(QueueResource->Queue_read_ptr_aql),
|
||||
reinterpret_cast<std::atomic<uint64_t> *>(
|
||||
QueueResource->Queue_write_ptr_aql),
|
||||
reinterpret_cast<std::atomic<uint64_t> *>(
|
||||
QueueResource->Queue_read_ptr_aql),
|
||||
QueueResource->ErrorReason, cmdbuf_size, queue_engine, use_hws);
|
||||
|
||||
QueueResource->QueueId = reinterpret_cast<HSA_QUEUEID>(queue_);
|
||||
// for doorbell_signal.hardware_doorbell_ptr
|
||||
QueueResource->Queue_DoorBell_aql = queue_->GetDoorbellPtr();
|
||||
} break;
|
||||
case HSA_QUEUE_SDMA:
|
||||
case HSA_QUEUE_SDMA: {
|
||||
uint32_t queue_engine = device_->GetSdmaEngine(0); // TODO:
|
||||
bool use_hws = device_->IsHwsEnabled(queue_engine);
|
||||
auto queue_ = new wsl::thunk::SDMAQueue(
|
||||
device_, QueueAddress, QueueSizeInBytes,
|
||||
queue_engine, use_hws);
|
||||
QueueResource->QueueId = reinterpret_cast<HSA_QUEUEID>(queue_);
|
||||
QueueResource->Queue_DoorBell_aql = queue_->GetDoorbellPtr();
|
||||
QueueResource->Queue_write_ptr_aql = queue_->GetRingWptr();
|
||||
QueueResource->Queue_read_ptr_aql = queue_->GetRingRptr();
|
||||
} break;
|
||||
default:
|
||||
assert(false);
|
||||
QueueResource->QueueId = 0;
|
||||
@@ -101,7 +113,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtUpdateQueue(
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtDestroyQueue(HSA_QUEUEID QueueId) {
|
||||
CHECK_DXG_OPEN();
|
||||
|
||||
auto queue_ = reinterpret_cast<wsl::thunk::ComputeQueue *>(QueueId);
|
||||
auto queue_ = reinterpret_cast<wsl::thunk::WDDMQueue *>(QueueId);
|
||||
|
||||
if (!queue_)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
@@ -165,7 +177,7 @@ HSAKMT_STATUS HSAKMTAPI hsaKmtAllocQueueGWS(HSA_QUEUEID QueueId, HSAuint32 nGWS,
|
||||
HSAKMT_STATUS HSAKMTAPI hsaKmtQueueRingDoorbell(HSA_QUEUEID QueueId) {
|
||||
CHECK_DXG_OPEN();
|
||||
|
||||
auto queue_ = reinterpret_cast<wsl::thunk::ComputeQueue *>(QueueId);
|
||||
auto queue_ = reinterpret_cast<wsl::thunk::WDDMQueue *>(QueueId);
|
||||
if (!queue_)
|
||||
return HSAKMT_STATUS_INVALID_PARAMETER;
|
||||
|
||||
|
||||
+11
-9
@@ -739,17 +739,19 @@ bool WDDMDevice::CreateQueue(WDDMQueue *queue) {
|
||||
if (!CreateContext(queue->queue_engine, &queue->context))
|
||||
return false;
|
||||
|
||||
GpuMemoryCreateInfo create_info{};
|
||||
create_info.size = queue->cmdbuf_size;
|
||||
create_info.domain = thunk_proxy::kSystem;
|
||||
|
||||
GpuMemory *gpu_mem = nullptr;
|
||||
auto code = CreateGpuMemory(create_info, &gpu_mem);
|
||||
if (code != ErrorCode::Success)
|
||||
goto err_out0;
|
||||
if (queue->cmdbuf_addr == 0) {
|
||||
GpuMemoryCreateInfo create_info{};
|
||||
create_info.size = queue->cmdbuf_size;
|
||||
create_info.domain = thunk_proxy::kSystem;
|
||||
|
||||
queue->cmdbuf = gpu_mem->GetGpuMemoryHandle();
|
||||
queue->cmdbuf_addr = gpu_mem->GpuAddress();
|
||||
auto code = CreateGpuMemory(create_info, &gpu_mem);
|
||||
if (code != ErrorCode::Success)
|
||||
goto err_out0;
|
||||
|
||||
queue->cmdbuf = gpu_mem->GetGpuMemoryHandle();
|
||||
queue->cmdbuf_addr = gpu_mem->GpuAddress();
|
||||
}
|
||||
|
||||
if (queue->Init())
|
||||
goto err_out1;
|
||||
|
||||
+105
-4
@@ -222,7 +222,7 @@ ComputeQueue::ComputeQueue(WDDMDevice *device,
|
||||
uint32_t cmdbuf_size,
|
||||
uint32_t engine,
|
||||
bool use_hws) :
|
||||
WDDMQueue(device, cmdbuf_size, engine, use_hws),
|
||||
WDDMQueue(device, 0, cmdbuf_size, engine, use_hws),
|
||||
ring(ring),
|
||||
ring_size(ring_size),
|
||||
ring_wptr(ring_wptr),
|
||||
@@ -240,9 +240,8 @@ ComputeQueue::ComputeQueue(WDDMDevice *device,
|
||||
scratch_size_per_wave_(0),
|
||||
scratch_size_(0),
|
||||
scratch_base_(nullptr) {
|
||||
|
||||
bool ret = device->CreateQueue(this);
|
||||
assert(ret);
|
||||
bool ret = device->CreateQueue(this);
|
||||
assert(ret);
|
||||
|
||||
GpuMemoryCreateInfo create_info{};
|
||||
create_info.size = PAGE_SIZE;
|
||||
@@ -950,6 +949,108 @@ hsa_status_t ComputeQueue::Process(void) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
// Worker loop of an SDMAQueue.
//
// Each iteration takes one [start, end) write-pointer range that
// RingDoorbell() recorded in wptr_queue_. Leading pairs of poll packets in
// that range are resolved on the CPU by waiting on the signal they refer to,
// and the range is then handed to PreparePacket() and Submit().
//
// When no work is pending, the thread keeps polling until kMaxElapsed has
// passed since start-up (or since the last wake-up) and then blocks on
// thread_cond_. It returns once thread_stop_ is set and no recorded range is
// left.
//
// @param queue  The queue this worker serves; it must outlive the thread.
void SDMAQueue::SdmaThread(SDMAQueue *queue) {
  // Length of the polling window before the thread goes to sleep.
  const std::chrono::milliseconds kMaxElapsed(2000);
  bool sleep = false;
  std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now();

  while (true) {
    uint64_t start = 0;
    uint64_t end = 0;
    bool has_work = false;
    {
      // wptr_queue_ is filled by RingDoorbell() from another thread, so it
      // must only be inspected and popped while holding the mutex.
      std::lock_guard<std::mutex> lock(queue->thread_cond_lock_);
      if (!queue->wptr_queue_.empty()) {
        start = queue->wptr_queue_.front().first;
        end = queue->wptr_queue_.front().second;
        queue->wptr_queue_.pop();
        has_work = true;
      }
    }

    if (has_work) {
      debug_print("SDMA: wptr %lx %lx\n", start, end);

      SDMA_PKT_POLL_REGMEM* poll_pkt = reinterpret_cast<SDMA_PKT_POLL_REGMEM*>(
          queue->cmdbuf_addr + queue->WrapIntoRocrRing(start));
      SDMA_PKT_POLL_REGMEM* poll_next_pkt = poll_pkt + 1;

      // Only packets inside the submitted range may be inspected; anything
      // beyond `end` is stale ring content that could look like a poll packet
      // and block this thread forever.
      const uint64_t range_bytes = end - start;
      const uint64_t pair_bytes = 2 * sizeof(SDMA_PKT_POLL_REGMEM);
      uint64_t scanned_bytes = 0;

      while (range_bytes - scanned_bytes >= pair_bytes &&
             queue->IsPollPacket(poll_pkt) && queue->IsPollPacket(poll_next_pkt)) {
        // The two packets poll the two 32-bit halves of one 64-bit value.
        // The packet with the lower address supplies the 64-bit address and
        // the low half of the expected value, the other one the high half.
        uint64_t poll_addr;
        uint64_t poll_val;
        if (poll_pkt->ADDR_LO_UNION.addr_31_0 > poll_next_pkt->ADDR_LO_UNION.addr_31_0) {
          poll_addr = poll_next_pkt->ADDR_LO_UNION.addr_31_0 |
                      (uint64_t)poll_next_pkt->ADDR_HI_UNION.addr_63_32 << 32;
          poll_val = poll_next_pkt->VALUE_UNION.value |
                     (uint64_t)poll_pkt->VALUE_UNION.value << 32;
        } else {
          poll_addr = poll_pkt->ADDR_LO_UNION.addr_31_0 |
                      (uint64_t)poll_pkt->ADDR_HI_UNION.addr_63_32 << 32;
          poll_val = poll_pkt->VALUE_UNION.value |
                     (uint64_t)poll_next_pkt->VALUE_UNION.value << 32;
        }

        // The polled address is the `value` field of an amd_signal_t; step
        // back to the start of that struct to obtain the signal handle.
        const uint64_t signal_handle = poll_addr - offsetof(amd_signal_t, value);
        debug_print("SDMA: poll signal %#lx addr %#lx val %lu\n", signal_handle, poll_addr, poll_val);
        hsa_signal_t hsa_signal = {signal_handle};
        hsa_signal_value_t value =
            fn_hsa_signal_wait_relaxed(hsa_signal, HSA_SIGNAL_CONDITION_EQ, poll_val, UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
        assert(value == poll_val);
        (void)value;  // only consumed by the assert above

        poll_pkt += 2;
        poll_next_pkt += 2;
        scanned_bytes += pair_bytes;
      }

      queue->PreparePacket(queue->WrapIntoRocrRing(start), end - start);
      std::atomic_thread_fence(std::memory_order_release);
      queue->Submit();
    } else {
      // Nothing to do: once the polling window has elapsed, go to sleep.
      if (std::chrono::steady_clock::now() - start_time > kMaxElapsed)
        sleep = true;
    }

    std::unique_lock<std::mutex> lock(queue->thread_cond_lock_);
    if (sleep && queue->wptr_queue_.empty()) {
      queue->thread_cond_.wait(lock, [queue] {
        return queue->thread_stop_ || !queue->wptr_queue_.empty();
      });
      sleep = false;
      start_time = std::chrono::steady_clock::now();
    }
    // Checked on every iteration so that the destructor's join() does not
    // have to wait for the polling window to run out first.
    if (queue->thread_stop_ && queue->wptr_queue_.empty())
      break;
  }
  debug_print("sdma thread exit\n");
}
|
||||
|
||||
// Builds an SDMA queue on top of a caller-provided ring and starts its
// worker thread.
//
// @param device       Device the queue is created on.
// @param ring         Ring buffer address; forwarded to the base class as
//                     cmdbuf_addr.
// @param cmdbuf_size  Ring size. NOTE(review): the WDDMQueue constructor
//                     takes a uint32_t here, so larger values are truncated.
// @param engine       Engine the queue runs on.
// @param use_hws      Forwarded to the base class.
SDMAQueue::SDMAQueue(WDDMDevice *device,
                     void *ring,
                     uint64_t cmdbuf_size,
                     uint32_t engine,
                     bool use_hws) :
    WDDMQueue(device, reinterpret_cast<uint64_t>(ring), cmdbuf_size, engine, use_hws),
    // Listed in declaration order, which is the order members are actually
    // initialized in.
    wptr_next_(0),
    wptr_pre_(0),
    rptr_next(0),
    // GetDoorbellPtr() hands out the address of doorbell_, so it must not be
    // left indeterminate.
    doorbell_(0),
    ib_size(0),
    ib_start_addr(0),
    thread_stop_(false) {
  bool ret = device->CreateQueue(this);
  assert(ret);
  (void)ret;  // only consumed by the assert above

  // Started last so that the worker sees a fully constructed queue.
  thread_ = std::thread(SdmaThread, this);
}
|
||||
|
||||
// Asks the worker thread to stop, waits for it to finish and then destroys
// the device queue.
SDMAQueue::~SDMAQueue() {
  {
    // The stop flag is changed under the mutex the worker's wait uses.
    std::lock_guard<std::mutex> guard(thread_cond_lock_);
    thread_stop_ = true;
  }
  thread_cond_.notify_one();
  thread_.join();

  // The worker has exited, so nothing uses the queue any more.
  device->DestroyQueue(this);
}
|
||||
|
||||
void SDMAQueue::RingDoorbell() {
|
||||
debug_print("SDMA: ringdoorbell %#llx %#llx\n", wptr_pre_, wptr_next_);
|
||||
|
||||
wptr_queue_.emplace(wptr_pre_, wptr_next_);
|
||||
thread_cond_.notify_one();
|
||||
wptr_pre_ = wptr_next_;
|
||||
}
|
||||
|
||||
hsa_status_t SDMAQueue::Init(void) {
|
||||
hsa_status_t ret = use_hws ? HwsInit() : SwsInit();
|
||||
if (ret)
|
||||
|
||||
Referencia en una nueva incidencia
Block a user