rocr: Remove SDMA code for gfx7 and gfx8
Remove deprecated SDMA code for gfx7 and gfx8 ASICs
Этот коммит содержится в:
коммит произвёл
Yat Sin, David
родитель
5285c24657
Коммит
d3f70910e1
@@ -73,11 +73,7 @@ class BlitSdmaBase : public core::Blit {
|
||||
core::Signal& out_signal) = 0;
|
||||
};
|
||||
|
||||
// RingIndexTy: 32/64-bit monotonic ring index, counting in bytes.
|
||||
// HwIndexMonotonic: true if SDMA HW index is monotonic, false if it wraps at end of ring.
|
||||
// SizeToCountOffset: value added to size (in bytes) to form SDMA command count field.
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
class BlitSdma : public BlitSdmaBase {
|
||||
template <bool useGCR> class BlitSdma : public BlitSdmaBase {
|
||||
public:
|
||||
BlitSdma();
|
||||
|
||||
@@ -163,9 +159,9 @@ class BlitSdma : public BlitSdmaBase {
|
||||
/// could be written. NULL if input size is greater than the size of queue
|
||||
/// buffer.
|
||||
|
||||
char* AcquireWriteAddress(uint32_t cmd_size, RingIndexTy& curr_index);
|
||||
char* AcquireWriteAddress(uint32_t cmd_size, uint64_t& curr_index);
|
||||
|
||||
void UpdateWriteAndDoorbellRegister(RingIndexTy curr_index, RingIndexTy new_index);
|
||||
void UpdateWriteAndDoorbellRegister(uint64_t curr_index, uint64_t new_index);
|
||||
|
||||
/// @brief Updates the Write Register of compute device to the end of
|
||||
/// SDMA packet written into queue buffer. The update to Write Register
|
||||
@@ -178,16 +174,16 @@ class BlitSdma : public BlitSdmaBase {
|
||||
/// @param curr_index Index passed back from AcquireWriteAddress.
|
||||
///
|
||||
/// @param cmd_size Command packet size in bytes.
|
||||
void ReleaseWriteAddress(RingIndexTy curr_index, uint32_t cmd_size);
|
||||
void ReleaseWriteAddress(uint64_t curr_index, uint32_t cmd_size);
|
||||
|
||||
/// @brief Writes NO-OP words into queue buffer in case writing a command
|
||||
/// causes the queue buffer to wrap.
|
||||
///
|
||||
/// @param curr_index Index to begin padding from.
|
||||
void PadRingToEnd(RingIndexTy curr_index);
|
||||
void PadRingToEnd(uint64_t curr_index);
|
||||
|
||||
uint32_t WrapIntoRing(RingIndexTy index);
|
||||
bool CanWriteUpto(RingIndexTy upto_index);
|
||||
uint32_t WrapIntoRing(uint64_t index);
|
||||
bool CanWriteUpto(uint64_t upto_index);
|
||||
|
||||
/// @brief Build fence command
|
||||
void BuildFenceCommand(char* fence_command_addr, uint32_t* fence,
|
||||
@@ -265,8 +261,8 @@ class BlitSdma : public BlitSdmaBase {
|
||||
HsaQueueResource queue_resource_;
|
||||
|
||||
// Monotonic ring indices, in bytes, tracking written and submitted commands.
|
||||
RingIndexTy cached_reserve_index_;
|
||||
RingIndexTy cached_commit_index_;
|
||||
uint64_t cached_reserve_index_;
|
||||
uint64_t cached_commit_index_;
|
||||
|
||||
static const uint32_t linear_copy_command_size_;
|
||||
|
||||
@@ -314,21 +310,11 @@ class BlitSdma : public BlitSdmaBase {
|
||||
size_t min_submission_size_;
|
||||
};
|
||||
|
||||
// Ring indices are 32-bit.
|
||||
// HW ring indices are not monotonic (wrap at end of ring).
|
||||
// Count fields of SDMA commands are 0-based.
|
||||
typedef BlitSdma<uint32_t, false, 0, false> BlitSdmaV2V3;
|
||||
|
||||
// Ring indices are 64-bit.
|
||||
// HW ring indices are monotonic (do not wrap at end of ring).
|
||||
// Count fields of SDMA commands are 1-based.
|
||||
typedef BlitSdma<uint64_t, true, -1, false> BlitSdmaV4;
|
||||
typedef BlitSdma<false> BlitSdmaV4;
|
||||
|
||||
// Ring indices are 64-bit.
|
||||
// HW ring indices are monotonic (do not wrap at end of ring).
|
||||
// Count fields of SDMA commands are 1-based.
|
||||
// SDMA is connected to gL2.
|
||||
typedef BlitSdma<uint64_t, true, -1, true> BlitSdmaV5;
|
||||
typedef BlitSdma<true> BlitSdmaV5;
|
||||
|
||||
} // namespace amd
|
||||
} // namespace rocr
|
||||
|
||||
@@ -77,44 +77,33 @@ const size_t BlitSdmaBase::kMaxSingleCopySize = SDMA_PKT_COPY_LINEAR::kMaxSize_;
|
||||
const size_t BlitSdmaBase::kMaxSingleFillSize = SDMA_PKT_CONSTANT_FILL::kMaxSize_;
|
||||
|
||||
// Initialize the sizes of the various SDMA commands used by this module
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
const uint32_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::linear_copy_command_size_ = sizeof(SDMA_PKT_COPY_LINEAR);
|
||||
template <bool useGCR>
|
||||
const uint32_t BlitSdma<useGCR>::linear_copy_command_size_ = sizeof(SDMA_PKT_COPY_LINEAR);
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
const uint32_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::fill_command_size_ = sizeof(SDMA_PKT_CONSTANT_FILL);
|
||||
template <bool useGCR>
|
||||
const uint32_t BlitSdma<useGCR>::fill_command_size_ = sizeof(SDMA_PKT_CONSTANT_FILL);
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
const uint32_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::fence_command_size_ = sizeof(SDMA_PKT_FENCE);
|
||||
template <bool useGCR>
|
||||
const uint32_t BlitSdma<useGCR>::fence_command_size_ = sizeof(SDMA_PKT_FENCE);
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
const uint32_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::poll_command_size_ = sizeof(SDMA_PKT_POLL_REGMEM);
|
||||
template <bool useGCR>
|
||||
const uint32_t BlitSdma<useGCR>::poll_command_size_ = sizeof(SDMA_PKT_POLL_REGMEM);
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
const uint32_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::flush_command_size_ = sizeof(SDMA_PKT_POLL_REGMEM);
|
||||
template <bool useGCR>
|
||||
const uint32_t BlitSdma<useGCR>::flush_command_size_ = sizeof(SDMA_PKT_POLL_REGMEM);
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
const uint32_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::atomic_command_size_ = sizeof(SDMA_PKT_ATOMIC);
|
||||
template <bool useGCR>
|
||||
const uint32_t BlitSdma<useGCR>::atomic_command_size_ = sizeof(SDMA_PKT_ATOMIC);
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
const uint32_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::timestamp_command_size_ = sizeof(SDMA_PKT_TIMESTAMP);
|
||||
template <bool useGCR>
|
||||
const uint32_t BlitSdma<useGCR>::timestamp_command_size_ = sizeof(SDMA_PKT_TIMESTAMP);
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
const uint32_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::trap_command_size_ = sizeof(SDMA_PKT_TRAP);
|
||||
template <bool useGCR> const uint32_t BlitSdma<useGCR>::trap_command_size_ = sizeof(SDMA_PKT_TRAP);
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
const uint32_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::gcr_command_size_ = sizeof(SDMA_PKT_GCR);
|
||||
template <bool useGCR> const uint32_t BlitSdma<useGCR>::gcr_command_size_ = sizeof(SDMA_PKT_GCR);
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BlitSdma()
|
||||
template <bool useGCR>
|
||||
BlitSdma<useGCR>::BlitSdma()
|
||||
: agent_(NULL),
|
||||
queue_start_addr_(NULL),
|
||||
bytes_queued_(0),
|
||||
@@ -129,12 +118,11 @@ BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BlitSdma()
|
||||
std::memset(&queue_resource_, 0, sizeof(queue_resource_));
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::~BlitSdma() {}
|
||||
template <bool useGCR> BlitSdma<useGCR>::~BlitSdma() {}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::Initialize(
|
||||
const core::Agent& agent, bool use_xgmi, size_t linear_copy_size_override, int rec_eng) {
|
||||
template <bool useGCR>
|
||||
hsa_status_t BlitSdma<useGCR>::Initialize(const core::Agent& agent, bool use_xgmi,
|
||||
size_t linear_copy_size_override, int rec_eng) {
|
||||
if (queue_start_addr_ != NULL) {
|
||||
// Already initialized.
|
||||
return HSA_STATUS_SUCCESS;
|
||||
@@ -201,7 +189,7 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>:
|
||||
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
|
||||
}
|
||||
|
||||
cached_reserve_index_ = *reinterpret_cast<RingIndexTy*>(queue_resource_.Queue_write_ptr);
|
||||
cached_reserve_index_ = *reinterpret_cast<uint64_t*>(queue_resource_.Queue_write_ptr);
|
||||
cached_commit_index_ = cached_reserve_index_;
|
||||
|
||||
if (core::g_use_interrupt_wait) {
|
||||
@@ -218,9 +206,7 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>:
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::Destroy(
|
||||
const core::Agent& agent) {
|
||||
template <bool useGCR> hsa_status_t BlitSdma<useGCR>::Destroy(const core::Agent& agent) {
|
||||
// Release all allocated resources and reset them to zero.
|
||||
|
||||
if (queue_resource_.QueueId != 0) {
|
||||
@@ -245,9 +231,8 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>:
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::SubmitBlockingCommand(const void* cmd, size_t cmd_size,
|
||||
template <bool useGCR>
|
||||
hsa_status_t BlitSdma<useGCR>::SubmitBlockingCommand(const void* cmd, size_t cmd_size,
|
||||
uint64_t size) {
|
||||
ScopedAcquire<KernelMutex> lock(&lock_);
|
||||
|
||||
@@ -278,11 +263,11 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::SubmitCommand(
|
||||
const void* cmd, size_t cmd_size, uint64_t size, const std::vector<core::Signal*>& dep_signals,
|
||||
core::Signal& out_signal, std::vector<core::Signal*>& gang_signals) {
|
||||
|
||||
template <bool useGCR>
|
||||
hsa_status_t BlitSdma<useGCR>::SubmitCommand(const void* cmd, size_t cmd_size, uint64_t size,
|
||||
const std::vector<core::Signal*>& dep_signals,
|
||||
core::Signal& out_signal,
|
||||
std::vector<core::Signal*>& gang_signals) {
|
||||
uint32_t num_poll_command = 0;
|
||||
|
||||
// Cached copy of dep_signals[i]->LoadRelaxed
|
||||
@@ -355,9 +340,7 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>:
|
||||
// Add space for acquire or release Hdp flush command
|
||||
uint32_t flush_cmd_size = 0;
|
||||
if (core::Runtime::runtime_singleton_->flag().enable_sdma_hdp_flush()) {
|
||||
if ((HwIndexMonotonic) && (hdp_flush_support_)) {
|
||||
flush_cmd_size = flush_command_size_;
|
||||
}
|
||||
if (hdp_flush_support_) flush_cmd_size = flush_command_size_;
|
||||
}
|
||||
|
||||
// Add space for cache flush.
|
||||
@@ -368,7 +351,7 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>:
|
||||
const uint32_t pad_size = total_command_size < min_submission_size_ ?
|
||||
min_submission_size_ - total_command_size : 0;
|
||||
|
||||
RingIndexTy curr_index;
|
||||
uint64_t curr_index;
|
||||
char* command_addr;
|
||||
uint64_t prior_bytes, post_bytes;
|
||||
{
|
||||
@@ -426,7 +409,7 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>:
|
||||
|
||||
// Issue a Hdp flush cmd
|
||||
if (core::Runtime::runtime_singleton_->flag().enable_sdma_hdp_flush()) {
|
||||
if ((HwIndexMonotonic) && (hdp_flush_support_)) {
|
||||
if (hdp_flush_support_) {
|
||||
BuildHdpFlushCommand(command_addr);
|
||||
command_addr += flush_command_size_;
|
||||
bytes_written_[wrapped_index] = prior_bytes;
|
||||
@@ -542,9 +525,8 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>:
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::SubmitLinearCopyCommand(void* dst, const void* src, size_t size) {
|
||||
template <bool useGCR>
|
||||
hsa_status_t BlitSdma<useGCR>::SubmitLinearCopyCommand(void* dst, const void* src, size_t size) {
|
||||
// Break the copy into multiple copy operations in case the copy size exceeds
|
||||
// the SDMA linear copy limit.
|
||||
const size_t max_copy_size = max_single_linear_copy_size_ ? max_single_linear_copy_size_ :
|
||||
@@ -557,9 +539,8 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
return SubmitBlockingCommand(&buff[0], buff.size() * sizeof(SDMA_PKT_COPY_LINEAR), size);
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::SubmitLinearCopyCommand(void* dst, const void* src, size_t size,
|
||||
template <bool useGCR>
|
||||
hsa_status_t BlitSdma<useGCR>::SubmitLinearCopyCommand(void* dst, const void* src, size_t size,
|
||||
std::vector<core::Signal*>& dep_signals,
|
||||
core::Signal& out_signal,
|
||||
std::vector<core::Signal*>& gang_signals) {
|
||||
@@ -577,9 +558,8 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
out_signal, gang_signals);
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
hsa_status_t
|
||||
BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::SubmitCopyRectCommand(
|
||||
template <bool useGCR>
|
||||
hsa_status_t BlitSdma<useGCR>::SubmitCopyRectCommand(
|
||||
const hsa_pitched_ptr_t* dst, const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src,
|
||||
const hsa_dim3_t* src_offset, const hsa_dim3_t* range, std::vector<core::Signal*>& dep_signals,
|
||||
core::Signal& out_signal) {
|
||||
@@ -653,9 +633,8 @@ BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::SubmitCopyRe
|
||||
out_signal, gang_signals);
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::SubmitLinearFillCommand(void* ptr, uint32_t value, size_t count) {
|
||||
template <bool useGCR>
|
||||
hsa_status_t BlitSdma<useGCR>::SubmitLinearFillCommand(void* ptr, uint32_t value, size_t count) {
|
||||
const size_t size = count * sizeof(uint32_t);
|
||||
|
||||
const uint32_t num_fill_command = (size + kMaxSingleFillSize - 1) / kMaxSingleFillSize;
|
||||
@@ -666,15 +645,12 @@ hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
return SubmitBlockingCommand(&buff[0], buff.size() * sizeof(SDMA_PKT_CONSTANT_FILL), size);
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
hsa_status_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::EnableProfiling(
|
||||
bool enable) {
|
||||
template <bool useGCR> hsa_status_t BlitSdma<useGCR>::EnableProfiling(bool enable) {
|
||||
return HSA_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
char* BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::AcquireWriteAddress(
|
||||
uint32_t cmd_size, RingIndexTy& curr_index) {
|
||||
template <bool useGCR>
|
||||
char* BlitSdma<useGCR>::AcquireWriteAddress(uint32_t cmd_size, uint64_t& curr_index) {
|
||||
// Ring is full when all but one byte is written.
|
||||
if (cmd_size >= kQueueSize) {
|
||||
return nullptr;
|
||||
@@ -692,7 +668,7 @@ char* BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::Acquir
|
||||
}
|
||||
|
||||
// Check whether the engine has finished using this region.
|
||||
const RingIndexTy new_index = curr_index + cmd_size;
|
||||
const uint64_t new_index = curr_index + cmd_size;
|
||||
|
||||
if (CanWriteUpto(new_index) == false) {
|
||||
// Wait for read index to move and try again.
|
||||
@@ -713,10 +689,8 @@ char* BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::Acquir
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::UpdateWriteAndDoorbellRegister(RingIndexTy curr_index,
|
||||
RingIndexTy new_index) {
|
||||
template <bool useGCR>
|
||||
void BlitSdma<useGCR>::UpdateWriteAndDoorbellRegister(uint64_t curr_index, uint64_t new_index) {
|
||||
while (true) {
|
||||
// Make sure that the address before ::curr_index is already released.
|
||||
// Otherwise the CP may read invalid packets.
|
||||
@@ -725,21 +699,19 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
// TODO: remove when sdma wpointer issue is resolved.
|
||||
// Wait until the SDMA engine finishes processing all packets before
|
||||
// updating the wptr and doorbell.
|
||||
while (WrapIntoRing(*reinterpret_cast<RingIndexTy*>(queue_resource_.Queue_read_ptr)) !=
|
||||
while (WrapIntoRing(*reinterpret_cast<uint64_t*>(queue_resource_.Queue_read_ptr)) !=
|
||||
WrapIntoRing(curr_index)) {
|
||||
os::YieldThread();
|
||||
}
|
||||
}
|
||||
|
||||
// Update write pointer and doorbell register.
|
||||
*reinterpret_cast<RingIndexTy*>(queue_resource_.Queue_write_ptr) =
|
||||
(HwIndexMonotonic ? new_index : WrapIntoRing(new_index));
|
||||
*reinterpret_cast<uint64_t*>(queue_resource_.Queue_write_ptr) = new_index;
|
||||
|
||||
// Ensure write pointer is visible to GPU before doorbell.
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
|
||||
*reinterpret_cast<RingIndexTy*>(queue_resource_.Queue_DoorBell) =
|
||||
(HwIndexMonotonic ? new_index : WrapIntoRing(new_index));
|
||||
*reinterpret_cast<uint64_t*>(queue_resource_.Queue_DoorBell) = new_index;
|
||||
|
||||
atomic::Store(&cached_commit_index_, new_index, std::memory_order_release);
|
||||
break;
|
||||
@@ -750,9 +722,8 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
}
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::ReleaseWriteAddress(
|
||||
RingIndexTy curr_index, uint32_t cmd_size) {
|
||||
template <bool useGCR>
|
||||
void BlitSdma<useGCR>::ReleaseWriteAddress(uint64_t curr_index, uint32_t cmd_size) {
|
||||
if (cmd_size > kQueueSize) {
|
||||
assert(false && "cmd_addr is outside the queue buffer range");
|
||||
return;
|
||||
@@ -761,11 +732,9 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::Release
|
||||
UpdateWriteAndDoorbellRegister(curr_index, curr_index + cmd_size);
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::PadRingToEnd(
|
||||
RingIndexTy curr_index) {
|
||||
template <bool useGCR> void BlitSdma<useGCR>::PadRingToEnd(uint64_t curr_index) {
|
||||
// Reserve region from here to the end of the ring.
|
||||
RingIndexTy new_index = curr_index + (kQueueSize - WrapIntoRing(curr_index));
|
||||
uint64_t new_index = curr_index + (kQueueSize - WrapIntoRing(curr_index));
|
||||
|
||||
// Check whether the engine has finished using this region.
|
||||
if (CanWriteUpto(new_index) == false) {
|
||||
@@ -786,37 +755,22 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::PadRing
|
||||
}
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
uint32_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::WrapIntoRing(
|
||||
RingIndexTy index) {
|
||||
template <bool useGCR> uint32_t BlitSdma<useGCR>::WrapIntoRing(uint64_t index) {
|
||||
return index & (kQueueSize - 1);
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
bool BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::CanWriteUpto(
|
||||
RingIndexTy upto_index) {
|
||||
template <bool useGCR> bool BlitSdma<useGCR>::CanWriteUpto(uint64_t upto_index) {
|
||||
// Get/calculate the monotonic read index.
|
||||
RingIndexTy hw_read_index = *reinterpret_cast<RingIndexTy*>(queue_resource_.Queue_read_ptr);
|
||||
RingIndexTy read_index;
|
||||
|
||||
if (HwIndexMonotonic) {
|
||||
read_index = hw_read_index;
|
||||
} else {
|
||||
// Calculate distance from commit index to HW read index.
|
||||
// Commit index is always < kQueueSize away from HW read index.
|
||||
RingIndexTy commit_index = atomic::Load(&cached_commit_index_, std::memory_order_relaxed);
|
||||
RingIndexTy dist_to_read_index = WrapIntoRing(commit_index - hw_read_index);
|
||||
read_index = commit_index - dist_to_read_index;
|
||||
}
|
||||
uint64_t hw_read_index = *reinterpret_cast<uint64_t*>(queue_resource_.Queue_read_ptr);
|
||||
|
||||
// Check whether the read pointer has passed the given index.
|
||||
// At most we can submit (kQueueSize - 1) bytes at a time.
|
||||
return (upto_index - read_index) < kQueueSize;
|
||||
return (upto_index - hw_read_index) < kQueueSize;
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildFenceCommand(
|
||||
char* fence_command_addr, uint32_t* fence, uint32_t fence_value) {
|
||||
template <bool useGCR>
|
||||
void BlitSdma<useGCR>::BuildFenceCommand(char* fence_command_addr, uint32_t* fence,
|
||||
uint32_t fence_value) {
|
||||
assert(fence_command_addr != NULL);
|
||||
SDMA_PKT_FENCE* packet_addr =
|
||||
reinterpret_cast<SDMA_PKT_FENCE*>(fence_command_addr);
|
||||
@@ -836,9 +790,9 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildFe
|
||||
packet_addr->DATA_UNION.data = fence_value;
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildCopyCommand(
|
||||
char* cmd_addr, uint32_t num_copy_command, void* dst, const void* src, size_t size) {
|
||||
template <bool useGCR>
|
||||
void BlitSdma<useGCR>::BuildCopyCommand(char* cmd_addr, uint32_t num_copy_command, void* dst,
|
||||
const void* src, size_t size) {
|
||||
size_t cur_size = 0;
|
||||
const size_t max_copy_size = max_single_linear_copy_size_ ? max_single_linear_copy_size_ :
|
||||
kMaxSingleCopySize;
|
||||
@@ -858,9 +812,9 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildCo
|
||||
packet_addr->HEADER_UNION.sub_op = SDMA_SUBOP_COPY_LINEAR;
|
||||
|
||||
if (max_copy_size == (1 << 30) -1)
|
||||
packet_addr->COUNT_UNION.count_ext.count = copy_size + SizeToCountOffset;
|
||||
packet_addr->COUNT_UNION.count_ext.count = copy_size - 1; /* count is 1-based */
|
||||
else
|
||||
packet_addr->COUNT_UNION.count.count = copy_size + SizeToCountOffset;
|
||||
packet_addr->COUNT_UNION.count.count = copy_size - 1; /* count is 1-based */
|
||||
|
||||
packet_addr->SRC_ADDR_LO_UNION.src_addr_31_0 = ptrlow32(cur_src);
|
||||
packet_addr->SRC_ADDR_HI_UNION.src_addr_63_32 = ptrhigh32(cur_src);
|
||||
@@ -881,11 +835,12 @@ Elements are coded by the log2 of the element size in bytes (ie. element 0=1 byt
|
||||
This routine breaks a large rect into tiles that can be handled by hardware. Pitches and offsets
|
||||
must be representable in terms of elements in all tiles of the copy.
|
||||
*/
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildCopyRectCommand(
|
||||
const std::function<void*(size_t)>& append, const hsa_pitched_ptr_t* dst,
|
||||
const hsa_dim3_t* dst_offset, const hsa_pitched_ptr_t* src, const hsa_dim3_t* src_offset,
|
||||
const hsa_dim3_t* range) {
|
||||
template <bool useGCR>
|
||||
void BlitSdma<useGCR>::BuildCopyRectCommand(const std::function<void*(size_t)>& append,
|
||||
const hsa_pitched_ptr_t* dst,
|
||||
const hsa_dim3_t* dst_offset,
|
||||
const hsa_pitched_ptr_t* src,
|
||||
const hsa_dim3_t* src_offset, const hsa_dim3_t* range) {
|
||||
// Returns the index of the first set bit (ie log2 of the largest power of 2 that evenly divides
|
||||
// width), the largest element that perfectly covers width.
|
||||
// width | 16 ensures that we don't return a higher element than is supported and avoids
|
||||
@@ -1029,9 +984,9 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildCo
|
||||
}
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildFillCommand(
|
||||
char* cmd_addr, uint32_t num_fill_command, void* ptr, uint32_t value, size_t count) {
|
||||
template <bool useGCR>
|
||||
void BlitSdma<useGCR>::BuildFillCommand(char* cmd_addr, uint32_t num_fill_command, void* ptr,
|
||||
uint32_t value, size_t count) {
|
||||
char* cur_ptr = reinterpret_cast<char*>(ptr);
|
||||
const uint32_t maxDwordCount = kMaxSingleFillSize / sizeof(uint32_t);
|
||||
SDMA_PKT_CONSTANT_FILL* packet_addr = reinterpret_cast<SDMA_PKT_CONSTANT_FILL*>(cmd_addr);
|
||||
@@ -1050,7 +1005,8 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildFi
|
||||
|
||||
packet_addr->DATA_UNION.src_data_31_0 = value;
|
||||
|
||||
packet_addr->COUNT_UNION.count = (fill_count + SizeToCountOffset) * sizeof(uint32_t);
|
||||
/* count is 1-based */
|
||||
packet_addr->COUNT_UNION.count = (fill_count - 1) * sizeof(uint32_t);
|
||||
|
||||
packet_addr++;
|
||||
cur_ptr += fill_count * sizeof(uint32_t);
|
||||
@@ -1059,9 +1015,8 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildFi
|
||||
assert(count == 0 && "SDMA fill command count error.");
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildPollCommand(
|
||||
char* cmd_addr, void* addr, uint32_t reference) {
|
||||
template <bool useGCR>
|
||||
void BlitSdma<useGCR>::BuildPollCommand(char* cmd_addr, void* addr, uint32_t reference) {
|
||||
SDMA_PKT_POLL_REGMEM* packet_addr =
|
||||
reinterpret_cast<SDMA_PKT_POLL_REGMEM*>(cmd_addr);
|
||||
|
||||
@@ -1081,9 +1036,8 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildPo
|
||||
packet_addr->DW5_UNION.retry_count = 0xfff; // Retry forever.
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::BuildAtomicDecrementCommand(char* cmd_addr, void* addr) {
|
||||
template <bool useGCR>
|
||||
void BlitSdma<useGCR>::BuildAtomicDecrementCommand(char* cmd_addr, void* addr) {
|
||||
SDMA_PKT_ATOMIC* packet_addr = reinterpret_cast<SDMA_PKT_ATOMIC*>(cmd_addr);
|
||||
|
||||
memset(packet_addr, 0, sizeof(SDMA_PKT_ATOMIC));
|
||||
@@ -1098,9 +1052,8 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
packet_addr->SRC_DATA_HI_UNION.src_data_63_32 = 0xffffffff;
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
useGCR>::BuildGetGlobalTimestampCommand(char* cmd_addr, void* write_address) {
|
||||
template <bool useGCR>
|
||||
void BlitSdma<useGCR>::BuildGetGlobalTimestampCommand(char* cmd_addr, void* write_address) {
|
||||
SDMA_PKT_TIMESTAMP* packet_addr =
|
||||
reinterpret_cast<SDMA_PKT_TIMESTAMP*>(cmd_addr);
|
||||
|
||||
@@ -1113,9 +1066,7 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset,
|
||||
packet_addr->ADDR_HI_UNION.addr_63_32 = ptrhigh32(write_address);
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildTrapCommand(
|
||||
char* cmd_addr, uint32_t event_id) {
|
||||
template <bool useGCR> void BlitSdma<useGCR>::BuildTrapCommand(char* cmd_addr, uint32_t event_id) {
|
||||
SDMA_PKT_TRAP* packet_addr =
|
||||
reinterpret_cast<SDMA_PKT_TRAP*>(cmd_addr);
|
||||
|
||||
@@ -1125,17 +1076,13 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildTr
|
||||
packet_addr->INT_CONTEXT_UNION.int_ctx = event_id;
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildHdpFlushCommand(
|
||||
char* cmd_addr) {
|
||||
template <bool useGCR> void BlitSdma<useGCR>::BuildHdpFlushCommand(char* cmd_addr) {
|
||||
assert(cmd_addr != NULL);
|
||||
SDMA_PKT_POLL_REGMEM* addr = reinterpret_cast<SDMA_PKT_POLL_REGMEM*>(cmd_addr);
|
||||
memcpy(addr, &hdp_flush_cmd, flush_command_size_);
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildGCRCommand(
|
||||
char* cmd_addr, bool invalidate) {
|
||||
template <bool useGCR> void BlitSdma<useGCR>::BuildGCRCommand(char* cmd_addr, bool invalidate) {
|
||||
assert(cmd_addr != NULL);
|
||||
assert(useGCR && "Unsupported SDMA command - GCR.");
|
||||
SDMA_PKT_GCR* addr = reinterpret_cast<SDMA_PKT_GCR*>(cmd_addr);
|
||||
@@ -1154,25 +1101,16 @@ void BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::BuildGC
|
||||
addr->WORD2_UNION.GCR_CONTROL_GL2_RANGE = 0;
|
||||
}
|
||||
|
||||
template <typename RingIndexTy, bool HwIndexMonotonic, int SizeToCountOffset, bool useGCR>
|
||||
uint64_t BlitSdma<RingIndexTy, HwIndexMonotonic, SizeToCountOffset, useGCR>::PendingBytes() {
|
||||
RingIndexTy commit = atomic::Load(&cached_commit_index_, std::memory_order_acquire);
|
||||
RingIndexTy hw_read_index = *reinterpret_cast<RingIndexTy*>(queue_resource_.Queue_read_ptr);
|
||||
RingIndexTy read;
|
||||
if (HwIndexMonotonic) {
|
||||
read = hw_read_index;
|
||||
} else {
|
||||
RingIndexTy dist_to_read_index = WrapIntoRing(commit - hw_read_index);
|
||||
read = commit - dist_to_read_index;
|
||||
}
|
||||
template <bool useGCR> uint64_t BlitSdma<useGCR>::PendingBytes() {
|
||||
uint64_t commit = atomic::Load(&cached_commit_index_, std::memory_order_acquire);
|
||||
uint64_t hw_read_index = *reinterpret_cast<uint64_t*>(queue_resource_.Queue_read_ptr);
|
||||
|
||||
if (commit == read) return 0;
|
||||
return bytes_queued_ - bytes_written_[WrapIntoRing(read)];
|
||||
if (commit == hw_read_index) return 0;
|
||||
return bytes_queued_ - bytes_written_[WrapIntoRing(hw_read_index)];
|
||||
}
|
||||
|
||||
template class BlitSdma<uint32_t, false, 0, false>;
|
||||
template class BlitSdma<uint64_t, true, -1, false>;
|
||||
template class BlitSdma<uint64_t, true, -1, true>;
|
||||
template class BlitSdma<false>;
|
||||
template class BlitSdma<true>;
|
||||
|
||||
} // namespace amd
|
||||
} // namespace rocr
|
||||
|
||||
@@ -711,10 +711,6 @@ core::Blit* GpuAgent::CreateBlitSdma(bool use_xgmi, int rec_eng) {
|
||||
const size_t copy_size_overrides[2] = {0x3fffff, 0x3fffffff};
|
||||
|
||||
switch (isa_->GetMajorVersion()) {
|
||||
case 7:
|
||||
case 8:
|
||||
sdma = new BlitSdmaV2V3();
|
||||
break;
|
||||
case 9:
|
||||
sdma = new BlitSdmaV4();
|
||||
copy_size_override = (isa_->GetMinorVersion() == 0 && isa_->GetStepping() == 10) ?
|
||||
|
||||
Ссылка в новой задаче
Block a user