SWDEV-561708 Counted queue size from env var (#2844)
* SWDEV-561708 Counted queue size from env var
* use counted_queue_size for test
* remove rocrtst changes; add a const for default queue size
* Remove env var from test; use queue->size
* Improve env var documentation
* Correct type
Цей коміт міститься в:
@@ -718,6 +718,7 @@ void CountedQueuesTest::CountedQueuesOverflowWrapAroundTest() {
|
|||||||
|
|
||||||
// To verify that after the queue has been used up, next index wraps around
|
// To verify that after the queue has been used up, next index wraps around
|
||||||
std::atomic<uint64_t> maxIndexSeen{0};
|
std::atomic<uint64_t> maxIndexSeen{0};
|
||||||
|
std::atomic<uint32_t> countedQueueSize{0};
|
||||||
|
|
||||||
auto func = [&]() {
|
auto func = [&]() {
|
||||||
// local dest buffer for each user application
|
// local dest buffer for each user application
|
||||||
@@ -746,6 +747,8 @@ void CountedQueuesTest::CountedQueuesOverflowWrapAroundTest() {
|
|||||||
uint32_t queue_size = queue->size; // should be 16384
|
uint32_t queue_size = queue->size; // 16384 by default; configurable via HSA_COUNTED_QUEUE_SIZE
|
||||||
const uint32_t queue_mask = queue_size - 1; // used for index wraparound
|
const uint32_t queue_mask = queue_size - 1; // used for index wraparound
|
||||||
|
|
||||||
|
countedQueueSize.store(queue_size);
|
||||||
|
|
||||||
struct __attribute__((aligned(16))) local_args_t {
|
struct __attribute__((aligned(16))) local_args_t {
|
||||||
uint32_t* dstArray;
|
uint32_t* dstArray;
|
||||||
uint32_t* srcArray;
|
uint32_t* srcArray;
|
||||||
@@ -847,9 +850,9 @@ void CountedQueuesTest::CountedQueuesOverflowWrapAroundTest() {
|
|||||||
th.join();
|
th.join();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify value of max seen index
|
// Verify value of max seen index based on counted queue size
|
||||||
uint64_t maxId = maxIndexSeen.load();
|
uint64_t maxId = maxIndexSeen.load();
|
||||||
EXPECT_EQ(maxId, (16384 + 5) * kThreads - 1);
|
EXPECT_EQ(maxId, (countedQueueSize.load() + 5) * kThreads - 1);
|
||||||
|
|
||||||
hsa_amd_memory_pool_free(shared_src_buffer);
|
hsa_amd_memory_pool_free(shared_src_buffer);
|
||||||
}
|
}
|
||||||
@@ -53,6 +53,7 @@ class CountedQueuePoolManager {
|
|||||||
|
|
||||||
core::Agent* agent_; // pointer to the gpu agent that owns this pool
|
core::Agent* agent_; // pointer to the gpu agent that owns this pool
|
||||||
uint32_t max_hw_queues_;
|
uint32_t max_hw_queues_;
|
||||||
|
size_t counted_queue_size_;
|
||||||
std::mutex mutex_;
|
std::mutex mutex_;
|
||||||
|
|
||||||
// Pool of hw queues by priority on the agent
|
// Pool of hw queues by priority on the agent
|
||||||
|
|||||||
+3
-4
@@ -11,11 +11,10 @@
|
|||||||
namespace rocr {
|
namespace rocr {
|
||||||
namespace core {
|
namespace core {
|
||||||
|
|
||||||
constexpr size_t DEFAULT_QUEUE_SIZE = 16384;
|
|
||||||
|
|
||||||
CountedQueuePoolManager::CountedQueuePoolManager(core::Agent* agent) : agent_(agent) {
|
CountedQueuePoolManager::CountedQueuePoolManager(core::Agent* agent) : agent_(agent) {
|
||||||
// Read in GPU_MAX_HW_QUEUES flag value
|
// Read in GPU_MAX_HW_QUEUES and HSA_COUNTED_QUEUE_SIZE flags
|
||||||
max_hw_queues_ = core::Runtime::runtime_singleton_->flag().cp_queues_limit();
|
max_hw_queues_ = core::Runtime::runtime_singleton_->flag().cp_queues_limit();
|
||||||
|
counted_queue_size_ = core::Runtime::runtime_singleton_->flag().counted_queue_size();
|
||||||
}
|
}
|
||||||
|
|
||||||
hsa_status_t CountedQueuePoolManager::AcquireQueue(
|
hsa_status_t CountedQueuePoolManager::AcquireQueue(
|
||||||
@@ -78,7 +77,7 @@ core::Queue* CountedQueuePoolManager::FindOrCreateHardwareQueue(
|
|||||||
// Create a new hardware queue
|
// Create a new hardware queue
|
||||||
core::Queue* cmd_queue = nullptr;
|
core::Queue* cmd_queue = nullptr;
|
||||||
hsa_status_t status =
|
hsa_status_t status =
|
||||||
agent_->QueueCreate(DEFAULT_QUEUE_SIZE, type, 0, callback, data, 0, 0, &cmd_queue);
|
agent_->QueueCreate(counted_queue_size_, type, 0, callback, data, 0, 0, &cmd_queue);
|
||||||
if (status != HSA_STATUS_SUCCESS) return nullptr;
|
if (status != HSA_STATUS_SUCCESS) return nullptr;
|
||||||
|
|
||||||
status = cmd_queue->SetPriority(priority);
|
status = cmd_queue->SetPriority(priority);
|
||||||
|
|||||||
@@ -54,6 +54,9 @@
|
|||||||
|
|
||||||
namespace rocr {
|
namespace rocr {
|
||||||
|
|
||||||
|
constexpr size_t DEFAULT_COUNTED_QUEUE_SIZE = 16384;
|
||||||
|
constexpr uint32_t DEFAULT_GPU_HW_QUEUES_MAX = 4;
|
||||||
|
|
||||||
class Flag {
|
class Flag {
|
||||||
public:
|
public:
|
||||||
enum SDMA_OVERRIDE { SDMA_DISABLE, SDMA_ENABLE, SDMA_DEFAULT };
|
enum SDMA_OVERRIDE { SDMA_DISABLE, SDMA_ENABLE, SDMA_DEFAULT };
|
||||||
@@ -306,8 +309,16 @@ class Flag {
|
|||||||
core_dump_disable_ = (var == "1");
|
core_dump_disable_ = (var == "1");
|
||||||
|
|
||||||
core_dump_pattern_ = os::GetEnvVar("HSA_COREDUMP_PATTERN");
|
core_dump_pattern_ = os::GetEnvVar("HSA_COREDUMP_PATTERN");
|
||||||
|
|
||||||
|
// This limits the maximum number of hardware queues that can be created per
|
||||||
|
// priority level for counted queues on every GPU agent. By default, the limit is set to 4.
|
||||||
var = os::GetEnvVar("GPU_MAX_HW_QUEUES");
|
var = os::GetEnvVar("GPU_MAX_HW_QUEUES");
|
||||||
cp_queues_limit_ = var.empty() ? 4 : atoi(var.c_str());
|
cp_queues_limit_ = var.empty() ? DEFAULT_GPU_HW_QUEUES_MAX : atoi(var.c_str());
|
||||||
|
|
||||||
|
// This allows configuring the size of counted queues created through
|
||||||
|
// the hsa_amd_counted_queue_acquire API. If not set, the queue size defaults to 16384.
|
||||||
|
var = os::GetEnvVar("HSA_COUNTED_QUEUE_SIZE");
|
||||||
|
counted_queue_size_ = var.empty() ? DEFAULT_COUNTED_QUEUE_SIZE : atoi(var.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_masks(uint32_t maxGpu, uint32_t maxCU) {
|
void parse_masks(uint32_t maxGpu, uint32_t maxCU) {
|
||||||
@@ -430,6 +441,8 @@ class Flag {
|
|||||||
|
|
||||||
uint32_t cp_queues_limit() const { return cp_queues_limit_; }
|
uint32_t cp_queues_limit() const { return cp_queues_limit_; }
|
||||||
|
|
||||||
|
size_t counted_queue_size() const { return counted_queue_size_; }
|
||||||
|
|
||||||
bool dev_mem_queue_buf() const { return dev_mem_queue_buf_; }
|
bool dev_mem_queue_buf() const { return dev_mem_queue_buf_; }
|
||||||
|
|
||||||
uint32_t signal_abort_timeout() const { return signal_abort_timeout_; }
|
uint32_t signal_abort_timeout() const { return signal_abort_timeout_; }
|
||||||
@@ -550,6 +563,7 @@ class Flag {
|
|||||||
std::string core_dump_pattern_;
|
std::string core_dump_pattern_;
|
||||||
|
|
||||||
uint32_t cp_queues_limit_;
|
uint32_t cp_queues_limit_;
|
||||||
|
size_t counted_queue_size_;
|
||||||
|
|
||||||
// Map GPU index post RVD to its default cu mask.
|
// Map GPU index post RVD to its default cu mask.
|
||||||
std::map<uint32_t, std::vector<uint32_t>> cu_mask_;
|
std::map<uint32_t, std::vector<uint32_t>> cu_mask_;
|
||||||
|
|||||||
@@ -3793,6 +3793,7 @@ hsa_status_t HSA_API hsa_amd_ais_file_read(hsa_amd_ais_file_handle_t handle, voi
|
|||||||
* @param[in] agent Agent where to create the queue
|
* @param[in] agent Agent where to create the queue
|
||||||
*
|
*
|
||||||
* @param[in] type For future use. HSA_QUEUE_TYPE_MULTI is the only valid option.
|
* @param[in] type For future use. HSA_QUEUE_TYPE_MULTI is the only valid option.
|
||||||
|
* HSA_QUEUE_TYPE_COOPERATIVE queues are not supported.
|
||||||
*
|
*
|
||||||
* @param[in] priority Associated priority. The GPU_MAX_HW_QUEUES limit is counted for each priority
|
* @param[in] priority Associated priority. The GPU_MAX_HW_QUEUES limit is counted for each priority
|
||||||
*
|
*
|
||||||
|
|||||||
Посилання в новій задачі
Заблокувати користувача