From 474eb81053239f647e7220ce45ea61eb69b0b9ab Mon Sep 17 00:00:00 2001 From: "Li, Star" Date: Mon, 6 Jan 2025 09:21:08 +0800 Subject: [PATCH] Fix unit issue in policy feature (#78) 1. For temperature, the unit is millidegrees Celsius. 2. For power, the unit is microwatts. 3. Fix the second register call to rdcd not functioning because of the start flag. Co-authored-by: Chao Fei [ROCm/rdc commit: bd7d7c99c1c457f95360db9cb8c35fedf8ed9ee3] --- projects/rdc/include/rdc/rdc.h | 2 +- projects/rdc/rdci/src/RdciPolicySubSystem.cc | 32 +++++++++++++++----- projects/rdc/server/src/rdc_api_service.cc | 13 +++++--- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/projects/rdc/include/rdc/rdc.h b/projects/rdc/include/rdc/rdc.h index 03c31d822a..1e99f79489 100644 --- a/projects/rdc/include/rdc/rdc.h +++ b/projects/rdc/include/rdc/rdc.h @@ -593,7 +593,7 @@ typedef enum { RDC_POLICY_COND_FIRST = 0, RDC_POLICY_COND_MAX_PAGE_RETRIED = RDC_POLICY_COND_FIRST, //!< Max number of page retired RDC_POLICY_COND_THERMAL, //!< Temperature threshold, millidegree Celsius - RDC_POLICY_COND_POWER, //!< Power threshold, unit milliwatt + RDC_POLICY_COND_POWER, //!< Power threshold, unit microwatt RDC_POLICY_COND_LAST = RDC_POLICY_COND_POWER, RDC_POLICY_COND_MAX } rdc_policy_condition_type_t; diff --git a/projects/rdc/rdci/src/RdciPolicySubSystem.cc b/projects/rdc/rdci/src/RdciPolicySubSystem.cc index d4e12c8c9b..c8bb6fa15e 100644 --- a/projects/rdc/rdci/src/RdciPolicySubSystem.cc +++ b/projects/rdc/rdci/src/RdciPolicySubSystem.cc @@ -185,8 +185,18 @@ int rdc_policy_callback(rdc_policy_callback_response_t* userData) { return 0; } + int64_t value = userData->value; + int64_t threshold = userData->condition.value; + if (userData->condition.type == RDC_POLICY_COND_THERMAL) { + value /= 1000; + threshold /= 1000; + } else if (userData->condition.type == RDC_POLICY_COND_POWER) { + value /= 1000000; + threshold /= 1000000; + } + std::cout << "A " << condition_type_to_str(userData->condition.type) << " 
exceeds the threshold " - << userData->condition.value << " with the value " << userData->value << std::endl; + << threshold << " with the value " << value << std::endl; last_time = now; // update the last time return 0; } @@ -216,10 +226,10 @@ void RdciPolicySubSystem::process() { policy.condition = {RDC_POLICY_COND_MAX_PAGE_RETRIED, option.second}; break; case POLICY_OPT_TEMP: - policy.condition = {RDC_POLICY_COND_THERMAL, option.second}; + policy.condition = {RDC_POLICY_COND_THERMAL, option.second * 1000}; break; case POLICY_OPT_POWER: - policy.condition = {RDC_POLICY_COND_POWER, option.second}; + policy.condition = {RDC_POLICY_COND_POWER, option.second * 1000000}; break; case POLICY_OPT_ACTION: if (option.second == 0) { @@ -264,15 +274,21 @@ void RdciPolicySubSystem::process() { if (policies[i].condition.type == RDC_POLICY_COND_MAX_PAGE_RETRIED) { std::cout << "| Page Retirement\t " << "| " << policies[i].condition.value; + if (policies[i].condition.value < 100) { + std::cout << "\t"; + } } else if (policies[i].condition.type == RDC_POLICY_COND_THERMAL) { std::cout << "| Temperature Limit\t " - << "| " << policies[i].condition.value; + << "| " << policies[i].condition.value / 1000; + if (policies[i].condition.value / 1000 < 100) { + std::cout << "\t"; + } } else if (policies[i].condition.type == RDC_POLICY_COND_POWER) { std::cout << "| Power Limit \t " - << "| " << policies[i].condition.value; - } - if (policies[i].condition.value < 100) { - std::cout << "\t"; + << "| " << policies[i].condition.value / 1000000; + if (policies[i].condition.value / 1000000 < 100) { + std::cout << "\t"; + } } if (policies[i].action == 0) { std::cout << "\t\t| Notify\t\t|\n"; diff --git a/projects/rdc/server/src/rdc_api_service.cc b/projects/rdc/server/src/rdc_api_service.cc index 9fe50df4d4..626a7511d0 100644 --- a/projects/rdc/server/src/rdc_api_service.cc +++ b/projects/rdc/server/src/rdc_api_service.cc @@ -886,11 +886,13 @@ int 
RdcAPIServiceImpl::PolicyCallback(rdc_policy_callback_response_t* userData) return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents"); } - policy_thread_context* data = new policy_thread_context; - data->mutex = PTHREAD_MUTEX_INITIALIZER; - data->cond = PTHREAD_COND_INITIALIZER; - data->start = true; - policy_threads_.insert(std::make_pair(request->group_id(), data)); + if (policy_threads_.size() == 0) { + policy_thread_context* data = new policy_thread_context; + data->mutex = PTHREAD_MUTEX_INITIALIZER; + data->cond = PTHREAD_COND_INITIALIZER; + data->start = true; + policy_threads_.insert(std::make_pair(request->group_id(), data)); + } auto updater = std::async(std::launch::async, [this, request, writer]() { rdc_status_t result = rdc_policy_register(rdc_handle_, request->group_id(), PolicyCallback); @@ -898,6 +900,7 @@ int RdcAPIServiceImpl::PolicyCallback(rdc_policy_callback_response_t* userData) auto it = policy_threads_.find(request->group_id()); if (it != policy_threads_.end()) { policy_thread_context* ctx = it->second; + ctx->start = true; while (ctx->start) { struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts);