Fix unit issue in policy feature (#78)

1. For temperature, the unit is millidegrees Celsius.
2. For power, the unit is microwatts.
3. Fix the second register call to rdcd not working because of the start flag.

Co-authored-by: Chao Fei <chao.fei@amd.com>

[ROCm/rdc commit: bd7d7c99c1]
Bu işleme şunda yer alıyor:
Li, Star
2025-01-06 09:21:08 +08:00
işlemeyi yapan: GitHub
ebeveyn 20f3ba845c
işleme 474eb81053
3 değiştirilmiş dosya ile 33 ekleme ve 14 silme
+1 -1
Dosyayı Görüntüle
@@ -593,7 +593,7 @@ typedef enum {
RDC_POLICY_COND_FIRST = 0,
RDC_POLICY_COND_MAX_PAGE_RETRIED = RDC_POLICY_COND_FIRST, //!< Max number of page retired
RDC_POLICY_COND_THERMAL, //!< Temperature threshold, millidegree Celsius
RDC_POLICY_COND_POWER, //!< Power threshold, unit milliwatt
RDC_POLICY_COND_POWER, //!< Power threshold, unit microwatt
RDC_POLICY_COND_LAST = RDC_POLICY_COND_POWER,
RDC_POLICY_COND_MAX
} rdc_policy_condition_type_t;
+24 -8
Dosyayı Görüntüle
@@ -185,8 +185,18 @@ int rdc_policy_callback(rdc_policy_callback_response_t* userData) {
return 0;
}
int64_t value = userData->value;
int64_t threshold = userData->condition.value;
if (userData->condition.type == RDC_POLICY_COND_THERMAL) {
value /= 1000;
threshold /= 1000;
} else if (userData->condition.type == RDC_POLICY_COND_POWER) {
value /= 1000000;
threshold /= 1000000;
}
std::cout << "A " << condition_type_to_str(userData->condition.type) << " exceeds the threshold "
<< userData->condition.value << " with the value " << userData->value << std::endl;
<< threshold << " with the value " << value << std::endl;
last_time = now; // update the last time
return 0;
}
@@ -216,10 +226,10 @@ void RdciPolicySubSystem::process() {
policy.condition = {RDC_POLICY_COND_MAX_PAGE_RETRIED, option.second};
break;
case POLICY_OPT_TEMP:
policy.condition = {RDC_POLICY_COND_THERMAL, option.second};
policy.condition = {RDC_POLICY_COND_THERMAL, option.second * 1000};
break;
case POLICY_OPT_POWER:
policy.condition = {RDC_POLICY_COND_POWER, option.second};
policy.condition = {RDC_POLICY_COND_POWER, option.second * 1000000};
break;
case POLICY_OPT_ACTION:
if (option.second == 0) {
@@ -264,15 +274,21 @@ void RdciPolicySubSystem::process() {
if (policies[i].condition.type == RDC_POLICY_COND_MAX_PAGE_RETRIED) {
std::cout << "| Page Retirement\t "
<< "| " << policies[i].condition.value;
if (policies[i].condition.value < 100) {
std::cout << "\t";
}
} else if (policies[i].condition.type == RDC_POLICY_COND_THERMAL) {
std::cout << "| Temperature Limit\t "
<< "| " << policies[i].condition.value;
<< "| " << policies[i].condition.value / 1000;
if (policies[i].condition.value / 1000 < 100) {
std::cout << "\t";
}
} else if (policies[i].condition.type == RDC_POLICY_COND_POWER) {
std::cout << "| Power Limit \t "
<< "| " << policies[i].condition.value;
}
if (policies[i].condition.value < 100) {
std::cout << "\t";
<< "| " << policies[i].condition.value / 1000000;
if (policies[i].condition.value / 1000000 < 100) {
std::cout << "\t";
}
}
if (policies[i].action == 0) {
std::cout << "\t\t| Notify\t\t|\n";
+8 -5
Dosyayı Görüntüle
@@ -886,11 +886,13 @@ int RdcAPIServiceImpl::PolicyCallback(rdc_policy_callback_response_t* userData)
return ::grpc::Status(::grpc::StatusCode::INTERNAL, "Empty contents");
}
policy_thread_context* data = new policy_thread_context;
data->mutex = PTHREAD_MUTEX_INITIALIZER;
data->cond = PTHREAD_COND_INITIALIZER;
data->start = true;
policy_threads_.insert(std::make_pair(request->group_id(), data));
if (policy_threads_.size() == 0) {
policy_thread_context* data = new policy_thread_context;
data->mutex = PTHREAD_MUTEX_INITIALIZER;
data->cond = PTHREAD_COND_INITIALIZER;
data->start = true;
policy_threads_.insert(std::make_pair(request->group_id(), data));
}
auto updater = std::async(std::launch::async, [this, request, writer]() {
rdc_status_t result = rdc_policy_register(rdc_handle_, request->group_id(), PolicyCallback);
@@ -898,6 +900,7 @@ int RdcAPIServiceImpl::PolicyCallback(rdc_policy_callback_response_t* userData)
auto it = policy_threads_.find(request->group_id());
if (it != policy_threads_.end()) {
policy_thread_context* ctx = it->second;
ctx->start = true;
while (ctx->start) {
struct timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);