Disable bulk fetch by default. Add an environment variable to enable it.
RDC can optimize by bulk fetching multiple metrics using a single rocm_smi call. However, this is not yet fully supported on all ASIC generations, so bulk fetch is disabled by default for now. Set the environment variable RDC_BULK_FETCH_ENABLED=TRUE to enable RDC bulk fetch. BUG: SWDEV-289316. Change-Id: Ibb55514f198356dccf5f47bb0fd2d53c17acb251
Este commit está contenido en:
@@ -70,6 +70,7 @@ class RdcSmiLib : public RdcTelemetry, public RdcDiagnostic {
|
||||
|
||||
private:
|
||||
RdcMetricFetcherPtr metric_fetcher_;
|
||||
bool bulk_fetch_enabled_;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<RdcSmiLib> RdcSmiLibPtr;
|
||||
|
||||
@@ -607,7 +607,7 @@ rdc_status_t RdcMetricFetcherImpl::delete_rsmi_handle(RdcFieldKey fk) {
|
||||
}
|
||||
|
||||
rdc_status_t RdcMetricFetcherImpl::acquire_rsmi_handle(RdcFieldKey fk) {
|
||||
rdc_status_t ret;
|
||||
rdc_status_t ret = RDC_ST_OK;
|
||||
|
||||
auto get_evnt_handle = [&](rsmi_event_group_t grp) {
|
||||
rsmi_event_handle_t handle;
|
||||
|
||||
@@ -20,6 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <functional>
|
||||
#include <stdlib.h>
|
||||
#include <strings.h>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/impl/RdcSmiLib.h"
|
||||
@@ -29,7 +31,15 @@ THE SOFTWARE.
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
RdcSmiLib::RdcSmiLib(const RdcMetricFetcherPtr& mf): metric_fetcher_(mf) {
|
||||
// Constructs the SMI wrapper around the supplied metric fetcher. Bulk fetch
// stays off unless the RDC_BULK_FETCH_ENABLED environment variable is set to
// "true" (compared case-insensitively); the chosen state is logged at RDC_DEBUG.
RdcSmiLib::RdcSmiLib(const RdcMetricFetcherPtr& mf): metric_fetcher_(mf),
|
||||
bulk_fetch_enabled_(false) { // Disable bulk fetch by default.
|
||||
// An unset variable (nullptr) or any value other than "true" leaves it off.
char* bulk_env = getenv("RDC_BULK_FETCH_ENABLED");
|
||||
if (bulk_env != nullptr && strcasecmp(bulk_env, "true") == 0) {
|
||||
RDC_LOG(RDC_DEBUG, "Bulk fetch enabled.");
|
||||
bulk_fetch_enabled_ = true;
|
||||
} else {
|
||||
RDC_LOG(RDC_DEBUG, "Bulk fetch disabled.");
|
||||
}
|
||||
}
|
||||
|
||||
// Bulk fetch wrapper for the rocm_smi_lib. This will be replaced after
|
||||
@@ -46,15 +56,17 @@ rdc_status_t RdcSmiLib::rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields,
|
||||
|
||||
// Bulk fetch fields
|
||||
std::vector<rdc_gpu_field_value_t> bulk_results;
|
||||
rdc_status_t status = metric_fetcher_->bulk_fetch_smi_fields(
|
||||
fields, fields_count, bulk_results);
|
||||
RDC_LOG(RDC_DEBUG, "Bulk fetched " << bulk_results.size()
|
||||
if (bulk_fetch_enabled_) {
|
||||
rdc_status_t status = metric_fetcher_->bulk_fetch_smi_fields(
|
||||
fields, fields_count, bulk_results);
|
||||
RDC_LOG(RDC_DEBUG, "Bulk fetched " << bulk_results.size()
|
||||
<< " fields from rocm_smi_lib which return " << status);
|
||||
if (bulk_results.size() > 0) {
|
||||
rdc_status_t status = callback(&bulk_results[0],
|
||||
if (bulk_results.size() > 0) {
|
||||
rdc_status_t status = callback(&bulk_results[0],
|
||||
bulk_results.size(), user_data);
|
||||
if (status != RDC_ST_OK) {
|
||||
if (status != RDC_ST_OK) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Referencia en una nueva incidencia
Block a user