Disable bulk fetch. Add environment variable to enable it

RDC can optimize by bulk-fetching multiple metrics with a single
rocm_smi call. However, this is currently not fully supported on
all ASIC generations, so bulk fetch is disabled by default for now.

Set the environment variable RDC_BULK_FETCH_ENABLED=TRUE (the value is
matched case-insensitively) to enable RDC bulk fetch.

BUG: SWDEV-289316

Change-Id: Ibb55514f198356dccf5f47bb0fd2d53c17acb251
Este commit está contenido en:
Bill(Shuzhou) Liu
2021-06-04 13:48:06 -04:00
padre eab3625d65
commit 673f5a4ee1
Se han modificado 3 ficheros con 21 adiciones y 8 borrados
+1
Ver fichero
@@ -70,6 +70,7 @@ class RdcSmiLib : public RdcTelemetry, public RdcDiagnostic {
private:
RdcMetricFetcherPtr metric_fetcher_;
bool bulk_fetch_enabled_;
};
typedef std::shared_ptr<RdcSmiLib> RdcSmiLibPtr;
+1 -1
Ver fichero
@@ -607,7 +607,7 @@ rdc_status_t RdcMetricFetcherImpl::delete_rsmi_handle(RdcFieldKey fk) {
}
rdc_status_t RdcMetricFetcherImpl::acquire_rsmi_handle(RdcFieldKey fk) {
rdc_status_t ret;
rdc_status_t ret = RDC_ST_OK;
auto get_evnt_handle = [&](rsmi_event_group_t grp) {
rsmi_event_handle_t handle;
+19 -7
Ver fichero
@@ -20,6 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
#include <functional>
#include <stdlib.h>
#include <strings.h>
#include "rdc_lib/rdc_common.h"
#include "rdc_lib/RdcLogger.h"
#include "rdc_lib/impl/RdcSmiLib.h"
@@ -29,7 +31,15 @@ THE SOFTWARE.
namespace amd {
namespace rdc {
RdcSmiLib::RdcSmiLib(const RdcMetricFetcherPtr& mf): metric_fetcher_(mf) {
// Constructs an RdcSmiLib around the given metric fetcher.
//
// Bulk fetch starts out disabled. It is switched on only when the
// RDC_BULK_FETCH_ENABLED environment variable is set to "true"; the value is
// compared with strcasecmp, so the match is case-insensitive. Any other
// value, or an unset variable, leaves bulk fetch disabled. The resulting
// state is logged at RDC_DEBUG level.
//
// mf: metric fetcher stored in metric_fetcher_.
RdcSmiLib::RdcSmiLib(const RdcMetricFetcherPtr& mf): metric_fetcher_(mf),
bulk_fetch_enabled_(false) { // Disable bulk fetch by default.
// Read the opt-in switch from the process environment.
char* bulk_env = getenv("RDC_BULK_FETCH_ENABLED");
// The nullptr check guards strcasecmp against an unset variable.
if (bulk_env != nullptr && strcasecmp(bulk_env, "true") == 0) {
RDC_LOG(RDC_DEBUG, "Bulk fetch enabled.");
bulk_fetch_enabled_ = true;
} else {
RDC_LOG(RDC_DEBUG, "Bulk fetch disabled.");
}
}
// Bulk fetch wrapper for the rocm_smi_lib. This will be replaced after
@@ -46,15 +56,17 @@ rdc_status_t RdcSmiLib::rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields,
// Bulk fetch fields
std::vector<rdc_gpu_field_value_t> bulk_results;
rdc_status_t status = metric_fetcher_->bulk_fetch_smi_fields(
fields, fields_count, bulk_results);
RDC_LOG(RDC_DEBUG, "Bulk fetched " << bulk_results.size()
if (bulk_fetch_enabled_) {
rdc_status_t status = metric_fetcher_->bulk_fetch_smi_fields(
fields, fields_count, bulk_results);
RDC_LOG(RDC_DEBUG, "Bulk fetched " << bulk_results.size()
<< " fields from rocm_smi_lib which return " << status);
if (bulk_results.size() > 0) {
rdc_status_t status = callback(&bulk_results[0],
if (bulk_results.size() > 0) {
rdc_status_t status = callback(&bulk_results[0],
bulk_results.size(), user_data);
if (status != RDC_ST_OK) {
if (status != RDC_ST_OK) {
return status;
}
}
}