rdc_field_t replaces uint32_t; centralize field data
Make the RDC use the new rdc_field_t enum instead of uint32_t.
This will help prevent invalid field types from being passed in.
Also, centralize where data related to fields is kept. This will
reduce the number of places where changes are required each
time a new field is added.
Finally, clean up several cpplint issues.
Change-Id: I48e4512e18c164411d8b09ae3d4bed99fba359ec
[ROCm/rdc commit: 5950ebadc4]
This commit is contained in:
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
// Central table of RDC field metadata, written as X-macro entries.
//
// This file is not compiled on its own: the including translation unit must
// define FLD_DESC_ENT(ID, DESC, LABEL) before each #include and #undef it
// afterwards. Each entry supplies:
//   ID    - the rdc_field_t enumerator for the field
//   DESC  - a human-readable description string
//   LABEL - a short label string for the field
// Add one FLD_DESC_ENT line here when a new field is introduced.
FLD_DESC_ENT(RDC_FI_INVALID, "Unknown/Invalid field", "INVALID")
|
||||
FLD_DESC_ENT(RDC_FI_GPU_COUNT, "GPU count in the system", "GPU_COUNT")
|
||||
FLD_DESC_ENT(RDC_FI_DEV_NAME, "Name of the device", "DEV_NAME")
|
||||
FLD_DESC_ENT(RDC_FI_GPU_CLOCK, "Current GPU clock frequencies.", "GPU_CLOCK")
|
||||
FLD_DESC_ENT(RDC_FI_MEM_CLOCK, "Current Memory clock frequencies.", "MEM_CLOCK")
|
||||
FLD_DESC_ENT(RDC_FI_MEMORY_TEMP, "Memory temperature in millidegrees Celsius.", "MEMORY_TEMP")
|
||||
FLD_DESC_ENT(RDC_FI_GPU_TEMP, "GPU temperature in millidegrees Celsius.", "GPU_TEMP")
|
||||
FLD_DESC_ENT(RDC_FI_POWER_USAGE, "Power usage in microwatts.", "POWER_USAGE")
|
||||
FLD_DESC_ENT(RDC_FI_PCIE_TX, "PCIe Tx utilization in bytes/second.", "PCIE_TX")
|
||||
FLD_DESC_ENT(RDC_FI_PCIE_RX, "PCIe Rx utilization in bytes/second.", "PCIE_RX")
|
||||
FLD_DESC_ENT(RDC_FI_GPU_UTIL, "GPU busy percentage.", "GPU_UTIL")
|
||||
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_USAGE,
|
||||
"Memory usage of the GPU instance in bytes.", "GPU_MEMORY_USAGE")
|
||||
FLD_DESC_ENT(RDC_FI_GPU_MEMORY_TOTAL, "Total memory of the GPU instance", "GPU_MEMORY_TOTAL")
|
||||
FLD_DESC_ENT(RDC_FI_ECC_CORRECT_TOTAL, "Accumulated correctable ECC errors.", "ECC_CORRECT")
|
||||
FLD_DESC_ENT(RDC_FI_ECC_UNCORRECT_TOTAL,
|
||||
"Accumulated uncorrectable ECC errors.", "ECC_UNCORRECT")
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#include <assert.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "common/rdc_fields_supported.h"
|
||||
#include "rdc/rdc.h"
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
// Both lookup tables below are generated from the single list of
// FLD_DESC_ENT(ID, DESC, LABEL) entries in common/rdc_field_data.data.
// FLD_DESC_ENT is redefined before each #include so that the same list
// expands into a different initializer every time.

// Expansion 1: numeric field id -> {enum identifier text, description, label}.
#define FLD_DESC_ENT(ID, DESC, LABEL) \
  {static_cast<uint32_t>(ID), {#ID, (DESC), (LABEL)}},
static const fld_id2name_map_t field_id_to_descript = {
#include "common/rdc_field_data.data"
};
#undef FLD_DESC_ENT

// Expansion 2: enum identifier text (e.g. "RDC_FI_GPU_TEMP") -> field id.
// The table is only ever read, so it is const like its sibling above.
#define FLD_DESC_ENT(ID, DESC, LABEL) {#ID, (ID)},
static const fld_name2id_map_t field_name_to_id = {
// NOTE(review): NOLINT presumably silences cpplint's repeated-include
// warning for the second inclusion of the data file -- confirm.
#include "common/rdc_field_data.data"  // NOLINT
};
#undef FLD_DESC_ENT
|
||||
|
||||
|
||||
|
||||
amd::rdc::fld_id2name_map_t &
|
||||
get_field_id_description_from_id(void) {
|
||||
return field_id_to_descript;
|
||||
}
|
||||
|
||||
bool get_field_id_from_name(const std::string name, rdc_field_t *value) {
|
||||
assert(value != nullptr);
|
||||
auto id = field_name_to_id.find(name);
|
||||
if (id == field_name_to_id.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
*value = static_cast<rdc_field_t>(id->second);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Tells whether |field_id| names a real, supported field.
//
// RDC_FI_INVALID has its own entry in the descriptor table, so it is ruled
// out explicitly; any other id is valid exactly when the table contains it.
bool is_field_valid(rdc_field_t field_id) {
  const bool is_sentinel = (field_id == RDC_FI_INVALID);
  const auto key = static_cast<uint32_t>(field_id);
  return !is_sentinel && field_id_to_descript.count(key) != 0;
}
|
||||
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef COMMON_RDC_FIELDS_SUPPORTED_H_
|
||||
#define COMMON_RDC_FIELDS_SUPPORTED_H_
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "rdc/rdc.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
// Descriptive text attached to a single RDC field.
struct field_id_descript {
  std::string enum_name;    // enumerator spelling, e.g. "RDC_FI_GPU_TEMP"
  std::string description;  // human-readable description of the field
  std::string label;        // short label, e.g. "GPU_TEMP"
};
|
||||
|
||||
// Read-only map from a numeric field id to that field's descriptor.
using fld_id2name_map_t =
    const std::unordered_map<uint32_t, const field_id_descript>;

// Map from a field's enum identifier text to its numeric field id.
using fld_name2id_map_t = std::unordered_map<std::string, uint32_t>;
|
||||
|
||||
// Looks up a field by its enum identifier text (e.g. "RDC_FI_GPU_TEMP").
// On a match, stores the field id in *value and returns true; otherwise
// returns false and leaves *value unchanged. |value| must not be null.
bool get_field_id_from_name(const std::string name, rdc_field_t *value);
|
||||
// Returns a reference to the read-only table that maps a numeric field id
// to its descriptor (enum identifier text, description, label).
fld_id2name_map_t & get_field_id_description_from_id(void); // NOLINT
|
||||
// Returns true when |field_id| is present in the field table and is not
// RDC_FI_INVALID.
bool is_field_valid(rdc_field_t field_id);
|
||||
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // COMMON_RDC_FIELDS_SUPPORTED_H_
|
||||
@@ -129,7 +129,7 @@ int main(int, char **) {
|
||||
|
||||
// Create the field groups to monitor POWER and TEMP
|
||||
rdc_field_grp_t field_group_id;
|
||||
uint32_t field_ids[2];
|
||||
rdc_field_t field_ids[2];
|
||||
field_ids[0] = RDC_FI_GPU_MEMORY_USAGE;
|
||||
field_ids[1] = RDC_FI_POWER_USAGE;
|
||||
result = rdc_group_field_create(rdc_handle, 2,
|
||||
|
||||
@@ -128,77 +128,46 @@ typedef enum {
|
||||
#define RDC_MAX_NUM_FIELD_GROUPS 64
|
||||
|
||||
/**
|
||||
* Memory usage of the GPU instance
|
||||
* These enums are used to specify a particular field to be retrieved.
|
||||
*/
|
||||
#define RDC_FI_GPU_MEMORY_USAGE 525
|
||||
typedef enum {
|
||||
RDC_FI_INVALID = 0, //!< Invalid field value
|
||||
//!< @brief Identifier fields
|
||||
RDC_FI_GPU_COUNT = 1, //!< GPU count in the system
|
||||
RDC_FI_DEV_NAME, //!< Name of the device
|
||||
|
||||
/**
|
||||
* Total memory of the GPU instance
|
||||
*/
|
||||
#define RDC_FI_GPU_MEMORY_TOTAL 580
|
||||
/*
|
||||
* @brief Frequency related fields
|
||||
*/
|
||||
RDC_FI_GPU_CLOCK = 100, //!< The current clock for the GPU
|
||||
RDC_FI_MEM_CLOCK, //!< Clock for the memory
|
||||
|
||||
/**
|
||||
* Power usage for the device
|
||||
*/
|
||||
#define RDC_FI_POWER_USAGE 155
|
||||
/*
|
||||
* @brief Physical monitor fields
|
||||
*/
|
||||
RDC_FI_MEMORY_TEMP = 200, //!< Memory temperature for the device
|
||||
RDC_FI_GPU_TEMP, //!< Current temperature for the device
|
||||
RDC_FI_POWER_USAGE = 300, //!< Power usage for the device
|
||||
|
||||
/**
|
||||
* The current clock for the GPU
|
||||
*/
|
||||
#define RDC_FI_GPU_CLOCK 100
|
||||
/*
|
||||
* @brief PCIe related fields
|
||||
*/
|
||||
RDC_FI_PCIE_TX = 400, //!< PCIe Tx utilization information
|
||||
RDC_FI_PCIE_RX, //!< PCIe Rx utilization information
|
||||
|
||||
/**
|
||||
* Clock for the memory
|
||||
*/
|
||||
#define RDC_FI_MEM_CLOCK 101
|
||||
|
||||
/**
|
||||
* PCIe Tx utilization information
|
||||
*/
|
||||
#define RDC_FI_PCIE_TX 200
|
||||
|
||||
/**
|
||||
* PCIe Rx utilization information
|
||||
*/
|
||||
#define RDC_FI_PCIE_RX 201
|
||||
|
||||
|
||||
/**
|
||||
* GPU Utilization
|
||||
*/
|
||||
#define RDC_FI_GPU_UTIL 203
|
||||
|
||||
/**
|
||||
* Accumulated correctable ECC errors
|
||||
*/
|
||||
#define RDC_FI_ECC_CORRECT_TOTAL 312
|
||||
|
||||
/**
|
||||
* Accumulated uncorrectable ECC errors
|
||||
*/
|
||||
#define RDC_FI_ECC_UNCORRECT_TOTAL 313
|
||||
|
||||
/**
|
||||
* Memory temperature for the device
|
||||
*/
|
||||
#define RDC_FI_MEMORY_TEMP 140
|
||||
|
||||
/**
|
||||
* Current temperature for the device
|
||||
*/
|
||||
#define RDC_FI_GPU_TEMP 150
|
||||
|
||||
|
||||
/**
|
||||
* GPU count in the system
|
||||
*/
|
||||
#define RDC_FI_GPU_COUNT 4
|
||||
|
||||
/**
|
||||
* Name of the device
|
||||
*/
|
||||
#define RDC_FI_DEV_NAME 50
|
||||
/*
|
||||
* @brief GPU usage related fields
|
||||
*/
|
||||
RDC_FI_GPU_UTIL = 500, //!< GPU Utilization
|
||||
RDC_FI_GPU_MEMORY_USAGE, //!< Memory usage of the GPU instance
|
||||
RDC_FI_GPU_MEMORY_TOTAL, //!< Total memory of the GPU instance
|
||||
|
||||
/**
|
||||
* @brief ECC related fields
|
||||
*/
|
||||
RDC_FI_ECC_CORRECT_TOTAL = 600, //!< Accumulated correctable ECC errors
|
||||
RDC_FI_ECC_UNCORRECT_TOTAL, //!< Accumulated uncorrectable ECC errors
|
||||
} rdc_field_t;
|
||||
|
||||
/**
|
||||
* @brief handlers used in various rdc calls
|
||||
@@ -273,7 +242,7 @@ typedef struct {
|
||||
* @brief The structure to store the field value
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t field_id; //!< The field id of the value
|
||||
rdc_field_t field_id; //!< The field id of the value
|
||||
int status; //!< RDC_ST_OK or error status
|
||||
uint64_t ts; //!< Timestamp in usec since 1970
|
||||
rdc_field_type_t type; //!< The field type
|
||||
@@ -294,7 +263,7 @@ typedef struct {
|
||||
/**
|
||||
* The list of fields in the group
|
||||
*/
|
||||
uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP];
|
||||
rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP];
|
||||
} rdc_field_group_info_t;
|
||||
|
||||
/**
|
||||
@@ -647,7 +616,7 @@ rdc_status_t rdc_group_gpu_destroy(rdc_handle_t p_rdc_handle,
|
||||
* @retval ::RDC_ST_OK is returned upon successful call.
|
||||
*/
|
||||
rdc_status_t rdc_group_field_create(rdc_handle_t p_rdc_handle,
|
||||
uint32_t num_field_ids, uint32_t* field_ids,
|
||||
uint32_t num_field_ids, rdc_field_t* field_ids,
|
||||
const char* field_group_name, rdc_field_grp_t* rdc_field_group_id);
|
||||
|
||||
/**
|
||||
@@ -743,7 +712,7 @@ rdc_status_t rdc_field_watch(rdc_handle_t p_rdc_handle,
|
||||
* @retval ::RDC_ST_OK is returned upon successful call.
|
||||
*/
|
||||
rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle,
|
||||
uint32_t gpu_index, uint32_t field, rdc_field_value* value);
|
||||
uint32_t gpu_index, rdc_field_t field, rdc_field_value* value);
|
||||
|
||||
/**
|
||||
* @brief Request a history cached field of a GPU
|
||||
@@ -767,7 +736,7 @@ rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle,
|
||||
* @retval ::RDC_ST_OK is returned upon successful call.
|
||||
*/
|
||||
rdc_status_t rdc_field_get_value_since(rdc_handle_t p_rdc_handle,
|
||||
uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp,
|
||||
uint32_t gpu_index, rdc_field_t field, uint64_t since_time_stamp,
|
||||
uint64_t *next_since_time_stamp, rdc_field_value* value);
|
||||
|
||||
/**
|
||||
@@ -806,7 +775,7 @@ const char* rdc_status_string(rdc_status_t status);
|
||||
*
|
||||
* @retval The string to describe the field.
|
||||
*/
|
||||
const char* field_id_string(uint32_t field_id);
|
||||
const char* field_id_string(rdc_field_t field_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_RDCCACHEMANAGER_H_
|
||||
#define RDC_LIB_RDCCACHEMANAGER_H_
|
||||
#ifndef INCLUDE_RDC_LIB_RDCCACHEMANAGER_H_
|
||||
#define INCLUDE_RDC_LIB_RDCCACHEMANAGER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
@@ -36,13 +36,13 @@ namespace rdc {
|
||||
class RdcCacheManager {
|
||||
public:
|
||||
virtual rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index,
|
||||
uint32_t field, rdc_field_value* value) = 0;
|
||||
rdc_field_t field, rdc_field_value* value) = 0;
|
||||
virtual rdc_status_t rdc_field_get_value_since(uint32_t gpu_index,
|
||||
uint32_t field, uint64_t since_time_stamp,
|
||||
rdc_field_t field, uint64_t since_time_stamp,
|
||||
uint64_t *next_since_time_stamp, rdc_field_value* value) = 0;
|
||||
virtual rdc_status_t rdc_update_cache(uint32_t gpu_index,
|
||||
const rdc_field_value& value) = 0;
|
||||
virtual rdc_status_t evict_cache(uint32_t gpu_index, uint32_t field_id,
|
||||
virtual rdc_status_t evict_cache(uint32_t gpu_index, rdc_field_t field_id,
|
||||
uint64_t max_keep_samples, double max_keep_age) = 0;
|
||||
virtual std::string get_cache_stats() = 0;
|
||||
|
||||
@@ -69,4 +69,4 @@ typedef std::shared_ptr<RdcCacheManager> RdcCacheManagerPtr;
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_LIB_RDCCACHEMANAGER_H_
|
||||
#endif // INCLUDE_RDC_LIB_RDCCACHEMANAGER_H_
|
||||
|
||||
@@ -20,8 +20,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef RDCI_INCLUDE_RDCEXCEPTION_H_
|
||||
#define RDCI_INCLUDE_RDCEXCEPTION_H_
|
||||
#ifndef INCLUDE_RDC_LIB_RDCEXCEPTION_H_
|
||||
#define INCLUDE_RDC_LIB_RDCEXCEPTION_H_
|
||||
|
||||
#include <exception>
|
||||
#include <string>
|
||||
@@ -45,5 +45,5 @@ class RdcException : public std::exception {
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDCI_INCLUDE_RDCEXCEPTION_H_
|
||||
#endif // INCLUDE_RDC_LIB_RDCEXCEPTION_H_
|
||||
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_RDCGROUPSETTINGS_H_
|
||||
#define RDC_LIB_RDCGROUPSETTINGS_H_
|
||||
#ifndef INCLUDE_RDC_LIB_RDCGROUPSETTINGS_H_
|
||||
#define INCLUDE_RDC_LIB_RDCGROUPSETTINGS_H_
|
||||
|
||||
#include <memory>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
@@ -45,7 +45,7 @@ class RdcGroupSettings {
|
||||
|
||||
|
||||
virtual rdc_status_t rdc_group_field_create(uint32_t num_field_ids,
|
||||
uint32_t* field_ids, const char* field_group_name,
|
||||
rdc_field_t* field_ids, const char* field_group_name,
|
||||
rdc_field_grp_t* rdc_field_group_id) = 0;
|
||||
virtual rdc_status_t rdc_group_field_destroy(
|
||||
rdc_field_grp_t rdc_field_group_id) = 0;
|
||||
@@ -64,4 +64,4 @@ const uint32_t JOB_FIELD_ID = 0;
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_LIB_RDCGROUPSETTINGS_H_
|
||||
#endif // INCLUDE_RDC_LIB_RDCGROUPSETTINGS_H_
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_RDCHANDLER_H_
|
||||
#define RDC_LIB_RDCHANDLER_H_
|
||||
#ifndef INCLUDE_RDC_LIB_RDCHANDLER_H_
|
||||
#define INCLUDE_RDC_LIB_RDCHANDLER_H_
|
||||
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "rdc/rdc.h"
|
||||
@@ -52,7 +52,7 @@ class RdcHandler {
|
||||
virtual rdc_status_t rdc_group_gpu_add(rdc_gpu_group_t groupId,
|
||||
uint32_t gpu_index) = 0;
|
||||
virtual rdc_status_t rdc_group_field_create(uint32_t num_field_ids,
|
||||
uint32_t* field_ids, const char* field_group_name,
|
||||
rdc_field_t* field_ids, const char* field_group_name,
|
||||
rdc_field_grp_t* rdc_field_group_id) = 0;
|
||||
virtual rdc_status_t rdc_group_field_get_info(
|
||||
rdc_field_grp_t rdc_field_group_id,
|
||||
@@ -73,9 +73,9 @@ class RdcHandler {
|
||||
rdc_field_grp_t field_group_id, uint64_t update_freq,
|
||||
double max_keep_age, uint32_t max_keep_samples) = 0;
|
||||
virtual rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index,
|
||||
uint32_t field, rdc_field_value* value) = 0;
|
||||
rdc_field_t field, rdc_field_value* value) = 0;
|
||||
virtual rdc_status_t rdc_field_get_value_since(uint32_t gpu_index,
|
||||
uint32_t field, uint64_t since_time_stamp,
|
||||
rdc_field_t field, uint64_t since_time_stamp,
|
||||
uint64_t *next_since_time_stamp, rdc_field_value* value) = 0;
|
||||
virtual rdc_status_t rdc_field_unwatch(rdc_gpu_group_t group_id,
|
||||
rdc_field_grp_t field_group_id) = 0;
|
||||
@@ -89,4 +89,4 @@ class RdcHandler {
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_LIB_RDCHANDLER_H_
|
||||
#endif // INCLUDE_RDC_LIB_RDCHANDLER_H_
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_RDCLIBRARYLOADER_H_
|
||||
#define RDC_LIB_RDCLIBRARYLOADER_H_
|
||||
#ifndef INCLUDE_RDC_LIB_RDCLIBRARYLOADER_H_
|
||||
#define INCLUDE_RDC_LIB_RDCLIBRARYLOADER_H_
|
||||
#include <dlfcn.h>
|
||||
#include <string.h>
|
||||
#include <map>
|
||||
@@ -83,4 +83,4 @@ template<typename T> rdc_status_t RdcLibraryLoader::load(const char* filename,
|
||||
} // namespace amd
|
||||
|
||||
|
||||
#endif // RDC_LIB_RDCLIBRARYLOADER_H_
|
||||
#endif // INCLUDE_RDC_LIB_RDCLIBRARYLOADER_H_
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_RDCLOGGER_H_
|
||||
#define RDC_LIB_RDCLOGGER_H_
|
||||
#ifndef INCLUDE_RDC_LIB_RDCLOGGER_H_
|
||||
#define INCLUDE_RDC_LIB_RDCLOGGER_H_
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <chrono> // NOLINT
|
||||
@@ -56,4 +56,4 @@ class RdcLogger {
|
||||
} // namespace amd
|
||||
|
||||
|
||||
#endif // RDC_LIB_RDCLOGGER_H_
|
||||
#endif // INCLUDE_RDC_LIB_RDCLOGGER_H_
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_RDCMETRICFETCHER_H_
|
||||
#define RDC_LIB_RDCMETRICFETCHER_H_
|
||||
#ifndef INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_
|
||||
#define INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_
|
||||
|
||||
#include <memory>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
@@ -33,8 +33,7 @@ namespace rdc {
|
||||
class RdcMetricFetcher {
|
||||
public:
|
||||
virtual rdc_status_t fetch_smi_field(uint32_t gpu_index,
|
||||
uint32_t field_id, rdc_field_value* value) = 0;
|
||||
virtual bool is_field_valid(uint32_t field_id) const = 0;
|
||||
rdc_field_t field_id, rdc_field_value* value) = 0;
|
||||
virtual ~RdcMetricFetcher() {}
|
||||
};
|
||||
|
||||
@@ -43,4 +42,4 @@ typedef std::shared_ptr<RdcMetricFetcher> RdcMetricFetcherPtr;
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_LIB_RDCMETRICFETCHER_H_
|
||||
#endif // INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_RDCMETRICSUPDATER_H_
|
||||
#define RDC_LIB_RDCMETRICSUPDATER_H_
|
||||
#ifndef INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_
|
||||
#define INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
@@ -39,4 +39,4 @@ typedef std::shared_ptr<RdcMetricsUpdater> RdcMetricsUpdaterPtr;
|
||||
} // namespace amd
|
||||
|
||||
|
||||
#endif // RDC_LIB_RDCMETRICSUPDATER_H_
|
||||
#endif // INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_RDCWATCHTABLE_H_
|
||||
#define RDC_LIB_RDCWATCHTABLE_H_
|
||||
#ifndef INCLUDE_RDC_LIB_RDCWATCHTABLE_H_
|
||||
#define INCLUDE_RDC_LIB_RDCWATCHTABLE_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
@@ -57,4 +57,4 @@ typedef std::shared_ptr<RdcWatchTable> RdcWatchTablePtr;
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_LIB_RDCWATCHTABLE_H_
|
||||
#endif // INCLUDE_RDC_LIB_RDCWATCHTABLE_H_
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_
|
||||
#define RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_
|
||||
#ifndef INCLUDE_RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_
|
||||
#define INCLUDE_RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_
|
||||
|
||||
#include <memory>
|
||||
#include <mutex> // NOLINT(build/c++11)
|
||||
@@ -80,13 +80,13 @@ typedef std::map<std::string, RdcJobStatsCacheEntry> RdcJobStatsCache;
|
||||
class RdcCacheManagerImpl: public RdcCacheManager {
|
||||
public:
|
||||
rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index,
|
||||
uint32_t field, rdc_field_value* value) override;
|
||||
rdc_status_t rdc_field_get_value_since(uint32_t gpu_index, uint32_t field,
|
||||
uint64_t since_time_stamp, uint64_t *next_since_time_stamp,
|
||||
rdc_field_value* value) override;
|
||||
rdc_field_t field, rdc_field_value* value) override;
|
||||
rdc_status_t rdc_field_get_value_since(uint32_t gpu_index,
|
||||
rdc_field_t field, uint64_t since_time_stamp,
|
||||
uint64_t *next_since_time_stamp, rdc_field_value* value) override;
|
||||
rdc_status_t rdc_update_cache(uint32_t gpu_index,
|
||||
const rdc_field_value& value) override;
|
||||
rdc_status_t evict_cache(uint32_t gpu_index, uint32_t field_id,
|
||||
rdc_status_t evict_cache(uint32_t gpu_index, rdc_field_t field_id,
|
||||
uint64_t max_keep_samples, double max_keep_age) override;
|
||||
std::string get_cache_stats() override;
|
||||
|
||||
@@ -120,4 +120,4 @@ class RdcCacheManagerImpl: public RdcCacheManager {
|
||||
} // namespace amd
|
||||
|
||||
|
||||
#endif // RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_
|
||||
#endif // INCLUDE_RDC_LIB_IMPL_RDCCACHEMANAGERIMPL_H_
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_
|
||||
#define RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_
|
||||
#ifndef INCLUDE_RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_
|
||||
#define INCLUDE_RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_
|
||||
|
||||
#include <future> // NOLINT(build/c++11)
|
||||
#include "rdc_lib/RdcHandler.h"
|
||||
@@ -57,7 +57,7 @@ class RdcEmbeddedHandler: public RdcHandler {
|
||||
rdc_status_t rdc_group_gpu_add(rdc_gpu_group_t groupId,
|
||||
uint32_t gpu_index) override;
|
||||
rdc_status_t rdc_group_field_create(uint32_t num_field_ids,
|
||||
uint32_t* field_ids, const char* field_group_name,
|
||||
rdc_field_t* field_ids, const char* field_group_name,
|
||||
rdc_field_grp_t* rdc_field_group_id) override;
|
||||
rdc_status_t rdc_group_field_get_info(
|
||||
rdc_field_grp_t rdc_field_group_id,
|
||||
@@ -78,9 +78,9 @@ class RdcEmbeddedHandler: public RdcHandler {
|
||||
rdc_field_grp_t field_group_id, uint64_t update_freq,
|
||||
double max_keep_age, uint32_t max_keep_samples) override;
|
||||
rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index,
|
||||
uint32_t field, rdc_field_value* value) override;
|
||||
rdc_field_t field, rdc_field_value* value) override;
|
||||
rdc_status_t rdc_field_get_value_since(uint32_t gpu_index,
|
||||
uint32_t field, uint64_t since_time_stamp,
|
||||
rdc_field_t field, uint64_t since_time_stamp,
|
||||
uint64_t *next_since_time_stamp, rdc_field_value* value) override;
|
||||
rdc_status_t rdc_field_unwatch(rdc_gpu_group_t group_id,
|
||||
rdc_field_grp_t field_group_id) override;
|
||||
@@ -108,4 +108,4 @@ extern "C" {
|
||||
amd::rdc::RdcHandler *make_handler(rdc_operation_mode_t op_mode);
|
||||
}
|
||||
|
||||
#endif // RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_
|
||||
#endif // INCLUDE_RDC_LIB_IMPL_RDCEMBEDDEDHANDLER_H_
|
||||
|
||||
@@ -19,9 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_
|
||||
#define RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_
|
||||
|
||||
#ifndef INCLUDE_RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_
|
||||
#define INCLUDE_RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_
|
||||
|
||||
#include <memory>
|
||||
#include <map>
|
||||
@@ -46,7 +45,7 @@ class RdcGroupSettingsImpl: public RdcGroupSettings {
|
||||
rdc_gpu_group_t group_id_list[], uint32_t* count) override;
|
||||
|
||||
rdc_status_t rdc_group_field_create(uint32_t num_field_ids,
|
||||
uint32_t* field_ids, const char* field_group_name,
|
||||
rdc_field_t* field_ids, const char* field_group_name,
|
||||
rdc_field_grp_t* rdc_field_group_id) override;
|
||||
rdc_status_t rdc_group_field_destroy(
|
||||
rdc_field_grp_t rdc_field_group_id) override;
|
||||
@@ -71,4 +70,4 @@ class RdcGroupSettingsImpl: public RdcGroupSettings {
|
||||
} // namespace amd
|
||||
|
||||
|
||||
#endif // RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_
|
||||
#endif // INCLUDE_RDC_LIB_IMPL_RDCGROUPSETTINGSIMPL_H_
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_
|
||||
#define RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_
|
||||
#ifndef INCLUDE_RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_
|
||||
#define INCLUDE_RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_
|
||||
|
||||
#include <mutex> // NOLINT(build/c++11)
|
||||
#include <future> // NOLINT(build/c++11)
|
||||
@@ -52,18 +52,17 @@ struct MetricTask {
|
||||
class RdcMetricFetcherImpl: public RdcMetricFetcher {
|
||||
public:
|
||||
rdc_status_t fetch_smi_field(uint32_t gpu_index,
|
||||
uint32_t field_id, rdc_field_value* value) override;
|
||||
bool is_field_valid(uint32_t field_id) const override;
|
||||
rdc_field_t field_id, rdc_field_value* value) override;
|
||||
RdcMetricFetcherImpl();
|
||||
~RdcMetricFetcherImpl();
|
||||
private:
|
||||
uint64_t now();
|
||||
void get_ecc_error(uint32_t gpu_index,
|
||||
uint32_t field_id, rdc_field_value* value);
|
||||
rdc_field_t field_id, rdc_field_value* value);
|
||||
|
||||
//!< return true if starting async_get
|
||||
bool async_get_pcie_throughput(uint32_t gpu_index,
|
||||
uint32_t field_id, rdc_field_value* value);
|
||||
rdc_field_t field_id, rdc_field_value* value);
|
||||
void get_pcie_throughput(const RdcFieldKey& key);
|
||||
|
||||
//!< Async metric retreive
|
||||
@@ -78,4 +77,4 @@ class RdcMetricFetcherImpl: public RdcMetricFetcher {
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_
|
||||
#endif // INCLUDE_RDC_LIB_IMPL_RDCMETRICFETCHERIMPL_H_
|
||||
|
||||
@@ -19,10 +19,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_
|
||||
#define RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_
|
||||
#ifndef INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_
|
||||
#define INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_
|
||||
|
||||
#include <future>
|
||||
#include <future> // NOLINT(build/c++11)
|
||||
#include <memory>
|
||||
#include "rdc_lib/RdcMetricsUpdater.h"
|
||||
#include "rdc_lib/RdcWatchTable.h"
|
||||
@@ -46,4 +46,4 @@ class RdcMetricsUpdaterImpl: public RdcMetricsUpdater {
|
||||
} // namespace rdc
|
||||
} // namespace amd
|
||||
|
||||
#endif // RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_
|
||||
#endif // INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_
|
||||
#define RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_
|
||||
#ifndef INCLUDE_RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_
|
||||
#define INCLUDE_RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_
|
||||
#include <grpcpp/grpcpp.h>
|
||||
#include <memory>
|
||||
#include "rdc.grpc.pb.h" // NOLINT
|
||||
@@ -53,7 +53,7 @@ class RdcStandaloneHandler: public RdcHandler {
|
||||
rdc_status_t rdc_group_gpu_add(rdc_gpu_group_t groupId,
|
||||
uint32_t gpu_index) override;
|
||||
rdc_status_t rdc_group_field_create(uint32_t num_field_ids,
|
||||
uint32_t* field_ids, const char* field_group_name,
|
||||
rdc_field_t* field_ids, const char* field_group_name,
|
||||
rdc_field_grp_t* rdc_field_group_id) override;
|
||||
rdc_status_t rdc_group_field_get_info(
|
||||
rdc_field_grp_t rdc_field_group_id,
|
||||
@@ -74,9 +74,9 @@ class RdcStandaloneHandler: public RdcHandler {
|
||||
rdc_field_grp_t field_group_id, uint64_t update_freq,
|
||||
double max_keep_age, uint32_t max_keep_samples) override;
|
||||
rdc_status_t rdc_field_get_latest_value(uint32_t gpu_index,
|
||||
uint32_t field, rdc_field_value* value) override;
|
||||
rdc_field_t field, rdc_field_value* value) override;
|
||||
rdc_status_t rdc_field_get_value_since(uint32_t gpu_index,
|
||||
uint32_t field, uint64_t since_time_stamp,
|
||||
rdc_field_t field, uint64_t since_time_stamp,
|
||||
uint64_t *next_since_time_stamp, rdc_field_value* value) override;
|
||||
rdc_status_t rdc_field_unwatch(rdc_gpu_group_t group_id,
|
||||
rdc_field_grp_t field_group_id) override;
|
||||
@@ -107,4 +107,4 @@ extern "C" {
|
||||
const char* root_ca, const char* client_cert, const char* client_key);
|
||||
}
|
||||
|
||||
#endif // RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_
|
||||
#endif // INCLUDE_RDC_LIB_IMPL_RDCSTANDALONEHANDLER_H_
|
||||
|
||||
@@ -19,8 +19,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
#ifndef RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_
|
||||
#define RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_
|
||||
#ifndef INCLUDE_RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_
|
||||
#define INCLUDE_RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
@@ -86,7 +86,8 @@ class RdcWatchTableImpl : public RdcWatchTable {
|
||||
|
||||
private:
|
||||
//!< Helper function to update the fields_in_table when unwatching
|
||||
rdc_status_t update_field_in_table_when_unwatch(const RdcFieldKey& entry);
|
||||
rdc_status_t update_field_in_table_when_unwatch(
|
||||
const RdcFieldGroupKey& entry);
|
||||
|
||||
//!< Helper function to clean up the watch table and cache
|
||||
void clean_up();
|
||||
@@ -99,15 +100,15 @@ class RdcWatchTableImpl : public RdcWatchTable {
|
||||
rdc_field_grp_t field_group_id,
|
||||
std::vector<RdcFieldKey> & fields); // NOLINT
|
||||
|
||||
bool is_job_watch_field(uint32_t gpu_index, uint32_t field_id,
|
||||
std::string& job_id) const; // NOLINT
|
||||
bool is_job_watch_field(uint32_t gpu_index, rdc_field_t field_id,
|
||||
std::string& job_id) const; // NOLINT
|
||||
|
||||
RdcGroupSettingsPtr group_settings_;
|
||||
RdcCacheManagerPtr cache_mgr_;
|
||||
RdcMetricFetcherPtr metric_fetcher_;
|
||||
|
||||
//!< The watch table to store the watch settings.
|
||||
std::map<RdcFieldKey, FieldSettings> watch_table_;
|
||||
std::map<RdcFieldGroupKey, FieldSettings> watch_table_;
|
||||
|
||||
//!< <job_id, gpu_group_id> pairs
|
||||
std::map<std::string, JobWatchTableEntry> job_watch_table_;
|
||||
@@ -129,4 +130,4 @@ class RdcWatchTableImpl : public RdcWatchTable {
|
||||
} // namespace amd
|
||||
|
||||
|
||||
#endif // RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_
|
||||
#endif // INCLUDE_RDC_LIB_IMPL_RDCWATCHTABLEIMPL_H_
|
||||
|
||||
@@ -20,12 +20,14 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef RDC_LIB_RDC_COMMON_H_
|
||||
#define RDC_LIB_RDC_COMMON_H_
|
||||
#ifndef INCLUDE_RDC_LIB_RDC_COMMON_H_
|
||||
#define INCLUDE_RDC_LIB_RDC_COMMON_H_
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
|
||||
#include "rdc/rdc.h"
|
||||
|
||||
#define RDC_ERROR 0
|
||||
#define RDC_INFO 1
|
||||
#define RDC_DEBUG 2
|
||||
@@ -40,7 +42,10 @@ THE SOFTWARE.
|
||||
} while (0)
|
||||
|
||||
//!< The key to identify the field with <gpu_id, field_id>
|
||||
typedef std::pair<uint32_t, uint32_t> RdcFieldKey;
|
||||
typedef std::pair<uint32_t, rdc_field_t> RdcFieldKey;
|
||||
|
||||
//!< The key to identify a watch entry with <gpu_group_id, field_group_id>
|
||||
typedef std::pair<uint32_t, uint32_t> RdcFieldGroupKey;
|
||||
|
||||
//!< The gauge metrics do not require aggregations
|
||||
typedef std::map<RdcFieldKey, uint64_t> rdc_gpu_gauges_t;
|
||||
@@ -62,4 +67,4 @@ typedef std::map<RdcFieldKey, uint64_t> rdc_gpu_gauges_t;
|
||||
char *strncpy_with_null(char *dest, const char *src, size_t n);
|
||||
|
||||
|
||||
#endif // RDC_LIB_RDC_COMMON_H_
|
||||
#endif // INCLUDE_RDC_LIB_RDC_COMMON_H_
|
||||
|
||||
@@ -63,6 +63,7 @@ endif()
|
||||
|
||||
set(SRC_DIR "${PROJECT_SOURCE_DIR}/rdc_libs")
|
||||
set(RDC_LIB_INC_DIR "${PROJECT_SOURCE_DIR}/include")
|
||||
set(COMMON_DIR "${PROJECT_SOURCE_DIR}/common")
|
||||
|
||||
|
||||
################# Determine the library version #########################
|
||||
@@ -112,12 +113,13 @@ set(BOOTSTRAP_LIB_COMPONENT "lib${BOOTSTRAP_LIB}")
|
||||
set(BOOTSTRAP_LIB_SRC_LIST "${SRC_DIR}/bootstrap/src/RdcBootStrap.cc")
|
||||
set(BOOTSTRAP_LIB_SRC_LIST ${BOOTSTRAP_LIB_SRC_LIST} "${SRC_DIR}/bootstrap/src/RdcLogger.cc")
|
||||
set(BOOTSTRAP_LIB_SRC_LIST ${BOOTSTRAP_LIB_SRC_LIST} "${SRC_DIR}/bootstrap/src/RdcLibraryLoader.cc")
|
||||
set(BOOTSTRAP_LIB_SRC_LIST ${BOOTSTRAP_LIB_SRC_LIST} "${COMMON_DIR}/rdc_fields_supported.cc")
|
||||
set(BOOTSTRAP_LIB_INC_LIST "${RDC_LIB_INC_DIR}/rdc/rdc.h")
|
||||
set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/rdc_common.h")
|
||||
set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcLogger.h")
|
||||
set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcHandler.h")
|
||||
set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcLibraryLoader.h")
|
||||
|
||||
set(BOOTSTRAP_LIB_INC_LIST ${BOOTSTRAP_LIB_INC_LIST} "${COMMON_DIR}/rdc_fields_supported.h")
|
||||
message("BOOTSTRAP_LIB_INC_LIST=${BOOTSTRAP_LIB_INC_LIST}")
|
||||
|
||||
add_library(${BOOTSTRAP_LIB} SHARED ${BOOTSTRAP_LIB_SRC_LIST} ${BOOTSTRAP_LIB_INC_LIST})
|
||||
@@ -125,6 +127,7 @@ target_link_libraries(${BOOTSTRAP_LIB} pthread dl)
|
||||
target_include_directories(${BOOTSTRAP_LIB} PRIVATE
|
||||
"${PROJECT_SOURCE_DIR}"
|
||||
"${PROJECT_SOURCE_DIR}/include"
|
||||
"${COMMON_DIR}"
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/include")
|
||||
|
||||
# TODO: set the properties for the library once we have one
|
||||
@@ -143,6 +146,7 @@ set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcGroupSettingsImp
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcCacheManagerImpl.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcMetricsUpdaterImpl.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${SRC_DIR}/rdc/src/RdcWatchTableImpl.cc")
|
||||
set(RDC_LIB_SRC_LIST ${RDC_LIB_SRC_LIST} "${COMMON_DIR}/rdc_fields_supported.cc")
|
||||
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcEmbeddedHandler.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcMetricFetcher.h")
|
||||
@@ -155,6 +159,7 @@ set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcMetricsU
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcMetricsUpdaterImpl.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/RdcWatchTable.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${RDC_LIB_INC_DIR}/rdc_lib/impl/RdcWatchTableImpl.h")
|
||||
set(RDC_LIB_INC_LIST ${RDC_LIB_INC_LIST} "${COMMON_DIR}/rdc_fields_supported.h")
|
||||
|
||||
message("RDC_LIB_INC_LIST=${RDC_LIB_INC_LIST}")
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ THE SOFTWARE.
|
||||
#include <dlfcn.h>
|
||||
#include <string.h>
|
||||
#include <map>
|
||||
#include "common/rdc_fields_supported.h"
|
||||
#include "rdc/rdc.h"
|
||||
#include "rdc_lib/RdcHandler.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
@@ -204,7 +205,7 @@ rdc_status_t rdc_device_get_attributes(rdc_handle_t p_rdc_handle,
|
||||
}
|
||||
|
||||
rdc_status_t rdc_group_field_create(rdc_handle_t p_rdc_handle,
|
||||
uint32_t num_field_ids, uint32_t* field_ids,
|
||||
uint32_t num_field_ids, rdc_field_t* field_ids,
|
||||
const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) {
|
||||
if (!p_rdc_handle || !field_ids ||
|
||||
!field_group_name || !rdc_field_group_id) {
|
||||
@@ -270,7 +271,7 @@ rdc_status_t rdc_field_watch(rdc_handle_t p_rdc_handle,
|
||||
}
|
||||
|
||||
rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle,
|
||||
uint32_t gpu_index, uint32_t field, rdc_field_value* value) {
|
||||
uint32_t gpu_index, rdc_field_t field, rdc_field_value* value) {
|
||||
if (!p_rdc_handle || !value) {
|
||||
return RDC_ST_INVALID_HANDLER;
|
||||
}
|
||||
@@ -280,7 +281,7 @@ rdc_status_t rdc_field_get_latest_value(rdc_handle_t p_rdc_handle,
|
||||
}
|
||||
|
||||
rdc_status_t rdc_field_get_value_since(rdc_handle_t p_rdc_handle,
|
||||
uint32_t gpu_index, uint32_t field, uint64_t since_time_stamp,
|
||||
uint32_t gpu_index, rdc_field_t field, uint64_t since_time_stamp,
|
||||
uint64_t *next_since_time_stamp, rdc_field_value* value) {
|
||||
if (!p_rdc_handle || !next_since_time_stamp || !value) {
|
||||
return RDC_ST_INVALID_HANDLER;
|
||||
@@ -350,30 +351,10 @@ const char* rdc_status_string(rdc_status_t result) {
|
||||
}
|
||||
}
|
||||
|
||||
const char* field_id_string(uint32_t field_id) {
|
||||
const std::map<uint16_t, const char*> id_name = {
|
||||
{RDC_FI_GPU_MEMORY_USAGE, "GPU_MEMORY_USAGE"},
|
||||
{RDC_FI_GPU_MEMORY_TOTAL, "GPU_MEMORY_TOTAL"},
|
||||
{RDC_FI_POWER_USAGE, "POWER_USAGE"},
|
||||
{RDC_FI_GPU_CLOCK, "GPU_CLOCK"},
|
||||
{RDC_FI_GPU_UTIL, "GPU_UTIL"},
|
||||
{RDC_FI_GPU_TEMP, "GPU_TEMP"},
|
||||
{RDC_FI_GPU_COUNT, "GPU_COUNT"},
|
||||
{RDC_FI_MEM_CLOCK, "MEM_CLOCK"},
|
||||
{RDC_FI_PCIE_TX, "PCIE_TX"},
|
||||
{RDC_FI_PCIE_RX, "PCIE_RX"},
|
||||
{RDC_FI_ECC_CORRECT_TOTAL, "ECC_CORRECT"},
|
||||
{RDC_FI_ECC_UNCORRECT_TOTAL, "ECC_UNCORRECT"},
|
||||
{RDC_FI_MEMORY_TEMP, "MEMORY_TEMP"},
|
||||
{RDC_FI_DEV_NAME, "DEV_NAME"}
|
||||
};
|
||||
|
||||
auto search = id_name.find(field_id);
|
||||
if (search == id_name.end()) {
|
||||
return "UNKNOWN_FIELD";
|
||||
}
|
||||
|
||||
return search->second;
|
||||
/**
 * @brief Return the short label string for a field id.
 *
 * Looks @p field_id up in the table returned by
 * amd::rdc::get_field_id_description_from_id() and returns that entry's label.
 *
 * @param field_id Field to describe.
 * @return The entry's label, or "UNKNOWN_FIELD" when the id has no entry.
 *         The label pointer refers to storage inside the table entry;
 *         NOTE(review): presumably the table lives for the whole process,
 *         confirm against rdc_fields_supported.cc.
 */
const char* field_id_string(rdc_field_t field_id) {
  amd::rdc::fld_id2name_map_t &field_id_to_descript =
                           amd::rdc::get_field_id_description_from_id();

  // A rdc_field_t can be produced by casting an arbitrary integer (values
  // parsed from the command line or received over gRPC), so the id is not
  // guaranteed to be present. Dereferencing end() would be undefined
  // behaviour; fall back to the same string the previous implementation used.
  auto search = field_id_to_descript.find(field_id);
  if (search == field_id_to_descript.end()) {
    return "UNKNOWN_FIELD";
  }

  return search->second.label.c_str();
}
|
||||
|
||||
char *strncpy_with_null(char *dest, const char *src, size_t n) {
|
||||
|
||||
@@ -32,7 +32,7 @@ namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
rdc_status_t RdcCacheManagerImpl::rdc_field_get_value_since(
|
||||
uint32_t gpu_index, uint32_t field_id, uint64_t since_time_stamp,
|
||||
uint32_t gpu_index, rdc_field_t field_id, uint64_t since_time_stamp,
|
||||
uint64_t *next_since_time_stamp, rdc_field_value* value) {
|
||||
if (!next_since_time_stamp || !value) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
@@ -72,7 +72,7 @@ rdc_status_t RdcCacheManagerImpl::rdc_field_get_value_since(
|
||||
|
||||
|
||||
rdc_status_t RdcCacheManagerImpl::evict_cache(uint32_t gpu_index,
|
||||
uint32_t field_id, uint64_t max_keep_samples, double max_keep_age) {
|
||||
rdc_field_t field_id, uint64_t max_keep_samples, double max_keep_age) {
|
||||
std::lock_guard<std::mutex> guard(cache_mutex_);
|
||||
|
||||
RdcFieldKey field{gpu_index, field_id};
|
||||
@@ -108,7 +108,7 @@ rdc_status_t RdcCacheManagerImpl::evict_cache(uint32_t gpu_index,
|
||||
}
|
||||
|
||||
rdc_status_t RdcCacheManagerImpl::rdc_field_get_latest_value(
|
||||
uint32_t gpu_index, uint32_t field_id, rdc_field_value* value) {
|
||||
uint32_t gpu_index, rdc_field_t field_id, rdc_field_value* value) {
|
||||
if (!value) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
|
||||
@@ -29,6 +29,7 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rdc_lib/RdcException.h"
|
||||
#include "common/rdc_fields_supported.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
|
||||
namespace {
|
||||
@@ -259,7 +260,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_gpu_add(rdc_gpu_group_t group_id,
|
||||
}
|
||||
|
||||
rdc_status_t RdcEmbeddedHandler::rdc_group_field_create(uint32_t num_field_ids,
|
||||
uint32_t* field_ids, const char* field_group_name,
|
||||
rdc_field_t* field_ids, const char* field_group_name,
|
||||
rdc_field_grp_t* rdc_field_group_id) {
|
||||
if (!field_group_name || !rdc_field_group_id || !field_ids) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
@@ -268,7 +269,7 @@ rdc_status_t RdcEmbeddedHandler::rdc_group_field_create(uint32_t num_field_ids,
|
||||
// Check the field is valid or not
|
||||
if (num_field_ids <= RDC_MAX_FIELD_IDS_PER_FIELD_GROUP) {
|
||||
for (uint32_t i = 0; i < num_field_ids; i++) {
|
||||
if (!metric_fetcher_->is_field_valid(field_ids[i])) {
|
||||
if (!is_field_valid(field_ids[i])) {
|
||||
RDC_LOG(RDC_INFO,
|
||||
"Fail to create field group with unknown field id "
|
||||
<< field_ids[i]);
|
||||
@@ -341,11 +342,11 @@ rdc_status_t RdcEmbeddedHandler::rdc_field_watch(rdc_gpu_group_t group_id,
|
||||
}
|
||||
|
||||
rdc_status_t RdcEmbeddedHandler::rdc_field_get_latest_value(
|
||||
uint32_t gpu_index, uint32_t field, rdc_field_value* value) {
|
||||
uint32_t gpu_index, rdc_field_t field, rdc_field_value* value) {
|
||||
if (!value) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
if (!metric_fetcher_->is_field_valid(field)) {
|
||||
if (!is_field_valid(field)) {
|
||||
RDC_LOG(RDC_INFO,
|
||||
"Fail to get latest value with unknown field id "
|
||||
<< field);
|
||||
@@ -355,12 +356,12 @@ rdc_status_t RdcEmbeddedHandler::rdc_field_get_latest_value(
|
||||
}
|
||||
|
||||
rdc_status_t RdcEmbeddedHandler::rdc_field_get_value_since(uint32_t gpu_index,
|
||||
uint32_t field, uint64_t since_time_stamp,
|
||||
rdc_field_t field, uint64_t since_time_stamp,
|
||||
uint64_t *next_since_time_stamp, rdc_field_value* value) {
|
||||
if (!next_since_time_stamp || !value) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
if (!metric_fetcher_->is_field_valid(field)) {
|
||||
if (!is_field_valid(field)) {
|
||||
RDC_LOG(RDC_INFO,
|
||||
"Fail to get value since with unknown field id "
|
||||
<< field);
|
||||
|
||||
@@ -29,7 +29,7 @@ namespace rdc {
|
||||
|
||||
RdcGroupSettingsImpl::RdcGroupSettingsImpl() {
|
||||
// Add the default job stats fields
|
||||
uint32_t job_fields[] = {RDC_FI_GPU_MEMORY_USAGE,
|
||||
rdc_field_t job_fields[] = {RDC_FI_GPU_MEMORY_USAGE,
|
||||
RDC_FI_POWER_USAGE, RDC_FI_GPU_CLOCK, RDC_FI_GPU_UTIL,
|
||||
RDC_FI_PCIE_TX, RDC_FI_PCIE_RX, RDC_FI_MEM_CLOCK,
|
||||
RDC_FI_GPU_TEMP};
|
||||
@@ -37,7 +37,7 @@ RdcGroupSettingsImpl::RdcGroupSettingsImpl() {
|
||||
rdc_field_grp_t fgid = JOB_FIELD_ID;
|
||||
|
||||
rdc_group_field_create(sizeof(job_fields)/sizeof(uint32_t),
|
||||
job_fields, job_field_group, &fgid);
|
||||
job_fields, job_field_group, &fgid);
|
||||
}
|
||||
|
||||
rdc_status_t RdcGroupSettingsImpl::rdc_group_gpu_create(
|
||||
@@ -133,7 +133,7 @@ rdc_status_t RdcGroupSettingsImpl::rdc_group_get_all_ids(
|
||||
}
|
||||
|
||||
rdc_status_t RdcGroupSettingsImpl::rdc_group_field_create(
|
||||
uint32_t num_field_ids, uint32_t* field_ids,
|
||||
uint32_t num_field_ids, rdc_field_t* field_ids,
|
||||
const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) {
|
||||
|
||||
RDC_LOG(RDC_DEBUG, "Create field group " << field_group_name);
|
||||
|
||||
@@ -26,23 +26,13 @@ THE SOFTWARE.
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "common/rdc_fields_supported.h"
|
||||
#include "rdc_lib/RdcLogger.h"
|
||||
#include "rocm_smi/rocm_smi.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
|
||||
bool RdcMetricFetcherImpl::is_field_valid(uint32_t field_id) const {
|
||||
const std::vector<uint32_t> all_fields = {RDC_FI_GPU_MEMORY_USAGE,
|
||||
RDC_FI_GPU_MEMORY_TOTAL, RDC_FI_GPU_COUNT, RDC_FI_POWER_USAGE,
|
||||
RDC_FI_GPU_CLOCK, RDC_FI_GPU_UTIL, RDC_FI_DEV_NAME, RDC_FI_GPU_TEMP,
|
||||
RDC_FI_MEM_CLOCK, RDC_FI_PCIE_TX, RDC_FI_PCIE_RX,
|
||||
RDC_FI_ECC_CORRECT_TOTAL, RDC_FI_ECC_UNCORRECT_TOTAL, RDC_FI_MEMORY_TEMP};
|
||||
|
||||
return std::find(all_fields.begin(), all_fields.end(), field_id)
|
||||
!= all_fields.end();
|
||||
}
|
||||
|
||||
RdcMetricFetcherImpl::RdcMetricFetcherImpl() {
|
||||
task_started_ = true;
|
||||
|
||||
@@ -81,7 +71,7 @@ uint64_t RdcMetricFetcherImpl::now() {
|
||||
}
|
||||
|
||||
void RdcMetricFetcherImpl::get_ecc_error(uint32_t gpu_index,
|
||||
uint32_t field_id, rdc_field_value* value) {
|
||||
rdc_field_t field_id, rdc_field_value* value) {
|
||||
rsmi_status_t err = RSMI_STATUS_SUCCESS;
|
||||
uint64_t correctable_err = 0;
|
||||
uint64_t uncorrectable_err = 0;
|
||||
@@ -121,7 +111,7 @@ void RdcMetricFetcherImpl::get_ecc_error(uint32_t gpu_index,
|
||||
}
|
||||
|
||||
bool RdcMetricFetcherImpl::async_get_pcie_throughput(uint32_t gpu_index,
|
||||
uint32_t field_id, rdc_field_value* value) {
|
||||
rdc_field_t field_id, rdc_field_value* value) {
|
||||
if (!value) {
|
||||
return false;
|
||||
}
|
||||
@@ -216,7 +206,7 @@ void RdcMetricFetcherImpl::get_pcie_throughput(const RdcFieldKey& key) {
|
||||
}
|
||||
|
||||
rdc_status_t RdcMetricFetcherImpl::fetch_smi_field(uint32_t gpu_index,
|
||||
uint32_t field_id, rdc_field_value* value) {
|
||||
rdc_field_t field_id, rdc_field_value* value) {
|
||||
if (!value) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@ THE SOFTWARE.
|
||||
#include "rdc_lib/impl/RdcMetricsUpdaterImpl.h"
|
||||
#include <sys/time.h>
|
||||
#include <ctime>
|
||||
#include <chrono>
|
||||
#include <chrono> // NOLINT(build/c++11)
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
|
||||
namespace amd {
|
||||
|
||||
@@ -176,7 +176,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_watch(rdc_gpu_group_t group_id,
|
||||
rdc_field_grp_t field_group_id, uint64_t update_freq,
|
||||
double max_keep_age, uint32_t max_keep_samples) {
|
||||
std::lock_guard<std::mutex> guard(watch_mutex_);
|
||||
RdcFieldKey gkey({group_id, field_group_id});
|
||||
RdcFieldGroupKey gkey({group_id, field_group_id});
|
||||
auto table_iter = watch_table_.find(gkey);
|
||||
|
||||
// Already in the watch table
|
||||
@@ -234,7 +234,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_watch(rdc_gpu_group_t group_id,
|
||||
}
|
||||
|
||||
rdc_status_t RdcWatchTableImpl::update_field_in_table_when_unwatch(
|
||||
const RdcFieldKey& entry) {
|
||||
const RdcFieldGroupKey& entry) {
|
||||
// Get individual fields for this unwatch
|
||||
std::vector<RdcFieldKey> fields;
|
||||
rdc_status_t result = get_fields_from_group(
|
||||
@@ -306,7 +306,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_unwatch(
|
||||
|
||||
std::lock_guard<std::mutex> guard(watch_mutex_);
|
||||
// Set is_watching = false
|
||||
auto ite = watch_table_.find(RdcFieldKey({group_id, field_group_id}));
|
||||
auto ite = watch_table_.find(RdcFieldGroupKey({group_id, field_group_id}));
|
||||
if (ite == watch_table_.end()) {
|
||||
return RDC_ST_NOT_FOUND;
|
||||
}
|
||||
@@ -318,7 +318,7 @@ rdc_status_t RdcWatchTableImpl::rdc_field_unwatch(
|
||||
}
|
||||
|
||||
bool RdcWatchTableImpl::is_job_watch_field(uint32_t gpu_index,
|
||||
uint32_t field_id, std::string& job_id) const {
|
||||
rdc_field_t field_id, std::string& job_id) const {
|
||||
RdcFieldKey key{gpu_index, field_id};
|
||||
|
||||
for (auto ite = job_watch_table_.begin();
|
||||
|
||||
@@ -290,7 +290,7 @@ rdc_status_t RdcStandaloneHandler::rdc_group_gpu_add(rdc_gpu_group_t group_id,
|
||||
}
|
||||
|
||||
rdc_status_t RdcStandaloneHandler::rdc_group_field_create(
|
||||
uint32_t num_field_ids, uint32_t* field_ids,
|
||||
uint32_t num_field_ids, rdc_field_t* field_ids,
|
||||
const char* field_group_name, rdc_field_grp_t* rdc_field_group_id) {
|
||||
if (!field_ids || !field_group_name || !rdc_field_group_id) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
@@ -339,7 +339,8 @@ rdc_status_t RdcStandaloneHandler::rdc_group_field_get_info(
|
||||
strncpy_with_null(field_group_info->group_name,
|
||||
reply.filed_group_name().c_str(), RDC_MAX_STR_LENGTH);
|
||||
for (int i = 0; i < reply.field_ids_size(); i++) {
|
||||
field_group_info->field_ids[i] = reply.field_ids(i);
|
||||
field_group_info->field_ids[i] =
|
||||
static_cast<rdc_field_t>(reply.field_ids(i));
|
||||
}
|
||||
|
||||
return RDC_ST_OK;
|
||||
@@ -471,7 +472,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_watch(rdc_gpu_group_t group_id,
|
||||
}
|
||||
|
||||
rdc_status_t RdcStandaloneHandler::rdc_field_get_latest_value(
|
||||
uint32_t gpu_index, uint32_t field, rdc_field_value* value) {
|
||||
uint32_t gpu_index, rdc_field_t field, rdc_field_value* value) {
|
||||
if (!value) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
}
|
||||
@@ -487,7 +488,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_get_latest_value(
|
||||
rdc_status_t err_status = error_handle(status, reply.status());
|
||||
if (err_status != RDC_ST_OK) return err_status;
|
||||
|
||||
value->field_id = reply.field_id();
|
||||
value->field_id = static_cast<rdc_field_t>(reply.field_id());
|
||||
value->status = reply.rdc_status();
|
||||
value->ts = reply.ts();
|
||||
value->type = static_cast<rdc_field_type_t>(reply.type());
|
||||
@@ -504,7 +505,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_get_latest_value(
|
||||
}
|
||||
|
||||
rdc_status_t RdcStandaloneHandler::rdc_field_get_value_since(uint32_t gpu_index,
|
||||
uint32_t field, uint64_t since_time_stamp,
|
||||
rdc_field_t field, uint64_t since_time_stamp,
|
||||
uint64_t *next_since_time_stamp, rdc_field_value* value) {
|
||||
if (!next_since_time_stamp || !value) {
|
||||
return RDC_ST_BAD_PARAMETER;
|
||||
@@ -522,7 +523,7 @@ rdc_status_t RdcStandaloneHandler::rdc_field_get_value_since(uint32_t gpu_index,
|
||||
rdc_status_t err_status = error_handle(status, reply.status());
|
||||
if (err_status != RDC_ST_OK) return err_status;
|
||||
|
||||
value->field_id = reply.field_id();
|
||||
value->field_id = static_cast<rdc_field_t>(reply.field_id());
|
||||
value->status = reply.rdc_status();
|
||||
value->ts = reply.ts();
|
||||
value->type = static_cast<rdc_field_type_t>(reply.type());
|
||||
|
||||
@@ -80,6 +80,8 @@ set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${SRC_DIR}/RdciFieldGroupSubSystem.cc")
|
||||
set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${SRC_DIR}/RdciDmonSubSystem.cc")
|
||||
set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${SRC_DIR}/RdciStatsSubSystem.cc")
|
||||
set(RDCI_SRC_LIST ${RDCI_SRC_LIST} "${PROJECT_SOURCE_DIR}/common/rdc_utils.cc")
|
||||
set(RDCI_SRC_LIST ${RDCI_SRC_LIST}
|
||||
"${PROJECT_SOURCE_DIR}/common/rdc_fields_supported.cc")
|
||||
message("RDCI_SRC_LIST=${RDCI_SRC_LIST}")
|
||||
set(RDCI_EXE "rdci")
|
||||
|
||||
|
||||
@@ -61,7 +61,7 @@ class RdciDmonSubSystem: public RdciSubSystem {
|
||||
};
|
||||
|
||||
std::map<OPTIONS, uint32_t> options_;
|
||||
std::vector<uint32_t> field_ids_;
|
||||
std::vector<rdc_field_t> field_ids_;
|
||||
std::vector<uint32_t> gpu_indexes_;
|
||||
bool need_cleanup_;
|
||||
|
||||
|
||||
@@ -47,8 +47,6 @@ class RdciSubSystem {
|
||||
std::vector<std::string> split_string(const std::string& s,
|
||||
char delimiter) const;
|
||||
void show_common_usage() const;
|
||||
bool get_field_id_from_name(const std::string& name,
|
||||
uint32_t& value) const; // NOLINT(runtime/references)
|
||||
rdc_handle_t rdc_handle_;
|
||||
std::string ip_port_;
|
||||
|
||||
|
||||
@@ -25,8 +25,13 @@ THE SOFTWARE.
|
||||
#include <signal.h>
|
||||
#include <limits>
|
||||
#include <iomanip>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "common/rdc_utils.h"
|
||||
#include "common/rdc_fields_supported.h"
|
||||
#include "rdc/rdc.h"
|
||||
#include "rdc_lib/RdcException.h"
|
||||
|
||||
@@ -147,14 +152,16 @@ void RdciDmonSubSystem::parse_cmd_opts(int argc, char ** argv) {
|
||||
std::vector<std::string> vec_ids = split_string(field_ids, ',');
|
||||
for (uint32_t i = 0; i < vec_ids.size(); i++) {
|
||||
if (!IsNumber(vec_ids[i])) {
|
||||
uint32_t field_id = 0;
|
||||
if (!get_field_id_from_name(vec_ids[i], field_id)) {
|
||||
rdc_field_t field_id = RDC_FI_INVALID;
|
||||
if (!amd::rdc::get_field_id_from_name(vec_ids[i],
|
||||
&field_id)) {
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The field name "+vec_ids[i]+" is not valid");
|
||||
}
|
||||
field_ids_.push_back(field_id);
|
||||
} else {
|
||||
field_ids_.push_back(std::stoi(vec_ids[i]));
|
||||
field_ids_.push_back(static_cast<rdc_field_t>(
|
||||
std::stoi(vec_ids[i])));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -265,7 +272,7 @@ void RdciDmonSubSystem::create_temp_field_group() {
|
||||
|
||||
const std::string field_group_name("rdci-dmon-field-group");
|
||||
rdc_field_grp_t group_id;
|
||||
uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP];
|
||||
rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP];
|
||||
for (uint32_t i = 0; i < field_ids_.size(); i++) {
|
||||
field_ids[i] = field_ids_[i];
|
||||
}
|
||||
@@ -280,24 +287,22 @@ void RdciDmonSubSystem::create_temp_field_group() {
|
||||
options_.insert({OPTIONS_FIELD_GROUP_ID, group_id});
|
||||
}
|
||||
|
||||
|
||||
void RdciDmonSubSystem::show_field_usage() const {
|
||||
std::cout << "Supported fields Ids:\n";
|
||||
std::cout << "100 RDC_FI_GPU_CLOCK: Current GPU clock frequencies.\n";
|
||||
std::cout << "101 RDC_FI_MEM_CLOCK: Current Memory clock frequencies.\n";
|
||||
std::cout << "140 RDC_FI_MEMORY_TEMP: Memory "
|
||||
<< "temperature in millidegrees Celsius.\n";
|
||||
std::cout << "150 RDC_FI_GPU_TEMP: GPU "
|
||||
<< "temperature in millidegrees Celsius.\n";
|
||||
std::cout << "155 RDC_FI_POWER_USAGE: Power usage in microwatts.\n";
|
||||
std::cout << "200 RDC_FI_PCIE_TX: PCIe Tx utilization in bytes/second.\n";
|
||||
std::cout << "201 RDC_FI_PCIE_RX: PCIe Rx utilization in bytes/second.\n";
|
||||
std::cout << "203 RDC_FI_GPU_UTIL: GPU busy percentage.\n";
|
||||
std::cout << "312 RDC_FI_ECC_CORRECT_TOTAL: Accumulated "
|
||||
<< "correctable ECC errors.\n";
|
||||
std::cout << "313 RDC_FI_ECC_UNCORRECT_TOTAL: Accumulated "
|
||||
<< "uncorrectable ECC errors.\n";
|
||||
std::cout << "525 RDC_FI_GPU_MEMORY_USAGE: Memory usage of the GPU "
|
||||
<< "instance in bytes.\n";
|
||||
std::cout << "Supported fields Ids:" << std::endl;
|
||||
|
||||
amd::rdc::fld_id2name_map_t &field_id_to_descript =
|
||||
amd::rdc::get_field_id_description_from_id();
|
||||
for (auto i = field_id_to_descript.begin();
|
||||
i != field_id_to_descript.end(); i++) {
|
||||
std::cout << i->first << " " << i->second.enum_name << " : " <<
|
||||
i->second.description << "." << std::endl;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
std::cout << "* Note: The field ID number associated with a field ID can "
|
||||
"change" << std::endl;
|
||||
std::cout << " from release to release. Field name strings should be "
|
||||
"used in scripts." << std::endl;
|
||||
}
|
||||
|
||||
void RdciDmonSubSystem::process() {
|
||||
|
||||
@@ -24,6 +24,7 @@ THE SOFTWARE.
|
||||
#include <unistd.h>
|
||||
#include "rdc_lib/rdc_common.h"
|
||||
#include "common/rdc_utils.h"
|
||||
#include "common/rdc_fields_supported.h"
|
||||
#include "rdc/rdc.h"
|
||||
#include "rdc_lib/RdcException.h"
|
||||
|
||||
@@ -165,15 +166,16 @@ void RdciFieldGroupSubSystem::process() {
|
||||
"Must specify the group name when create a field group");
|
||||
}
|
||||
std::vector<std::string> fields = split_string(field_ids_, ',');
|
||||
uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP];
|
||||
rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP];
|
||||
for (uint32_t i = 0; i < fields.size(); i++) {
|
||||
if (!IsNumber(fields[i])) {
|
||||
if (!get_field_id_from_name(fields[i], field_ids[i])) {
|
||||
if (!get_field_id_from_name(fields[i], &field_ids[i])) {
|
||||
throw RdcException(RDC_ST_BAD_PARAMETER,
|
||||
"The field name "+fields[i]+" is not valid");
|
||||
}
|
||||
} else {
|
||||
field_ids[i] = std::stoi(fields[i]);
|
||||
field_ids[i] =
|
||||
static_cast<rdc_field_t>(std::stoi(fields[i]));
|
||||
}
|
||||
}
|
||||
rdc_field_grp_t group_id;
|
||||
|
||||
@@ -23,6 +23,7 @@ THE SOFTWARE.
|
||||
#include <sstream>
|
||||
#include "rdc_lib/RdcException.h"
|
||||
#include "common/rdc_utils.h"
|
||||
#include "common/rdc_fields_supported.h"
|
||||
|
||||
namespace amd {
|
||||
namespace rdc {
|
||||
@@ -45,34 +46,6 @@ bool RdciSubSystem::is_json_output() const {
|
||||
return is_json_output_;
|
||||
}
|
||||
|
||||
bool RdciSubSystem::get_field_id_from_name(
|
||||
const std::string& name, uint32_t& value) const {
|
||||
const std::map<std::string, uint32_t> field_name_to_id = {
|
||||
{"RDC_FI_GPU_MEMORY_USAGE", 525},
|
||||
{"RDC_FI_GPU_MEMORY_TOTAL", 580},
|
||||
{"RDC_FI_POWER_USAGE", 155},
|
||||
{"RDC_FI_GPU_CLOCK", 100},
|
||||
{"RDC_FI_MEM_CLOCK", 101},
|
||||
{"RDC_FI_PCIE_TX", 200},
|
||||
{"RDC_FI_PCIE_RX", 201},
|
||||
{"RDC_FI_GPU_UTIL", 203},
|
||||
{"RDC_FI_ECC_CORRECT_TOTAL", 312},
|
||||
{"RDC_FI_ECC_UNCORRECT_TOTAL", 313},
|
||||
{"RDC_FI_MEMORY_TEMP", 140},
|
||||
{"RDC_FI_GPU_TEMP", 150},
|
||||
{"RDC_FI_GPU_COUNT", 4},
|
||||
{"RDC_FI_DEV_NAME", 50}
|
||||
};
|
||||
|
||||
auto id = field_name_to_id.find(name);
|
||||
if (id == field_name_to_id.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
value = id->second;
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<std::string> RdciSubSystem::split_string(const std::string& s,
|
||||
char delimiter) const {
|
||||
std::vector<std::string> tokens;
|
||||
|
||||
@@ -245,9 +245,9 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() {
|
||||
}
|
||||
|
||||
rdc_field_grp_t field_group_id;
|
||||
uint32_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP];
|
||||
rdc_field_t field_ids[RDC_MAX_FIELD_IDS_PER_FIELD_GROUP];
|
||||
for (int i = 0; i < request->field_ids_size(); i++) {
|
||||
field_ids[i] = request->field_ids(i);
|
||||
field_ids[i] = static_cast<rdc_field_t>(request->field_ids(i));
|
||||
}
|
||||
rdc_status_t result = rdc_group_field_create(
|
||||
rdc_handle_, request->field_ids_size() , &field_ids[0],
|
||||
@@ -331,8 +331,9 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() {
|
||||
}
|
||||
|
||||
rdc_field_value value;
|
||||
rdc_status_t result = rdc_field_get_latest_value(
|
||||
rdc_handle_, request->gpu_index(), request->field_id(), &value);
|
||||
rdc_status_t result = rdc_field_get_latest_value(rdc_handle_,
|
||||
request->gpu_index(), static_cast<rdc_field_t>(request->field_id()),
|
||||
&value);
|
||||
reply->set_status(result);
|
||||
if (result != RDC_ST_OK) {
|
||||
return ::grpc::Status::OK;
|
||||
@@ -365,9 +366,9 @@ RdcAPIServiceImpl::~RdcAPIServiceImpl() {
|
||||
|
||||
rdc_field_value value;
|
||||
uint64_t next_timestamp;
|
||||
rdc_status_t result = rdc_field_get_value_since(
|
||||
rdc_handle_, request->gpu_index(), request->field_id(),
|
||||
request->since_time_stamp(), &next_timestamp, &value);
|
||||
rdc_status_t result = rdc_field_get_value_since(rdc_handle_,
|
||||
request->gpu_index(), static_cast<rdc_field_t>(request->field_id()),
|
||||
request->since_time_stamp(), &next_timestamp, &value);
|
||||
reply->set_status(result);
|
||||
if (result != RDC_ST_OK) {
|
||||
return ::grpc::Status::OK;
|
||||
|
||||
@@ -33,7 +33,8 @@ THE SOFTWARE.
|
||||
|
||||
TestRdciDiscovery::TestRdciDiscovery() : TestBase() {
|
||||
set_title("\tRDC Discovery Test");
|
||||
set_description("\tThe Discovery tests verifies that the GPUs are discovered and identified .");
|
||||
set_description("\tThe Discovery tests verifies that the GPUs are "
|
||||
"discovered and identified .");
|
||||
}
|
||||
|
||||
TestRdciDiscovery::~TestRdciDiscovery(void) {
|
||||
@@ -58,17 +59,16 @@ void TestRdciDiscovery::DisplayResults(void) const {
|
||||
void TestRdciDiscovery::Close() {
|
||||
TestBase::Close();
|
||||
rdc_status_t result;
|
||||
if(standalone_){
|
||||
if (standalone_) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Disconnecting from host....\n" << std::endl;
|
||||
}
|
||||
std::cout << "\t**Disconnecting from host....\n" << std::endl;
|
||||
}
|
||||
result = rdc_disconnect(rdc_handle);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Stopping Embedded RDC Engine....\n" << std::endl;
|
||||
}
|
||||
}
|
||||
result = rdc_stop_embedded(rdc_handle);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
@@ -77,20 +77,17 @@ void TestRdciDiscovery::Close() {
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
|
||||
|
||||
void TestRdciDiscovery::Run(void) {
|
||||
|
||||
TestBase::Run();
|
||||
rdc_status_t result;
|
||||
if(standalone_){
|
||||
if (standalone_) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Connecting to host....\n" << std::endl;
|
||||
}
|
||||
char hostIpAddress[] = {"localhost:50051"};
|
||||
result = rdc_connect(hostIpAddress, &rdc_handle, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Starting embedded RDC engine....\n" << std::endl;
|
||||
}
|
||||
@@ -121,17 +118,17 @@ void TestRdciDiscovery::Run(void) {
|
||||
|
||||
rdc_device_attributes_t attribute;
|
||||
for (uint32_t i = 0; i < count; i++) {
|
||||
|
||||
result = rdc_device_get_attributes(0, gpu_index_list[i], &attribute);
|
||||
ASSERT_EQ(result, RDC_ST_INVALID_HANDLER);
|
||||
|
||||
result = rdc_device_get_attributes(rdc_handle, gpu_index_list[i], 0);
|
||||
ASSERT_EQ(result, RDC_ST_INVALID_HANDLER);
|
||||
|
||||
result = rdc_device_get_attributes(rdc_handle,gpu_index_list[i], &attribute);
|
||||
result = rdc_device_get_attributes(rdc_handle, gpu_index_list[i],
|
||||
&attribute);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
|
||||
std::cout << "\tGPU ID "<< i << " || " << attribute.device_name <<std::endl;
|
||||
std::cout << "\tGPU ID "<< i << " || " <<
|
||||
attribute.device_name <<std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -33,7 +33,8 @@ THE SOFTWARE.
|
||||
|
||||
TestRdciDmon::TestRdciDmon() : TestBase() {
|
||||
set_title("\tRDC Dmon Test");
|
||||
set_description("\tThe Dmon tests verifies that the GPUs metrics are being monitored. ");
|
||||
set_description(
|
||||
"\tThe Dmon tests verifies that the GPUs metrics are being monitored. ");
|
||||
}
|
||||
|
||||
TestRdciDmon::~TestRdciDmon(void) {
|
||||
@@ -58,39 +59,35 @@ void TestRdciDmon::DisplayResults(void) const {
|
||||
void TestRdciDmon::Close() {
|
||||
TestBase::Close();
|
||||
rdc_status_t result;
|
||||
if(standalone_){
|
||||
if (standalone_) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Disconnecting from host....\n" << std::endl;
|
||||
}
|
||||
std::cout << "\t**Disconnecting from host....\n" << std::endl;
|
||||
}
|
||||
result = rdc_disconnect(rdc_handle);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Stopping Embedded RDC Engine....\n" << std::endl;
|
||||
}
|
||||
std::cout << "\t**Stopping Embedded RDC Engine....\n" << std::endl;
|
||||
}
|
||||
result = rdc_stop_embedded(rdc_handle);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
};
|
||||
}
|
||||
|
||||
result = rdc_shutdown();
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
|
||||
|
||||
void TestRdciDmon::Run(void) {
|
||||
|
||||
TestBase::Run();
|
||||
rdc_status_t result;
|
||||
if(standalone_){
|
||||
if (standalone_) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Connecting to host....\n" << std::endl;
|
||||
}
|
||||
char hostIpAddress[] = {"localhost:50051"};
|
||||
result = rdc_connect(hostIpAddress, &rdc_handle, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Starting embedded RDC engine....\n" << std::endl;
|
||||
}
|
||||
@@ -102,7 +99,7 @@ void TestRdciDmon::Run(void) {
|
||||
rdc_gpu_group_t group_id;
|
||||
rdc_field_grp_t field_group_id;
|
||||
result = rdc_group_gpu_create(rdc_handle, RDC_GROUP_EMPTY,
|
||||
"GRP_DMON", &group_id);
|
||||
"GRP_DMON", &group_id);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
|
||||
result = rdc_group_gpu_add(rdc_handle, group_id, 0);
|
||||
@@ -118,9 +115,11 @@ void TestRdciDmon::Run(void) {
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
ASSERT_GT(group_info.count, 0);
|
||||
|
||||
uint32_t field_ids[]= {150,155,203};
|
||||
rdc_field_t field_ids[]= {RDC_FI_GPU_TEMP, RDC_FI_POWER_USAGE,
|
||||
RDC_FI_GPU_UTIL};
|
||||
uint32_t fsize = sizeof(field_ids)/sizeof(field_ids[0]);
|
||||
result = rdc_group_field_create(rdc_handle, fsize , &field_ids[0], "FIELD_GRP", &field_group_id);
|
||||
result = rdc_group_field_create(rdc_handle, fsize , &field_ids[0],
|
||||
"FIELD_GRP", &field_group_id);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
|
||||
result = rdc_field_watch(rdc_handle, -1, field_group_id, 0, 60, 10);
|
||||
@@ -140,5 +139,4 @@ void TestRdciDmon::Run(void) {
|
||||
|
||||
result = rdc_group_field_destroy(rdc_handle, field_group_id);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
|
||||
}
|
||||
|
||||
@@ -30,10 +30,10 @@ THE SOFTWARE.
|
||||
#include "rdc_tests/test_common.h"
|
||||
#include "rdc/rdc.h"
|
||||
|
||||
|
||||
TestRdciFieldgroup::TestRdciFieldgroup() : TestBase() {
|
||||
set_title("\tRDC Fieldgroup Test");
|
||||
set_description("\tThe Fieldgroup tests verifies the creation/deletion of fieldgroups.");
|
||||
set_description(
|
||||
"\tThe Fieldgroup tests verifies the creation/deletion of fieldgroups.");
|
||||
}
|
||||
|
||||
TestRdciFieldgroup::~TestRdciFieldgroup(void) {
|
||||
@@ -58,17 +58,16 @@ void TestRdciFieldgroup::DisplayResults(void) const {
|
||||
void TestRdciFieldgroup::Close() {
|
||||
TestBase::Close();
|
||||
rdc_status_t result;
|
||||
if(standalone_){
|
||||
if (standalone_) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Disconnecting from host....\n" << std::endl;
|
||||
}
|
||||
std::cout << "\t**Disconnecting from host....\n" << std::endl;
|
||||
}
|
||||
result = rdc_disconnect(rdc_handle);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Stopping Embedded RDC Engine....\n" << std::endl;
|
||||
}
|
||||
std::cout << "\t**Stopping Embedded RDC Engine....\n" << std::endl;
|
||||
}
|
||||
result = rdc_stop_embedded(rdc_handle);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
@@ -77,20 +76,17 @@ void TestRdciFieldgroup::Close() {
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
|
||||
|
||||
void TestRdciFieldgroup::Run(void) {
|
||||
|
||||
TestBase::Run();
|
||||
rdc_status_t result;
|
||||
if(standalone_){
|
||||
if (standalone_) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Connecting to host....\n" << std::endl;
|
||||
}
|
||||
char hostIpAddress[] = {"localhost:50051"};
|
||||
result = rdc_connect(hostIpAddress, &rdc_handle, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Starting embedded RDC engine....\n" << std::endl;
|
||||
}
|
||||
@@ -99,25 +95,30 @@ void TestRdciFieldgroup::Run(void) {
|
||||
}
|
||||
|
||||
rdc_field_grp_t field_group_id;
|
||||
uint32_t field_ids[]= {150,155};
|
||||
uint32_t invalid_field_ids[]= {10,20};
|
||||
rdc_field_t field_ids[]= {RDC_FI_GPU_TEMP, RDC_FI_POWER_USAGE};
|
||||
rdc_field_t invalid_field_ids[]= {RDC_FI_INVALID, RDC_FI_INVALID};
|
||||
uint32_t fsize = sizeof(field_ids)/sizeof(field_ids[0]);
|
||||
uint32_t count = 0;
|
||||
rdc_field_group_info_t group_info;
|
||||
|
||||
result = rdc_group_field_create(rdc_handle, fsize, &invalid_field_ids[0], "FIELD_GRP", &field_group_id);
|
||||
result = rdc_group_field_create(rdc_handle, fsize, &invalid_field_ids[0],
|
||||
"FIELD_GRP", &field_group_id);
|
||||
ASSERT_EQ(result, RDC_ST_NOT_SUPPORTED);
|
||||
|
||||
result = rdc_group_field_create(NULL, fsize, &field_ids[0], "FIELD_GRP", &field_group_id);
|
||||
result = rdc_group_field_create(NULL, fsize, &field_ids[0],
|
||||
"FIELD_GRP", &field_group_id);
|
||||
ASSERT_EQ(result, RDC_ST_INVALID_HANDLER);
|
||||
|
||||
result = rdc_group_field_create(rdc_handle, fsize, &field_ids[0], NULL, NULL);
|
||||
result = rdc_group_field_create(rdc_handle, fsize, &field_ids[0],
|
||||
NULL, NULL);
|
||||
ASSERT_EQ(result, RDC_ST_INVALID_HANDLER);
|
||||
|
||||
result = rdc_group_field_create(rdc_handle, (RDC_MAX_FIELD_IDS_PER_FIELD_GROUP+1), &field_ids[0], "FIELD_GRP", NULL);
|
||||
result = rdc_group_field_create(rdc_handle,
|
||||
(RDC_MAX_FIELD_IDS_PER_FIELD_GROUP+1), &field_ids[0], "FIELD_GRP", NULL);
|
||||
ASSERT_EQ(result, RDC_ST_INVALID_HANDLER);
|
||||
|
||||
result = rdc_group_field_create(rdc_handle, fsize, &field_ids[0], "FIELD_GRP", &field_group_id);
|
||||
result = rdc_group_field_create(rdc_handle, fsize, &field_ids[0],
|
||||
"FIELD_GRP", &field_group_id);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
|
||||
rdc_field_grp_t group_id_list[RDC_MAX_NUM_FIELD_GROUPS];
|
||||
@@ -134,7 +135,8 @@ void TestRdciFieldgroup::Run(void) {
|
||||
result = rdc_group_field_get_info(rdc_handle, group_id_list[i], 0);
|
||||
ASSERT_EQ(result, RDC_ST_BAD_PARAMETER);
|
||||
|
||||
result = rdc_group_field_get_info(rdc_handle, group_id_list[i], &group_info);
|
||||
result = rdc_group_field_get_info(rdc_handle, group_id_list[i],
|
||||
&group_info);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
|
||||
@@ -143,5 +145,4 @@ void TestRdciFieldgroup::Run(void) {
|
||||
|
||||
result = rdc_group_field_destroy(rdc_handle, field_group_id);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
|
||||
}
|
||||
|
||||
@@ -30,8 +30,6 @@ THE SOFTWARE.
|
||||
#include "rdc_tests/test_common.h"
|
||||
#include "rdc/rdc.h"
|
||||
|
||||
|
||||
|
||||
TestRdciGroup::TestRdciGroup() : TestBase() {
|
||||
set_title("\tRDC Group Test");
|
||||
set_description("\tThe Group tests verifies creation/deletion of GPU groups");
|
||||
@@ -59,39 +57,35 @@ void TestRdciGroup::DisplayResults(void) const {
|
||||
void TestRdciGroup::Close() {
|
||||
TestBase::Close();
|
||||
rdc_status_t result;
|
||||
if(standalone_){
|
||||
if (standalone_) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Disconnecting from host....\n" << std::endl;
|
||||
}
|
||||
std::cout << "\t**Disconnecting from host....\n" << std::endl;
|
||||
}
|
||||
result = rdc_disconnect(rdc_handle);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Stopping Embedded RDC Engine....\n" << std::endl;
|
||||
}
|
||||
std::cout << "\t**Stopping Embedded RDC Engine....\n" << std::endl;
|
||||
}
|
||||
result = rdc_stop_embedded(rdc_handle);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
};
|
||||
}
|
||||
|
||||
result = rdc_shutdown();
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
|
||||
|
||||
void TestRdciGroup::Run(void) {
|
||||
|
||||
TestBase::Run();
|
||||
rdc_status_t result;
|
||||
if(standalone_){
|
||||
if (standalone_) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Connecting to host....\n" << std::endl;
|
||||
}
|
||||
char hostIpAddress[] = {"localhost:50051"};
|
||||
result = rdc_connect(hostIpAddress, &rdc_handle, nullptr, nullptr, nullptr);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Starting embedded RDC engine....\n" << std::endl;
|
||||
}
|
||||
@@ -103,31 +97,26 @@ void TestRdciGroup::Run(void) {
|
||||
uint32_t count = 0;
|
||||
rdc_group_info_t group_info;
|
||||
uint32_t gpu_index_list[RDC_MAX_NUM_DEVICES];
|
||||
result = rdc_group_gpu_create(0, RDC_GROUP_EMPTY,
|
||||
"GRP_NAME", &group_id);
|
||||
result = rdc_group_gpu_create(0, RDC_GROUP_EMPTY, "GRP_NAME", &group_id);
|
||||
ASSERT_EQ(result, RDC_ST_INVALID_HANDLER);
|
||||
|
||||
result = rdc_group_gpu_create(rdc_handle, RDC_GROUP_EMPTY,
|
||||
NULL, &group_id);
|
||||
result = rdc_group_gpu_create(rdc_handle, RDC_GROUP_EMPTY, NULL, &group_id);
|
||||
ASSERT_EQ(result, RDC_ST_BAD_PARAMETER);
|
||||
|
||||
result = rdc_group_gpu_create(rdc_handle, RDC_GROUP_EMPTY, "GRP_NAME", NULL);
|
||||
ASSERT_EQ(result, RDC_ST_BAD_PARAMETER);
|
||||
|
||||
result = rdc_group_gpu_create(rdc_handle, RDC_GROUP_EMPTY,
|
||||
"GRP_NAME", NULL);
|
||||
ASSERT_EQ(result, RDC_ST_BAD_PARAMETER);
|
||||
|
||||
result = rdc_group_gpu_create(rdc_handle, RDC_GROUP_EMPTY,
|
||||
"GRP_NAME", &group_id);
|
||||
"GRP_NAME", &group_id);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
|
||||
result = rdc_group_gpu_add(rdc_handle,
|
||||
group_id, -1);
|
||||
result = rdc_group_gpu_add(rdc_handle, group_id, -1);
|
||||
ASSERT_EQ(result, RDC_ST_NOT_FOUND);
|
||||
|
||||
result = rdc_device_get_all(rdc_handle, gpu_index_list, &count);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
for (uint32_t i=0; i < count; i++) {
|
||||
result = rdc_group_gpu_add(rdc_handle,
|
||||
group_id, gpu_index_list[i]);
|
||||
result = rdc_group_gpu_add(rdc_handle, group_id, gpu_index_list[i]);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
|
||||
@@ -161,6 +150,4 @@ void TestRdciGroup::Run(void) {
|
||||
|
||||
result = rdc_group_gpu_destroy(rdc_handle, group_id);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -33,7 +33,8 @@ THE SOFTWARE.
|
||||
|
||||
TestRdciStats::TestRdciStats() : TestBase() {
|
||||
set_title("\tRDC Stats Test");
|
||||
set_description("\tThe Stats tests collects and verifies job statistics running on gpu groups.");
|
||||
set_description("\tThe Stats tests collects and verifies job "
|
||||
"statistics running on gpu groups.");
|
||||
}
|
||||
|
||||
TestRdciStats::~TestRdciStats(void) {
|
||||
@@ -57,18 +58,17 @@ void TestRdciStats::DisplayResults(void) const {
|
||||
|
||||
void TestRdciStats::Close() {
|
||||
TestBase::Close();
|
||||
rdc_status_t result;
|
||||
if(standalone_){
|
||||
rdc_status_t result;
|
||||
if (standalone_) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Disconnecting from host....\n" << std::endl;
|
||||
}
|
||||
}
|
||||
result = rdc_disconnect(rdc_handle);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Stopping Embedded RDC Engine....\n" << std::endl;
|
||||
}
|
||||
}
|
||||
result = rdc_stop_embedded(rdc_handle);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
@@ -77,20 +77,18 @@ void TestRdciStats::Close() {
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
|
||||
|
||||
void TestRdciStats::Run(void) {
|
||||
|
||||
TestBase::Run();
|
||||
rdc_status_t result;
|
||||
if(standalone_){
|
||||
if (standalone_) {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Connecting to host....\n" << std::endl;
|
||||
}
|
||||
char hostIpAddress[] = {"localhost:50051"};
|
||||
result = rdc_connect(hostIpAddress, &rdc_handle, nullptr, nullptr, nullptr);
|
||||
result = rdc_connect(hostIpAddress, &rdc_handle, nullptr,
|
||||
nullptr, nullptr);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
}
|
||||
else{
|
||||
} else {
|
||||
IF_VERB(STANDARD) {
|
||||
std::cout << "\t**Starting embedded RDC engine....\n" << std::endl;
|
||||
}
|
||||
@@ -100,7 +98,7 @@ void TestRdciStats::Run(void) {
|
||||
|
||||
rdc_gpu_group_t group_id;
|
||||
result = rdc_group_gpu_create(rdc_handle, RDC_GROUP_EMPTY,
|
||||
"GRP_NAME", &group_id);
|
||||
"GRP_NAME", &group_id);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
|
||||
result = rdc_group_gpu_add(rdc_handle, group_id, 0);
|
||||
@@ -121,10 +119,9 @@ void TestRdciStats::Run(void) {
|
||||
rdc_job_info_t job_info;
|
||||
result = rdc_job_get_stats(rdc_handle, "0", &job_info);
|
||||
if (result == RDC_ST_NOT_SUPPORTED) {
|
||||
std::cout <<
|
||||
"\t** GPU Metric is not supported"
|
||||
" on this machine" << std::endl;
|
||||
return;
|
||||
std::cout << "\t** GPU Metric is not supported"
|
||||
" on this machine" << std::endl;
|
||||
return;
|
||||
}
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
|
||||
@@ -133,5 +130,4 @@ void TestRdciStats::Run(void) {
|
||||
|
||||
result = rdc_job_remove_all(rdc_handle);
|
||||
ASSERT_EQ(result, RDC_ST_OK);
|
||||
|
||||
}
|
||||
|
||||
Criar uma nova questão referindo esta
Bloquear um utilizador